From 401d279d544c2f7100dbf22d1dc8e865c2b189ac Mon Sep 17 00:00:00 2001 From: Ildar Iskhakov Date: Tue, 30 Jan 2024 16:39:04 +0800 Subject: [PATCH] Refactor create_alert task (#3759) # What this PR does This PR simplifies alert group/alert creation, so the alert is created and escalation is started in the same task. ## Which issue(s) this PR fixes ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 1 + engine/apps/alerts/models/alert.py | 54 +++++++++++--------- engine/apps/alerts/tasks/distribute_alert.py | 2 + engine/apps/alerts/tests/test_alert.py | 7 ++- engine/apps/integrations/tasks.py | 6 +-- 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12da05ae..9aee536c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add check whether organization has Slack connection on update Slack related field using public api endpoints by @Ferril ([#3751](https://github.com/grafana/oncall/pull/3751)) - Fixed calculating the number of on-call users per team by @Ferril ([#3773](https://github.com/grafana/oncall/pull/3773)) +- Refactor create_alert task by @iskhakov ([#3759](https://github.com/grafana/oncall/pull/3759)) ## v1.3.92 (2024-01-23) diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py index a3b5573e..6d084a9c 100644 --- a/engine/apps/alerts/models/alert.py +++ b/engine/apps/alerts/models/alert.py @@ -12,6 +12,8 @@ from django.db.models import JSONField from apps.alerts import tasks from apps.alerts.constants import TASK_DELAY_SECONDS from apps.alerts.incident_appearance.templaters import TemplateLoader +from apps.alerts.signals import alert_group_escalation_snapshot_built +from 
apps.alerts.tasks.distribute_alert import send_alert_create_signal from apps.labels.alert_group_labels import assign_labels from common.jinja_templater import apply_jinja_template from common.jinja_templater.apply_jinja_template import JinjaTemplateError, JinjaTemplateWarning @@ -102,28 +104,16 @@ class Alert(models.Model): if channel_filter is None: channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, force_route_id) + # Get or create group group, group_created = AlertGroup.objects.get_or_create_grouping( channel=alert_receive_channel, channel_filter=channel_filter, group_data=group_data, received_at=received_at, ) + logger.debug(f"alert group {group.pk} created={group_created}") - if group_created: - assign_labels(group, alert_receive_channel, raw_request_data) - group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED) - group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED) - - mark_as_resolved = ( - enable_autoresolve and group_data.is_resolve_signal and alert_receive_channel.allow_source_based_resolving - ) - if not group.resolved and mark_as_resolved: - group.resolve_by_source() - - mark_as_acknowledged = group_data.is_acknowledge_signal - if not group.acknowledged and mark_as_acknowledged: - group.acknowledge_by_source() - + # Create alert alert = cls( is_resolve_signal=group_data.is_resolve_signal, title=title, @@ -135,21 +125,39 @@ class Alert(models.Model): raw_request_data=raw_request_data, is_the_first_alert_in_group=group_created, ) - alert.save() + logger.debug(f"alert {alert.pk} created") + + transaction.on_commit(partial(send_alert_create_signal.apply_async, (alert.pk,))) + + if group_created: + assign_labels(group, alert_receive_channel, raw_request_data) + group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED) + group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED) + + if group_created or alert.group.pause_escalation: + # Build escalation snapshot if needed and 
start escalation + alert.group.start_escalation_if_needed(countdown=TASK_DELAY_SECONDS) + + if group_created: + # TODO: consider moving to start_escalation_if_needed + alert_group_escalation_snapshot_built.send(sender=cls.__class__, alert_group=alert.group) + + mark_as_acknowledged = group_data.is_acknowledge_signal + if not group.acknowledged and mark_as_acknowledged: + group.acknowledge_by_source() + + mark_as_resolved = ( + enable_autoresolve and group_data.is_resolve_signal and alert_receive_channel.allow_source_based_resolving + ) + if not group.resolved and mark_as_resolved: + group.resolve_by_source() # Store exact alert which resolved group. if group.resolved_by == AlertGroup.SOURCE and group.resolved_by_alert is None: group.resolved_by_alert = alert group.save(update_fields=["resolved_by_alert"]) - if settings.DEBUG: - tasks.distribute_alert(alert.pk) - else: - transaction.on_commit( - partial(tasks.distribute_alert.apply_async, (alert.pk,), countdown=TASK_DELAY_SECONDS) - ) - if group_created: # all code below related to maintenance mode maintenance_uuid = None diff --git a/engine/apps/alerts/tasks/distribute_alert.py b/engine/apps/alerts/tasks/distribute_alert.py index f1d146a4..f69f80fe 100644 --- a/engine/apps/alerts/tasks/distribute_alert.py +++ b/engine/apps/alerts/tasks/distribute_alert.py @@ -13,6 +13,8 @@ from .task_logger import task_logger def distribute_alert(alert_id): """ We need this task to make task processing async and to make sure the task is delivered. + This task is not used anymore, but we keep it for the tasks in the queue to be processed. + TODO: remove this task after all the tasks in the queue are processed. 
""" from apps.alerts.models import Alert diff --git a/engine/apps/alerts/tests/test_alert.py b/engine/apps/alerts/tests/test_alert.py index cb2a2d68..d0fc7e24 100644 --- a/engine/apps/alerts/tests/test_alert.py +++ b/engine/apps/alerts/tests/test_alert.py @@ -8,9 +8,9 @@ from apps.alerts.tasks import distribute_alert, escalate_alert_group @pytest.mark.django_db -@patch("apps.alerts.tasks.distribute_alert.distribute_alert.apply_async", return_value=None) +@patch("apps.alerts.tasks.distribute_alert.send_alert_create_signal.apply_async", return_value=None) def test_alert_create_default_channel_filter( - mocked_distribute_alert_task, + mocked_send_alert_create_signal, make_organization, make_alert_receive_channel, make_channel_filter, @@ -30,10 +30,9 @@ def test_alert_create_default_channel_filter( image_url=None, link_to_upstream_details=None, ) - assert alert.group.channel_filter == channel_filter assert len(callbacks) == 1 - mocked_distribute_alert_task.assert_called_once_with((alert.pk,), countdown=1) + mocked_send_alert_create_signal.assert_called_once_with((alert.pk,)) @pytest.mark.django_db diff --git a/engine/apps/integrations/tasks.py b/engine/apps/integrations/tasks.py index 4ce84c2e..94420d62 100644 --- a/engine/apps/integrations/tasks.py +++ b/engine/apps/integrations/tasks.py @@ -63,8 +63,8 @@ def create_alertmanager_alerts(alert_receive_channel_pk, alert, is_demo=False, f alert.group.active_resolve_calculation_id = task.id alert.group.save(update_fields=["active_resolve_calculation_id"]) - logger.info( - f"Created alert alert_id={alert.pk} alert_group_id={alert.group.pk} channel_id={alert_receive_channel.pk}" + logger.debug( + f"Created alertmanager alert alert_id={alert.pk} alert_group_id={alert.group.pk} channel_id={alert_receive_channel.pk}" ) @@ -109,7 +109,7 @@ def create_alert( is_demo=is_demo, received_at=received_at, ) - logger.info( + logger.debug( f"Created alert alert_id={alert.pk} alert_group_id={alert.group.pk} 
channel_id={alert_receive_channel.pk}" ) except ConcurrentUpdateError: