From 26a2bd9c91da3b596a66553c819eb836a688a890 Mon Sep 17 00:00:00 2001
From: Innokentii Konstantinov
Date: Thu, 23 Feb 2023 08:13:03 +0100
Subject: [PATCH] Refactor maintenance (#1340)

# What this PR does
This PR simplifies the maintenance mode code.
1. Perform the distribution/escalation maintenance checks in the send_signal... tasks.
2. Use the usual alert distribution flow for the maintenance incident.
3. Decouple maintenance mode from Slack (everything except the **notify_about_maintenance_action** methods, to keep this PR from getting too big).

As a bonus, maintenance mode now mutes alert group delivery in all ChatOps integrations, not only in Slack. (Previously, incidents that happened during maintenance were posted to Telegram and MS Teams anyway.)

## Checklist

- [ ] Tests updated
- [ ] Documentation added
- [ ] `CHANGELOG.md` updated
---
 .../escalation_snapshot_mixin.py              |   5 +
 .../renderers/slack_renderer.py               |   2 +-
 engine/apps/alerts/models/alert.py            |  46 +++---
 .../alerts/models/alert_group_log_record.py   |  13 +-
 .../alerts/models/alert_receive_channel.py    |   2 +
 .../apps/alerts/models/maintainable_object.py |  14 +-
 engine/apps/alerts/tasks/distribute_alert.py  |  23 ++-
 .../apps/alerts/tasks/escalate_alert_group.py |   8 -
 .../tasks/send_update_log_report_signal.py    |  35 +++--
 engine/apps/api/tests/conftest.py             |   6 +-
 .../api/tests/test_alert_receive_channel.py   |   4 +-
 .../alert_group_representative.py             |  12 --
 .../apps/slack/scenarios/distribute_alerts.py | 140 +++++++++---
 .../test_distribute_alerts.py                 |   2 +-
 .../user_management/models/organization.py    |   2 +
 15 files changed, 160 insertions(+), 154 deletions(-)

diff --git a/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py b/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py
index 8b36f03b..1f9072ce 100644
--- a/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py
+++ b/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py
@@ -248,6 +248,11 @@ class EscalationSnapshotMixin:
         """
         AlertGroup = apps.get_model("alerts", "AlertGroup")

+        is_on_maintenance_or_debug_mode = (
+            self.channel.maintenance_mode is not None or self.channel.organization.maintenance_mode is not None
+        )
+        if is_on_maintenance_or_debug_mode:
+            return
         if self.pause_escalation:
             return

diff --git a/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py b/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py
index c81a53d6..4d82c83e 100644
--- a/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py
+++ b/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py
@@ -169,7 +169,7 @@ class AlertGroupSlackRenderer(AlertGroupBaseRenderer):
     def _get_buttons_blocks(self):
         AlertGroup = apps.get_model("alerts", "AlertGroup")
         buttons = []
-        if self.alert_group.maintenance_uuid is None:
+        if not self.alert_group.is_maintenance_incident:
             if not self.alert_group.resolved:
                 if not self.alert_group.acknowledged:
                     buttons.append(
diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py
index f8b012f5..0e108879 100644
--- a/engine/apps/alerts/models/alert.py
+++ b/engine/apps/alerts/models/alert.py
@@ -127,29 +127,31 @@ class Alert(models.Model):

         alert.save()

-        maintenance_uuid = None
-        if alert_receive_channel.organization.maintenance_mode == AlertReceiveChannel.MAINTENANCE:
-            maintenance_uuid = alert_receive_channel.organization.maintenance_uuid
+        if group_created:
+            # all code below is related to maintenance mode
+            maintenance_uuid = None
+            if 
alert_receive_channel.organization.maintenance_mode == AlertReceiveChannel.MAINTENANCE: + maintenance_uuid = alert_receive_channel.organization.maintenance_uuid - elif alert_receive_channel.maintenance_mode == AlertReceiveChannel.MAINTENANCE: - maintenance_uuid = alert_receive_channel.maintenance_uuid + elif alert_receive_channel.maintenance_mode == AlertReceiveChannel.MAINTENANCE: + maintenance_uuid = alert_receive_channel.maintenance_uuid - if maintenance_uuid is not None: - try: - maintenance_incident = AlertGroup.all_objects.get(maintenance_uuid=maintenance_uuid) - group.root_alert_group = maintenance_incident - group.save(update_fields=["root_alert_group"]) - log_record_for_root_incident = maintenance_incident.log_records.create( - type=AlertGroupLogRecord.TYPE_ATTACHED, dependent_alert_group=group, reason="Attach dropdown" - ) - logger.debug( - f"call send_alert_group_signal for alert_group {maintenance_incident.pk} (maintenance), " - f"log record {log_record_for_root_incident.pk} with type " - f"'{log_record_for_root_incident.get_type_display()}'" - ) - send_alert_group_signal.apply_async((log_record_for_root_incident.pk,)) - except AlertGroup.DoesNotExist: - pass + if maintenance_uuid is not None: + try: + maintenance_incident = AlertGroup.all_objects.get(maintenance_uuid=maintenance_uuid) + group.root_alert_group = maintenance_incident + group.save(update_fields=["root_alert_group"]) + log_record_for_root_incident = maintenance_incident.log_records.create( + type=AlertGroupLogRecord.TYPE_ATTACHED, dependent_alert_group=group, reason="Attach dropdown" + ) + logger.debug( + f"call send_alert_group_signal for alert_group {maintenance_incident.pk} (maintenance), " + f"log record {log_record_for_root_incident.pk} with type " + f"'{log_record_for_root_incident.get_type_display()}'" + ) + send_alert_group_signal.apply_async((log_record_for_root_incident.pk,)) + except AlertGroup.DoesNotExist: + pass return alert @@ -264,7 +266,7 @@ def listen_for_alert_model_save(sender, instance, created, *args, **kwargs): """ Here we invoke AlertShootingStep by model saving action. """ - if created and instance.group.maintenance_uuid is None: + if created: # RFCT - why additinal save ? 
instance.save() diff --git a/engine/apps/alerts/models/alert_group_log_record.py b/engine/apps/alerts/models/alert_group_log_record.py index 43f55d8c..2f2a805e 100644 --- a/engine/apps/alerts/models/alert_group_log_record.py +++ b/engine/apps/alerts/models/alert_group_log_record.py @@ -557,10 +557,9 @@ class AlertGroupLogRecord(models.Model): @receiver(post_save, sender=AlertGroupLogRecord) def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs): if instance.type != AlertGroupLogRecord.TYPE_DELETED: - if not instance.alert_group.is_maintenance_incident: - alert_group_pk = instance.alert_group.pk - logger.debug( - f"send_update_log_report_signal for alert_group {alert_group_pk}, " - f"alert group event: {instance.get_type_display()}" - ) - send_update_log_report_signal.apply_async(kwargs={"alert_group_pk": alert_group_pk}, countdown=8) + alert_group_pk = instance.alert_group.pk + logger.debug( + f"send_update_log_report_signal for alert_group {alert_group_pk}, " + f"alert group event: {instance.get_type_display()}" + ) + send_update_log_report_signal.apply_async(kwargs={"alert_group_pk": alert_group_pk}, countdown=8) diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index edafbf34..28c9247e 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -510,6 +510,8 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject): disable_maintenance(alert_receive_channel_id=self.pk, force=True, user_id=user.pk) def notify_about_maintenance_action(self, text, send_to_general_log_channel=True): + # TODO: this method should be refactored. + # It's binded to slack and sending maintenance notification only there. 
channel_ids = list( self.channel_filters.filter(slack_channel_id__isnull=False, notify_in_slack=False).values_list( "slack_channel_id", flat=True diff --git a/engine/apps/alerts/models/maintainable_object.py b/engine/apps/alerts/models/maintainable_object.py index 2c0aaa0c..bfffc82b 100644 --- a/engine/apps/alerts/models/maintainable_object.py +++ b/engine/apps/alerts/models/maintainable_object.py @@ -6,7 +6,6 @@ from django.apps import apps from django.db import models, transaction from django.utils import timezone -from apps.slack.scenarios.scenario_step import ScenarioStep from common.exceptions import MaintenanceCouldNotBeStartedError from common.insight_log import MaintenanceEvent, write_maintenance_insight_log @@ -67,17 +66,6 @@ class MaintainableObject(models.Model): def notify_about_maintenance_action(self, text, send_to_general_log_channel=True): raise NotImplementedError - def send_maintenance_incident(self, organization, group, alert): - slack_team_identity = organization.slack_team_identity - if slack_team_identity is not None: - channel_id = organization.general_log_channel_id - attachments = group.render_slack_attachments() - blocks = group.render_slack_blocks() - AlertShootingStep = ScenarioStep.get_step("distribute_alerts", "AlertShootingStep") - AlertShootingStep(slack_team_identity, organization).publish_slack_messages( - slack_team_identity, group, alert, attachments, channel_id, blocks - ) - def start_maintenance(self, mode, maintenance_duration, user): AlertGroup = apps.get_model("alerts", "AlertGroup") AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel") @@ -142,6 +130,7 @@ class MaintainableObject(models.Model): f" During this time all alerts from integration will be collected here without escalations" ) alert = Alert( + is_the_first_alert_in_group=True, is_resolve_signal=False, title=title, message=message, @@ -154,7 +143,6 @@ class MaintainableObject(models.Model): alert.save() write_maintenance_insight_log(self, user, MaintenanceEvent.STARTED) if mode == AlertReceiveChannel.MAINTENANCE: - self.send_maintenance_incident(organization, group, alert) self.notify_about_maintenance_action( f"Maintenance of {verbal}. Initiated by {user_verbal} for {duration_verbal}.", send_to_general_log_channel=False, diff --git a/engine/apps/alerts/tasks/distribute_alert.py b/engine/apps/alerts/tasks/distribute_alert.py index 01e12f3c..87469ff9 100644 --- a/engine/apps/alerts/tasks/distribute_alert.py +++ b/engine/apps/alerts/tasks/distribute_alert.py @@ -19,14 +19,12 @@ def distribute_alert(alert_id): AlertGroup = apps.get_model("alerts", "AlertGroup") alert = Alert.objects.get(pk=alert_id) - task_logger.debug(f"Start distribute_alert for alert {alert_id} from alert_group {alert.group_id}") + send_alert_create_signal.apply_async((alert_id,)) - - alert_group = AlertGroup.all_objects.filter(pk=alert.group_id).get() - # If it's the first alert, let's launch the escalation! 
if alert.is_the_first_alert_in_group: + alert_group = AlertGroup.all_objects.filter(pk=alert.group_id).get() alert_group.start_escalation_if_needed(countdown=TASK_DELAY_SECONDS) updated_rows = Alert.objects.filter(pk=alert_id, delivered=True).update(delivered=True) @@ -39,12 +37,21 @@ def distribute_alert(alert_id): @shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None + autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None ) def send_alert_create_signal(alert_id): + Alert = apps.get_model("alerts", "Alert") + AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel") + task_logger.debug(f"Started send_alert_create_signal task for alert {alert_id}") - alert_create_signal.send( - sender=send_alert_create_signal, - alert=alert_id, + alert = Alert.objects.get(pk=alert_id) + is_on_maintenace_mode = ( + alert.group.channel.maintenance_mode == AlertReceiveChannel.MAINTENANCE + or alert.group.channel.organization.maintenance_mode == AlertReceiveChannel.MAINTENANCE ) + if not is_on_maintenace_mode: + alert_create_signal.send( + sender=send_alert_create_signal, + alert=alert_id, + ) task_logger.debug(f"Finished send_alert_create_signal task for alert {alert_id} ") diff --git a/engine/apps/alerts/tasks/escalate_alert_group.py b/engine/apps/alerts/tasks/escalate_alert_group.py index 0d9d4548..2ac2942f 100644 --- a/engine/apps/alerts/tasks/escalate_alert_group.py +++ b/engine/apps/alerts/tasks/escalate_alert_group.py @@ -28,14 +28,6 @@ def escalate_alert_group(alert_group_pk): except IndexError: return f"Alert group with pk {alert_group_pk} doesn't exist" - if ( - alert_group.channel.maintenance_mode is not None - or alert_group.channel.organization.maintenance_mode is not None - ): - task_logger.info(f"alert_group {alert_group.pk} organization or alert_receive_channel on maintenance.") - alert_group.stop_escalation() - return - if not compare_escalations(escalate_alert_group.request.id, alert_group.active_escalation_id): return "Active escalation ID mismatch. Duplication or non-active escalation triggered. 
Active: {}".format( alert_group.active_escalation_id diff --git a/engine/apps/alerts/tasks/send_update_log_report_signal.py b/engine/apps/alerts/tasks/send_update_log_report_signal.py index 0a00725f..0705654c 100644 --- a/engine/apps/alerts/tasks/send_update_log_report_signal.py +++ b/engine/apps/alerts/tasks/send_update_log_report_signal.py @@ -4,21 +4,34 @@ from django.conf import settings from apps.alerts.signals import alert_group_update_log_report_signal from common.custom_celery_tasks import shared_dedicated_queue_retry_task +from .task_logger import task_logger + @shared_dedicated_queue_retry_task( autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None ) def send_update_log_report_signal(log_record_pk=None, alert_group_pk=None): - AlertGroupLogRecord = apps.get_model("alerts", "AlertGroupLogRecord") + AlertGroup = apps.get_model("alerts", "AlertGroup") + AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel") - if log_record_pk and not alert_group_pk: # legacy - log_record = AlertGroupLogRecord.objects.get(pk=log_record_pk) - if log_record.type == AlertGroupLogRecord.TYPE_DELETED: - return - alert_group_pk = log_record.alert_group.pk - - if alert_group_pk is not None: - alert_group_update_log_report_signal.send( - sender=send_update_log_report_signal, - alert_group=alert_group_pk, + alert_group = AlertGroup.all_objects.get(id=alert_group_pk) + if alert_group.is_maintenance_incident: + task_logger.debug( + f'send_update_log_report_signal: alert_group={alert_group_pk} msg="skip alert_group_update_log_report_signal, alert group is maintenance incident "' ) + return + + is_on_maintenace_mode = ( + alert_group.channel.maintenance_mode == AlertReceiveChannel.MAINTENANCE + or alert_group.channel.organization.maintenance_mode == AlertReceiveChannel.MAINTENANCE + ) + if is_on_maintenace_mode: + task_logger.debug( + f'send_update_log_report_signal: alert_group={alert_group_pk} msg="skip alert_group_update_log_report_signal due to maintenace"' + ) + return + + alert_group_update_log_report_signal.send( + sender=send_update_log_report_signal, + alert_group=alert_group_pk, + ) diff --git a/engine/apps/api/tests/conftest.py b/engine/apps/api/tests/conftest.py index 6fb534a1..bfeedb25 100644 --- a/engine/apps/api/tests/conftest.py +++ b/engine/apps/api/tests/conftest.py @@ -61,8 +61,8 @@ def make_resolved_ack_new_silenced_alert_groups(make_alert_group, make_alert_rec @pytest.fixture() -def mock_alert_shooting_step_publish_slack_messages(monkeypatch): - def mock_publish_slack_messages(*args, **kwargs): +def mock_alert_shooting_step_post_alert_group_to_slack(monkeypatch): + def mock_post_alert_group_to_slack(*args, **kwargs): return None - monkeypatch.setattr(AlertShootingStep, "publish_slack_messages", mock_publish_slack_messages) + monkeypatch.setattr(AlertShootingStep, "_post_alert_group_to_slack", mock_post_alert_group_to_slack) diff --git a/engine/apps/api/tests/test_alert_receive_channel.py b/engine/apps/api/tests/test_alert_receive_channel.py index f6153c06..ceee5551 100644 --- a/engine/apps/api/tests/test_alert_receive_channel.py +++ b/engine/apps/api/tests/test_alert_receive_channel.py @@ -126,7 +126,7 @@ def test_integration_filter_by_maintenance( alert_receive_channel_internal_api_setup, make_user_auth_headers, mock_start_disable_maintenance_task, - mock_alert_shooting_step_publish_slack_messages, + mock_alert_shooting_step_post_alert_group_to_slack, ): user, token, alert_receive_channel = alert_receive_channel_internal_api_setup client = 
APIClient() @@ -149,7 +149,7 @@ def test_integration_filter_by_debug( alert_receive_channel_internal_api_setup, make_user_auth_headers, mock_start_disable_maintenance_task, - mock_alert_shooting_step_publish_slack_messages, + mock_alert_shooting_step_post_alert_group_to_slack, ): user, token, alert_receive_channel = alert_receive_channel_internal_api_setup client = APIClient() diff --git a/engine/apps/slack/representatives/alert_group_representative.py b/engine/apps/slack/representatives/alert_group_representative.py index 2fc26c69..28b498cc 100644 --- a/engine/apps/slack/representatives/alert_group_representative.py +++ b/engine/apps/slack/representatives/alert_group_representative.py @@ -21,7 +21,6 @@ def on_create_alert_slack_representative_async(alert_pk): It's asynced in order to prevent Slack downtime causing issues with SMS and other destinations. """ Alert = apps.get_model("alerts", "Alert") - AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel") alert = ( Alert.objects.filter(pk=alert_pk) @@ -35,17 +34,6 @@ def on_create_alert_slack_representative_async(alert_pk): ) logger.debug(f"Start on_create_alert_slack_representative for alert {alert_pk} from alert_group {alert.group_id}") - # don't need to publish in slack maintenance alert - # it was published earlier - if alert.group.maintenance_uuid is not None: - return - # don't need to publish alerts in slack while integration on maintenance - if ( - alert.group.channel.maintenance_mode == AlertReceiveChannel.MAINTENANCE - or alert.group.channel.organization.maintenance_mode == AlertReceiveChannel.MAINTENANCE is not None - ): - return - organization = alert.group.channel.organization if organization.slack_team_identity: logger.debug( diff --git a/engine/apps/slack/scenarios/distribute_alerts.py b/engine/apps/slack/scenarios/distribute_alerts.py index 420b0379..388ea413 100644 --- a/engine/apps/slack/scenarios/distribute_alerts.py +++ b/engine/apps/slack/scenarios/distribute_alerts.py @@ -42,7 +42,72 @@ logger.setLevel(logging.DEBUG) class AlertShootingStep(scenario_step.ScenarioStep): - def publish_slack_messages(self, slack_team_identity, alert_group, alert, attachments, channel_id, blocks): + def process_signal(self, alert): + # do not try to post alert group message to slack if its channel is rate limited + if alert.group.channel.is_rate_limited_in_slack: + logger.info("Skip posting or updating alert_group in Slack due to rate limit") + AlertGroup.all_objects.filter( + pk=alert.group.pk, + slack_message_sent=False, + ).update(slack_message_sent=True, reason_to_skip_escalation=AlertGroup.RATE_LIMITED) + return + + num_updated_rows = AlertGroup.all_objects.filter(pk=alert.group.pk, slack_message_sent=False).update( + slack_message_sent=True + ) + + if num_updated_rows == 1: + try: + channel_id = alert.group.channel_filter.slack_channel_id_or_general_log_id + self._send_first_alert(alert, channel_id) + except SlackAPIException as e: + AlertGroup.all_objects.filter(pk=alert.group.pk).update(slack_message_sent=False) + raise e + + is_on_debug_mode = ( + alert.group.channel.maintenance_mode == AlertReceiveChannel.DEBUG_MAINTENANCE + or alert.group.channel.organization.maintenance_mode == AlertReceiveChannel.DEBUG_MAINTENANCE + ) + + if is_on_debug_mode: + self._send_debug_mode_notice(alert.group, channel_id) + + if alert.group.is_maintenance_incident: + # not sending log report message for maintenance incident + pass + else: + # check if alert group was posted to slack before posting message to thread + if not 
alert.group.skip_escalation_in_slack: + self._send_log_report_message(alert.group, channel_id) + self._send_message_to_thread_if_bot_not_in_channel(alert.group, channel_id) + else: + # check if alert group was posted to slack before updating its message + if not alert.group.skip_escalation_in_slack: + update_task_id = update_incident_slack_message.apply_async( + (self.slack_team_identity.pk, alert.group.pk), + countdown=10, + ) + cache.set( + get_cache_key_update_incident_slack_message(alert.group.pk), + update_task_id, + timeout=CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME, + ) + else: + logger.info("Skip updating alert_group in Slack due to rate limit") + + def _send_first_alert(self, alert, channel_id): + attachments = alert.group.render_slack_attachments() + blocks = alert.group.render_slack_blocks() + self._post_alert_group_to_slack( + slack_team_identity=self.slack_team_identity, + alert_group=alert.group, + alert=alert, + attachments=attachments, + channel_id=channel_id, + blocks=blocks, + ) + + def _post_alert_group_to_slack(self, slack_team_identity, alert_group, alert, attachments, channel_id, blocks): # channel_id can be None if general log channel for slack_team_identity is not set if channel_id is None: logger.info(f"Failed to post message to Slack for alert_group {alert_group.pk} because channel_id is None") @@ -120,65 +185,6 @@ class AlertShootingStep(scenario_step.ScenarioStep): finally: alert.save() - def process_signal(self, alert): - # do not try to post alert group message to slack if its channel is rate limited - if alert.group.channel.is_rate_limited_in_slack: - logger.info("Skip posting or updating alert_group in Slack due to rate limit") - AlertGroup.all_objects.filter( - pk=alert.group.pk, - slack_message_sent=False, - ).update(slack_message_sent=True, reason_to_skip_escalation=AlertGroup.RATE_LIMITED) - return - - num_updated_rows = AlertGroup.all_objects.filter(pk=alert.group.pk, slack_message_sent=False).update( - slack_message_sent=True - ) - - if num_updated_rows == 1: - try: - channel_id = alert.group.channel_filter.slack_channel_id_or_general_log_id - self._send_first_alert(alert, channel_id) - except SlackAPIException as e: - AlertGroup.all_objects.filter(pk=alert.group.pk).update(slack_message_sent=False) - raise e - - is_debug_mode = ( - alert.group.channel.maintenance_mode is not None - or alert.group.channel.organization.maintenance_mode is not None - ) - if is_debug_mode: - self._send_debug_mode_notice(alert.group, channel_id) - else: - # check if alert group was posted to slack before posting message to thread - if not alert.group.skip_escalation_in_slack: - self._send_thread_messages(alert.group, channel_id) - else: - # check if alert group was posted to slack before updating its message - if not alert.group.skip_escalation_in_slack: - update_task_id = update_incident_slack_message.apply_async( - (self.slack_team_identity.pk, alert.group.pk), - countdown=10, - ) - cache.set( - get_cache_key_update_incident_slack_message(alert.group.pk), - update_task_id, - timeout=CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME, - ) - else: - logger.info("Skip updating alert_group in Slack due to rate limit") - - def _send_first_alert(self, alert, channel_id): - attachments = alert.group.render_slack_attachments() - blocks = alert.group.render_slack_blocks() - self.publish_slack_messages( - slack_team_identity=self.slack_team_identity, - alert_group=alert.group, - alert=alert, - attachments=attachments, - channel_id=channel_id, - blocks=blocks, - ) - def 
_send_debug_mode_notice(self, alert_group, channel_id): blocks = [] text = "Escalations are silenced due to Debug mode" @@ -193,11 +199,12 @@ class AlertShootingStep(scenario_step.ScenarioStep): blocks=blocks, ) - def _send_thread_messages(self, alert_group, channel_id): + def _send_log_report_message(self, alert_group, channel_id): post_or_update_log_report_message_task.apply_async( (alert_group.pk, self.slack_team_identity.pk), ) + def _send_message_to_thread_if_bot_not_in_channel(self, alert_group, channel_id): send_message_to_thread_if_bot_not_in_channel.apply_async( (alert_group.pk, self.slack_team_identity.pk, channel_id), countdown=1, # delay for message so that the log report is published first @@ -608,8 +615,10 @@ class ResolveGroupStep( if not self.check_alert_is_unarchived(slack_team_identity, payload, alert_group): return - if alert_group.maintenance_uuid is None: - + if alert_group.is_maintenance_incident: + alert_group.stop_maintenance(self.user) + else: + # TODO: refactor that check, it should be in alert core, not in slack. if self.organization.is_resolution_note_required and not alert_group.has_resolution_notes: resolution_note_data = { @@ -623,12 +632,11 @@ class ResolveGroupStep( return alert_group.resolve_by_user(self.user, action_source=ActionSource.SLACK) - else: - alert_group.stop_maintenance(self.user) def process_signal(self, log_record): alert_group = log_record.alert_group - + # Do not rerender alert_groups which happened while maintenance. + # They have no slack messages, since they just attached to the maintenance incident. if not alert_group.happened_while_maintenance: self.alert_group_slack_service.update_alert_group_slack_message(alert_group) diff --git a/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py b/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py index 053b7f74..62ebe71f 100644 --- a/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py +++ b/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py @@ -25,7 +25,7 @@ def test_restricted_action_error( with patch.object(step._slack_client, "api_call") as mock_slack_api_call: mock_slack_api_call.side_effect = SlackAPIException(response={"error": "restricted_action"}) - step.publish_slack_messages(slack_team_identity, alert_group, alert, None, "channel-id", []) + step._post_alert_group_to_slack(slack_team_identity, alert_group, alert, None, "channel-id", []) alert_group.refresh_from_db() alert.refresh_from_db() diff --git a/engine/apps/user_management/models/organization.py b/engine/apps/user_management/models/organization.py index ba11ed26..1a165abe 100644 --- a/engine/apps/user_management/models/organization.py +++ b/engine/apps/user_management/models/organization.py @@ -263,6 +263,8 @@ class Organization(MaintainableObject): return self.org_title def notify_about_maintenance_action(self, text, send_to_general_log_channel=True): + # TODO: this method should be refactored. + # It's binded to slack and sending maintenance notification only there. if send_to_general_log_channel: post_message_to_channel(self, self.general_log_channel_id, text)
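
A minimal, self-contained sketch of the maintenance guard that the refactored `send_alert_create_signal` and `send_update_log_report_signal` tasks now apply before emitting their signals. The `Organization`/`Channel` dataclasses and the `MAINTENANCE` constant below are illustrative stand-ins, not the real Django models or choice values:

```python
# Illustrative stand-ins only; the real check lives in the send_* tasks and
# compares against AlertReceiveChannel.MAINTENANCE.
from dataclasses import dataclass
from typing import Optional

MAINTENANCE = "maintenance"


@dataclass
class Organization:
    maintenance_mode: Optional[str] = None


@dataclass
class Channel:
    organization: Organization
    maintenance_mode: Optional[str] = None


def should_skip_distribution(channel: Channel) -> bool:
    """Skip chatops delivery when the integration or its organization is under maintenance."""
    return (
        channel.maintenance_mode == MAINTENANCE
        or channel.organization.maintenance_mode == MAINTENANCE
    )


if __name__ == "__main__":
    org_in_maintenance = Organization(maintenance_mode=MAINTENANCE)
    print(should_skip_distribution(Channel(organization=org_in_maintenance)))  # True
    print(should_skip_distribution(Channel(organization=Organization())))      # False
```

Because the guard sits in the shared send_* tasks rather than in the Slack representative, maintenance now mutes Telegram and MS Teams delivery too.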
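
A similar sketch of the attach-to-maintenance-incident branch that `Alert.create` now runs only when `group_created` is true. `AlertGroupStub` and the dict lookup are hypothetical stand-ins for `AlertGroup.all_objects.get(maintenance_uuid=...)`; the real code also creates a `TYPE_ATTACHED` log record and fires `send_alert_group_signal`:

```python
# Hypothetical stand-ins; the real logic lives in Alert.create (alert.py).
from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class AlertGroupStub:
    pk: int
    maintenance_uuid: Optional[str] = None
    root_alert_group: Optional["AlertGroupStub"] = None
    log_records: List[str] = field(default_factory=list)


def attach_to_maintenance_incident(
    group: AlertGroupStub,
    maintenance_uuid: Optional[str],
    incidents_by_uuid: Dict[str, AlertGroupStub],
) -> bool:
    """Return True if the newly created group was attached to a maintenance incident."""
    if maintenance_uuid is None:
        return False
    incident = incidents_by_uuid.get(maintenance_uuid)
    if incident is None:
        # mirrors the AlertGroup.DoesNotExist branch: silently skip
        return False
    group.root_alert_group = incident
    incident.log_records.append(f"attached alert group {group.pk}")  # TYPE_ATTACHED in the real code
    return True
```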