From 1ccd529d270fe13a0c45c3a1e589fb7bee0c4915 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 16 Mar 2023 16:36:29 +0100 Subject: [PATCH 01/20] fix resolution note Slack rendering bug (#1561) # Which issue(s) this PR fixes changing the block element type from `plain_text` to `mrkdwn` now allows slack to properly render Slack usernames in the Slack UI. Also, it seems that the `mrkdwn` context block type does not support the `emoji` key, hence getting rid of it. From the Slack [docs](https://api.slack.com/reference/block-kit/composition-objects#text): ![Screenshot 2023-03-16 at 16 19 25](https://user-images.githubusercontent.com/9406895/225663614-b0dbbaf1-4b39-48a4-9064-a3aa43fa4f43.png) ## Before ![Screenshot 2023-03-16 at 16 12 33](https://user-images.githubusercontent.com/9406895/225663322-7e84f7a9-dc6b-4827-b01c-1643bd27b766.png) ## After ![Screenshot 2023-03-16 at 16 12 27](https://user-images.githubusercontent.com/9406895/225663343-eff3fd27-d44b-4c2a-a06d-23e0936fcd37.png) ## Checklist - [ ] Tests updated (N/A) - [ ] Documentation added (N/A) - [x] `CHANGELOG.md` updated --- CHANGELOG.md | 2 ++ engine/apps/slack/scenarios/resolution_note.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 110ee554..9fcd25d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Check for duplicated positions in terraform escalation policies create/update +- Fix resolution note rendering in Slack message threads where the Slack username was not + being properly rendered ([1561](https://github.com/grafana/oncall/pull/1561)) ### Added diff --git a/engine/apps/slack/scenarios/resolution_note.py b/engine/apps/slack/scenarios/resolution_note.py index 02bff31b..94c2a8df 100644 --- a/engine/apps/slack/scenarios/resolution_note.py +++ b/engine/apps/slack/scenarios/resolution_note.py @@ -362,9 +362,8 @@ class 
UpdateResolutionNoteStep(scenario_step.ScenarioStep): "type": "context", "elements": [ { - "type": "plain_text", + "type": "mrkdwn", "text": f"{author_verbal} resolution note from {resolution_note.get_source_display()}.", - "emoji": True, } ], } From 515f62ab56c7b39dd5192c4ffff4da02fc68f7c4 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 16 Mar 2023 17:43:49 +0100 Subject: [PATCH 02/20] update wording in some Slack messages which mention 'incident' instead of 'alert group' (#1565) # What this PR does ![image](https://user-images.githubusercontent.com/9406895/225678127-4a0bcf96-742e-4335-9958-36fa0be26b9f.png) ## Checklist - [ ] Tests updated (N/A) - [ ] Documentation added (N/A) - [x] `CHANGELOG.md` updated --- CHANGELOG.md | 13 +++++++++++-- engine/apps/slack/models/slack_message.py | 4 ++-- engine/apps/slack/scenarios/escalation_delivery.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fcd25d8..7b197c4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,13 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## Unreleased + +### Changed + +- Updated wording in some Slack messages to use 'Alert Group' instead of 'Incident' ([1565](https://github.com/grafana/oncall/pull/1565)) + +### Fixed + +- Fix resolution note rendering in Slack message threads where the Slack username was not + being properly rendered ([1561](https://github.com/grafana/oncall/pull/1561)) + ## v1.1.40 (2023-03-16) ### Fixed - Check for duplicated positions in terraform escalation policies create/update -- Fix resolution note rendering in Slack message threads where the Slack username was not - being properly rendered ([1561](https://github.com/grafana/oncall/pull/1561)) ### Added diff --git a/engine/apps/slack/models/slack_message.py b/engine/apps/slack/models/slack_message.py index 047c82ef..2d576378 100644 --- a/engine/apps/slack/models/slack_message.py +++ b/engine/apps/slack/models/slack_message.py @@ -116,7 +116,7 @@ class SlackMessage(models.Model): slack_user_identity = user.slack_user_identity if slack_user_identity is None: - text = "{}\nTried to invite {} to look at incident. Unfortunately {} is not in slack.".format( + text = "{}\nTried to invite {} to look at the alert group. 
Unfortunately {} is not in slack.".format( alert_group.long_verbose_name, user_verbal, user_verbal ) @@ -131,7 +131,7 @@ class SlackMessage(models.Model): notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_SLACK_USER_NOT_IN_SLACK, ).save() else: - text = "{}\nInviting {} to look at incident.".format(alert_group.long_verbose_name, user_verbal) + text = "{}\nInviting {} to look at the alert group.".format(alert_group.long_verbose_name, user_verbal) blocks = [ { diff --git a/engine/apps/slack/scenarios/escalation_delivery.py b/engine/apps/slack/scenarios/escalation_delivery.py index d0dc4c3c..43aad105 100644 --- a/engine/apps/slack/scenarios/escalation_delivery.py +++ b/engine/apps/slack/scenarios/escalation_delivery.py @@ -33,4 +33,4 @@ class EscalationDeliveryStep(scenario_step.ScenarioStep): # Don't mention if asked to notify somehow else but drop a note for colleagues user_mention_as = user_verbal notify_by = " by {}".format(UserNotificationPolicy.NotificationChannel(notification_channel).label) - return "Inviting {}{} to look at incident.".format(user_mention_as, notify_by) + return "Inviting {}{} to look at the alert group.".format(user_mention_as, notify_by) From f85f77dbc7fccaa13bdedb991a39b9b90d944566 Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Thu, 16 Mar 2023 19:25:00 +0000 Subject: [PATCH 03/20] PD migrator: allow migrating inbound email integrations (#1566) --- tools/pagerduty-migrator/README.md | 2 +- tools/pagerduty-migrator/migrator/resources/integrations.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/pagerduty-migrator/README.md b/tools/pagerduty-migrator/README.md index b87a8127..90e571ef 100644 --- a/tools/pagerduty-migrator/README.md +++ b/tools/pagerduty-migrator/README.md @@ -11,7 +11,7 @@ Resources that can be migrated using this tool: ## Limitations -- Not all integration types are supported (e.g. 
inbound email is not supported) +- Not all integration types are supported - Migrated on-call schedules in Grafana OnCall will use ICalendar files from PagerDuty - Delays between migrated notification/escalation rules could be slightly different from original. E.g. if you have a 4-minute delay between rules in PagerDuty, the resulting delay in Grafana OnCall will be 5 minutes diff --git a/tools/pagerduty-migrator/migrator/resources/integrations.py b/tools/pagerduty-migrator/migrator/resources/integrations.py index 405d5380..9230e9b1 100644 --- a/tools/pagerduty-migrator/migrator/resources/integrations.py +++ b/tools/pagerduty-migrator/migrator/resources/integrations.py @@ -20,6 +20,12 @@ def match_integration(integration: dict, oncall_integrations: list[dict]) -> Non def match_integration_type(integration: dict, vendors: list[dict]) -> None: vendors_map = {vendor["id"]: vendor for vendor in vendors} + if integration["type"] == "generic_email_inbound_integration": + # ignore vendor name for generic email inbound integrations + integration["vendor_name"] = None + integration["oncall_type"] = "inbound_email" + return + if integration["vendor"] is None: integration["vendor_name"] = None integration["oncall_type"] = None From 4d655dff60dfa19703ad124c1cdc9bfdd63a9132 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Fri, 17 Mar 2023 11:14:08 +0100 Subject: [PATCH 04/20] modify check_escalation_finished_task task (#1266) # What this PR does This PR: - modifies the `check_escalation_finished_task` celery task to: - do stricter escalation validation based on the alert group's escalation snapshot (see the `audit_alert_group_escalation` method in `engine/apps/alerts/tasks/check_escalation_finished.py` for the validation logic) - use a read-only database for querying alert-groups if one is configured, otherwise use the "default" one - ping a configurable heartbeat (new env var `ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL` added) - increase the task frequency from 
every 10 to every 13 minutes (this can be configured via an env variable) - adds public documentation on how to configure this auditor task - modifies the local celery startup command to properly take into consideration all celery related env vars (similar to the ones we use in `engine/celery_with_exporter.sh`; this made it easier to enable `celery beat` locally for testing) - removes the following code: - removes references to `AlertGroup.estimate_escalation_finish_time` and marks the model field as deprecated using the [`django-deprecate-fields` library](https://pypi.org/project/django-deprecate-fields/). This field was only used for the previous version of this validation task - `EscalationSnapshotMixin.calculate_eta_for_finish_escalation` was only used to calculate the value for `AlertGroup.estimate_escalation_finish_time` - `calculate_escalation_finish_time` celery task ## Which issue(s) this PR fixes https://github.com/grafana/oncall-private/issues/1558 ## Checklist - [x] Tests updated - [x] Documentation added - [x] `CHANGELOG.md` updated --- CHANGELOG.md | 5 + dev/.env.dev.example | 3 +- docs/sources/open-source/_index.md | 28 +- .../escalation_snapshot_mixin.py | 148 ++-- .../serializers/escalation_snapshot.py | 8 +- .../snapshot_classes/escalation_snapshot.py | 64 +- .../incident_log_builder.py | 2 +- engine/apps/alerts/models/alert_group.py | 14 +- engine/apps/alerts/tasks/__init__.py | 1 - .../calculcate_escalation_finish_time.py | 15 - .../alerts/tasks/check_escalation_finished.py | 146 +++- engine/apps/alerts/tests/conftest.py | 51 ++ .../test_check_escalation_finished_task.py | 340 ++++++++- .../tests/test_escalation_policy_snapshot.py | 4 +- .../alerts/tests/test_escalation_snapshot.py | 94 ++- .../tests/test_escalation_snapshot_mixin.py | 658 ++++++++++++++++++ engine/common/database.py | 6 +- .../management/commands/restart_escalation.py | 8 +- .../management/commands/start_celery.py | 17 +- engine/requirements.txt | 1 + engine/settings/base.py | 
10 +- 21 files changed, 1369 insertions(+), 254 deletions(-) delete mode 100644 engine/apps/alerts/tasks/calculcate_escalation_finish_time.py create mode 100644 engine/apps/alerts/tests/test_escalation_snapshot_mixin.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b197c4e..b7ca9858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Modified `check_escalation_finished_task` celery task to use read-only databases for its query, if one is defined + + make the validation logic stricter + ping a configurable heartbeat on successful completion of this task ([1266](https://github.com/grafana/oncall/pull/1266)) + ### Changed - Updated wording in some Slack messages to use 'Alert Group' instead of 'Incident' ([1565](https://github.com/grafana/oncall/pull/1565)) diff --git a/dev/.env.dev.example b/dev/.env.dev.example index 9350004d..07258b08 100644 --- a/dev/.env.dev.example +++ b/dev/.env.dev.example @@ -29,10 +29,11 @@ GRAFANA_INCIDENT_STATIC_API_KEY= GRAFANA_API_URL=http://localhost:3000 CELERY_WORKER_QUEUE="default,critical,long,slack,telegram,webhook,retry,celery" -CELERY_WORKER_CONCURRENCY=1 +CELERY_WORKER_CONCURRENCY=3 CELERY_WORKER_MAX_TASKS_PER_CHILD=100 CELERY_WORKER_SHUTDOWN_INTERVAL=65m CELERY_WORKER_BEAT_ENABLED=True +CELERY_WORKER_DEBUG_LOGS=False RABBITMQ_USERNAME=rabbitmq RABBITMQ_PASSWORD=rabbitmq diff --git a/docs/sources/open-source/_index.md b/docs/sources/open-source/_index.md index f13c3213..5d3f981d 100644 --- a/docs/sources/open-source/_index.md +++ b/docs/sources/open-source/_index.md @@ -243,7 +243,7 @@ The limit can be changed using env variables: ## Mobile application set up ->**Note**: This application is currently in beta +> **Note**: This application is currently in beta Grafana OnCall OSS users can use the mobile app to receive push notifications from OnCall. 
Grafana OnCall OSS relies on Grafana Cloud as on relay for push notifications. @@ -255,3 +255,29 @@ For Grafana OnCall OSS, the mobile app QR code includes an authentication token Your Grafana OnCall OSS instance should be reachable from the same network as your mobile device, preferably from the internet. For more information, see [Grafana OnCall mobile app]({{< relref "../mobile-app" >}}) + +## Alert Group Escalation Auditor + +Grafana OnCall has a periodic background task, which runs to check that all alert group escalations have finished +properly. This feature, if configured, can also ping an OnCall Webhook Integration's heartbeat URL, so that you can be +alerted, in the event that something goes wrong. + +Logs originating from the celery worker, for the `apps.alerts.tasks.check_escalation_finished.check_escalation_finished_task` +task, that reference a `AlertGroupEscalationPolicyExecutionAuditException` exception +indicate that the auditor periodic task is failing check(s) on one or more alert groups. Logs for this task which +mention `.. passed the audit checks` indicate that there were no issues with the escalation on the audited +alert groups. + +To configure this feature as such: + +1. Create a Webhook, or Formatted Webhook, Integration type. +1. Under the "Heartbeat" tab in the Integration modal, copy the unique heartbeat URL that is shown. +1. Set the heartbeat's expected time interval to 15 minutes (see note below regarding `ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_INTERVAL`) +1. Configure the integration's escalation chain as necessary +1. Populate the following env variables: + +- `ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL` - integration's unique heartbeat URL +- `ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_INTERVAL` - how often the auditor task should run. By default the + task runs every 13 minutes so we therefore recommend setting the heartbeat's expected time interval to 15 minutes. 
If you + would like to modify this, we recommend configuring this env variable to 1 or 2 minutes less than the value set for the + integration's heartbeat expected time interval. diff --git a/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py b/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py index 1f9072ce..04c1cb8a 100644 --- a/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py +++ b/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py @@ -1,3 +1,4 @@ +import datetime import logging from typing import Optional @@ -5,19 +6,16 @@ import pytz from celery import uuid as celery_uuid from dateutil.parser import parse from django.apps import apps -from django.utils import timezone from django.utils.functional import cached_property from rest_framework.exceptions import ValidationError -from apps.alerts.constants import NEXT_ESCALATION_DELAY from apps.alerts.escalation_snapshot.snapshot_classes import ( ChannelFilterSnapshot, EscalationChainSnapshot, EscalationPolicySnapshot, EscalationSnapshot, ) -from apps.alerts.escalation_snapshot.utils import eta_for_escalation_step_notify_if_time -from apps.alerts.tasks import calculate_escalation_finish_time, escalate_alert_group +from apps.alerts.tasks import escalate_alert_group logger = logging.getLogger(__name__) @@ -90,8 +88,7 @@ class EscalationSnapshotMixin: 'next_step_eta': '2021-10-18T10:28:28.890369Z } """ - - escalation_snapshot = None + data = {} if self.escalation_chain_exists: channel_filter = self.channel_filter @@ -104,53 +101,7 @@ class EscalationSnapshotMixin: "escalation_policies_snapshots": escalation_policies, "slack_channel_id": self.slack_channel_id, } - escalation_snapshot = EscalationSnapshot.serializer(data).data - return escalation_snapshot - - def calculate_eta_for_finish_escalation(self, escalation_started=False, start_time=None): - if not self.escalation_snapshot: - return - EscalationPolicy = apps.get_model("alerts", 
"EscalationPolicy") - TOLERANCE_SECONDS = 1 - TOLERANCE_TIME = timezone.timedelta(seconds=NEXT_ESCALATION_DELAY + TOLERANCE_SECONDS) - start_time = start_time or timezone.now() # start time may be different for silenced incidents - wait_summ = timezone.timedelta() - # Get next_active_escalation_policy_order using flag `escalation_started` because this calculation can be - # started in parallel with escalation task where next_active_escalation_policy_order can be changed. - # That's why we are using `escalation_started` flag here, which means, that we want count eta from the first - # step. - next_escalation_policy_order = ( - self.escalation_snapshot.next_active_escalation_policy_order if escalation_started else 0 - ) - escalation_policies = self.escalation_snapshot.escalation_policies_snapshots[next_escalation_policy_order:] - for escalation_policy in escalation_policies: - if escalation_policy.step == EscalationPolicy.STEP_WAIT: - if escalation_policy.wait_delay is not None: - wait_summ += escalation_policy.wait_delay - else: - wait_summ += EscalationPolicy.DEFAULT_WAIT_DELAY # Default wait in case it's not selected yet - elif escalation_policy.step == EscalationPolicy.STEP_NOTIFY_IF_TIME: - if escalation_policy.from_time and escalation_policy.to_time: - estimate_start_time = start_time + wait_summ - STEP_TOLERANCE = timezone.timedelta(minutes=1) - next_step_estimate_start_time = eta_for_escalation_step_notify_if_time( - escalation_policy.from_time, - escalation_policy.to_time, - estimate_start_time + STEP_TOLERANCE, - ) - wait_summ += next_step_estimate_start_time - estimate_start_time - elif escalation_policy.step == EscalationPolicy.STEP_REPEAT_ESCALATION_N_TIMES: - # the part of escalation with repeat step will be passed six times: the first time plus five repeats - wait_summ *= EscalationPolicy.MAX_TIMES_REPEAT + 1 - elif escalation_policy.step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: - # In this case we cannot calculate finish time, so 
we return None - return - elif escalation_policy.step == EscalationPolicy.STEP_FINAL_RESOLVE: - break - wait_summ += TOLERANCE_TIME - - escalation_finish_time = start_time + wait_summ - return escalation_finish_time + return EscalationSnapshot.serializer(data).data @property def channel_filter_with_respect_to_escalation_snapshot(self): @@ -166,38 +117,52 @@ class EscalationSnapshotMixin: @cached_property def channel_filter_snapshot(self) -> Optional[ChannelFilterSnapshot]: - # in some cases we need only channel filter and don't want to serialize whole escalation - channel_filter_snapshot_object = None + """ + in some cases we need only channel filter and don't want to serialize whole escalation + """ escalation_snapshot = self.raw_escalation_snapshot - if escalation_snapshot is not None: - channel_filter_snapshot = ChannelFilterSnapshot.serializer().to_internal_value( - escalation_snapshot["channel_filter_snapshot"] - ) - channel_filter_snapshot_object = ChannelFilterSnapshot(**channel_filter_snapshot) - return channel_filter_snapshot_object + if not escalation_snapshot: + return None + + channel_filter_snapshot = escalation_snapshot["channel_filter_snapshot"] + if not channel_filter_snapshot: + return None + + channel_filter_snapshot = ChannelFilterSnapshot.serializer().to_internal_value(channel_filter_snapshot) + return ChannelFilterSnapshot(**channel_filter_snapshot) @cached_property def escalation_chain_snapshot(self) -> Optional[EscalationChainSnapshot]: - # in some cases we need only escalation chain and don't want to serialize whole escalation + """ + in some cases we need only escalation chain and don't want to serialize whole escalation escalation_chain_snapshot_object = None + """ escalation_snapshot = self.raw_escalation_snapshot - if escalation_snapshot is not None: - escalation_chain_snapshot = EscalationChainSnapshot.serializer().to_internal_value( - escalation_snapshot["escalation_chain_snapshot"] - ) - escalation_chain_snapshot_object = 
EscalationChainSnapshot(**escalation_chain_snapshot) - return escalation_chain_snapshot_object + if not escalation_snapshot: + return None + + escalation_chain_snapshot = escalation_snapshot["escalation_chain_snapshot"] + if not escalation_chain_snapshot: + return None + + escalation_chain_snapshot = EscalationChainSnapshot.serializer().to_internal_value(escalation_chain_snapshot) + return EscalationChainSnapshot(**escalation_chain_snapshot) @cached_property def escalation_snapshot(self) -> Optional[EscalationSnapshot]: - escalation_snapshot_object = None raw_escalation_snapshot = self.raw_escalation_snapshot - if raw_escalation_snapshot is not None: + if raw_escalation_snapshot: try: - escalation_snapshot_object = self._deserialize_escalation_snapshot(raw_escalation_snapshot) + return self._deserialize_escalation_snapshot(raw_escalation_snapshot) except ValidationError as e: logger.error(f"Error trying to deserialize raw escalation snapshot: {e}") - return escalation_snapshot_object + return None + + @cached_property + def has_escalation_policies_snapshots(self) -> bool: + if not self.raw_escalation_snapshot: + return False + return len(self.raw_escalation_snapshot["escalation_policies_snapshots"]) > 0 def _deserialize_escalation_snapshot(self, raw_escalation_snapshot) -> EscalationSnapshot: """ @@ -225,20 +190,34 @@ class EscalationSnapshotMixin: return escalation_snapshot_object @property - def escalation_chain_exists(self): - return not self.pause_escalation and self.channel_filter and self.channel_filter.escalation_chain + def escalation_chain_exists(self) -> bool: + if self.pause_escalation: + return False + elif not self.channel_filter: + return False + return self.channel_filter.escalation_chain is not None @property - def pause_escalation(self): - # get pause_escalation field directly to avoid serialization overhead - return self.raw_escalation_snapshot is not None and self.raw_escalation_snapshot.get("pause_escalation", False) + def pause_escalation(self) 
-> bool: + """ + get pause_escalation field directly to avoid serialization overhead + """ + if not self.raw_escalation_snapshot: + return False + return self.raw_escalation_snapshot.get("pause_escalation", False) @property - def next_step_eta(self): - # get next_step_eta field directly to avoid serialization overhead - raw_next_step_eta = ( - self.raw_escalation_snapshot.get("next_step_eta") if self.raw_escalation_snapshot is not None else None - ) + def next_step_eta(self) -> Optional[datetime.datetime]: + """ + get next_step_eta field directly to avoid serialization overhead + """ + if not self.raw_escalation_snapshot: + return None + + raw_next_step_eta = self.raw_escalation_snapshot.get("next_step_eta") + if not raw_next_step_eta: + return None + if raw_next_step_eta: return parse(raw_next_step_eta).replace(tzinfo=pytz.UTC) @@ -272,13 +251,10 @@ class EscalationSnapshotMixin: is_escalation_finished=False, raw_escalation_snapshot=raw_escalation_snapshot, ) - if not self.pause_escalation: - calculate_escalation_finish_time.apply_async((self.pk,), immutable=True) escalate_alert_group.apply_async((self.pk,), countdown=countdown, immutable=True, eta=eta, task_id=task_id) def stop_escalation(self): self.is_escalation_finished = True - self.estimate_escalation_finish_time = None # change active_escalation_id to prevent alert escalation self.active_escalation_id = "intentionally_stopped" - self.save(update_fields=["is_escalation_finished", "estimate_escalation_finish_time", "active_escalation_id"]) + self.save(update_fields=["is_escalation_finished", "active_escalation_id"]) diff --git a/engine/apps/alerts/escalation_snapshot/serializers/escalation_snapshot.py b/engine/apps/alerts/escalation_snapshot/serializers/escalation_snapshot.py index fbe15be0..f2b5fb4d 100644 --- a/engine/apps/alerts/escalation_snapshot/serializers/escalation_snapshot.py +++ b/engine/apps/alerts/escalation_snapshot/serializers/escalation_snapshot.py @@ -8,11 +8,11 @@ from 
apps.alerts.escalation_snapshot.serializers import ( class EscalationSnapshotSerializer(serializers.Serializer): - channel_filter_snapshot = ChannelFilterSnapshotSerializer() - escalation_chain_snapshot = EscalationChainSnapshotSerializer() + channel_filter_snapshot = ChannelFilterSnapshotSerializer(allow_null=True, default=None) + escalation_chain_snapshot = EscalationChainSnapshotSerializer(allow_null=True, default=None) last_active_escalation_policy_order = serializers.IntegerField(allow_null=True, default=None) - escalation_policies_snapshots = EscalationPolicySnapshotSerializer(many=True) - slack_channel_id = serializers.CharField(allow_null=True) + escalation_policies_snapshots = EscalationPolicySnapshotSerializer(many=True, default=list) + slack_channel_id = serializers.CharField(allow_null=True, default=None) pause_escalation = serializers.BooleanField(allow_null=True, default=False) next_step_eta = serializers.DateTimeField(allow_null=True, default=None) diff --git a/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_snapshot.py b/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_snapshot.py index d4845c57..1997408d 100644 --- a/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_snapshot.py +++ b/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_snapshot.py @@ -1,15 +1,23 @@ import logging -from typing import Optional +import typing from celery.utils.log import get_task_logger +from django.utils import timezone from apps.alerts.escalation_snapshot.serializers import EscalationSnapshotSerializer -from apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot import EscalationPolicySnapshot from apps.alerts.models.alert_group_log_record import AlertGroupLogRecord logger = get_task_logger(__name__) logger.setLevel(logging.DEBUG) +if typing.TYPE_CHECKING: + from apps.alerts.escalation_snapshot.snapshot_classes import ( + ChannelFilterSnapshot, + EscalationChainSnapshot, + 
EscalationPolicySnapshot, + ) + from apps.alerts.models import AlertGroup + class EscalationSnapshot: __slots__ = ( @@ -28,34 +36,34 @@ class EscalationSnapshot: def __init__( self, - alert_group, - channel_filter_snapshot, - escalation_chain_snapshot, - last_active_escalation_policy_order, - escalation_policies_snapshots, - slack_channel_id, - pause_escalation, - next_step_eta, + alert_group: "AlertGroup", + channel_filter_snapshot: "ChannelFilterSnapshot", + escalation_chain_snapshot: "EscalationChainSnapshot", + last_active_escalation_policy_order: int, + escalation_policies_snapshots: typing.List["EscalationPolicySnapshot"], + slack_channel_id: str, + pause_escalation: bool, + next_step_eta: typing.Optional[str], ): self.alert_group = alert_group - self.channel_filter_snapshot = channel_filter_snapshot # ChannelFilterSnapshot object - self.escalation_chain_snapshot = escalation_chain_snapshot # EscalationChainSnapshot object + self.channel_filter_snapshot = channel_filter_snapshot + self.escalation_chain_snapshot = escalation_chain_snapshot self.last_active_escalation_policy_order = last_active_escalation_policy_order - self.escalation_policies_snapshots = escalation_policies_snapshots # list of EscalationPolicySnapshot objects + self.escalation_policies_snapshots = escalation_policies_snapshots self.slack_channel_id = slack_channel_id self.pause_escalation = pause_escalation self.next_step_eta = next_step_eta self.stop_escalation = False @property - def last_active_escalation_policy_snapshot(self) -> Optional[EscalationPolicySnapshot]: + def last_active_escalation_policy_snapshot(self) -> typing.Optional["EscalationPolicySnapshot"]: order = self.last_active_escalation_policy_order if order is None: return None return self.escalation_policies_snapshots[order] @property - def next_active_escalation_policy_snapshot(self) -> Optional[EscalationPolicySnapshot]: + def next_active_escalation_policy_snapshot(self) -> typing.Optional["EscalationPolicySnapshot"]: order 
= self.next_active_escalation_policy_order if len(self.escalation_policies_snapshots) < order + 1: next_link = None @@ -71,6 +79,31 @@ class EscalationSnapshot: next_order = self.last_active_escalation_policy_order + 1 return next_order + @property + def executed_escalation_policy_snapshots(self) -> typing.List["EscalationPolicySnapshot"]: + """ + Returns a list of escalation policy snapshots that have already been executed, according + to the value of last_active_escalation_policy_order + """ + if self.last_active_escalation_policy_order is None: + return [] + elif self.last_active_escalation_policy_order == 0: + return [self.escalation_policies_snapshots[0]] + return self.escalation_policies_snapshots[: self.last_active_escalation_policy_order] + + def next_step_eta_is_valid(self) -> typing.Union[None, bool]: + """ + `next_step_eta` should never be less than the current time (with a 5 minute buffer provided) + as this field should be updated as the escalation policy is executed over time. If it is, this means that + an escalation policy step has been missed, or is substantially delayed + + if `next_step_eta` is `None` then `None` is returned, otherwise a boolean is returned + representing the result of the time comparision + """ + if self.next_step_eta is None: + return None + return self.next_step_eta > (timezone.now() - timezone.timedelta(minutes=5)) + def save_to_alert_group(self) -> None: self.alert_group.raw_escalation_snapshot = self.convert_to_dict() self.alert_group.save(update_fields=["raw_escalation_snapshot"]) @@ -83,7 +116,6 @@ class EscalationSnapshot: Executes actual escalation step and saves result of execution like stop_escalation param and eta, that will be used for start next escalate_alert_group task. Also updates self.last_active_escalation_policy_order if escalation step was executed. 
- :return: None """ escalation_policy_snapshot = self.next_active_escalation_policy_snapshot if escalation_policy_snapshot is None: diff --git a/engine/apps/alerts/incident_log_builder/incident_log_builder.py b/engine/apps/alerts/incident_log_builder/incident_log_builder.py index f3f55d58..1b204bae 100644 --- a/engine/apps/alerts/incident_log_builder/incident_log_builder.py +++ b/engine/apps/alerts/incident_log_builder/incident_log_builder.py @@ -134,7 +134,7 @@ class IncidentLogBuilder: # check if escalation snapshot wasn't saved and channel filter was deleted. # We cannot generate escalation plan in this case escalation_snapshot = self.alert_group.escalation_snapshot - if escalation_snapshot is None: + if not self.alert_group.has_escalation_policies_snapshots: return escalation_plan_dict if self.alert_group.silenced_until: diff --git a/engine/apps/alerts/models/alert_group.py b/engine/apps/alerts/models/alert_group.py index bf58d3d3..7d4c4c95 100644 --- a/engine/apps/alerts/models/alert_group.py +++ b/engine/apps/alerts/models/alert_group.py @@ -13,6 +13,7 @@ from django.db import IntegrityError, models, transaction from django.db.models import JSONField, Q, QuerySet from django.utils import timezone from django.utils.functional import cached_property +from django_deprecate_fields import deprecate_field from apps.alerts.escalation_snapshot import EscalationSnapshotMixin from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE @@ -336,7 +337,9 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. 
maintenance_uuid = models.CharField(max_length=100, unique=True, null=True, default=None) raw_escalation_snapshot = JSONField(null=True, default=None) - estimate_escalation_finish_time = models.DateTimeField(null=True, default=None) + + # THIS FIELD IS DEPRECATED AND SHOULD EVENTUALLY BE REMOVED + estimate_escalation_finish_time = deprecate_field(models.DateTimeField(null=True, default=None)) # This field is used for constraints so we can use get_or_create() in concurrent calls # https://docs.djangoproject.com/en/3.2/ref/models/querysets/#get-or-create @@ -1464,14 +1467,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. def start_unsilence_task(self, countdown): task_id = celery_uuid() self.unsilence_task_uuid = task_id - - # recalculate finish escalation time - escalation_start_time = timezone.now() + timezone.timedelta(seconds=countdown) - self.estimate_escalation_finish_time = self.calculate_eta_for_finish_escalation( - start_time=escalation_start_time - ) - - self.save(update_fields=["unsilence_task_uuid", "estimate_escalation_finish_time"]) + self.save(update_fields=["unsilence_task_uuid"]) unsilence_task.apply_async((self.pk,), task_id=task_id, countdown=countdown) @property diff --git a/engine/apps/alerts/tasks/__init__.py b/engine/apps/alerts/tasks/__init__.py index 0ccc5b70..70146f9c 100644 --- a/engine/apps/alerts/tasks/__init__.py +++ b/engine/apps/alerts/tasks/__init__.py @@ -3,7 +3,6 @@ from .alert_group_web_title_cache import ( # noqa:F401 update_web_title_cache, update_web_title_cache_for_alert_receive_channel, ) -from .calculcate_escalation_finish_time import calculate_escalation_finish_time # noqa from .call_ack_url import call_ack_url # noqa: F401 from .check_escalation_finished import check_escalation_finished_task # noqa: F401 from .create_contact_points_for_datasource import create_contact_points_for_datasource # noqa: F401 diff --git a/engine/apps/alerts/tasks/calculcate_escalation_finish_time.py 
b/engine/apps/alerts/tasks/calculcate_escalation_finish_time.py deleted file mode 100644 index ef1483c1..00000000 --- a/engine/apps/alerts/tasks/calculcate_escalation_finish_time.py +++ /dev/null @@ -1,15 +0,0 @@ -from django.apps import apps -from django.conf import settings - -from common.custom_celery_tasks import shared_dedicated_queue_retry_task - - -@shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None -) -def calculate_escalation_finish_time(alert_group_pk): - AlertGroup = apps.get_model("alerts", "AlertGroup") - alert_group = AlertGroup.all_objects.filter(pk=alert_group_pk)[0] - if alert_group.escalation_snapshot: - alert_group.estimate_escalation_finish_time = alert_group.calculate_eta_for_finish_escalation() - alert_group.save(update_fields=["estimate_escalation_finish_time"]) diff --git a/engine/apps/alerts/tasks/check_escalation_finished.py b/engine/apps/alerts/tasks/check_escalation_finished.py index 64f44d3d..c382aa5f 100644 --- a/engine/apps/alerts/tasks/check_escalation_finished.py +++ b/engine/apps/alerts/tasks/check_escalation_finished.py @@ -1,48 +1,148 @@ +import datetime +import typing + +import requests +from celery import shared_task from django.apps import apps from django.conf import settings from django.db.models import Q from django.utils import timezone from apps.alerts.tasks.task_logger import task_logger -from common.custom_celery_tasks import shared_dedicated_queue_retry_task +from common.database import get_random_readonly_database_key_if_present_otherwise_default + +if typing.TYPE_CHECKING: + from apps.alerts.models.alert_group import AlertGroup -@shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None, default_retry_delay=60 -) +class AlertGroupEscalationPolicyExecutionAuditException(BaseException): + """This exception is raised when an alert group's escalation policy did not execute 
execute properly for some reason""" + + +def send_alert_group_escalation_auditor_task_heartbeat() -> None: + heartbeat_url = settings.ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL + if heartbeat_url: + task_logger.info(f"Sending heartbeat to configured URL: {heartbeat_url}") + requests.get(heartbeat_url).raise_for_status() + task_logger.info(f"Heartbeat successfully sent to {heartbeat_url}") + else: + task_logger.info(f"Skipping sending heartbeat as no heartbeat URL is configured") + + +def audit_alert_group_escalation(alert_group: "AlertGroup") -> None: + escalation_snapshot = alert_group.escalation_snapshot + alert_group_id = alert_group.id + base_msg = f"Alert group {alert_group_id}" + + if not escalation_snapshot: + raise AlertGroupEscalationPolicyExecutionAuditException( + f"{base_msg} does not have an escalation snapshot associated with it, this should never occur" + ) + task_logger.info(f"{base_msg} has an escalation snapshot associated with it, auditing if it executed properly") + + escalation_policies_snapshots = escalation_snapshot.escalation_policies_snapshots + + if not escalation_policies_snapshots: + task_logger.info( + f"{base_msg}'s escalation snapshot has an empty escalation_policies_snapshots, skipping further validation" + ) + return + task_logger.info( + f"{base_msg}'s escalation snapshot has a populated escalation_policies_snapshots, continuing validation" + ) + + if escalation_snapshot.next_step_eta_is_valid() is False: + raise AlertGroupEscalationPolicyExecutionAuditException( + f"{base_msg}'s escalation snapshot does not have a valid next_step_eta: {escalation_snapshot.next_step_eta}" + ) + task_logger.info(f"{base_msg}'s escalation snapshot has a valid next_step_eta: {escalation_snapshot.next_step_eta}") + + executed_escalation_policy_snapshots = escalation_snapshot.executed_escalation_policy_snapshots + num_of_executed_escalation_policy_snapshots = len(executed_escalation_policy_snapshots) + + if 
num_of_executed_escalation_policy_snapshots == 0: +        task_logger.info( +            f"{base_msg}'s escalation snapshot does not have any executed escalation policies, skipping further validation" +        ) +    else: +        task_logger.info( +            f"{base_msg}'s escalation snapshot has {num_of_executed_escalation_policy_snapshots} executed escalation policies" +        ) + +    # TODO: consider adding the below checks later on. This is a bit trickier to properly audit as the +    # number of log records can vary if there are any STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW or +    # STEP_REPEAT_ESCALATION_N_TIMES escalation policy steps in the escalation chain +    # see conversations in the original PR (https://github.com/grafana/oncall/pull/1266) for more context on this +    # +    # compare number of triggered/failed alert group log records to the number of executed +    # escalation policy snapshot steps +    # num_of_relevant_log_records = AlertGroupLogRecord.objects.filter( +    #     alert_group_id=alert_group_id, +    #     type__in=[AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, AlertGroupLogRecord.TYPE_ESCALATION_FAILED], +    # ).count() + +    # if num_of_relevant_log_records < num_of_executed_escalation_policy_snapshots: +    #     raise AlertGroupEscalationPolicyExecutionAuditException( +    #         f"{base_msg}'s number of triggered/failed alert group log records ({num_of_relevant_log_records}) is less " +    #         f"than the number of executed escalation policy snapshot steps ({num_of_executed_escalation_policy_snapshots})" +    #     ) + +    # task_logger.info( +    #     f"{base_msg}'s number of triggered/failed alert group log records ({num_of_relevant_log_records}) is greater " +    #     f"than or equal to the number of executed escalation policy snapshot steps ({num_of_executed_escalation_policy_snapshots})" +    # ) + +    task_logger.info(f"{base_msg} passed the audit checks") + + +def get_auditable_alert_groups_started_at_range() -> typing.Tuple[datetime.datetime, datetime.datetime]: +    """ +    NOTE: this started_at__range is a bit of a hack.. 
+ we wanted to avoid performing a migration on the alerts_alertgroup table to update + alert groups where raw_escalation_snapshot was None. raw_escalation_snapshot being None is a legitimate case, + where the alert group's integration does not have an escalation chain associated with it. + + However, we wanted a way to be able to differentiate between "actually None" and "there was an error writing to + raw_escalation_snapshot" (as this is performed async by a celery task). + + This field was updated, in the commit that added this comment, to no longer be set to None by default. + As part of this celery task we do a check that this field is in fact not None, so if we were to check older + alert groups, whose integration did not have an escalation chain at the time the alert group was created + we would raise errors + """ + return (datetime.datetime(2023, 3, 25), timezone.now() - timezone.timedelta(days=2)) + + +# don't retry this task as the AlertGroup DB query is rather expensive +@shared_task def check_escalation_finished_task(): - """ - This task periodically checks if there are no alert groups with not finished escalations. 
- TODO: QA this properly, check if new type of escalations had been added - """ AlertGroup = apps.get_model("alerts", "AlertGroup") AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel") - CHECKING_TOLERANCE = timezone.timedelta(minutes=5) - CHECKING_TIME = timezone.now() - CHECKING_TOLERANCE - - alert_groups = AlertGroup.all_objects.filter( + alert_groups = AlertGroup.all_objects.using(get_random_readonly_database_key_if_present_otherwise_default()).filter( ~Q(channel__integration=AlertReceiveChannel.INTEGRATION_MAINTENANCE), ~Q(silenced=True, silenced_until__isnull=True), # filter silenced forever alert_groups is_escalation_finished=False, resolved=False, acknowledged=False, root_alert_group=None, - estimate_escalation_finish_time__lte=CHECKING_TIME, + started_at__range=get_auditable_alert_groups_started_at_range(), ) if not alert_groups.exists(): - return + task_logger.info("There are no alert groups to audit, everything is good :)") - exception_template = "Escalation for alert_group {} is not finished at expected time {}, now {}" + alert_group_ids_that_failed_audit: typing.List[str] = [] - now = timezone.now() - exception_text = "\n".join( - exception_template.format(alert_group.pk, alert_group.estimate_escalation_finish_time, now) - for alert_group in alert_groups - ) + for alert_group in alert_groups: + try: + audit_alert_group_escalation(alert_group) + except AlertGroupEscalationPolicyExecutionAuditException: + alert_group_ids_that_failed_audit.append(str(alert_group.id)) - ids = alert_groups.values_list("pk", flat=True) - task_logger.debug(ids) + if alert_group_ids_that_failed_audit: + raise AlertGroupEscalationPolicyExecutionAuditException( + f"The following alert group id(s) failed auditing: {', '.join(alert_group_ids_that_failed_audit)}" + ) - raise Exception(exception_text) + send_alert_group_escalation_auditor_task_heartbeat() diff --git a/engine/apps/alerts/tests/conftest.py b/engine/apps/alerts/tests/conftest.py index 
2550fdef..6038a091 100644 --- a/engine/apps/alerts/tests/conftest.py +++ b/engine/apps/alerts/tests/conftest.py @@ -1,6 +1,9 @@ +import datetime + import pytest from apps.alerts.incident_appearance.templaters import AlertSlackTemplater +from apps.alerts.models import EscalationPolicy @pytest.fixture() @@ -9,3 +12,51 @@ def mock_alert_renderer_render_for(monkeypatch): return "invalid_render_for" monkeypatch.setattr(AlertSlackTemplater, "_render_for", mock_render_for) + + +@pytest.fixture() +def escalation_snapshot_test_setup( + make_organization_and_user, + make_user_for_organization, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_escalation_policy, + make_alert_group, +): + organization, user_1 = make_organization_and_user() + user_2 = make_user_for_organization(organization) + + alert_receive_channel = make_alert_receive_channel(organization) + + escalation_chain = make_escalation_chain(organization) + channel_filter = make_channel_filter( + alert_receive_channel, + escalation_chain=escalation_chain, + notification_backends={"BACKEND": {"channel_id": "abc123"}}, + ) + + notify_to_multiple_users_step = make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, + ) + notify_to_multiple_users_step.notify_to_users_queue.set([user_1, user_2]) + wait_step = make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_WAIT, + wait_delay=EscalationPolicy.FIFTEEN_MINUTES, + ) + # random time for test + from_time = datetime.time(10, 30) + to_time = datetime.time(18, 45) + notify_if_time_step = make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_NOTIFY_IF_TIME, + from_time=from_time, + to_time=to_time, + ) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + 
alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + alert_group.save() + return alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step diff --git a/engine/apps/alerts/tests/test_check_escalation_finished_task.py b/engine/apps/alerts/tests/test_check_escalation_finished_task.py index 2e2ce436..ad057102 100644 --- a/engine/apps/alerts/tests/test_check_escalation_finished_task.py +++ b/engine/apps/alerts/tests/test_check_escalation_finished_task.py @@ -1,45 +1,329 @@ +from unittest.mock import Mock, PropertyMock, patch + import pytest +import requests +from django.test import override_settings from django.utils import timezone -from apps.alerts.models import AlertReceiveChannel -from apps.alerts.tasks import check_escalation_finished_task +from apps.alerts.models import AlertGroup +from apps.alerts.tasks.check_escalation_finished import ( + AlertGroupEscalationPolicyExecutionAuditException, + audit_alert_group_escalation, + check_escalation_finished_task, + send_alert_group_escalation_auditor_task_heartbeat, +) + +MOCKED_HEARTBEAT_URL = "https://hello.com/lsdjjkf" + + +# def _get_relevant_log_record_type() -> int: +# return random.choice([AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, AlertGroupLogRecord.TYPE_ESCALATION_FAILED]) + + +def test_send_alert_group_escalation_auditor_task_heartbeat_does_not_call_the_heartbeat_url_if_one_is_not_configured(): + with patch("apps.alerts.tasks.check_escalation_finished.requests") as mock_requests: + send_alert_group_escalation_auditor_task_heartbeat() + mock_requests.get.assert_not_called() + + +@override_settings(ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL=MOCKED_HEARTBEAT_URL) +def test_send_alert_group_escalation_auditor_task_heartbeat_calls_the_heartbeat_url_if_one_is_configured(): + with patch("apps.alerts.tasks.check_escalation_finished.requests") as mock_requests: + send_alert_group_escalation_auditor_task_heartbeat() + + 
mock_requests.get.assert_called_once_with(MOCKED_HEARTBEAT_URL) + mock_requests.get.return_value.raise_for_status.assert_called_once_with() + + +@override_settings(ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL=MOCKED_HEARTBEAT_URL) +def test_send_alert_group_escalation_auditor_task_heartbeat_raises_an_exception_if_the_heartbeat_url_request_fails(): + with patch("apps.alerts.tasks.check_escalation_finished.requests") as mock_requests: + mock_response = Mock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError + + mock_requests.get.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError): + send_alert_group_escalation_auditor_task_heartbeat() + + mock_requests.get.assert_called_once_with(MOCKED_HEARTBEAT_URL) + mock_requests.get.return_value.raise_for_status.assert_called_once_with() @pytest.mark.django_db -def test_check_escalation_finished_task( +def test_audit_alert_group_escalation_raises_exception_if_the_alert_group_does_not_have_an_escalation_snapshot( + escalation_snapshot_test_setup, +): + alert_group, _, _, _ = escalation_snapshot_test_setup + alert_group.escalation_snapshot = None + + with pytest.raises(AlertGroupEscalationPolicyExecutionAuditException): + audit_alert_group_escalation(alert_group) + + +@pytest.mark.django_db +def test_audit_alert_group_escalation_skips_further_validation_if_the_escalation_policies_snapshots_is_empty( + escalation_snapshot_test_setup, +): + alert_group, _, _, _ = escalation_snapshot_test_setup + + alert_group.escalation_snapshot.escalation_policies_snapshots = [] + audit_alert_group_escalation(alert_group) + + alert_group.escalation_snapshot.escalation_policies_snapshots = None + audit_alert_group_escalation(alert_group) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "next_step_eta_is_valid_return_value,raises_exception", + [ + (None, False), + (True, False), + (False, True), + ], +) +def 
test_audit_alert_group_escalation_next_step_eta_validation( + escalation_snapshot_test_setup, next_step_eta_is_valid_return_value, raises_exception +): + alert_group, _, _, _ = escalation_snapshot_test_setup + + with patch( + "apps.alerts.escalation_snapshot.snapshot_classes.escalation_snapshot.EscalationSnapshot.next_step_eta_is_valid" + ) as mock_next_step_eta_is_valid: + mock_next_step_eta_is_valid.return_value = next_step_eta_is_valid_return_value + + if raises_exception: + with pytest.raises(AlertGroupEscalationPolicyExecutionAuditException): + audit_alert_group_escalation(alert_group) + else: + try: + audit_alert_group_escalation(alert_group) + except AlertGroupEscalationPolicyExecutionAuditException: + pytest.fail() + + mock_next_step_eta_is_valid.assert_called_once_with() + + +@pytest.mark.django_db +def test_audit_alert_group_escalation_no_executed_escalation_policy_snapshots(escalation_snapshot_test_setup): + alert_group, _, _, _ = escalation_snapshot_test_setup + + with patch( + "apps.alerts.escalation_snapshot.snapshot_classes.escalation_snapshot.EscalationSnapshot.executed_escalation_policy_snapshots", + new_callable=PropertyMock, + ) as mock_executed_escalation_policy_snapshots: + mock_executed_escalation_policy_snapshots.return_value = [] + audit_alert_group_escalation(alert_group) + mock_executed_escalation_policy_snapshots.assert_called_once_with() + + +# # see TODO: comment in engine/apps/alerts/tasks/check_escalation_finished.py +# @pytest.mark.django_db +# def test_audit_alert_group_escalation_all_executed_escalation_policy_snapshots_have_triggered_log_records( +# escalation_snapshot_test_setup, +# make_organization_and_user, +# make_alert_group_log_record, +# ): +# _, user = make_organization_and_user() +# alert_group, _, _, _ = escalation_snapshot_test_setup +# escalation_policies_snapshots = alert_group.escalation_snapshot.escalation_policies_snapshots + +# for escalation_policy_snapshot in escalation_policies_snapshots: +# escalation_policy 
= EscalationPolicy.objects.get(id=escalation_policy_snapshot.id) +# log_record_type = _get_relevant_log_record_type() + +# make_alert_group_log_record(alert_group, log_record_type, user, escalation_policy=escalation_policy) + +# with patch( +# "apps.alerts.escalation_snapshot.snapshot_classes.escalation_snapshot.EscalationSnapshot.executed_escalation_policy_snapshots", +# new_callable=PropertyMock, +# ) as mock_executed_escalation_policy_snapshots: +# mock_executed_escalation_policy_snapshots.return_value = escalation_policies_snapshots +# audit_alert_group_escalation(alert_group) +# mock_executed_escalation_policy_snapshots.assert_called_once_with() + +# see TODO: comment in engine/apps/alerts/tasks/check_escalation_finished.py +# @pytest.mark.django_db +# def test_audit_alert_group_escalation_one_executed_escalation_policy_snapshot_does_not_have_a_triggered_log_record( +# escalation_snapshot_test_setup, +# make_organization_and_user, +# make_alert_group_log_record, +# ): +# _, user = make_organization_and_user() +# alert_group, _, _, _ = escalation_snapshot_test_setup +# escalation_policies_snapshots = alert_group.escalation_snapshot.escalation_policies_snapshots + +# # let's skip creating a relevant alert group log record for the first executed escalation policy +# for idx, escalation_policy_snapshot in enumerate(escalation_policies_snapshots): +# if idx != 0: +# escalation_policy = EscalationPolicy.objects.get(id=escalation_policy_snapshot.id) +# make_alert_group_log_record( +# alert_group, _get_relevant_log_record_type(), user, escalation_policy=escalation_policy +# ) + +# with patch( +# "apps.alerts.escalation_snapshot.snapshot_classes.escalation_snapshot.EscalationSnapshot.executed_escalation_policy_snapshots", +# new_callable=PropertyMock, +# ) as mock_executed_escalation_policy_snapshots: +# mock_executed_escalation_policy_snapshots.return_value = escalation_policies_snapshots + +# with pytest.raises(AlertGroupEscalationPolicyExecutionAuditException): +# 
audit_alert_group_escalation(alert_group) +# mock_executed_escalation_policy_snapshots.assert_called_once_with() + + +@pytest.mark.django_db +def test_check_escalation_finished_task_queries_doesnt_grab_alert_groups_outside_of_date_range( make_organization_and_user, make_alert_receive_channel, make_alert_group, ): - organization, user = make_organization_and_user() - alert_receive_channel = make_alert_receive_channel( - organization, integration=AlertReceiveChannel.INTEGRATION_GRAFANA - ) - alert_group = make_alert_group(alert_receive_channel) + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) now = timezone.now() + two_days_ago = now - timezone.timedelta(days=2) + two_days_in_future = now + timezone.timedelta(days=2) - # we don't have escalation finish time, seems we cannot calculate it due escalation chain snapshot has uncalculated - # steps or does not exist, no exception is raised - check_escalation_finished_task() + # we can't simply pass started_at to the fixture because started_at is being "auto-set" on the Model + alert_group1 = make_alert_group(alert_receive_channel) + alert_group1.started_at = now - # it's acceptable time for finish escalation, because we have tolerance time 5 min from now, no exception is raised - alert_group.estimate_escalation_finish_time = now - alert_group.save() - check_escalation_finished_task() + alert_group2 = make_alert_group(alert_receive_channel) + alert_group2.started_at = now - timezone.timedelta(days=5) - # it is acceptable time for finish escalation, so no exception is raised - alert_group.estimate_escalation_finish_time = now + timezone.timedelta(minutes=10) - alert_group.save() - check_escalation_finished_task() + alert_group3 = make_alert_group(alert_receive_channel) + alert_group3.started_at = now + timezone.timedelta(days=5) - # escalation is not finished yet and passed more than 5 minutes after estimate time, exception is raised - 
alert_group.estimate_escalation_finish_time = now - timezone.timedelta(minutes=10) - alert_group.save() - with pytest.raises(Exception): - check_escalation_finished_task() + AlertGroup.all_objects.bulk_update([alert_group1, alert_group2, alert_group3], ["started_at"]) - # escalation is finished and we don't care anymore about its finish time, so no exception is raised - alert_group.is_escalation_finished = True - alert_group.save() - check_escalation_finished_task() + with patch( + "apps.alerts.tasks.check_escalation_finished.get_auditable_alert_groups_started_at_range" + ) as mocked_get_auditable_alert_groups_started_at_range: + with patch( + "apps.alerts.tasks.check_escalation_finished.audit_alert_group_escalation" + ) as mocked_audit_alert_group_escalation: + with patch( + "apps.alerts.tasks.check_escalation_finished.send_alert_group_escalation_auditor_task_heartbeat" + ) as mocked_send_alert_group_escalation_auditor_task_heartbeat: + mocked_get_auditable_alert_groups_started_at_range.return_value = (two_days_ago, two_days_in_future) + + check_escalation_finished_task() + + mocked_audit_alert_group_escalation.assert_called_once_with(alert_group1) + mocked_send_alert_group_escalation_auditor_task_heartbeat.assert_called_once_with() + + +@pytest.mark.django_db +def test_check_escalation_finished_task_calls_audit_alert_group_escalation_for_every_alert_group( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + + now = timezone.now() + two_days_ago = now - timezone.timedelta(days=2) + two_days_in_future = now + timezone.timedelta(days=2) + + # we can't simply pass started_at to the fixture because started_at is being "auto-set" on the Model + alert_group1 = make_alert_group(alert_receive_channel) + alert_group1.started_at = now + + alert_group2 = make_alert_group(alert_receive_channel) + alert_group2.started_at = now + 
+ alert_group3 = make_alert_group(alert_receive_channel) + alert_group3.started_at = now + + AlertGroup.all_objects.bulk_update([alert_group1, alert_group2, alert_group3], ["started_at"]) + + with patch( + "apps.alerts.tasks.check_escalation_finished.get_auditable_alert_groups_started_at_range" + ) as mocked_get_auditable_alert_groups_started_at_range: + with patch( + "apps.alerts.tasks.check_escalation_finished.audit_alert_group_escalation" + ) as mocked_audit_alert_group_escalation: + with patch( + "apps.alerts.tasks.check_escalation_finished.send_alert_group_escalation_auditor_task_heartbeat" + ) as mocked_send_alert_group_escalation_auditor_task_heartbeat: + mocked_get_auditable_alert_groups_started_at_range.return_value = (two_days_ago, two_days_in_future) + + check_escalation_finished_task() + + mocked_audit_alert_group_escalation.assert_any_call(alert_group1) + mocked_audit_alert_group_escalation.assert_any_call(alert_group2) + mocked_audit_alert_group_escalation.assert_any_call(alert_group3) + mocked_send_alert_group_escalation_auditor_task_heartbeat.assert_called_once_with() + + +@pytest.mark.django_db +def test_check_escalation_finished_task_simply_calls_heartbeat_when_no_alert_groups_found(): + with patch( + "apps.alerts.tasks.check_escalation_finished.audit_alert_group_escalation" + ) as mocked_audit_alert_group_escalation: + with patch( + "apps.alerts.tasks.check_escalation_finished.send_alert_group_escalation_auditor_task_heartbeat" + ) as mocked_send_alert_group_escalation_auditor_task_heartbeat: + check_escalation_finished_task() + mocked_audit_alert_group_escalation.assert_not_called() + mocked_send_alert_group_escalation_auditor_task_heartbeat.assert_called_once_with() + + +@pytest.mark.django_db +def test_check_escalation_finished_task_calls_audit_alert_group_escalation_for_every_alert_group_even_if_one_fails( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + 
alert_receive_channel = make_alert_receive_channel(organization) + + now = timezone.now() + two_days_ago = now - timezone.timedelta(days=2) + two_days_in_future = now + timezone.timedelta(days=2) + + # we can't simply pass started_at to the fixture because started_at is being "auto-set" on the Model + alert_group1 = make_alert_group(alert_receive_channel) + alert_group1.started_at = now + + alert_group2 = make_alert_group(alert_receive_channel) + alert_group2.started_at = now + + alert_group3 = make_alert_group(alert_receive_channel) + alert_group3.started_at = now + + AlertGroup.all_objects.bulk_update([alert_group1, alert_group2, alert_group3], ["started_at"]) + + def _mocked_audit_alert_group_escalation(alert_group): + if not alert_group.id == alert_group3.id: + raise AlertGroupEscalationPolicyExecutionAuditException("asdfasdf") + + with patch( + "apps.alerts.tasks.check_escalation_finished.get_auditable_alert_groups_started_at_range" + ) as mocked_get_auditable_alert_groups_started_at_range: + with patch( + "apps.alerts.tasks.check_escalation_finished.audit_alert_group_escalation" + ) as mocked_audit_alert_group_escalation: + with patch( + "apps.alerts.tasks.check_escalation_finished.send_alert_group_escalation_auditor_task_heartbeat" + ) as mocked_send_alert_group_escalation_auditor_task_heartbeat: + mocked_get_auditable_alert_groups_started_at_range.return_value = (two_days_ago, two_days_in_future) + mocked_audit_alert_group_escalation.side_effect = _mocked_audit_alert_group_escalation + + with pytest.raises(AlertGroupEscalationPolicyExecutionAuditException) as exc: + check_escalation_finished_task() + + assert ( + str(exc.value) + == f"The following alert group id(s) failed auditing: {alert_group1.id}, {alert_group2.id}" + ) + + mocked_audit_alert_group_escalation.assert_any_call(alert_group1) + mocked_audit_alert_group_escalation.assert_any_call(alert_group2) + mocked_audit_alert_group_escalation.assert_any_call(alert_group3) + + 
mocked_send_alert_group_escalation_auditor_task_heartbeat.assert_not_called() diff --git a/engine/apps/alerts/tests/test_escalation_policy_snapshot.py b/engine/apps/alerts/tests/test_escalation_policy_snapshot.py index e84050b2..038a93f8 100644 --- a/engine/apps/alerts/tests/test_escalation_policy_snapshot.py +++ b/engine/apps/alerts/tests/test_escalation_policy_snapshot.py @@ -134,7 +134,7 @@ def test_escalation_step_notify_multiple_users( escalation_step_test_setup, make_escalation_policy, ): - organization, user, _, channel_filter, alert_group, reason = escalation_step_test_setup + _, user, _, channel_filter, alert_group, reason = escalation_step_test_setup notify_users_step = make_escalation_policy( escalation_chain=channel_filter.escalation_chain, @@ -386,7 +386,7 @@ def test_escalation_step_notify_if_num_alerts_in_window( ).exists() assert not mocked_execute_tasks.called - organization, user, _, channel_filter, alert_group, reason = escalation_step_test_setup + _, _, _, channel_filter, alert_group, reason = escalation_step_test_setup make_alert(alert_group=alert_group, raw_request_data={}) diff --git a/engine/apps/alerts/tests/test_escalation_snapshot.py b/engine/apps/alerts/tests/test_escalation_snapshot.py index 98aaad03..8ce55b8f 100644 --- a/engine/apps/alerts/tests/test_escalation_snapshot.py +++ b/engine/apps/alerts/tests/test_escalation_snapshot.py @@ -1,5 +1,3 @@ -import datetime - import pytest from django.utils import timezone @@ -11,54 +9,6 @@ from apps.alerts.escalation_snapshot.snapshot_classes import ( from apps.alerts.models import EscalationPolicy -@pytest.fixture() -def escalation_snapshot_test_setup( - make_organization_and_user, - make_user_for_organization, - make_alert_receive_channel, - make_channel_filter, - make_escalation_chain, - make_escalation_policy, - make_alert_group, -): - organization, user_1 = make_organization_and_user() - user_2 = make_user_for_organization(organization) - - alert_receive_channel = 
make_alert_receive_channel(organization) - - escalation_chain = make_escalation_chain(organization) - channel_filter = make_channel_filter( - alert_receive_channel, - escalation_chain=escalation_chain, - notification_backends={"BACKEND": {"channel_id": "abc123"}}, - ) - - notify_to_multiple_users_step = make_escalation_policy( - escalation_chain=channel_filter.escalation_chain, - escalation_policy_step=EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, - ) - notify_to_multiple_users_step.notify_to_users_queue.set([user_1, user_2]) - wait_step = make_escalation_policy( - escalation_chain=channel_filter.escalation_chain, - escalation_policy_step=EscalationPolicy.STEP_WAIT, - wait_delay=EscalationPolicy.FIFTEEN_MINUTES, - ) - # random time for test - from_time = datetime.time(10, 30) - to_time = datetime.time(18, 45) - notify_if_time_step = make_escalation_policy( - escalation_chain=channel_filter.escalation_chain, - escalation_policy_step=EscalationPolicy.STEP_NOTIFY_IF_TIME, - from_time=from_time, - to_time=to_time, - ) - - alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) - alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() - alert_group.save() - return alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step - - @pytest.mark.django_db def test_raw_escalation_snapshot(escalation_snapshot_test_setup): alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step = escalation_snapshot_test_setup @@ -142,7 +92,7 @@ def test_raw_escalation_snapshot(escalation_snapshot_test_setup): @pytest.mark.django_db def test_serialized_escalation_snapshot(escalation_snapshot_test_setup): - alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step = escalation_snapshot_test_setup + alert_group, _, _, _ = escalation_snapshot_test_setup escalation_snapshot = alert_group.escalation_snapshot assert isinstance(escalation_snapshot, EscalationSnapshot) assert 
escalation_snapshot.channel_filter_snapshot is not None and isinstance( @@ -163,7 +113,7 @@ def test_serialized_escalation_snapshot(escalation_snapshot_test_setup): @pytest.mark.django_db def test_escalation_snapshot_with_deleted_channel_filter(escalation_snapshot_test_setup): - alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step = escalation_snapshot_test_setup + alert_group, _, _, _ = escalation_snapshot_test_setup alert_group.channel_filter.delete() escalation_snapshot = alert_group.escalation_snapshot @@ -174,7 +124,7 @@ def test_escalation_snapshot_with_deleted_channel_filter(escalation_snapshot_tes @pytest.mark.django_db def test_change_escalation_snapshot(escalation_snapshot_test_setup): - alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step = escalation_snapshot_test_setup + alert_group, _, _, _ = escalation_snapshot_test_setup new_active_order = 2 now = timezone.now() @@ -194,7 +144,7 @@ def test_change_escalation_snapshot(escalation_snapshot_test_setup): @pytest.mark.django_db def test_next_escalation_policy_snapshot(escalation_snapshot_test_setup): - alert_group, notify_to_multiple_users_step, wait_step, notify_if_time_step = escalation_snapshot_test_setup + alert_group, _, _, _ = escalation_snapshot_test_setup escalation_snapshot = alert_group.escalation_snapshot assert escalation_snapshot.last_active_escalation_policy_order is None @@ -226,3 +176,39 @@ def test_next_escalation_policy_snapshot(escalation_snapshot_test_setup): is escalation_snapshot.escalation_policies_snapshots[-1] ) assert escalation_snapshot.next_active_escalation_policy_snapshot is None + + +@pytest.mark.django_db +@pytest.mark.parametrize( + "next_step_eta,expected", + [ + (None, None), + (timezone.now() - timezone.timedelta(weeks=50), False), + (timezone.now() - timezone.timedelta(minutes=4), True), + (timezone.now() + timezone.timedelta(minutes=4), True), + ], +) +def test_next_step_eta_is_valid(escalation_snapshot_test_setup, 
next_step_eta, expected) -> None: + alert_group, _, _, _ = escalation_snapshot_test_setup + escalation_snapshot = alert_group.escalation_snapshot + + escalation_snapshot.next_step_eta = next_step_eta + + assert escalation_snapshot.next_step_eta_is_valid() is expected + + +@pytest.mark.django_db +def test_executed_escalation_policy_snapshots(escalation_snapshot_test_setup): + alert_group, _, _, _ = escalation_snapshot_test_setup + escalation_snapshot = alert_group.escalation_snapshot + + escalation_snapshot.last_active_escalation_policy_order = None + assert escalation_snapshot.executed_escalation_policy_snapshots == [] + + escalation_snapshot.last_active_escalation_policy_order = 0 + assert escalation_snapshot.executed_escalation_policy_snapshots == [ + escalation_snapshot.escalation_policies_snapshots[0] + ] + + escalation_snapshot.last_active_escalation_policy_order = len(escalation_snapshot.escalation_policies_snapshots) + assert escalation_snapshot.executed_escalation_policy_snapshots == escalation_snapshot.escalation_policies_snapshots diff --git a/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py b/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py new file mode 100644 index 00000000..4aa984e0 --- /dev/null +++ b/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py @@ -0,0 +1,658 @@ +from unittest.mock import PropertyMock, patch + +import pytest +import pytz +from rest_framework.exceptions import ValidationError + +from apps.alerts.escalation_snapshot.snapshot_classes import EscalationSnapshot +from apps.alerts.models import EscalationPolicy + +MOCK_SLACK_CHANNEL_ID = "asdfljaskdf" +EMPTY_RAW_ESCALATION_SNAPSHOT = { + "channel_filter_snapshot": None, + "escalation_chain_snapshot": None, + "last_active_escalation_policy_order": None, + "escalation_policies_snapshots": [], + "slack_channel_id": None, + "pause_escalation": False, + "next_step_eta": None, +} + + +@patch("apps.alerts.models.alert_group.AlertGroup.slack_channel_id", 
new_callable=PropertyMock) +@pytest.mark.django_db +def test_build_raw_escalation_snapshot_escalation_chain_exists( + mock_alert_group_slack_channel_id, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_escalation_policy, + make_alert_group, +): + mock_alert_group_slack_channel_id.return_value = MOCK_SLACK_CHANNEL_ID + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_WAIT, + wait_delay=EscalationPolicy.FIFTEEN_MINUTES, + ) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + expected_snapshot = EscalationSnapshot.serializer( + { + "channel_filter_snapshot": alert_group.channel_filter, + "escalation_chain_snapshot": alert_group.channel_filter.escalation_chain, + "escalation_policies_snapshots": alert_group.channel_filter.escalation_chain.escalation_policies.all(), + "slack_channel_id": MOCK_SLACK_CHANNEL_ID, + } + ) + + assert alert_group.build_raw_escalation_snapshot() == expected_snapshot.data + + +@patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.pause_escalation", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_build_raw_escalation_snapshot_escalation_chain_does_not_exist_escalation_paused( + mocked_pause_escalation, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_alert_group, +): + mocked_pause_escalation.return_value = True + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = 
make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.build_raw_escalation_snapshot() == EMPTY_RAW_ESCALATION_SNAPSHOT + + +@pytest.mark.django_db +def test_build_raw_escalation_snapshot_escalation_chain_does_not_exist_no_channel_filter( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.build_raw_escalation_snapshot() == EMPTY_RAW_ESCALATION_SNAPSHOT + + +@pytest.mark.django_db +def test_build_raw_escalation_snapshot_escalation_chain_does_not_exist_no_channel_filter_escalation_chain( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.build_raw_escalation_snapshot() == EMPTY_RAW_ESCALATION_SNAPSHOT + + +@patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.channel_filter_snapshot", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_channel_filter_with_respect_to_escalation_snapshot( + mock_channel_filter_snapshot, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + channel_filter_snapshot = "asdfasdfadsfadsf" + mock_channel_filter_snapshot.return_value = channel_filter_snapshot + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + 
channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.channel_filter_with_respect_to_escalation_snapshot == channel_filter_snapshot + + +@patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.channel_filter_snapshot", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_channel_filter_with_respect_to_escalation_snapshot_no_channel_filter_snapshot( + mock_channel_filter_snapshot, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + mock_channel_filter_snapshot.return_value = None + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.channel_filter_with_respect_to_escalation_snapshot == channel_filter + + +@patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.escalation_chain_snapshot", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_escalation_chain_with_respect_to_escalation_snapshot( + mock_escalation_chain_snapshot, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + escalation_chain_snapshot = "asdfasdfadsfadsf" + mock_escalation_chain_snapshot.return_value = escalation_chain_snapshot + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.escalation_chain_with_respect_to_escalation_snapshot == escalation_chain_snapshot + + +@patch( + 
"apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.escalation_chain_snapshot", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_escalation_chain_with_respect_to_escalation_snapshot_no_escalation_chain_snapshot( + mock_escalation_chain_snapshot, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_alert_group, +): + mock_escalation_chain_snapshot.return_value = None + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.escalation_chain_with_respect_to_escalation_snapshot == escalation_chain + + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.channel_filter is None + assert alert_group.escalation_chain_with_respect_to_escalation_snapshot is None + + +@patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.escalation_chain_snapshot", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_escalation_chain_with_respect_to_escalation_snapshot_no_escalation_chain_snapshot_and_no_channel_filter( + mock_escalation_chain_snapshot, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + mock_escalation_chain_snapshot.return_value = None + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.escalation_chain_with_respect_to_escalation_snapshot is None + + +@pytest.mark.django_db +def test_channel_filter_snapshot( + make_organization_and_user, + make_alert_receive_channel, + 
make_channel_filter, + make_escalation_chain, + make_escalation_policy, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_WAIT, + wait_delay=EscalationPolicy.FIFTEEN_MINUTES, + ) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.channel_filter_snapshot.id == channel_filter.id + + +@pytest.mark.django_db +def test_channel_filter_snapshot_no_escalation_chain_exists( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.raw_escalation_snapshot["channel_filter_snapshot"] is None + assert alert_group.channel_filter_snapshot is None + + +@pytest.mark.django_db +def test_channel_filter_snapshot_no_alert_group_raw_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert 
alert_group.channel_filter_snapshot is None + + +@pytest.mark.django_db +def test_escalation_chain_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_escalation_policy, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_WAIT, + wait_delay=EscalationPolicy.FIFTEEN_MINUTES, + ) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.escalation_chain_snapshot.id == escalation_chain.id + + +@pytest.mark.django_db +def test_escalation_chain_snapshot_no_escalation_chain_exists( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.raw_escalation_snapshot["escalation_chain_snapshot"] is None + assert alert_group.escalation_chain_snapshot is None + + +@pytest.mark.django_db +def test_escalation_chain_snapshot_no_alert_group_raw_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + 
channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.escalation_chain_snapshot is None + + +@pytest.mark.django_db +def test_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + return_value = "asdfasdfasdf" + with patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin._deserialize_escalation_snapshot", + return_value=return_value, + ): + assert alert_group.escalation_snapshot == return_value + + +@pytest.mark.django_db +def test_escalation_snapshot_validation_error( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + with patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin._deserialize_escalation_snapshot", + side_effect=ValidationError("asdfasdf"), + ): + assert alert_group.escalation_snapshot is None + + +@pytest.mark.django_db +def test_escalation_snapshot_no_alert_group_raw_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + 
alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.escalation_snapshot is None + + +@pytest.mark.django_db +def test_escalation_snapshot_empty_escalation_policies_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.raw_escalation_snapshot is not None + assert alert_group.has_escalation_policies_snapshots is False + + +@pytest.mark.django_db +def test_escalation_snapshot_nonempty_escalation_policies_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_escalation_policy, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_WAIT, + wait_delay=EscalationPolicy.FIFTEEN_MINUTES, + ) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.raw_escalation_snapshot is not None + assert alert_group.has_escalation_policies_snapshots is True + + +@pytest.mark.django_db +def 
test_has_escalation_policies_snapshots_no_alert_group_raw_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.raw_escalation_snapshot is None + assert alert_group.has_escalation_policies_snapshots is False + + +@patch("apps.alerts.models.alert_group.AlertGroup.slack_channel_id", new_callable=PropertyMock) +@pytest.mark.django_db +def test_deserialize_escalation_snapshot( + mock_alert_group_slack_channel_id, + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_escalation_policy, + make_alert_group, +): + mock_alert_group_slack_channel_id.return_value = MOCK_SLACK_CHANNEL_ID + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + escalation_policy = make_escalation_policy( + escalation_chain=channel_filter.escalation_chain, + escalation_policy_step=EscalationPolicy.STEP_WAIT, + wait_delay=EscalationPolicy.FIFTEEN_MINUTES, + ) + + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + deserialized_escalation_snapshot = alert_group._deserialize_escalation_snapshot(alert_group.raw_escalation_snapshot) + + assert deserialized_escalation_snapshot.alert_group == alert_group + assert deserialized_escalation_snapshot.channel_filter_snapshot.id == channel_filter.id + assert 
deserialized_escalation_snapshot.escalation_chain_snapshot.id == escalation_chain.id + assert deserialized_escalation_snapshot.last_active_escalation_policy_order is None + assert len(deserialized_escalation_snapshot.escalation_policies_snapshots) == 1 + assert deserialized_escalation_snapshot.escalation_policies_snapshots[0].id == escalation_policy.id + assert deserialized_escalation_snapshot.slack_channel_id == MOCK_SLACK_CHANNEL_ID + assert deserialized_escalation_snapshot.pause_escalation is False + assert deserialized_escalation_snapshot.next_step_eta is None + assert deserialized_escalation_snapshot.stop_escalation is False + + +@pytest.mark.django_db +def test_escalation_chain_exists( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_escalation_chain, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + escalation_chain = make_escalation_chain(organization=organization) + channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.pause_escalation is False + assert alert_group.escalation_chain_exists is True + + +@patch( + "apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.pause_escalation", + new_callable=PropertyMock, +) +@pytest.mark.django_db +def test_escalation_chain_exists_paused_escalation( + mocked_pause_escalation, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + mocked_pause_escalation.return_value = True + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.pause_escalation is True + assert alert_group.escalation_chain_exists is False + + +@pytest.mark.django_db +def 
test_escalation_chain_exists_no_channel_filter( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.pause_escalation is False + assert alert_group.channel_filter is None + assert alert_group.escalation_chain_exists is False + + +@pytest.mark.django_db +def test_escalation_chain_exists_no_channel_filter_escalation_chain( + make_organization_and_user, + make_alert_receive_channel, + make_channel_filter, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + channel_filter = make_channel_filter(alert_receive_channel) + alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) + + assert alert_group.pause_escalation is False + assert alert_group.channel_filter == channel_filter + assert alert_group.channel_filter.escalation_chain is None + assert alert_group.escalation_chain_exists is False + + +@pytest.mark.django_db +def test_pause_escalation_no_raw_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.raw_escalation_snapshot is None + assert alert_group.pause_escalation is False + + +@pytest.mark.django_db +def test_pause_escalation_raw_escalation_snapshot_exists( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + alert_group.raw_escalation_snapshot = 
alert_group.build_raw_escalation_snapshot() + + assert alert_group.raw_escalation_snapshot is not None + assert alert_group.raw_escalation_snapshot["pause_escalation"] is False + + alert_group.raw_escalation_snapshot["pause_escalation"] = True + + assert alert_group.pause_escalation is True + + +@pytest.mark.django_db +def test_next_step_eta_no_raw_escalation_snapshot( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + assert alert_group.raw_escalation_snapshot is None + assert alert_group.next_step_eta is None + + +@pytest.mark.django_db +def test_next_step_eta_no_next_step_eta( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + + assert alert_group.raw_escalation_snapshot is not None + assert alert_group.raw_escalation_snapshot["next_step_eta"] is None + assert alert_group.next_step_eta is None + + +@patch("apps.alerts.escalation_snapshot.escalation_snapshot_mixin.parse") +@pytest.mark.django_db +def test_next_step_eta( + mock_dateutil_parser, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + mocked_raw_date = "mcvnmcvmnvc" + mocked_parsed_date = "asdfasdfaf" + mock_dateutil_parser.return_value.replace.return_value = mocked_parsed_date + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + 
alert_group.raw_escalation_snapshot["next_step_eta"] = mocked_raw_date + + assert alert_group.raw_escalation_snapshot is not None + assert alert_group.raw_escalation_snapshot["next_step_eta"] is mocked_raw_date + assert alert_group.next_step_eta == mocked_parsed_date + + mock_dateutil_parser.assert_called_once_with(mocked_raw_date) + mock_dateutil_parser.return_value.replace.assert_called_once_with(tzinfo=pytz.UTC) diff --git a/engine/common/database.py b/engine/common/database.py index dd40a39e..d2ca1929 100644 --- a/engine/common/database.py +++ b/engine/common/database.py @@ -5,11 +5,11 @@ from django.conf import settings def get_random_readonly_database_key_if_present_otherwise_default() -> str: """ - This function returns a string, representing a key in the DATABASES django settings. - If settings.READONLY_DATABASES is set, and non-empty, it randomly chooses one of the read-only databases, + This function returns a string, representing a key in the `DATABASES` django settings. + If `settings.READONLY_DATABASES` is set, and non-empty, it randomly chooses one of the read-only databases, otherwise it falls back to "default". 
- This is primarily intended to be used for django's QuerySet.using() function + This is primarily intended to be used for django's `QuerySet.using()` function """ using_db = "default" if hasattr(settings, "READONLY_DATABASES") and len(settings.READONLY_DATABASES) > 0: diff --git a/engine/engine/management/commands/restart_escalation.py b/engine/engine/management/commands/restart_escalation.py index 6c4c0823..8ceaf01d 100644 --- a/engine/engine/management/commands/restart_escalation.py +++ b/engine/engine/management/commands/restart_escalation.py @@ -49,9 +49,6 @@ class Command(BaseCommand): alert_group.unsilence_task_uuid = task_id escalation_start_time = max(now, alert_group.silenced_until) - alert_group.estimate_escalation_finish_time = alert_group.calculate_eta_for_finish_escalation( - start_time=escalation_start_time, - ) alert_groups_to_update.append(alert_group) tasks.append( @@ -65,9 +62,6 @@ class Command(BaseCommand): # otherwise start escalate_alert_group task else: if alert_group.escalation_snapshot: - alert_group.estimate_escalation_finish_time = alert_group.calculate_eta_for_finish_escalation( - escalation_started=True, - ) alert_group.active_escalation_id = task_id alert_groups_to_update.append(alert_group) @@ -82,7 +76,7 @@ class Command(BaseCommand): AlertGroup.all_objects.bulk_update( alert_groups_to_update, - ["estimate_escalation_finish_time", "active_escalation_id", "unsilence_task_uuid"], + ["active_escalation_id", "unsilence_task_uuid"], batch_size=5000, ) diff --git a/engine/engine/management/commands/start_celery.py b/engine/engine/management/commands/start_celery.py index e61a9430..23ced0ec 100644 --- a/engine/engine/management/commands/start_celery.py +++ b/engine/engine/management/commands/start_celery.py @@ -1,9 +1,12 @@ +import os import shlex import subprocess from django.core.management.base import BaseCommand from django.utils import autoreload +from common.utils import getenv_boolean + WORKER_ID = 0 @@ -11,8 +14,18 @@ def 
restart_celery(*args, **kwargs): global WORKER_ID kill_worker_cmd = "celery -A engine control shutdown" subprocess.call(shlex.split(kill_worker_cmd)) - start_worker_cmd = "celery -A engine worker -l info --concurrency=3 -Q celery,retry -n {}".format(WORKER_ID) - subprocess.call(shlex.split(start_worker_cmd)) + + queues = os.environ.get("CELERY_WORKER_QUEUE", "celery,retry") + max_tasks_per_child = os.environ.get("CELERY_WORKER_MAX_TASKS_PER_CHILD", 100) + concurrency = os.environ.get("CELERY_WORKER_CONCURRENCY", 3) + log_level = "debug" if getenv_boolean("CELERY_WORKER_DEBUG_LOGS", False) else "info" + + celery_args = f"-A engine worker -l {log_level} --concurrency={concurrency} -Q {queues} --max-tasks-per-child={max_tasks_per_child} -n {WORKER_ID}" + + if getenv_boolean("CELERY_WORKER_BEAT_ENABLED", False): + celery_args += " --beat" + + subprocess.call(shlex.split(f"celery {celery_args}")) WORKER_ID = 1 + WORKER_ID diff --git a/engine/requirements.txt b/engine/requirements.txt index 5a8a1837..bc2769f4 100644 --- a/engine/requirements.txt +++ b/engine/requirements.txt @@ -51,3 +51,4 @@ pyroscope-io==0.8.1 django-dbconn-retry==0.1.7 django-ipware==4.0.2 django-anymail==8.6 +django-deprecate-fields==0.1.1 diff --git a/engine/settings/base.py b/engine/settings/base.py index dc611872..68c36f2e 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -395,6 +395,10 @@ CELERY_MAX_TASKS_PER_CHILD = 1 CELERY_WORKER_SEND_TASK_EVENTS = True CELERY_TASK_SEND_SENT_EVENT = True +ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL = os.getenv( + "ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_URL", None +) + CELERY_BEAT_SCHEDULE = { "restore_heartbeat_tasks": { "task": "apps.heartbeat.tasks.restore_heartbeat_tasks", @@ -403,7 +407,11 @@ CELERY_BEAT_SCHEDULE = { }, "check_escalations": { "task": "apps.alerts.tasks.check_escalation_finished.check_escalation_finished_task", - "schedule": 10 * 60, + # the task should be executed a minute or two less than the 
integration's configured interval + # + # ex. if the integration is configured to expect a heartbeat every 15 minutes then this value should be set + # to something like 13 * 60 (every 13 minutes) + "schedule": getenv_integer("ALERT_GROUP_ESCALATION_AUDITOR_CELERY_TASK_HEARTBEAT_INTERVAL", 13 * 60), "args": (), }, "start_refresh_ical_files": { From 2bfe13159159d4e36b080df4ae50d0d7a8d96c0f Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Fri, 17 Mar 2023 11:32:30 +0000 Subject: [PATCH 05/20] PD migrator: ignore deleted services in event rules (#1569) Update PD migrator to create routes with no escalation chain selected for event rules referencing deleted services --- .../migrator/resources/rulesets.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/pagerduty-migrator/migrator/resources/rulesets.py b/tools/pagerduty-migrator/migrator/resources/rulesets.py index 0296c8b8..2204a8f3 100644 --- a/tools/pagerduty-migrator/migrator/resources/rulesets.py +++ b/tools/pagerduty-migrator/migrator/resources/rulesets.py @@ -22,10 +22,12 @@ def match_ruleset( for r in ruleset["rules"] if not r["disabled"] and r["actions"]["route"] ] - escalation_policy_ids = [ - find_by_id(services, service_id)["escalation_policy"]["id"] - for service_id in service_ids - ] + escalation_policy_ids = [] + for service_id in service_ids: + service = find_by_id(services, service_id) + # Sometimes service cannot be found, e.g. when it is deleted but still referenced in ruleset + if service: + escalation_policy_ids.append(service["escalation_policy"]["id"]) flawed_escalation_policies = [] for escalation_policy_id in escalation_policy_ids: @@ -153,6 +155,10 @@ def _pd_service_id_to_oncall_escalation_chain_id( return None service = find_by_id(services, service_id) + if service is None: + # Service cannot be found, e.g. 
when it is deleted but still referenced in ruleset + return None + escalation_policy_id = service["escalation_policy"]["id"] escalation_policy = find_by_id(escalation_policies, escalation_policy_id) escalation_chain_id = escalation_policy["oncall_escalation_chain"]["id"] From 6d089558926d6e2790cfc000ddac57d666d11878 Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Fri, 17 Mar 2023 17:03:53 +0000 Subject: [PATCH 06/20] PD migrator: fix route order (#1570) Fix a bug when it's not possible to migrate when there's non-sequential event rule order (due to disabled event rules). --- tools/pagerduty-migrator/migrator/resources/rulesets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/pagerduty-migrator/migrator/resources/rulesets.py b/tools/pagerduty-migrator/migrator/resources/rulesets.py index 2204a8f3..a486a0ad 100644 --- a/tools/pagerduty-migrator/migrator/resources/rulesets.py +++ b/tools/pagerduty-migrator/migrator/resources/rulesets.py @@ -60,7 +60,7 @@ def migrate_ruleset( # Migrate rules that are not disabled and not catch-all rules = [r for r in ruleset["rules"] if not r["disabled"] and not r["catch_all"]] - for rule in rules: + for rule in sorted(rules, key=lambda r: r["position"]): service_id = ( rule["actions"]["route"]["value"] if rule["actions"]["route"] else None ) @@ -72,7 +72,6 @@ def migrate_ruleset( route_payload = { "routing_type": "jinja2", "routing_regex": filtering_term, - "position": rule["position"], "integration_id": integration["id"], "escalation_chain_id": escalation_chain_id, } From b41ac587139437fc70f9c07d80f0f9c231218269 Mon Sep 17 00:00:00 2001 From: Rares Mardare Date: Mon, 20 Mar 2023 11:35:14 +0200 Subject: [PATCH 07/20] Prevent viewer from opening link to mobile app (#1537) # What this PR does ## Which issue(s) this PR fixes Prevent a user with role = Viewer to access mobile app from the link on User Profile Page --- .../MobileAppConnection.tsx | 52 +++++++++++-------- 
.../parts/connectors/MobileAppConnector.tsx | 10 ++-- 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/grafana-plugin/src/containers/MobileAppConnection/MobileAppConnection.tsx b/grafana-plugin/src/containers/MobileAppConnection/MobileAppConnection.tsx index 06f07aaa..2ad82347 100644 --- a/grafana-plugin/src/containers/MobileAppConnection/MobileAppConnection.tsx +++ b/grafana-plugin/src/containers/MobileAppConnection/MobileAppConnection.tsx @@ -40,20 +40,25 @@ const MobileAppConnection = observer(({ userPk }: Props) => { // Show link to cloud page for OSS instances with no cloud connection if (store.hasFeature(AppFeature.CloudConnection) && !cloudStore.cloudConnectionStatus.cloud_connection_status) { return ( - - Please connect Cloud OnCall to use the mobile app - + + Please connect Cloud OnCall to use the mobile app + - - - - - + > + + + + + + ); } @@ -180,14 +185,19 @@ const MobileAppConnection = observer(({ userPk }: Props) => { } return ( -
- - - - - {content} - -
+ +
+ + + + + {content} + +
+
); function getParsedQRCodeValue() { diff --git a/grafana-plugin/src/containers/UserSettings/parts/connectors/MobileAppConnector.tsx b/grafana-plugin/src/containers/UserSettings/parts/connectors/MobileAppConnector.tsx index 9d036526..5fe16b59 100644 --- a/grafana-plugin/src/containers/UserSettings/parts/connectors/MobileAppConnector.tsx +++ b/grafana-plugin/src/containers/UserSettings/parts/connectors/MobileAppConnector.tsx @@ -4,6 +4,8 @@ import { Button, Label } from '@grafana/ui'; import cn from 'classnames/bind'; import { UserSettingsTab } from 'containers/UserSettings/UserSettings.types'; +import { WithPermissionControlTooltip } from 'containers/WithPermissionControl/WithPermissionControlTooltip'; +import { UserActions } from 'utils/authorization'; import styles from './index.module.css'; @@ -24,9 +26,11 @@ const MobileAppConnector = (props: MobileAppConnectorProps) => {
- + + +
); From d8bfc626deac5ab1c85dc9472592cdc07ea68b9b Mon Sep 17 00:00:00 2001 From: Rares Mardare Date: Mon, 20 Mar 2023 11:36:04 +0200 Subject: [PATCH 08/20] Rares/1448 permissions (#1529) # What this PR does Adds a few more permission checks for #1448 ## Which issue(s) this PR fixes #1448 ## Checklist - [x] `CHANGELOG.md` updated --- CHANGELOG.md | 1 + .../AlertTemplatesForm.module.css | 4 -- .../AlertTemplates/AlertTemplatesForm.tsx | 44 +++++++++---------- .../IntegrationSettings/parts/Autoresolve.tsx | 34 +++++++------- .../src/pages/incident/Incident.tsx | 18 ++++---- .../src/pages/schedule/Schedule.tsx | 9 ++-- 6 files changed, 58 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ca9858..4837b64d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed a few permission issues on the UI ([1448](https://github.com/grafana/oncall/pull/1448)) - Fix resolution note rendering in Slack message threads where the Slack username was not being properly rendered ([1561](https://github.com/grafana/oncall/pull/1561)) diff --git a/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.module.css b/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.module.css index f5da34f8..924951cf 100644 --- a/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.module.css +++ b/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.module.css @@ -46,10 +46,6 @@ margin-top: 8px; } -.payloadExample { - margin-top: 24px; -} - .autoresolve-condition section { border: 1px solid var(--primary-text-link); } diff --git a/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.tsx b/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.tsx index 9cef300f..7f6637cf 100644 --- a/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.tsx +++ 
b/grafana-plugin/src/components/AlertTemplates/AlertTemplatesForm.tsx @@ -18,7 +18,7 @@ import { AlertReceiveChannel } from 'models/alert_receive_channel/alert_receive_ import { Alert } from 'models/alertgroup/alertgroup.types'; import { makeRequest } from 'network'; import LocationHelper from 'utils/LocationHelper'; -import { UserActions } from 'utils/authorization'; +import { UserActions, isUserActionAllowed } from 'utils/authorization'; import styles from './AlertTemplatesForm.module.css'; @@ -254,29 +254,29 @@ const AlertTemplatesForm = (props: AlertTemplatesFormProps) => { {templates?.payload_example ? ( - - - - - {groups[activeGroup].map((template) => ( - - ))} - - -
+ + {isUserActionAllowed(UserActions.IntegrationsTest) && ( - - {JSON.stringify(templates?.payload_example, null, 4)} + + + {groups[activeGroup].map((template) => ( + + ))} + -
+ )} + + + {JSON.stringify(templates?.payload_example, null, 4)} +
) : ( sendDemoAlertBlock diff --git a/grafana-plugin/src/containers/IntegrationSettings/parts/Autoresolve.tsx b/grafana-plugin/src/containers/IntegrationSettings/parts/Autoresolve.tsx index 7f17f1eb..057cb0c6 100644 --- a/grafana-plugin/src/containers/IntegrationSettings/parts/Autoresolve.tsx +++ b/grafana-plugin/src/containers/IntegrationSettings/parts/Autoresolve.tsx @@ -128,18 +128,20 @@ const Autoresolve = ({ alertReceiveChannelId, onSwitchToTemplate, alertGroupId } Which team should this integration belong to? - + + +
- + + +
{showSaveConfirmationModal && ( diff --git a/grafana-plugin/src/pages/incident/Incident.tsx b/grafana-plugin/src/pages/incident/Incident.tsx index fe4b46de..c5861777 100644 --- a/grafana-plugin/src/pages/incident/Incident.tsx +++ b/grafana-plugin/src/pages/incident/Incident.tsx @@ -493,14 +493,16 @@ class IncidentPage extends React.Component onChange={(e: any) => this.setState({ resolutionNoteText: e.target.value })} /> - - Add resolution note - + + + Add resolution note + + ); }; diff --git a/grafana-plugin/src/pages/schedule/Schedule.tsx b/grafana-plugin/src/pages/schedule/Schedule.tsx index e34deae8..450452ea 100644 --- a/grafana-plugin/src/pages/schedule/Schedule.tsx +++ b/grafana-plugin/src/pages/schedule/Schedule.tsx @@ -22,6 +22,7 @@ import ScheduleOverrides from 'containers/Rotations/ScheduleOverrides'; import ScheduleForm from 'containers/ScheduleForm/ScheduleForm'; import ScheduleICalSettings from 'containers/ScheduleIcalLink/ScheduleIcalLink'; import UsersTimezones from 'containers/UsersTimezones/UsersTimezones'; +import { WithPermissionControlTooltip } from 'containers/WithPermissionControl/WithPermissionControlTooltip'; import { Schedule, ScheduleType, Shift } from 'models/schedule/schedule.types'; import { Timezone } from 'models/timezone/timezone.types'; import { PageProps, WithStoreProps } from 'state/types'; @@ -183,9 +184,11 @@ class SchedulePage extends React.Component {(schedule?.type === ScheduleType.Ical || schedule?.type === ScheduleType.Calendar) && ( - + + + )} Date: Mon, 20 Mar 2023 13:15:39 +0100 Subject: [PATCH 09/20] update integration heartbeat description to use 'alert group' instead of 'incident' (#1576) ## Which issue(s) this PR fixes ![Screenshot 2023-03-20 at 12 05 32](https://user-images.githubusercontent.com/9406895/226321853-05450b0a-c6f2-4558-8d84-757c9c40c24b.png) ## Checklist - [ ] Tests updated (N/A) - [ ] Documentation added (N/A) - [x] `CHANGELOG.md` updated --- CHANGELOG.md | 3 ++- 
.../src/containers/HeartbeatModal/HeartbeatForm.tsx | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4837b64d..37332d74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Updated wording in some Slack messages to use 'Alert Group' instead of 'Incident' ([1565](https://github.com/grafana/oncall/pull/1565)) +- Updated wording throughout plugin to use 'Alert Group' instead of 'Incident' ([1565](https://github.com/grafana/oncall/pull/1565), + [1576](https://github.com/grafana/oncall/pull/1576)) ### Fixed diff --git a/grafana-plugin/src/containers/HeartbeatModal/HeartbeatForm.tsx b/grafana-plugin/src/containers/HeartbeatModal/HeartbeatForm.tsx index 936afea5..cd48a4ad 100644 --- a/grafana-plugin/src/containers/HeartbeatModal/HeartbeatForm.tsx +++ b/grafana-plugin/src/containers/HeartbeatModal/HeartbeatForm.tsx @@ -85,11 +85,11 @@ const HeartbeatForm = observer(({ alertReceveChannelId, onUpdate }: HeartBeatMod

- A heartbeat acts as a healthcheck for incident monitoring. You can configure OnCall to regularly send alerts to - the heartbeat endpoint. If you don't receive one of these alerts, OnCall will issue an incident. + A heartbeat acts as a healthcheck for alert group monitoring. You can configure OnCall to regularly send alerts + to the heartbeat endpoint. If you don't receive one of these alerts, OnCall will issue an alert group.

- OnCall will issue an incident if no alert is received every + OnCall will issue an alert group if no alert is received every } placeholder="Search escalations..." diff --git a/grafana-plugin/src/pages/escalation-chains/EscalationChains.tsx b/grafana-plugin/src/pages/escalation-chains/EscalationChains.tsx index 729da478..fb9b0cd7 100644 --- a/grafana-plugin/src/pages/escalation-chains/EscalationChains.tsx +++ b/grafana-plugin/src/pages/escalation-chains/EscalationChains.tsx @@ -175,7 +175,7 @@ class EscalationChainsPage extends React.Component )} -

+
{searchResult ? ( Date: Mon, 20 Mar 2023 15:53:12 +0100 Subject: [PATCH 13/20] 319 telegram warning bug (#1424) # What this PR does When you disable Telegram feature we did not handle it in our frontend. Now if the fieture is disabled we do not shoe Telegram at Chat-ops, at Users. Also some crarification was added to the instructions that discussion group should be unique for each channel ## Which issue(s) this PR fixes https://github.com/grafana/oncall/issues/319 https://github.com/grafana/oncall/issues/938 --- CHANGELOG.md | 1 + .../components/Policy/NotificationPolicy.tsx | 9 +++- .../src/containers/AlertRules/parts/index.tsx | 4 +- .../PersonalNotificationSettings.tsx | 1 + .../TelegramIntegrationButton.tsx | 5 ++- .../containers/UserSettings/UserSettings.tsx | 3 +- .../UserSettings/parts/connectors/index.tsx | 5 ++- .../src/pages/incident/Incident.tsx | 6 +-- .../pages/settings/tabs/ChatOps/ChatOps.tsx | 41 +++++++++++-------- grafana-plugin/src/pages/users/Users.tsx | 9 +++- 10 files changed, 57 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 352afc3b..21b1cc16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated wording throughout plugin to use 'Alert Group' instead of 'Incident' ([1565](https://github.com/grafana/oncall/pull/1565), [1576](https://github.com/grafana/oncall/pull/1576)) +- Check for enabled Telegram feature was added to ChatOps and to User pages ([319](https://github.com/grafana/oncall/issues/319)) - Filtering for Editors/Admins was added to rotation form. 
It is not allowed to assign Viewer to rotation ([1124](https://github.com/grafana/oncall/issues/1124)) - Modified search behaviour on the Escalation Chains page to allow for "partial searching" ([1578](https://github.com/grafana/oncall/pull/1578)) diff --git a/grafana-plugin/src/components/Policy/NotificationPolicy.tsx b/grafana-plugin/src/components/Policy/NotificationPolicy.tsx index bc97f132..28b34aa9 100644 --- a/grafana-plugin/src/components/Policy/NotificationPolicy.tsx +++ b/grafana-plugin/src/components/Policy/NotificationPolicy.tsx @@ -13,6 +13,8 @@ import { NotificationPolicyType, prepareNotificationPolicy } from 'models/notifi import { NotifyBy } from 'models/notify_by'; import { User } from 'models/user/user.types'; import { WaitDelay } from 'models/wait_delay'; +import { RootStore } from 'state'; +import { AppFeature } from 'state/features'; import { UserAction } from 'utils/authorization'; import DragHandle from './DragHandle'; @@ -41,6 +43,7 @@ export interface NotificationPolicyProps { color: string; number: number; userAction: UserAction; + store: RootStore; } export class NotificationPolicy extends React.Component { @@ -149,7 +152,11 @@ export class NotificationPolicy extends React.ComponentTelegram is connected diff --git a/grafana-plugin/src/containers/AlertRules/parts/index.tsx b/grafana-plugin/src/containers/AlertRules/parts/index.tsx index 76d51bcc..5f861b4f 100644 --- a/grafana-plugin/src/containers/AlertRules/parts/index.tsx +++ b/grafana-plugin/src/containers/AlertRules/parts/index.tsx @@ -6,6 +6,7 @@ import Timeline from 'components/Timeline/Timeline'; import SlackConnector from 'containers/AlertRules/parts/connectors/SlackConnector'; import TelegramConnector from 'containers/AlertRules/parts/connectors/TelegramConnector'; import { ChannelFilter } from 'models/channel_filter'; +import { AppFeature } from 'state/features'; import { useStore } from 'state/useStore'; import { getVar } from 'utils/DOM'; @@ -20,7 +21,8 @@ export const 
ChatOpsConnectors = (props: ChatOpsConnectorsProps) => { const { telegramChannelStore } = store; const isSlackInstalled = Boolean(store.teamStore.currentTeam?.slack_team_identity); - const isTelegramInstalled = Boolean(telegramChannelStore.currentTeamToTelegramChannel?.length > 0); + const isTelegramInstalled = + store.hasFeature(AppFeature.Telegram) && telegramChannelStore.currentTeamToTelegramChannel?.length > 0; if (!isSlackInstalled && !isTelegramInstalled) { return null; diff --git a/grafana-plugin/src/containers/PersonalNotificationSettings/PersonalNotificationSettings.tsx b/grafana-plugin/src/containers/PersonalNotificationSettings/PersonalNotificationSettings.tsx index 498d2410..14514205 100644 --- a/grafana-plugin/src/containers/PersonalNotificationSettings/PersonalNotificationSettings.tsx +++ b/grafana-plugin/src/containers/PersonalNotificationSettings/PersonalNotificationSettings.tsx @@ -150,6 +150,7 @@ const PersonalNotificationSettings = observer((props: PersonalNotificationSettin waitDelays={get(userStore.notificationChoices, 'wait_delay.choices', [])} notifyByOptions={userStore.notifyByOptions} color={getColor(index)} + store={store} /> ))} diff --git a/grafana-plugin/src/containers/TelegramIntegrationButton/TelegramIntegrationButton.tsx b/grafana-plugin/src/containers/TelegramIntegrationButton/TelegramIntegrationButton.tsx index 6c993f99..f0f23c5c 100644 --- a/grafana-plugin/src/containers/TelegramIntegrationButton/TelegramIntegrationButton.tsx +++ b/grafana-plugin/src/containers/TelegramIntegrationButton/TelegramIntegrationButton.tsx @@ -103,11 +103,12 @@ const TelegramModal = (props: TelegramModalProps) => { Sign Messages in settings. - 2. Create a new Discussion group. This group handles alert actions and comments.{' '} + 2. Create a new Discussion group. This group handles alert actions, comments and + must be unique for each OnCall telegram channel.{' '} 3. Connect the discussion group with the channel. 
In Manage Channel, click{' '} - Discussion to find and add your group.{' '} + Discussion to find and add the freshly created group.{' '} 4. Go to{' '} diff --git a/grafana-plugin/src/containers/UserSettings/UserSettings.tsx b/grafana-plugin/src/containers/UserSettings/UserSettings.tsx index bfdf4184..2ee2d34b 100644 --- a/grafana-plugin/src/containers/UserSettings/UserSettings.tsx +++ b/grafana-plugin/src/containers/UserSettings/UserSettings.tsx @@ -7,6 +7,7 @@ import { useMediaQuery } from 'react-responsive'; import { Tabs, TabsContent } from 'containers/UserSettings/parts'; import { User as UserType } from 'models/user/user.types'; +import { AppFeature } from 'state/features'; import { useStore } from 'state/useStore'; import { isUserActionAllowed, UserActions } from 'utils/authorization'; import { BREAKPOINT_TABS } from 'utils/consts'; @@ -51,7 +52,7 @@ const UserSettings = observer(({ id, onHide, tab = UserSettingsTab.UserInfo }: U const [showNotificationSettingsTab, showSlackConnectionTab, showTelegramConnectionTab, showMobileAppConnectionTab] = [ !isDesktopOrLaptop, isCurrent && teamStore.currentTeam?.slack_team_identity && !storeUser.slack_user_identity, - isCurrent && !storeUser.telegram_configuration, + isCurrent && store.hasFeature(AppFeature.Telegram) && !storeUser.telegram_configuration, isCurrent && isUserActionAllowed(UserActions.UserSettingsWrite), ]; diff --git a/grafana-plugin/src/containers/UserSettings/parts/connectors/index.tsx b/grafana-plugin/src/containers/UserSettings/parts/connectors/index.tsx index c7fcc144..6ab5c0d1 100644 --- a/grafana-plugin/src/containers/UserSettings/parts/connectors/index.tsx +++ b/grafana-plugin/src/containers/UserSettings/parts/connectors/index.tsx @@ -2,6 +2,8 @@ import React, { FC } from 'react'; import { UserSettingsTab } from 'containers/UserSettings/UserSettings.types'; import { User } from 'models/user/user.types'; +import { AppFeature } from 'state/features'; +import { useStore } from 'state/useStore'; 
import ICalConnector from './ICalConnector'; import MobileAppConnector from './MobileAppConnector'; @@ -15,11 +17,12 @@ interface ConnectorsProps { } export const Connectors: FC = (props) => { + const store = useStore(); return (
- + {store.hasFeature(AppFeature.Telegram) && }
diff --git a/grafana-plugin/src/pages/incident/Incident.tsx b/grafana-plugin/src/pages/incident/Incident.tsx index c5861777..22ed9805 100644 --- a/grafana-plugin/src/pages/incident/Incident.tsx +++ b/grafana-plugin/src/pages/incident/Incident.tsx @@ -297,7 +297,7 @@ class IncidentPage extends React.Component
{getIncidentStatusTag(incident)}
{integration && ( - <> + placement="top" content={ incident.render_for_web.source_link === null - ? `The integration doesn't have direct link to the source.` + ? `The integration template Source Link is empty` : 'Go to source' } > @@ -348,7 +348,7 @@ class IncidentPage extends React.Component - + )}
diff --git a/grafana-plugin/src/pages/settings/tabs/ChatOps/ChatOps.tsx b/grafana-plugin/src/pages/settings/tabs/ChatOps/ChatOps.tsx index fbe4f886..3c91d5f7 100644 --- a/grafana-plugin/src/pages/settings/tabs/ChatOps/ChatOps.tsx +++ b/grafana-plugin/src/pages/settings/tabs/ChatOps/ChatOps.tsx @@ -8,6 +8,8 @@ import { observer } from 'mobx-react'; import VerticalTabsBar, { VerticalTab } from 'components/VerticalTabsBar/VerticalTabsBar'; import SlackSettings from 'pages/settings/tabs/ChatOps/tabs/SlackSettings/SlackSettings'; import TelegramSettings from 'pages/settings/tabs/ChatOps/tabs/TelegramSettings/TelegramSettings'; +import { AppFeature } from 'state/features'; +import { useStore } from 'state/useStore'; import { withMobXProviderContext } from 'state/withStore'; import LocationHelper from 'utils/LocationHelper'; @@ -19,13 +21,13 @@ export enum ChatOpsTab { Slack = 'Slack', Telegram = 'Telegram', } - +interface ChatOpsProps extends AppRootProps {} interface ChatOpsState { activeTab: ChatOpsTab; } @observer -class ChatOpsPage extends React.Component { +class ChatOpsPage extends React.Component { state: ChatOpsState = { activeTab: ChatOpsTab.Slack, }; @@ -70,21 +72,27 @@ interface TabsProps { const Tabs = (props: TabsProps) => { const { activeTab, onTabChange } = props; + const store = useStore(); return ( - - - - Slack - - - - - - Telegram - - + {store.hasFeature(AppFeature.Slack) && ( + + + + Slack + + + )} + + {store.hasFeature(AppFeature.Telegram) && ( + + + + Telegram + + + )} ); }; @@ -95,15 +103,16 @@ interface TabsContentProps { const TabsContent = (props: TabsContentProps) => { const { activeTab } = props; + const store = useStore(); return ( <> - {activeTab === ChatOpsTab.Slack && ( + {store.hasFeature(AppFeature.Slack) && activeTab === ChatOpsTab.Slack && (
)} - {activeTab === ChatOpsTab.Telegram && ( + {store.hasFeature(AppFeature.Telegram) && activeTab === ChatOpsTab.Telegram && (
diff --git a/grafana-plugin/src/pages/users/Users.tsx b/grafana-plugin/src/pages/users/Users.tsx index ec37e0b0..ca831254 100644 --- a/grafana-plugin/src/pages/users/Users.tsx +++ b/grafana-plugin/src/pages/users/Users.tsx @@ -20,6 +20,7 @@ import UsersFilters from 'components/UsersFilters/UsersFilters'; import UserSettings from 'containers/UserSettings/UserSettings'; import { WithPermissionControlTooltip } from 'containers/WithPermissionControl/WithPermissionControlTooltip'; import { User as UserType } from 'models/user/user.types'; +import { AppFeature } from 'state/features'; import { PageProps, WithStoreProps } from 'state/types'; import { withMobXProviderContext } from 'state/withStore'; import LocationHelper from 'utils/LocationHelper'; @@ -280,10 +281,13 @@ class Users extends React.Component { }; renderContacts = (user: UserType) => { + const { store } = this.props; return (
Slack: {user.slack_user_identity?.name || '-'}
-
Telegram: {user.telegram_configuration?.telegram_nick_name || '-'}
+ {store.hasFeature(AppFeature.Telegram) && ( +
Telegram: {user.telegram_configuration?.telegram_nick_name || '-'}
+ )}
); }; @@ -314,6 +318,7 @@ class Users extends React.Component { }; renderNote = (user: UserType) => { + const { store } = this.props; if (user.hidden_fields === true) { return null; } @@ -346,7 +351,7 @@ class Users extends React.Component { if (!user.slack_user_identity) { texts.push('Slack not verified'); } - if (!user.telegram_configuration) { + if (store.hasFeature(AppFeature.Telegram) && !user.telegram_configuration) { texts.push('Telegram not verified'); } From d722c6a8e57e8d5e564b755902a1c4c29a1b3d5c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Mar 2023 17:04:41 +0000 Subject: [PATCH 14/20] Bump webpack from 5.75.0 to 5.76.1 in /grafana-plugin (#1551) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [webpack](https://github.com/webpack/webpack) from 5.75.0 to 5.76.1.
Release notes

Sourced from webpack's releases.

v5.76.1

Fixed

  • Added assert/strict built-in to NodeTargetPlugin

Revert

v5.76.0

Bugfixes

Features

Security

Repo Changes

New Contributors

Full Changelog: https://github.com/webpack/webpack/compare/v5.75.0...v5.76.0

Commits
  • 21be52b Merge pull request #16804 from webpack/chore-patch-release
  • 1cce945 chore(release): 5.76.1
  • e76ad9e Merge pull request #16803 from ryanwilsonperkin/revert-16759-real-content-has...
  • 52b1b0e Revert "Improve performance of hashRegExp lookup"
  • c989143 Merge pull request #16766 from piranna/patch-1
  • 710eaf4 Merge pull request #16789 from dmichon-msft/contenthash-hashsalt
  • 5d64468 Merge pull request #16792 from webpack/update-version
  • 67af5ec chore(release): 5.76.0
  • 97b1718 Merge pull request #16781 from askoufis/loader-context-target-type
  • b84efe6 Merge pull request #16759 from ryanwilsonperkin/real-content-hash-regex-perf
  • Additional commits viewable in compare view
Maintainer changes

This version was pushed to npm by evilebottnawi, a new releaser for webpack since your current version.


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=webpack&package-manager=npm_and_yarn&previous-version=5.75.0&new-version=5.76.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/grafana/oncall/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joey Orlando --- grafana-plugin/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/grafana-plugin/yarn.lock b/grafana-plugin/yarn.lock index 43910fc3..838cf5ab 100644 --- a/grafana-plugin/yarn.lock +++ b/grafana-plugin/yarn.lock @@ -13882,9 +13882,9 @@ webpack-sources@^3.2.3: integrity sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w== webpack@^5.72.0: - version "5.75.0" - resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.75.0.tgz#1e440468647b2505860e94c9ff3e44d5b582c152" - integrity sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ== + version "5.76.1" + resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.76.1.tgz#7773de017e988bccb0f13c7d75ec245f377d295c" + integrity sha512-4+YIK4Abzv8172/SGqObnUjaIHjLEuUasz9EwQj/9xmPPkYJy2Mh03Q/lJfSD3YLzbxy5FeTq5Uw0323Oh6SJQ== dependencies: "@types/eslint-scope" "^3.7.3" "@types/estree" "^0.0.51" From ce5fc2e0fdaefbedf55b34f9e8c7d334b599e493 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Tue, 21 Mar 2023 09:25:54 +0100 Subject: [PATCH 15/20] add e2e test that we have unique integrations (#1579) related to https://github.com/grafana/oncall-private/pull/1692 --- .../uniqueIntegrationNames.test.ts | 16 ++++++++++++++ .../integration-tests/utils/integrations.ts | 21 +++++++++++++------ grafana-plugin/src/components/Text/Text.tsx | 2 ++ .../CreateAlertReceiveChannelContainer.tsx | 4 +++- 4 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 grafana-plugin/integration-tests/integrations/uniqueIntegrationNames.test.ts diff --git a/grafana-plugin/integration-tests/integrations/uniqueIntegrationNames.test.ts b/grafana-plugin/integration-tests/integrations/uniqueIntegrationNames.test.ts new file mode 100644 index 00000000..06fc9284 --- /dev/null +++ 
b/grafana-plugin/integration-tests/integrations/uniqueIntegrationNames.test.ts @@ -0,0 +1,16 @@ +import { test, expect } from '@playwright/test'; +import { configureOnCallPlugin } from '../utils/configurePlugin'; +import { openCreateIntegrationModal } from '../utils/integrations'; + +test.beforeEach(async ({ page }) => { + await configureOnCallPlugin(page); +}); + +test('integrations have unique names', async ({ page }) => { + await openCreateIntegrationModal(page); + + const integrationNames = await page.getByTestId('integration-display-name').allInnerTexts(); + const uniqueLowercasedIntegrationNames = new Set(integrationNames.map((elem) => elem.toLowerCase())); + + expect(uniqueLowercasedIntegrationNames.size).toEqual(integrationNames.length); +}); diff --git a/grafana-plugin/integration-tests/utils/integrations.ts b/grafana-plugin/integration-tests/utils/integrations.ts index 664fcd79..8e54eaa1 100644 --- a/grafana-plugin/integration-tests/utils/integrations.ts +++ b/grafana-plugin/integration-tests/utils/integrations.ts @@ -2,19 +2,28 @@ import { Page } from '@playwright/test'; import { clickButton, fillInInput, selectDropdownValue } from './forms'; import { goToOnCallPage } from './navigation'; -export const createIntegrationAndSendDemoAlert = async ( - page: Page, - integrationName: string, - escalationChainName: string -): Promise => { +const CREATE_INTEGRATION_MODAL_TEST_ID_SELECTOR = 'div[data-testid="create-integration-modal"]'; + +export const openCreateIntegrationModal = async (page: Page): Promise => { // go to the integrations page await goToOnCallPage(page, 'integrations'); // open the create integration modal (await page.waitForSelector('text=New integration to receive alerts')).click(); + // wait for it to pop up + await page.waitForSelector(CREATE_INTEGRATION_MODAL_TEST_ID_SELECTOR); +}; + +export const createIntegrationAndSendDemoAlert = async ( + page: Page, + integrationName: string, + escalationChainName: string +): Promise => { + await 
openCreateIntegrationModal(page); + // create a webhook integration - (await page.waitForSelector('div[data-testid="create-integration-modal"] >> text=Webhook')).click(); + (await page.waitForSelector(`${CREATE_INTEGRATION_MODAL_TEST_ID_SELECTOR} >> text=Webhook`)).click(); // wait for the integrations settings modal to open up... and then close it await clickButton({ page, buttonText: 'Open Escalations Settings' }); diff --git a/grafana-plugin/src/components/Text/Text.tsx b/grafana-plugin/src/components/Text/Text.tsx index a6c87f0f..ba7664f0 100644 --- a/grafana-plugin/src/components/Text/Text.tsx +++ b/grafana-plugin/src/components/Text/Text.tsx @@ -52,6 +52,7 @@ const Text: TextInterface = (props) => { hidden = false, editModalTitle = 'New value', style, + ...rest } = props; const [isEditMode, setIsEditMode] = useState(false); @@ -88,6 +89,7 @@ const Text: TextInterface = (props) => { keyboard, })} style={style} + {...rest} > {hidden ? PLACEHOLDER : children} {editable && ( diff --git a/grafana-plugin/src/containers/CreateAlertReceiveChannelContainer/CreateAlertReceiveChannelContainer.tsx b/grafana-plugin/src/containers/CreateAlertReceiveChannelContainer/CreateAlertReceiveChannelContainer.tsx index e0d3b179..900b51c0 100644 --- a/grafana-plugin/src/containers/CreateAlertReceiveChannelContainer/CreateAlertReceiveChannelContainer.tsx +++ b/grafana-plugin/src/containers/CreateAlertReceiveChannelContainer/CreateAlertReceiveChannelContainer.tsx @@ -79,7 +79,9 @@ const CreateAlertReceiveChannelContainer = observer((props: CreateAlertReceiveCh
- {alertReceiveChannelChoice.display_name} + + {alertReceiveChannelChoice.display_name} + {alertReceiveChannelChoice.short_description} From 3f9dec6a68eaf24345cb2817d888a663b83c3c72 Mon Sep 17 00:00:00 2001 From: Ildar Iskhakov Date: Tue, 21 Mar 2023 16:12:13 +0800 Subject: [PATCH 16/20] Add "make help" command (#1583) # What this PR does Moved part of dev/README.md into `make help` command: ``` make help start start all of the docker containers init build the frontend plugin code then run make start restart restart all docker containers build rebuild images (e.g. when changing requirements.txt) cleanup this will remove all of the images, containers, volumes, and networks lint run both frontend and backend linters test run backend tests start-celery-beat start celery beat purge-queues purge celery queues shell starts an OnCall engine Django shell dbshell opens a DB shell engine-manage run Django's `manage.py` script, inside of a docker container, passing `$CMD` as arguments. exec-engine exec into engine container's bash _backend-debug-enable enable Django's debug mode and Silk profiling (this is disabled by default for performance reasons) _backend-debug-disable disable Django's debug mode and Silk profiling backend-manage-command run Django's `manage.py` script, passing `$CMD` as arguments. 
``` ## Which issue(s) this PR fixes ## Checklist - [ ] Tests updated - [ ] Documentation added - [ ] `CHANGELOG.md` updated --- Makefile | 50 +++++++++++++++++++++++++++++++++----------------- dev/README.md | 34 +--------------------------------- 2 files changed, 34 insertions(+), 50 deletions(-) diff --git a/Makefile b/Makefile index 863a9566..55cc3611 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,10 @@ +help: + @sed \ + -e '/^[a-zA-Z0-9_\-]*:.*##/!d' \ + -e 's/:.*##\s*/:/' \ + -e 's/^\(.\+\):\(.*\)/$(shell tput setaf 6)\1$(shell tput sgr0):\2/' \ + $(MAKEFILE_LIST) | column -c2 -t -s : + DOCKER_COMPOSE_FILE = docker-compose-developer.yml DOCKER_COMPOSE_DEV_LABEL = com.grafana.oncall.env=dev @@ -62,7 +69,7 @@ define run_engine_docker_command endef # touch SQLITE_DB_FILE if it does not exist and DB is eqaul to SQLITE_PROFILE -start: +start: ## start all of the docker containers ifeq ($(DB),$(SQLITE_PROFILE)) @if [ ! -f $(SQLITE_DB_FILE) ]; then \ touch $(SQLITE_DB_FILE); \ @@ -71,7 +78,7 @@ endif $(call run_docker_compose_command,up --remove-orphans -d) -init: +init: ## build the frontend plugin code then run make start # if the oncall UI is to be run in docker we should do an initial build of the frontend code # this makes sure that it will be available when the grafana container starts up without the need to # restart the grafana container initially @@ -79,16 +86,17 @@ ifeq ($(findstring $(UI_PROFILE),$(COMPOSE_PROFILES)),$(UI_PROFILE)) cd grafana-plugin && yarn install && yarn build:dev endif -stop: +stop: # stop all of the docker containers $(call run_docker_compose_command,down) -restart: +restart: ## restart all docker containers $(call run_docker_compose_command,restart) -build: +build: ## rebuild images (e.g. 
when changing requirements.txt) $(call run_docker_compose_command,build) -cleanup: stop +cleanup: stop ## this will remove all of the images, containers, volumes, and networks + ## associated with your local OnCall developer setup docker system prune --filter label="$(DOCKER_COMPOSE_DEV_LABEL)" --all --volumes install-pre-commit: @@ -99,38 +107,43 @@ install-pre-commit: echo "pre-commit already installed"; \ fi -lint: install-pre-commit +lint: install-pre-commit ## run both frontend and backend linters + ## may need to run `yarn install` from within `grafana-plugin` + ## to install several `pre-commit` dependencies + pre-commit run --all-files install-precommit-hook: install-pre-commit pre-commit install -test: +test: ## run backend tests $(call run_engine_docker_command,pytest) -start-celery-beat: +start-celery-beat: ## start celery beat $(call run_engine_docker_command,celery -A engine beat -l info) -purge-queues: +purge-queues: ## purge celery queues $(call run_engine_docker_command,celery -A engine purge -f) -shell: +shell: ## starts an OnCall engine Django shell $(call run_engine_docker_command,python manage.py shell) -dbshell: +dbshell: ## opens a DB shell $(call run_engine_docker_command,python manage.py dbshell) -engine-manage: +engine-manage: ## run Django's `manage.py` script, inside of a docker container, passing `$CMD` as arguments. + ## e.g. 
`make engine-manage CMD="makemigrations"` + ## https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-makemigrations $(call run_engine_docker_command,python manage.py $(CMD)) -exec-engine: +exec-engine: ## exec into engine container's bash docker exec -it oncall_engine bash -_backend-debug-enable: +_backend-debug-enable: ## enable Django's debug mode and Silk profiling (this is disabled by default for performance reasons) $(shell ./dev/add_env_var.sh DEBUG True $(DEV_ENV_FILE)) $(shell ./dev/add_env_var.sh SILK_PROFILER_ENABLED True $(DEV_ENV_FILE)) -_backend-debug-disable: +_backend-debug-disable: ## disable Django's debug mode and Silk profiling $(shell ./dev/add_env_var.sh DEBUG False $(DEV_ENV_FILE)) $(shell ./dev/add_env_var.sh SILK_PROFILER_ENABLED False $(DEV_ENV_FILE)) @@ -164,5 +177,8 @@ run-backend-celery: backend-command: $(call backend_command,$(CMD)) -backend-manage-command: +backend-manage-command: ## run Django's `manage.py` script, passing `$CMD` as arguments. + ## e.g. `make backend-manage-command CMD="makemigrations"` + ## https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-makemigrations + ## alternatively you can open docker container with engine and run commands from there $(call backend_command,python manage.py $(CMD)) diff --git a/dev/README.md b/dev/README.md index 3848dfbc..ec75a021 100644 --- a/dev/README.md +++ b/dev/README.md @@ -156,39 +156,7 @@ yarn test:integration ## Useful `make` commands See [`COMPOSE_PROFILES`](#compose_profiles) for more information on what this option is and how to configure it. - -```bash -make init # build the frontend plugin code then run make start -make start # start all of the docker containers -make stop # stop all of the docker containers -make restart # restart all docker containers -make build # rebuild images (e.g. when changing requirements.txt) -# run Django's `manage.py` script, inside of a docker container, passing `$CMD` as arguments. -# e.g. 
`make engine-manage CMD="makemigrations"` - https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-makemigrations -make engine-manage CMD="..." - -make backend-debug-enable # enable Django's debug mode and Silk profiling (this is disabled by default for performance reasons) -make backend-debug-disable # disable Django's debug mode and Silk profiling - -# this will remove all of the images, containers, volumes, and networks -# associated with your local OnCall developer setup -make cleanup - -make start-celery-beat # start celery beat -make purge-queues # purge celery queues -make shell # starts an OnCall engine Django shell -make dbshell # opens a DB shell -make exec-engine # exec into engine container's bash -make test # run backend tests - -# run Django's `manage.py` script, passing `$CMD` as arguments. -# e.g. `make backend-manage-command CMD="makemigrations"` - https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-makemigrations -make backend-manage-command CMD="..." - -# run both frontend and backend linters -# may need to run `yarn install` from within `grafana-plugin` to install several `pre-commit` dependencies -make lint -``` +> 🚶‍This part was moved to `make help` command. 
Run it to see all the available commands and their descriptions ## Setting environment variables From bfe06ac88845940f8ea26d64b39448795c6dc586 Mon Sep 17 00:00:00 2001 From: Innokentii Konstantinov Date: Tue, 21 Mar 2023 16:15:35 +0800 Subject: [PATCH 17/20] Add SLACK_INTEGRATION_MAINTENANCE env var (#1582) # What this PR does Add SLACK_INTEGRATION_MAINTENANCE env var to be able to disable slack install/uninstall --- engine/apps/api/views/auth.py | 6 ++++ engine/apps/slack/views.py | 33 +++++++++++-------- engine/settings/base.py | 3 ++ .../tabs/SlackSettings/SlackSettings.tsx | 12 ++++--- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/engine/apps/api/views/auth.py b/engine/apps/api/views/auth.py index 2282c50e..582918b6 100644 --- a/engine/apps/api/views/auth.py +++ b/engine/apps/api/views/auth.py @@ -1,6 +1,7 @@ import logging from urllib.parse import urljoin +from django.conf import settings from django.contrib.auth import REDIRECT_FIELD_NAME from django.http import HttpResponseRedirect from django.views.decorators.cache import never_cache @@ -25,6 +26,11 @@ def overridden_login_slack_auth(request, backend): # We can't just redirect frontend here because we need to make a API call and pass tokens to this view from JS. # So frontend can't follow our redirect. # So wrapping and returning URL to redirect as a string. 
+ if settings.SLACK_INTEGRATION_MAINTENANCE_ENABLED: + return Response( + "Grafana OnCall is temporary unable to connect your slack account or install OnCall to your slack workspace", + status=400, + ) url_to_redirect_to = do_auth(request.backend, redirect_name=REDIRECT_FIELD_NAME).url return Response(url_to_redirect_to, 200) diff --git a/engine/apps/slack/views.py b/engine/apps/slack/views.py index 3d91d01b..8b016271 100644 --- a/engine/apps/slack/views.py +++ b/engine/apps/slack/views.py @@ -540,20 +540,25 @@ class ResetSlackView(APIView): } def post(self, request): - organization = request.auth.organization - slack_team_identity = organization.slack_team_identity - if slack_team_identity is not None: - clean_slack_integration_leftovers.apply_async((organization.pk,)) - if settings.FEATURE_MULTIREGION_ENABLED: - delete_slack_connector_async.apply_async((slack_team_identity.slack_id,)) - write_chatops_insight_log( - author=request.user, - event_name=ChatOpsEvent.WORKSPACE_DISCONNECTED, - chatops_type=ChatOpsType.SLACK, + if settings.SLACK_INTEGRATION_MAINTENANCE_ENABLED: + response = Response( + "Grafana OnCall is temporary unable to connect your slack account or install OnCall to your slack workspace", + status=400, ) - unpopulate_slack_user_identities(organization.pk, True) - response = Response(status=200) else: - response = Response(status=400) - + organization = request.auth.organization + slack_team_identity = organization.slack_team_identity + if slack_team_identity is not None: + clean_slack_integration_leftovers.apply_async((organization.pk,)) + if settings.FEATURE_MULTIREGION_ENABLED: + delete_slack_connector_async.apply_async((slack_team_identity.slack_id,)) + write_chatops_insight_log( + author=request.user, + event_name=ChatOpsEvent.WORKSPACE_DISCONNECTED, + chatops_type=ChatOpsType.SLACK, + ) + unpopulate_slack_user_identities(organization.pk, True) + response = Response(status=200) + else: + response = Response(status=400) return response diff --git 
a/engine/settings/base.py b/engine/settings/base.py index 68c36f2e..c647fb51 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -517,6 +517,9 @@ SLACK_CLIENT_OAUTH_SECRET = os.environ.get("SLACK_CLIENT_OAUTH_SECRET") SLACK_SLASH_COMMAND_NAME = os.environ.get("SLACK_SLASH_COMMAND_NAME", "/oncall") SLACK_DIRECT_PAGING_SLASH_COMMAND = os.environ.get("SLACK_DIRECT_PAGING_SLASH_COMMAND", "/escalate") +# Controls if slack integration can be installed/uninstalled. +SLACK_INTEGRATION_MAINTENANCE_ENABLED = os.environ.get("SLACK_INTEGRATION_MAINTENANCE_ENABLED", False) + SOCIAL_AUTH_SLACK_LOGIN_KEY = SLACK_CLIENT_OAUTH_ID SOCIAL_AUTH_SLACK_LOGIN_SECRET = SLACK_CLIENT_OAUTH_SECRET diff --git a/grafana-plugin/src/pages/settings/tabs/ChatOps/tabs/SlackSettings/SlackSettings.tsx b/grafana-plugin/src/pages/settings/tabs/ChatOps/tabs/SlackSettings/SlackSettings.tsx index 2c597808..3bad354e 100644 --- a/grafana-plugin/src/pages/settings/tabs/ChatOps/tabs/SlackSettings/SlackSettings.tsx +++ b/grafana-plugin/src/pages/settings/tabs/ChatOps/tabs/SlackSettings/SlackSettings.tsx @@ -17,6 +17,7 @@ import { SlackChannel } from 'models/slack_channel/slack_channel.types'; import { AppFeature } from 'state/features'; import { WithStoreProps } from 'state/types'; import { withMobXProviderContext } from 'state/withStore'; +import { showApiError } from 'utils'; import { UserActions } from 'utils/authorization'; import { DOCS_SLACK_SETUP } from 'utils/consts'; @@ -49,7 +50,7 @@ class SlackSettings extends Component { handleOpenSlackInstructions = () => { const { store } = this.props; - store.slackStore.installSlackIntegration(); + store.slackStore.installSlackIntegration().catch(showApiError); }; update = () => { @@ -213,9 +214,12 @@ class SlackSettings extends Component { removeSlackIntegration = () => { const { store } = this.props; - store.slackStore.removeSlackIntegration().then(() => { - store.teamStore.loadCurrentTeam(); - }); + store.slackStore + 
.removeSlackIntegration() + .then(() => { + store.teamStore.loadCurrentTeam(); + }) + .catch(showApiError); }; getSlackSettingsChangeHandler = (field: string) => { From d40d3a62b8c546ca5f75103f3f969d7c65a4c4e9 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Tue, 21 Mar 2023 10:43:37 -0300 Subject: [PATCH 18/20] Keep webhook responses data (#1580) Track all webhook responses data, and allow using this between alertgroup-related webhooks (e.g. use firing webhook response data when templating the acknowledge webhook request data). NOTE: dropping the table is not backwards compatible but the feature is not enabled (and in any case it would drop log entries only used for status display) --- engine/apps/api/serializers/webhook.py | 36 ++--- engine/apps/api/tests/test_webhooks.py | 60 ++++---- engine/apps/api/views/webhooks.py | 2 +- engine/apps/webhooks/listeners.py | 3 +- .../migrations/0002_auto_20230320_1604.py | 38 +++++ engine/apps/webhooks/models/__init__.py | 2 +- engine/apps/webhooks/models/webhook.py | 36 +++-- engine/apps/webhooks/tasks/trigger_webhook.py | 124 +++++++-------- engine/apps/webhooks/tests/factories.py | 9 +- .../webhooks/tests/test_trigger_webhook.py | 141 ++++++++++++++---- engine/apps/webhooks/utils.py | 5 +- engine/conftest.py | 11 +- .../OutgoingWebhook2Form.config.ts | 10 +- .../OutgoingWebhook2Status.tsx | 28 ++-- .../outgoing_webhook_2.types.ts | 17 +-- 15 files changed, 335 insertions(+), 187 deletions(-) create mode 100644 engine/apps/webhooks/migrations/0002_auto_20230320_1604.py diff --git a/engine/apps/api/serializers/webhook.py b/engine/apps/api/serializers/webhook.py index 5af1e5a2..f9160789 100644 --- a/engine/apps/api/serializers/webhook.py +++ b/engine/apps/api/serializers/webhook.py @@ -1,28 +1,26 @@ from collections import defaultdict -from http.client import responses from rest_framework import serializers from rest_framework.validators import UniqueTogetherValidator -from apps.webhooks.models import Webhook, WebhookLog 
+from apps.webhooks.models import Webhook, WebhookResponse from common.api_helpers.custom_fields import TeamPrimaryKeyRelatedField from common.api_helpers.utils import CurrentOrganizationDefault, CurrentTeamDefault, CurrentUserDefault from common.jinja_templater import apply_jinja_template from common.jinja_templater.apply_jinja_template import JinjaTemplateError, JinjaTemplateWarning -class WebhookLogSerializer(serializers.ModelSerializer): +class WebhookResponseSerializer(serializers.ModelSerializer): class Meta: - model = WebhookLog + model = WebhookResponse fields = [ - "last_run_at", - "input_data", + "timestamp", "url", - "trigger", - "headers", - "data", - "response_status", - "response", + "request_trigger", + "request_headers", + "request_data", + "status_code", + "content", ] @@ -34,7 +32,7 @@ class WebhookSerializer(serializers.ModelSerializer): last_run = serializers.SerializerMethodField() trigger_type = serializers.CharField(required=True) forward_all = serializers.BooleanField(allow_null=True, required=False) - last_status_log = serializers.SerializerMethodField() + last_response_log = serializers.SerializerMethodField() trigger_type_name = serializers.SerializerMethodField() class Meta: @@ -58,7 +56,7 @@ class WebhookSerializer(serializers.ModelSerializer): "trigger_type", "trigger_type_name", "last_run", - "last_status_log", + "last_response_log", ] extra_kwargs = { "authorization_header": {"write_only": True}, @@ -106,16 +104,12 @@ class WebhookSerializer(serializers.ModelSerializer): def get_last_run(self, obj): last_run = "" - last_log = obj.logs.all().last() - if last_log: - last_run = last_log.last_run_at.strftime("%Y-%m-%dT%H:%M:%SZ") - if last_log.response_status: - reason = responses[int(last_log.response_status)] - last_run += " ({} {})".format(last_log.response_status, reason) + if last_log := obj.responses.all().last(): + last_run = last_log.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ") return last_run - def get_last_status_log(self, obj): - 
return WebhookLogSerializer(obj.logs.all().last()).data + def get_last_response_log(self, obj): + return WebhookResponseSerializer(obj.responses.all().last()).data def get_trigger_type_name(self, obj): trigger_type_name = "" diff --git a/engine/apps/api/tests/test_webhooks.py b/engine/apps/api/tests/test_webhooks.py index 7d163181..e91f1102 100644 --- a/engine/apps/api/tests/test_webhooks.py +++ b/engine/apps/api/tests/test_webhooks.py @@ -48,14 +48,13 @@ def test_get_list_webhooks(webhook_internal_api_setup, make_user_auth_headers): "headers": None, "http_method": "POST", "last_run": "", - "last_status_log": { - "data": "", - "headers": "", - "input_data": None, - "last_run_at": None, - "response": "", - "response_status": "", - "trigger": "", + "last_response_log": { + "request_data": "", + "request_headers": "", + "timestamp": None, + "content": "", + "status_code": None, + "request_trigger": "", "url": "", }, "trigger_template": None, @@ -86,14 +85,13 @@ def test_get_detail_webhook(webhook_internal_api_setup, make_user_auth_headers): "headers": None, "http_method": "POST", "last_run": "", - "last_status_log": { - "data": "", - "headers": "", - "input_data": None, - "last_run_at": None, - "response": "", - "response_status": "", - "trigger": "", + "last_response_log": { + "request_data": "", + "request_headers": "", + "timestamp": None, + "content": "", + "status_code": None, + "request_trigger": "", "url": "", }, "trigger_template": None, @@ -129,14 +127,13 @@ def test_create_webhook(mocked_check_webhooks_2_enabled, webhook_internal_api_se "headers": None, "http_method": "POST", "last_run": "", - "last_status_log": { - "data": "", - "headers": "", - "input_data": None, - "last_run_at": None, - "response": "", - "response_status": "", - "trigger": "", + "last_response_log": { + "request_data": "", + "request_headers": "", + "timestamp": None, + "content": "", + "status_code": None, + "request_trigger": "", "url": "", }, "trigger_template": None, @@ -185,14 
+182,13 @@ def test_create_valid_templated_field( "data": None, "http_method": "POST", "last_run": "", - "last_status_log": { - "data": "", - "headers": "", - "input_data": None, - "last_run_at": None, - "response": "", - "response_status": "", - "trigger": "", + "last_response_log": { + "request_data": "", + "request_headers": "", + "timestamp": None, + "content": "", + "status_code": None, + "request_trigger": "", "url": "", }, "trigger_template": None, diff --git a/engine/apps/api/views/webhooks.py b/engine/apps/api/views/webhooks.py index 03dffe5f..f4e21062 100644 --- a/engine/apps/api/views/webhooks.py +++ b/engine/apps/api/views/webhooks.py @@ -32,7 +32,7 @@ class WebhooksView(TeamFilteringMixin, PublicPrimaryKeyMixin, ModelViewSet): queryset = Webhook.objects.filter( organization=self.request.auth.organization, team=self.request.user.current_team, - ).prefetch_related("logs") + ).prefetch_related("responses") return queryset def get_object(self): diff --git a/engine/apps/webhooks/listeners.py b/engine/apps/webhooks/listeners.py index fad28096..31c83637 100644 --- a/engine/apps/webhooks/listeners.py +++ b/engine/apps/webhooks/listeners.py @@ -13,7 +13,8 @@ def on_alert_created(**kwargs): alert_pk = kwargs["alert"] alert = Alert.objects.get(pk=alert_pk) - alert_group_created.apply_async((alert.group_id,)) + if alert.is_the_first_alert_in_group: + alert_group_created.apply_async((alert.group_id,)) def on_action_triggered(**kwargs): diff --git a/engine/apps/webhooks/migrations/0002_auto_20230320_1604.py b/engine/apps/webhooks/migrations/0002_auto_20230320_1604.py new file mode 100644 index 00000000..78a6dba4 --- /dev/null +++ b/engine/apps/webhooks/migrations/0002_auto_20230320_1604.py @@ -0,0 +1,38 @@ +# Generated by Django 3.2.18 on 2023-03-20 16:04 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + +import django_migration_linter as linter + + +class Migration(migrations.Migration): + + dependencies = 
[ + ('alerts', '0010_channelfilter_filtering_term_type'), + ('webhooks', '0001_initial'), + ] + + operations = [ + linter.IgnoreMigration(), + migrations.CreateModel( + name='WebhookResponse', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('trigger_type', models.IntegerField(choices=[(0, 'Escalation step'), (1, 'Triggered'), (2, 'Acknowledged'), (3, 'Resolved'), (4, 'Silenced'), (5, 'Unsilenced'), (6, 'Unresolved')])), + ('timestamp', models.DateTimeField(default=django.utils.timezone.now)), + ('request_trigger', models.TextField(default=None, null=True)), + ('request_headers', models.TextField(default=None, null=True)), + ('request_data', models.TextField(default=None, null=True)), + ('url', models.TextField(default=None, null=True)), + ('status_code', models.IntegerField(default=None, null=True)), + ('content', models.TextField(default=None, null=True)), + ('alert_group', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='webhook_responses', to='alerts.alertgroup')), + ('webhook', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='responses', to='webhooks.webhook')), + ], + ), + migrations.DeleteModel( + name='WebhookLog', + ), + ] diff --git a/engine/apps/webhooks/models/__init__.py b/engine/apps/webhooks/models/__init__.py index 8b05fae9..d276a7cc 100644 --- a/engine/apps/webhooks/models/__init__.py +++ b/engine/apps/webhooks/models/__init__.py @@ -1 +1 @@ -from .webhook import Webhook, WebhookLog # noqa: F401 +from .webhook import Webhook, WebhookResponse # noqa: F401 diff --git a/engine/apps/webhooks/models/webhook.py b/engine/apps/webhooks/models/webhook.py index 7ff9c097..ad6b679f 100644 --- a/engine/apps/webhooks/models/webhook.py +++ b/engine/apps/webhooks/models/webhook.py @@ -244,19 +244,27 @@ class Webhook(models.Model): return result -class WebhookLog(models.Model): - last_run_at = 
models.DateTimeField(blank=True, null=True) - input_data = models.JSONField(default=None) - url = models.TextField(null=True, default=None) - trigger = models.TextField(null=True, default=None) - headers = models.TextField(null=True, default=None) - data = models.TextField(null=True, default=None) - response_status = models.CharField(max_length=100, null=True, default=None) - response = models.TextField(null=True, default=None) - webhook = models.ForeignKey( - to="webhooks.Webhook", +class WebhookResponse(models.Model): + alert_group = models.ForeignKey( + "alerts.AlertGroup", on_delete=models.CASCADE, - related_name="logs", - blank=False, - null=False, + null=True, + related_name="webhook_responses", ) + webhook = models.ForeignKey( + "webhooks.Webhook", + on_delete=models.SET_NULL, + null=True, + related_name="responses", + ) + trigger_type = models.IntegerField(choices=Webhook.TRIGGER_TYPES) + timestamp = models.DateTimeField(default=timezone.now) + request_trigger = models.TextField(null=True, default=None) + request_headers = models.TextField(null=True, default=None) + request_data = models.TextField(null=True, default=None) + url = models.TextField(null=True, default=None) + status_code = models.IntegerField(default=None, null=True) + content = models.TextField(null=True, default=None) + + def json(self): + return json.loads(self.content) diff --git a/engine/apps/webhooks/tasks/trigger_webhook.py b/engine/apps/webhooks/tasks/trigger_webhook.py index 6dffb655..dd9a4bf3 100644 --- a/engine/apps/webhooks/tasks/trigger_webhook.py +++ b/engine/apps/webhooks/tasks/trigger_webhook.py @@ -5,11 +5,10 @@ from json import JSONDecodeError from celery.utils.log import get_task_logger from django.apps import apps from django.conf import settings -from django.utils import timezone from apps.alerts.models import AlertGroup from apps.user_management.models import User -from apps.webhooks.models import Webhook, WebhookLog +from apps.webhooks.models import Webhook, 
WebhookResponse from apps.webhooks.utils import ( InvalidWebhookData, InvalidWebhookHeaders, @@ -23,6 +22,16 @@ logger = get_task_logger(__name__) logger.setLevel(logging.DEBUG) +TRIGGER_TYPE_TO_LABEL = { + Webhook.TRIGGER_NEW: "firing", + Webhook.TRIGGER_ACKNOWLEDGE: "acknowledge", + Webhook.TRIGGER_RESOLVE: "resolve", + Webhook.TRIGGER_SILENCE: "silence", + Webhook.TRIGGER_UNSILENCE: "unsilence", + Webhook.TRIGGER_UNRESOLVE: "unresolve", +} + + @shared_dedicated_queue_retry_task( autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None ) @@ -38,45 +47,36 @@ def _isoformat_date(date_value): return date_value.isoformat() if date_value else None -def _build_payload(trigger_type, alert_group_id, user_id): +def _build_payload(trigger_type, alert_group, user_id): user = None - try: - alert_group = AlertGroup.unarchived_objects.get(pk=alert_group_id) - if user_id is not None: - user = User.objects.filter(pk=user_id).first() - except AlertGroup.DoesNotExist: - return + if user_id is not None: + user = User.objects.filter(pk=user_id).first() + event = { + "type": TRIGGER_TYPE_TO_LABEL[trigger_type], + } if trigger_type == Webhook.TRIGGER_NEW: - event = { - "type": "Firing", - "time": _isoformat_date(alert_group.started_at), - } + event["time"] = _isoformat_date(alert_group.started_at) elif trigger_type == Webhook.TRIGGER_ACKNOWLEDGE: - event = { - "type": "Acknowledge", - "time": _isoformat_date(alert_group.acknowledged_at), - } + event["time"] = _isoformat_date(alert_group.acknowledged_at) elif trigger_type == Webhook.TRIGGER_RESOLVE: - event = { - "type": "Resolve", - "time": _isoformat_date(alert_group.resolved_at), - } + event["time"] = _isoformat_date(alert_group.resolved_at) elif trigger_type == Webhook.TRIGGER_SILENCE: - event = { - "type": "Silence", - "time": _isoformat_date(alert_group.silenced_at), - "until": _isoformat_date(alert_group.silenced_until), - } - elif trigger_type == Webhook.TRIGGER_UNSILENCE: - event = { - "type": 
"Unsilence", - } - elif trigger_type == Webhook.TRIGGER_UNRESOLVE: - event = { - "type": "Unresolve", - } - data = serialize_event(event, alert_group, user) + event["time"] = _isoformat_date(alert_group.silenced_at) + event["until"] = _isoformat_date(alert_group.silenced_until) + + # include latest response data per trigger in the event input data + responses_data = {} + responses = alert_group.webhook_responses.all().order_by("timestamp") + for r in responses: + try: + response_data = r.json() + except JSONDecodeError: + response_data = r.content + responses_data[TRIGGER_TYPE_TO_LABEL[r.trigger_type]] = response_data + + data = serialize_event(event, alert_group, user, responses_data) + return data @@ -91,54 +91,60 @@ def execute_webhook(webhook_pk, alert_group_id, user_id): logger.warn(f"Webhook {webhook_pk} does not exist") return - data = _build_payload(webhook.trigger_type, alert_group_id, user_id) + try: + alert_group = AlertGroup.unarchived_objects.get(pk=alert_group_id) + except AlertGroup.DoesNotExist: + return + + data = _build_payload(webhook.trigger_type, alert_group, user_id) status = { - "last_run_at": timezone.now(), - "input_data": data, "url": None, - "trigger": None, - "headers": None, - "data": None, - "response_status": None, - "response": None, + "request_trigger": None, + "request_headers": None, + "request_data": data, + "status_code": None, + "content": None, + "webhook": webhook, } exception = None try: - triggered, status["trigger"] = webhook.check_trigger(data) + triggered, status["request_trigger"] = webhook.check_trigger(data) if triggered: status["url"] = webhook.build_url(data) request_kwargs = webhook.build_request_kwargs(data, raise_data_errors=True) - status["headers"] = json.dumps(request_kwargs.get("headers", {})) - if webhook.forward_all: - status["data"] = "All input_data forwarded as payload" - elif "json" in request_kwargs: - status["data"] = json.dumps(request_kwargs["json"]) + status["request_headers"] = 
json.dumps(request_kwargs.get("headers", {})) + if "json" in request_kwargs: + status["request_data"] = json.dumps(request_kwargs["json"]) else: - status["data"] = request_kwargs.get("data") + status["request_data"] = request_kwargs.get("data") response = webhook.make_request(status["url"], request_kwargs) - status["response_status"] = response.status_code + status["status_code"] = response.status_code try: - status["response"] = json.dumps(response.json()) + status["content"] = json.dumps(response.json()) except JSONDecodeError: - status["response"] = response.content.decode("utf-8") + status["content"] = response.content.decode("utf-8") else: # do not add a log entry if the webhook is not triggered return except InvalidWebhookUrl as e: status["url"] = e.message except InvalidWebhookTrigger as e: - status["trigger"] = e.message + status["request_trigger"] = e.message except InvalidWebhookHeaders as e: - status["headers"] = e.message + status["request_headers"] = e.message except InvalidWebhookData as e: - status["data"] = e.message + status["request_data"] = e.message except Exception as e: - status["response"] = str(e) + status["content"] = str(e) exception = e - # create/update log entry - WebhookLog.objects.update_or_create(webhook_id=webhook_pk, defaults=status) + # create response entry + WebhookResponse.objects.create( + alert_group=alert_group, + trigger_type=webhook.trigger_type, + **status, + ) if exception: raise exception diff --git a/engine/apps/webhooks/tests/factories.py b/engine/apps/webhooks/tests/factories.py index 7ff6ec40..b456d41a 100644 --- a/engine/apps/webhooks/tests/factories.py +++ b/engine/apps/webhooks/tests/factories.py @@ -1,6 +1,6 @@ import factory -from apps.webhooks.models import Webhook +from apps.webhooks.models import Webhook, WebhookResponse from common.utils import UniqueFaker @@ -11,3 +11,10 @@ class CustomWebhookFactory(factory.DjangoModelFactory): class Meta: model = Webhook + + +class 
WebhookResponseFactory(factory.DjangoModelFactory): + timestamp = factory.Faker("date_time") + + class Meta: + model = WebhookResponse diff --git a/engine/apps/webhooks/tests/test_trigger_webhook.py b/engine/apps/webhooks/tests/test_trigger_webhook.py index ba41d11d..4a8408ab 100644 --- a/engine/apps/webhooks/tests/test_trigger_webhook.py +++ b/engine/apps/webhooks/tests/test_trigger_webhook.py @@ -100,12 +100,43 @@ def test_execute_webhook_ok( ) assert mock_requests.post.call_args == expected_call # check logs - log = webhook.logs.all()[0] - assert log.response_status == "200" - assert log.response == json.dumps(mock_response.json()) + log = webhook.responses.all()[0] + assert log.status_code == 200 + assert log.content == json.dumps(mock_response.json()) + assert log.request_data == json.dumps({"value": alert_group.public_primary_key}) + assert log.request_headers == json.dumps({"some-header": alert_group.public_primary_key}) + assert log.url == "https://something/{}/".format(alert_group.public_primary_key) + + +@pytest.mark.django_db +def test_execute_webhook_ok_forward_all( + make_organization, make_user_for_organization, make_alert_receive_channel, make_alert_group, make_custom_webhook +): + organization = make_organization() + user = make_user_for_organization(organization) + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group( + alert_receive_channel, acknowledged_at=timezone.now(), acknowledged=True, acknowledged_by=user.pk + ) + webhook = make_custom_webhook( + organization=organization, + url="https://something/{{ alert_group_id }}/", + http_method="POST", + trigger_type=Webhook.TRIGGER_ACKNOWLEDGE, + forward_all=True, + ) + + mock_response = MockResponse() + with patch("apps.webhooks.utils.socket.gethostbyname") as mock_gethostbyname: + mock_gethostbyname.return_value = "8.8.8.8" + with patch("apps.webhooks.models.webhook.requests") as mock_requests: + mock_requests.post.return_value = mock_response + 
execute_webhook(webhook.pk, alert_group.pk, user.pk) + + assert mock_requests.post.called expected_data = { "event": { - "type": "Acknowledge", + "type": "acknowledge", "time": alert_group.acknowledged_at.isoformat(), }, "user": user.username, @@ -113,12 +144,78 @@ def test_execute_webhook_ok( "alert_group_id": alert_group.public_primary_key, "alert_payload": "", } - assert log.input_data == expected_data - assert log.data == json.dumps({"value": alert_group.public_primary_key}) - assert log.headers == json.dumps({"some-header": alert_group.public_primary_key}) + expected_call = call( + "https://something/{}/".format(alert_group.public_primary_key), + timeout=10, + headers={}, + json=expected_data, + ) + assert mock_requests.post.call_args == expected_call + # check logs + log = webhook.responses.all()[0] + assert log.status_code == 200 + assert log.content == json.dumps(mock_response.json()) + assert json.loads(log.request_data) == expected_data assert log.url == "https://something/{}/".format(alert_group.public_primary_key) +@pytest.mark.django_db +def test_execute_webhook_using_responses_data( + make_organization, + make_user_for_organization, + make_alert_receive_channel, + make_alert_group, + make_custom_webhook, + make_webhook_response, +): + organization = make_organization() + user = make_user_for_organization(organization) + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group( + alert_receive_channel, acknowledged_at=timezone.now(), acknowledged=True, acknowledged_by=user.pk + ) + webhook = make_custom_webhook( + organization=organization, + url="https://something/{{ responses.firing.id }}/", + http_method="POST", + trigger_type=Webhook.TRIGGER_RESOLVE, + data='{"value": "{{ responses.acknowledge.status }}"}', + forward_all=False, + ) + # add previous webhook responses for the related alert group + make_webhook_response( + alert_group=alert_group, trigger_type=Webhook.TRIGGER_NEW, content=json.dumps({"id": 
"third-party-id"}) + ) + make_webhook_response( + alert_group=alert_group, + trigger_type=Webhook.TRIGGER_ACKNOWLEDGE, + content=json.dumps({"id": "third-party-id", "status": "updated"}), + ) + + mock_response = MockResponse() + with patch("apps.webhooks.utils.socket.gethostbyname") as mock_gethostbyname: + mock_gethostbyname.return_value = "8.8.8.8" + with patch("apps.webhooks.models.webhook.requests") as mock_requests: + mock_requests.post.return_value = mock_response + execute_webhook(webhook.pk, alert_group.pk, user.pk) + + assert mock_requests.post.called + expected_data = {"value": "updated"} + expected_call = call( + "https://something/third-party-id/", + timeout=10, + headers={}, + json=expected_data, + ) + assert mock_requests.post.call_args == expected_call + # check logs + log = webhook.responses.all()[0] + assert log.status_code == 200 + assert log.content == json.dumps(mock_response.json()) + assert json.loads(log.request_data) == expected_data + assert log.url == "https://something/third-party-id/" + + @pytest.mark.django_db def test_execute_webhook_trigger_false( make_organization, make_alert_receive_channel, make_alert_group, make_custom_webhook @@ -139,7 +236,7 @@ def test_execute_webhook_trigger_false( assert not mock_requests.post.called # check no logs - assert webhook.logs.count() == 0 + assert webhook.responses.count() == 0 @pytest.mark.django_db @@ -155,11 +252,16 @@ def test_execute_webhook_trigger_false( ( "trigger_template", "{{ }}", - "trigger", + "request_trigger", "Trigger - Template Error: Expected an expression, got 'end of print statement'", ), - ("headers", '"{{foo|invalid}}"', "headers", "Headers - Template Error: No filter named 'invalid'."), - ("data", "{{ }}", "data", "Data - Template Error: Expected an expression, got 'end of print statement'"), + ("headers", '"{{foo|invalid}}"', "request_headers", "Headers - Template Error: No filter named 'invalid'."), + ( + "data", + "{{ }}", + "request_data", + "Data - Template Error: 
Expected an expression, got 'end of print statement'", + ), ], ) def test_execute_webhook_errors( @@ -194,19 +296,8 @@ def test_execute_webhook_errors( execute_webhook(webhook.pk, alert_group.pk, None) assert not mock_requests.post.called - log = webhook.logs.all()[0] - assert log.response_status is None - assert log.response is None - expected_data = { - "event": { - "type": "Resolve", - "time": alert_group.resolved_at.isoformat(), - }, - "user": None, - "alert_group": IncidentSerializer(alert_group).data, - "alert_group_id": alert_group.public_primary_key, - "alert_payload": "", - } - assert log.input_data == expected_data + log = webhook.responses.all()[0] + assert log.status_code is None + assert log.content is None error = getattr(log, log_field_name) assert error == expected_error diff --git a/engine/apps/webhooks/utils.py b/engine/apps/webhooks/utils.py index f9014990..092c169c 100644 --- a/engine/apps/webhooks/utils.py +++ b/engine/apps/webhooks/utils.py @@ -113,7 +113,7 @@ class EscapeDoubleQuotesDict(dict): return original_str -def serialize_event(event, alert_group, user): +def serialize_event(event, alert_group, user, responses=None): from apps.public_api.serializers import IncidentSerializer alert_payload = alert_group.alerts.first() @@ -128,4 +128,7 @@ def serialize_event(event, alert_group, user): "alert_group_id": alert_group.public_primary_key, "alert_payload": alert_payload_raw, } + if responses: + data["responses"] = responses + return data diff --git a/engine/conftest.py b/engine/conftest.py index b4795ded..a79e18b0 100644 --- a/engine/conftest.py +++ b/engine/conftest.py @@ -81,7 +81,7 @@ from apps.telegram.tests.factories import ( from apps.twilioapp.tests.factories import PhoneCallFactory, SMSFactory from apps.user_management.models.user import User, listen_for_user_model_save from apps.user_management.tests.factories import OrganizationFactory, RegionFactory, TeamFactory, UserFactory -from apps.webhooks.tests.factories import 
CustomWebhookFactory +from apps.webhooks.tests.factories import CustomWebhookFactory, WebhookResponseFactory register(OrganizationFactory) register(UserFactory) @@ -626,6 +626,15 @@ def make_custom_webhook(): return _make_custom_webhook +@pytest.fixture +def make_webhook_response(): + def _make_webhook_response(**kwargs): + webhook_response = WebhookResponseFactory(**kwargs) + return webhook_response + + return _make_webhook_response + + @pytest.fixture def make_slack_user_group(): def _make_slack_user_group(slack_team_identity, **kwargs): diff --git a/grafana-plugin/src/containers/OutgoingWebhook2Form/OutgoingWebhook2Form.config.ts b/grafana-plugin/src/containers/OutgoingWebhook2Form/OutgoingWebhook2Form.config.ts index 8337783f..e071333e 100644 --- a/grafana-plugin/src/containers/OutgoingWebhook2Form/OutgoingWebhook2Form.config.ts +++ b/grafana-plugin/src/containers/OutgoingWebhook2Form/OutgoingWebhook2Form.config.ts @@ -14,10 +14,6 @@ export const form: { name: string; fields: FormItem[] } = { type: FormItemType.Select, extra: { options: [ - { - value: '0', - label: 'Escalation step', - }, { value: '1', label: 'Triggered', @@ -77,12 +73,14 @@ export const form: { name: string; fields: FormItem[] } = { { name: 'url', label: 'Webhook URL', + description: 'Supports templating', type: FormItemType.Input, validation: { required: true }, }, { name: 'headers', label: 'Webhook Headers', + description: 'Must be a JSON dict, templating allowed', type: FormItemType.TextArea, extra: { rows: 5, @@ -112,7 +110,7 @@ export const form: { name: string; fields: FormItem[] } = { name: 'data', getDisabled: (form_data) => Boolean(form_data?.forward_whole_payload), type: FormItemType.TextArea, - description: 'Available variables: {{ alert_payload }}, {{ alert_group_id }}', + description: 'Available variables: {{ alert_payload }}, {{ alert_group_id }}, {{ responses }}', extra: { rows: 9, }, @@ -121,7 +119,7 @@ export const form: { name: string; fields: FormItem[] } = { name: 
'forward_all', normalize: (value) => Boolean(value), type: FormItemType.Switch, - description: "Forwards whole payload of the alert to the webhook's url as POST data", + description: "Forwards whole payload of the alert to the webhook's url as data", }, ], }; diff --git a/grafana-plugin/src/containers/OutgoingWebhook2Status/OutgoingWebhook2Status.tsx b/grafana-plugin/src/containers/OutgoingWebhook2Status/OutgoingWebhook2Status.tsx index 72fcd61c..67df8f6a 100644 --- a/grafana-plugin/src/containers/OutgoingWebhook2Status/OutgoingWebhook2Status.tsx +++ b/grafana-plugin/src/containers/OutgoingWebhook2Status/OutgoingWebhook2Status.tsx @@ -69,38 +69,36 @@ const OutgoingWebhook2Status = observer((props: OutgoingWebhook2StatusProps) => {data.last_run ? ( - {data.last_status_log.last_run_at} - - {JSON.stringify(data.last_status_log.input_data, null, 4)} + {data.last_response_log.timestamp} - {data.last_status_log.trigger && ( + {data.last_response_log.request_trigger && ( )} - {data.last_status_log.url && ( - + {data.last_response_log.url && ( + )} - {data.last_status_log.headers && ( - + {data.last_response_log.request_headers && ( + )} - {data.last_status_log.data && ( - + {data.last_response_log.request_data && ( + )} - {data.last_status_log.response_status && ( + {data.last_response_log.status_code && ( - {data.last_status_log.response_status} + {data.last_response_log.status_code} )} - {data.last_status_log.response && ( + {data.last_response_log.content && ( - {JSON.stringify(data.last_status_log.response, null, 4)} + {JSON.stringify(data.last_response_log.content, null, 4)} )} diff --git a/grafana-plugin/src/models/outgoing_webhook_2/outgoing_webhook_2.types.ts b/grafana-plugin/src/models/outgoing_webhook_2/outgoing_webhook_2.types.ts index 18f47314..79a20558 100644 --- a/grafana-plugin/src/models/outgoing_webhook_2/outgoing_webhook_2.types.ts +++ b/grafana-plugin/src/models/outgoing_webhook_2/outgoing_webhook_2.types.ts @@ -14,16 +14,15 @@ export interface 
OutgoingWebhook2 { username: null; headers: string; trigger_template: string; - last_status_log?: OutgoingWebhook2Log; + last_response_log?: OutgoingWebhook2Response; } -export interface OutgoingWebhook2Log { - last_run_at: string; - input_data: string; +export interface OutgoingWebhook2Response { + timestamp: string; url: string; - trigger: string; - headers: string; - data: string; - response_status: string; - response: string; + request_trigger: string; + request_headers: string; + request_data: string; + status_code: string; + content: string; } From 6f0f0af00edc129c870d2b8e000a04abcd7958bc Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Tue, 21 Mar 2023 12:44:29 -0300 Subject: [PATCH 19/20] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21b1cc16..a3ef7363 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## Unreleased +## v1.1.41 (2023-03-21) ### Added From b2b7237bc4c8f2440a753eb6f4f8a262d636456c Mon Sep 17 00:00:00 2001 From: Ildar Iskhakov Date: Tue, 21 Mar 2023 23:45:22 +0800 Subject: [PATCH 20/20] Update README.md (#1587) # What this PR does ## Which issue(s) this PR fixes ## Checklist - [ ] Tests updated - [ ] Documentation added - [ ] `CHANGELOG.md` updated --- dev/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/README.md b/dev/README.md index ec75a021..e2a31124 100644 --- a/dev/README.md +++ b/dev/README.md @@ -150,6 +150,7 @@ To run these tests locally simply do the following: npx playwright install # install playwright dependencies cp ./grafana-plugin/.env.example ./grafana-plugin/.env # you may need to tweak the values in ./grafana-plugin/.env according to your local setup +cd grafana-plugin yarn test:integration ```