From 2503eafdc6eb15250dd3546be98e9f006b184e40 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 19 Dec 2024 06:03:54 -0500 Subject: [PATCH 1/6] chore: add pagerduty migrator test + fix linting (#5378) --- tools/migrators/lib/pagerduty/migrate.py | 4 ++- .../lib/tests/pagerduty/test_migrate.py | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 tools/migrators/lib/tests/pagerduty/test_migrate.py diff --git a/tools/migrators/lib/pagerduty/migrate.py b/tools/migrators/lib/pagerduty/migrate.py index 950fa104..68d5652e 100644 --- a/tools/migrators/lib/pagerduty/migrate.py +++ b/tools/migrators/lib/pagerduty/migrate.py @@ -151,7 +151,9 @@ def migrate() -> None: migrate_notification_rules(user) print(TAB + format_user(user)) else: - print("▶ Skipping migrating user notification rules as MIGRATE_USERS is false...") + print( + "▶ Skipping migrating user notification rules as MIGRATE_USERS is false..." + ) print("▶ Migrating schedules...") for schedule in schedules: diff --git a/tools/migrators/lib/tests/pagerduty/test_migrate.py b/tools/migrators/lib/tests/pagerduty/test_migrate.py new file mode 100644 index 00000000..6a7b42ed --- /dev/null +++ b/tools/migrators/lib/tests/pagerduty/test_migrate.py @@ -0,0 +1,27 @@ +from unittest.mock import call, patch + +from lib.pagerduty.migrate import migrate + + +@patch("lib.pagerduty.migrate.MIGRATE_USERS", False) +@patch("lib.pagerduty.migrate.APISession") +@patch("lib.pagerduty.migrate.OnCallAPIClient") +def test_users_are_skipped_when_migrate_users_is_false( + MockOnCallAPIClient, MockAPISession +): + mock_session = MockAPISession.return_value + mock_session.list_all.return_value = [] + mock_oncall_client = MockOnCallAPIClient.return_value + + migrate() + + # Assert no user-related fetching or migration occurs + assert mock_session.list_all.call_args_list == [ + call("schedules", params={"include[]": "schedule_layers", "time_zone": "UTC"}), + call("escalation_policies"), + 
call("services", params={"include[]": "integrations"}), + call("vendors"), + # no user notification rules fetching + ] + + mock_oncall_client.list_users_with_notification_rules.assert_not_called() From cc63ec314159fda82e3d8a33fe2f5fd1e2276c54 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Thu, 19 Dec 2024 08:17:08 -0300 Subject: [PATCH 2/6] fix: return a throttled response if org is being synced for the first time during auth (#5374) Related to https://github.com/grafana/oncall-private/issues/2826 When Terraform triggers multiple requests and org needs to be synced in OnCall, the first request will wait for sync to complete but others will get an immediate response, before a 403, with these changes a 429 indicating to retry (Terraform [client](https://github.com/grafana/amixr-api-go-client/blob/main/client.go#L310) will handle the response and perform a retry). --- engine/apps/auth_token/auth.py | 3 +++ engine/apps/auth_token/tests/test_grafana_auth.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/engine/apps/auth_token/auth.py b/engine/apps/auth_token/auth.py index 36ea8d82..f96e5ef6 100644 --- a/engine/apps/auth_token/auth.py +++ b/engine/apps/auth_token/auth.py @@ -381,6 +381,9 @@ class GrafanaServiceAccountAuthentication(BaseAuthentication): # if organization exists, we are good) setup_organization(url, auth) organization = Organization.objects.filter(grafana_url=url).first() + if organization is None: + # sync may still be in progress, client should retry + raise exceptions.Throttled(detail="Organization being synced, please retry.") return organization if settings.LICENSE == settings.CLOUD_LICENSE_NAME: diff --git a/engine/apps/auth_token/tests/test_grafana_auth.py b/engine/apps/auth_token/tests/test_grafana_auth.py index 8da611a0..22831645 100644 --- a/engine/apps/auth_token/tests/test_grafana_auth.py +++ b/engine/apps/auth_token/tests/test_grafana_auth.py @@ -110,9 +110,9 @@ def test_grafana_authentication_no_org_grafana_url(): 
request_sync_url = f"{grafana_url}/api/plugins/{PluginID.ONCALL}/resources/plugin/sync?wait=true&force=true" httpretty.register_uri(httpretty.POST, request_sync_url, status=404) - with pytest.raises(exceptions.AuthenticationFailed) as exc: + with pytest.raises(exceptions.Throttled) as exc: GrafanaServiceAccountAuthentication().authenticate(request) - assert exc.value.detail == "Organization not found." + assert exc.value.detail == "Organization being synced, please retry." @pytest.mark.parametrize("grafana_url", ["null;", "foo", ""]) From cea98dc17abcb1ca34bfa32bc3e50c3571c76826 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 09:08:08 +0000 Subject: [PATCH 3/6] Update `make docs` procedure (#5383) To test the changes, run the following and browse to URL output by the script: ```console git fetch git checkout origin/update-make-docs cd docs make docs ``` --------- Co-authored-by: grafanabot Co-authored-by: Jack Baldry --- docs/make-docs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/make-docs b/docs/make-docs index 2dc6726d..bc0be581 100755 --- a/docs/make-docs +++ b/docs/make-docs @@ -6,6 +6,12 @@ # [Semantic versioning](https://semver.org/) is used to help the reader identify the significance of changes. # Changes are relevant to this script and the support docs.mk GNU Make interface. # +# ## 8.2.0 (2024-12-22) +# +# ### Removed +# +# - Special cases for Oracle and Datadog plugins now that they exist in the plugins monorepo. +# # ## 8.1.0 (2024-08-22) # # ### Added @@ -13,7 +19,7 @@ # - Additional website mounts for projects that use the website repository. # # Mounts are required for `make docs` to work in the website repository or with the website project. -# The Makefile is also mounted for convenient development of the procedure that repository. +# The Makefile is also mounted for convenient development of the procedure in that repository. 
# # ## 8.0.1 (2024-07-01) # @@ -355,8 +361,6 @@ SOURCES_grafana_cloud_frontend_observability_faro_web_sdk='faro-web-sdk' SOURCES_helm_charts_mimir_distributed='mimir' SOURCES_helm_charts_tempo_distributed='tempo' SOURCES_opentelemetry='opentelemetry-docs' -SOURCES_plugins_grafana_datadog_datasource='datadog-datasource' -SOURCES_plugins_grafana_oracle_datasource='oracle-datasource' SOURCES_resources='website' VERSIONS_as_code='UNVERSIONED' @@ -367,8 +371,6 @@ VERSIONS_grafana_cloud_k6='UNVERSIONED' VERSIONS_grafana_cloud_data_configuration_integrations='UNVERSIONED' VERSIONS_grafana_cloud_frontend_observability_faro_web_sdk='UNVERSIONED' VERSIONS_opentelemetry='UNVERSIONED' -VERSIONS_plugins_grafana_datadog_datasource='latest' -VERSIONS_plugins_grafana_oracle_datasource='latest' VERSIONS_resources='UNVERSIONED' VERSIONS_technical_documentation='UNVERSIONED' VERSIONS_website='UNVERSIONED' @@ -378,8 +380,6 @@ PATHS_grafana_cloud='content/docs/grafana-cloud' PATHS_helm_charts_mimir_distributed='docs/sources/helm-charts/mimir-distributed' PATHS_helm_charts_tempo_distributed='docs/sources/helm-charts/tempo-distributed' PATHS_mimir='docs/sources/mimir' -PATHS_plugins_grafana_datadog_datasource='docs/sources' -PATHS_plugins_grafana_oracle_datasource='docs/sources' PATHS_resources='content' PATHS_tempo='docs/sources/tempo' PATHS_website='content' From 678833bb29d4365b2d7a253f116e862bf4ef76c4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 10:18:01 +0000 Subject: [PATCH 4/6] Update `make docs` procedure (#5388) To test the changes, run the following and browse to URL output by the script: ```console git fetch git checkout origin/update-make-docs cd docs make docs ``` --------- Co-authored-by: grafanabot Co-authored-by: Jack Baldry --- docs/make-docs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/make-docs b/docs/make-docs index bc0be581..ba448ce0 
100755 --- a/docs/make-docs +++ b/docs/make-docs @@ -6,6 +6,14 @@ # [Semantic versioning](https://semver.org/) is used to help the reader identify the significance of changes. # Changes are relevant to this script and the support docs.mk GNU Make interface. # +# ## 8.3.0 (2024-12-27) +# +# ### Added +# +# - Debug output of the final command when DEBUG=true. +# +# Useful to inspect if the script is correctly constructing the final command. +# # ## 8.2.0 (2024-12-22) # # ### Removed @@ -631,7 +639,7 @@ POSIX_HERESTRING case "${_project}" in # Workaround for arbitrary mounts where the version field is expected to be the local directory - # and the repo field is expected to be the container directory. + # and the repo field is expected to be the container directory. arbitrary) echo "${_project}^${_version}^${_repo}^" # TODO ;; @@ -801,6 +809,10 @@ case "${image}" in | sed "s#$(proj_dst "${proj}")#sources#" EOF + if [ -n "${DEBUG}" ]; then + debg "${cmd}" + fi + case "${OUTPUT_FORMAT}" in human) if ! 
command -v jq >/dev/null 2>&1; then @@ -837,6 +849,10 @@ EOF /hugo/content/docs EOF + if [ -n "${DEBUG}" ]; then + debg "${cmd}" + fi + case "${OUTPUT_FORMAT}" in human) ${cmd} --output=line \ From f3f7c17f8b7b4245c57c6c4b3fa87888fa6f6768 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Thu, 2 Jan 2025 10:50:09 -0300 Subject: [PATCH 5/6] fix: update missing users / empty shifts check (#5322) Related to https://github.com/grafana/oncall-private/issues/2950 - Represent missing users in schedule events (so they are displayed in the web UI) - Fix schedule checks for gaps/empty shifts so they send notifications --- engine/apps/schedules/ical_utils.py | 13 +++++ .../schedules/models/custom_on_call_shift.py | 23 +++++--- engine/apps/schedules/tasks/__init__.py | 4 -- .../notify_about_empty_shifts_in_schedule.py | 19 ++----- .../tasks/notify_about_gaps_in_schedule.py | 15 +----- ...t_notify_about_empty_shifts_in_schedule.py | 46 +++++++++++++++- .../test_notify_about_gaps_in_schedule.py | 46 +++++++++++++++- .../schedules/tests/test_on_call_schedule.py | 52 +++++++++++++++++++ engine/settings/celery_task_routes.py | 4 -- 9 files changed, 175 insertions(+), 47 deletions(-) diff --git a/engine/apps/schedules/ical_utils.py b/engine/apps/schedules/ical_utils.py index 8678232d..788f6865 100644 --- a/engine/apps/schedules/ical_utils.py +++ b/engine/apps/schedules/ical_utils.py @@ -54,6 +54,19 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +class MissingUser: + """Represent a missing user in a rolling users shift.""" + + DISPLAY_NAME = "(missing)" + + def __init__(self, pk): + self.pk = pk + + @property + def username(self): + return self.DISPLAY_NAME + + EmptyShift = namedtuple( "EmptyShift", ["start", "end", "summary", "description", "attendee", "all_day", "calendar_type", "calendar_tz", "shift_pk"], diff --git a/engine/apps/schedules/models/custom_on_call_shift.py b/engine/apps/schedules/models/custom_on_call_shift.py index 812b0947..8b1dda64 100644 
--- a/engine/apps/schedules/models/custom_on_call_shift.py +++ b/engine/apps/schedules/models/custom_on_call_shift.py @@ -18,6 +18,7 @@ from django.utils import timezone from django.utils.functional import cached_property from icalendar.cal import Event +from apps.schedules.ical_utils import MissingUser from apps.schedules.tasks import ( check_gaps_and_empty_shifts_in_schedule, drop_cached_ical_task, @@ -645,10 +646,6 @@ class CustomOnCallShift(models.Model): all_users_pks = set() users_queue = [] if self.rolling_users is not None: - # get all users pks from rolling_users field - for users_dict in self.rolling_users: - all_users_pks.update(users_dict.keys()) - users = User.objects.filter(pk__in=all_users_pks) # generate users_queue list with user objects if self.start_rotation_from_user_index is not None: rolling_users = ( @@ -657,10 +654,22 @@ class CustomOnCallShift(models.Model): ) else: rolling_users = self.rolling_users + + # get all users pks from rolling_users field + for users_dict in self.rolling_users: + all_users_pks.update(users_dict.keys()) + users = User.objects.filter(pk__in=all_users_pks) + users_by_id = {user.pk: user for user in users} for users_dict in rolling_users: - users_list = list(users.filter(pk__in=users_dict.keys())) - if users_list: - users_queue.append(users_list) + users_list = [] + for user_pk in users_dict.keys(): + try: + user_pk = int(user_pk) + users_list.append(users_by_id.get(user_pk, MissingUser(user_pk))) + except ValueError: + users_list.append(MissingUser(user_pk)) + users_queue.append(users_list) + return users_queue def add_rolling_users(self, rolling_users_list): diff --git a/engine/apps/schedules/tasks/__init__.py b/engine/apps/schedules/tasks/__init__.py index 5e0078dd..a5617a92 100644 --- a/engine/apps/schedules/tasks/__init__.py +++ b/engine/apps/schedules/tasks/__init__.py @@ -1,17 +1,13 @@ from .check_gaps_and_empty_shifts import check_gaps_and_empty_shifts_in_schedule # noqa: F401 from .drop_cached_ical import 
drop_cached_ical_for_custom_events_for_organization, drop_cached_ical_task # noqa: F401 from .notify_about_empty_shifts_in_schedule import ( # noqa: F401 - check_empty_shifts_in_schedule, notify_about_empty_shifts_in_schedule_task, schedule_notify_about_empty_shifts_in_schedule, - start_check_empty_shifts_in_schedule, start_notify_about_empty_shifts_in_schedule, ) from .notify_about_gaps_in_schedule import ( # noqa: F401 - check_gaps_in_schedule, notify_about_gaps_in_schedule_task, schedule_notify_about_gaps_in_schedule, - start_check_gaps_in_schedule, start_notify_about_gaps_in_schedule, ) from .refresh_ical_files import ( # noqa: F401 diff --git a/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py b/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py index afdf789c..ebe68eda 100644 --- a/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py +++ b/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py @@ -1,6 +1,7 @@ import pytz from celery.utils.log import get_task_logger from django.core.cache import cache +from django.db.models import Q from django.utils import timezone from apps.slack.utils import format_datetime_to_slack_with_time, post_message_to_channel @@ -10,28 +11,16 @@ from common.utils import trim_if_needed task_logger = get_task_logger(__name__) -# deprecated # todo: delete this task from here and from task routes after the next release -@shared_dedicated_queue_retry_task() -def start_check_empty_shifts_in_schedule(): - return - - -# deprecated # todo: delete this task from here and from task routes after the next release -@shared_dedicated_queue_retry_task() -def check_empty_shifts_in_schedule(schedule_pk): - return - - @shared_dedicated_queue_retry_task() def start_notify_about_empty_shifts_in_schedule(): - from apps.schedules.models import OnCallScheduleICal + from apps.schedules.models import OnCallSchedule task_logger.info("Start start_notify_about_empty_shifts_in_schedule") today = 
timezone.now().date() week_ago = today - timezone.timedelta(days=7) - schedules = OnCallScheduleICal.objects.filter( - empty_shifts_report_sent_at__lte=week_ago, + schedules = OnCallSchedule.objects.filter( + Q(empty_shifts_report_sent_at__lte=week_ago) | Q(empty_shifts_report_sent_at__isnull=True), slack_channel__isnull=False, organization__deleted_at__isnull=True, ) diff --git a/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py b/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py index 5047943e..7a8b2c67 100644 --- a/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py +++ b/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py @@ -1,6 +1,7 @@ import pytz from celery.utils.log import get_task_logger from django.core.cache import cache +from django.db.models import Q from django.utils import timezone from apps.slack.utils import format_datetime_to_slack_with_time, post_message_to_channel @@ -9,18 +10,6 @@ from common.custom_celery_tasks import shared_dedicated_queue_retry_task task_logger = get_task_logger(__name__) -# deprecated # todo: delete this task from here and from task routes after the next release -@shared_dedicated_queue_retry_task() -def start_check_gaps_in_schedule(): - return - - -# deprecated # todo: delete this task from here and from task routes after the next release -@shared_dedicated_queue_retry_task() -def check_gaps_in_schedule(schedule_pk): - return - - @shared_dedicated_queue_retry_task() def start_notify_about_gaps_in_schedule(): from apps.schedules.models import OnCallSchedule @@ -30,7 +19,7 @@ def start_notify_about_gaps_in_schedule(): today = timezone.now().date() week_ago = today - timezone.timedelta(days=7) schedules = OnCallSchedule.objects.filter( - gaps_report_sent_at__lte=week_ago, + Q(gaps_report_sent_at__lte=week_ago) | Q(gaps_report_sent_at__isnull=True), slack_channel__isnull=False, organization__deleted_at__isnull=True, ) diff --git 
a/engine/apps/schedules/tests/test_notify_about_empty_shifts_in_schedule.py b/engine/apps/schedules/tests/test_notify_about_empty_shifts_in_schedule.py index 2c6bb091..1790fd7c 100644 --- a/engine/apps/schedules/tests/test_notify_about_empty_shifts_in_schedule.py +++ b/engine/apps/schedules/tests/test_notify_about_empty_shifts_in_schedule.py @@ -5,8 +5,8 @@ import pytest from django.utils import timezone from apps.api.permissions import LegacyAccessControlRole -from apps.schedules.models import CustomOnCallShift, OnCallScheduleWeb -from apps.schedules.tasks import notify_about_empty_shifts_in_schedule_task +from apps.schedules.models import CustomOnCallShift, OnCallScheduleCalendar, OnCallScheduleICal, OnCallScheduleWeb +from apps.schedules.tasks import notify_about_empty_shifts_in_schedule_task, start_notify_about_empty_shifts_in_schedule @pytest.mark.django_db @@ -174,3 +174,45 @@ def test_empty_non_empty_shifts_trigger_notification( schedule.refresh_from_db() assert empty_shifts_report_sent_at != schedule.empty_shifts_report_sent_at assert schedule.has_empty_shifts + + +@pytest.mark.parametrize( + "schedule_class", + [OnCallScheduleWeb, OnCallScheduleICal, OnCallScheduleCalendar], +) +@pytest.mark.parametrize( + "report_sent_days_ago,expected_call", + [(8, True), (6, False), (None, True)], +) +@pytest.mark.django_db +def test_start_notify_about_empty_shifts( + make_slack_team_identity, + make_slack_channel, + make_organization, + make_schedule, + schedule_class, + report_sent_days_ago, + expected_call, +): + slack_team_identity = make_slack_team_identity() + slack_channel = make_slack_channel(slack_team_identity) + organization = make_organization(slack_team_identity=slack_team_identity) + + sent = timezone.now() - datetime.timedelta(days=report_sent_days_ago) if report_sent_days_ago else None + schedule = make_schedule( + organization, + schedule_class=schedule_class, + name="test_schedule", + slack_channel=slack_channel, + empty_shifts_report_sent_at=sent, + ) 
+ + with patch( + "apps.schedules.tasks.notify_about_empty_shifts_in_schedule.notify_about_empty_shifts_in_schedule_task.apply_async" + ) as mock_notify: + start_notify_about_empty_shifts_in_schedule() + + if expected_call: + mock_notify.assert_called_once_with((schedule.pk,)) + else: + mock_notify.assert_not_called() diff --git a/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py b/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py index d775c77b..5e35cf06 100644 --- a/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py +++ b/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py @@ -4,8 +4,8 @@ from unittest.mock import patch import pytest from django.utils import timezone -from apps.schedules.models import CustomOnCallShift, OnCallScheduleWeb -from apps.schedules.tasks import notify_about_gaps_in_schedule_task +from apps.schedules.models import CustomOnCallShift, OnCallScheduleCalendar, OnCallScheduleICal, OnCallScheduleWeb +from apps.schedules.tasks import notify_about_gaps_in_schedule_task, start_notify_about_gaps_in_schedule @pytest.mark.django_db @@ -286,3 +286,45 @@ def test_gaps_later_than_7_days_no_triggering_notification( schedule.refresh_from_db() assert gaps_report_sent_at != schedule.gaps_report_sent_at assert schedule.has_gaps is False + + +@pytest.mark.parametrize( + "schedule_class", + [OnCallScheduleWeb, OnCallScheduleICal, OnCallScheduleCalendar], +) +@pytest.mark.parametrize( + "report_sent_days_ago,expected_call", + [(8, True), (6, False), (None, True)], +) +@pytest.mark.django_db +def test_start_notify_about_gaps( + make_slack_team_identity, + make_slack_channel, + make_organization, + make_schedule, + schedule_class, + report_sent_days_ago, + expected_call, +): + slack_team_identity = make_slack_team_identity() + slack_channel = make_slack_channel(slack_team_identity) + organization = make_organization(slack_team_identity=slack_team_identity) + + sent = timezone.now() - 
datetime.timedelta(days=report_sent_days_ago) if report_sent_days_ago else None + schedule = make_schedule( + organization, + schedule_class=schedule_class, + name="test_schedule", + slack_channel=slack_channel, + gaps_report_sent_at=sent, + ) + + with patch( + "apps.schedules.tasks.notify_about_gaps_in_schedule.notify_about_gaps_in_schedule_task.apply_async" + ) as mock_notify: + start_notify_about_gaps_in_schedule() + + if expected_call: + mock_notify.assert_called_once_with((schedule.pk,)) + else: + mock_notify.assert_not_called() diff --git a/engine/apps/schedules/tests/test_on_call_schedule.py b/engine/apps/schedules/tests/test_on_call_schedule.py index 71a029b4..d9a0b26c 100644 --- a/engine/apps/schedules/tests/test_on_call_schedule.py +++ b/engine/apps/schedules/tests/test_on_call_schedule.py @@ -18,6 +18,7 @@ from apps.schedules.constants import ( ICAL_STATUS_CANCELLED, ICAL_SUMMARY, ) +from apps.schedules.ical_utils import MissingUser from apps.schedules.models import ( CustomOnCallShift, OnCallSchedule, @@ -358,6 +359,57 @@ def test_filter_events_include_empty(make_organization, make_user_for_organizati assert events == expected +@pytest.mark.django_db +def test_filter_events_include_empty_if_deleted( + make_organization, make_user_for_organization, make_schedule, make_on_call_shift +): + organization = make_organization() + schedule = make_schedule( + organization, + schedule_class=OnCallScheduleWeb, + name="test_web_schedule", + ) + user = make_user_for_organization(organization) + now = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0) + start_date = now - timezone.timedelta(days=7) + + data = { + "start": start_date + timezone.timedelta(hours=10), + "rotation_start": start_date + timezone.timedelta(hours=10), + "duration": timezone.timedelta(hours=8), + "priority_level": 1, + "frequency": CustomOnCallShift.FREQUENCY_DAILY, + "schedule": schedule, + } + on_call_shift = make_on_call_shift( + organization=organization, 
shift_type=CustomOnCallShift.TYPE_ROLLING_USERS_EVENT, **data + ) + on_call_shift.add_rolling_users([[user]]) + + # user is deleted, shift data still exists but the shift is empty + user.delete() + + end_date = start_date + timezone.timedelta(days=1) + events = schedule.filter_events(start_date, end_date, filter_by=OnCallSchedule.TYPE_ICAL_PRIMARY, with_empty=True) + expected = [ + { + "calendar_type": OnCallSchedule.TYPE_ICAL_PRIMARY, + "start": on_call_shift.start, + "end": on_call_shift.start + on_call_shift.duration, + "all_day": False, + "is_override": False, + "is_empty": True, + "is_gap": False, + "priority_level": on_call_shift.priority_level, + "missing_users": [MissingUser.DISPLAY_NAME], + "users": [], + "shift": {"pk": on_call_shift.public_primary_key}, + "source": "api", + } + ] + assert events == expected + + @pytest.mark.django_db def test_filter_events_ical_all_day(make_organization, make_user_for_organization, make_schedule, get_ical): calendar = get_ical("calendar_with_all_day_event.ics") diff --git a/engine/settings/celery_task_routes.py b/engine/settings/celery_task_routes.py index 37861c43..fff08a2a 100644 --- a/engine/settings/celery_task_routes.py +++ b/engine/settings/celery_task_routes.py @@ -33,13 +33,9 @@ CELERY_TASK_ROUTES = { "apps.schedules.tasks.refresh_ical_files.refresh_ical_final_schedule": {"queue": "default"}, "apps.schedules.tasks.refresh_ical_files.start_refresh_ical_final_schedules": {"queue": "default"}, "apps.schedules.tasks.check_gaps_and_empty_shifts.check_gaps_and_empty_shifts_in_schedule": {"queue": "default"}, - "apps.schedules.tasks.notify_about_gaps_in_schedule.check_empty_shifts_in_schedule": {"queue": "default"}, "apps.schedules.tasks.notify_about_gaps_in_schedule.start_notify_about_gaps_in_schedule": {"queue": "default"}, - "apps.schedules.tasks.notify_about_gaps_in_schedule.check_gaps_in_schedule": {"queue": "default"}, "apps.schedules.tasks.notify_about_gaps_in_schedule.notify_about_gaps_in_schedule_task": 
{"queue": "default"}, "apps.schedules.tasks.notify_about_gaps_in_schedule.schedule_notify_about_gaps_in_schedule": {"queue": "default"}, - "apps.schedules.tasks.notify_about_gaps_in_schedule.start_check_empty_shifts_in_schedule": {"queue": "default"}, - "apps.schedules.tasks.notify_about_gaps_in_schedule.start_check_gaps_in_schedule": {"queue": "default"}, "apps.schedules.tasks.notify_about_gaps_in_schedule.start_notify_about_empty_shifts_in_schedule": { "queue": "default" }, From 152d5f74fcdc12f07790e1e0ea0232258c8839e2 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Mon, 6 Jan 2025 10:53:13 -0500 Subject: [PATCH 6/6] feat: allow setting direct paging importance for teams (#5379) ## Which issue(s) this PR closes https://www.loom.com/share/e1858db48e8b4fa99014a997af5e3d5e Closes https://github.com/grafana/irm/issues/322 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. 
--------- Co-authored-by: Vadim Stepanov --- .../integrations/references/manual/index.md | 40 +++++- .../oncall-api-reference/escalation.md | 13 +- ...upsert_direct_paging_integration_routes.py | 84 +++++++++++++ .../alerts/models/alert_receive_channel.py | 51 ++++++-- engine/apps/alerts/paging.py | 20 ++- .../tests/test_alert_receiver_channel.py | 38 ++++-- engine/apps/alerts/tests/test_paging.py | 31 ++++- engine/apps/api/serializers/direct_paging.py | 1 + engine/apps/api/tests/test_direct_paging.py | 16 ++- engine/apps/api/views/direct_paging.py | 1 + .../apps/public_api/tests/test_escalation.py | 14 ++- engine/apps/public_api/views/escalation.py | 1 + engine/apps/slack/scenarios/paging.py | 118 +++++++++++++++++- .../slack/tests/scenario_steps/test_paging.py | 99 +++++++++++++-- .../apps/user_management/tests/test_sync.py | 24 ++-- 15 files changed, 498 insertions(+), 53 deletions(-) create mode 100644 engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py diff --git a/docs/sources/configure/integrations/references/manual/index.md b/docs/sources/configure/integrations/references/manual/index.md index 42c1029f..bcf7fcd8 100644 --- a/docs/sources/configure/integrations/references/manual/index.md +++ b/docs/sources/configure/integrations/references/manual/index.md @@ -89,9 +89,16 @@ to the team's ChatOps channels and start an appropriate escalation chain. ## Set up direct paging for a team -By default all teams will have a direct paging integration created for them. However, these are not configured by default. -If a team does not have their direct paging integration configured, such that it is "contactable" (ie. it has an -escalation chain assigned to it, or has at least one Chatops integration connected to send notifications to), you will +By default all teams will have a direct paging integration created for them. 
Each direct paging integration will be +created with two routes: + +- a non-default route which has a Jinja2 filtering term of `{{ payload.oncall.important }}` +(see [Important Escalations](#important-escalations) below for more details) +- a default route to capture all other alerts + +However, these integrations are not configured by default to be "contactable" (ie. their routes will have no +escalation chains assigned to them, nor any Chatops integrations connected to send notifications to). +If a team does not have their direct paging integration configured, such that it is "contactable", you will not be able to direct page this team. If this happens, consider following the following steps for the team (or reach out to the relevant team and suggest doing so). @@ -99,3 +106,30 @@ Navigate to the **Integrations** page and find the "Direct paging" integration f integration's detail page, you can customize its settings, link it to an escalation chain, and configure associated ChatOps channels. To confirm that the integration is functioning as intended, [create a new alert group](#page-a-team) and select the same team for a test run. + +### Important escalations + +Sometimes you really need to get the attention of a particular team. When directly paging a team, it is possible to +page them using an "important escalation". Practically speaking, this will create an alert, using the specified team's +direct paging integration as such: + +```json +{ + "oncall": { + "title": "IRM is paging Network team to join escalation", + "message": "I really need someone from your team to come take a look! The k8s cluster is down!", + "uid": "8a20b8d1-56fd-482e-824e-43fbd1bd7b10", + "author_username": "irm", + "permalink": null, + "important": true + } +} +``` + +When you are directly paging a team, either via the web UI, chatops apps, or the API, you can specify that this +
As mentioned above in +[Set up direct paging for a team](#set-up-direct-paging-for-a-team), direct paging integrations come pre-configured with +two routes, with the non-default route having a Jinja2 filtering term of `{{ payload.oncall.important }}`. + +This allows teams to be contacted via different escalation chains, depending on whether or not the user paging them +believes that this is an "important escalation". diff --git a/docs/sources/oncall-api-reference/escalation.md b/docs/sources/oncall-api-reference/escalation.md index b2b375d6..a07547e2 100644 --- a/docs/sources/oncall-api-reference/escalation.md +++ b/docs/sources/oncall-api-reference/escalation.md @@ -18,6 +18,11 @@ refs: destination: /docs/oncall//configure/integrations/references/manual - pattern: /docs/grafana-cloud/ destination: /docs/grafana-cloud/configure/integrations/references/manual + manual-paging-team-important: + - pattern: /docs/oncall/ + destination: /docs/oncall//configure/integrations/references/manual#important-escalations + - pattern: /docs/grafana-cloud/ + destination: /docs/grafana-cloud/configure/integrations/references/manual#important-escalations --- # Escalation HTTP API @@ -90,7 +95,8 @@ curl "{{API_URL}}/api/v1/escalation/" \ "title": "We are seeing a network outage in the datacenter", "message": "I need help investigating, can you join the investigation?", "source_url": "https://github.com/myorg/myrepo/issues/123", - "team": "TI73TDU19W48J" + "team": "TI73TDU19W48J", + "important_team_escalation": true }' ``` @@ -176,6 +182,7 @@ The above command returns JSON structured in the following way: | `team` | No | Yes (see [Things to Note](#things-to-note)) | Grafana OnCall team ID. If specified, will use the "Direct Paging" Integration associated with this Grafana OnCall team, to create the Alert Group. | | `users` | No | Yes (see [Things to Note](#things-to-note)) | List of user(s) to escalate to. See above request example for object schema. 
`id` represents the Grafana OnCall user's ID. `important` is a boolean representing whether to escalate the Alert Group using this user's default or important personal notification policy. | | `alert_group_id` | No | No | If specified, will escalate the specified users for this Alert Group. | +| `important_team_escalation` | No | No | Sets the value of `payload.oncall.important` to the value specified here (default is `False`; see [Things to Note](#things-to-note) for more details). | ## Things to note @@ -186,6 +193,10 @@ existing Alert Group if you are trying to escalate to a set of users on an existing Alert Group, you cannot update the `title`, `message`, or `source_url` of that Alert Group - If escalating to a set of users for an existing Alert Group, the Alert Group cannot be in a resolved state +- Regarding `important_team_escalation`: this can be useful to send an "important" escalation to the specified team. +Teams can configure their Direct Paging Integration to route to different escalation chains based on the value of +`payload.oncall.important`. See [Manual paging integration - important escalations](ref:manual-paging-team-important) +for more details. 
**HTTP request** diff --git a/engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py b/engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py new file mode 100644 index 00000000..7345b3c6 --- /dev/null +++ b/engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py @@ -0,0 +1,84 @@ +# Generated by Django 4.2.17 on 2024-12-20 14:19 + +import logging + +from django.db import migrations +from django.db.models import Count + +logger = logging.getLogger(__name__) + + +def upsert_direct_paging_integration_routes(apps, schema_editor): + AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel") + ChannelFilter = apps.get_model("alerts", "ChannelFilter") + + DIRECT_PAGING_INTEGRATION_TYPE = "direct_paging" + IMPORTANT_FILTERING_TERM = "{{ payload.oncall.important }}" + + # Fetch all direct paging integrations + logger.info("Fetching direct paging integrations which have not had their routes updated.") + + # Ignore updating Direct Paging integrations that have > 1 route, as this means that users have + # gone ahead and created their own routes. We don't want to overwrite these. + unedited_direct_paging_integrations = ( + AlertReceiveChannel.objects + .filter(integration=DIRECT_PAGING_INTEGRATION_TYPE) + .annotate(num_routes=Count("channel_filters")) + .filter(num_routes=1) + ) + + integration_count = unedited_direct_paging_integrations.count() + if integration_count == 0: + logger.info("No integrations found which meet this criteria. 
No routes will be upserted.") + return + + logger.info(f"Found {integration_count} direct paging integrations that meet this criteria.") + + # Direct Paging Integrations are currently created with a single default route (order=0) + # see AlertReceiveChannelManager.create_missing_direct_paging_integrations + # + # we first need to update this route to be order=1, and then we will subsequently bulk-create the + # non-default route (order=0) which will have a filtering term set + routes = ChannelFilter.objects.filter( + alert_receive_channel__in=unedited_direct_paging_integrations, + is_default=True, + order=0, + ) + + logger.info( + f"Swapping the order=0 value to order=1 for {routes.count()} Direct Paging Integrations default routes" + ) + + updated_rows = routes.update(order=1) + logger.info(f"Swapped order=0 to order=1 for {updated_rows} Direct Paging Integrations default routes") + + # Bulk create the new non-default routes + logger.info( + f"Creating new non-default routes for {len(unedited_direct_paging_integrations)} Direct Paging Integrations" + ) + created_objs = ChannelFilter.objects.bulk_create( + [ + ChannelFilter( + alert_receive_channel=integration, + filtering_term=IMPORTANT_FILTERING_TERM, + filtering_term_type=1, # 1 = ChannelFilter.FILTERING_TERM_TYPE_JINJA2 + is_default=False, + order=0, + ) for integration in unedited_direct_paging_integrations + ], + batch_size=5000, + ) + logger.info(f"Created {len(created_objs)} new non-default routes for Direct Paging Integrations") + + logger.info("Migration for direct paging integration routes completed.") + + +class Migration(migrations.Migration): + + dependencies = [ + ("alerts", "0071_migrate_labels"), + ] + + operations = [ + migrations.RunPython(upsert_direct_paging_integration_routes, migrations.RunPython.noop), + ] diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index 74fc5d23..a8089337 100644 --- 
a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -126,6 +126,8 @@ class AlertReceiveChannelManager(models.Manager): def create_missing_direct_paging_integrations(organization: "Organization") -> None: from apps.alerts.models import ChannelFilter + logger.info(f"Starting create_missing_direct_paging_integrations for organization: {organization.id}") + # fetch teams without direct paging integration teams_missing_direct_paging = list( organization.teams.exclude( @@ -134,10 +136,17 @@ class AlertReceiveChannelManager(models.Manager): ).values_list("team_id", flat=True) ) ) + number_of_teams_missing_direct_paging = len(teams_missing_direct_paging) + logger.info( + f"Found {number_of_teams_missing_direct_paging} teams missing direct paging integrations.", + ) + if not teams_missing_direct_paging: + logger.info("No missing direct paging integrations found. Exiting.") return # create missing integrations + logger.info(f"Creating missing direct paging integrations for {number_of_teams_missing_direct_paging} teams.") AlertReceiveChannel.objects.bulk_create( [ AlertReceiveChannel( @@ -151,29 +160,49 @@ class AlertReceiveChannelManager(models.Manager): batch_size=5000, ignore_conflicts=True, # ignore if direct paging integration already exists for team ) + logger.info("Missing direct paging integrations creation step completed.") # fetch integrations for teams (some of them are created above, but some may already exist previously) alert_receive_channels = organization.alert_receive_channels.filter( team__in=teams_missing_direct_paging, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING ) + logger.info(f"Fetched {alert_receive_channels.count()} direct paging integrations for the specified teams.") - # create default routes + # we create two routes for each Direct Paging Integration + # 1. route for important alerts (using the payload.oncall.important alert field value) - non-default + # 2. 
route for all other alerts - default + routes_to_create = [] + for alert_receive_channel in alert_receive_channels: + routes_to_create.extend( + [ + ChannelFilter( + alert_receive_channel=alert_receive_channel, + filtering_term="{{ payload.oncall.important }}", + filtering_term_type=ChannelFilter.FILTERING_TERM_TYPE_JINJA2, + is_default=False, + order=0, + ), + ChannelFilter( + alert_receive_channel=alert_receive_channel, + filtering_term=None, + is_default=True, + order=1, + ), + ] + ) + + logger.info(f"Creating {len(routes_to_create)} channel filter routes.") ChannelFilter.objects.bulk_create( - [ - ChannelFilter( - alert_receive_channel=alert_receive_channel, - filtering_term=None, - is_default=True, - order=0, - ) - for alert_receive_channel in alert_receive_channels - ], + routes_to_create, batch_size=5000, - ignore_conflicts=True, # ignore if default route already exists for integration + ignore_conflicts=True, # ignore if routes already exist for integration ) + logger.info("Direct paging routes creation completed.") # add integrations to metrics cache + logger.info("Adding integrations to metrics cache.") metrics_add_integrations_to_cache(list(alert_receive_channels), organization) + logger.info("Integrations have been added to the metrics cache.") def get_queryset(self): return AlertReceiveChannelQueryset(self.model, using=self._db).filter( diff --git a/engine/apps/alerts/paging.py b/engine/apps/alerts/paging.py index 5121d017..867ac18c 100644 --- a/engine/apps/alerts/paging.py +++ b/engine/apps/alerts/paging.py @@ -48,6 +48,7 @@ class DirectPagingAlertPayload(typing.TypedDict): def _trigger_alert( organization: Organization, team: Team | None, + important_team_escalation: bool, message: str, title: str, permalink: str | None, @@ -82,6 +83,13 @@ def _trigger_alert( "uid": str(uuid4()), # avoid grouping "author_username": from_user.username, "permalink": permalink, + # NOTE: this field is mostly being added for purposes of escalating to a team + # this 
field is provided via the web UI/API/slack as a checkbox, indicating that the user doing the paging + # would like to send an "important" page to the team. + # + # Teams can configure routing in their Direct Paging Integration to route based on this field to different + # escalation chains + "important": important_team_escalation, }, } @@ -128,6 +136,7 @@ def direct_paging( source_url: str | None = None, grafana_incident_id: str | None = None, team: Team | None = None, + important_team_escalation: bool = False, users: UserNotifications | None = None, alert_group: AlertGroup | None = None, ) -> AlertGroup | None: @@ -156,7 +165,16 @@ def direct_paging( # create alert group if needed with transaction.atomic(): if alert_group is None: - alert_group = _trigger_alert(organization, team, message, title, source_url, grafana_incident_id, from_user) + alert_group = _trigger_alert( + organization, + team, + important_team_escalation, + message, + title, + source_url, + grafana_incident_id, + from_user, + ) for u, important in users: alert_group.log_records.create( diff --git a/engine/apps/alerts/tests/test_alert_receiver_channel.py b/engine/apps/alerts/tests/test_alert_receiver_channel.py index 93023982..d1f6dc39 100644 --- a/engine/apps/alerts/tests/test_alert_receiver_channel.py +++ b/engine/apps/alerts/tests/test_alert_receiver_channel.py @@ -259,27 +259,47 @@ def test_create_missing_direct_paging_integrations( ): organization = make_organization() - # team with no direct paging integration + # two teams with no direct paging integration team1 = make_team(organization) + team2 = make_team(organization) # team with direct paging integration - team2 = make_team(organization) + team3 = make_team(organization) alert_receive_channel = make_alert_receive_channel( - organization, team=team2, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING + organization, team=team3, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING ) make_channel_filter(alert_receive_channel, 
is_default=True, order=0) # create missing direct paging integration for organization AlertReceiveChannel.objects.create_missing_direct_paging_integrations(organization) + assert organization.alert_receive_channels.count() == 3 + # check that missing integrations and default routes were created - assert organization.alert_receive_channels.count() == 2 - mock_metrics_add_integrations_to_cache.assert_called_once() + # + # NOTE: we explicitly don't test team3, it already has a Direct Paging integration associated with it + # and AlertReceiveChannel.objects.create_missing_direct_paging_integrations is not responsible for filling + # in missing routes. + # + # See apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py which is a data migration that does + # exactly this. for team in [team1, team2]: - alert_receive_channel = organization.alert_receive_channels.get( - team=team, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING - ) - assert alert_receive_channel.channel_filters.get().is_default + alert_receive_channel = organization.alert_receive_channels.get(team=team) + + direct_paging_integration_routes = alert_receive_channel.channel_filters.all() + + assert direct_paging_integration_routes.count() == 2 + + for route in direct_paging_integration_routes: + if route.is_default: + assert route.order == 1 + assert route.filtering_term is None + else: + assert route.order == 0 + assert route.filtering_term == "{{ payload.oncall.important }}" + assert route.filtering_term_type == route.FILTERING_TERM_TYPE_JINJA2 + + mock_metrics_add_integrations_to_cache.assert_called_once() @pytest.mark.django_db diff --git a/engine/apps/alerts/tests/test_paging.py b/engine/apps/alerts/tests/test_paging.py index d6bad7a2..4b2e4069 100644 --- a/engine/apps/alerts/tests/test_paging.py +++ b/engine/apps/alerts/tests/test_paging.py @@ -1,4 +1,4 @@ -from unittest.mock import call, patch +from unittest.mock import ANY, call, patch import pytest from django.utils import 
timezone @@ -86,23 +86,46 @@ def test_direct_paging_user(make_organization, make_user_for_organization, djang assert_log_record(ag, f"{from_user.username} paged user {u.username}", expected_info=expected_info) +@pytest.mark.parametrize("important_team_escalation", [True, False]) @pytest.mark.django_db -def test_direct_paging_team(make_organization, make_team, make_user_for_organization): +def test_direct_paging_team(make_organization, make_team, make_user_for_organization, important_team_escalation): organization = make_organization() from_user = make_user_for_organization(organization) team = make_team(organization) + + from_author_username = from_user.username + source_url = "https://www.example.com" + title = f"{from_author_username} is paging {team.name} to join escalation" msg = "Fire" - direct_paging(organization, from_user, msg, team=team) + direct_paging( + organization, + from_user, + msg, + source_url=source_url, + team=team, + important_team_escalation=important_team_escalation, + ) # alert group created alert_groups = AlertGroup.objects.all() assert alert_groups.count() == 1 ag = alert_groups.get() alert = ag.alerts.get() - assert alert.title == f"{from_user.username} is paging {team.name} to join escalation" + assert alert.title == title assert alert.message == msg + assert alert.raw_request_data == { + "oncall": { + "title": title, + "message": msg, + "uid": ANY, + "author_username": from_author_username, + "permalink": source_url, + "important": important_team_escalation, + }, + } + assert ag.channel.verbal_name == f"Direct paging ({team.name} team)" assert ag.channel.team == team diff --git a/engine/apps/api/serializers/direct_paging.py b/engine/apps/api/serializers/direct_paging.py index 4cb1e646..002438dc 100644 --- a/engine/apps/api/serializers/direct_paging.py +++ b/engine/apps/api/serializers/direct_paging.py @@ -39,6 +39,7 @@ class BasePagingSerializer(serializers.Serializer): users = UserReferenceSerializer(many=True, required=False, 
default=list) team = TeamPrimaryKeyRelatedField(allow_null=True, default=CurrentTeamDefault()) + important_team_escalation = serializers.BooleanField(required=False, default=False) alert_group_id = serializers.CharField(required=False, default=None) alert_group = serializers.HiddenField(default=None) # set in DirectPagingSerializer.validate diff --git a/engine/apps/api/tests/test_direct_paging.py b/engine/apps/api/tests/test_direct_paging.py index be529065..6496d1ec 100644 --- a/engine/apps/api/tests/test_direct_paging.py +++ b/engine/apps/api/tests/test_direct_paging.py @@ -1,3 +1,5 @@ +from unittest.mock import ANY + import pytest from django.urls import reverse from rest_framework import status @@ -59,11 +61,13 @@ def test_direct_paging_new_alert_group( assert alert.message == message +@pytest.mark.parametrize("important_team_escalation", [True, False]) @pytest.mark.django_db def test_direct_paging_page_team( make_organization_and_user_with_plugin_token, make_team, make_user_auth_headers, + important_team_escalation, ): organization, user, token = make_organization_and_user_with_plugin_token(role=LegacyAccessControlRole.EDITOR) team = make_team(organization=organization) @@ -81,6 +85,7 @@ def test_direct_paging_page_team( "message": message, "source_url": source_url, "grafana_incident_id": grafana_incident_id, + "important_team_escalation": important_team_escalation, }, format="json", **make_user_auth_headers(user, token), @@ -92,7 +97,16 @@ def test_direct_paging_page_team( alert = alert_group.alerts.first() assert alert_group.grafana_incident_id == grafana_incident_id - assert alert.raw_request_data["oncall"]["permalink"] == source_url + assert alert.raw_request_data == { + "oncall": { + "title": ANY, + "message": message, + "uid": ANY, + "author_username": ANY, + "permalink": source_url, + "important": important_team_escalation, + }, + } @pytest.mark.django_db diff --git a/engine/apps/api/views/direct_paging.py b/engine/apps/api/views/direct_paging.py index 
b5d7a9eb..eb923cdf 100644 --- a/engine/apps/api/views/direct_paging.py +++ b/engine/apps/api/views/direct_paging.py @@ -40,6 +40,7 @@ class DirectPagingAPIView(APIView): source_url=validated_data["source_url"], grafana_incident_id=validated_data["grafana_incident_id"], team=validated_data["team"], + important_team_escalation=validated_data["important_team_escalation"], users=[(user["instance"], user["important"]) for user in validated_data["users"]], alert_group=validated_data["alert_group"], ) diff --git a/engine/apps/public_api/tests/test_escalation.py b/engine/apps/public_api/tests/test_escalation.py index f6a7665d..025c672b 100644 --- a/engine/apps/public_api/tests/test_escalation.py +++ b/engine/apps/public_api/tests/test_escalation.py @@ -89,11 +89,13 @@ def test_escalation_new_alert_group( assert alert.message == message +@pytest.mark.parametrize("important_team_escalation", [True, False]) @pytest.mark.django_db def test_escalation_team( make_organization_and_user_with_token, make_team, make_user_auth_headers, + important_team_escalation, ): organization, user, token = make_organization_and_user_with_token() team = make_team(organization=organization) @@ -110,6 +112,7 @@ def test_escalation_team( "team": team.public_primary_key, "message": message, "source_url": source_url, + "important_team_escalation": important_team_escalation, }, format="json", **make_user_auth_headers(user, token), @@ -120,7 +123,16 @@ def test_escalation_team( alert_group = AlertGroup.objects.get(public_primary_key=response.json()["id"]) alert = alert_group.alerts.first() - assert alert.raw_request_data["oncall"]["permalink"] == source_url + assert alert.raw_request_data == { + "oncall": { + "title": mock.ANY, + "message": message, + "uid": mock.ANY, + "author_username": mock.ANY, + "permalink": source_url, + "important": important_team_escalation, + }, + } @pytest.mark.django_db diff --git a/engine/apps/public_api/views/escalation.py b/engine/apps/public_api/views/escalation.py index 
be545926..2d38dc0a 100644 --- a/engine/apps/public_api/views/escalation.py +++ b/engine/apps/public_api/views/escalation.py @@ -41,6 +41,7 @@ class EscalationView(APIView): title=validated_data["title"], source_url=validated_data["source_url"], team=validated_data["team"], + important_team_escalation=validated_data["important_team_escalation"], users=[(user["instance"], user["important"]) for user in validated_data["users"]], alert_group=validated_data["alert_group"], ) diff --git a/engine/apps/slack/scenarios/paging.py b/engine/apps/slack/scenarios/paging.py index f1981fc5..7d18cee8 100644 --- a/engine/apps/slack/scenarios/paging.py +++ b/engine/apps/slack/scenarios/paging.py @@ -37,12 +37,14 @@ if typing.TYPE_CHECKING: from apps.slack.models import SlackTeamIdentity, SlackUserIdentity from apps.user_management.models import Organization, Team, User - DIRECT_PAGING_TEAM_SELECT_ID = "paging_team_select" +DIRECT_PAGING_TEAM_SEVERITY_CHECKBOXES_ID = "paging_team_severity_checkboxes" DIRECT_PAGING_ORG_SELECT_ID = "paging_org_select" DIRECT_PAGING_USER_SELECT_ID = "paging_user_select" DIRECT_PAGING_MESSAGE_INPUT_ID = "paging_message_input" +DIRECT_PAGING_TEAM_SEVERITY_CHECKBOX_VALUE = "important" + DEFAULT_TEAM_VALUE = "default_team" @@ -248,6 +250,7 @@ class FinishDirectPaging(scenario_step.ScenarioStep): from_user=user, message=message, team=selected_team, + important_team_escalation=_get_team_escalation_severity_from_payload(payload, input_id_prefix), users=selected_users, ) except DirectPagingUserTeamValidationError: @@ -331,6 +334,14 @@ class OnPagingTeamChange(scenario_step.ScenarioStep): ) +class OnPagingTeamSeverityCheckboxChange(OnPagingTeamChange): + """ + Specify alert severity when escalating to a team. + + NOTE: we simply reuse `OnPagingTeamChange` step, since the behavior is the same. + """ + + class OnPagingUserChange(scenario_step.ScenarioStep): """Add selected to user to the list. 
@@ -491,6 +502,7 @@ def render_dialog( new_private_metadata["input_id_prefix"] = new_input_id_prefix selected_organization = predefined_org if predefined_org else available_organizations.first() is_team_selected, selected_team = False, None + is_team_escalation_important = False else: # setup form using data/state old_input_id_prefix, new_input_id_prefix, new_private_metadata = _get_and_change_input_id_prefix_from_metadata( @@ -502,6 +514,7 @@ def render_dialog( else _get_selected_org_from_payload(payload, old_input_id_prefix, slack_team_identity, slack_user_identity) ) is_team_selected, selected_team = _get_selected_team_from_payload(payload, old_input_id_prefix) + is_team_escalation_important = _get_team_escalation_severity_from_payload(payload, old_input_id_prefix) blocks: Block.AnyBlocks = [] @@ -523,9 +536,14 @@ def render_dialog( ) blocks.append(organization_select) - # Add team select and additional responders blocks + # Add team select/severity and additional responders blocks blocks += _get_team_select_blocks( - slack_user_identity, selected_organization, is_team_selected, selected_team, new_input_id_prefix + slack_user_identity, + selected_organization, + is_team_selected, + selected_team, + is_team_escalation_important, + new_input_id_prefix, ) blocks += _get_user_select_blocks(payload, selected_organization, new_input_id_prefix, error_msg) @@ -629,6 +647,25 @@ def _get_select_field_value(payload: EventPayload, prefix_id: str, routing_uid: return json.loads(field["value"])["id"] if field else None +def _get_first_selected_checkbox_option_value( + payload: EventPayload, + prefix_id: str, + routing_uid: str, + field_id: str, +) -> str | None: + """ + NOTE: if reusing this for other logic outside of the team severity checkboxes, note that this function + will only return the value of the first checkbox option... 
+ """ + try: + selected_options = payload["view"]["state"]["values"][prefix_id + field_id][routing_uid]["selected_options"] + if not selected_options: + return None + return selected_options[0]["value"] + except KeyError: + return None + + def _get_selected_org_from_payload( payload: EventPayload, input_id_prefix: str, @@ -676,6 +713,7 @@ def _get_team_select_blocks( organization: "Organization", is_selected: bool, value: typing.Optional["Team"], + is_team_escalation_important: bool, input_id_prefix: str, ) -> Block.AnyBlocks: blocks: Block.AnyBlocks = [] @@ -702,7 +740,7 @@ def _get_team_select_blocks( if not teams: direct_paging_info_msg["elements"][0][ "text" - ] += ". There are currently no teams which have a Direct Paging integration that is configured." + ] += ".\n\nThere are currently no teams which have a Direct Paging integration that is configured." blocks.append(direct_paging_info_msg) return blocks @@ -769,6 +807,62 @@ def _get_team_select_blocks( } ) + team_severity_important_checkbox_option: CompositionObjectOption = { + "text": { + "type": "mrkdwn", + "text": "Important escalation", + }, + "value": DIRECT_PAGING_TEAM_SEVERITY_CHECKBOX_VALUE, + } + + team_severity_checkboxes_element: Block.Section = { + "type": "section", + "block_id": input_id_prefix + DIRECT_PAGING_TEAM_SEVERITY_CHECKBOXES_ID, + "text": { + "type": "plain_text", + # NOTE: this is a bit of a hack. Slack requires us to specify this text object, and it cannot be empty + # hence the empty space. 
We do this so that we can render the text instead in a context block below + # (which allows us to render it in a slightly smaller font size) + # https://api.slack.com/reference/block-kit/blocks#section + "text": " ", + }, + "accessory": { + "type": "checkboxes", + "options": [team_severity_important_checkbox_option], + "action_id": OnPagingTeamSeverityCheckboxChange.routing_uid(), + }, + } + + if is_team_escalation_important: + # From the docs https://api.slack.com/reference/block-kit/block-elements#checkboxes__fields + # An array of option objects that EXACTLY matches one or more of the options within options + team_severity_checkboxes_element["accessory"]["initial_options"] = [team_severity_important_checkbox_option] + + blocks.extend( + [ + team_severity_checkboxes_element, + typing.cast( + Block.Context, + { + # NOTE: we add this here instead of as a checkbox option description because those can only + # be defined as plain text (ie. not markdown where links are supported) + "type": "context", + "elements": [ + { + "type": "mrkdwn", + "text": ( + "Check the above box if you would like to escalate to this team as an 'important' " + "escalation. Teams can configure their Direct Paging Integration to route to different " + "escalation chains based on this. 
" + "" + ), + }, + ], + }, + ), + ] + ) + return blocks @@ -951,6 +1045,16 @@ def _get_selected_team_from_payload( return selected_team_id, Team.objects.filter(pk=selected_team_id).first() +def _get_team_escalation_severity_from_payload(payload: EventPayload, input_id_prefix: str) -> bool: + checkbox_value = _get_first_selected_checkbox_option_value( + payload, + input_id_prefix, + OnPagingTeamSeverityCheckboxChange.routing_uid(), + DIRECT_PAGING_TEAM_SEVERITY_CHECKBOXES_ID, + ) + return checkbox_value == DIRECT_PAGING_TEAM_SEVERITY_CHECKBOX_VALUE + + def _get_selected_user_from_payload(payload: EventPayload, input_id_prefix: str) -> typing.Optional["User"]: from apps.user_management.models import User @@ -1035,6 +1139,12 @@ STEPS_ROUTING: ScenarioRoute.RoutingSteps = [ "block_action_id": OnPagingTeamChange.routing_uid(), "step": OnPagingTeamChange, }, + { + "payload_type": PayloadType.BLOCK_ACTIONS, + "block_action_type": BlockActionType.CHECKBOXES, + "block_action_id": OnPagingTeamSeverityCheckboxChange.routing_uid(), + "step": OnPagingTeamSeverityCheckboxChange, + }, { "payload_type": PayloadType.BLOCK_ACTIONS, "block_action_type": BlockActionType.STATIC_SELECT, diff --git a/engine/apps/slack/tests/scenario_steps/test_paging.py b/engine/apps/slack/tests/scenario_steps/test_paging.py index 46c32f3c..e0800ee2 100644 --- a/engine/apps/slack/tests/scenario_steps/test_paging.py +++ b/engine/apps/slack/tests/scenario_steps/test_paging.py @@ -12,12 +12,14 @@ from apps.slack.scenarios.paging import ( DIRECT_PAGING_MESSAGE_INPUT_ID, DIRECT_PAGING_ORG_SELECT_ID, DIRECT_PAGING_TEAM_SELECT_ID, + DIRECT_PAGING_TEAM_SEVERITY_CHECKBOXES_ID, DIRECT_PAGING_USER_SELECT_ID, DataKey, FinishDirectPaging, OnPagingItemActionChange, OnPagingOrgChange, OnPagingTeamChange, + OnPagingTeamSeverityCheckboxChange, OnPagingUserChange, Policy, StartDirectPaging, @@ -28,7 +30,13 @@ from apps.user_management.models import Organization def make_paging_view_slack_payload( - selected_org=None, 
predefined_org=None, team=None, user=None, current_users=None, actions=None + selected_org=None, + predefined_org=None, + team=None, + important_team_escalation=False, + user=None, + current_users=None, + actions=None, ): """ Helper function to create a payload for paging view. @@ -66,6 +74,15 @@ def make_paging_view_slack_payload( } } }, + DIRECT_PAGING_TEAM_SEVERITY_CHECKBOXES_ID: { + OnPagingTeamSeverityCheckboxChange.routing_uid(): { + "selected_options": [ + {"value": "important"}, + ] + if important_team_escalation + else [] + }, + }, DIRECT_PAGING_TEAM_SELECT_ID: { OnPagingTeamChange.routing_uid(): { "selected_option": {"value": make_value({"id": team.pk if team else None}, organization)} @@ -141,6 +158,7 @@ def test_page_team_with_predefined_org(make_organization_and_user_with_slack_ide from_user=user, message="The Message", team=team, + important_team_escalation=False, users=[], ) @@ -385,15 +403,21 @@ def test_trigger_paging_additional_responders(make_organization_and_user_with_sl from_user=user, message="The Message", team=team, + important_team_escalation=False, users=[(user, True)], ) +@pytest.mark.parametrize("important_team_escalation", [True, False]) @pytest.mark.django_db -def test_page_team(make_organization_and_user_with_slack_identities, make_team): +def test_page_team(make_organization_and_user_with_slack_identities, make_team, important_team_escalation): organization, user, slack_team_identity, slack_user_identity = make_organization_and_user_with_slack_identities() team = make_team(organization) - payload = make_paging_view_slack_payload(selected_org=organization, team=team) + payload = make_paging_view_slack_payload( + selected_org=organization, + team=team, + important_team_escalation=important_team_escalation, + ) step = FinishDirectPaging(slack_team_identity) with patch("apps.slack.scenarios.paging.direct_paging") as mock_direct_paging: @@ -405,6 +429,7 @@ def test_page_team(make_organization_and_user_with_slack_identities, make_team): 
from_user=user, message="The Message", team=team, + important_team_escalation=important_team_escalation, users=[], ) @@ -421,6 +446,7 @@ def test_get_organization_select(make_organization): assert select["element"]["options"][0]["text"]["text"] == "Organization (stack_slug)" +@pytest.mark.parametrize("is_team_escalation_important", [True, False]) @pytest.mark.django_db def test_get_team_select_blocks( make_organization_and_user_with_slack_identities, @@ -428,6 +454,7 @@ def test_get_team_select_blocks( make_alert_receive_channel, make_escalation_chain, make_channel_filter, + is_team_escalation_important, ): info_msg = ( "*Note*: You can only page teams which have a Direct Paging integration that is configured. " @@ -444,7 +471,14 @@ def test_get_team_select_blocks( # no team selected - no team direct paging integrations available organization, _, _, slack_user_identity = make_organization_and_user_with_slack_identities() - blocks = _get_team_select_blocks(slack_user_identity, organization, False, None, input_id_prefix) + blocks = _get_team_select_blocks( + slack_user_identity, + organization, + False, + None, + is_team_escalation_important, + input_id_prefix, + ) assert len(blocks) == 1 @@ -452,7 +486,7 @@ def test_get_team_select_blocks( assert context_block["type"] == "context" assert ( context_block["elements"][0]["text"] - == info_msg + ". There are currently no teams which have a Direct Paging integration that is configured." + == info_msg + ".\n\nThere are currently no teams which have a Direct Paging integration that is configured." 
) # no team selected - 1 team direct paging integration available @@ -462,7 +496,14 @@ def test_get_team_select_blocks( escalation_chain = make_escalation_chain(organization) make_channel_filter(arc, is_default=True, escalation_chain=escalation_chain) - blocks = _get_team_select_blocks(slack_user_identity, organization, False, None, input_id_prefix) + blocks = _get_team_select_blocks( + slack_user_identity, + organization, + False, + None, + is_team_escalation_important, + input_id_prefix, + ) assert len(blocks) == 2 input_block, context_block = blocks @@ -472,7 +513,7 @@ def test_get_team_select_blocks( assert input_block["element"]["options"] == [_contstruct_team_option(team)] assert context_block["elements"][0]["text"] == info_msg - # team selected + # team selected - team severity checkbox should also now appear organization, _, _, slack_user_identity = make_organization_and_user_with_slack_identities() team1 = make_team(organization) team2 = make_team(organization) @@ -488,10 +529,25 @@ def test_get_team_select_blocks( _setup_direct_paging_integration(team1) team2_direct_paging_arc = _setup_direct_paging_integration(team2) - blocks = _get_team_select_blocks(slack_user_identity, organization, True, team2, input_id_prefix) + blocks = _get_team_select_blocks( + slack_user_identity, + organization, + True, + team2, + is_team_escalation_important, + input_id_prefix, + ) - assert len(blocks) == 2 - input_block, context_block = blocks + assert len(blocks) == 4 + input_block, context_block, team_severity_checkboxes, team_severity_context_block = blocks + + team_severity_important_checkbox_option = { + "text": { + "type": "mrkdwn", + "text": "Important escalation", + }, + "value": "important", + } team1_option = _contstruct_team_option(team1) team2_option = _contstruct_team_option(team2) @@ -509,6 +565,20 @@ def test_get_team_select_blocks( == f"Integration <{team2_direct_paging_arc.web_link}|{team2_direct_paging_arc.verbal_name}> will be used for notification." 
) + assert team_severity_context_block["elements"][0]["text"] == ( + "Check the above box if you would like to escalate to this team as an 'important' " + "escalation. Teams can configure their Direct Paging Integration to route to different " + "escalation chains based on this. " + "" + ) + assert team_severity_checkboxes["accessory"]["type"] == "checkboxes" + assert team_severity_checkboxes["accessory"]["options"] == [team_severity_important_checkbox_option] + + if is_team_escalation_important: + assert team_severity_checkboxes["accessory"]["initial_options"] == [team_severity_important_checkbox_option] + else: + assert "initial_options" not in team_severity_checkboxes["accessory"] + # team's direct paging integration has two routes associated with it # the team should only be displayed once organization, _, _, slack_user_identity = make_organization_and_user_with_slack_identities() @@ -519,7 +589,14 @@ def test_get_team_select_blocks( make_channel_filter(arc, is_default=True, escalation_chain=escalation_chain) make_channel_filter(arc, escalation_chain=escalation_chain) - blocks = _get_team_select_blocks(slack_user_identity, organization, False, None, input_id_prefix) + blocks = _get_team_select_blocks( + slack_user_identity, + organization, + False, + None, + is_team_escalation_important, + input_id_prefix, + ) assert len(blocks) == 2 input_block, context_block = blocks diff --git a/engine/apps/user_management/tests/test_sync.py b/engine/apps/user_management/tests/test_sync.py index e381c62a..3f5bdd9a 100644 --- a/engine/apps/user_management/tests/test_sync.py +++ b/engine/apps/user_management/tests/test_sync.py @@ -203,23 +203,33 @@ def test_sync_teams_for_organization(make_organization, make_team, make_alert_re assert created_team.team_id == api_teams[2]["id"] assert created_team.name == api_teams[2]["name"] + def _assert_teams_direct_paging_integration_is_configured_properly(integration): + assert integration.channel_filters.count() == 2 + + for route in 
integration.channel_filters.all(): + if route.is_default: + assert route.order == 1 + assert route.filtering_term is None + else: + assert route.order == 0 + assert route.filtering_term == "{{ payload.oncall.important }}" + assert route.filtering_term_type == route.FILTERING_TERM_TYPE_JINJA2 + # check that direct paging is created for created team direct_paging_integration = AlertReceiveChannel.objects.get( organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=created_team, ) - assert direct_paging_integration.channel_filters.count() == 1 - assert direct_paging_integration.channel_filters.first().order == 0 - assert direct_paging_integration.channel_filters.first().is_default + _assert_teams_direct_paging_integration_is_configured_properly(direct_paging_integration) # check that direct paging is created for existing team direct_paging_integration = AlertReceiveChannel.objects.get( - organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=teams[2] + organization=organization, + integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, + team=teams[2], ) - assert direct_paging_integration.channel_filters.count() == 1 - assert direct_paging_integration.channel_filters.first().order == 0 - assert direct_paging_integration.channel_filters.first().is_default + _assert_teams_direct_paging_integration_is_configured_properly(direct_paging_integration) @pytest.mark.django_db