diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index b3377f9e..cdfd35d6 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -1,4 +1,5 @@ import time +import typing from functools import partial from celery.exceptions import Retry @@ -42,7 +43,6 @@ def notify_user_task( countdown = 0 stop_escalation = False - log_message = "" log_record = None with transaction.atomic(): @@ -121,20 +121,23 @@ def notify_user_task( reason = None def _create_user_notification_policy_log_record(**kwargs): - if using_fallback_default_notification_policy_step and "notification_policy" in kwargs: - kwargs["notification_policy"] = None - return UserNotificationPolicyLogRecord(**kwargs) + return UserNotificationPolicyLogRecord( + **kwargs, + using_fallback_default_notification_policy_step=using_fallback_default_notification_policy_step, + ) - if notification_policy is None: - stop_escalation = True - log_record = _create_user_notification_policy_log_record( + def _create_notification_finished_user_notification_policy_log_record(): + return _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FINISHED, notification_policy=notification_policy, alert_group=alert_group, slack_prevent_posting=prevent_posting_to_thread, ) - log_message += "Personal escalation exceeded" + + if notification_policy is None: + stop_escalation = True + log_record = _create_notification_finished_user_notification_policy_log_record() else: if ( (alert_group.acknowledged and not notify_even_acknowledged) @@ -192,6 +195,7 @@ def notify_user_task( notification_step=notification_policy.step, notification_channel=notification_policy.notify_by, ) + if log_record: # log_record is None if user notification policy step is unspecified # if this is the first notification step, and user hasn't been notified for this alert group - update metric if ( @@ -208,25 +212,43 @@ def notify_user_task( if notify_user_task.request.retries == 0: transaction.on_commit(partial(send_user_notification_signal.apply_async, (log_record.pk,))) - if not stop_escalation: + def _create_perform_notification_task(log_record_pk, alert_group_pk, use_default_notification_policy_fallback): + task = perform_notification.apply_async((log_record_pk, use_default_notification_policy_fallback)) + task_logger.info( + f"Created perform_notification task {task} log_record={log_record_pk} " f"alert_group={alert_group_pk}" + ) + + def _update_user_has_notification_active_notification_policy_id(active_policy_id: typing.Optional[str]) -> None: + user_has_notification.active_notification_policy_id = active_policy_id + user_has_notification.save(update_fields=["active_notification_policy_id"]) + + def _reset_user_has_notification_active_notification_policy_id() -> None: + _update_user_has_notification_active_notification_policy_id(None) + + create_perform_notification_task = partial( + _create_perform_notification_task, + log_record.pk, + alert_group_pk, + using_fallback_default_notification_policy_step, + ) + + if using_fallback_default_notification_policy_step: + # if we are using default notification policy, we're done escalating.. there's no further notification + # policy steps in this case. Kick off the perform_notification task, create the + # TYPE_PERSONAL_NOTIFICATION_FINISHED log record, and reset the active_notification_policy_id + transaction.on_commit(create_perform_notification_task) + _create_notification_finished_user_notification_policy_log_record() + _reset_user_has_notification_active_notification_policy_id() + elif not stop_escalation: if notification_policy.step != UserNotificationPolicy.Step.WAIT: - - def _create_perform_notification_task(log_record_pk, alert_group_pk): - task = perform_notification.apply_async((log_record_pk,)) - task_logger.info( - f"Created perform_notification task {task} log_record={log_record_pk} " - f"alert_group={alert_group_pk}" - ) - - transaction.on_commit(partial(_create_perform_notification_task, log_record.pk, alert_group_pk)) + transaction.on_commit(create_perform_notification_task) delay = NEXT_ESCALATION_DELAY if countdown is not None: delay += countdown task_id = celery_uuid() - user_has_notification.active_notification_policy_id = task_id - user_has_notification.save(update_fields=["active_notification_policy_id"]) + _update_user_has_notification_active_notification_policy_id(task_id) transaction.on_commit( partial( @@ -241,10 +263,8 @@ def notify_user_task( task_id=task_id, ) ) - else: - user_has_notification.active_notification_policy_id = None - user_has_notification.save(update_fields=["active_notification_policy_id"]) + _reset_user_has_notification_active_notification_policy_id() @shared_dedicated_queue_retry_task( @@ -253,7 +273,7 @@ def notify_user_task( dont_autoretry_for=(Retry,), max_retries=1 if settings.DEBUG else None, ) -def perform_notification(log_record_pk): +def perform_notification(log_record_pk, use_default_notification_policy_fallback): from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord from apps.telegram.models import TelegramToUserConnector @@ -272,8 +292,13 @@ def perform_notification(log_record_pk): user = log_record.author alert_group = log_record.alert_group - notification_policy = log_record.notification_policy + notification_policy = ( + UserNotificationPolicy.get_default_fallback_policy(user) + if use_default_notification_policy_fallback + else log_record.notification_policy + ) notification_channel = notification_policy.notify_by if notification_policy else None + if user is None or notification_policy is None: UserNotificationPolicyLogRecord( author=user, @@ -310,7 +335,9 @@ def perform_notification(log_record_pk): TelegramToUserConnector.notify_user(user, alert_group, notification_policy) except RetryAfter as e: countdown = getattr(e, "retry_after", 3) - raise perform_notification.retry((log_record_pk,), countdown=countdown, exc=e) + raise perform_notification.retry( + (log_record_pk, use_default_notification_policy_fallback), countdown=countdown, exc=e + ) elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK: # TODO: refactor checking the possibility of sending a notification in slack @@ -390,7 +417,9 @@ def perform_notification(log_record_pk): f"does not exist. Restarting perform_notification." ) restart_delay_seconds = 60 - perform_notification.apply_async((log_record_pk,), countdown=restart_delay_seconds) + perform_notification.apply_async( + (log_record_pk, use_default_notification_policy_fallback), countdown=restart_delay_seconds + ) else: task_logger.debug( f"send_slack_notification for alert_group {alert_group.pk} failed because slack message " diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py index e0c32d2c..7dd2b812 100644 --- a/engine/apps/alerts/tests/test_notify_user.py +++ b/engine/apps/alerts/tests/test_notify_user.py @@ -40,7 +40,7 @@ def test_custom_backend_call( ) with patch("apps.base.tests.messaging_backend.TestOnlyBackend.notify_user") as mock_notify_user: - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) mock_notify_user.assert_called_once_with(user_1, alert_group, user_notification_policy) @@ -72,7 +72,7 @@ def test_custom_backend_error( with patch("apps.alerts.tasks.notify_user.get_messaging_backend_from_id") as mock_get_backend: mock_get_backend.return_value = None - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) error_log_record = UserNotificationPolicyLogRecord.objects.last() assert error_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED @@ -119,7 +119,7 @@ def test_notify_user_missing_data_errors( with patch("apps.alerts.tasks.notify_user.get_messaging_backend_from_id") as mock_get_backend: mock_get_backend.return_value = None - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) error_log_record = UserNotificationPolicyLogRecord.objects.last() assert error_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED @@ -154,7 +154,7 @@ def test_notify_user_perform_notification_error_if_viewer( type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, ) - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) error_log_record = UserNotificationPolicyLogRecord.objects.last() assert error_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED @@ -230,7 +230,7 @@ def test_perform_notification_reason_to_skip_escalation_in_slack( if not error_code: make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") with patch.object(SlackMessage, "send_slack_notification") as mocked_send_slack_notification: - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) last_log_record = UserNotificationPolicyLogRecord.objects.last() if error_code: @@ -277,7 +277,7 @@ def test_perform_notification_slack_prevent_posting( make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") with patch.object(SlackMessage, "send_slack_notification") as mocked_send_slack_notification: - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) mocked_send_slack_notification.assert_not_called() last_log_record = UserNotificationPolicyLogRecord.objects.last() @@ -292,7 +292,7 @@ def test_perform_notification_slack_prevent_posting( @pytest.mark.django_db def test_perform_notification_missing_user_notification_policy_log_record(caplog): invalid_pk = 12345 - perform_notification(invalid_pk) + perform_notification(invalid_pk, False) assert ( f"perform_notification: log_record {invalid_pk} doesn't exist. Skipping remainder of task. " @@ -327,7 +327,45 @@ def test_perform_notification_telegram_retryafter_error( exc = RetryAfter(countdown) with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user: with pytest.raises(RetryAfter): - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy) assert alert_group.personal_log_records.last() == log_record + + +@patch("apps.base.models.UserNotificationPolicy.get_default_fallback_policy") +@patch("apps.base.tests.messaging_backend.TestOnlyBackend.notify_user") +@pytest.mark.django_db +def test_perform_notification_use_default_notification_policy_fallback( + mock_notify_user, + mock_get_default_fallback_policy, + make_organization, + make_user, + make_alert_receive_channel, + make_alert_group, + make_user_notification_policy_log_record, +): + organization = make_organization() + user = make_user(organization=organization) + fallback_notification_policy = UserNotificationPolicy( + user=user, + step=UserNotificationPolicy.Step.NOTIFY, + notify_by=UserNotificationPolicy.NotificationChannel.TESTONLY, + important=False, + order=0, + ) + + mock_get_default_fallback_policy.return_value = fallback_notification_policy + + alert_receive_channel = make_alert_receive_channel(organization=organization) + alert_group = make_alert_group(alert_receive_channel=alert_receive_channel) + log_record = make_user_notification_policy_log_record( + author=user, + alert_group=alert_group, + notification_policy=None, + type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, + ) + + perform_notification(log_record.pk, True) + + mock_notify_user.assert_called_once_with(user, alert_group, fallback_notification_policy) diff --git a/engine/apps/base/models/user_notification_policy.py b/engine/apps/base/models/user_notification_policy.py index 1593b3ca..40855dcc 100644 --- a/engine/apps/base/models/user_notification_policy.py +++ b/engine/apps/base/models/user_notification_policy.py @@ -1,4 +1,5 @@ import datetime +import typing from enum import unique from typing import Tuple @@ -13,6 +14,11 @@ from apps.user_management.models import User from common.ordered_model.ordered_model import OrderedModel from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length +if typing.TYPE_CHECKING: + from django.db.models.manager import RelatedManager + + from apps.base.models import UserNotificationPolicyLogRecord + def generate_public_primary_key_for_notification_policy(): prefix = "N" @@ -66,6 +72,9 @@ def validate_channel_choice(value): class UserNotificationPolicy(OrderedModel): + personal_log_records: "RelatedManager['UserNotificationPolicyLogRecord']" + user: typing.Optional[User] + order_with_respect_to = ("user_id", "important") public_primary_key = models.CharField( @@ -129,6 +138,19 @@ class UserNotificationPolicy(OrderedModel): return default, important + @staticmethod + def get_default_fallback_policy(user: User) -> "UserNotificationPolicy": + return UserNotificationPolicy( + user=user, + step=UserNotificationPolicy.Step.NOTIFY, + notify_by=settings.EMAIL_BACKEND_INTERNAL_ID, + # The important flag doesn't really matter here.. since we're just using this as a transient/fallacbk + # in-memory object (important is really only used for allowing users to group their + # notification policy steps) + important=False, + order=0, + ) + @property def short_verbal(self) -> str: if self.step == UserNotificationPolicy.Step.NOTIFY: diff --git a/engine/apps/base/models/user_notification_policy_log_record.py b/engine/apps/base/models/user_notification_policy_log_record.py index f219857c..2d2f8a2a 100644 --- a/engine/apps/base/models/user_notification_policy_log_record.py +++ b/engine/apps/base/models/user_notification_policy_log_record.py @@ -1,4 +1,5 @@ import logging +import typing import humanize from django.db import models @@ -15,11 +16,45 @@ from apps.base.models.user_notification_policy import validate_channel_choice from apps.slack.slack_formatter import SlackFormatter from common.utils import clean_markup +if typing.TYPE_CHECKING: + from apps.alerts.models import AlertGroup + from apps.user_management.models import User + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +def _check_if_notification_policy_is_transient_fallback(kwargs): + """ + If `using_fallback_default_notification_policy_step` is present, and `True`, then the `notification_policy` + field should be set to `None`. This is because we do not persist default notification policies in the + database. It only exists as a transient/in-memory object, and therefore has no foreign key to reference. + """ + using_fallback_default_notification_policy_step = kwargs.pop( + "using_fallback_default_notification_policy_step", False + ) + + if using_fallback_default_notification_policy_step: + kwargs.pop("notification_policy", None) + + +class UserNotificationPolicyLogRecordQuerySet(models.QuerySet): + def create(self, **kwargs): + """ + Needed for when we do something like this: + notification_policy = UserNotificationPolicy.objects.create(arg1="foo", ...) + """ + _check_if_notification_policy_is_transient_fallback(kwargs) + return super().create(**kwargs) + + class UserNotificationPolicyLogRecord(models.Model): + alert_group: "AlertGroup" + author: typing.Optional["User"] + notification_policy: typing.Optional[UserNotificationPolicy] + + objects: models.Manager["UserNotificationPolicyLogRecord"] = UserNotificationPolicyLogRecordQuerySet.as_manager() + ( TYPE_PERSONAL_NOTIFICATION_TRIGGERED, TYPE_PERSONAL_NOTIFICATION_FINISHED, @@ -112,6 +147,15 @@ class UserNotificationPolicyLogRecord(models.Model): notification_step = models.IntegerField(choices=UserNotificationPolicy.Step.choices, null=True, default=None) notification_channel = models.IntegerField(validators=[validate_channel_choice], null=True, default=None) + def __init__(self, *args, **kwargs): + """ + Needed for when we do something like this: + notification_policy = UserNotificationPolicy(arg1="foo", ...) + notification_policy.save() + """ + _check_if_notification_policy_is_transient_fallback(kwargs) + super().__init__(*args, **kwargs) + def rendered_notification_log_line(self, for_slack=False, html=False): timeline = render_relative_timeline(self.created_at, self.alert_group.started_at) diff --git a/engine/apps/email/backend.py b/engine/apps/email/backend.py index 164f0cb8..e654a5ad 100644 --- a/engine/apps/email/backend.py +++ b/engine/apps/email/backend.py @@ -1,6 +1,13 @@ +import typing + from apps.base.messaging import BaseMessagingBackend from apps.email.tasks import notify_user_async +if typing.TYPE_CHECKING: + from apps.alerts.models import AlertGroup + from apps.base.models import UserNotificationPolicy + from apps.user_management.models import User + class EmailBackend(BaseMessagingBackend): backend_id = "EMAIL" @@ -11,10 +18,18 @@ class EmailBackend(BaseMessagingBackend): templater = "apps.email.alert_rendering.AlertEmailTemplater" template_fields = ("title", "message") - def serialize_user(self, user): + def serialize_user(self, user: "User"): return {"email": user.email} - def notify_user(self, user, alert_group, notification_policy): + def notify_user( + self, user: "User", alert_group: "AlertGroup", notification_policy: typing.Optional["UserNotificationPolicy"] + ): + """ + NOTE: `notification_policy` may be None if the user has no notification policies defined, as + email is the default backend used + """ notify_user_async.delay( - user_pk=user.pk, alert_group_pk=alert_group.pk, notification_policy_pk=notification_policy.pk + user_pk=user.pk, + alert_group_pk=alert_group.pk, + notification_policy_pk=notification_policy.pk if notification_policy else None, ) diff --git a/engine/apps/email/tasks.py b/engine/apps/email/tasks.py index 685eb808..8aaff7f3 100644 --- a/engine/apps/email/tasks.py +++ b/engine/apps/email/tasks.py @@ -43,15 +43,28 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): logger.warning(f"Alert group {alert_group_pk} does not exist") return - try: - notification_policy = UserNotificationPolicy.objects.get(pk=notification_policy_pk) - except UserNotificationPolicy.DoesNotExist: - logger.warning(f"User notification policy {notification_policy_pk} does not exist") - return + using_fallback_default_notification_policy_step = False + + if notification_policy_pk is None: + # NOTE: `notification_policy_pk` may be None if the user has no notification policies defined, as + # email is the default backend used. see `UserNotificationPolicy.get_default_fallback_policy` for more details + notification_policy = UserNotificationPolicy.get_default_fallback_policy(user) + using_fallback_default_notification_policy_step = True + else: + try: + notification_policy = UserNotificationPolicy.objects.get(pk=notification_policy_pk) + except UserNotificationPolicy.DoesNotExist: + logger.warning(f"User notification policy {notification_policy_pk} does not exist") + return + + def _create_user_notification_policy_log_record(**kwargs): + return UserNotificationPolicyLogRecord.objects.create( + **kwargs, using_fallback_default_notification_policy_step=using_fallback_default_notification_policy_step + ) # create an error log in case EMAIL_HOST is not specified if not live_settings.EMAIL_HOST: - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -65,7 +78,7 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): emails_left = user.organization.emails_left(user) if emails_left <= 0: - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -111,7 +124,7 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): except (gaierror, BadHeaderError) as e: # gaierror is raised when EMAIL_HOST is invalid # BadHeaderError is raised when there's newlines in the subject - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -124,7 +137,7 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): return # record success log - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_SUCCESS, notification_policy=notification_policy, diff --git a/engine/apps/user_management/models/user.py b/engine/apps/user_management/models/user.py index f0c382c8..c6de3813 100644 --- a/engine/apps/user_management/models/user.py +++ b/engine/apps/user_management/models/user.py @@ -31,7 +31,7 @@ if typing.TYPE_CHECKING: from apps.alerts.models import AlertGroup, EscalationPolicy from apps.auth_token.models import ApiAuthToken, ScheduleExportAuthToken, UserScheduleExportAuthToken - from apps.base.models import UserNotificationPolicy + from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord from apps.slack.models import SlackUserIdentity from apps.social_auth.types import GoogleOauth2Response from apps.user_management.models import Organization, Team @@ -165,6 +165,7 @@ class User(models.Model): last_notified_in_escalation_policies: "RelatedManager['EscalationPolicy']" notification_policies: "RelatedManager['UserNotificationPolicy']" organization: "Organization" + personal_log_records: "RelatedManager['UserNotificationPolicyLogRecord']" resolved_alert_groups: "RelatedManager['AlertGroup']" schedule_export_token: "RelatedManager['ScheduleExportAuthToken']" silenced_alert_groups: "RelatedManager['AlertGroup']" @@ -413,30 +414,30 @@ class User(models.Model): return PermissionsQuery(permissions__contains=[required_permission]) return RoleInQuery(role__lte=permission.fallback_role.value) + def get_default_fallback_notification_policy(self) -> "UserNotificationPolicy": + from apps.base.models import UserNotificationPolicy + + return UserNotificationPolicy.get_default_fallback_policy(self) + def get_notification_policies_or_use_default_fallback( self, important=False ) -> typing.Tuple[bool, typing.List["UserNotificationPolicy"]]: """ If the user has no notification policies defined, fallback to using e-mail as the notification channel. - """ - from apps.base.models import UserNotificationPolicy - if not self.notification_policies.filter(important=important).exists(): + The 1st tuple element is a boolean indicating if we are falling back to using a "fallback"/default + notification policy step (which occurs when the user has no notification policies defined). + """ + notification_polices = self.notification_policies.filter(important=important) + + if not notification_polices.exists(): return ( True, - [ - UserNotificationPolicy( - user=self, - step=UserNotificationPolicy.Step.NOTIFY, - notify_by=settings.EMAIL_BACKEND_INTERNAL_ID, - important=important, - order=0, - ), - ], + [self.get_default_fallback_notification_policy()], ) return ( False, - list(self.notification_policies.filter(important=important).all()), + list(notification_polices.all()), ) def update_alert_group_table_selected_columns(self, columns: typing.List[AlertGroupTableColumn]) -> None: diff --git a/grafana-plugin/e2e-tests/utils/userSettings.ts b/grafana-plugin/e2e-tests/utils/userSettings.ts index d24cdfbb..a38878bc 100644 --- a/grafana-plugin/e2e-tests/utils/userSettings.ts +++ b/grafana-plugin/e2e-tests/utils/userSettings.ts @@ -49,14 +49,15 @@ export const verifyUserPhoneNumber = async (page: Page): Promise => { await closeModal(page); }; +const getDefaultNotificationSettingsSectionByTestId = (page: Page): Locator => + page.getByTestId('default-personal-notification-settings'); + /** * gets the first row of our default notification settings * and then gets the notification type dropdown */ const getFirstDefaultNotificationSettingTypeDropdown = async (page: Page): Promise => { - const firstDefaultNotificationSettingRow = page - .getByTestId('default-personal-notification-settings') - .locator('li >> nth=0'); + const firstDefaultNotificationSettingRow = getDefaultNotificationSettingsSectionByTestId(page).locator('li >> nth=0'); // get the notification type dropdown specifically return firstDefaultNotificationSettingRow.locator('div[class*="input-wrapper"] >> nth=1'); @@ -66,7 +67,19 @@ export const configureUserNotificationSettings = async (page: Page, notifyBy: No // open the user settings modal await openUserSettingsModal(page); - // select our notification type + // select our notification type, first check if we have any already defined, if so, click the + // "Add Notification Step" button + const defaultNotificationSettingsSection = getDefaultNotificationSettingsSectionByTestId(page); + const addNotificationStepText = 'Add Notification Step'; + + if (!(await defaultNotificationSettingsSection.locator(`button >> text=${addNotificationStepText}`).isVisible())) { + await clickButton({ + page, + buttonText: addNotificationStepText, + startingLocator: defaultNotificationSettingsSection, + }); + } + const firstDefaultNotificationTypeDropdopdown = await getFirstDefaultNotificationSettingTypeDropdown(page); await selectDropdownValue({ page,