oncall-engine/engine/apps/alerts/tasks/notify_all.py

from django.conf import settings

from apps.slack.tasks import check_slack_message_exists_before_post_message_to_thread
from common.custom_celery_tasks import shared_dedicated_queue_retry_task

from .notify_user import notify_user_task
from .task_logger import task_logger


@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def notify_all_task(alert_group_pk, escalation_policy_snapshot_order=None):
    """
    Run a "notify all" escalation step: notify every user of the Slack channel stored in the snapshot.

    Steps performed:
      1. Load the alert group and stop if it is resolved, acknowledged or silenced.
      2. Find the escalation policy snapshot whose ``order`` equals
         ``escalation_policy_snapshot_order``; stop with a warning if the escalation
         snapshot or the matching policy snapshot is missing.
      3. Write an "escalation triggered" log record for the step.
      4. If the organization has no Slack team identity or the snapshot has no Slack
         channel id, write an "escalation failed" log record and stop.
      5. Store the channel users in the policy snapshot's ``notify_to_users_queue``,
         schedule ``notify_user_task`` for each of them and write a log record per user.
      6. Unless Slack escalation is skipped/disabled for the alert group, schedule
         posting of the "Inviting <!channel>" message to the alert group thread.

    :param alert_group_pk: primary key of the AlertGroup being escalated.
    :param escalation_policy_snapshot_order: ``order`` value of the escalation policy
        snapshot that triggered this task.
    :return: None.

    Any exception raised here (e.g. by ``AlertGroup.objects.get``) is left to the task
    decorator, which is configured with ``autoretry_for=(Exception,)``.
    """
    from apps.alerts.models import AlertGroup, AlertGroupLogRecord, EscalationPolicy

    alert_group = AlertGroup.objects.get(pk=alert_group_pk)

    # check alert group state before notifying all users in the channel
    if alert_group.resolved or alert_group.acknowledged or alert_group.silenced:
        task_logger.info(f"alert_group {alert_group.pk} was resolved, acked or silenced forever. No need to notify all")
        return

    escalation_snapshot = alert_group.escalation_snapshot
    if escalation_snapshot is None:
        # Without a snapshot there is nothing to look the step up in. Bail out here instead of
        # failing with AttributeError below, which would only make the task retry.
        task_logger.warning(f"escalation_snapshot for alert_group {alert_group.pk} is not found. Skip step")
        return

    # escalation_policy_snapshot_order refers to order as defined in the policy,
    # which is unique but not necessarily sequential and may not start from zero
    escalation_policy_snapshot = next(
        (
            policy
            for policy in escalation_snapshot.escalation_policies_snapshots
            if policy.order == escalation_policy_snapshot_order
        ),
        None,
    )

    if not escalation_policy_snapshot:
        # The step has an incorrect order. Probably the order was changed manually with terraform.
        # It is a quick fix, tasks notify_all_task and notify_group_task should be refactored to avoid getting snapshot
        # by order
        task_logger.warning(
            f"escalation_policy_snapshot for alert_group {alert_group.pk} with order "
            f"{escalation_policy_snapshot_order} is not found. Skip step"
        )
        return

    escalation_policy_pk = escalation_policy_snapshot.id
    # .filter().first() yields None instead of raising when no policy with this pk exists
    escalation_policy = EscalationPolicy.objects.filter(pk=escalation_policy_pk).first()
    escalation_policy_step = escalation_policy_snapshot.step
    slack_channel_id = escalation_snapshot.slack_channel_id

    organization = alert_group.channel.organization
    slack_team_identity = organization.slack_team_identity

    # log that the step itself was triggered (no particular user yet)
    AlertGroupLogRecord(
        type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
        author=None,
        alert_group=alert_group,
        escalation_policy=escalation_policy,
        escalation_policy_step=escalation_policy_step,
    ).save()

    # we cannot notify a slack channel if team does not have slack team identity,
    # because we make a request to slack to get channel members
    if slack_team_identity is None or slack_channel_id is None:
        AlertGroupLogRecord(
            type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
            alert_group=alert_group,
            escalation_policy=escalation_policy,
            escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_NOTIFY_IN_SLACK,
            escalation_policy_step=escalation_policy_step,
        ).save()
        task_logger.debug(
            f"Failed to notify slack channel for alert_group {alert_group_pk} because slack team identity doesn't exist"
        )
        return

    # get users to notify
    users = slack_team_identity.get_users_from_slack_conversation_for_organization(
        channel_id=slack_channel_id,
        organization=organization,
    )

    # remember who is being notified by this step and persist the updated snapshot
    escalation_policy_snapshot.notify_to_users_queue = users
    escalation_snapshot.save_to_alert_group()

    reason = "notifying everyone in the channel"
    # each next user's notification task gets a countdown one larger than the previous one (0, 1, 2, ...)
    for countdown, user in enumerate(users):
        notify_user_task.apply_async(
            args=(
                user.pk,
                alert_group.pk,
            ),
            kwargs={"reason": reason, "prevent_posting_to_thread": True},
            countdown=countdown,
        )
        AlertGroupLogRecord(
            type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
            author=user,
            alert_group=alert_group,
            reason=reason.title(),
            escalation_policy=escalation_policy,
            escalation_policy_step=escalation_policy_step,
        ).save()

    if not alert_group.skip_escalation_in_slack and alert_group.notify_in_slack_enabled:
        text = "Inviting <!channel>. Reason: *Notify All* Step"
        # Start task that checks if slack message exists every 10 seconds for 24 hours and publish message
        # to thread if it does.
        check_slack_message_exists_before_post_message_to_thread.apply_async(
            args=(alert_group_pk, text),
            kwargs={
                "escalation_policy_pk": escalation_policy_pk,
                "escalation_policy_step": escalation_policy_step,
            },
            countdown=5,
        )
World, meet OnCall! Co-authored-by: Eve832 <eve.meelan@grafana.com> Co-authored-by: Francisco Montes de Oca <nevermind89x@gmail.com> Co-authored-by: Ildar Iskhakov <ildar.iskhakov@grafana.com> Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com> Co-authored-by: Julia <ferril.darkdiver@gmail.com> Co-authored-by: maskin25 <kengurek@gmail.com> Co-authored-by: Matias Bordese <mbordese@gmail.com> Co-authored-by: Matvey Kukuy <motakuk@gmail.com> Co-authored-by: Michael Derynck <michael.derynck@grafana.com> Co-authored-by: Richard Hartmann <richih@richih.org> Co-authored-by: Robby Milo <robbymilo@fastmail.com> Co-authored-by: Timur Olzhabayev <timur.olzhabayev@grafana.com> Co-authored-by: Vadim Stepanov <vadimkerr@gmail.com> Co-authored-by: Yulia Shanyrova <yulia.shanyrova@grafana.com> 2022-06-03 08:09:47 -06:00			`from django.conf import settings`

			`from apps.slack.tasks import check_slack_message_exists_before_post_message_to_thread`
			`from common.custom_celery_tasks import shared_dedicated_queue_retry_task`

			`from .notify_user import notify_user_task`
			`from .task_logger import task_logger`


			`@shared_dedicated_queue_retry_task(`
			`autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None`
			`)`
			`def notify_all_task(alert_group_pk, escalation_policy_snapshot_order=None):`
`apps.get_model` -> `import` (#2619) # What this PR does Remove [`apps.get_model`](https://docs.djangoproject.com/en/3.2/ref/applications/#django.apps.apps.get_model) invocations and use inline `import` statements in places where models are imported within functions/methods to avoid circular imports. I believe `import` statements are more appropriate for most use cases as they allow for better static code analysis & formatting, and solve the issue of circular imports without being unnecessarily dynamic as `apps.get_model`. With `import` statements, it's possible to: - Jump to model definitions in most IDEs - Automatically sort inline imports with `isort` - Find import errors faster/easier (most IDEs highlight broken imports) - Have more consistency across regular & inline imports when importing models This PR also adds a flake8 rule to ban imports of `django.apps.apps`, so it's harder to use `apps.get_model` by mistake (it's possible to ignore this rule by using `# noqa: I251`). The rule is not enforced on directories with migration files, because `apps.get_model` is often used to get a historical state of a model, which is useful when writing migrations ([see this SO answer for more details](https://stackoverflow.com/a/37769213)). So `apps.get_model` is considered OK in migrations (even necessary in some cases). ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) 2023-07-25 10:43:23 +01:00			`from apps.alerts.models import AlertGroup, AlertGroupLogRecord, EscalationPolicy`
World, meet OnCall! Co-authored-by: Eve832 <eve.meelan@grafana.com> Co-authored-by: Francisco Montes de Oca <nevermind89x@gmail.com> Co-authored-by: Ildar Iskhakov <ildar.iskhakov@grafana.com> Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com> Co-authored-by: Julia <ferril.darkdiver@gmail.com> Co-authored-by: maskin25 <kengurek@gmail.com> Co-authored-by: Matias Bordese <mbordese@gmail.com> Co-authored-by: Matvey Kukuy <motakuk@gmail.com> Co-authored-by: Michael Derynck <michael.derynck@grafana.com> Co-authored-by: Richard Hartmann <richih@richih.org> Co-authored-by: Robby Milo <robbymilo@fastmail.com> Co-authored-by: Timur Olzhabayev <timur.olzhabayev@grafana.com> Co-authored-by: Vadim Stepanov <vadimkerr@gmail.com> Co-authored-by: Yulia Shanyrova <yulia.shanyrova@grafana.com> 2022-06-03 08:09:47 -06:00
remove references to AlertGroup.is_archived and AlertGroup.unarchived_objects (#2524) # What this PR does This is a follow up to #2502 which started to remove logic to "archiving" alert groups. This PR: - removes all references to `AlertGroup.is_archived` and marks the column as deprecated. We will remove it in the next release - removes the `AlertGroup.unarchived_objects` `Manager` - renames the `AlertGroup.all_objects` `Manager` to `AlertGroup.objects` ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) 2023-07-18 13:48:34 +02:00			`alert_group = AlertGroup.objects.get(pk=alert_group_pk)`
World, meet OnCall! Co-authored-by: Eve832 <eve.meelan@grafana.com> Co-authored-by: Francisco Montes de Oca <nevermind89x@gmail.com> Co-authored-by: Ildar Iskhakov <ildar.iskhakov@grafana.com> Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com> Co-authored-by: Julia <ferril.darkdiver@gmail.com> Co-authored-by: maskin25 <kengurek@gmail.com> Co-authored-by: Matias Bordese <mbordese@gmail.com> Co-authored-by: Matvey Kukuy <motakuk@gmail.com> Co-authored-by: Michael Derynck <michael.derynck@grafana.com> Co-authored-by: Richard Hartmann <richih@richih.org> Co-authored-by: Robby Milo <robbymilo@fastmail.com> Co-authored-by: Timur Olzhabayev <timur.olzhabayev@grafana.com> Co-authored-by: Vadim Stepanov <vadimkerr@gmail.com> Co-authored-by: Yulia Shanyrova <yulia.shanyrova@grafana.com> 2022-06-03 08:09:47 -06:00
Fix the cause of retry of notify_all and notify_group tasks (#1376) Fix the cause of retry of notify_all and notify_group tasks that was related to an incorrect step order. 2023-02-23 10:28:13 +01:00			`# check alert group state before notifying all users in the channel`
			`if alert_group.resolved or alert_group.acknowledged or alert_group.silenced:`
			`task_logger.info(f"alert_group {alert_group.pk} was resolved, acked or silenced forever. No need to notify all")`
			`return`

World, meet OnCall! Co-authored-by: Eve832 <eve.meelan@grafana.com> Co-authored-by: Francisco Montes de Oca <nevermind89x@gmail.com> Co-authored-by: Ildar Iskhakov <ildar.iskhakov@grafana.com> Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com> Co-authored-by: Julia <ferril.darkdiver@gmail.com> Co-authored-by: maskin25 <kengurek@gmail.com> Co-authored-by: Matias Bordese <mbordese@gmail.com> Co-authored-by: Matvey Kukuy <motakuk@gmail.com> Co-authored-by: Michael Derynck <michael.derynck@grafana.com> Co-authored-by: Richard Hartmann <richih@richih.org> Co-authored-by: Robby Milo <robbymilo@fastmail.com> Co-authored-by: Timur Olzhabayev <timur.olzhabayev@grafana.com> Co-authored-by: Vadim Stepanov <vadimkerr@gmail.com> Co-authored-by: Yulia Shanyrova <yulia.shanyrova@grafana.com> 2022-06-03 08:09:47 -06:00			`escalation_snapshot = alert_group.escalation_snapshot`
Fix the cause of retry of notify_all and notify_group tasks (#1376) Fix the cause of retry of notify_all and notify_group tasks that was related to an incorrect step order. 2023-02-23 10:28:13 +01:00			`try:`
Fix order filtering when executing notify all/group steps from snapshot (#4381) Fixes https://github.com/grafana/oncall-private/issues/2708 2024-05-23 09:36:28 -03:00			`# escalation_policy_snapshot_order refers to order as defined in the policy,`
			`# which is unique but not necessarily sequential and may not start from zero`
			`escalation_policy_snapshot = [`
			`policy`
			`for policy in escalation_snapshot.escalation_policies_snapshots`
			`if policy.order == escalation_policy_snapshot_order`
			`][0]`
Fix the cause of retry of notify_all and notify_group tasks (#1376) Fix the cause of retry of notify_all and notify_group tasks that was related to an incorrect step order. 2023-02-23 10:28:13 +01:00			`except IndexError:`
			`escalation_policy_snapshot = None`

			`if not escalation_policy_snapshot:`
			`# The step has an incorrect order. Probably the order was changed manually with terraform.`
			`# It is a quick fix, tasks notify_all_task and notify_group_task should be refactored to avoid getting snapshot`
			`# by order`
			`task_logger.warning(`
			`f"escalation_policy_snapshot for alert_group {alert_group.pk} with order "`
			`f"{escalation_policy_snapshot_order} is not found. Skip step"`
			`)`
			`return`

World, meet OnCall! Co-authored-by: Eve832 <eve.meelan@grafana.com> Co-authored-by: Francisco Montes de Oca <nevermind89x@gmail.com> Co-authored-by: Ildar Iskhakov <ildar.iskhakov@grafana.com> Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com> Co-authored-by: Julia <ferril.darkdiver@gmail.com> Co-authored-by: maskin25 <kengurek@gmail.com> Co-authored-by: Matias Bordese <mbordese@gmail.com> Co-authored-by: Matvey Kukuy <motakuk@gmail.com> Co-authored-by: Michael Derynck <michael.derynck@grafana.com> Co-authored-by: Richard Hartmann <richih@richih.org> Co-authored-by: Robby Milo <robbymilo@fastmail.com> Co-authored-by: Timur Olzhabayev <timur.olzhabayev@grafana.com> Co-authored-by: Vadim Stepanov <vadimkerr@gmail.com> Co-authored-by: Yulia Shanyrova <yulia.shanyrova@grafana.com> 2022-06-03 08:09:47 -06:00			`escalation_policy_pk = escalation_policy_snapshot.id`
			`escalation_policy = EscalationPolicy.objects.filter(pk=escalation_policy_pk).first()`
			`escalation_policy_step = escalation_policy_snapshot.step`
			`slack_channel_id = escalation_snapshot.slack_channel_id`

			`countdown = 0`
			`slack_team_identity = alert_group.channel.organization.slack_team_identity`

			`AlertGroupLogRecord(`
			`type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,`
			`author=None,`
			`alert_group=alert_group,`
			`escalation_policy=escalation_policy,`
			`escalation_policy_step=escalation_policy_step,`
			`).save()`

			`# we cannot notify a slack channel if team does not have slack team identity,`
			`# because we make a request to slack to get channel members`
			`if slack_team_identity is None or slack_channel_id is None:`
			`AlertGroupLogRecord(`
			`type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,`
			`alert_group=alert_group,`
			`escalation_policy=escalation_policy,`
			`escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_NOTIFY_IN_SLACK,`
			`escalation_policy_step=escalation_policy_step,`
			`).save()`
			`task_logger.debug(`
			`f"Failed to notify slack channel for alert_group {alert_group_pk} because slack team identity doesn't exist"`
			`)`
			`return`

			`# get users to notify`
			`users = slack_team_identity.get_users_from_slack_conversation_for_organization(`
			`channel_id=slack_channel_id,`
			`organization=alert_group.channel.organization,`
			`)`

			`if escalation_snapshot is not None:`
			`escalation_policy_snapshot.notify_to_users_queue = users`
			`escalation_snapshot.save_to_alert_group()`

			`for user in users:`
			`reason = "notifying everyone in the channel"`

			`notify_user_task.apply_async(`
			`args=(`
			`user.pk,`
			`alert_group.pk,`
			`),`
			`kwargs={"reason": reason, "prevent_posting_to_thread": True},`
			`countdown=countdown,`
			`)`
			`countdown += 1`
			`AlertGroupLogRecord(`
			`type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,`
			`author=user,`
			`alert_group=alert_group,`
			`reason=reason.title(),`
			`escalation_policy=escalation_policy,`
			`escalation_policy_step=escalation_policy_step,`
			`).save()`

			`if not alert_group.skip_escalation_in_slack and alert_group.notify_in_slack_enabled:`
			`text = "Inviting <!channel>. Reason: Notify All Step"`
			`# Start task that checks if slack message exists every 10 seconds for 24 hours and publish message`
			`# to thread if it does.`
			`check_slack_message_exists_before_post_message_to_thread.apply_async(`
			`args=(alert_group_pk, text),`
			`kwargs={`
			`"escalation_policy_pk": escalation_policy_pk,`
			`"escalation_policy_step": escalation_policy_step,`
			`},`
			`countdown=5,`
			`)`