Add task logging personal notifications triggered/completed counts (#3638)
Related to https://github.com/grafana/oncall-private/issues/2347
This commit is contained in:
parent
16dfa60a92
commit
4e2e7e0a15
4 changed files with 66 additions and 4 deletions
|
|
@ -82,13 +82,13 @@ def audit_alert_group_escalation(alert_group: "AlertGroup") -> None:
|
|||
f"{base_msg}'s escalation snapshot has {num_of_executed_escalation_policy_snapshots} executed escalation policies"
|
||||
)
|
||||
|
||||
check_personal_notifications_task.apply_async((alert_group_id,))
|
||||
check_alert_group_personal_notifications_task.apply_async((alert_group_id,))
|
||||
|
||||
task_logger.info(f"{base_msg} passed the audit checks")
|
||||
|
||||
|
||||
@shared_task
|
||||
def check_personal_notifications_task(alert_group_id) -> None:
|
||||
def check_alert_group_personal_notifications_task(alert_group_id) -> None:
|
||||
# Check personal notifications are completed
|
||||
# triggered (< 5min ago) == failed + success
|
||||
from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord
|
||||
|
|
@ -115,6 +115,54 @@ def check_personal_notifications_task(alert_group_id) -> None:
|
|||
task_logger.info(f"{base_msg} personal notifications check passed")
|
||||
|
||||
|
||||
@shared_task
|
||||
def check_personal_notifications_task() -> None:
    """
    This task checks that triggered personal notifications are completed.

    It counts, for alert groups started in the last 2 days, the personal
    notification log records (NOTIFY step) created in the last 20 minutes:

    - triggered: records of type TYPE_PERSONAL_NOTIFICATION_TRIGGERED
    - completed: records of type TYPE_PERSONAL_NOTIFICATION_FAILED or
      TYPE_PERSONAL_NOTIFICATION_SUCCESS

    Both counts use the exact same inclusive time window so they can be
    compared directly. The values are written to the task log as
    ``personal_notifications_triggered=<n> personal_notifications_completed=<m>``
    to be used as metrics.

    Attention: don't retry this task, the idea is to be alerted of failures
    """
    # imported locally, as the other tasks in this module do
    from apps.alerts.models import AlertGroup
    from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord

    # use readonly database if available
    readonly_db = get_random_readonly_database_key_if_present_otherwise_default()

    now = timezone.now()

    # consider alert groups from the last 2 days
    alert_groups = AlertGroup.objects.using(readonly_db).filter(
        started_at__range=(now - timezone.timedelta(days=2), now),
    )

    # review notifications triggered in the last 20-minute window
    # (task should run periodically about every 15 minutes)
    since = now - timezone.timedelta(minutes=20)

    # shared base query: NOTIFY-step records for those alert groups created
    # within [since, now]; building it once keeps the window boundaries
    # identical for both counts
    notify_records_qs = UserNotificationPolicyLogRecord.objects.using(readonly_db).filter(
        notification_step=UserNotificationPolicy.Step.NOTIFY,
        created_at__range=(since, now),
        alert_group__in=alert_groups,
    )

    # personal notifications triggered in the given window for those alert groups
    triggered = notify_records_qs.filter(
        type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED,
    ).count()

    # personal notifications completed (failed or succeeded) in the given window
    # for those alert groups
    completed = notify_records_qs.filter(
        type__in=(
            UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
            UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_SUCCESS,
        ),
    ).count()

    task_logger.info(f"personal_notifications_triggered={triggered} personal_notifications_completed={completed}")
|
||||
|
||||
|
||||
@shared_task
|
||||
def check_escalation_finished_task() -> None:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from apps.alerts.models import EscalationPolicy
|
|||
from apps.alerts.tasks.check_escalation_finished import (
|
||||
AlertGroupEscalationPolicyExecutionAuditException,
|
||||
audit_alert_group_escalation,
|
||||
check_alert_group_personal_notifications_task,
|
||||
check_escalation_finished_task,
|
||||
check_personal_notifications_task,
|
||||
send_alert_group_escalation_auditor_task_heartbeat,
|
||||
|
|
@ -502,15 +503,22 @@ def test_check_escalation_finished_task_calls_audit_alert_group_personal_notific
|
|||
alert_group4.personal_log_records.update(created_at=now - timezone.timedelta(minutes=2))
|
||||
|
||||
# trigger task
|
||||
with patch("apps.alerts.tasks.check_escalation_finished.check_personal_notifications_task") as mock_check_notif:
|
||||
with patch(
|
||||
"apps.alerts.tasks.check_escalation_finished.check_alert_group_personal_notifications_task"
|
||||
) as mock_check_notif:
|
||||
check_escalation_finished_task()
|
||||
|
||||
for alert_group in alert_groups:
|
||||
mock_check_notif.apply_async.assert_any_call((alert_group.id,))
|
||||
check_personal_notifications_task(alert_group.id)
|
||||
check_alert_group_personal_notifications_task(alert_group.id)
|
||||
if alert_group == alert_group3:
|
||||
assert f"Alert group {alert_group3.id} has (1) uncompleted personal notifications" in caplog.text
|
||||
else:
|
||||
assert f"Alert group {alert_group.id} personal notifications check passed" in caplog.text
|
||||
|
||||
mocked_send_alert_group_escalation_auditor_task_heartbeat.assert_called()
|
||||
|
||||
# also trigger the general personal notification checker
|
||||
check_personal_notifications_task()
|
||||
|
||||
assert "personal_notifications_triggered=4 personal_notifications_completed=2" in caplog.text
|
||||
|
|
|
|||
|
|
@ -586,6 +586,11 @@ if ESCALATION_AUDITOR_ENABLED:
|
|||
),
|
||||
"args": (),
|
||||
}
|
||||
CELERY_BEAT_SCHEDULE["check_personal_notifications"] = {
|
||||
"task": "apps.alerts.tasks.check_escalation_finished.check_personal_notifications_task",
|
||||
"schedule": crontab(minute="*/15"), # every 15 minutes
|
||||
"args": (),
|
||||
}
|
||||
|
||||
INTERNAL_IPS = ["127.0.0.1"]
|
||||
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ CELERY_TASK_ROUTES = {
|
|||
"apps.alerts.tasks.alert_group_web_title_cache.update_web_title_cache_for_alert_receive_channel": {"queue": "long"},
|
||||
"apps.alerts.tasks.alert_group_web_title_cache.update_web_title_cache": {"queue": "long"},
|
||||
"apps.alerts.tasks.check_escalation_finished.check_escalation_finished_task": {"queue": "long"},
|
||||
"apps.alerts.tasks.check_escalation_finished.check_alert_group_personal_notifications_task": {"queue": "long"},
|
||||
"apps.alerts.tasks.check_escalation_finished.check_personal_notifications_task": {"queue": "long"},
|
||||
"apps.grafana_plugin.tasks.sync.cleanup_organization_async": {"queue": "long"},
|
||||
"apps.grafana_plugin.tasks.sync.start_cleanup_deleted_organizations": {"queue": "long"},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue