Add notifications success ratio log to auditor (#3312)
# What this PR does

This PR adds a log of the alert group notifications success ratio over the last 48 hours to the escalation auditor task.

## Which issue(s) this PR fixes

## Checklist

- [ ] Unit, integration, and e2e (if applicable) tests updated
- [ ] Documentation added (or `pr:no public docs` PR label added if not required)
- [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
This commit is contained in:
parent
e41ccb9d9b
commit
784c5ee7c1
2 changed files with 20 additions and 4 deletions
|
|
@ -99,9 +99,14 @@ def check_escalation_finished_task() -> None:
|
|||
now = timezone.now() - datetime.timedelta(minutes=5)
|
||||
two_days_ago = now - datetime.timedelta(days=2)
|
||||
|
||||
alert_groups = AlertGroup.objects.using(
|
||||
get_random_readonly_database_key_if_present_otherwise_default()
|
||||
).filter_active(started_at__range=(two_days_ago, now))
|
||||
# Total alert groups over last 2 days
|
||||
alert_groups = AlertGroup.objects.using(get_random_readonly_database_key_if_present_otherwise_default()).filter(
|
||||
started_at__range=(two_days_ago, now),
|
||||
)
|
||||
total_alert_groups_count = alert_groups.count()
|
||||
|
||||
# Filter alert groups with active escalations (that could fail)
|
||||
alert_groups = alert_groups.filter_active()
|
||||
|
||||
task_logger.info(
|
||||
f"There are {len(alert_groups)} alert group(s) to audit"
|
||||
|
|
@ -117,6 +122,14 @@ def check_escalation_finished_task() -> None:
|
|||
except AlertGroupEscalationPolicyExecutionAuditException:
|
||||
alert_group_ids_that_failed_audit.append(str(alert_group.id))
|
||||
|
||||
failed_alert_groups_count = len(alert_group_ids_that_failed_audit)
|
||||
success_ratio = (
|
||||
100
|
||||
if total_alert_groups_count == 0
|
||||
else (total_alert_groups_count - failed_alert_groups_count) / total_alert_groups_count * 100
|
||||
)
|
||||
task_logger.info(f"Alert group notifications success ratio: {success_ratio:.2f}")
|
||||
|
||||
if alert_group_ids_that_failed_audit:
|
||||
msg = f"The following alert group id(s) failed auditing: {', '.join(alert_group_ids_that_failed_audit)}"
|
||||
|
||||
|
|
|
|||
|
|
@ -341,12 +341,13 @@ def test_check_escalation_finished_task_simply_calls_heartbeat_when_no_alert_gro
|
|||
@patch("apps.alerts.tasks.check_escalation_finished.audit_alert_group_escalation")
|
||||
@patch("apps.alerts.tasks.check_escalation_finished.send_alert_group_escalation_auditor_task_heartbeat")
|
||||
@pytest.mark.django_db
|
||||
def test_check_escalation_finished_task_calls_audit_alert_group_escalation_for_every_alert_group_even_if_one_fails(
|
||||
def test_check_escalation_finished_task_calls_audit_alert_group_escalation_for_every_alert_group_even_if_one_fails_and_returns_success_ratio(
|
||||
mocked_send_alert_group_escalation_auditor_task_heartbeat,
|
||||
mocked_audit_alert_group_escalation,
|
||||
make_organization_and_user,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group_that_started_at_specific_date,
|
||||
caplog,
|
||||
):
|
||||
organization, _ = make_organization_and_user()
|
||||
alert_receive_channel = make_alert_receive_channel(organization)
|
||||
|
|
@ -370,6 +371,8 @@ def test_check_escalation_finished_task_calls_audit_alert_group_escalation_for_e
|
|||
assert str(alert_group1.id) in error_msg
|
||||
assert str(alert_group2.id) in error_msg
|
||||
|
||||
assert "Alert group notifications success ratio: 33.33" in caplog.text
|
||||
|
||||
mocked_audit_alert_group_escalation.assert_any_call(alert_group1)
|
||||
mocked_audit_alert_group_escalation.assert_any_call(alert_group2)
|
||||
mocked_audit_alert_group_escalation.assert_any_call(alert_group3)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue