minor improvements for check_escalation_finished celery task (#2554)
# What this PR does This PR adds some enhancements to the `check_escalation_finished` celery task. It short-circuits auditing of an alert group if it does not have an escalation chain associated with it. In `EscalationSnapshotMixin.start_escalation_if_needed` we will not set `raw_escalation_snapshot` ([here](https://github.com/grafana/oncall/blob/dev/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py#L262)) in this case: ```python3 def start_escalation_if_needed(self, countdown=START_ESCALATION_DELAY, eta=None): """ :type self:AlertGroup """ AlertGroup = apps.get_model("alerts", "AlertGroup") is_on_maintenace_or_debug_mode = self.channel.maintenance_mode is not None if ( self.is_restricted or is_on_maintenace_or_debug_mode or self.pause_escalation or not self.escalation_chain_exists <-- here ): logger.debug( f"Not escalating alert group w/ pk: {self.pk}\n" f"is_restricted: {self.is_restricted}\n" f"is_on_maintenace_or_debug_mode: {is_on_maintenace_or_debug_mode}\n" f"pause_escalation: {self.pause_escalation}\n" f"escalation_chain_exists: {self.escalation_chain_exists}" ) return logger.debug(f"Start escalation for alert group with pk: {self.pk}") # take raw escalation snapshot from db if escalation is paused raw_escalation_snapshot = ( self.build_raw_escalation_snapshot() if not self.pause_escalation else self.raw_escalation_snapshot ) task_id = celery_uuid() AlertGroup.all_objects.filter(pk=self.pk,).update( active_escalation_id=task_id, is_escalation_finished=False, raw_escalation_snapshot=raw_escalation_snapshot, ) ``` `EscalationSnapshotMixin.escalation_chain_exists` is as such: ```python3 @property def escalation_chain_exists(self) -> bool: if self.pause_escalation: return False elif not self.channel_filter: return False return self.channel_filter.escalation_chain is not None ``` ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) (N/A) - [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) (N/A)
This commit is contained in:
parent
2aab2a3e04
commit
d5b43b0439
2 changed files with 43 additions and 8 deletions
|
|
@ -34,10 +34,19 @@ def audit_alert_group_escalation(alert_group: "AlertGroup") -> None:
|
|||
alert_group_id = alert_group.id
|
||||
base_msg = f"Alert group {alert_group_id}"
|
||||
|
||||
if not escalation_snapshot:
|
||||
raise AlertGroupEscalationPolicyExecutionAuditException(
|
||||
f"{base_msg} does not have an escalation snapshot associated with it, this should never occur"
|
||||
if not alert_group.escalation_chain_exists:
|
||||
task_logger.info(
|
||||
f"{base_msg} does not have an escalation chain associated with it, and therefore it is expected "
|
||||
"that it will not have an escalation snapshot, skipping further validation"
|
||||
)
|
||||
return
|
||||
|
||||
if not escalation_snapshot:
|
||||
msg = f"{base_msg} does not have an escalation snapshot associated with it, this should never occur"
|
||||
|
||||
task_logger.warning(msg)
|
||||
raise AlertGroupEscalationPolicyExecutionAuditException(msg)
|
||||
|
||||
task_logger.info(f"{base_msg} has an escalation snapshot associated with it, auditing if it executed properly")
|
||||
|
||||
escalation_policies_snapshots = escalation_snapshot.escalation_policies_snapshots
|
||||
|
|
@ -118,8 +127,11 @@ def check_escalation_finished_task() -> None:
|
|||
started_at__range=(two_days_ago, now),
|
||||
)
|
||||
|
||||
if not alert_groups.exists():
|
||||
task_logger.info("There are no alert groups to audit, everything is good :)")
|
||||
task_logger.info(
|
||||
f"There are {len(alert_groups)} alert group(s) to audit"
|
||||
if alert_groups.exists()
|
||||
else "There are no alert groups to audit, everything is good :)"
|
||||
)
|
||||
|
||||
alert_group_ids_that_failed_audit: typing.List[str] = []
|
||||
|
||||
|
|
@ -130,8 +142,10 @@ def check_escalation_finished_task() -> None:
|
|||
alert_group_ids_that_failed_audit.append(str(alert_group.id))
|
||||
|
||||
if alert_group_ids_that_failed_audit:
|
||||
raise AlertGroupEscalationPolicyExecutionAuditException(
|
||||
f"The following alert group id(s) failed auditing: {', '.join(alert_group_ids_that_failed_audit)}"
|
||||
)
|
||||
msg = f"The following alert group id(s) failed auditing: {', '.join(alert_group_ids_that_failed_audit)}"
|
||||
|
||||
task_logger.warning(msg)
|
||||
raise AlertGroupEscalationPolicyExecutionAuditException(msg)
|
||||
|
||||
task_logger.info("There were no alert groups that failed auditing")
|
||||
send_alert_group_escalation_auditor_task_heartbeat()
|
||||
|
|
|
|||
|
|
@ -80,6 +80,27 @@ def test_send_alert_group_escalation_auditor_task_heartbeat_raises_an_exception_
|
|||
mock_requests.get.return_value.raise_for_status.assert_called_once_with()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_audit_alert_group_escalation_skips_validation_if_the_alert_group_does_not_have_an_escalation_chain(
|
||||
make_organization_and_user,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group,
|
||||
):
|
||||
organization, _ = make_organization_and_user()
|
||||
alert_receive_channel = make_alert_receive_channel(organization)
|
||||
alert_group = make_alert_group(alert_receive_channel)
|
||||
|
||||
alert_group.escalation_snapshot = None
|
||||
alert_group.save()
|
||||
|
||||
assert alert_group.escalation_chain_exists is False
|
||||
|
||||
try:
|
||||
audit_alert_group_escalation(alert_group)
|
||||
except AlertGroupEscalationPolicyExecutionAuditException:
|
||||
pytest.fail()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_audit_alert_group_escalation_raises_exception_if_the_alert_group_does_not_have_an_escalation_snapshot(
|
||||
escalation_snapshot_test_setup,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue