minor improvements for check_escalation_finished celery task (#2554)

# What this PR does

This PR adds some enhancements to the `check_escalation_finished` celery
task. It short-circuits auditing of an alert group if it does not have
an escalation chain associated with it. In
`EscalationSnapshotMixin.start_escalation_if_needed`
we will not set `raw_escalation_snapshot`
([here](https://github.com/grafana/oncall/blob/dev/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py#L262))
in this case:
```python3
def start_escalation_if_needed(self, countdown=START_ESCALATION_DELAY, eta=None):
        """
        :type self:AlertGroup
        """
        AlertGroup = apps.get_model("alerts", "AlertGroup")

        is_on_maintenace_or_debug_mode = self.channel.maintenance_mode is not None

        if (
            self.is_restricted
            or is_on_maintenace_or_debug_mode
            or self.pause_escalation
            or not self.escalation_chain_exists <-- here
        ):
            logger.debug(
                f"Not escalating alert group w/ pk: {self.pk}\n"
                f"is_restricted: {self.is_restricted}\n"
                f"is_on_maintenace_or_debug_mode: {is_on_maintenace_or_debug_mode}\n"
                f"pause_escalation: {self.pause_escalation}\n"
                f"escalation_chain_exists: {self.escalation_chain_exists}"
            )
            return

        logger.debug(f"Start escalation for alert group with pk: {self.pk}")

        # take raw escalation snapshot from db if escalation is paused
        raw_escalation_snapshot = (
            self.build_raw_escalation_snapshot() if not self.pause_escalation else self.raw_escalation_snapshot
        )
        task_id = celery_uuid()

        AlertGroup.all_objects.filter(pk=self.pk,).update(
            active_escalation_id=task_id,
            is_escalation_finished=False,
            raw_escalation_snapshot=raw_escalation_snapshot,
        )
```

`EscalationSnapshotMixin.escalation_chain_exists` is as such:
```python3
@property
    def escalation_chain_exists(self) -> bool:
        if self.pause_escalation:
            return False
        elif not self.channel_filter:
            return False
        return self.channel_filter.escalation_chain is not None
```

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [ ] Documentation added (or `pr:no public docs` PR label added if not
required) (N/A)
- [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required) (N/A)
This commit is contained in:
Joey Orlando 2023-07-17 16:04:53 +02:00 committed by GitHub
parent 2aab2a3e04
commit d5b43b0439
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 8 deletions

View file

@ -34,10 +34,19 @@ def audit_alert_group_escalation(alert_group: "AlertGroup") -> None:
alert_group_id = alert_group.id
base_msg = f"Alert group {alert_group_id}"
if not escalation_snapshot:
raise AlertGroupEscalationPolicyExecutionAuditException(
f"{base_msg} does not have an escalation snapshot associated with it, this should never occur"
if not alert_group.escalation_chain_exists:
task_logger.info(
f"{base_msg} does not have an escalation chain associated with it, and therefore it is expected "
"that it will not have an escalation snapshot, skipping further validation"
)
return
if not escalation_snapshot:
msg = f"{base_msg} does not have an escalation snapshot associated with it, this should never occur"
task_logger.warning(msg)
raise AlertGroupEscalationPolicyExecutionAuditException(msg)
task_logger.info(f"{base_msg} has an escalation snapshot associated with it, auditing if it executed properly")
escalation_policies_snapshots = escalation_snapshot.escalation_policies_snapshots
@ -118,8 +127,11 @@ def check_escalation_finished_task() -> None:
started_at__range=(two_days_ago, now),
)
if not alert_groups.exists():
task_logger.info("There are no alert groups to audit, everything is good :)")
task_logger.info(
f"There are {len(alert_groups)} alert group(s) to audit"
if alert_groups.exists()
else "There are no alert groups to audit, everything is good :)"
)
alert_group_ids_that_failed_audit: typing.List[str] = []
@ -130,8 +142,10 @@ def check_escalation_finished_task() -> None:
alert_group_ids_that_failed_audit.append(str(alert_group.id))
if alert_group_ids_that_failed_audit:
raise AlertGroupEscalationPolicyExecutionAuditException(
f"The following alert group id(s) failed auditing: {', '.join(alert_group_ids_that_failed_audit)}"
)
msg = f"The following alert group id(s) failed auditing: {', '.join(alert_group_ids_that_failed_audit)}"
task_logger.warning(msg)
raise AlertGroupEscalationPolicyExecutionAuditException(msg)
task_logger.info("There were no alert groups that failed auditing")
send_alert_group_escalation_auditor_task_heartbeat()

View file

@ -80,6 +80,27 @@ def test_send_alert_group_escalation_auditor_task_heartbeat_raises_an_exception_
mock_requests.get.return_value.raise_for_status.assert_called_once_with()
@pytest.mark.django_db
def test_audit_alert_group_escalation_skips_validation_if_the_alert_group_does_not_have_an_escalation_chain(
make_organization_and_user,
make_alert_receive_channel,
make_alert_group,
):
organization, _ = make_organization_and_user()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
alert_group.escalation_snapshot = None
alert_group.save()
assert alert_group.escalation_chain_exists is False
try:
audit_alert_group_escalation(alert_group)
except AlertGroupEscalationPolicyExecutionAuditException:
pytest.fail()
@pytest.mark.django_db
def test_audit_alert_group_escalation_raises_exception_if_the_alert_group_does_not_have_an_escalation_snapshot(
escalation_snapshot_test_setup,