Add transaction on_commit before signals for alert group actions (#3731)

# What this PR does
Wrap alert group log record creation in a transaction and use
`transaction.on_commit` to send signals only after the log record is
committed, since the signals pass the log record's DB id. For deletes, we can
then assume that any ID a task cannot find belongs to an intentionally
deleted alert group, so the task can stop instead of retrying endlessly.

## Which issue(s) this PR fixes

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
This commit is contained in:
Michael Derynck 2024-01-31 15:54:50 -07:00 committed by GitHub
parent 14feaba3d1
commit 2a466a0c4f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 352 additions and 282 deletions

View file

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased ## Unreleased
### Changed
- Ensure alert group log records are committed to DB before signalling about them @mderynck([#3731](https://github.com/grafana/oncall/pull/3731))
### Fixed ### Fixed
- Address `SlackAPIRatelimitError` exceptions in `apps.slack.tasks.send_message_to_thread_if_bot_not_in_channel` task - Address `SlackAPIRatelimitError` exceptions in `apps.slack.tasks.send_message_to_thread_if_bot_not_in_channel` task

View file

@ -20,8 +20,13 @@ from apps.alerts.escalation_snapshot.escalation_snapshot_mixin import START_ESCA
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
from apps.alerts.incident_appearance.renderers.slack_renderer import AlertGroupSlackRenderer from apps.alerts.incident_appearance.renderers.slack_renderer import AlertGroupSlackRenderer
from apps.alerts.incident_log_builder import IncidentLogBuilder from apps.alerts.incident_log_builder import IncidentLogBuilder
from apps.alerts.signals import alert_group_action_triggered_signal, alert_group_created_signal from apps.alerts.signals import alert_group_created_signal
from apps.alerts.tasks import acknowledge_reminder_task, send_alert_group_signal, unsilence_task from apps.alerts.tasks import (
acknowledge_reminder_task,
send_alert_group_signal,
send_alert_group_signal_for_delete,
unsilence_task,
)
from apps.metrics_exporter.tasks import update_metrics_for_alert_group from apps.metrics_exporter.tasks import update_metrics_for_alert_group
from apps.slack.slack_formatter import SlackFormatter from apps.slack.slack_formatter import SlackFormatter
from apps.user_management.models import User from apps.user_management.models import User
@ -639,20 +644,17 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
self.stop_escalation() self.stop_escalation()
self.start_ack_reminder_if_needed() self.start_ack_reminder_if_needed()
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_ACK, author=user, action_source=action_source log_record = self.log_records.create(
) type=AlertGroupLogRecord.TYPE_ACK, author=user, action_source=action_source
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: {action_source}" f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.acknowledge_by_user,
log_record=log_record.pk,
action_source=action_source,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.acknowledge_by_user(user, action_source=action_source) dependent_alert_group.acknowledge_by_user(user, action_source=action_source)
@ -679,18 +681,15 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
) )
self.stop_escalation() self.stop_escalation()
log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_ACK) with transaction.atomic():
log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_ACK)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: alert" f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: alert"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.acknowledge_by_source,
log_record=log_record.pk,
action_source=None,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.acknowledge_by_source() dependent_alert_group.acknowledge_by_source()
@ -707,20 +706,17 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
if self.is_root_alert_group: if self.is_root_alert_group:
self.start_escalation_if_needed() self.start_escalation_if_needed()
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_UN_ACK, author=user, action_source=action_source log_record = self.log_records.create(
) type=AlertGroupLogRecord.TYPE_UN_ACK, author=user, action_source=action_source
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: {action_source}" f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.un_acknowledge_by_user,
log_record=log_record.pk,
action_source=action_source,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.un_acknowledge_by_user(user, action_source=action_source) dependent_alert_group.un_acknowledge_by_user(user, action_source=action_source)
@ -745,20 +741,18 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
# Update alert group state and response time metrics cache # Update alert group state and response time metrics cache
self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state) self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state)
self.stop_escalation() self.stop_escalation()
log_record = self.log_records.create(
type=AlertGroupLogRecord.TYPE_RESOLVED, author=user, action_source=action_source
)
logger.debug( with transaction.atomic():
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " log_record = self.log_records.create(
f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: {action_source}" type=AlertGroupLogRecord.TYPE_RESOLVED, author=user, action_source=action_source
) )
alert_group_action_triggered_signal.send( logger.debug(
sender=self.resolve_by_user, f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
log_record=log_record.pk, f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: {action_source}"
action_source=action_source, )
)
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.resolve_by_user(user, action_source=action_source) dependent_alert_group.resolve_by_user(user, action_source=action_source)
@ -782,18 +776,16 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
organization_id=self.channel.organization_id, previous_state=initial_state, state=self.state organization_id=self.channel.organization_id, previous_state=initial_state, state=self.state
) )
self.stop_escalation() self.stop_escalation()
log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED)
logger.debug( with transaction.atomic():
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED)
f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: alert"
)
alert_group_action_triggered_signal.send( logger.debug(
sender=self.resolve_by_source, f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
log_record=log_record.pk, f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: alert"
action_source=None, )
)
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.resolve_by_source() dependent_alert_group.resolve_by_source()
@ -809,18 +801,16 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
organization_id=self.channel.organization_id, previous_state=initial_state, state=self.state organization_id=self.channel.organization_id, previous_state=initial_state, state=self.state
) )
self.stop_escalation() self.stop_escalation()
log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED)
logger.debug( with transaction.atomic():
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED)
f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: resolve step"
)
alert_group_action_triggered_signal.send( logger.debug(
sender=self.resolve_by_last_step, f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
log_record=log_record.pk, f"log record {log_record.pk} with type '{log_record.get_type_display()}', action source: resolve step"
action_source=None, )
)
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.resolve_by_last_step() dependent_alert_group.resolve_by_last_step()
@ -830,19 +820,17 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
self.resolve(resolved_by=AlertGroup.DISABLE_MAINTENANCE) self.resolve(resolved_by=AlertGroup.DISABLE_MAINTENANCE)
self.stop_escalation() self.stop_escalation()
log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED)
logger.debug( with transaction.atomic():
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED)
f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: disable maintenance"
)
alert_group_action_triggered_signal.send( logger.debug(
sender=self.resolve_by_disable_maintenance, f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
log_record=log_record.pk, f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
action_source=None, f"action source: disable maintenance"
) )
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.resolve_by_disable_maintenance() dependent_alert_group.resolve_by_disable_maintenance()
@ -856,24 +844,21 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
# Update alert group state metric cache # Update alert group state metric cache
self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state) self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state)
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user, action_source=action_source log_record = self.log_records.create(
) type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user, action_source=action_source
)
if self.is_root_alert_group: if self.is_root_alert_group:
self.start_escalation_if_needed() self.start_escalation_if_needed()
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: {action_source}" f"action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.un_resolve_by_user,
log_record=log_record.pk,
action_source=action_source,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.un_resolve_by_user(user, action_source=action_source) dependent_alert_group.un_resolve_by_user(user, action_source=action_source)
@ -898,25 +883,22 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
if not root_alert_group.silenced and self.silenced: if not root_alert_group.silenced and self.silenced:
self.un_silence_by_user(user, action_source=action_source) self.un_silence_by_user(user, action_source=action_source)
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_ATTACHED, log_record = self.log_records.create(
author=user, type=AlertGroupLogRecord.TYPE_ATTACHED,
root_alert_group=root_alert_group, author=user,
reason="Attach dropdown", root_alert_group=root_alert_group,
action_source=action_source, reason="Attach dropdown",
) action_source=action_source,
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: {action_source}" f"action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.attach_by_user,
log_record=log_record.pk,
action_source=action_source,
)
log_record_for_root_incident = root_alert_group.log_records.create( log_record_for_root_incident = root_alert_group.log_records.create(
type=AlertGroupLogRecord.TYPE_ATTACHED, type=AlertGroupLogRecord.TYPE_ATTACHED,
@ -932,11 +914,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
f"'{log_record_for_root_incident.get_type_display()}', action source: {action_source}" f"'{log_record_for_root_incident.get_type_display()}', action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record_for_root_incident.pk))
sender=self.attach_by_user,
log_record=log_record_for_root_incident.pk,
action_source=action_source,
)
else: else:
log_record = self.log_records.create( log_record = self.log_records.create(
@ -953,11 +931,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
f"action source: {action_source}" f"action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.attach_by_user,
log_record=log_record.pk,
action_source=action_source,
)
def un_attach_by_user(self, user: User, action_source: typing.Optional[ActionSource] = None) -> None: def un_attach_by_user(self, user: User, action_source: typing.Optional[ActionSource] = None) -> None:
from apps.alerts.models import AlertGroupLogRecord from apps.alerts.models import AlertGroupLogRecord
@ -968,45 +942,38 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
self.start_escalation_if_needed() self.start_escalation_if_needed()
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_UNATTACHED, log_record = self.log_records.create(
author=user, type=AlertGroupLogRecord.TYPE_UNATTACHED,
root_alert_group=root_alert_group, author=user,
reason="Unattach button", root_alert_group=root_alert_group,
action_source=action_source, reason="Unattach button",
) action_source=action_source,
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: {action_source}" f"action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.un_attach_by_user,
log_record=log_record.pk,
action_source=action_source,
)
log_record_for_root_incident = root_alert_group.log_records.create( log_record_for_root_incident = root_alert_group.log_records.create(
type=AlertGroupLogRecord.TYPE_UNATTACHED, type=AlertGroupLogRecord.TYPE_UNATTACHED,
author=user, author=user,
dependent_alert_group=self, dependent_alert_group=self,
reason="Unattach dropdown", reason="Unattach dropdown",
action_source=action_source, action_source=action_source,
) )
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {root_alert_group.pk}, " f"send alert_group_action_triggered_signal for alert_group {root_alert_group.pk}, "
f"log record {log_record_for_root_incident.pk} " f"log record {log_record_for_root_incident.pk} "
f"with type '{log_record_for_root_incident.get_type_display()}', action source: {action_source}" f"with type '{log_record_for_root_incident.get_type_display()}', action source: {action_source}"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record_for_root_incident.pk))
sender=self.un_attach_by_user,
log_record=log_record_for_root_incident.pk,
action_source=action_source,
)
def un_attach_by_delete(self): def un_attach_by_delete(self):
from apps.alerts.models import AlertGroupLogRecord from apps.alerts.models import AlertGroupLogRecord
@ -1016,22 +983,19 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
self.start_escalation_if_needed() self.start_escalation_if_needed()
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_UNATTACHED, log_record = self.log_records.create(
reason="Unattach by deleting root incident", type=AlertGroupLogRecord.TYPE_UNATTACHED,
) reason="Unattach by deleting root incident",
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: delete" f"action source: delete"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.un_attach_by_delete,
log_record=log_record.pk,
action_source=None,
)
def silence_by_user( def silence_by_user(
self, user: User, silence_delay: typing.Optional[int], action_source: typing.Optional[ActionSource] = None self, user: User, silence_delay: typing.Optional[int], action_source: typing.Optional[ActionSource] = None
@ -1086,25 +1050,23 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
# Update alert group state and response time metrics cache # Update alert group state and response time metrics cache
self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state) self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state)
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_SILENCE, log_record = self.log_records.create(
author=user, type=AlertGroupLogRecord.TYPE_SILENCE,
silence_delay=silence_delay_timedelta, author=user,
reason="Silence button", silence_delay=silence_delay_timedelta,
action_source=action_source, reason="Silence button",
) action_source=action_source,
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: {action_source}" f"action source: {action_source}"
) )
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
alert_group_action_triggered_signal.send(
sender=self.silence_by_user,
log_record=log_record.pk,
action_source=action_source,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.silence_by_user(user, silence_delay, action_source) dependent_alert_group.silence_by_user(user, silence_delay, action_source)
@ -1120,26 +1082,24 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
if self.is_root_alert_group: if self.is_root_alert_group:
self.start_escalation_if_needed() self.start_escalation_if_needed()
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_UN_SILENCE, log_record = self.log_records.create(
author=user, type=AlertGroupLogRecord.TYPE_UN_SILENCE,
silence_delay=None, author=user,
# 2.Look like some time ago there was no TYPE_UN_SILENCE silence_delay=None,
reason="Unsilence button", # 2.Look like some time ago there was no TYPE_UN_SILENCE
action_source=action_source, reason="Unsilence button",
) action_source=action_source,
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: {action_source}" f"action source: {action_source}"
) )
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
alert_group_action_triggered_signal.send(
sender=self.un_silence_by_user,
log_record=log_record.pk,
action_source=action_source,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.un_silence_by_user(user, action_source=action_source) dependent_alert_group.un_silence_by_user(user, action_source=action_source)
@ -1169,22 +1129,19 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
# Update alert group state and response time metrics cache # Update alert group state and response time metrics cache
self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state) self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state)
log_record = self.log_records.create( with transaction.atomic():
type=AlertGroupLogRecord.TYPE_WIPED, log_record = self.log_records.create(
author=user, type=AlertGroupLogRecord.TYPE_WIPED,
) author=user,
)
logger.debug( logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, " f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', " f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: wipe" f"action source: wipe"
) )
alert_group_action_triggered_signal.send( transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))
sender=self.wipe_by_user,
log_record=log_record.pk,
action_source=None,
)
for dependent_alert_group in self.dependent_alert_groups.all(): for dependent_alert_group in self.dependent_alert_groups.all():
dependent_alert_group.wipe_by_user(user) dependent_alert_group.wipe_by_user(user)
@ -1193,31 +1150,27 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
from apps.alerts.models import AlertGroupLogRecord from apps.alerts.models import AlertGroupLogRecord
self.stop_escalation() self.stop_escalation()
# prevent creating multiple logs
# filter instead of get_or_create cause it can be multiple logs of this type due deleting error
log_record = self.log_records.filter(type=AlertGroupLogRecord.TYPE_DELETED).last()
if not log_record: with transaction.atomic():
log_record = self.log_records.create( # prevent creating multiple logs
type=AlertGroupLogRecord.TYPE_DELETED, # filter instead of get_or_create cause it can be multiple logs of this type due deleting error
author=user, log_record = self.log_records.filter(type=AlertGroupLogRecord.TYPE_DELETED).last()
if not log_record:
log_record = self.log_records.create(
type=AlertGroupLogRecord.TYPE_DELETED,
author=user,
)
logger.debug(
f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: delete"
) )
logger.debug( transaction.on_commit(partial(send_alert_group_signal_for_delete.delay, self.pk, log_record.pk))
f"send alert_group_action_triggered_signal for alert_group {self.pk}, "
f"log record {log_record.pk} with type '{log_record.get_type_display()}', "
f"action source: delete"
)
alert_group_action_triggered_signal.send(
sender=self.delete_by_user,
log_record=log_record.pk,
action_source=None, # TODO: Action source is none - it is suspicious
# this flag forces synchrony call for action handler in representatives
# (for now it is actual only for Slack representative)
force_sync=True,
)
def finish_delete_by_user(self):
dependent_alerts = list(self.dependent_alert_groups.all()) dependent_alerts = list(self.dependent_alert_groups.all())
self.hard_delete() self.hard_delete()

View file

@ -594,6 +594,12 @@ class AlertGroupLogRecord(models.Model):
step_specific_info = json.loads(self.step_specific_info) step_specific_info = json.loads(self.step_specific_info)
return step_specific_info return step_specific_info
def delete(self, *args, **kwargs):
    """Log the deletion of this log record, then delete it.

    Any positional and keyword arguments are forwarded unchanged to the
    parent ``delete()``, and the parent's return value is passed back to
    the caller, so this override does not narrow the parent's interface.
    """
    # The two message parts were previously adjacent string literals with no
    # separator, which rendered as "...deletedalert_group=..." in the log.
    logger.debug(
        f"alert_group_log_record for alert_group deleted alert_group={self.alert_group.pk} log_id={self.pk}"
    )
    return super().delete(*args, **kwargs)
@receiver(post_save, sender=AlertGroupLogRecord) @receiver(post_save, sender=AlertGroupLogRecord)
def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs): def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs):

View file

@ -7,6 +7,8 @@ from .check_escalation_finished import check_escalation_finished_task # noqa: F
from .custom_button_result import custom_button_result # noqa: F401 from .custom_button_result import custom_button_result # noqa: F401
from .custom_webhook_result import custom_webhook_result # noqa: F401 from .custom_webhook_result import custom_webhook_result # noqa: F401
from .delete_alert_group import delete_alert_group # noqa: F401 from .delete_alert_group import delete_alert_group # noqa: F401
from .delete_alert_group import finish_delete_alert_group # noqa: F401
from .delete_alert_group import send_alert_group_signal_for_delete # noqa: F401
from .distribute_alert import distribute_alert # noqa: F401 from .distribute_alert import distribute_alert # noqa: F401
from .escalate_alert_group import escalate_alert_group # noqa: F401 from .escalate_alert_group import escalate_alert_group # noqa: F401
from .invite_user_to_join_incident import invite_user_to_join_incident # noqa: F401 from .invite_user_to_join_incident import invite_user_to_join_incident # noqa: F401

View file

@ -76,6 +76,7 @@ def acknowledge_reminder_task(alert_group_pk: int, unacknowledge_process_id: str
log_record = alert_group.log_records.create( log_record = alert_group.log_records.create(
type=AlertGroupLogRecord.TYPE_ACK_REMINDER_TRIGGERED, author=alert_group.acknowledged_by_user type=AlertGroupLogRecord.TYPE_ACK_REMINDER_TRIGGERED, author=alert_group.acknowledged_by_user
) )
task_logger.info(f"created log record {log_record.pk}, sending signal...")
transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk))

View file

@ -1,6 +1,7 @@
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from django.conf import settings from django.conf import settings
from apps.alerts.signals import alert_group_action_triggered_signal
from apps.slack.errors import SlackAPIRatelimitError from apps.slack.errors import SlackAPIRatelimitError
from common.custom_celery_tasks import shared_dedicated_queue_retry_task from common.custom_celery_tasks import shared_dedicated_queue_retry_task
@ -10,7 +11,7 @@ logger = get_task_logger(__name__)
@shared_dedicated_queue_retry_task( @shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
) )
def delete_alert_group(alert_group_pk, user_pk): def delete_alert_group(alert_group_pk: int, user_pk: int) -> None:
from apps.alerts.models import AlertGroup from apps.alerts.models import AlertGroup
from apps.user_management.models import User from apps.user_management.models import User
@ -25,9 +26,35 @@ def delete_alert_group(alert_group_pk, user_pk):
return return
logger.debug(f"User {user} is deleting alert group {alert_group} (channel: {alert_group.channel})") logger.debug(f"User {user} is deleting alert group {alert_group} (channel: {alert_group.channel})")
alert_group.delete_by_user(user)
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def send_alert_group_signal_for_delete(alert_group_pk: int, log_record_pk: int) -> None:
try: try:
alert_group.delete_by_user(user) alert_group_action_triggered_signal.send(
sender=send_alert_group_signal_for_delete,
log_record=log_record_pk,
force_sync=True,
)
except SlackAPIRatelimitError as e: except SlackAPIRatelimitError as e:
# Handle Slack API ratelimit raised in apps.slack.scenarios.distribute_alerts.DeleteGroupStep.process_signal # Handle Slack API ratelimit raised in apps.slack.scenarios.distribute_alerts.DeleteGroupStep.process_signal
delete_alert_group.apply_async((alert_group_pk, user_pk), countdown=e.retry_after) send_alert_group_signal_for_delete.apply_async((alert_group_pk, log_record_pk), countdown=e.retry_after)
return
finish_delete_alert_group.apply_async((alert_group_pk,))
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def finish_delete_alert_group(alert_group_pk: int) -> None:
    """Finish deleting the alert group identified by ``alert_group_pk``.

    Looks the alert group up by primary key. When a matching row exists,
    ``finish_delete_by_user()`` is called on it; when none exists, a debug
    message is logged and the task returns without doing anything else.
    """
    from apps.alerts.models import AlertGroup

    group_to_finish = AlertGroup.objects.filter(pk=alert_group_pk).first()
    if group_to_finish:
        group_to_finish.finish_delete_by_user()
        return

    # Nothing to do: the lookup returned no alert group for this pk.
    logger.debug(f"Alert group id={alert_group_pk} not found, already deleted")

View file

@ -5,13 +5,15 @@ from django.conf import settings
from apps.alerts.signals import alert_group_action_triggered_signal from apps.alerts.signals import alert_group_action_triggered_signal
from common.custom_celery_tasks import shared_dedicated_queue_retry_task from common.custom_celery_tasks import shared_dedicated_queue_retry_task
from .task_logger import task_logger
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None
)
def send_alert_group_signal(log_record_id):
    """Send the alert group action-triggered signal for a log record.

    The signal receives the log record's id (not the model instance) as
    ``log_record``; receivers are expected to load the record themselves.

    Args:
        log_record_id: Primary key of the alert group log record to signal about.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    started_at = time.perf_counter()

    task_logger.info(f"sending signal for log record {log_record_id}")
    alert_group_action_triggered_signal.send(sender=send_alert_group_signal, log_record=log_record_id)

    # Report the duration through the task logger instead of a bare print so the
    # message carries the log record id and obeys the configured log level.
    task_logger.debug(
        "signal for log record %s sent in %.3f seconds",
        log_record_id,
        time.perf_counter() - started_at,
    )

View file

@ -6,7 +6,11 @@ from apps.alerts.constants import ActionSource
from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer
from apps.alerts.models import AlertGroup, AlertGroupLogRecord from apps.alerts.models import AlertGroup, AlertGroupLogRecord
from apps.alerts.tasks import wipe from apps.alerts.tasks import wipe
from apps.alerts.tasks.delete_alert_group import delete_alert_group from apps.alerts.tasks.delete_alert_group import (
delete_alert_group,
finish_delete_alert_group,
send_alert_group_signal_for_delete,
)
from apps.slack.client import SlackClient from apps.slack.client import SlackClient
from apps.slack.errors import SlackAPIMessageNotFoundError, SlackAPIRatelimitError from apps.slack.errors import SlackAPIMessageNotFoundError, SlackAPIRatelimitError
from apps.slack.models import SlackMessage from apps.slack.models import SlackMessage
@ -85,9 +89,9 @@ def test_delete(
make_alert, make_alert,
make_slack_message, make_slack_message,
make_resolution_note_slack_message, make_resolution_note_slack_message,
django_capture_on_commit_callbacks,
): ):
"""test alert group deleting""" """test alert group deleting"""
organization, slack_team_identity = make_organization_with_slack_team_identity() organization, slack_team_identity = make_organization_with_slack_team_identity()
user = make_user(organization=organization) user = make_user(organization=organization)
@ -119,7 +123,20 @@ def test_delete(
assert alert_group.slack_messages.count() == 1 assert alert_group.slack_messages.count() == 1
assert alert_group.resolution_note_slack_messages.count() == 2 assert alert_group.resolution_note_slack_messages.count() == 2
delete_alert_group(alert_group.pk, user.pk) with patch(
"apps.alerts.tasks.delete_alert_group.send_alert_group_signal_for_delete.delay", return_value=None
) as mock_send_alert_group_signal:
with django_capture_on_commit_callbacks(execute=True):
delete_alert_group(alert_group.pk, user.pk)
assert mock_send_alert_group_signal.call_count == 1
with patch(
"apps.alerts.tasks.delete_alert_group.finish_delete_alert_group.apply_async", return_value=None
) as mock_finish_delete_alert_group:
send_alert_group_signal_for_delete(*mock_send_alert_group_signal.call_args.args)
assert mock_finish_delete_alert_group.call_count == 1
finish_delete_alert_group(alert_group.pk)
assert not alert_group.alerts.exists() assert not alert_group.alerts.exists()
assert not alert_group.slack_messages.exists() assert not alert_group.slack_messages.exists()
@ -140,10 +157,10 @@ def test_delete(
@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"]) @pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
@patch.object(delete_alert_group, "apply_async") @patch.object(send_alert_group_signal_for_delete, "apply_async")
@pytest.mark.django_db @pytest.mark.django_db
def test_delete_slack_ratelimit( def test_delete_slack_ratelimit(
mock_delete_alert_group, mock_send_alert_group_signal_for_delete,
api_method, api_method,
make_organization_with_slack_team_identity, make_organization_with_slack_team_identity,
make_user, make_user,
@ -152,6 +169,7 @@ def test_delete_slack_ratelimit(
make_alert, make_alert,
make_slack_message, make_slack_message,
make_resolution_note_slack_message, make_resolution_note_slack_message,
django_capture_on_commit_callbacks,
): ):
organization, slack_team_identity = make_organization_with_slack_team_identity() organization, slack_team_identity = make_organization_with_slack_team_identity()
user = make_user(organization=organization) user = make_user(organization=organization)
@ -180,17 +198,31 @@ def test_delete_slack_ratelimit(
ts="test2_ts", ts="test2_ts",
) )
with patch.object( with patch(
SlackClient, "apps.alerts.tasks.delete_alert_group.send_alert_group_signal_for_delete.delay", return_value=None
api_method, ) as mock_send_alert_group_signal:
side_effect=SlackAPIRatelimitError( with django_capture_on_commit_callbacks(execute=True):
response=build_slack_response({"ok": False, "error": "ratelimited"}, headers={"Retry-After": 42}) delete_alert_group(alert_group.pk, user.pk)
), assert mock_send_alert_group_signal.call_count == 1
):
delete_alert_group(alert_group.pk, user.pk) with patch(
"apps.alerts.tasks.delete_alert_group.finish_delete_alert_group.apply_async", return_value=None
) as mock_finish_delete_alert_group:
with patch.object(
SlackClient,
api_method,
side_effect=SlackAPIRatelimitError(
response=build_slack_response({"ok": False, "error": "ratelimited"}, headers={"Retry-After": 42})
),
):
send_alert_group_signal_for_delete(*mock_send_alert_group_signal.call_args.args)
assert mock_finish_delete_alert_group.call_count == 0
# Check task is retried gracefully # Check task is retried gracefully
mock_delete_alert_group.assert_called_once_with((alert_group.pk, user.pk), countdown=42) mock_send_alert_group_signal_for_delete.assert_called_once_with(
mock_send_alert_group_signal.call_args.args, countdown=42
)
@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"]) @pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
@ -582,7 +614,7 @@ def test_filter_active_alert_groups(
@patch("apps.alerts.models.AlertGroup.hard_delete") @patch("apps.alerts.models.AlertGroup.hard_delete")
@patch("apps.alerts.models.AlertGroup.un_attach_by_delete") @patch("apps.alerts.models.AlertGroup.un_attach_by_delete")
@patch("apps.alerts.models.AlertGroup.stop_escalation") @patch("apps.alerts.models.AlertGroup.stop_escalation")
@patch("apps.alerts.models.alert_group.alert_group_action_triggered_signal") @patch("apps.alerts.tasks.delete_alert_group.alert_group_action_triggered_signal")
@pytest.mark.django_db @pytest.mark.django_db
def test_delete_by_user( def test_delete_by_user(
mock_alert_group_action_triggered_signal, mock_alert_group_action_triggered_signal,
@ -592,6 +624,7 @@ def test_delete_by_user(
make_organization_and_user, make_organization_and_user,
make_alert_receive_channel, make_alert_receive_channel,
make_alert_group, make_alert_group,
django_capture_on_commit_callbacks,
): ):
organization, user = make_organization_and_user() organization, user = make_organization_and_user()
alert_receive_channel = make_alert_receive_channel(organization) alert_receive_channel = make_alert_receive_channel(organization)
@ -603,20 +636,31 @@ def test_delete_by_user(
assert alert_group.log_records.filter(type=AlertGroupLogRecord.TYPE_DELETED).count() == 0 assert alert_group.log_records.filter(type=AlertGroupLogRecord.TYPE_DELETED).count() == 0
alert_group.delete_by_user(user) with patch(
"apps.alerts.tasks.delete_alert_group.send_alert_group_signal_for_delete.delay", return_value=None
) as mock_send_alert_group_signal:
with django_capture_on_commit_callbacks(execute=True):
delete_alert_group(alert_group.pk, user.pk)
assert mock_send_alert_group_signal.call_count == 1
assert alert_group.log_records.filter(type=AlertGroupLogRecord.TYPE_DELETED).count() == 1 assert alert_group.log_records.filter(type=AlertGroupLogRecord.TYPE_DELETED).count() == 1
deleted_log_record = alert_group.log_records.get(type=AlertGroupLogRecord.TYPE_DELETED) deleted_log_record = alert_group.log_records.get(type=AlertGroupLogRecord.TYPE_DELETED)
alert_group.stop_escalation.assert_called_once_with() alert_group.stop_escalation.assert_called_once_with()
with patch(
"apps.alerts.tasks.delete_alert_group.finish_delete_alert_group.apply_async", return_value=None
) as mock_finish_delete_alert_group:
send_alert_group_signal_for_delete(*mock_send_alert_group_signal.call_args.args)
assert mock_finish_delete_alert_group.call_count == 1
mock_alert_group_action_triggered_signal.send.assert_called_once_with( mock_alert_group_action_triggered_signal.send.assert_called_once_with(
sender=alert_group.delete_by_user, sender=send_alert_group_signal_for_delete,
log_record=deleted_log_record.pk, log_record=deleted_log_record.pk,
action_source=None,
force_sync=True, force_sync=True,
) )
finish_delete_alert_group(alert_group.pk)
alert_group.hard_delete.assert_called_once_with() alert_group.hard_delete.assert_called_once_with()
for dependent_alert_group in dependent_alert_groups: for dependent_alert_group in dependent_alert_groups:

View file

@ -31,7 +31,7 @@ def mock_apply_async(monkeypatch):
@patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async") @patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async")
@patch("apps.alerts.models.alert_group.alert_group_action_triggered_signal.send") @patch("apps.alerts.tasks.send_alert_group_signal.alert_group_action_triggered_signal.send")
@pytest.mark.django_db @pytest.mark.django_db
@override_settings(CELERY_TASK_ALWAYS_EAGER=True) @override_settings(CELERY_TASK_ALWAYS_EAGER=True)
def test_update_metric_alert_groups_total_cache_on_action( def test_update_metric_alert_groups_total_cache_on_action(
@ -142,7 +142,7 @@ def test_update_metric_alert_groups_total_cache_on_action(
@patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async") @patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async")
@patch("apps.alerts.models.alert_group.alert_group_action_triggered_signal.send") @patch("apps.alerts.tasks.send_alert_group_signal.alert_group_action_triggered_signal.send")
@pytest.mark.django_db @pytest.mark.django_db
@override_settings(CELERY_TASK_ALWAYS_EAGER=True) @override_settings(CELERY_TASK_ALWAYS_EAGER=True)
def test_update_metric_alert_groups_response_time_cache_on_action( def test_update_metric_alert_groups_response_time_cache_on_action(

View file

@ -2,6 +2,7 @@ import logging
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from django.conf import settings from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from apps.alerts.constants import ActionSource from apps.alerts.constants import ActionSource
from apps.alerts.representative import AlertGroupAbstractRepresentative from apps.alerts.representative import AlertGroupAbstractRepresentative
@ -49,14 +50,20 @@ def on_create_alert_slack_representative_async(alert_pk):
@shared_dedicated_queue_retry_task( @shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None autoretry_for=(Exception,),
retry_backoff=True,
dont_autoretry_for=(ObjectDoesNotExist,),
max_retries=1 if settings.DEBUG else None,
) )
def on_alert_group_action_triggered_async(log_record_id): def on_alert_group_action_triggered_async(log_record_id):
from apps.alerts.models import AlertGroupLogRecord from apps.alerts.models import AlertGroupLogRecord
logger.debug(f"SLACK representative: get log record {log_record_id}") try:
log_record = AlertGroupLogRecord.objects.get(pk=log_record_id)
except AlertGroupLogRecord.DoesNotExist as e:
logger.warning(f"SLACK representative: log record {log_record_id} never created or has been deleted")
raise e
log_record = AlertGroupLogRecord.objects.get(pk=log_record_id)
alert_group_id = log_record.alert_group_id alert_group_id = log_record.alert_group_id
logger.debug(f"Start on_alert_group_action_triggered for alert_group {alert_group_id}, log record {log_record_id}") logger.debug(f"Start on_alert_group_action_triggered for alert_group {alert_group_id}, log record {log_record_id}")
instance = AlertGroupSlackRepresentative(log_record) instance = AlertGroupSlackRepresentative(log_record)
@ -145,16 +152,25 @@ class AlertGroupSlackRepresentative(AlertGroupAbstractRepresentative):
from apps.alerts.models import AlertGroupLogRecord from apps.alerts.models import AlertGroupLogRecord
log_record = kwargs["log_record"] log_record = kwargs["log_record"]
action_source = kwargs.get("action_source")
force_sync = kwargs.get("force_sync", False) force_sync = kwargs.get("force_sync", False)
if isinstance(log_record, AlertGroupLogRecord): if isinstance(log_record, AlertGroupLogRecord):
log_record_id = log_record.pk log_record_id = log_record.pk
else: else:
log_record_id = log_record log_record_id = log_record
if action_source == ActionSource.SLACK or force_sync: try:
log_record = AlertGroupLogRecord.objects.get(pk=log_record_id)
except AlertGroupLogRecord.DoesNotExist:
logger.warning(
f"on_alert_group_action_triggered: log record {log_record_id} never created or has been deleted"
)
return
if log_record.action_source == ActionSource.SLACK or force_sync:
logger.debug(f"SLACK on_alert_group_action_triggered: sync {log_record_id} {force_sync}")
on_alert_group_action_triggered_async(log_record_id) on_alert_group_action_triggered_async(log_record_id)
else: else:
logger.debug(f"SLACK on_alert_group_action_triggered: async {log_record_id} {force_sync}")
on_alert_group_action_triggered_async.apply_async((log_record_id,)) on_alert_group_action_triggered_async.apply_async((log_record_id,))
@classmethod @classmethod
@ -167,7 +183,11 @@ class AlertGroupSlackRepresentative(AlertGroupAbstractRepresentative):
alert_group_id = alert_group.pk alert_group_id = alert_group.pk
else: else:
alert_group_id = alert_group alert_group_id = alert_group
alert_group = AlertGroup.objects.get(pk=alert_group_id) try:
alert_group = AlertGroup.objects.get(pk=alert_group_id)
except AlertGroup.DoesNotExist as e:
logger.warning(f"SLACK update log report: alert group {alert_group_id} has been deleted")
raise e
logger.debug( logger.debug(
f"Received alert_group_update_log_report signal in SLACK representative for alert_group {alert_group_id}" f"Received alert_group_update_log_report signal in SLACK representative for alert_group {alert_group_id}"

View file

@ -64,8 +64,13 @@ class AlertGroupTelegramRepresentative(AlertGroupAbstractRepresentative):
def on_alert_group_update_log_report(cls, **kwargs): def on_alert_group_update_log_report(cls, **kwargs):
logger.info("AlertGroupTelegramRepresentative UPDATE LOG REPORT SIGNAL") logger.info("AlertGroupTelegramRepresentative UPDATE LOG REPORT SIGNAL")
alert_group = kwargs["alert_group"] alert_group = kwargs["alert_group"]
if not isinstance(alert_group, AlertGroup): if not isinstance(alert_group, AlertGroup):
alert_group = AlertGroup.objects.get(pk=alert_group) try:
alert_group = AlertGroup.objects.get(pk=alert_group)
except AlertGroup.DoesNotExist as e:
logger.warning(f"Telegram update log report: alert group {alert_group} has been deleted")
raise e
messages_to_edit = alert_group.telegram_messages.filter( messages_to_edit = alert_group.telegram_messages.filter(
message_type__in=( message_type__in=(

View file

@ -3,6 +3,7 @@ import logging
from celery import uuid as celery_uuid from celery import uuid as celery_uuid
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from django.conf import settings from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from telegram import error from telegram import error
from apps.alerts.models import Alert, AlertGroup from apps.alerts.models import Alert, AlertGroup
@ -230,7 +231,10 @@ def on_create_alert_telegram_representative_async(self, alert_pk):
@shared_dedicated_queue_retry_task( @shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None autoretry_for=(Exception,),
retry_backoff=True,
dont_autoretry_for=(ObjectDoesNotExist,),
max_retries=1 if settings.DEBUG else None,
) )
def on_alert_group_action_triggered_async(log_record_id): def on_alert_group_action_triggered_async(log_record_id):
from apps.alerts.models import AlertGroupLogRecord from apps.alerts.models import AlertGroupLogRecord
@ -239,18 +243,14 @@ def on_alert_group_action_triggered_async(log_record_id):
logger.info(f"AlertGroupTelegramRepresentative ACTION SIGNAL, log record {log_record_id}") logger.info(f"AlertGroupTelegramRepresentative ACTION SIGNAL, log record {log_record_id}")
# temporary solution to handle cases when alert group and related log records were deleted # temporary solution to handle cases when alert group and related log records were deleted
try: try:
log_record = AlertGroupLogRecord.objects.get(pk=log_record_id) log_record = AlertGroupLogRecord.objects.get(pk=log_record_id)
except AlertGroupLogRecord.DoesNotExist as e: except AlertGroupLogRecord.DoesNotExist as e:
retries_count = on_alert_group_action_triggered_async.request.retries logger.warning(
if retries_count >= 10: f"AlertGroupTelegramRepresentative: log record {log_record_id} never created or has been deleted"
logger.error( )
f"AlertGroupTelegramRepresentative: was not able to get AlertGroupLogRecord, probably alert group " raise e
f"was deleted. log record {log_record_id}, retries: {retries_count}"
)
return
else:
raise e
instance = AlertGroupTelegramRepresentative(log_record) instance = AlertGroupTelegramRepresentative(log_record)
if instance.is_applicable(): if instance.is_applicable():

View file

@ -15,5 +15,9 @@ def on_action_triggered(**kwargs):
log_record = kwargs["log_record"] log_record = kwargs["log_record"]
if not isinstance(log_record, AlertGroupLogRecord): if not isinstance(log_record, AlertGroupLogRecord):
log_record = AlertGroupLogRecord.objects.get(pk=log_record) try:
log_record = AlertGroupLogRecord.objects.get(pk=log_record)
except AlertGroupLogRecord.DoesNotExist as e:
logger.warning(f"Webhook action triggered: log record {log_record} never created or has been deleted")
raise e
alert_group_status_change.apply_async((log_record.type, log_record.alert_group_id, log_record.author_id)) alert_group_status_change.apply_async((log_record.type, log_record.alert_group_id, log_record.author_id))

View file

@ -4,6 +4,8 @@ CELERY_TASK_ROUTES = {
"queue": "default" "queue": "default"
}, },
"apps.alerts.tasks.delete_alert_group.delete_alert_group": {"queue": "default"}, "apps.alerts.tasks.delete_alert_group.delete_alert_group": {"queue": "default"},
"apps.alerts.tasks.delete_alert_group.send_alert_group_signal_for_delete": {"queue": "default"},
"apps.alerts.tasks.delete_alert_group.finish_delete_alert_group": {"queue": "default"},
"apps.alerts.tasks.invalidate_web_cache_for_alert_group.invalidate_web_cache_for_alert_group": {"queue": "default"}, "apps.alerts.tasks.invalidate_web_cache_for_alert_group.invalidate_web_cache_for_alert_group": {"queue": "default"},
"apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal": {"queue": "default"}, "apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal": {"queue": "default"},
"apps.alerts.tasks.wipe.wipe": {"queue": "default"}, "apps.alerts.tasks.wipe.wipe": {"queue": "default"},