fix infinitely retrying apps.alerts.tasks.notify_user.perform_notification task (#3708)
# Which issue(s) this PR fixes Closes https://github.com/grafana/oncall-private/issues/2318 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
This commit is contained in:
parent
b312667d31
commit
16b648bd15
4 changed files with 36 additions and 10 deletions
|
|
@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
- Improvements for the columns selector ([3668](https://github.com/grafana/oncall/pull/3668))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Address infinite retrying `apps.alerts.tasks.notify_user.perform_notification` task when `UserNotificationPolicyLogRecord`
|
||||
object cannot be found by @joeyorlando ([#3708](https://github.com/grafana/oncall/pull/3708))
|
||||
|
||||
## v1.3.89 (2024-01-17)
|
||||
|
||||
### Fixed
|
||||
|
|
|
|||
|
|
@ -196,7 +196,15 @@ def notify_user_task(
|
|||
|
||||
if not stop_escalation:
|
||||
if notification_policy.step != UserNotificationPolicy.Step.WAIT:
|
||||
transaction.on_commit(partial(perform_notification.apply_async, (log_record.pk,)))
|
||||
|
||||
def _create_perform_notification_task():
|
||||
task = perform_notification.apply_async((log_record.pk,))
|
||||
task_logger.info(
|
||||
f"Created perform_notification task {task} log_record={log_record.pk} "
|
||||
f"alert_group={alert_group_pk}"
|
||||
)
|
||||
|
||||
transaction.on_commit(_create_perform_notification_task)
|
||||
|
||||
delay = NEXT_ESCALATION_DELAY
|
||||
if countdown is not None:
|
||||
|
|
@ -232,17 +240,18 @@ def perform_notification(log_record_pk):
|
|||
from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord
|
||||
from apps.telegram.models import TelegramToUserConnector
|
||||
|
||||
# TODO: remove this log line once done investigation
|
||||
task_logger.info(f"perform_notification: log_record {log_record_pk}")
|
||||
|
||||
log_record = UserNotificationPolicyLogRecord.objects.get(pk=log_record_pk)
|
||||
try:
|
||||
log_record = UserNotificationPolicyLogRecord.objects.get(pk=log_record_pk)
|
||||
except UserNotificationPolicyLogRecord.DoesNotExist:
|
||||
task_logger.warning(
|
||||
f"perform_notification: log_record {log_record_pk} doesn't exist. Skipping remainder of task. "
|
||||
"The alert group associated with this log record may have been deleted."
|
||||
)
|
||||
return
|
||||
|
||||
# TODO: uncomment this out once done investigation
|
||||
# try:
|
||||
# log_record = UserNotificationPolicyLogRecord.objects.get(pk=log_record_pk)
|
||||
# except UserNotificationPolicyLogRecord.DoesNotExist:
|
||||
# task_logger.info(f"perform_notification: log_record {log_record_pk} doesn't exist. Skipping remainder of task")
|
||||
# return
|
||||
task_logger.info(f"perform_notification: found record for {log_record_pk}")
|
||||
|
||||
user = log_record.author
|
||||
alert_group = log_record.alert_group
|
||||
|
|
|
|||
|
|
@ -285,3 +285,15 @@ def test_perform_notification_slack_prevent_posting(
|
|||
last_log_record.notification_error_code
|
||||
== UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_POSTING_TO_SLACK_IS_DISABLED
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_perform_notification_missing_user_notification_policy_log_record(caplog):
|
||||
invalid_pk = 12345
|
||||
perform_notification(invalid_pk)
|
||||
|
||||
assert (
|
||||
f"perform_notification: log_record {invalid_pk} doesn't exist. Skipping remainder of task. "
|
||||
"The alert group associated with this log record may have been deleted."
|
||||
) in caplog.text
|
||||
assert f"perform_notification: found record for {invalid_pk}" not in caplog.text
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ class DedicatedQueueRetryTask(LogExceptionOnFailureTask):
|
|||
def retry(
|
||||
self, args=None, kwargs=None, exc=None, throw=True, eta=None, countdown=None, max_retries=None, **options
|
||||
):
|
||||
logger.warn("Retrying celery task", exc_info=exc)
|
||||
logger.warning("Retrying celery task", exc_info=exc)
|
||||
|
||||
# Just call retry with queue argument
|
||||
return super().retry(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue