do not retry firebase.messaging.UnregisteredError exceptions for FCM relay tasks (#3637)
# What this PR does _tldr_; we had a lengthy discussion about this [here](https://raintank-corp.slack.com/archives/C04JCU51NF8/p1701893410542629?thread_ts=1701690117.016909&cid=C04JCU51NF8). `firebase.messaging.UnregisteredError` errors occur because of events outside of our control and retrying will never fix them, therefore we should simply skip retrying in this case. We retry these fairly often ([logs](https://ops.grafana-ops.net/explore?schemaVersion=1&panes=%7B%22iWZ%22:%7B%22datasource%22:%22000000193%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%23%20%7Bcluster%3D~%5C%22prod-%28eu-west-0%7Cus-central-0%29%5C%22,%20namespace%3D%5C%22amixr-prod%5C%22%7D%20%7C%3D%20%5C%22task_name%3Dapps.webhooks.tasks.trigger_webhook.execute_webhook%5C%22%20%7C%3D%20%5C%22retry%5C%22%5Cn%7Bcluster%3D~%5C%22prod-%28eu-west-0%7Cus-central-0%29%5C%22,%20namespace%3D%5C%22amixr-prod%5C%22%7D%20%7C%3D%20%5C%22apps.mobile_app.fcm_relay.fcm_relay_async%5C%22%20%7C%3D%20%5C%22UnregisteredError%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22000000193%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-7d%22,%22to%22:%22now%22%7D%7D%7D&orgId=1)) which eats up unnecessary celery worker resources. Related to https://github.com/grafana/oncall-private/issues/1820 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
This commit is contained in:
parent
9fab4a7b06
commit
72e7224ad3
3 changed files with 54 additions and 6 deletions
|
|
@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
## Unreleased
|
||||
|
||||
### Changed
|
||||
|
||||
- Do not retry `firebase.messaging.UnregisteredError` exceptions for FCM relay tasks by @joeyorlando ([#3637](https://github.com/grafana/oncall/pull/3637))
|
||||
|
||||
## v1.3.83 (2024-01-08)
|
||||
|
||||
### Changed
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from unittest.mock import Mock, patch
|
||||
|
||||
import firebase_admin.messaging
|
||||
import pytest
|
||||
from firebase_admin.exceptions import FirebaseError
|
||||
from requests import HTTPError
|
||||
|
|
@ -59,6 +60,40 @@ def test_send_push_notification_cloud_firebase_error(
|
|||
mock_send_message.assert_called_once_with(mock_message)
|
||||
|
||||
|
||||
@patch.object(FCMDevice, "send_message")
|
||||
@pytest.mark.parametrize(
|
||||
"ExceptionClass,exception_kwargs",
|
||||
[
|
||||
(firebase_admin.messaging.UnregisteredError, {"message": "test_error_message"}),
|
||||
],
|
||||
)
|
||||
@pytest.mark.django_db
|
||||
def test_send_push_notification_cloud_ignores_certain_errors(
|
||||
mock_send_message,
|
||||
settings,
|
||||
make_organization_and_user,
|
||||
ExceptionClass,
|
||||
exception_kwargs,
|
||||
):
|
||||
mock_send_message.return_value = ExceptionClass(**exception_kwargs)
|
||||
|
||||
# create a user and connect a mobile device
|
||||
_, user = make_organization_and_user()
|
||||
device = FCMDevice.objects.create(user=user, registration_id="test_device_id")
|
||||
mock_message = {"foo": "bar"}
|
||||
|
||||
# check FCM is contacted directly when using the cloud license
|
||||
settings.LICENSE = CLOUD_LICENSE_NAME
|
||||
settings.IS_OPEN_SOURCE = False
|
||||
|
||||
try:
|
||||
utils.send_push_notification(device, mock_message)
|
||||
except Exception:
|
||||
pytest.fail(f"send_push_notification should not raise an exception for {ExceptionClass.__name__} errors")
|
||||
|
||||
mock_send_message.assert_called_once_with(mock_message)
|
||||
|
||||
|
||||
@patch("apps.mobile_app.utils._send_push_notification_to_fcm_relay", return_value="ok")
|
||||
@pytest.mark.django_db
|
||||
def test_send_push_notification_oss(
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import typing
|
|||
import requests
|
||||
from django.conf import settings
|
||||
from firebase_admin.exceptions import FirebaseError
|
||||
from firebase_admin.messaging import AndroidConfig, APNSConfig, APNSPayload, Message
|
||||
from firebase_admin.messaging import AndroidConfig, APNSConfig, APNSPayload, Message, UnregisteredError
|
||||
from requests import HTTPError
|
||||
from rest_framework import status
|
||||
|
||||
|
|
@ -19,6 +19,13 @@ if typing.TYPE_CHECKING:
|
|||
|
||||
|
||||
MAX_RETRIES = 1 if settings.DEBUG else 10
|
||||
|
||||
# UnregisteredError
|
||||
# App instance was unregistered from FCM. This usually means that the token used is no longer valid and a
|
||||
# new one must be used.
|
||||
#
|
||||
# In other words, this error occurs outside of our control and retrying will never fix it, therefore we should skip
|
||||
FIREBASE_ERRORS_TO_NOT_RETRY = (UnregisteredError,)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
|
@ -46,13 +53,15 @@ def send_message_to_fcm_device(device: "FCMDevice", message: Message) -> None:
|
|||
|
||||
if isinstance(response, FirebaseError):
|
||||
logger.exception(
|
||||
f"FCM error occured in mobile_app.utils.send_message_to_fcm_device\n"
|
||||
f"FCMDevice info: {device}\n"
|
||||
f"FirebaseError code: {response._code}\n"
|
||||
f"FirebaseError cause: {response._cause}\n"
|
||||
f"FirebaseError http_response: {response._http_response}\n"
|
||||
f"FCM error occured in mobile_app.utils.send_message_to_fcm_device fcm_device_info={device} "
|
||||
f"firebase_error_code={response._code} firebase_error_cause={response._cause} "
|
||||
f"firebase_error_http_response={response._http_response}"
|
||||
)
|
||||
|
||||
if isinstance(response, FIREBASE_ERRORS_TO_NOT_RETRY):
|
||||
logger.warning(f"FCM error {response} is not being retried as we explicitly do not want to retry it")
|
||||
return
|
||||
|
||||
raise response
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue