Handle Slack ratelimit on alert group deletion (#3038)
# What this PR does - gracefully retry `apps.alerts.tasks.delete_alert_group.delete_alert_group` when hitting Slack ratelimits - remove Slack messages from the DB as soon as they are deleted from Slack, so the tasks are not retrying perpetually ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
This commit is contained in:
parent
78849d2e43
commit
6caacf4048
4 changed files with 186 additions and 49 deletions
|
|
@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
- Fix Slack access token length issue by @toolchainX ([#3016](https://github.com/grafana/oncall/pull/3016))
|
||||
- Fix shifts for current user internal endpoint to return the right shift PK ([#3036](https://github.com/grafana/oncall/pull/3036))
|
||||
- Handle Slack ratelimit on alert group deletion by @vadimkerr ([#3038](https://github.com/grafana/oncall/pull/3038))
|
||||
|
||||
## v1.3.37 (2023-09-12)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from celery.utils.log import get_task_logger
|
||||
from django.conf import settings
|
||||
|
||||
from apps.slack.errors import SlackAPIRatelimitError
|
||||
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
|
|
@ -23,4 +24,8 @@ def delete_alert_group(alert_group_pk, user_pk):
|
|||
logger.debug("User not found, skipping delete_alert_group")
|
||||
return
|
||||
|
||||
alert_group.delete_by_user(user)
|
||||
try:
|
||||
alert_group.delete_by_user(user)
|
||||
except SlackAPIRatelimitError as e:
|
||||
# Handle Slack API ratelimit raised in apps.slack.scenarios.distribute_alerts.DeleteGroupStep.process_signal
|
||||
delete_alert_group.apply_async((alert_group_pk, user_pk), countdown=e.retry_after)
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
from unittest.mock import patch
|
||||
from unittest.mock import call, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer
|
||||
from apps.alerts.models import AlertGroup, AlertGroupLogRecord
|
||||
from apps.alerts.tasks.delete_alert_group import delete_alert_group
|
||||
from apps.slack.client import SlackClient
|
||||
from apps.slack.errors import SlackAPIMessageNotFoundError, SlackAPIRatelimitError
|
||||
from apps.slack.models import SlackMessage
|
||||
from apps.slack.tests.conftest import build_slack_response
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
|
|
@ -46,56 +49,180 @@ def test_render_for_phone_call(
|
|||
assert expected_verbose_name in rendered_text
|
||||
|
||||
|
||||
@patch.object(SlackClient, "reactions_remove")
|
||||
@patch.object(SlackClient, "chat_delete")
|
||||
@pytest.mark.django_db
|
||||
def test_delete(
|
||||
mock_chat_delete,
|
||||
mock_reactions_remove,
|
||||
make_organization_with_slack_team_identity,
|
||||
make_user,
|
||||
make_slack_channel,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group,
|
||||
make_alert,
|
||||
make_slack_message,
|
||||
make_resolution_note_slack_message,
|
||||
):
|
||||
"""test alert group deleting"""
|
||||
|
||||
organization, slack_team_identity = make_organization_with_slack_team_identity()
|
||||
slack_channel = make_slack_channel(slack_team_identity, name="general", slack_id="CWER1ASD")
|
||||
user = make_user(organization=organization)
|
||||
|
||||
alert_receive_channel = make_alert_receive_channel(organization)
|
||||
|
||||
alert_group = make_alert_group(alert_receive_channel)
|
||||
SlackMessage.objects.create(channel_id="CWER1ASD", alert_group=alert_group)
|
||||
make_alert(alert_group, raw_request_data={})
|
||||
|
||||
make_alert(
|
||||
alert_group,
|
||||
raw_request_data={
|
||||
"evalMatches": [
|
||||
{"value": 100, "metric": "High value", "tags": None},
|
||||
{"value": 200, "metric": "Higher Value", "tags": None},
|
||||
],
|
||||
"message": "Someone is testing the alert notification within grafana.",
|
||||
"ruleId": 0,
|
||||
"ruleName": "Test notification",
|
||||
"ruleUrl": "http://localhost:3000/",
|
||||
"state": "alerting",
|
||||
"title": f"Incident for channel <#{slack_channel.slack_id}> Where a > b & c < d",
|
||||
},
|
||||
# Create Slack messages
|
||||
slack_message = make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
|
||||
resolution_note_1 = make_resolution_note_slack_message(
|
||||
alert_group=alert_group,
|
||||
user=user,
|
||||
added_by_user=user,
|
||||
posted_by_bot=True,
|
||||
slack_channel_id="test1_channel_id",
|
||||
ts="test1_ts",
|
||||
)
|
||||
resolution_note_2 = make_resolution_note_slack_message(
|
||||
alert_group=alert_group,
|
||||
user=user,
|
||||
added_by_user=user,
|
||||
added_to_resolution_note=True,
|
||||
slack_channel_id="test2_channel_id",
|
||||
ts="test2_ts",
|
||||
)
|
||||
|
||||
alerts = alert_group.alerts
|
||||
slack_messages = alert_group.slack_messages
|
||||
|
||||
assert alerts.count() > 0
|
||||
assert slack_messages.count() > 0
|
||||
assert alert_group.alerts.count() == 1
|
||||
assert alert_group.slack_messages.count() == 1
|
||||
assert alert_group.resolution_note_slack_messages.count() == 2
|
||||
|
||||
delete_alert_group(alert_group.pk, user.pk)
|
||||
|
||||
assert alerts.count() == 0
|
||||
assert slack_messages.count() == 0
|
||||
assert not alert_group.alerts.exists()
|
||||
assert not alert_group.slack_messages.exists()
|
||||
assert not alert_group.resolution_note_slack_messages.exists()
|
||||
|
||||
with pytest.raises(AlertGroup.DoesNotExist):
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# Check that appropriate Slack API calls are made
|
||||
assert mock_chat_delete.call_count == 2
|
||||
assert mock_chat_delete.call_args_list[0] == call(
|
||||
channel=resolution_note_1.slack_channel_id, ts=resolution_note_1.ts
|
||||
)
|
||||
assert mock_chat_delete.call_args_list[1] == call(channel=slack_message.channel_id, ts=slack_message.slack_id)
|
||||
mock_reactions_remove.assert_called_once_with(
|
||||
channel=resolution_note_2.slack_channel_id, name="memo", timestamp=resolution_note_2.ts
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
|
||||
@patch.object(delete_alert_group, "apply_async")
|
||||
@pytest.mark.django_db
|
||||
def test_delete_slack_ratelimit(
|
||||
mock_delete_alert_group,
|
||||
api_method,
|
||||
make_organization_with_slack_team_identity,
|
||||
make_user,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group,
|
||||
make_alert,
|
||||
make_slack_message,
|
||||
make_resolution_note_slack_message,
|
||||
):
|
||||
organization, slack_team_identity = make_organization_with_slack_team_identity()
|
||||
user = make_user(organization=organization)
|
||||
|
||||
alert_receive_channel = make_alert_receive_channel(organization)
|
||||
|
||||
alert_group = make_alert_group(alert_receive_channel)
|
||||
make_alert(alert_group, raw_request_data={})
|
||||
|
||||
# Create Slack messages
|
||||
make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
|
||||
make_resolution_note_slack_message(
|
||||
alert_group=alert_group,
|
||||
user=user,
|
||||
added_by_user=user,
|
||||
posted_by_bot=True,
|
||||
slack_channel_id="test1_channel_id",
|
||||
ts="test1_ts",
|
||||
)
|
||||
make_resolution_note_slack_message(
|
||||
alert_group=alert_group,
|
||||
user=user,
|
||||
added_by_user=user,
|
||||
added_to_resolution_note=True,
|
||||
slack_channel_id="test2_channel_id",
|
||||
ts="test2_ts",
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
SlackClient,
|
||||
api_method,
|
||||
side_effect=SlackAPIRatelimitError(
|
||||
response=build_slack_response({"ok": False, "error": "ratelimited"}, headers={"Retry-After": 42})
|
||||
),
|
||||
):
|
||||
delete_alert_group(alert_group.pk, user.pk)
|
||||
|
||||
# Check task is retried gracefully
|
||||
mock_delete_alert_group.assert_called_once_with((alert_group.pk, user.pk), countdown=42)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
|
||||
@patch.object(delete_alert_group, "apply_async")
|
||||
@pytest.mark.django_db
|
||||
def test_delete_slack_api_error_other_than_ratelimit(
|
||||
mock_delete_alert_group,
|
||||
api_method,
|
||||
make_organization_with_slack_team_identity,
|
||||
make_user,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group,
|
||||
make_alert,
|
||||
make_slack_message,
|
||||
make_resolution_note_slack_message,
|
||||
):
|
||||
organization, slack_team_identity = make_organization_with_slack_team_identity()
|
||||
user = make_user(organization=organization)
|
||||
|
||||
alert_receive_channel = make_alert_receive_channel(organization)
|
||||
|
||||
alert_group = make_alert_group(alert_receive_channel)
|
||||
make_alert(alert_group, raw_request_data={})
|
||||
|
||||
# Create Slack messages
|
||||
make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
|
||||
make_resolution_note_slack_message(
|
||||
alert_group=alert_group,
|
||||
user=user,
|
||||
added_by_user=user,
|
||||
posted_by_bot=True,
|
||||
slack_channel_id="test1_channel_id",
|
||||
ts="test1_ts",
|
||||
)
|
||||
make_resolution_note_slack_message(
|
||||
alert_group=alert_group,
|
||||
user=user,
|
||||
added_by_user=user,
|
||||
added_to_resolution_note=True,
|
||||
slack_channel_id="test2_channel_id",
|
||||
ts="test2_ts",
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
SlackClient,
|
||||
api_method,
|
||||
side_effect=SlackAPIMessageNotFoundError(
|
||||
response=build_slack_response({"ok": False, "error": "message_not_found"})
|
||||
),
|
||||
):
|
||||
delete_alert_group(alert_group.pk, user.pk) # check no exception is raised
|
||||
|
||||
# Check task is not retried
|
||||
mock_delete_alert_group.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_alerts_count_gt(
|
||||
|
|
|
|||
|
|
@ -880,34 +880,38 @@ class DeleteGroupStep(scenario_step.ScenarioStep):
|
|||
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
|
||||
alert_group = log_record.alert_group
|
||||
|
||||
self.remove_resolution_note_reaction(alert_group)
|
||||
|
||||
bot_messages_ts: typing.List[str] = []
|
||||
bot_messages_ts.extend(alert_group.slack_messages.values_list("slack_id", flat=True))
|
||||
bot_messages_ts.extend(
|
||||
alert_group.resolution_note_slack_messages.filter(posted_by_bot=True).values_list("ts", flat=True)
|
||||
)
|
||||
channel_id = alert_group.slack_message.channel_id
|
||||
|
||||
for message_ts in bot_messages_ts:
|
||||
try:
|
||||
self._slack_client.chat_delete(channel=channel_id, ts=message_ts)
|
||||
except (
|
||||
SlackAPITokenError,
|
||||
SlackAPIChannelNotFoundError,
|
||||
SlackAPIMessageNotFoundError,
|
||||
SlackAPIChannelArchivedError,
|
||||
):
|
||||
pass
|
||||
|
||||
def remove_resolution_note_reaction(self, alert_group: AlertGroup) -> None:
|
||||
# Remove "memo" emoji from resolution note messages
|
||||
for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True):
|
||||
message.added_to_resolution_note = False
|
||||
message.save(update_fields=["added_to_resolution_note"])
|
||||
try:
|
||||
self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts)
|
||||
except SlackAPIRatelimitError:
|
||||
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
|
||||
raise
|
||||
except SlackAPIError:
|
||||
pass
|
||||
message.delete()
|
||||
|
||||
# Remove resolution note messages posted by OnCall bot
|
||||
for message in alert_group.resolution_note_slack_messages.filter(posted_by_bot=True):
|
||||
try:
|
||||
self._slack_client.chat_delete(channel=message.slack_channel_id, ts=message.ts)
|
||||
except SlackAPIRatelimitError:
|
||||
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
|
||||
raise
|
||||
except SlackAPIError:
|
||||
pass
|
||||
message.delete()
|
||||
|
||||
# Remove alert group Slack messages
|
||||
for message in alert_group.slack_messages.all():
|
||||
try:
|
||||
self._slack_client.chat_delete(channel=message.channel_id, ts=message.slack_id)
|
||||
except SlackAPIRatelimitError:
|
||||
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
|
||||
raise
|
||||
except SlackAPIError:
|
||||
pass
|
||||
message.delete()
|
||||
|
||||
|
||||
class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue