From 0d7352a17b40f7409b067b69f2e2ebf0961d8d84 Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Thu, 7 Sep 2023 12:25:29 +0100 Subject: [PATCH] Fix handling Slack rate limits (#2991) ## Which issue(s) this PR fixes https://github.com/grafana/oncall-private/issues/2156 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 1 + .../apps/slack/alert_group_slack_service.py | 4 +-- engine/apps/slack/client.py | 28 +++++++------------ .../apps/slack/scenarios/distribute_alerts.py | 11 +++----- engine/apps/slack/tests/conftest.py | 20 +++++++++++++ .../test_distribute_alerts.py | 5 +++- .../test_resolution_note.py | 3 +- .../test_slack_channel_integration.py | 3 +- engine/apps/slack/tests/test_slack_client.py | 23 +++++++++++++++ 9 files changed, 67 insertions(+), 31 deletions(-) create mode 100644 engine/apps/slack/tests/test_slack_client.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d364df6..261db26a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Don't update Slack user groups for deleted organizations by @vadimkerr ([#2985](https://github.com/grafana/oncall/pull/2985)) - Fix Slack integration leftovers after disconnecting by @vadimkerr ([#2986](https://github.com/grafana/oncall/pull/2986)) +- Fix handling Slack rate limits by @vadimkerr ([#2991](https://github.com/grafana/oncall/pull/2991)) ## v1.3.35 (2023-09-05) diff --git a/engine/apps/slack/alert_group_slack_service.py b/engine/apps/slack/alert_group_slack_service.py index d779f172..259513e2 100644 --- a/engine/apps/slack/alert_group_slack_service.py +++ b/engine/apps/slack/alert_group_slack_service.py @@ -8,7 +8,6 @@ from apps.slack.client import ( SlackAPITokenException, SlackClientWithErrorHandling, ) -from apps.slack.constants import SLACK_RATE_LIMIT_DELAY if typing.TYPE_CHECKING: from apps.alerts.models import AlertGroup @@ -46,8 +45,7 @@ class AlertGroupSlackService: except SlackAPIRateLimitException as e: if alert_group.channel.integration != AlertReceiveChannel.INTEGRATION_MAINTENANCE: if not alert_group.channel.is_rate_limited_in_slack: - delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY - alert_group.channel.start_send_rate_limit_message_task(delay) + alert_group.channel.start_send_rate_limit_message_task(e.retry_after) logger.info( f"Message has not been updated for alert_group {alert_group.pk} due to slack rate limit." ) diff --git a/engine/apps/slack/client.py b/engine/apps/slack/client.py index 8837f13c..81fe4493 100644 --- a/engine/apps/slack/client.py +++ b/engine/apps/slack/client.py @@ -3,7 +3,7 @@ from typing import Optional, Tuple from django.utils import timezone from slack_sdk.errors import SlackApiError -from slack_sdk.web import WebClient +from slack_sdk.web import SlackResponse, WebClient from apps.slack.constants import SLACK_RATE_LIMIT_DELAY @@ -11,11 +11,9 @@ logger = logging.getLogger(__name__) class SlackAPIException(Exception): - def __init__(self, *args, **kwargs): - self.response = {} - if "response" in kwargs: - self.response = kwargs["response"] - super().__init__(*args) + def __init__(self, msg: str, response: SlackResponse): + super().__init__(msg) + self.response = response class SlackAPITokenException(SlackAPIException): @@ -27,7 +25,9 @@ class SlackAPIChannelArchivedException(SlackAPIException): class SlackAPIRateLimitException(SlackAPIException): - pass + def __init__(self, msg: str, response: SlackResponse, retry_after: int): + super().__init__(msg, response) + self.retry_after = retry_after class SlackClientWithErrorHandling(WebClient): @@ -103,18 +103,10 @@ class SlackClientWithErrorHandling(WebClient): if response["error"] == "is_archived": raise SlackAPIChannelArchivedException(exception_text, response=response) - if ( - response["error"] == "rate_limited" - or response["error"] == "ratelimited" - or response["error"] == "message_limit_exceeded" + if response["error"] in ("rate_limited", "ratelimited", "message_limit_exceeded"): # "message_limit_exceeded" is related to the limit on post messages for free Slack workspace - ): - if "headers" in response and response["headers"].get("Retry-After") is not None: - delay = int(response["headers"]["Retry-After"]) - else: - delay = SLACK_RATE_LIMIT_DELAY - response["rate_limit_delay"] = delay - raise SlackAPIRateLimitException(exception_text, response=response) + retry_after = int(response.headers.get("Retry-After", SLACK_RATE_LIMIT_DELAY)) + raise SlackAPIRateLimitException(exception_text, response, retry_after) if response["error"] == "code_already_used": return response diff --git a/engine/apps/slack/scenarios/distribute_alerts.py b/engine/apps/slack/scenarios/distribute_alerts.py index 8e0301f7..6b0e3575 100644 --- a/engine/apps/slack/scenarios/distribute_alerts.py +++ b/engine/apps/slack/scenarios/distribute_alerts.py @@ -22,7 +22,7 @@ from apps.slack.client import ( SlackAPITokenException, SlackClientWithErrorHandling, ) -from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME, SLACK_RATE_LIMIT_DELAY +from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME from apps.slack.scenarios import scenario_step from apps.slack.scenarios.slack_renderer import AlertGroupLogSlackRenderer from apps.slack.slack_formatter import SlackFormatter @@ -172,8 +172,7 @@ class AlertShootingStep(scenario_step.ScenarioStep): if alert_group.channel.integration != AlertReceiveChannel.INTEGRATION_MAINTENANCE: alert_group.reason_to_skip_escalation = AlertGroup.RATE_LIMITED alert_group.save(update_fields=["reason_to_skip_escalation"]) - delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY - alert_group.channel.start_send_rate_limit_message_task(delay) + alert_group.channel.start_send_rate_limit_message_task(e.retry_after) logger.info("Not delivering alert due to slack rate limit.") else: raise e @@ -997,8 +996,7 @@ class UpdateLogReportMessageStep(scenario_step.ScenarioStep): print(e) except SlackAPIRateLimitException as e: if not alert_group.channel.is_rate_limited_in_slack: - delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY - alert_group.channel.start_send_rate_limit_message_task(delay) + alert_group.channel.start_send_rate_limit_message_task(e.retry_after) logger.info( f"Log message has not been posted for alert_group {alert_group.pk} due to slack rate limit." ) @@ -1060,8 +1058,7 @@ class UpdateLogReportMessageStep(scenario_step.ScenarioStep): print(e) except SlackAPIRateLimitException as e: if not alert_group.channel.is_rate_limited_in_slack: - delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY - alert_group.channel.start_send_rate_limit_message_task(delay) + alert_group.channel.start_send_rate_limit_message_task(e.retry_after) logger.info( f"Log message has not been updated for alert_group {alert_group.pk} due to slack rate limit." ) diff --git a/engine/apps/slack/tests/conftest.py b/engine/apps/slack/tests/conftest.py index ff8057ee..5e38598a 100644 --- a/engine/apps/slack/tests/conftest.py +++ b/engine/apps/slack/tests/conftest.py @@ -1,4 +1,24 @@ +import typing + import pytest +from rest_framework import status +from slack_sdk.web import SlackResponse + + +def build_slack_response( + data: dict[str, typing.Any], + status_code: int = status.HTTP_200_OK, + headers: typing.Optional[dict[str, typing.Any]] = None, +): + return SlackResponse( + client=None, + http_verb="POST", + api_url="test", + req_args={}, + data=data, + headers=headers if headers else {}, + status_code=status_code, + ) @pytest.fixture diff --git a/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py b/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py index 91b336bd..18727c3a 100644 --- a/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py +++ b/engine/apps/slack/tests/test_scenario_steps/test_distribute_alerts.py @@ -6,6 +6,7 @@ from apps.alerts.models import AlertGroup from apps.slack.client import SlackAPIException from apps.slack.models import SlackMessage from apps.slack.scenarios.scenario_step import ScenarioStep +from apps.slack.tests.conftest import build_slack_response @pytest.mark.django_db @@ -24,7 +25,9 @@ def test_restricted_action_error( step = SlackAlertShootingStep(slack_team_identity) with patch.object(step._slack_client, "api_call") as mock_slack_api_call: - mock_slack_api_call.side_effect = SlackAPIException(response={"error": "restricted_action"}) + mock_slack_api_call.side_effect = SlackAPIException( + "error!", response=build_slack_response({"error": "restricted_action"}) + ) step._post_alert_group_to_slack(slack_team_identity, alert_group, alert, None, "channel-id", []) alert_group.refresh_from_db() diff --git a/engine/apps/slack/tests/test_scenario_steps/test_resolution_note.py b/engine/apps/slack/tests/test_scenario_steps/test_resolution_note.py index 1cec2437..9a5d43f3 100644 --- a/engine/apps/slack/tests/test_scenario_steps/test_resolution_note.py +++ b/engine/apps/slack/tests/test_scenario_steps/test_resolution_note.py @@ -5,6 +5,7 @@ import pytest from apps.slack.client import SlackAPIException, SlackClientWithErrorHandling from apps.slack.scenarios.scenario_step import ScenarioStep +from apps.slack.tests.conftest import build_slack_response from common.api_helpers.utils import create_engine_url @@ -185,7 +186,7 @@ def test_get_resolution_notes_blocks_latest_limit( @patch.object( SlackClientWithErrorHandling, "api_call", - side_effect=SlackAPIException(response={"ok": False, "error": "not_found"}), + side_effect=SlackAPIException("error!", response=build_slack_response({"ok": False, "error": "not_found"})), ) def test_resolution_notes_modal_closed_before_update( mock_slack_api_call, diff --git a/engine/apps/slack/tests/test_scenario_steps/test_slack_channel_integration.py b/engine/apps/slack/tests/test_scenario_steps/test_slack_channel_integration.py index 7052d28f..0c163ac2 100644 --- a/engine/apps/slack/tests/test_scenario_steps/test_slack_channel_integration.py +++ b/engine/apps/slack/tests/test_scenario_steps/test_slack_channel_integration.py @@ -5,6 +5,7 @@ import pytest from apps.alerts.models import ResolutionNoteSlackMessage from apps.slack.client import SlackAPIException from apps.slack.scenarios.slack_channel_integration import SlackChannelMessageEventStep +from apps.slack.tests.conftest import build_slack_response @pytest.mark.django_db @@ -290,7 +291,7 @@ class TestSlackChannelMessageEventStep: step = SlackChannelMessageEventStep(slack_team_identity, organization, user) step._slack_client = Mock() step._slack_client.chat_getPermalink.side_effect = [ - SlackAPIException("error!", response={"ok": False, "error": "message_not_found"}) + SlackAPIException("error!", response=build_slack_response({"ok": False, "error": "message_not_found"})) ] payload = { diff --git a/engine/apps/slack/tests/test_slack_client.py b/engine/apps/slack/tests/test_slack_client.py new file mode 100644 index 00000000..e0fdd5eb --- /dev/null +++ b/engine/apps/slack/tests/test_slack_client.py @@ -0,0 +1,23 @@ +import json +from unittest.mock import patch + +import pytest + +from apps.slack.client import SlackAPIRateLimitException, SlackClientWithErrorHandling + + +@pytest.mark.parametrize("error", ["ratelimited", "rate_limited", "message_limit_exceeded"]) +def test_slack_client_ratelimit(monkeypatch, error): + # undo engine.conftest.mock_slack_api_call + monkeypatch.undo() + + return_value = {"status": 429, "body": json.dumps({"ok": False, "error": error}), "headers": {"Retry-After": "42"}} + + with patch( + "slack_sdk.web.base_client.BaseClient._perform_urllib_http_request_internal", return_value=return_value + ) as mock_request: + with pytest.raises(SlackAPIRateLimitException) as exc_info: + SlackClientWithErrorHandling().api_call("auth.test") + + mock_request.assert_called_once() + assert exc_info.value.retry_after == 42