Fix handling Slack rate limits (#2991)

## Which issue(s) this PR fixes

https://github.com/grafana/oncall-private/issues/2156

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
This commit is contained in:
Vadim Stepanov 2023-09-07 12:25:29 +01:00 committed by GitHub
parent d90cf0f593
commit 0d7352a17b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 67 additions and 31 deletions

View file

@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Don't update Slack user groups for deleted organizations by @vadimkerr ([#2985](https://github.com/grafana/oncall/pull/2985))
- Fix Slack integration leftovers after disconnecting by @vadimkerr ([#2986](https://github.com/grafana/oncall/pull/2986))
- Fix handling Slack rate limits by @vadimkerr ([#2991](https://github.com/grafana/oncall/pull/2991))
## v1.3.35 (2023-09-05)

View file

@ -8,7 +8,6 @@ from apps.slack.client import (
SlackAPITokenException,
SlackClientWithErrorHandling,
)
from apps.slack.constants import SLACK_RATE_LIMIT_DELAY
if typing.TYPE_CHECKING:
from apps.alerts.models import AlertGroup
@ -46,8 +45,7 @@ class AlertGroupSlackService:
except SlackAPIRateLimitException as e:
if alert_group.channel.integration != AlertReceiveChannel.INTEGRATION_MAINTENANCE:
if not alert_group.channel.is_rate_limited_in_slack:
delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY
alert_group.channel.start_send_rate_limit_message_task(delay)
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
logger.info(
f"Message has not been updated for alert_group {alert_group.pk} due to slack rate limit."
)

View file

@ -3,7 +3,7 @@ from typing import Optional, Tuple
from django.utils import timezone
from slack_sdk.errors import SlackApiError
from slack_sdk.web import WebClient
from slack_sdk.web import SlackResponse, WebClient
from apps.slack.constants import SLACK_RATE_LIMIT_DELAY
@ -11,11 +11,9 @@ logger = logging.getLogger(__name__)
class SlackAPIException(Exception):
def __init__(self, *args, **kwargs):
self.response = {}
if "response" in kwargs:
self.response = kwargs["response"]
super().__init__(*args)
def __init__(self, msg: str, response: SlackResponse):
super().__init__(msg)
self.response = response
class SlackAPITokenException(SlackAPIException):
@ -27,7 +25,9 @@ class SlackAPIChannelArchivedException(SlackAPIException):
class SlackAPIRateLimitException(SlackAPIException):
pass
def __init__(self, msg: str, response: SlackResponse, retry_after: int):
super().__init__(msg, response)
self.retry_after = retry_after
class SlackClientWithErrorHandling(WebClient):
@ -103,18 +103,10 @@ class SlackClientWithErrorHandling(WebClient):
if response["error"] == "is_archived":
raise SlackAPIChannelArchivedException(exception_text, response=response)
if (
response["error"] == "rate_limited"
or response["error"] == "ratelimited"
or response["error"] == "message_limit_exceeded"
if response["error"] in ("rate_limited", "ratelimited", "message_limit_exceeded"):
# "message_limit_exceeded" is related to the limit on post messages for free Slack workspace
):
if "headers" in response and response["headers"].get("Retry-After") is not None:
delay = int(response["headers"]["Retry-After"])
else:
delay = SLACK_RATE_LIMIT_DELAY
response["rate_limit_delay"] = delay
raise SlackAPIRateLimitException(exception_text, response=response)
retry_after = int(response.headers.get("Retry-After", SLACK_RATE_LIMIT_DELAY))
raise SlackAPIRateLimitException(exception_text, response, retry_after)
if response["error"] == "code_already_used":
return response

View file

@ -22,7 +22,7 @@ from apps.slack.client import (
SlackAPITokenException,
SlackClientWithErrorHandling,
)
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME, SLACK_RATE_LIMIT_DELAY
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME
from apps.slack.scenarios import scenario_step
from apps.slack.scenarios.slack_renderer import AlertGroupLogSlackRenderer
from apps.slack.slack_formatter import SlackFormatter
@ -172,8 +172,7 @@ class AlertShootingStep(scenario_step.ScenarioStep):
if alert_group.channel.integration != AlertReceiveChannel.INTEGRATION_MAINTENANCE:
alert_group.reason_to_skip_escalation = AlertGroup.RATE_LIMITED
alert_group.save(update_fields=["reason_to_skip_escalation"])
delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY
alert_group.channel.start_send_rate_limit_message_task(delay)
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
logger.info("Not delivering alert due to slack rate limit.")
else:
raise e
@ -997,8 +996,7 @@ class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
print(e)
except SlackAPIRateLimitException as e:
if not alert_group.channel.is_rate_limited_in_slack:
delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY
alert_group.channel.start_send_rate_limit_message_task(delay)
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
logger.info(
f"Log message has not been posted for alert_group {alert_group.pk} due to slack rate limit."
)
@ -1060,8 +1058,7 @@ class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
print(e)
except SlackAPIRateLimitException as e:
if not alert_group.channel.is_rate_limited_in_slack:
delay = e.response.get("rate_limit_delay") or SLACK_RATE_LIMIT_DELAY
alert_group.channel.start_send_rate_limit_message_task(delay)
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
logger.info(
f"Log message has not been updated for alert_group {alert_group.pk} due to slack rate limit."
)

View file

@ -1,4 +1,24 @@
import typing
import pytest
from rest_framework import status
from slack_sdk.web import SlackResponse
def build_slack_response(
data: dict[str, typing.Any],
status_code: int = status.HTTP_200_OK,
headers: typing.Optional[dict[str, typing.Any]] = None,
):
return SlackResponse(
client=None,
http_verb="POST",
api_url="test",
req_args={},
data=data,
headers=headers if headers else {},
status_code=status_code,
)
@pytest.fixture

View file

@ -6,6 +6,7 @@ from apps.alerts.models import AlertGroup
from apps.slack.client import SlackAPIException
from apps.slack.models import SlackMessage
from apps.slack.scenarios.scenario_step import ScenarioStep
from apps.slack.tests.conftest import build_slack_response
@pytest.mark.django_db
@ -24,7 +25,9 @@ def test_restricted_action_error(
step = SlackAlertShootingStep(slack_team_identity)
with patch.object(step._slack_client, "api_call") as mock_slack_api_call:
mock_slack_api_call.side_effect = SlackAPIException(response={"error": "restricted_action"})
mock_slack_api_call.side_effect = SlackAPIException(
"error!", response=build_slack_response({"error": "restricted_action"})
)
step._post_alert_group_to_slack(slack_team_identity, alert_group, alert, None, "channel-id", [])
alert_group.refresh_from_db()

View file

@ -5,6 +5,7 @@ import pytest
from apps.slack.client import SlackAPIException, SlackClientWithErrorHandling
from apps.slack.scenarios.scenario_step import ScenarioStep
from apps.slack.tests.conftest import build_slack_response
from common.api_helpers.utils import create_engine_url
@ -185,7 +186,7 @@ def test_get_resolution_notes_blocks_latest_limit(
@patch.object(
SlackClientWithErrorHandling,
"api_call",
side_effect=SlackAPIException(response={"ok": False, "error": "not_found"}),
side_effect=SlackAPIException("error!", response=build_slack_response({"ok": False, "error": "not_found"})),
)
def test_resolution_notes_modal_closed_before_update(
mock_slack_api_call,

View file

@ -5,6 +5,7 @@ import pytest
from apps.alerts.models import ResolutionNoteSlackMessage
from apps.slack.client import SlackAPIException
from apps.slack.scenarios.slack_channel_integration import SlackChannelMessageEventStep
from apps.slack.tests.conftest import build_slack_response
@pytest.mark.django_db
@ -290,7 +291,7 @@ class TestSlackChannelMessageEventStep:
step = SlackChannelMessageEventStep(slack_team_identity, organization, user)
step._slack_client = Mock()
step._slack_client.chat_getPermalink.side_effect = [
SlackAPIException("error!", response={"ok": False, "error": "message_not_found"})
SlackAPIException("error!", response=build_slack_response({"ok": False, "error": "message_not_found"}))
]
payload = {

View file

@ -0,0 +1,23 @@
import json
from unittest.mock import patch
import pytest
from apps.slack.client import SlackAPIRateLimitException, SlackClientWithErrorHandling
@pytest.mark.parametrize("error", ["ratelimited", "rate_limited", "message_limit_exceeded"])
def test_slack_client_ratelimit(monkeypatch, error):
# undo engine.conftest.mock_slack_api_call
monkeypatch.undo()
return_value = {"status": 429, "body": json.dumps({"ok": False, "error": error}), "headers": {"Retry-After": "42"}}
with patch(
"slack_sdk.web.base_client.BaseClient._perform_urllib_http_request_internal", return_value=return_value
) as mock_request:
with pytest.raises(SlackAPIRateLimitException) as exc_info:
SlackClientWithErrorHandling().api_call("auth.test")
mock_request.assert_called_once()
assert exc_info.value.retry_after == 42