fix: improve Slack rate limiting logic when updating alert groups (#5287)

# What this PR does

https://www.loom.com/share/1ac33822301444748133ffe72638ddc4

The two asks in the [original GH
issue](https://github.com/grafana/oncall-private/issues/2947) were:

> 1. Make the error message clearer. We can identify if it's delivering
or updating and being rate-limited. This is possible because Slack sets
limits per API method. Also, this limit is a per-slack channel while we
are posting messages & applying ratelimit per on-call integration, which
confuses customers.
> 2. Debounce update alert group message in Slack

Both of these have been addressed in this PR

## Which issue(s) this PR closes

Closes https://github.com/grafana/oncall-private/issues/2947

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] Added the relevant release notes label (see labels prefixed w/
`release:`). These labels dictate how your PR will
    show up in the autogenerated release notes.
This commit is contained in:
Joey Orlando 2024-12-02 14:40:30 -05:00 committed by GitHub
parent fa071bcd6e
commit 26946f0d43
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 975 additions and 187 deletions

View file

@ -29,7 +29,7 @@ from apps.metrics_exporter.helpers import (
metrics_remove_deleted_integration_from_cache,
metrics_update_integration_cache,
)
from apps.slack.constants import SLACK_RATE_LIMIT_DELAY, SLACK_RATE_LIMIT_TIMEOUT
from apps.slack.constants import SLACK_RATE_LIMIT_TIMEOUT
from apps.slack.tasks import post_slack_rate_limit_message
from apps.slack.utils import post_message_to_channel
from common.api_helpers.utils import create_engine_url
@ -442,12 +442,14 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject):
and self.rate_limited_in_slack_at + SLACK_RATE_LIMIT_TIMEOUT > timezone.now()
)
def start_send_rate_limit_message_task(self, delay=SLACK_RATE_LIMIT_DELAY):
def start_send_rate_limit_message_task(self, error_message_verb: str, delay: int) -> None:
task_id = celery_uuid()
self.rate_limit_message_task_id = task_id
self.rate_limited_in_slack_at = timezone.now()
self.save(update_fields=["rate_limit_message_task_id", "rate_limited_in_slack_at"])
post_slack_rate_limit_message.apply_async((self.pk,), countdown=delay, task_id=task_id)
post_slack_rate_limit_message.apply_async((self.pk, error_message_verb), countdown=delay, task_id=task_id)
@property
def alert_groups_count(self) -> int:

View file

@ -3,13 +3,9 @@ import typing
from apps.slack.client import SlackClient
from apps.slack.errors import (
SlackAPICantUpdateMessageError,
SlackAPIChannelArchivedError,
SlackAPIChannelInactiveError,
SlackAPIChannelNotFoundError,
SlackAPIInvalidAuthError,
SlackAPIMessageNotFoundError,
SlackAPIRatelimitError,
SlackAPITokenError,
)
@ -34,41 +30,12 @@ class AlertGroupSlackService:
else:
self._slack_client = SlackClient(slack_team_identity)
def update_alert_group_slack_message(self, alert_group: "AlertGroup") -> None:
logger.info(f"Update message for alert_group {alert_group.pk}")
try:
self._slack_client.chat_update(
# TODO: once _channel_id has been fully migrated to channel, remove _channel_id
# see https://raintank-corp.slack.com/archives/C06K1MQ07GS/p173255546
# channel=alert_group.slack_message.channel.slack_id,
channel=alert_group.slack_message._channel_id,
ts=alert_group.slack_message.slack_id,
attachments=alert_group.render_slack_attachments(),
blocks=alert_group.render_slack_blocks(),
)
logger.info(f"Message has been updated for alert_group {alert_group.pk}")
except SlackAPIRatelimitError as e:
if not alert_group.channel.is_maintenace_integration:
if not alert_group.channel.is_rate_limited_in_slack:
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
logger.info(
f"Message has not been updated for alert_group {alert_group.pk} due to slack rate limit."
)
else:
raise
except (
SlackAPIMessageNotFoundError,
SlackAPICantUpdateMessageError,
SlackAPIChannelInactiveError,
SlackAPITokenError,
SlackAPIChannelNotFoundError,
):
pass
def publish_message_to_alert_group_thread(
self, alert_group: "AlertGroup", attachments=None, mrkdwn=True, unfurl_links=True, text=None
) -> None:
"""
TODO: refactor this method and move it to the `SlackMessage` model, such that we can remove this class..
"""
# TODO: refactor checking the possibility of sending message to slack
# do not try to post message to slack if integration is rate limited
if alert_group.channel.is_rate_limited_in_slack:

View file

@ -10,7 +10,6 @@ SLACK_WRONG_TEAM_NAMES = [SLACK_INVALID_AUTH_RESPONSE, PLACEHOLDER]
SLACK_RATE_LIMIT_TIMEOUT = datetime.timedelta(minutes=5)
SLACK_RATE_LIMIT_DELAY = 10
CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME = 60 * 10
BLOCK_SECTION_TEXT_MAX_SIZE = 2800
PRIVATE_METADATA_MAX_LENGTH = 3000

View file

@ -3,7 +3,10 @@ import time
import typing
import uuid
from celery import uuid as celery_uuid
from django.core.cache import cache
from django.db import models
from django.utils import timezone
from apps.slack.client import SlackClient
from apps.slack.constants import BLOCK_SECTION_TEXT_MAX_SIZE
@ -15,6 +18,7 @@ from apps.slack.errors import (
SlackAPIRatelimitError,
SlackAPITokenError,
)
from apps.slack.tasks import update_alert_group_slack_message
if typing.TYPE_CHECKING:
from apps.alerts.models import AlertGroup
@ -30,6 +34,8 @@ class SlackMessage(models.Model):
alert_group: typing.Optional["AlertGroup"]
channel: "SlackChannel"
ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS = 45
id = models.CharField(primary_key=True, default=uuid.uuid4, editable=False, max_length=36)
slack_id = models.CharField(max_length=100)
@ -85,7 +91,7 @@ class SlackMessage(models.Model):
active_update_task_id = models.CharField(max_length=100, null=True, default=None)
"""
ID of the latest celery task to update the message
DEPRECATED/TODO: drop this field in a separate PR/release
"""
class Meta:
@ -259,3 +265,87 @@ class SlackMessage(models.Model):
slack_user_identity.send_link_to_slack_message(slack_message)
except (SlackAPITokenError, SlackAPIMethodNotSupportedForChannelTypeError):
pass
def _get_update_message_cache_key(self) -> str:
return f"update_alert_group_slack_message_{self.alert_group.pk}"
def get_active_update_task_id(self) -> typing.Optional[str]:
return cache.get(self._get_update_message_cache_key(), default=None)
def set_active_update_task_id(self, task_id: str) -> None:
"""
NOTE: we store the task ID in the cache for twice the debounce interval to ensure that the task ID is
EVENTUALLY removed. The background task which updates the message will remove the task ID from the cache, but
this is a safety measure in case the task fails to run or complete. The task ID would be removed from the cache
which would then allow the message to be updated again in a subsequent call to this method.
"""
cache.set(
self._get_update_message_cache_key(),
task_id,
timeout=self.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS * 2,
)
def mark_active_update_task_as_complete(self) -> None:
self.last_updated = timezone.now()
self.save(update_fields=["last_updated"])
cache.delete(self._get_update_message_cache_key())
def update_alert_groups_message(self, debounce: bool) -> None:
"""
Schedule an update task for the associated alert group's Slack message, respecting the debounce interval.
This method ensures that updates to the Slack message related to an alert group are not performed
too frequently, adhering to the `ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS` debounce interval.
It schedules a background task to update the message after the appropriate countdown.
The method performs the following steps:
- Checks if there's already an active update task ID set in the cache. If so, exits to prevent
duplicate scheduling.
- Calculates the time since the last update (`last_updated` field) and determines the remaining time needed
to respect the debounce interval.
- Schedules the `update_alert_group_slack_message` task with the calculated countdown.
- Stores the task ID in the cache to prevent multiple tasks from being scheduled.
debounce: bool - this is intended to be used when we want to debounce updates to the message. Examples:
- when set to True, we will skip scheduling an update task if there's an active update task (eg. debounce it)
- when set to False, we will immediately schedule an update task
"""
if not self.alert_group:
logger.warning(
f"skipping update_alert_groups_message as SlackMessage {self.pk} has no alert_group associated with it"
)
return
active_update_task_id = self.get_active_update_task_id()
if debounce and active_update_task_id is not None:
logger.info(
f"skipping update_alert_groups_message as SlackMessage {self.pk} has an active update task "
f"{active_update_task_id} and debounce is set to True"
)
return
now = timezone.now()
# we previously weren't updating the last_updated field for messages, so there will be cases
# where the last_updated field is None
last_updated = self.last_updated or now
time_since_last_update = (now - last_updated).total_seconds()
remaining_time = self.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS - int(time_since_last_update)
countdown = max(remaining_time, 10) if debounce else 0
logger.info(
f"updating message for alert_group {self.alert_group.pk} in {countdown} seconds "
f"(debounce interval: {self.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS})"
)
task_id = celery_uuid()
# NOTE: we need to persist the task ID in the cache before scheduling the task to prevent
# a race condition where the task starts before the task ID is stored in the cache as the task
# does a check to verify that the celery task id matches the one stored in the cache
#
# (see update_alert_group_slack_message task for more details)
self.set_active_update_task_id(task_id)
update_alert_group_slack_message.apply_async((self.pk,), countdown=countdown, task_id=task_id)

View file

@ -3,15 +3,12 @@ import logging
import typing
from datetime import datetime
from django.core.cache import cache
from apps.alerts.constants import ActionSource
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
from apps.alerts.incident_appearance.renderers.slack_renderer import AlertSlackRenderer
from apps.alerts.models import Alert, AlertGroup, AlertGroupLogRecord, AlertReceiveChannel, Invitation
from apps.api.permissions import RBACPermission
from apps.slack.chatops_proxy_routing import make_private_metadata, make_value
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME
from apps.slack.errors import (
SlackAPIChannelArchivedError,
SlackAPIChannelNotFoundError,
@ -25,7 +22,7 @@ from apps.slack.errors import (
from apps.slack.models import SlackTeamIdentity, SlackUserIdentity
from apps.slack.scenarios import scenario_step
from apps.slack.slack_formatter import SlackFormatter
from apps.slack.tasks import send_message_to_thread_if_bot_not_in_channel, update_incident_slack_message
from apps.slack.tasks import send_message_to_thread_if_bot_not_in_channel
from apps.slack.types import (
Block,
BlockActionType,
@ -36,7 +33,6 @@ from apps.slack.types import (
PayloadType,
ScenarioRoute,
)
from apps.slack.utils import get_cache_key_update_incident_slack_message
from common.utils import clean_markup, is_string_with_visible_characters
from .step_mixins import AlertGroupActionsMixin
@ -116,6 +112,7 @@ class IncomingAlertStep(scenario_step.ScenarioStep):
# do not try to post alert group message to slack if its channel is rate limited
if alert_receive_channel.is_rate_limited_in_slack:
logger.info("Skip posting or updating alert_group in Slack due to rate limit")
AlertGroup.objects.filter(
pk=alert_group_pk,
slack_message_sent=False,
@ -184,9 +181,9 @@ class IncomingAlertStep(scenario_step.ScenarioStep):
if not alert_receive_channel.is_maintenace_integration:
# we do not want to rate limit maintenace alerts..
reason_to_skip_escalation = AlertGroup.RATE_LIMITED
extra_log_msg += (
f" integration is a maintenance integration alert_receive_channel={alert_receive_channel}"
)
extra_log_msg += f" integration is a maintenance integration alert_receive_channel={alert_receive_channel.pk}"
alert_receive_channel.start_send_rate_limit_message_task("Delivering", e.retry_after)
else:
reraise_exception = True
elif isinstance(e, SlackAPITokenError):
@ -268,18 +265,24 @@ class IncomingAlertStep(scenario_step.ScenarioStep):
else:
# if a new alert comes in, and is grouped to an alert group that has already been posted to Slack,
# then we will update that existing Slack message
if not should_skip_escalation_in_slack:
update_task_id = update_incident_slack_message.apply_async(
(self.slack_team_identity.pk, alert_group_pk),
countdown=10,
alert_group_slack_message = alert_group.slack_message
if not alert_group_slack_message:
logger.info(
f"Skip updating alert group in Slack because alert_group {alert_group_pk} doesn't "
"have a slack message associated with it"
)
cache.set(
get_cache_key_update_incident_slack_message(alert_group_pk),
update_task_id,
timeout=CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME,
return
elif should_skip_escalation_in_slack:
logger.info(
f"Skip updating alert group in Slack because alert_group {alert_group_pk} is set to skip escalation"
)
else:
logger.info("Skip updating alert_group in Slack due to rate limit")
return
# NOTE: very important. We need to debounce the update_alert_groups_message call here. This is because
# we may possibly receive a flood of incoming alerts. We do not want to trigger a Slack message update
# for each of these, and hence we should instead debounce them
alert_group_slack_message.update_alert_groups_message(debounce=True)
def process_scenario(
self,
@ -324,13 +327,16 @@ class InviteOtherPersonToIncident(AlertGroupActionsMixin, scenario_step.Scenario
# for old version with user slack_id selection
warning_text = "Oops! Something goes wrong, please try again"
self.open_warning_window(payload, warning_text)
if selected_user is not None:
Invitation.invite_user(selected_user, alert_group, self.user)
else:
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
alert_group.slack_message.update_alert_groups_message(debounce=False)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class SilenceGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -360,7 +366,8 @@ class SilenceGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class UnSilenceGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -381,7 +388,8 @@ class UnSilenceGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
alert_group.un_silence_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class SelectAttachGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -555,7 +563,8 @@ class AttachGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
unfurl_links=True,
)
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
alert_group.slack_message.update_alert_groups_message(debounce=False)
def process_scenario(
self,
@ -625,7 +634,8 @@ class UnAttachGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
alert_group.un_attach_by_user(self.user, action_source=ActionSource.SLACK)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class StopInvitationProcess(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -658,7 +668,8 @@ class StopInvitationProcess(AlertGroupActionsMixin, scenario_step.ScenarioStep):
Invitation.stop_invitation(invitation_id, self.user)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.invitation.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class ResolveGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -696,11 +707,11 @@ class ResolveGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
alert_group.resolve_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
alert_group = log_record.alert_group
# Do not rerender alert_groups which happened while maintenance.
# They have no slack messages, since they just attached to the maintenance incident.
if not alert_group.happened_while_maintenance:
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
if not log_record.alert_group.happened_while_maintenance:
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class UnResolveGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -721,7 +732,8 @@ class UnResolveGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
alert_group.un_resolve_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class AcknowledgeGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -742,7 +754,8 @@ class AcknowledgeGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
alert_group.acknowledge_by_user_or_backsync(self.user, action_source=ActionSource.SLACK)
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
self.alert_group_slack_service.update_alert_group_slack_message(log_record.alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
log_record.alert_group.slack_message.update_alert_groups_message(debounce=False)
class UnAcknowledgeGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep):
@ -811,7 +824,8 @@ class UnAcknowledgeGroupStep(AlertGroupActionsMixin, scenario_step.ScenarioStep)
alert_group, attachments=message_attachments, text=text
)
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
slack_message.update_alert_groups_message(debounce=False)
logger.debug(f"Finished process_signal in UnAcknowledgeGroupStep for alert_group {alert_group.pk}")
@ -932,7 +946,8 @@ class WipeGroupStep(scenario_step.ScenarioStep):
text=f"Wiped by {log_record.author.get_username_with_slack_verbal()}",
)
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
alert_group.slack_message.update_alert_groups_message(debounce=False)
class DeleteGroupStep(scenario_step.ScenarioStep):

View file

@ -222,7 +222,8 @@ class AddToResolutionNoteStep(scenario_step.ScenarioStep):
except SlackAPIError:
pass
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
slack_message.update_alert_groups_message(debounce=False)
else:
warning_text = "Unable to add this message to resolution note."
self.open_warning_window(payload, warning_text)
@ -261,6 +262,7 @@ class UpdateResolutionNoteStep(scenario_step.ScenarioStep):
resolution_note_slack_message = resolution_note.resolution_note_slack_message
alert_group = resolution_note.alert_group
alert_group_slack_message = alert_group.slack_message
blocks = self.get_resolution_note_blocks(resolution_note)
# TODO: once _channel_id has been fully migrated to channel, remove _channel_id
@ -321,7 +323,8 @@ class UpdateResolutionNoteStep(scenario_step.ScenarioStep):
def update_alert_group_resolution_note_button(self, alert_group: "AlertGroup") -> None:
if alert_group.slack_message is not None:
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
alert_group.slack_message.update_alert_groups_message(debounce=False)
def add_resolution_note_reaction(self, slack_thread_message: "ResolutionNoteSlackMessage"):
try:

View file

@ -145,9 +145,10 @@ class SlackChannelMessageEventStep(scenario_step.ScenarioStep):
except ResolutionNoteSlackMessage.DoesNotExist:
pass
else:
alert_group = slack_thread_message.alert_group
slack_thread_message.delete()
self.alert_group_slack_service.update_alert_group_slack_message(alert_group)
# don't debounce, so that we update the message immediately, this isn't a high traffic activity
slack_thread_message.alert_group.slack_message.update_alert_groups_message(debounce=False)
STEPS_ROUTING: ScenarioRoute.RoutingSteps = [

View file

@ -11,19 +11,19 @@ from django.utils import timezone
from apps.slack.alert_group_slack_service import AlertGroupSlackService
from apps.slack.client import SlackClient
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME, SLACK_BOT_ID
from apps.slack.constants import SLACK_BOT_ID
from apps.slack.errors import (
SlackAPICantUpdateMessageError,
SlackAPIChannelInactiveError,
SlackAPIChannelNotFoundError,
SlackAPIInvalidAuthError,
SlackAPIMessageNotFoundError,
SlackAPIPlanUpgradeRequiredError,
SlackAPIRatelimitError,
SlackAPITokenError,
SlackAPIUsergroupNotFoundError,
)
from apps.slack.utils import (
get_cache_key_update_incident_slack_message,
get_populate_slack_channel_task_id_key,
post_message_to_channel,
)
from apps.slack.utils import get_populate_slack_channel_task_id_key, post_message_to_channel
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
from common.utils import batch_queryset
@ -32,39 +32,120 @@ logger.setLevel(logging.DEBUG)
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True)
def update_incident_slack_message(slack_team_identity_pk, alert_group_pk):
cache_key = get_cache_key_update_incident_slack_message(alert_group_pk)
cached_task_id = cache.get(cache_key)
current_task_id = update_incident_slack_message.request.id
def update_alert_group_slack_message(slack_message_pk: int) -> None:
"""
Background task to update the Slack message for an alert group.
if cached_task_id is None:
update_task_id = update_incident_slack_message.apply_async(
(slack_team_identity_pk, alert_group_pk),
countdown=10,
)
cache.set(cache_key, update_task_id, timeout=CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME)
This function is intended to be executed as a Celery task. It performs the following:
- Compares the current task ID with the task ID stored in the cache.
- If they do not match, it means a newer task has been scheduled, so the current task exits to prevent duplicated updates.
- Does the actual update of the Slack message.
- Upon successful completion, clears the task ID from the cache to allow future updates (also note that
the task ID is set in the cache with a timeout, so it will be automatically cleared after a certain period, even
if this task fails to clear it. See `SlackMessage.update_alert_groups_message` for more details).
return (
f"update_incident_slack_message rescheduled because of current task_id ({current_task_id})"
f" for alert_group {alert_group_pk} doesn't exist in cache"
Args:
slack_message_pk (int): The primary key of the `SlackMessage` instance to update.
"""
from apps.slack.models import SlackMessage
current_task_id = update_alert_group_slack_message.request.id
logger.info(
f"update_alert_group_slack_message for slack message {slack_message_pk} started with task_id {current_task_id}"
)
try:
slack_message = SlackMessage.objects.get(pk=slack_message_pk)
except SlackMessage.DoesNotExist:
logger.warning(f"SlackMessage {slack_message_pk} doesn't exist")
return
active_update_task_id = slack_message.get_active_update_task_id()
if current_task_id != active_update_task_id:
logger.warning(
f"update_alert_group_slack_message skipped, because current_task_id ({current_task_id}) "
f"does not equal to active_update_task_id ({active_update_task_id}) "
)
if not current_task_id == cached_task_id:
return (
f"update_incident_slack_message skipped, because of current task_id ({current_task_id})"
f" doesn't equal to cached task_id ({cached_task_id}) for alert_group {alert_group_pk}"
return
alert_group = slack_message.alert_group
if not alert_group:
logger.warning(
f"skipping update_alert_group_slack_message as SlackMessage {slack_message_pk} "
"doesn't have an alert group associated with it"
)
return
alert_group_pk = alert_group.pk
alert_receive_channel = alert_group.channel
alert_receive_channel_is_rate_limited = alert_receive_channel.is_rate_limited_in_slack
if alert_group.skip_escalation_in_slack:
logger.warning(
f"skipping update_alert_group_slack_message as AlertGroup {alert_group_pk} "
"has skip_escalation_in_slack set to True"
)
return
elif alert_receive_channel_is_rate_limited:
logger.warning(
f"skipping update_alert_group_slack_message as AlertGroup {alert_group.pk}'s "
f"integration ({alert_receive_channel.pk}) is rate-limited"
)
return
slack_client = SlackClient(slack_message.slack_team_identity)
try:
slack_client.chat_update(
# TODO: once _channel_id has been fully migrated to channel, remove _channel_id
# see https://raintank-corp.slack.com/archives/C06K1MQ07GS/p173255546
# channel=slack_message.channel.slack_id,
channel=slack_message._channel_id,
ts=slack_message.slack_id,
attachments=alert_group.render_slack_attachments(),
blocks=alert_group.render_slack_blocks(),
)
logger.info(f"Message has been updated for alert_group {alert_group_pk}")
except SlackAPIRatelimitError as e:
if not alert_receive_channel.is_maintenace_integration:
if not alert_receive_channel_is_rate_limited:
alert_receive_channel.start_send_rate_limit_message_task("Updating", e.retry_after)
logger.info(f"Message has not been updated for alert_group {alert_group_pk} due to slack rate limit.")
else:
raise
except (
SlackAPIMessageNotFoundError,
SlackAPICantUpdateMessageError,
SlackAPIChannelInactiveError,
SlackAPITokenError,
SlackAPIChannelNotFoundError,
):
pass
slack_message.mark_active_update_task_as_complete()
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True)
def update_incident_slack_message(slack_team_identity_pk: int, alert_group_pk: int) -> None:
"""
TODO: this method has been deprecated, and all references to it removed, remove it once task queues no
longer reference it.
"""
from apps.alerts.models import AlertGroup
from apps.slack.models import SlackTeamIdentity
slack_team_identity = SlackTeamIdentity.objects.get(pk=slack_team_identity_pk)
alert_group = AlertGroup.objects.get(pk=alert_group_pk)
if alert_group.skip_escalation_in_slack or alert_group.channel.is_rate_limited_in_slack:
return "Skip message update in Slack due to rate limit"
if alert_group.slack_message is None:
return "Skip message update in Slack due to absence of slack message"
AlertGroupSlackService(slack_team_identity).update_alert_group_slack_message(alert_group)
# NOTE: alert_group can't be None here, AlertGroup.objects.get(pk=alert_group_pk) would
# raise AlertGroup.DoesNotExist in this case
if not alert_group.slack_message:
logger.info(
f"skipping update_incident_slack_message as AlertGroup {alert_group_pk} doesn't have a slack message"
)
return
alert_group.slack_message.update_alert_groups_message(debounce=False)
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True)
@ -153,7 +234,6 @@ def send_message_to_thread_if_bot_not_in_channel(
"""
Send message to alert group's thread if bot is not in current channel
"""
from apps.alerts.models import AlertGroup
from apps.slack.models import SlackTeamIdentity
@ -286,7 +366,13 @@ def populate_slack_user_identities(organization_pk):
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def post_slack_rate_limit_message(integration_id):
def post_slack_rate_limit_message(integration_id: int, error_message_verb: typing.Optional[str] = None) -> None:
"""
NOTE: error_message_verb was added to the function signature to allow for more descriptive error messages.
We set it to None by default to maintain backwards compatibility with existing tasks. The default of None
can likely be removed in the near future (once existing tasks on the queue have been processed).
"""
from apps.alerts.models import AlertReceiveChannel
try:
@ -304,11 +390,14 @@ def post_slack_rate_limit_message(integration_id):
default_route = integration.channel_filters.get(is_default=True)
if (slack_channel := default_route.slack_channel_or_org_default) is not None:
# NOTE: see function docstring above 👆
if error_message_verb is None:
error_message_verb = "Sending messages for"
text = (
f"Delivering and updating alert groups of integration {integration.verbal_name} in Slack is "
f"temporarily stopped due to rate limit. You could find new alert groups at "
f"<{integration.new_incidents_web_link}|web page "
'"Alert Groups">'
f"{error_message_verb} Alert Groups in Slack, for integration {integration.verbal_name}, is "
f"temporarily rate-limited (due to a Slack rate-limit). Meanwhile, you can still find new Alert Groups "
f'in the <{integration.new_incidents_web_link}|"Alert Groups" web page>'
)
post_message_to_channel(integration.organization, slack_channel.slack_id, text)

View file

@ -2,7 +2,6 @@ from datetime import timedelta
from unittest.mock import patch
import pytest
from django.core.cache import cache
from django.utils import timezone
from apps.alerts.models import AlertGroup, AlertReceiveChannel
@ -10,7 +9,6 @@ from apps.slack.errors import SlackAPIFetchMembersFailedError, SlackAPIRatelimit
from apps.slack.models import SlackMessage
from apps.slack.scenarios.distribute_alerts import IncomingAlertStep
from apps.slack.tests.conftest import build_slack_response
from apps.slack.utils import get_cache_key_update_incident_slack_message
SLACK_MESSAGE_TS = "1234567890.123456"
SLACK_POST_MESSAGE_SUCCESS_RESPONSE = {"ts": SLACK_MESSAGE_TS}
@ -283,22 +281,20 @@ class TestIncomingAlertStep:
)
@patch("apps.slack.client.SlackClient.chat_postMessage")
@patch("apps.slack.scenarios.distribute_alerts.update_incident_slack_message")
@patch("apps.slack.models.SlackMessage.update_alert_groups_message")
@pytest.mark.django_db
def test_process_signal_update_existing_message(
self,
mock_update_incident_slack_message,
mock_update_alert_groups_message,
mock_chat_postMessage,
make_slack_team_identity,
make_slack_channel,
make_slack_message,
make_organization,
make_alert_receive_channel,
make_alert_group,
make_alert,
):
mocked_update_incident_task_id = "1234"
mock_update_incident_slack_message.apply_async.return_value = mocked_update_incident_task_id
slack_team_identity = make_slack_team_identity()
slack_channel = make_slack_channel(slack_team_identity)
organization = make_organization(slack_team_identity=slack_team_identity, default_slack_channel=slack_channel)
@ -310,6 +306,8 @@ class TestIncomingAlertStep:
slack_message_sent=True,
reason_to_skip_escalation=AlertGroup.NO_REASON,
)
make_slack_message(slack_channel, alert_group=alert_group)
assert alert_group.skip_escalation_in_slack is False
alert = make_alert(alert_group, raw_request_data={})
@ -317,23 +315,20 @@ class TestIncomingAlertStep:
step = IncomingAlertStep(slack_team_identity)
step.process_signal(alert)
# assert that the background task is scheduled
mock_update_incident_slack_message.apply_async.assert_called_once_with(
(slack_team_identity.pk, alert_group.pk), countdown=10
)
# assert that the SlackMessage is updated, and that it is debounced
mock_update_alert_groups_message.assert_called_once_with(debounce=True)
mock_chat_postMessage.assert_not_called()
# Verify that the cache is set correctly
assert cache.get(get_cache_key_update_incident_slack_message(alert_group.pk)) == mocked_update_incident_task_id
@patch("apps.slack.client.SlackClient.chat_postMessage")
@patch("apps.slack.scenarios.distribute_alerts.update_incident_slack_message")
@patch("apps.slack.models.SlackMessage.update_alert_groups_message")
@pytest.mark.django_db
def test_process_signal_do_not_update_due_to_skip_escalation(
self,
mock_update_incident_slack_message,
mock_update_alert_groups_message,
mock_chat_postMessage,
make_organization_with_slack_team_identity,
make_slack_channel,
make_slack_message,
make_alert_receive_channel,
make_alert_group,
make_alert,
@ -343,6 +338,7 @@ class TestIncomingAlertStep:
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
slack_channel = make_slack_channel(slack_team_identity)
# Simulate that slack_message_sent is already True and skip escalation due to RATE_LIMITED
alert_group = make_alert_group(
@ -351,12 +347,13 @@ class TestIncomingAlertStep:
reason_to_skip_escalation=AlertGroup.RATE_LIMITED, # Ensures skip_escalation_in_slack is True
)
alert = make_alert(alert_group, raw_request_data={})
make_slack_message(slack_channel, alert_group=alert_group)
step = IncomingAlertStep(slack_team_identity)
step.process_signal(alert)
# assert that the background task is not scheduled
mock_update_incident_slack_message.apply_async.assert_not_called()
# assert that we don't update the SlackMessage
mock_update_alert_groups_message.assert_not_called()
mock_chat_postMessage.assert_not_called()
@patch("apps.slack.client.SlackClient.chat_postMessage", side_effect=TimeoutError)
@ -399,6 +396,7 @@ class TestIncomingAlertStep:
assert SlackMessage.objects.count() == 0
assert not alert.delivered
@patch("apps.alerts.models.AlertReceiveChannel.start_send_rate_limit_message_task")
@pytest.mark.parametrize(
"reason,slack_error",
[
@ -412,6 +410,7 @@ class TestIncomingAlertStep:
@pytest.mark.django_db
def test_process_signal_slack_errors(
self,
mock_start_send_rate_limit_message_task,
make_slack_team_identity,
make_organization,
make_alert_receive_channel,
@ -433,7 +432,8 @@ class TestIncomingAlertStep:
with patch.object(step._slack_client, "chat_postMessage") as mock_chat_postMessage:
error_response = build_slack_response({"error": slack_error})
error_class = get_error_class(error_response)
mock_chat_postMessage.side_effect = error_class(error_response)
slack_error_raised = error_class(error_response)
mock_chat_postMessage.side_effect = slack_error_raised
step.process_signal(alert)
@ -446,6 +446,13 @@ class TestIncomingAlertStep:
blocks=alert_group.render_slack_blocks(),
)
if error_class == SlackAPIRatelimitError:
mock_start_send_rate_limit_message_task.assert_called_once_with(
"Delivering", slack_error_raised.retry_after
)
else:
mock_start_send_rate_limit_message_task.assert_not_called()
# For these Slack errors, retrying won't really help, so we should not set slack_message_sent back to False
assert alert_group.slack_message_sent is True
@ -454,6 +461,7 @@ class TestIncomingAlertStep:
assert SlackMessage.objects.count() == 0
assert not alert.delivered
@patch("apps.alerts.models.AlertReceiveChannel.start_send_rate_limit_message_task")
@patch(
"apps.slack.client.SlackClient.chat_postMessage",
side_effect=SlackAPIRatelimitError(build_slack_response({"error": "ratelimited"})),
@ -462,6 +470,7 @@ class TestIncomingAlertStep:
def test_process_signal_slack_api_ratelimit_for_maintenance_integration(
self,
mock_chat_postMessage,
mock_start_send_rate_limit_message_task,
make_slack_team_identity,
make_slack_channel,
make_organization,
@ -496,6 +505,8 @@ class TestIncomingAlertStep:
alert_group.refresh_from_db()
mock_start_send_rate_limit_message_task.assert_not_called()
# Ensure that slack_message_sent is set back to False, this will allow us to retry.. a SlackAPIRatelimitError,
# may have been a transient error that is "recoverable"
#

View file

@ -347,12 +347,14 @@ def test_resolution_notes_modal_closed_before_update(
assert call_args[0] == "views.update"
@patch("apps.slack.models.SlackMessage.update_alert_groups_message")
@patch.object(SlackClient, "reactions_add")
@patch.object(SlackClient, "chat_getPermalink", return_value={"permalink": "https://example.com"})
@pytest.mark.django_db
def test_add_to_resolution_note(
_mock_chat_getPermalink,
mock_reactions_add,
mock_update_alert_groups_message,
make_organization_and_user_with_slack_identities,
make_alert_receive_channel,
make_alert_group,
@ -386,6 +388,8 @@ def test_add_to_resolution_note(
step.process_scenario(slack_user_identity, slack_team_identity, payload)
mock_reactions_add.assert_called_once()
mock_update_alert_groups_message.assert_called_once_with(debounce=False)
assert alert_group.resolution_notes.get().text == "Test resolution note"

View file

@ -393,8 +393,9 @@ class TestSlackChannelMessageEventStep:
MockResolutionNoteSlackMessage.objects.get.assert_not_called()
@patch("apps.slack.models.SlackMessage.update_alert_groups_message")
def test_delete_thread_message_from_resolution_note_no_message_found(
self, make_organization_and_user_with_slack_identities
self, mock_update_alert_groups_message, make_organization_and_user_with_slack_identities
) -> None:
(
organization,
@ -418,19 +419,20 @@ class TestSlackChannelMessageEventStep:
}
step = SlackChannelMessageEventStep(slack_team_identity, organization, user)
step.alert_group_slack_service = Mock()
step.delete_thread_message_from_resolution_note(slack_user_identity, payload)
step.alert_group_slack_service.assert_not_called()
mock_update_alert_groups_message.assert_not_called()
@patch("apps.slack.models.SlackMessage.update_alert_groups_message")
def test_delete_thread_message_from_resolution_note(
self,
mock_update_alert_groups_message,
make_organization_and_user_with_slack_identities,
make_alert_receive_channel,
make_alert_group,
make_resolution_note_slack_message,
make_slack_channel,
make_slack_message,
) -> None:
channel_id = "potato"
ts = 88945.4849
@ -445,6 +447,7 @@ class TestSlackChannelMessageEventStep:
slack_channel = make_slack_channel(slack_team_identity, slack_id=channel_id)
integration = make_alert_receive_channel(organization)
alert_group = make_alert_group(integration)
make_slack_message(alert_group=alert_group, slack_id=thread_ts, channel=slack_channel)
payload = {
"event": {
@ -461,11 +464,8 @@ class TestSlackChannelMessageEventStep:
)
step = SlackChannelMessageEventStep(slack_team_identity, organization, user)
step.alert_group_slack_service = Mock()
step.delete_thread_message_from_resolution_note(slack_user_identity, payload)
step.alert_group_slack_service.update_alert_group_slack_message.assert_called_once_with(alert_group)
assert (
ResolutionNoteSlackMessage.objects.filter(
ts=ts,
@ -475,6 +475,8 @@ class TestSlackChannelMessageEventStep:
== 0
)
mock_update_alert_groups_message.assert_called_once_with(debounce=False)
def test_slack_message_has_no_alert_group(
self,
make_organization_and_user_with_slack_identities,

View file

@ -0,0 +1,377 @@
from unittest.mock import patch
import pytest
from django.utils import timezone
from apps.alerts.models import AlertGroup
from apps.slack.errors import (
SlackAPICantUpdateMessageError,
SlackAPIChannelInactiveError,
SlackAPIChannelNotFoundError,
SlackAPIMessageNotFoundError,
SlackAPIRatelimitError,
SlackAPITokenError,
)
from apps.slack.tasks import update_alert_group_slack_message
from apps.slack.tests.conftest import build_slack_response
@pytest.fixture
def mocked_rate_limited_slack_response():
return build_slack_response({}, status_code=429, headers={"Retry-After": 123})
class TestUpdateAlertGroupSlackMessageTask:
@pytest.mark.django_db
def test_update_alert_group_slack_message_slack_message_not_found(self):
"""
Test that the task exits early if SlackMessage does not exist.
"""
# No need to patch anything, just run the task with a non-existing pk
update_alert_group_slack_message.apply((99999,), task_id="task-id")
# Since there is no exception raised, the test passes
@patch("apps.slack.tasks.SlackClient.chat_update")
@pytest.mark.django_db
def test_update_alert_group_slack_message_task_id_mismatch(
self,
mock_chat_update,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that the task exits early if current_task_id doesn't match the task ID that exists in the cache
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("original-task-id")
update_alert_group_slack_message.apply((slack_message.pk,), task_id="different-task-id")
# Ensure that SlackClient.chat_update is not called
mock_chat_update.assert_not_called()
@patch("apps.slack.tasks.SlackClient.chat_update")
@pytest.mark.django_db
def test_update_alert_group_slack_message_no_alert_group(
self,
mock_chat_update,
make_organization_with_slack_team_identity,
make_slack_channel,
make_slack_message,
):
"""
Test that the task exits early if SlackMessage has no alert_group.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=None, channel=slack_channel, organization=organization)
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Ensure that SlackClient.chat_update is not called
mock_chat_update.assert_not_called()
@patch("apps.slack.tasks.SlackClient.chat_update")
@pytest.mark.django_db
def test_update_alert_group_slack_message_skip_escalation_in_slack(
self,
mock_chat_update,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that the task exits early if alert_group.skip_escalation_in_slack is True.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(
alert_receive_channel,
reason_to_skip_escalation=AlertGroup.CHANNEL_ARCHIVED,
)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
# Ensure skip_escalation_in_slack is True
assert alert_group.skip_escalation_in_slack is True
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Ensure that SlackClient.chat_update is not called
mock_chat_update.assert_not_called()
# Verify that the active update task ID is not cleared and last_updated is not set
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() == "task-id"
assert slack_message.last_updated is None
@patch("apps.slack.tasks.SlackClient.chat_update")
@pytest.mark.django_db
def test_update_alert_group_slack_message_alert_receive_channel_rate_limited(
self,
mock_chat_update,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that the task exits early if alert_receive_channel.is_rate_limited_in_slack is True.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(
organization,
rate_limited_in_slack_at=timezone.now(),
)
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
# Ensure is_rate_limited_in_slack is True
assert alert_receive_channel.is_rate_limited_in_slack is True
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Ensure that SlackClient.chat_update is not called
mock_chat_update.assert_not_called()
# Verify that the active update task ID is not cleared and last_updated is not set
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() == "task-id"
assert slack_message.last_updated is None
@patch("apps.slack.tasks.SlackClient.chat_update")
@pytest.mark.django_db
def test_update_alert_group_slack_message_successful(
self,
mock_chat_update,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that the task successfully updates the alert group's Slack message.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Assert that SlackClient.chat_update was called with correct parameters
mock_chat_update.assert_called_once_with(
channel=slack_message._channel_id,
ts=slack_message.slack_id,
attachments=alert_group.render_slack_attachments(),
blocks=alert_group.render_slack_blocks(),
)
# Verify that cache ID is cleared from the cache and last_updated is set
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() is None
assert slack_message.last_updated is not None
@patch("apps.slack.tasks.SlackClient.chat_update")
@patch("apps.alerts.models.AlertReceiveChannel.start_send_rate_limit_message_task")
@pytest.mark.django_db
def test_update_alert_group_slack_message_ratelimit_error_not_maintenance(
self,
mock_start_send_rate_limit_message_task,
mock_chat_update,
mocked_rate_limited_slack_response,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test handling of SlackAPIRatelimitError when not a maintenance integration.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
# Ensure channel is not a maintenance integration and not already rate-limited
assert alert_receive_channel.is_maintenace_integration is False
assert alert_receive_channel.is_rate_limited_in_slack is False
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
# SlackClient.chat_update raises SlackAPIRatelimitError
slack_api_ratelimit_error = SlackAPIRatelimitError(mocked_rate_limited_slack_response)
mock_chat_update.side_effect = slack_api_ratelimit_error
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Assert that start_send_rate_limit_message_task was called
mock_start_send_rate_limit_message_task.assert_called_with("Updating", slack_api_ratelimit_error.retry_after)
@patch("apps.slack.tasks.SlackClient.chat_update")
@patch("apps.alerts.models.AlertReceiveChannel.start_send_rate_limit_message_task")
@pytest.mark.django_db
def test_update_alert_group_slack_message_ratelimit_error_is_maintenance(
self,
mock_start_send_rate_limit_message_task,
mock_chat_update,
mocked_rate_limited_slack_response,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that SlackAPIRatelimitError is re-raised when it is a maintenance integration.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization, integration="maintenance")
# Ensure channel is a maintenance integration and not already rate-limited
assert alert_receive_channel.is_maintenace_integration is True
assert alert_receive_channel.is_rate_limited_in_slack is False
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
# SlackClient.chat_update raises SlackAPIRatelimitError
slack_api_ratelimit_error = SlackAPIRatelimitError(mocked_rate_limited_slack_response)
mock_chat_update.side_effect = slack_api_ratelimit_error
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
slack_message.refresh_from_db()
# Assert that start_send_rate_limit_message_task was not called, task id is not cleared, and we don't
# update last_updated
mock_start_send_rate_limit_message_task.assert_not_called()
assert slack_message.get_active_update_task_id() == "task-id"
assert slack_message.last_updated is None
@patch("apps.slack.tasks.SlackClient.chat_update")
@patch("apps.alerts.models.AlertReceiveChannel.start_send_rate_limit_message_task")
@pytest.mark.parametrize(
"ExceptionClass",
[
SlackAPIMessageNotFoundError,
SlackAPICantUpdateMessageError,
SlackAPIChannelInactiveError,
SlackAPITokenError,
SlackAPIChannelNotFoundError,
],
)
@pytest.mark.django_db
def test_update_alert_group_slack_message_other_exceptions(
self,
mock_start_send_rate_limit_message_task,
mock_chat_update,
ExceptionClass,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that other Slack API exceptions are handled silently.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
# SlackClient.chat_update raises the exception class
mock_chat_update.side_effect = ExceptionClass("foo bar")
# Call the task
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Ensure that exception was caught and passed
# SlackClient.chat_update was called
mock_chat_update.assert_called_once()
# Assert that start_send_rate_limit_message_task was not called
mock_start_send_rate_limit_message_task.assert_not_called()
# Verify that cache ID is cleared from the cache and last_updated is set
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() is None
assert slack_message.last_updated is not None
@patch("apps.slack.tasks.SlackClient.chat_update")
@pytest.mark.django_db
def test_update_alert_group_slack_message_unexpected_exception(
self,
mock_chat_update,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
make_alert,
):
"""
Test that an unexpected exception propagates as expected.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group=alert_group, raw_request_data={})
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(alert_group=alert_group, channel=slack_channel)
slack_message.set_active_update_task_id("task-id")
# SlackClient.chat_update raises a generic exception
mock_chat_update.side_effect = ValueError("Unexpected error")
update_alert_group_slack_message.apply((slack_message.pk,), task_id="task-id")
# Assert that task id is not cleared, and we don't update last_updated
assert slack_message.get_active_update_task_id() == "task-id"
assert slack_message.last_updated is None

View file

@ -1,3 +1,4 @@
from datetime import timedelta
from unittest.mock import patch
import pytest
@ -6,6 +7,7 @@ from django.utils import timezone
from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord
from apps.slack.client import SlackClient
from apps.slack.errors import SlackAPIError
from apps.slack.models import SlackMessage
from apps.slack.tests.conftest import build_slack_response
@ -28,6 +30,59 @@ def slack_message_setup(
return _slack_message_setup
@patch.object(
SlackClient,
"chat_getPermalink",
return_value=build_slack_response({"ok": True, "permalink": "test_permalink"}),
)
@pytest.mark.django_db
def test_slack_message_permalink(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink=None)
assert slack_message.permalink == "test_permalink"
mock_slack_api_call.assert_called_once()
@patch.object(
SlackClient,
"chat_getPermalink",
side_effect=SlackAPIError(response=build_slack_response({"ok": False, "error": "message_not_found"})),
)
@pytest.mark.django_db
def test_slack_message_permalink_error(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink=None)
assert slack_message.permalink is None
mock_slack_api_call.assert_called_once()
@patch.object(
SlackClient,
"chat_getPermalink",
return_value=build_slack_response({"ok": True, "permalink": "test_permalink"}),
)
@pytest.mark.django_db
def test_slack_message_permalink_cache(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink="cached_permalink")
assert slack_message.permalink == "cached_permalink"
mock_slack_api_call.assert_not_called()
@patch.object(
SlackClient,
"chat_getPermalink",
return_value=build_slack_response({"ok": False, "error": "account_inactive"}),
)
@pytest.mark.django_db
def test_slack_message_permalink_token_revoked(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink=None)
slack_message.slack_team_identity.detected_token_revoked = timezone.now()
slack_message.slack_team_identity.save()
assert slack_message.slack_team_identity is not None
assert slack_message.permalink is None
mock_slack_api_call.assert_not_called()
@pytest.mark.django_db
def test_send_slack_notification(
make_organization_and_user_with_slack_identities,
@ -86,54 +141,230 @@ def test_slack_message_deep_link(
assert slack_message.deep_link == expected
@patch.object(
SlackClient,
"chat_getPermalink",
return_value=build_slack_response({"ok": True, "permalink": "test_permalink"}),
)
@pytest.mark.django_db
def test_slack_message_permalink(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink=None)
assert slack_message.permalink == "test_permalink"
mock_slack_api_call.assert_called_once()
class TestSlackMessageUpdateAlertGroupsMessage:
@patch("apps.slack.models.slack_message.update_alert_group_slack_message")
@pytest.mark.django_db
def test_update_alert_groups_message_no_alert_group(
self,
mock_update_alert_group_slack_message,
make_organization_with_slack_team_identity,
make_slack_channel,
make_slack_message,
):
"""
Test that the method exits early if alert_group is None.
"""
organization, slack_team_identity = make_organization_with_slack_team_identity()
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(channel=slack_channel, alert_group=None, organization=organization)
slack_message.update_alert_groups_message(debounce=True)
@patch.object(
SlackClient,
"chat_getPermalink",
side_effect=SlackAPIError(response=build_slack_response({"ok": False, "error": "message_not_found"})),
)
@pytest.mark.django_db
def test_slack_message_permalink_error(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink=None)
assert slack_message.permalink is None
mock_slack_api_call.assert_called_once()
# Ensure no task is scheduled
mock_update_alert_group_slack_message.apply_async.assert_not_called()
@patch("apps.slack.models.slack_message.update_alert_group_slack_message")
@pytest.mark.django_db
def test_update_alert_groups_message_active_task_exists(
self,
mock_update_alert_group_slack_message,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_alert_group,
make_slack_channel,
make_slack_message,
):
"""
Test that the method exits early if a task ID is set in the cache and debounce is True.
"""
task_id = "some-task-id"
@patch.object(
SlackClient,
"chat_getPermalink",
return_value=build_slack_response({"ok": True, "permalink": "test_permalink"}),
)
@pytest.mark.django_db
def test_slack_message_permalink_cache(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink="cached_permalink")
assert slack_message.permalink == "cached_permalink"
mock_slack_api_call.assert_not_called()
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(channel=slack_channel, alert_group=alert_group)
slack_message.set_active_update_task_id(task_id)
@patch.object(
SlackClient,
"chat_getPermalink",
return_value=build_slack_response({"ok": False, "error": "account_inactive"}),
)
@pytest.mark.django_db
def test_slack_message_permalink_token_revoked(mock_slack_api_call, slack_message_setup):
slack_message = slack_message_setup(cached_permalink=None)
slack_message.slack_team_identity.detected_token_revoked = timezone.now()
slack_message.slack_team_identity.save()
slack_message.update_alert_groups_message(debounce=True)
assert slack_message.slack_team_identity is not None
assert slack_message.permalink is None
# Ensure no task is scheduled
mock_update_alert_group_slack_message.apply_async.assert_not_called()
mock_slack_api_call.assert_not_called()
# Ensure task ID in the cache remains unchanged
assert slack_message.get_active_update_task_id() == task_id
@patch("apps.slack.models.slack_message.celery_uuid")
@patch("apps.slack.models.slack_message.update_alert_group_slack_message")
@pytest.mark.django_db
def test_update_alert_groups_message_last_updated_none(
self,
mock_update_alert_group_slack_message,
mock_celery_uuid,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
):
"""
Test that the method handles last_updated being None and schedules with default debounce interval.
"""
task_id = "some-task-id"
mock_celery_uuid.return_value = task_id
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(channel=slack_channel, alert_group=alert_group, last_updated=None)
assert slack_message.get_active_update_task_id() is None
slack_message.update_alert_groups_message(debounce=True)
# Verify that apply_async was called with correct countdown
mock_update_alert_group_slack_message.apply_async.assert_called_once_with(
(slack_message.pk,),
countdown=SlackMessage.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS,
task_id=task_id,
)
# Verify task ID is set in the cache
assert slack_message.get_active_update_task_id() == task_id
@patch("apps.slack.models.slack_message.celery_uuid")
@patch("apps.slack.models.slack_message.update_alert_group_slack_message")
@pytest.mark.django_db
def test_update_alert_groups_message_schedules_task_correctly(
self,
mock_update_alert_group_slack_message,
mock_celery_uuid,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
):
"""
Test that the method schedules the task with correct countdown and updates the task ID in the cache
"""
task_id = "some-task-id"
mock_celery_uuid.return_value = task_id
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(
channel=slack_channel,
alert_group=alert_group,
last_updated=timezone.now() - timedelta(seconds=10),
)
assert slack_message.get_active_update_task_id() is None
slack_message.update_alert_groups_message(debounce=True)
# Verify that apply_async was called with correct countdown
mock_update_alert_group_slack_message.apply_async.assert_called_once_with(
(slack_message.pk,),
countdown=35,
task_id=task_id,
)
# Verify the task ID in the cache is updated to new task_id
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() == task_id
@patch("apps.slack.models.slack_message.celery_uuid")
@patch("apps.slack.models.slack_message.update_alert_group_slack_message")
@pytest.mark.django_db
def test_update_alert_groups_message_handles_minimum_countdown(
self,
mock_update_alert_group_slack_message,
mock_celery_uuid,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
):
"""
Test that the countdown is at least 10 seconds when the debounce interval has passed.
"""
task_id = "some-task-id"
mock_celery_uuid.return_value = task_id
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
slack_channel = make_slack_channel(slack_team_identity)
slack_message = make_slack_message(
channel=slack_channel,
alert_group=alert_group,
last_updated=timezone.now()
- timedelta(seconds=SlackMessage.ALERT_GROUP_UPDATE_DEBOUNCE_INTERVAL_SECONDS + 1),
)
assert slack_message.get_active_update_task_id() is None
slack_message.update_alert_groups_message(debounce=True)
# Verify that apply_async was called with correct countdown
mock_update_alert_group_slack_message.apply_async.assert_called_once_with(
(slack_message.pk,),
# Since the time since last update exceeds the debounce interval, countdown should be 10
countdown=10,
task_id=task_id,
)
# Verify the task ID in the cache is updated to new task_id
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() == task_id
@patch("apps.slack.models.slack_message.celery_uuid")
@patch("apps.slack.models.slack_message.update_alert_group_slack_message")
@pytest.mark.django_db
def test_update_alert_groups_message_debounce_false_schedules_immediately(
self,
mock_update_alert_group_slack_message,
mock_celery_uuid,
make_organization_with_slack_team_identity,
make_alert_receive_channel,
make_slack_channel,
make_slack_message,
make_alert_group,
):
"""
Test that when debounce is False, the task is scheduled immediately with countdown=0,
even if a task ID is set in the cache.
"""
new_task_id = "new-task-id"
mock_celery_uuid.return_value = new_task_id
organization, slack_team_identity = make_organization_with_slack_team_identity()
alert_receive_channel = make_alert_receive_channel(organization)
alert_group = make_alert_group(alert_receive_channel)
slack_channel = make_slack_channel(slack_team_identity)
# Set up SlackMessage with existing task ID in the cache
slack_message = make_slack_message(channel=slack_channel, alert_group=alert_group)
slack_message.set_active_update_task_id("existing-task-id")
slack_message.update_alert_groups_message(debounce=False)
# Verify that apply_async was called with countdown=0
mock_update_alert_group_slack_message.apply_async.assert_called_once_with(
(slack_message.pk,),
countdown=0,
task_id=new_task_id,
)
# Verify the task ID in the cache is updated to new task_id
slack_message.refresh_from_db()
assert slack_message.get_active_update_task_id() == new_task_id

View file

@ -103,10 +103,5 @@ def format_datetime_to_slack_with_time(timestamp: float, format: SlackDateFormat
return _format_datetime_to_slack(timestamp, f"{{{format}}} {{time}}")
def get_cache_key_update_incident_slack_message(alert_group_pk: str) -> str:
CACHE_KEY_PREFIX = "update_incident_slack_message"
return f"{CACHE_KEY_PREFIX}_{alert_group_pk}"
def get_populate_slack_channel_task_id_key(slack_team_identity_id: str) -> str:
return f"SLACK_CHANNELS_TASK_ID_TEAM_{slack_team_identity_id}"

View file

@ -170,7 +170,9 @@ CELERY_TASK_ROUTES = {
"apps.slack.tasks.send_message_to_thread_if_bot_not_in_channel": {"queue": "slack"},
"apps.slack.tasks.start_update_slack_user_group_for_schedules": {"queue": "slack"},
"apps.slack.tasks.unpopulate_slack_user_identities": {"queue": "slack"},
# TODO: remove apps.slack.tasks.update_incident_slack_message after current tasks in queue have been processed
"apps.slack.tasks.update_incident_slack_message": {"queue": "slack"},
"apps.slack.tasks.update_alert_group_slack_message": {"queue": "slack"},
"apps.slack.tasks.update_slack_user_group_for_schedules": {"queue": "slack"},
"apps.slack.representatives.alert_group_representative.on_create_alert_slack_representative_async": {
"queue": "slack"