commit
4f837bf353
24 changed files with 18 additions and 1138 deletions
|
|
@ -12,13 +12,11 @@ from apps.alerts.models.alert_group_log_record import AlertGroupLogRecord
|
|||
from apps.alerts.models.escalation_policy import EscalationPolicy
|
||||
from apps.alerts.tasks import (
|
||||
custom_webhook_result,
|
||||
declare_incident,
|
||||
notify_all_task,
|
||||
notify_group_task,
|
||||
notify_user_task,
|
||||
resolve_by_last_step_task,
|
||||
)
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.schedules.ical_utils import list_users_to_notify_from_ical
|
||||
from apps.user_management.models import User
|
||||
|
||||
|
|
@ -138,7 +136,6 @@ class EscalationPolicySnapshot:
|
|||
EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: self._escalation_step_notify_if_num_alerts_in_time_window,
|
||||
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS: self._escalation_step_notify_multiple_users,
|
||||
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT: self._escalation_step_notify_multiple_users,
|
||||
EscalationPolicy.STEP_DECLARE_INCIDENT: self._escalation_step_declare_incident,
|
||||
None: self._escalation_step_not_configured,
|
||||
}
|
||||
result = action_map[self.step](alert_group, reason)
|
||||
|
|
@ -413,32 +410,6 @@ class EscalationPolicySnapshot:
|
|||
|
||||
self._execute_tasks(tasks)
|
||||
|
||||
def _escalation_step_declare_incident(self, alert_group: "AlertGroup", _reason: str) -> None:
    """Run the "Declare Incident" escalation step for ``alert_group``.

    When the step is enabled for the alert group's organization, an immutable
    ``declare_incident`` task signature is built and handed to
    ``self._execute_tasks``. Otherwise an escalation-failed log record is saved
    and no task is scheduled.

    ``_reason`` is accepted for signature parity with the other step handlers
    and is not used.
    """
    organization = alert_group.channel.organization

    if is_declare_incident_step_enabled(organization=organization):
        # immutable: the signature carries its own arguments only
        signature = declare_incident.signature(
            args=(alert_group.pk,),
            kwargs={"escalation_policy_pk": self.id, "severity": self.severity},
            immutable=True,
        )
        self._execute_tasks([signature])
        return

    # step is disabled: leave a trace in the alert group log and stop here
    failure_record = AlertGroupLogRecord(
        type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
        alert_group=alert_group,
        reason="Declare Incident step is not enabled",
        escalation_policy=self.escalation_policy,
        escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
        escalation_policy_step=self.step,
    )
    failure_record.save()
|
||||
|
||||
def _escalation_step_notify_if_time(self, alert_group: "AlertGroup", _reason: str) -> StepExecutionResultData:
|
||||
eta = None
|
||||
|
||||
|
|
|
|||
|
|
@ -22,20 +22,4 @@ class Migration(migrations.Migration):
|
|||
name='step',
|
||||
field=models.IntegerField(choices=[(0, 'Wait'), (1, 'Notify User'), (2, 'Notify Whole Channel'), (3, 'Repeat Escalation (5 times max)'), (4, 'Resolve'), (5, 'Notify Group'), (6, 'Notify Schedule'), (7, 'Notify User (Important)'), (8, 'Notify Group (Important)'), (9, 'Notify Schedule (Important)'), (10, 'Trigger Outgoing Webhook'), (11, 'Notify User (next each time)'), (12, 'Continue escalation only if time is from'), (13, 'Notify multiple Users'), (14, 'Notify multiple Users (Important)'), (15, 'Continue escalation if >X alerts per Y minutes'), (16, 'Trigger Webhook'), (17, 'Notify all users in a Team'), (18, 'Notify all users in a Team (Important)'), (19, 'Declare Incident')], default=None, null=True),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='DeclaredIncident',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('incident_id', models.CharField(db_index=True, max_length=50)),
|
||||
('created_at', models.DateTimeField(auto_now_add=True)),
|
||||
('is_active', models.BooleanField(default=True)),
|
||||
('channel_filter', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='declared_incidents', to='alerts.channelfilter')),
|
||||
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='declared_incidents', to='user_management.organization')),
|
||||
],
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='alertgroup',
|
||||
name='declared_incident',
|
||||
field=models.ForeignKey(default=None, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='attached_alert_groups', to='alerts.declaredincident'),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from .alert_receive_channel_connection import AlertGroupExternalID # noqa: F401
|
|||
from .alert_receive_channel_connection import AlertReceiveChannelConnection # noqa: F401
|
||||
from .channel_filter import ChannelFilter # noqa: F401
|
||||
from .custom_button import CustomButton # noqa: F401
|
||||
from .declared_incident import DeclaredIncident # noqa: F401
|
||||
from .escalation_chain import EscalationChain # noqa: F401
|
||||
from .escalation_policy import EscalationPolicy # noqa: F401
|
||||
from .grafana_alerting_contact_point import GrafanaAlertingContactPoint # noqa: F401
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ if typing.TYPE_CHECKING:
|
|||
AlertGroupLogRecord,
|
||||
AlertReceiveChannel,
|
||||
BundledNotification,
|
||||
DeclaredIncident,
|
||||
ResolutionNote,
|
||||
ResolutionNoteSlackMessage,
|
||||
)
|
||||
|
|
@ -207,7 +206,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
slack_messages: "RelatedManager['SlackMessage']"
|
||||
users: "RelatedManager['User']"
|
||||
labels: "RelatedManager['AlertGroupAssociatedLabel']"
|
||||
declared_incident: typing.Optional["DeclaredIncident"]
|
||||
|
||||
objects: models.Manager["AlertGroup"] = AlertGroupQuerySet.as_manager()
|
||||
|
||||
|
|
@ -423,17 +421,8 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
# https://code.djangoproject.com/ticket/28545
|
||||
is_open_for_grouping = models.BooleanField(default=None, null=True, blank=True)
|
||||
|
||||
# todo: rework using this field to use DeclaredIncident model field instead
|
||||
grafana_incident_id = models.CharField(max_length=100, null=True, default=None)
|
||||
|
||||
declared_incident = models.ForeignKey(
|
||||
"alerts.DeclaredIncident",
|
||||
on_delete=models.SET_NULL,
|
||||
null=True,
|
||||
default=None,
|
||||
related_name="attached_alert_groups",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_silenced_state_filter():
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -11,24 +11,18 @@ from rest_framework.fields import DateTimeField
|
|||
|
||||
from apps.alerts import tasks
|
||||
from apps.alerts.constants import ActionSource
|
||||
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
|
||||
from apps.alerts.utils import render_relative_timeline
|
||||
from apps.slack.slack_formatter import SlackFormatter
|
||||
from common.utils import clean_markup
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from apps.alerts.models import AlertGroup, CustomButton, EscalationPolicy, Invitation
|
||||
from apps.user_management.models import Organization, User
|
||||
from apps.user_management.models import User
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class RelatedIncidentData(typing.TypedDict):
    """Incident link and title rendered from a log record's step-specific info."""

    # URL of the related incident; None when no incident id is available
    incident_link: typing.Optional[str]
    # title to display for the related incident
    incident_title: str
|
||||
|
||||
|
||||
class AlertGroupLogRecord(models.Model):
|
||||
alert_group: "AlertGroup"
|
||||
author: typing.Optional["User"]
|
||||
|
|
@ -167,9 +161,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
ERROR_ESCALATION_TRIGGER_CUSTOM_WEBHOOK_ERROR,
|
||||
ERROR_ESCALATION_NOTIFY_TEAM_MEMBERS_STEP_IS_NOT_CONFIGURED,
|
||||
ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED,
|
||||
ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
|
||||
ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
|
||||
) = range(22)
|
||||
) = range(20)
|
||||
|
||||
type = models.IntegerField(choices=TYPE_CHOICES)
|
||||
|
||||
|
|
@ -233,60 +225,16 @@ class AlertGroupLogRecord(models.Model):
|
|||
escalation_policy_step = models.IntegerField(null=True, default=None)
|
||||
step_specific_info = JSONField(null=True, default=None)
|
||||
|
||||
STEP_SPECIFIC_INFO_KEYS = [
|
||||
"schedule_name",
|
||||
"custom_button_name",
|
||||
"usergroup_handle",
|
||||
"source_integration_name",
|
||||
"incident_link",
|
||||
"incident_title",
|
||||
]
|
||||
|
||||
def _make_log_line_link(self, url, title, html=False, for_slack=False, substitute_with_tag=False):
|
||||
if html and url:
|
||||
return f"<a href='{url}'>{title}</a>"
|
||||
elif for_slack and url:
|
||||
return f"<{url}|{title}>"
|
||||
elif substitute_with_tag:
|
||||
return f"{{{{{substitute_with_tag}}}}}"
|
||||
else:
|
||||
return title
|
||||
STEP_SPECIFIC_INFO_KEYS = ["schedule_name", "custom_button_name", "usergroup_handle", "source_integration_name"]
|
||||
|
||||
def render_log_line_json(self):
|
||||
time = humanize.naturaldelta(self.alert_group.started_at - self.created_at)
|
||||
created_at = DateTimeField().to_representation(self.created_at)
|
||||
organization = self.alert_group.channel.organization
|
||||
author = self.author.short(organization) if self.author is not None else None
|
||||
escalation_chain = self.alert_group.channel_filter.escalation_chain if self.alert_group.channel_filter else None
|
||||
step_info = self.get_step_specific_info()
|
||||
related_incident = self.render_incident_data_from_step_info(organization, step_info)
|
||||
escalation_chain_data = (
|
||||
{
|
||||
"pk": escalation_chain.public_primary_key,
|
||||
"title": escalation_chain.name,
|
||||
}
|
||||
if escalation_chain
|
||||
else None
|
||||
)
|
||||
schedule = (
|
||||
{
|
||||
"pk": self.escalation_policy.notify_schedule.public_primary_key,
|
||||
"title": self.escalation_policy.notify_schedule.name,
|
||||
}
|
||||
if self.escalation_policy and self.escalation_policy.notify_schedule
|
||||
else None
|
||||
)
|
||||
webhook = (
|
||||
{
|
||||
"pk": step_info["webhook_id"],
|
||||
"title": step_info.get("webhook_name", "webhook"),
|
||||
}
|
||||
if step_info and "webhook_id" in step_info
|
||||
else None
|
||||
)
|
||||
|
||||
sf = SlackFormatter(organization)
|
||||
action = sf.format(self.rendered_log_line_action(substitute_with_tag=True))
|
||||
action = sf.format(self.rendered_log_line_action(substitute_author_with_tag=True))
|
||||
action = clean_markup(action)
|
||||
|
||||
result = {
|
||||
|
|
@ -296,10 +244,6 @@ class AlertGroupLogRecord(models.Model):
|
|||
"type": self.type,
|
||||
"created_at": created_at,
|
||||
"author": author,
|
||||
"incident": related_incident,
|
||||
"escalation_chain": escalation_chain_data,
|
||||
"schedule": schedule,
|
||||
"webhook": webhook,
|
||||
}
|
||||
return result
|
||||
|
||||
|
|
@ -314,7 +258,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
result += self.rendered_log_line_action(for_slack=for_slack, html=html)
|
||||
return result
|
||||
|
||||
def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_tag=False):
|
||||
def rendered_log_line_action(self, for_slack=False, html=False, substitute_author_with_tag=False):
|
||||
from apps.alerts.models import EscalationPolicy
|
||||
|
||||
result = ""
|
||||
|
|
@ -332,7 +276,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
elif self.action_source == ActionSource.BACKSYNC:
|
||||
author_name = "source integration " + step_specific_info.get("source_integration_name", "")
|
||||
elif self.author:
|
||||
if substitute_with_tag:
|
||||
if substitute_author_with_tag:
|
||||
author_name = "{{author}}"
|
||||
elif for_slack:
|
||||
author_name = self.author.get_username_with_slack_verbal()
|
||||
|
|
@ -359,9 +303,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
result += f'alert group assigned to route "{channel_filter.str_for_clients}"'
|
||||
|
||||
if escalation_chain is not None:
|
||||
tag = "escalation_chain" if substitute_with_tag else False
|
||||
escalation_chain_text = self._make_log_line_link(None, escalation_chain.name, html, for_slack, tag)
|
||||
result += f' with escalation chain "{escalation_chain_text}"'
|
||||
result += f' with escalation chain "{escalation_chain.name}"'
|
||||
else:
|
||||
result += " with no escalation chain, skipping escalation"
|
||||
else:
|
||||
|
|
@ -437,19 +379,9 @@ class AlertGroupLogRecord(models.Model):
|
|||
important_text = ""
|
||||
if escalation_policy_step == EscalationPolicy.STEP_NOTIFY_SCHEDULE_IMPORTANT:
|
||||
important_text = " (Important)"
|
||||
tag = "schedule" if substitute_with_tag else False
|
||||
schedule_text = self._make_log_line_link(None, schedule_name, html, for_slack, tag)
|
||||
result += f'triggered step "Notify on-call from Schedule {schedule_text}{important_text}"'
|
||||
result += f'triggered step "Notify on-call from Schedule {schedule_name}{important_text}"'
|
||||
elif escalation_policy_step == EscalationPolicy.STEP_REPEAT_ESCALATION_N_TIMES:
|
||||
result += "escalation started from the beginning"
|
||||
elif escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
organization = self.alert_group.channel.organization
|
||||
incident_data = self.render_incident_data_from_step_info(organization, step_specific_info)
|
||||
incident_link = incident_data["incident_link"]
|
||||
incident_title = incident_data["incident_title"]
|
||||
tag = "related_incident" if substitute_with_tag else False
|
||||
incident_text = self._make_log_line_link(incident_link, incident_title, html, for_slack, tag)
|
||||
result += self.reason + f": {incident_text}"
|
||||
else:
|
||||
result += f'triggered step "{EscalationPolicy.get_step_display_name(escalation_policy_step)}"'
|
||||
elif self.type == AlertGroupLogRecord.TYPE_SILENCE:
|
||||
|
|
@ -553,10 +485,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
trigger = f"{author_name}"
|
||||
else:
|
||||
trigger = trigger or "escalation chain"
|
||||
tag = "webhook" if substitute_with_tag else False
|
||||
webhook_text = self._make_log_line_link(None, webhook_name, html, for_slack, tag)
|
||||
result += f"outgoing webhook `{webhook_text}` triggered by {trigger}"
|
||||
|
||||
result += f"outgoing webhook `{webhook_name}` triggered by {trigger}"
|
||||
elif self.type == AlertGroupLogRecord.TYPE_FAILED_ATTACHMENT:
|
||||
if self.alert_group.slack_message is not None:
|
||||
result += (
|
||||
|
|
@ -665,32 +594,8 @@ class AlertGroupLogRecord(models.Model):
|
|||
result += f"failed to notify User Group{usergroup_handle_text} in Slack"
|
||||
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED:
|
||||
result += 'skipped escalation step "Trigger Outgoing Webhook" because it is disabled'
|
||||
elif (
|
||||
self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
|
||||
):
|
||||
result += 'skipped escalation step "Declare Incident": step is not enabled'
|
||||
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED:
|
||||
result += "failed to declare an Incident"
|
||||
if self.reason:
|
||||
result += f": {self.reason}"
|
||||
return result
|
||||
|
||||
def render_incident_data_from_step_info(
    self, organization: "Organization", step_specific_info: dict
) -> RelatedIncidentData | None:
    """Build the related-incident link/title pair from ``step_specific_info``.

    Returns ``None`` when ``step_specific_info`` is empty or lacks either the
    ``incident_title`` or the ``incident_id`` key. Otherwise returns a dict with:
      - ``incident_link``: URL built by ``get_incident_url`` for a truthy
        incident id, else ``None``
      - ``incident_title``: the stored title, or ``DEFAULT_BACKUP_TITLE`` when
        the stored title is falsy
    """
    from apps.alerts.models.declared_incident import get_incident_url

    if not step_specific_info:
        return None
    if "incident_title" not in step_specific_info or "incident_id" not in step_specific_info:
        return None

    incident_id = step_specific_info["incident_id"]
    link = get_incident_url(organization, incident_id) if incident_id else None
    title = step_specific_info["incident_title"] or DEFAULT_BACKUP_TITLE
    return {"incident_link": link, "incident_title": title}
|
||||
|
||||
def get_step_specific_info(self):
|
||||
step_specific_info = None
|
||||
# in some cases step_specific_info was saved with using json.dumps
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
import typing
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from django.db import models
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from django.db.models.manager import RelatedManager
|
||||
|
||||
from apps.alerts.models import AlertGroup, ChannelFilter
|
||||
from apps.user_management.models import Organization
|
||||
|
||||
|
||||
def get_incident_url(organization, incident_id) -> str:
    """Return the Grafana Incident app URL for ``incident_id``.

    Args:
        organization: object exposing ``grafana_url`` (the Grafana base URL).
        incident_id: identifier of the incident to link to.

    Returns:
        ``<grafana_url>/a/grafana-incident-app/incidents/<incident_id>``.

    ``urljoin`` resolves a relative reference against the *directory* of the
    base URL, so a base without a trailing slash loses its last path segment
    (``https://host/grafana`` would yield ``https://host/a/...``). The base is
    therefore normalized to end with ``/`` before joining. A falsy base is
    passed through untouched, in which case ``urljoin`` returns the relative
    path as-is.
    """
    base_url = organization.grafana_url
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    return urljoin(base_url, f"a/grafana-incident-app/incidents/{incident_id}")
|
||||
|
||||
|
||||
class DeclaredIncident(models.Model):
    """An incident (identified by ``incident_id``) tracked for an organization and route."""

    # reverse side of AlertGroup.declared_incident (related_name="attached_alert_groups")
    attached_alert_groups: "RelatedManager['AlertGroup']"
    channel_filter: typing.Optional["ChannelFilter"]
    organization: "Organization"

    # identifier of the incident; indexed for lookups
    incident_id = models.CharField(db_index=True, max_length=50)
    # owning organization; deleting it deletes its declared incidents
    organization = models.ForeignKey(
        "user_management.Organization",
        on_delete=models.CASCADE,
        related_name="declared_incidents",
    )
    # route the incident was declared for; nulled if the route is deleted
    channel_filter = models.ForeignKey(
        "alerts.ChannelFilter",
        on_delete=models.SET_NULL,
        null=True,
        related_name="declared_incidents",
    )
    # set automatically when the row is first created
    created_at = models.DateTimeField(auto_now_add=True)
    # whether the incident is still considered active; defaults to True
    is_active = models.BooleanField(default=True)

    def get_incident_link(self) -> str:
        """Return the URL of this incident, built via ``get_incident_url``."""
        return get_incident_url(self.organization, self.incident_id)
|
||||
|
|
@ -92,7 +92,6 @@ class EscalationPolicy(OrderedModel):
|
|||
STEP_NOTIFY_IF_TIME,
|
||||
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
|
||||
STEP_REPEAT_ESCALATION_N_TIMES,
|
||||
STEP_DECLARE_INCIDENT,
|
||||
]
|
||||
# Steps can be stored in db while interacting with internal api
|
||||
# Includes important versions of default steps
|
||||
|
|
@ -219,7 +218,6 @@ class EscalationPolicy(OrderedModel):
|
|||
STEP_NOTIFY_IF_TIME,
|
||||
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
|
||||
STEP_REPEAT_ESCALATION_N_TIMES,
|
||||
STEP_DECLARE_INCIDENT,
|
||||
]
|
||||
|
||||
PUBLIC_STEP_CHOICES_MAP = {
|
||||
|
|
@ -241,7 +239,6 @@ class EscalationPolicy(OrderedModel):
|
|||
STEP_NOTIFY_IF_TIME: "notify_if_time_from_to",
|
||||
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: "notify_if_num_alerts_in_window",
|
||||
STEP_REPEAT_ESCALATION_N_TIMES: "repeat_escalation",
|
||||
STEP_DECLARE_INCIDENT: "declare_incident",
|
||||
}
|
||||
|
||||
public_primary_key = models.CharField(
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from .alert_group_web_title_cache import ( # noqa:F401
|
|||
)
|
||||
from .check_escalation_finished import check_escalation_finished_task # noqa: F401
|
||||
from .custom_webhook_result import custom_webhook_result # noqa: F401
|
||||
from .declare_incident import declare_incident # noqa: F401
|
||||
from .delete_alert_group import delete_alert_group # noqa: F401
|
||||
from .delete_alert_group import finish_delete_alert_group # noqa: F401
|
||||
from .delete_alert_group import send_alert_group_signal_for_delete # noqa: F401
|
||||
|
|
|
|||
|
|
@ -1,149 +0,0 @@
|
|||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
|
||||
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
|
||||
from common.incident_api.client import (
|
||||
DEFAULT_INCIDENT_SEVERITY,
|
||||
DEFAULT_INCIDENT_STATUS,
|
||||
IncidentAPIClient,
|
||||
IncidentAPIException,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ATTACHMENT_CAPTION = "OnCall Alert Group"
|
||||
ERROR_SEVERITY_NOT_FOUND = "Severity.FindOne: not found"
|
||||
MAX_RETRIES = 1 if settings.DEBUG else 10
|
||||
MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT = 5
|
||||
|
||||
|
||||
def _attach_alert_group_to_incident(alert_group, incident_id, incident_title, escalation_policy, attached=False):
    """Link ``alert_group`` to the incident ``incident_id`` and log the escalation step.

    The ``DeclaredIncident`` row is fetched by ``incident_id`` or created with
    the alert group's organization and route. The alert group is then pointed
    at it and an escalation-triggered log record is written. ``attached``
    selects the log reason: attaching to an existing incident vs. declaring a
    new one.
    """
    from apps.alerts.models import AlertGroupLogRecord, DeclaredIncident, EscalationPolicy

    # values used only when the DeclaredIncident row does not exist yet
    creation_defaults = {
        "organization": alert_group.channel.organization,
        "channel_filter": alert_group.channel_filter,
    }
    incident_record = DeclaredIncident.objects.get_or_create(
        incident_id=incident_id,
        defaults=creation_defaults,
    )[0]

    alert_group.declared_incident = incident_record
    alert_group.save(update_fields=["declared_incident"])

    if attached:
        log_reason = "attached to existing incident"
    else:
        log_reason = "incident declared"

    AlertGroupLogRecord.objects.create(
        type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
        reason=log_reason,
        alert_group=alert_group,
        step_specific_info={"incident_id": incident_id, "incident_title": incident_title},
        escalation_policy=escalation_policy,
        escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
    )
|
||||
|
||||
|
||||
def _create_error_log_record(alert_group, escalation_policy, reason=""):
    """Write an escalation-failed log record for a Declare Incident step.

    The record carries the "incident could not be declared" error code;
    ``reason`` is stored as given (empty by default).
    """
    from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy

    record_fields = {
        "type": AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
        "escalation_error_code": AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
        "reason": reason,
        "alert_group": alert_group,
        "escalation_policy": escalation_policy,
        "escalation_policy_step": EscalationPolicy.STEP_DECLARE_INCIDENT,
    }
    AlertGroupLogRecord.objects.create(**record_fields)
|
||||
|
||||
|
||||
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES)
def declare_incident(alert_group_pk, escalation_policy_pk, severity=None):
    """Declare an incident for an alert group, or attach the group to an active one.

    Flow:
      1. Refuse (with an error log record) when the alert group is on a default route.
      2. Give up (with an error log record) once the retry counter reaches MAX_RETRIES.
      3. Look for the most recent active DeclaredIncident of the same organization
         and route. If the incident API reports it with the default status, attach
         the alert group to it; if it is missing (404) or has another status, mark
         it inactive.
      4. When there is no active incident, create a new one through the incident API
         and associate the alert group with it.

    Args:
        alert_group_pk: primary key of the AlertGroup to handle.
        escalation_policy_pk: primary key of the EscalationPolicy that triggered the
            step; may be falsy, and a missing policy resolves to None.
        severity: severity for a newly created incident. The value
            EscalationPolicy.SEVERITY_SET_FROM_LABEL means "take it from the alert
            group's `severity` label". Falls back to DEFAULT_INCIDENT_SEVERITY.

    Raises:
        IncidentAPIException: re-raised on unexpected incident API failures; per the
            decorator arguments the task is auto-retried on any exception.
    """
    from apps.alerts.models import AlertGroup, DeclaredIncident, EscalationPolicy

    alert_group = AlertGroup.objects.get(pk=alert_group_pk)
    organization = alert_group.channel.organization
    escalation_policy = None
    if escalation_policy_pk:
        # .first() yields None if the policy no longer exists
        escalation_policy = EscalationPolicy.objects.filter(pk=escalation_policy_pk).first()

    # NOTE(review): this dereferences channel_filter unconditionally; other code in this
    # project guards against alert_group.channel_filter being None — confirm it cannot
    # be None here, otherwise this raises AttributeError on every retry.
    if alert_group.channel_filter.is_default:
        _create_error_log_record(
            alert_group, escalation_policy, reason="Declare incident step is not enabled for default routes"
        )
        return

    # last attempt: record the failure instead of calling the incident API again
    if declare_incident.request.retries == MAX_RETRIES:
        _create_error_log_record(alert_group, escalation_policy)
        return

    incident_client = IncidentAPIClient(organization.grafana_url, organization.api_token)

    # check for currently active related incident in the same route (channel_filter)
    existing_incident = (
        DeclaredIncident.objects.filter(
            organization=organization, channel_filter=alert_group.channel_filter, is_active=True
        )
        .order_by("-created_at")
        .first()
    )

    if existing_incident:
        incident_id = existing_incident.incident_id
        try:
            # get existing incident details
            incident_data, _ = incident_client.get_incident(incident_id)
        except IncidentAPIException as e:
            logger.error(f"Error getting incident details: {e.msg}")
            if e.status == 404:
                # incident not found, mark as not opened
                existing_incident.is_active = False
                existing_incident.save(update_fields=["is_active"])
            else:
                # raise (and retry)
                raise
        else:
            # incident exists, check if it is still active
            if incident_data["status"] == DEFAULT_INCIDENT_STATUS:
                # attach to incident context
                incident_title = incident_data["title"]
                num_attached = AlertGroup.objects.filter(declared_incident=existing_incident).count()
                # only post an activity item while under the per-incident cap
                if num_attached < MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT:
                    try:
                        incident_data, _ = incident_client.add_activity(incident_id, alert_group.web_link)
                    except IncidentAPIException as e:
                        # best effort: a failed activity update is logged, not retried
                        logger.error(f"Error attaching to existing incident: {e.msg}")
                # setup association between alert group and incident (even if not attached)
                _attach_alert_group_to_incident(
                    alert_group, incident_id, incident_title, escalation_policy, attached=True
                )
            else:
                # incident is no longer in the default status: stop reusing it
                existing_incident.is_active = False
                existing_incident.save(update_fields=["is_active"])

    # reached when no incident was found, or the found one was just marked inactive above
    if existing_incident is None or not existing_incident.is_active:
        # create new incident
        if severity == EscalationPolicy.SEVERITY_SET_FROM_LABEL:
            severity_label = alert_group.labels.filter(key_name="severity").first()
            severity = severity_label.value_name if severity_label else None
        severity = severity or DEFAULT_INCIDENT_SEVERITY
        try:
            incident_data, _ = incident_client.create_incident(
                alert_group.web_title_cache if alert_group.web_title_cache else DEFAULT_BACKUP_TITLE,
                severity=severity,
                attachCaption=ATTACHMENT_CAPTION,
                attachURL=alert_group.web_link,
            )
        except IncidentAPIException as e:
            logger.error(f"Error creating new incident: {e.msg}")
            if ERROR_SEVERITY_NOT_FOUND.lower() in e.msg.lower() and severity != DEFAULT_INCIDENT_SEVERITY:
                # invalid severity, retry with default severity
                # (scheduled as a fresh task call rather than a retry of this one)
                declare_incident.apply_async(
                    args=(alert_group_pk, escalation_policy_pk),
                    kwargs={"severity": DEFAULT_INCIDENT_SEVERITY},
                )
                return
            # else raise (and retry)
            raise
        else:
            _attach_alert_group_to_incident(
                alert_group, incident_data["incidentID"], incident_data["title"], escalation_policy
            )
|
||||
|
|
@ -8,7 +8,6 @@ from apps.alerts.models import (
|
|||
AlertReceiveChannelConnection,
|
||||
ChannelFilter,
|
||||
CustomButton,
|
||||
DeclaredIncident,
|
||||
EscalationChain,
|
||||
EscalationPolicy,
|
||||
Invitation,
|
||||
|
|
@ -92,8 +91,3 @@ class InvitationFactory(factory.DjangoModelFactory):
|
|||
class UserNotificationBundleFactory(factory.DjangoModelFactory):
    """Test factory producing ``UserNotificationBundle`` instances."""

    class Meta:
        model = UserNotificationBundle
|
||||
|
||||
|
||||
class DeclaredIncidentFactory(factory.DjangoModelFactory):
    """Test factory producing ``DeclaredIncident`` instances."""

    class Meta:
        model = DeclaredIncident
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@ from unittest.mock import patch
|
|||
|
||||
import pytest
|
||||
|
||||
from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy
|
||||
from apps.schedules.models import OnCallScheduleWeb
|
||||
from apps.alerts.models import AlertGroupLogRecord
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
|
|
@ -38,138 +37,3 @@ def test_trigger_update_signal(
|
|||
with patch("apps.alerts.tasks.send_update_log_report_signal") as mock_update_log_signal:
|
||||
alert_group.log_records.create(type=log_type)
|
||||
mock_update_log_signal.apply_async.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.parametrize(
    "for_slack, html, substitute_with_tag, expected",
    [
        (True, False, False, 'with escalation chain "Escalation name"'),
        (False, True, False, 'with escalation chain "Escalation name"'),
        (False, False, True, 'with escalation chain "{{escalation_chain}}'),
    ],
)
def test_log_record_escalation_chain_link(
    make_organization_with_slack_team_identity,
    make_alert_receive_channel,
    make_escalation_chain,
    make_channel_filter,
    make_alert_group,
    for_slack,
    html,
    substitute_with_tag,
    expected,
):
    """A route-assigned log record renders its escalation chain per output mode.

    Also checks that the JSON rendering exposes the chain's public key and name.
    """
    org, _ = make_organization_with_slack_team_identity()
    integration = make_alert_receive_channel(org)
    chain = make_escalation_chain(org, name="Escalation name")
    route = make_channel_filter(integration, escalation_chain=chain)
    group = make_alert_group(integration, channel_filter=route)
    group.raw_escalation_snapshot = group.build_raw_escalation_snapshot()

    record = group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED)

    rendered = record.rendered_log_line_action(
        for_slack=for_slack, html=html, substitute_with_tag=substitute_with_tag
    )
    assert expected in rendered

    expected_chain_data = {"pk": chain.public_primary_key, "title": chain.name}
    assert record.render_log_line_json().get("escalation_chain") == expected_chain_data
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.parametrize(
    "for_slack, html, substitute_with_tag, expected",
    [
        (True, False, False, "Notify on-call from Schedule 'Schedule name'"),
        (False, True, False, "Notify on-call from Schedule 'Schedule name'"),
        (False, False, True, "Notify on-call from Schedule {{schedule}}"),
    ],
)
def test_log_record_schedule_link(
    make_organization_with_slack_team_identity,
    make_alert_receive_channel,
    make_channel_filter,
    make_alert_group,
    make_schedule,
    make_escalation_chain,
    make_escalation_policy,
    for_slack,
    html,
    substitute_with_tag,
    expected,
):
    """A notify-schedule log record renders the schedule name per output mode.

    Also checks that the JSON rendering exposes the schedule's public key and name.
    """
    org, _ = make_organization_with_slack_team_identity()
    integration = make_alert_receive_channel(org)
    group = make_alert_group(integration)
    on_call_schedule = make_schedule(org, schedule_class=OnCallScheduleWeb, name="Schedule name")
    chain = make_escalation_chain(org, name="Escalation name")
    route = make_channel_filter(integration, escalation_chain=chain)
    policy = make_escalation_policy(
        escalation_chain=route.escalation_chain,
        escalation_policy_step=EscalationPolicy.STEP_NOTIFY_SCHEDULE,
        notify_schedule=on_call_schedule,
    )

    record = group.log_records.create(
        type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
        step_specific_info={"schedule_name": on_call_schedule.name},
        escalation_policy=policy,
    )

    rendered = record.rendered_log_line_action(
        for_slack=for_slack, html=html, substitute_with_tag=substitute_with_tag
    )
    assert expected in rendered

    expected_schedule_data = {"pk": on_call_schedule.public_primary_key, "title": on_call_schedule.name}
    assert record.render_log_line_json().get("schedule") == expected_schedule_data
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.parametrize(
    "for_slack, html, substitute_with_tag, expected",
    [
        (True, False, False, "outgoing webhook `Webhook name`"),
        (False, True, False, "outgoing webhook `Webhook name`"),
        (False, False, True, "outgoing webhook `{{webhook}}`"),
    ],
)
def test_log_record_webhook_link(
    make_organization_with_slack_team_identity,
    make_alert_receive_channel,
    make_channel_filter,
    make_alert_group,
    make_custom_webhook,
    make_escalation_chain,
    make_escalation_policy,
    for_slack,
    html,
    substitute_with_tag,
    expected,
):
    """A webhook-triggered log record renders the webhook name per output mode.

    Also checks that the JSON rendering exposes the webhook's public key and name.
    """
    org, _ = make_organization_with_slack_team_identity()
    integration = make_alert_receive_channel(org)
    group = make_alert_group(integration)
    hook = make_custom_webhook(org, name="Webhook name")
    chain = make_escalation_chain(org, name="Escalation name")
    route = make_channel_filter(integration, escalation_chain=chain)
    policy = make_escalation_policy(
        escalation_chain=route.escalation_chain,
        escalation_policy_step=EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK,
        custom_webhook=hook,
    )

    record = group.log_records.create(
        type=AlertGroupLogRecord.TYPE_CUSTOM_WEBHOOK_TRIGGERED,
        step_specific_info={"webhook_id": hook.public_primary_key, "webhook_name": hook.name},
        escalation_policy=policy,
    )

    rendered = record.rendered_log_line_action(
        for_slack=for_slack, html=html, substitute_with_tag=substitute_with_tag
    )
    assert expected in rendered

    expected_webhook_data = {"pk": hook.public_primary_key, "title": hook.name}
    assert record.render_log_line_json().get("webhook") == expected_webhook_data
|
||||
|
|
|
|||
|
|
@ -1,335 +0,0 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
import httpretty
|
||||
import pytest
|
||||
|
||||
from apps.alerts.models import AlertGroupLogRecord, DeclaredIncident, EscalationPolicy
|
||||
from apps.alerts.tasks.declare_incident import (
|
||||
ATTACHMENT_CAPTION,
|
||||
DEFAULT_BACKUP_TITLE,
|
||||
DEFAULT_INCIDENT_SEVERITY,
|
||||
ERROR_SEVERITY_NOT_FOUND,
|
||||
MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT,
|
||||
declare_incident,
|
||||
)
|
||||
from common.incident_api.client import IncidentAPIException
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_alert_group_and_escalation_step(
|
||||
make_organization,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group,
|
||||
make_channel_filter,
|
||||
make_escalation_chain,
|
||||
make_escalation_policy,
|
||||
):
|
||||
def _setup_alert_group_and_escalation_step(is_default_route=False, already_declared_incident=False):
|
||||
organization = make_organization(grafana_url="https://stack.grafana.net", api_token="token")
|
||||
alert_receive_channel = make_alert_receive_channel(organization=organization)
|
||||
escalation_chain = make_escalation_chain(organization)
|
||||
declare_incident_step = make_escalation_policy(
|
||||
escalation_chain=escalation_chain,
|
||||
escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
)
|
||||
channel_filter = make_channel_filter(
|
||||
alert_receive_channel,
|
||||
escalation_chain=escalation_chain,
|
||||
is_default=is_default_route,
|
||||
)
|
||||
alert_group = make_alert_group(
|
||||
alert_receive_channel=alert_receive_channel,
|
||||
channel_filter=channel_filter,
|
||||
)
|
||||
declared_incident = None
|
||||
if already_declared_incident:
|
||||
declared_incident = DeclaredIncident.objects.create(
|
||||
incident_id="123",
|
||||
organization=organization,
|
||||
channel_filter=channel_filter,
|
||||
)
|
||||
|
||||
return alert_group, declare_incident_step, declared_incident
|
||||
|
||||
return _setup_alert_group_and_escalation_step
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_declare_incident_default_route(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(is_default_route=True)
|
||||
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
# check triggered log
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_FAILED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info is None
|
||||
assert log_record.reason == "Declare incident step is not enabled for default routes"
|
||||
assert log_record.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_ok(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.return_value = {"incidentID": "123", "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE,
|
||||
severity=DEFAULT_INCIDENT_SEVERITY,
|
||||
attachCaption=ATTACHMENT_CAPTION,
|
||||
attachURL=alert_group.web_link,
|
||||
)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# check declared incident
|
||||
assert alert_group.declared_incident.incident_id == "123"
|
||||
assert alert_group.declared_incident.organization == alert_group.channel.organization
|
||||
assert alert_group.declared_incident.channel_filter == alert_group.channel_filter
|
||||
# check triggered log
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info == {"incident_id": "123", "incident_title": "Incident"}
|
||||
assert log_record.reason == "incident declared"
|
||||
assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_set_severity(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
severity = "critical"
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.return_value = {"incidentID": "123", "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk, severity=severity)
|
||||
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE, severity=severity, attachCaption=ATTACHMENT_CAPTION, attachURL=alert_group.web_link
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_set_severity_from_label(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
expected_severity = "minor"
|
||||
# set alert group label
|
||||
alert_group.labels.create(
|
||||
organization=alert_group.channel.organization, key_name="severity", value_name=expected_severity
|
||||
)
|
||||
severity = EscalationPolicy.SEVERITY_SET_FROM_LABEL
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.return_value = {"incidentID": "123", "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk, severity=severity)
|
||||
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE,
|
||||
severity=expected_severity,
|
||||
attachCaption=ATTACHMENT_CAPTION,
|
||||
attachURL=alert_group.web_link,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_invalid_severity_fallback(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
severity = "INVALID"
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
with patch.object(declare_incident, "apply_async") as mock_declare_incident_apply_async:
|
||||
mock_create_incident.side_effect = IncidentAPIException(
|
||||
status=500, url="some-url", msg=ERROR_SEVERITY_NOT_FOUND
|
||||
)
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk, severity=severity)
|
||||
|
||||
# create call failing with invalid severity
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE, severity=severity, attachCaption=ATTACHMENT_CAPTION, attachURL=alert_group.web_link
|
||||
)
|
||||
# new task is queued with default severity instead
|
||||
mock_declare_incident_apply_async.assert_called_with(
|
||||
args=(alert_group.pk, declare_incident_step.pk), kwargs={"severity": DEFAULT_INCIDENT_SEVERITY}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_attach_alert_group(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
|
||||
already_declared_incident=True
|
||||
)
|
||||
incident_id = existing_open_incident.incident_id
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
|
||||
with patch("common.incident_api.client.IncidentAPIClient.add_activity") as mock_add_activity:
|
||||
mock_get_incident.return_value = {"incidentID": incident_id, "title": "Incident", "status": "active"}, None
|
||||
mock_add_activity.return_value = {"activityItemID": "111"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# check declared incident
|
||||
assert alert_group.declared_incident == existing_open_incident
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
|
||||
assert log_record.reason == "attached to existing incident"
|
||||
assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_resolved_update(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
|
||||
already_declared_incident=True
|
||||
)
|
||||
incident_id = existing_open_incident.incident_id
|
||||
new_incident_id = "333"
|
||||
assert new_incident_id != incident_id
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_get_incident.return_value = {
|
||||
"incidentID": incident_id,
|
||||
"title": "Incident1",
|
||||
"status": "resolved",
|
||||
}, None
|
||||
mock_create_incident.return_value = {"incidentID": new_incident_id, "title": "Incident2"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
existing_open_incident.refresh_from_db()
|
||||
|
||||
assert existing_open_incident.is_active is False
|
||||
# check declared incident
|
||||
assert alert_group.declared_incident != existing_open_incident
|
||||
assert alert_group.declared_incident.incident_id == new_incident_id
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info == {"incident_id": new_incident_id, "incident_title": "Incident2"}
|
||||
assert log_record.reason == "incident declared"
|
||||
assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_attach_alert_group_skip_incident_update(
|
||||
setup_alert_group_and_escalation_step, make_alert_group
|
||||
):
|
||||
alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
|
||||
already_declared_incident=True
|
||||
)
|
||||
alert_receive_channel = alert_group.channel
|
||||
channel_filter = alert_group.channel_filter
|
||||
incident_id = existing_open_incident.incident_id
|
||||
|
||||
# attach max alert groups to incident
|
||||
for _ in range(MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT):
|
||||
ag = make_alert_group(alert_receive_channel=alert_receive_channel, channel_filter=channel_filter)
|
||||
ag.declared_incident = existing_open_incident
|
||||
ag.save()
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
|
||||
with patch("common.incident_api.client.IncidentAPIClient.add_activity") as mock_add_activity:
|
||||
mock_get_incident.return_value = {"incidentID": incident_id, "title": "Incident", "status": "active"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
assert not mock_add_activity.called
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# check declared incident
|
||||
assert alert_group.declared_incident == existing_open_incident
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
|
||||
assert log_record.reason == "attached to existing incident"
|
||||
assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_get_existing_incident_error(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
|
||||
already_declared_incident=True
|
||||
)
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
|
||||
mock_get_incident.side_effect = IncidentAPIException(status=500, url="some-url")
|
||||
with pytest.raises(IncidentAPIException):
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
# but if incident was not found, a new one should be created
|
||||
incident_id = existing_open_incident.incident_id
|
||||
new_incident_id = "333"
|
||||
assert new_incident_id != incident_id
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_get_incident.side_effect = IncidentAPIException(status=404, url="some-url")
|
||||
mock_create_incident.return_value = {"incidentID": new_incident_id, "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# check declared incident
|
||||
assert alert_group.declared_incident != existing_open_incident
|
||||
assert alert_group.declared_incident.incident_id == new_incident_id
|
||||
assert alert_group.declared_incident.organization == alert_group.channel.organization
|
||||
assert alert_group.declared_incident.channel_filter == alert_group.channel_filter
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_attach_alert_group_error(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
|
||||
already_declared_incident=True
|
||||
)
|
||||
incident_id = existing_open_incident.incident_id
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
|
||||
with patch("common.incident_api.client.IncidentAPIClient.add_activity") as mock_add_activity:
|
||||
mock_get_incident.return_value = {"incidentID": incident_id, "title": "Incident", "status": "active"}, None
|
||||
mock_add_activity.side_effect = IncidentAPIException(status=500, url="some-url")
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# incident attachment failed, but DB is still updated
|
||||
assert alert_group.declared_incident == existing_open_incident
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
|
||||
assert log_record.reason == "attached to existing incident"
|
||||
assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_create_incident_error(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.side_effect = IncidentAPIException(status=500, url="some-url")
|
||||
with pytest.raises(IncidentAPIException):
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
|
@ -690,52 +690,3 @@ def test_notify_team_members(
|
|||
(user_2.pk, alert_group.pk), expected_kwargs, immutable=True
|
||||
)
|
||||
assert mock_execute.signature.call_count == 2
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_escalation_step_declare_incident(
|
||||
escalation_step_test_setup,
|
||||
make_escalation_policy,
|
||||
):
|
||||
organization, _, _, channel_filter, alert_group, reason = escalation_step_test_setup
|
||||
|
||||
declare_incident_step = make_escalation_policy(
|
||||
escalation_chain=channel_filter.escalation_chain,
|
||||
escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
)
|
||||
escalation_policy_snapshot = get_escalation_policy_snapshot_from_model(declare_incident_step)
|
||||
expected_eta = timezone.now() + timezone.timedelta(seconds=NEXT_ESCALATION_DELAY)
|
||||
with patch.object(EscalationPolicySnapshot, "_execute_tasks") as mocked_execute_tasks:
|
||||
with patch(
|
||||
"apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot.is_declare_incident_step_enabled",
|
||||
return_value=True,
|
||||
):
|
||||
result = escalation_policy_snapshot.execute(alert_group, reason)
|
||||
expected_result = EscalationPolicySnapshot.StepExecutionResultData(
|
||||
eta=result.eta,
|
||||
stop_escalation=False,
|
||||
pause_escalation=False,
|
||||
start_from_beginning=False,
|
||||
)
|
||||
assert (
|
||||
expected_eta + timezone.timedelta(seconds=15)
|
||||
> result.eta
|
||||
> expected_eta - timezone.timedelta(seconds=15)
|
||||
)
|
||||
assert result == expected_result
|
||||
assert not alert_group.log_records.exists()
|
||||
mocked_execute_tasks.assert_called_once()
|
||||
with patch.object(EscalationPolicySnapshot, "_execute_tasks") as mocked_execute_tasks:
|
||||
with patch(
|
||||
"apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot.is_declare_incident_step_enabled",
|
||||
return_value=False,
|
||||
):
|
||||
escalation_policy_snapshot.execute(alert_group, reason)
|
||||
mocked_execute_tasks.assert_not_called()
|
||||
assert alert_group.log_records.exists()
|
||||
log_record = alert_group.log_records.get()
|
||||
assert log_record.type == AlertGroupLogRecord.TYPE_ESCALATION_FAILED
|
||||
assert (
|
||||
log_record.escalation_error_code
|
||||
== AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,11 +1,3 @@
|
|||
import typing
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from apps.user_management.models import Organization
|
||||
|
||||
|
||||
def render_relative_timeline(log_created_at, alert_group_started_at):
|
||||
time_delta = log_created_at - alert_group_started_at
|
||||
seconds = int(time_delta.total_seconds())
|
||||
|
|
@ -20,7 +12,3 @@ def render_relative_timeline(log_created_at, alert_group_started_at):
|
|||
return "%dm%ds" % (minutes, seconds)
|
||||
else:
|
||||
return "%ds" % (seconds,)
|
||||
|
||||
|
||||
def is_declare_incident_step_enabled(organization: "Organization") -> bool:
|
||||
return organization.is_grafana_incident_enabled and settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ from datetime import timedelta
|
|||
from rest_framework import serializers
|
||||
|
||||
from apps.alerts.models import EscalationChain, EscalationPolicy
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.schedules.models import OnCallSchedule
|
||||
from apps.slack.models import SlackUserGroup
|
||||
from apps.user_management.models import Team, User
|
||||
|
|
@ -25,7 +24,6 @@ TO_TIME = "to_time"
|
|||
NUM_ALERTS_IN_WINDOW = "num_alerts_in_window"
|
||||
NUM_MINUTES_IN_WINDOW = "num_minutes_in_window"
|
||||
CUSTOM_WEBHOOK_TRIGGER = "custom_webhook"
|
||||
SEVERITY = "severity"
|
||||
|
||||
STEP_TYPE_TO_RELATED_FIELD_MAP = {
|
||||
EscalationPolicy.STEP_WAIT: [WAIT_DELAY],
|
||||
|
|
@ -37,7 +35,6 @@ STEP_TYPE_TO_RELATED_FIELD_MAP = {
|
|||
EscalationPolicy.STEP_NOTIFY_IF_TIME: [FROM_TIME, TO_TIME],
|
||||
EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: [NUM_ALERTS_IN_WINDOW, NUM_MINUTES_IN_WINDOW],
|
||||
EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK: [CUSTOM_WEBHOOK_TRIGGER],
|
||||
EscalationPolicy.STEP_DECLARE_INCIDENT: [SEVERITY],
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -84,7 +81,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
allow_null=True,
|
||||
filter_field="organization",
|
||||
)
|
||||
severity = serializers.CharField(required=False, allow_null=True)
|
||||
|
||||
class Meta:
|
||||
model = EscalationPolicy
|
||||
|
|
@ -103,7 +99,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
"notify_schedule",
|
||||
"notify_to_group",
|
||||
"notify_to_team_members",
|
||||
"severity",
|
||||
"important",
|
||||
]
|
||||
|
||||
|
|
@ -128,7 +123,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
NUM_ALERTS_IN_WINDOW,
|
||||
NUM_MINUTES_IN_WINDOW,
|
||||
CUSTOM_WEBHOOK_TRIGGER,
|
||||
SEVERITY,
|
||||
]
|
||||
|
||||
step = data.get("step")
|
||||
|
|
@ -157,8 +151,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
raise serializers.ValidationError("Invalid step value")
|
||||
if step_type in EscalationPolicy.SLACK_INTEGRATION_REQUIRED_STEPS and organization.slack_team_identity is None:
|
||||
raise serializers.ValidationError("Invalid escalation step type: step is Slack-specific")
|
||||
if step_type == EscalationPolicy.STEP_DECLARE_INCIDENT and not is_declare_incident_step_enabled(organization):
|
||||
raise serializers.ValidationError("Invalid escalation step type: step is not enabled")
|
||||
return step_type
|
||||
|
||||
def to_representation(self, instance):
|
||||
|
|
@ -222,7 +214,6 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer):
|
|||
NUM_ALERTS_IN_WINDOW,
|
||||
NUM_MINUTES_IN_WINDOW,
|
||||
CUSTOM_WEBHOOK_TRIGGER,
|
||||
SEVERITY,
|
||||
]
|
||||
|
||||
for f in STEP_TYPE_TO_RELATED_FIELD_MAP.get(step, []):
|
||||
|
|
|
|||
|
|
@ -975,38 +975,6 @@ def test_get_filter_labels(
|
|||
assert response.json()["results"][0]["pk"] == alert_groups[0].public_primary_key
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_get_filter_by_related_incident(
|
||||
alert_group_internal_api_setup, make_declared_incident, make_alert_group, make_user_auth_headers
|
||||
):
|
||||
user, token, alert_groups = alert_group_internal_api_setup
|
||||
|
||||
alert_group = alert_groups[0]
|
||||
declared_incident = make_declared_incident("1", alert_group.channel.organization, alert_group.channel_filter)
|
||||
alert_group.declared_incident = declared_incident
|
||||
alert_group.save()
|
||||
|
||||
client = APIClient()
|
||||
url = reverse("api-internal:alertgroup-list")
|
||||
response = client.get(
|
||||
url + "?has_related_incident=true",
|
||||
format="json",
|
||||
**make_user_auth_headers(user, token),
|
||||
)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data["results"]) == 1
|
||||
|
||||
response = client.get(
|
||||
url + "?has_related_incident=false",
|
||||
format="json",
|
||||
**make_user_auth_headers(user, token),
|
||||
)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data["results"]) == 3
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_get_title_search(
|
||||
settings,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ from rest_framework.test import APIClient
|
|||
|
||||
from apps.alerts.models import EscalationPolicy
|
||||
from apps.api.permissions import LegacyAccessControlRole
|
||||
from common.incident_api.client import DEFAULT_INCIDENT_SEVERITY, IncidentAPIException
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
|
@ -652,16 +651,15 @@ def test_create_escalation_policy_with_no_important_version(
|
|||
make_escalation_chain,
|
||||
step,
|
||||
make_user_auth_headers,
|
||||
settings,
|
||||
):
|
||||
organization, user, _, _ = make_organization_and_user_with_slack_identities()
|
||||
# make sure declare incident step is enabled
|
||||
settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
|
||||
organization.is_grafana_incident_enabled = True
|
||||
organization.save()
|
||||
_, token = make_token_for_organization(organization)
|
||||
escalation_chain = make_escalation_chain(organization)
|
||||
|
||||
if step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
# declare incident step is disabled
|
||||
return
|
||||
|
||||
client = APIClient()
|
||||
data_for_creation = {
|
||||
"escalation_chain": escalation_chain.public_primary_key,
|
||||
|
|
@ -834,7 +832,6 @@ def test_escalation_policy_switch_importance(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": True,
|
||||
"wait_delay": None,
|
||||
}
|
||||
|
|
@ -892,7 +889,6 @@ def test_escalation_policy_filter_by_user(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": False,
|
||||
},
|
||||
{
|
||||
|
|
@ -910,7 +906,6 @@ def test_escalation_policy_filter_by_user(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": False,
|
||||
},
|
||||
]
|
||||
|
|
@ -976,7 +971,6 @@ def test_escalation_policy_filter_by_slack_channel(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": False,
|
||||
},
|
||||
]
|
||||
|
|
@ -1007,88 +1001,3 @@ def test_escalation_policy_escalation_options_webhooks(
|
|||
returned_options = [option["value"] for option in response.json()]
|
||||
|
||||
assert EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK in returned_options
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_escalation_policy_severity_options(
|
||||
make_organization_and_user_with_plugin_token,
|
||||
make_user_auth_headers,
|
||||
):
|
||||
organization, user, token = make_organization_and_user_with_plugin_token()
|
||||
organization.is_grafana_labels_enabled = False
|
||||
organization.save()
|
||||
|
||||
client = APIClient()
|
||||
url = reverse("api-internal:escalation_policy-severity-options")
|
||||
|
||||
# without labels enabled
|
||||
available_severities = [
|
||||
{"severityID": "abc", "orgID": "1", "displayLabel": "Pending", "level": -1},
|
||||
{"severityID": "def", "orgID": "1", "displayLabel": "Critical", "level": 1},
|
||||
]
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_severities") as mock_get_severities:
|
||||
mock_get_severities.return_value = available_severities, None
|
||||
response = client.get(url, format="json", **make_user_auth_headers(user, token))
|
||||
|
||||
expected_options = [{"value": s["displayLabel"], "display_name": s["displayLabel"]} for s in available_severities]
|
||||
assert response.json() == expected_options
|
||||
|
||||
# failing request does not break; fallback to default option only
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_severities") as mock_get_severities:
|
||||
mock_get_severities.side_effect = IncidentAPIException(status=404, url="some-url")
|
||||
response = client.get(url, format="json", **make_user_auth_headers(user, token))
|
||||
|
||||
fallback_options = [{"value": DEFAULT_INCIDENT_SEVERITY, "display_name": DEFAULT_INCIDENT_SEVERITY}]
|
||||
assert response.json() == fallback_options
|
||||
|
||||
# labels enabled
|
||||
organization.is_grafana_labels_enabled = True
|
||||
organization.save()
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.get_severities") as mock_get_severities:
|
||||
mock_get_severities.return_value = available_severities, None
|
||||
response = client.get(url, format="json", **make_user_auth_headers(user, token))
|
||||
# include set from label option
|
||||
expected_options = [
|
||||
{
|
||||
"value": EscalationPolicy.SEVERITY_SET_FROM_LABEL,
|
||||
"display_name": EscalationPolicy.SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE,
|
||||
}
|
||||
] + expected_options
|
||||
assert response.json() == expected_options
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_create_escalation_policy_declare_incident(
|
||||
escalation_policy_internal_api_setup, make_user_auth_headers, settings
|
||||
):
|
||||
token, escalation_chain, _, user, _ = escalation_policy_internal_api_setup
|
||||
organization = escalation_chain.organization
|
||||
client = APIClient()
|
||||
url = reverse("api-internal:escalation_policy-list")
|
||||
|
||||
data = {
|
||||
"step": EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
"severity": "critical",
|
||||
"escalation_chain": escalation_chain.public_primary_key,
|
||||
}
|
||||
|
||||
response = client.post(url, data, format="json", **make_user_auth_headers(user, token))
|
||||
assert response.status_code == status.HTTP_400_BAD_REQUEST
|
||||
|
||||
# make sure declare incident step is enabled
|
||||
settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
|
||||
organization.is_grafana_incident_enabled = True
|
||||
organization.save()
|
||||
|
||||
response = client.post(url, data, format="json", **make_user_auth_headers(user, token))
|
||||
assert response.status_code == status.HTTP_201_CREATED
|
||||
escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
|
||||
assert escalation_policy.step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert escalation_policy.severity == "critical"
|
||||
|
||||
url = reverse("api-internal:escalation_policy-detail", kwargs={"pk": escalation_policy.public_primary_key})
|
||||
response = client.get(url, format="json", **make_user_auth_headers(user, token))
|
||||
response_data = response.json()
|
||||
assert response_data["step"] == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert response_data["severity"] == "critical"
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ from apps.alerts.constants import ActionSource
|
|||
from apps.alerts.models import AlertGroup, AlertReceiveChannel, EscalationChain, ResolutionNote
|
||||
from apps.alerts.paging import unpage_user
|
||||
from apps.alerts.tasks import delete_alert_group, send_update_resolution_note_signal
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.api.errors import AlertGroupAPIError
|
||||
from apps.api.label_filtering import parse_label_query
|
||||
from apps.api.permissions import RBACPermission
|
||||
|
|
@ -121,7 +120,6 @@ class AlertGroupFilter(DateRangeFilterMixin, ModelFieldFilterMixin, filters.Filt
|
|||
)
|
||||
with_resolution_note = filters.BooleanFilter(method="filter_with_resolution_note")
|
||||
mine = filters.BooleanFilter(method="filter_mine")
|
||||
has_related_incident = filters.BooleanFilter(field_name="declared_incident", lookup_expr="isnull", exclude=True)
|
||||
|
||||
def filter_status(self, queryset, name, value):
|
||||
if not value:
|
||||
|
|
@ -721,7 +719,6 @@ class AlertGroupView(
|
|||
"""
|
||||
Retrieve a list of valid filter options that can be used to filter alert groups
|
||||
"""
|
||||
organization = self.request.auth.organization
|
||||
api_root = "/api/internal/v1/"
|
||||
default_day_range = 30
|
||||
|
||||
|
|
@ -807,7 +804,7 @@ class AlertGroupView(
|
|||
|
||||
filter_options = [{"name": "search", "type": "search", "description": description}] + filter_options
|
||||
|
||||
if is_labels_feature_enabled(organization):
|
||||
if is_labels_feature_enabled(self.request.auth.organization):
|
||||
filter_options.append(
|
||||
{
|
||||
"name": "label",
|
||||
|
|
@ -816,15 +813,6 @@ class AlertGroupView(
|
|||
}
|
||||
)
|
||||
|
||||
if is_declare_incident_step_enabled(organization):
|
||||
filter_options.append(
|
||||
{
|
||||
"name": "has_related_incident",
|
||||
"type": "boolean",
|
||||
"default": "true",
|
||||
}
|
||||
)
|
||||
|
||||
return Response(filter_options)
|
||||
|
||||
@extend_schema(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.db.models import Q
|
||||
from rest_framework.decorators import action
|
||||
|
|
@ -7,7 +5,6 @@ from rest_framework.permissions import IsAuthenticated
|
|||
from rest_framework.response import Response
|
||||
|
||||
from apps.alerts.models import EscalationPolicy
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.api.permissions import RBACPermission
|
||||
from apps.api.serializers.escalation_policy import (
|
||||
EscalationPolicyCreateSerializer,
|
||||
|
|
@ -22,12 +19,9 @@ from common.api_helpers.mixins import (
|
|||
TeamFilteringMixin,
|
||||
UpdateSerializerMixin,
|
||||
)
|
||||
from common.incident_api.client import DEFAULT_INCIDENT_SEVERITY, IncidentAPIClient, IncidentAPIException
|
||||
from common.insight_log import EntityEvent, write_resource_insight_log
|
||||
from common.ordered_model.viewset import OrderedModelViewSet
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EscalationPolicyView(
|
||||
TeamFilteringMixin,
|
||||
|
|
@ -48,7 +42,6 @@ class EscalationPolicyView(
|
|||
"escalation_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"delay_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"num_minutes_in_window_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"severity_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"create": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
|
||||
"update": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
|
||||
"partial_update": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
|
||||
|
|
@ -123,7 +116,6 @@ class EscalationPolicyView(
|
|||
|
||||
@action(detail=False, methods=["get"])
|
||||
def escalation_options(self, request):
|
||||
grafana_declare_incident_enabled = is_declare_incident_step_enabled(organization=self.request.auth.organization)
|
||||
choices = []
|
||||
for step in EscalationPolicy.INTERNAL_API_STEPS:
|
||||
verbal = EscalationPolicy.INTERNAL_API_STEPS_TO_VERBAL_MAP[step]
|
||||
|
|
@ -133,8 +125,10 @@ class EscalationPolicyView(
|
|||
slack_integration_required = step in EscalationPolicy.SLACK_INTEGRATION_REQUIRED_STEPS
|
||||
if slack_integration_required and not settings.FEATURE_SLACK_INTEGRATION_ENABLED:
|
||||
continue
|
||||
if step == EscalationPolicy.STEP_DECLARE_INCIDENT and not grafana_declare_incident_enabled:
|
||||
|
||||
if step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
continue
|
||||
|
||||
choices.append(
|
||||
{
|
||||
"value": step,
|
||||
|
|
@ -161,25 +155,3 @@ class EscalationPolicyView(
|
|||
{"value": choice[0], "display_name": choice[1]} for choice in EscalationPolicy.WEB_DURATION_CHOICES_MINUTES
|
||||
]
|
||||
return Response(choices)
|
||||
|
||||
@action(detail=False, methods=["get"])
|
||||
def severity_options(self, request):
|
||||
organization = self.request.auth.organization
|
||||
choices = []
|
||||
if organization.is_grafana_labels_enabled:
|
||||
choices = [
|
||||
{
|
||||
"value": EscalationPolicy.SEVERITY_SET_FROM_LABEL,
|
||||
"display_name": EscalationPolicy.SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE,
|
||||
}
|
||||
]
|
||||
incident_client = IncidentAPIClient(organization.grafana_url, organization.api_token)
|
||||
try:
|
||||
severities, _ = incident_client.get_severities()
|
||||
choices += [
|
||||
{"value": severity["displayLabel"], "display_name": severity["displayLabel"]} for severity in severities
|
||||
]
|
||||
except IncidentAPIException as e:
|
||||
logger.error(f"Error getting severities: {e.msg}")
|
||||
choices += [{"value": DEFAULT_INCIDENT_SEVERITY, "display_name": DEFAULT_INCIDENT_SEVERITY}]
|
||||
return Response(choices)
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from django.utils.functional import cached_property
|
|||
from rest_framework import fields, serializers
|
||||
|
||||
from apps.alerts.models import EscalationChain, EscalationPolicy
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.schedules.models import OnCallSchedule
|
||||
from apps.slack.models import SlackUserGroup
|
||||
from apps.user_management.models import Team, User
|
||||
|
|
@ -73,7 +72,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
required=False,
|
||||
source="custom_webhook",
|
||||
)
|
||||
severity = serializers.CharField(required=False)
|
||||
important = serializers.BooleanField(required=False)
|
||||
|
||||
TIME_FORMAT = "%H:%M:%SZ"
|
||||
|
|
@ -103,7 +101,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
"notify_if_time_to",
|
||||
"num_alerts_in_window",
|
||||
"num_minutes_in_window",
|
||||
"severity",
|
||||
]
|
||||
|
||||
PREFETCH_RELATED = ["notify_to_users_queue"]
|
||||
|
|
@ -123,9 +120,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
if step_type == EscalationPolicy.STEP_FINAL_NOTIFYALL and organization.slack_team_identity is None:
|
||||
raise BadRequest(detail="Invalid escalation step type: step is Slack-specific")
|
||||
|
||||
if step_type == EscalationPolicy.STEP_DECLARE_INCIDENT and not is_declare_incident_step_enabled(organization):
|
||||
raise BadRequest("Invalid escalation step type: step is not enabled")
|
||||
|
||||
return step_type
|
||||
|
||||
def create(self, validated_data):
|
||||
|
|
@ -169,7 +163,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
"notify_if_time_to",
|
||||
"num_alerts_in_window",
|
||||
"num_minutes_in_window",
|
||||
"severity",
|
||||
]
|
||||
if step == EscalationPolicy.STEP_WAIT:
|
||||
fields_to_remove.remove("duration")
|
||||
|
|
@ -197,8 +190,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
elif step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
|
||||
fields_to_remove.remove("num_alerts_in_window")
|
||||
fields_to_remove.remove("num_minutes_in_window")
|
||||
elif step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
fields_to_remove.remove("severity")
|
||||
|
||||
if (
|
||||
step in EscalationPolicy.DEFAULT_TO_IMPORTANT_STEP_MAPPING
|
||||
|
|
@ -222,7 +213,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
"to_time",
|
||||
"num_alerts_in_window",
|
||||
"num_minutes_in_window",
|
||||
"severity",
|
||||
]
|
||||
step = validated_data.get("step")
|
||||
important = validated_data.pop("important", None)
|
||||
|
|
@ -253,8 +243,6 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
elif step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
|
||||
validated_data_fields_to_remove.remove("num_alerts_in_window")
|
||||
validated_data_fields_to_remove.remove("num_minutes_in_window")
|
||||
elif step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
validated_data_fields_to_remove.remove("severity")
|
||||
|
||||
for field in validated_data_fields_to_remove:
|
||||
validated_data.pop(field, None)
|
||||
|
|
@ -311,7 +299,5 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer):
|
|||
if step != EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
|
||||
instance.num_alerts_in_window = None
|
||||
instance.num_minutes_in_window = None
|
||||
if step != EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
instance.severity = None
|
||||
|
||||
return super().update(instance, validated_data)
|
||||
|
|
|
|||
|
|
@ -463,43 +463,3 @@ def test_update_escalation_policy_using_notify_team_members(
|
|||
escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
|
||||
serializer = EscalationPolicySerializer(escalation_policy)
|
||||
assert response.data == serializer.data
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_create_escalation_policy_declare_incident(
|
||||
make_organization_and_user_with_token,
|
||||
escalation_policies_setup,
|
||||
settings,
|
||||
):
|
||||
organization, user, token = make_organization_and_user_with_token()
|
||||
escalation_chain, _, _ = escalation_policies_setup(organization, user)
|
||||
|
||||
data_for_create = {
|
||||
"escalation_chain_id": escalation_chain.public_primary_key,
|
||||
"type": "declare_incident",
|
||||
"position": 0,
|
||||
"severity": "critical",
|
||||
}
|
||||
|
||||
client = APIClient()
|
||||
url = reverse("api-public:escalation_policies-list")
|
||||
response = client.post(url, data=data_for_create, format="json", HTTP_AUTHORIZATION=token)
|
||||
assert response.status_code == status.HTTP_400_BAD_REQUEST
|
||||
|
||||
# make sure declare incident step is enabled
|
||||
settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
|
||||
organization.is_grafana_incident_enabled = True
|
||||
organization.save()
|
||||
|
||||
response = client.post(url, data=data_for_create, format="json", HTTP_AUTHORIZATION=token)
|
||||
assert response.status_code == status.HTTP_201_CREATED
|
||||
|
||||
escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
|
||||
assert escalation_policy.step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert escalation_policy.severity == "critical"
|
||||
|
||||
url = reverse("api-public:escalation_policies-detail", kwargs={"pk": escalation_policy.public_primary_key})
|
||||
response = client.get(url, format="json", HTTP_AUTHORIZATION=token)
|
||||
response_data = response.json()
|
||||
assert response_data["type"] == EscalationPolicy.PUBLIC_STEP_CHOICES_MAP[EscalationPolicy.STEP_DECLARE_INCIDENT]
|
||||
assert response_data["severity"] == "critical"
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ from apps.alerts.tests.factories import (
|
|||
AlertReceiveChannelFactory,
|
||||
ChannelFilterFactory,
|
||||
CustomActionFactory,
|
||||
DeclaredIncidentFactory,
|
||||
EscalationChainFactory,
|
||||
EscalationPolicyFactory,
|
||||
InvitationFactory,
|
||||
|
|
@ -1113,13 +1112,3 @@ def make_user_notification_bundle():
|
|||
)
|
||||
|
||||
return _make_user_notification_bundle
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def make_declared_incident():
|
||||
def _make_declared_incident(incident_id, organization, channel_filter):
|
||||
return DeclaredIncidentFactory(
|
||||
incident_id=incident_id, organization=organization, channel_filter=channel_filter
|
||||
)
|
||||
|
||||
return _make_declared_incident
|
||||
|
|
|
|||
|
|
@ -75,7 +75,6 @@ FEATURE_LABELS_ENABLED_PER_ORG = getenv_list("FEATURE_LABELS_ENABLED_PER_ORG", d
|
|||
FEATURE_ALERT_GROUP_SEARCH_ENABLED = getenv_boolean("FEATURE_ALERT_GROUP_SEARCH_ENABLED", default=True)
|
||||
FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS = getenv_integer("FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS", default=None)
|
||||
FEATURE_NOTIFICATION_BUNDLE_ENABLED = getenv_boolean("FEATURE_NOTIFICATION_BUNDLE_ENABLED", default=True)
|
||||
FEATURE_DECLARE_INCIDENT_STEP_ENABLED = getenv_boolean("FEATURE_DECLARE_INCIDENT_STEP_ENABLED", default=False)
|
||||
|
||||
TWILIO_API_KEY_SID = os.environ.get("TWILIO_API_KEY_SID")
|
||||
TWILIO_API_KEY_SECRET = os.environ.get("TWILIO_API_KEY_SECRET")
|
||||
|
|
|
|||
|
|
@ -94,7 +94,6 @@ CELERY_TASK_ROUTES = {
|
|||
# CRITICAL
|
||||
"apps.alerts.tasks.acknowledge_reminder.acknowledge_reminder_task": {"queue": "critical"},
|
||||
"apps.alerts.tasks.acknowledge_reminder.unacknowledge_timeout_task": {"queue": "critical"},
|
||||
"apps.alerts.tasks.declare_incident.declare_incident": {"queue": "critical"},
|
||||
"apps.alerts.tasks.distribute_alert.send_alert_create_signal": {"queue": "critical"},
|
||||
"apps.alerts.tasks.escalate_alert_group.escalate_alert_group": {"queue": "critical"},
|
||||
"apps.alerts.tasks.invite_user_to_join_incident.invite_user_to_join_incident": {"queue": "critical"},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue