commit
250f2c5117
27 changed files with 1000 additions and 26 deletions
15
.github/workflows/add-to-docs-project.yml
vendored
15
.github/workflows/add-to-docs-project.yml
vendored
|
|
@ -1,15 +0,0 @@
|
|||
name: Add to docs project
|
||||
on:
|
||||
issues:
|
||||
types: [labeled]
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
jobs:
|
||||
main:
|
||||
if: ${{ github.event.label.name == 'type/docs' }}
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: grafana/writers-toolkit/add-to-docs-project@add-to-docs-project/v1
|
||||
|
|
@ -12,11 +12,13 @@ from apps.alerts.models.alert_group_log_record import AlertGroupLogRecord
|
|||
from apps.alerts.models.escalation_policy import EscalationPolicy
|
||||
from apps.alerts.tasks import (
|
||||
custom_webhook_result,
|
||||
declare_incident,
|
||||
notify_all_task,
|
||||
notify_group_task,
|
||||
notify_user_task,
|
||||
resolve_by_last_step_task,
|
||||
)
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.schedules.ical_utils import list_users_to_notify_from_ical
|
||||
from apps.user_management.models import User
|
||||
|
||||
|
|
@ -136,6 +138,7 @@ class EscalationPolicySnapshot:
|
|||
EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: self._escalation_step_notify_if_num_alerts_in_time_window,
|
||||
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS: self._escalation_step_notify_multiple_users,
|
||||
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT: self._escalation_step_notify_multiple_users,
|
||||
EscalationPolicy.STEP_DECLARE_INCIDENT: self._escalation_step_declare_incident,
|
||||
None: self._escalation_step_not_configured,
|
||||
}
|
||||
result = action_map[self.step](alert_group, reason)
|
||||
|
|
@ -410,6 +413,32 @@ class EscalationPolicySnapshot:
|
|||
|
||||
self._execute_tasks(tasks)
|
||||
|
||||
def _escalation_step_declare_incident(self, alert_group: "AlertGroup", _reason: str) -> None:
|
||||
grafana_declare_incident_enabled = is_declare_incident_step_enabled(
|
||||
organization=alert_group.channel.organization
|
||||
)
|
||||
if not grafana_declare_incident_enabled:
|
||||
AlertGroupLogRecord(
|
||||
type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
|
||||
alert_group=alert_group,
|
||||
reason="Declare Incident step is not enabled",
|
||||
escalation_policy=self.escalation_policy,
|
||||
escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
|
||||
escalation_policy_step=self.step,
|
||||
).save()
|
||||
return
|
||||
tasks = []
|
||||
declare_incident_task = declare_incident.signature(
|
||||
args=(alert_group.pk,),
|
||||
kwargs={
|
||||
"escalation_policy_pk": self.id,
|
||||
"severity": self.severity,
|
||||
},
|
||||
immutable=True,
|
||||
)
|
||||
tasks.append(declare_incident_task)
|
||||
self._execute_tasks(tasks)
|
||||
|
||||
def _escalation_step_notify_if_time(self, alert_group: "AlertGroup", _reason: str) -> StepExecutionResultData:
|
||||
eta = None
|
||||
|
||||
|
|
|
|||
30
engine/apps/alerts/migrations/0060_relatedincident.py
Normal file
30
engine/apps/alerts/migrations/0060_relatedincident.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# Generated by Django 4.2.15 on 2024-10-04 16:38
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('user_management', '0022_alter_team_unique_together'),
|
||||
('alerts', '0059_escalationpolicy_severity_and_more'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='RelatedIncident',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('incident_id', models.CharField(db_index=True, max_length=50)),
|
||||
('created_at', models.DateTimeField(auto_now_add=True)),
|
||||
('is_active', models.BooleanField(default=True)),
|
||||
('attached_alert_groups', models.ManyToManyField(related_name='related_incidents', to='alerts.alertgroup')),
|
||||
('channel_filter', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='related_incidents', to='alerts.channelfilter')),
|
||||
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_incidents', to='user_management.organization')),
|
||||
],
|
||||
options={
|
||||
'unique_together': {('organization', 'incident_id')},
|
||||
},
|
||||
),
|
||||
]
|
||||
|
|
@ -13,6 +13,7 @@ from .escalation_policy import EscalationPolicy # noqa: F401
|
|||
from .grafana_alerting_contact_point import GrafanaAlertingContactPoint # noqa: F401
|
||||
from .invitation import Invitation # noqa: F401
|
||||
from .maintainable_object import MaintainableObject # noqa: F401
|
||||
from .related_incident import RelatedIncident # noqa: F401
|
||||
from .resolution_note import ResolutionNote, ResolutionNoteSlackMessage # noqa: F401
|
||||
from .user_has_notification import UserHasNotification # noqa: F401
|
||||
from .user_notification_bundle import BundledNotification, UserNotificationBundle # noqa: F401
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ if typing.TYPE_CHECKING:
|
|||
AlertGroupLogRecord,
|
||||
AlertReceiveChannel,
|
||||
BundledNotification,
|
||||
RelatedIncident,
|
||||
ResolutionNote,
|
||||
ResolutionNoteSlackMessage,
|
||||
)
|
||||
|
|
@ -193,6 +194,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
acknowledged_by_user: typing.Optional["User"]
|
||||
alerts: "RelatedManager['Alert']"
|
||||
bundled_notifications: "RelatedManager['BundledNotification']"
|
||||
related_incidents: "RelatedManager['RelatedIncident']"
|
||||
dependent_alert_groups: "RelatedManager['AlertGroup']"
|
||||
channel: "AlertReceiveChannel"
|
||||
log_records: "RelatedManager['AlertGroupLogRecord']"
|
||||
|
|
|
|||
|
|
@ -11,18 +11,24 @@ from rest_framework.fields import DateTimeField
|
|||
|
||||
from apps.alerts import tasks
|
||||
from apps.alerts.constants import ActionSource
|
||||
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
|
||||
from apps.alerts.utils import render_relative_timeline
|
||||
from apps.slack.slack_formatter import SlackFormatter
|
||||
from common.utils import clean_markup
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from apps.alerts.models import AlertGroup, CustomButton, EscalationPolicy, Invitation
|
||||
from apps.user_management.models import User
|
||||
from apps.user_management.models import Organization, User
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class RelatedIncidentData(typing.TypedDict):
|
||||
incident_link: typing.Optional[str]
|
||||
incident_title: str
|
||||
|
||||
|
||||
class AlertGroupLogRecord(models.Model):
|
||||
alert_group: "AlertGroup"
|
||||
author: typing.Optional["User"]
|
||||
|
|
@ -161,7 +167,9 @@ class AlertGroupLogRecord(models.Model):
|
|||
ERROR_ESCALATION_TRIGGER_CUSTOM_WEBHOOK_ERROR,
|
||||
ERROR_ESCALATION_NOTIFY_TEAM_MEMBERS_STEP_IS_NOT_CONFIGURED,
|
||||
ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED,
|
||||
) = range(20)
|
||||
ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
|
||||
ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
|
||||
) = range(22)
|
||||
|
||||
type = models.IntegerField(choices=TYPE_CHOICES)
|
||||
|
||||
|
|
@ -225,7 +233,14 @@ class AlertGroupLogRecord(models.Model):
|
|||
escalation_policy_step = models.IntegerField(null=True, default=None)
|
||||
step_specific_info = JSONField(null=True, default=None)
|
||||
|
||||
STEP_SPECIFIC_INFO_KEYS = ["schedule_name", "custom_button_name", "usergroup_handle", "source_integration_name"]
|
||||
STEP_SPECIFIC_INFO_KEYS = [
|
||||
"schedule_name",
|
||||
"custom_button_name",
|
||||
"usergroup_handle",
|
||||
"source_integration_name",
|
||||
"incident_id",
|
||||
"incident_title",
|
||||
]
|
||||
|
||||
def _make_log_line_link(self, url, title, html=False, for_slack=False, substitute_with_tag=False):
|
||||
if html and url:
|
||||
|
|
@ -244,6 +259,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
author = self.author.short(organization) if self.author is not None else None
|
||||
escalation_chain = self.alert_group.channel_filter.escalation_chain if self.alert_group.channel_filter else None
|
||||
step_info = self.get_step_specific_info()
|
||||
related_incident = self.render_incident_data_from_step_info(organization, step_info)
|
||||
escalation_chain_data = (
|
||||
{
|
||||
"pk": escalation_chain.public_primary_key,
|
||||
|
|
@ -280,6 +296,7 @@ class AlertGroupLogRecord(models.Model):
|
|||
"type": self.type,
|
||||
"created_at": created_at,
|
||||
"author": author,
|
||||
"incident": related_incident,
|
||||
"escalation_chain": escalation_chain_data,
|
||||
"schedule": schedule,
|
||||
"webhook": webhook,
|
||||
|
|
@ -425,6 +442,14 @@ class AlertGroupLogRecord(models.Model):
|
|||
result += f'triggered step "Notify on-call from Schedule {schedule_text}{important_text}"'
|
||||
elif escalation_policy_step == EscalationPolicy.STEP_REPEAT_ESCALATION_N_TIMES:
|
||||
result += "escalation started from the beginning"
|
||||
elif escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
organization = self.alert_group.channel.organization
|
||||
incident_data = self.render_incident_data_from_step_info(organization, step_specific_info)
|
||||
incident_link = incident_data["incident_link"]
|
||||
incident_title = incident_data["incident_title"]
|
||||
tag = "related_incident" if substitute_with_tag else False
|
||||
incident_text = self._make_log_line_link(incident_link, incident_title, html, for_slack, tag)
|
||||
result += self.reason + f": {incident_text}"
|
||||
else:
|
||||
result += f'triggered step "{EscalationPolicy.get_step_display_name(escalation_policy_step)}"'
|
||||
elif self.type == AlertGroupLogRecord.TYPE_SILENCE:
|
||||
|
|
@ -640,8 +665,32 @@ class AlertGroupLogRecord(models.Model):
|
|||
result += f"failed to notify User Group{usergroup_handle_text} in Slack"
|
||||
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED:
|
||||
result += 'skipped escalation step "Trigger Outgoing Webhook" because it is disabled'
|
||||
elif (
|
||||
self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
|
||||
):
|
||||
result += 'skipped escalation step "Declare Incident": step is not enabled'
|
||||
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED:
|
||||
result += "failed to declare an Incident"
|
||||
if self.reason:
|
||||
result += f": {self.reason}"
|
||||
return result
|
||||
|
||||
def render_incident_data_from_step_info(
|
||||
self, organization: "Organization", step_specific_info: dict
|
||||
) -> RelatedIncidentData | None:
|
||||
from apps.alerts.models.related_incident import get_incident_url
|
||||
|
||||
if not step_specific_info or not all(key in step_specific_info for key in ["incident_title", "incident_id"]):
|
||||
return None
|
||||
|
||||
incident_link = (
|
||||
get_incident_url(organization, step_specific_info["incident_id"])
|
||||
if step_specific_info["incident_id"]
|
||||
else None
|
||||
)
|
||||
incident_title = step_specific_info["incident_title"] or DEFAULT_BACKUP_TITLE
|
||||
return {"incident_link": incident_link, "incident_title": incident_title}
|
||||
|
||||
def get_step_specific_info(self):
|
||||
step_specific_info = None
|
||||
# in some cases step_specific_info was saved with using json.dumps
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ class EscalationPolicy(OrderedModel):
|
|||
STEP_NOTIFY_IF_TIME,
|
||||
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
|
||||
STEP_REPEAT_ESCALATION_N_TIMES,
|
||||
STEP_DECLARE_INCIDENT,
|
||||
]
|
||||
# Steps can be stored in db while interacting with internal api
|
||||
# Includes important versions of default steps
|
||||
|
|
@ -218,6 +219,7 @@ class EscalationPolicy(OrderedModel):
|
|||
STEP_NOTIFY_IF_TIME,
|
||||
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
|
||||
STEP_REPEAT_ESCALATION_N_TIMES,
|
||||
STEP_DECLARE_INCIDENT,
|
||||
]
|
||||
|
||||
PUBLIC_STEP_CHOICES_MAP = {
|
||||
|
|
@ -239,6 +241,7 @@ class EscalationPolicy(OrderedModel):
|
|||
STEP_NOTIFY_IF_TIME: "notify_if_time_from_to",
|
||||
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: "notify_if_num_alerts_in_window",
|
||||
STEP_REPEAT_ESCALATION_N_TIMES: "repeat_escalation",
|
||||
STEP_DECLARE_INCIDENT: "declare_incident",
|
||||
}
|
||||
|
||||
public_primary_key = models.CharField(
|
||||
|
|
|
|||
48
engine/apps/alerts/models/related_incident.py
Normal file
48
engine/apps/alerts/models/related_incident.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import typing
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from django.db import models
|
||||
|
||||
from common.constants.plugin_ids import PluginID
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from django.db.models.manager import RelatedManager
|
||||
|
||||
from apps.alerts.models import AlertGroup, ChannelFilter
|
||||
from apps.user_management.models import Organization
|
||||
|
||||
|
||||
def get_incident_url(organization, incident_id) -> str:
|
||||
return urljoin(organization.grafana_url, f"a/{PluginID.INCIDENT}/incidents/{incident_id}")
|
||||
|
||||
|
||||
class RelatedIncident(models.Model):
|
||||
attached_alert_groups: "RelatedManager['AlertGroup']"
|
||||
channel_filter: typing.Optional["ChannelFilter"]
|
||||
organization: "Organization"
|
||||
|
||||
incident_id = models.CharField(db_index=True, max_length=50)
|
||||
organization = models.ForeignKey(
|
||||
"user_management.Organization",
|
||||
on_delete=models.CASCADE,
|
||||
related_name="related_incidents",
|
||||
)
|
||||
channel_filter = models.ForeignKey(
|
||||
"alerts.ChannelFilter",
|
||||
on_delete=models.SET_NULL,
|
||||
null=True,
|
||||
related_name="related_incidents",
|
||||
)
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
is_active = models.BooleanField(default=True)
|
||||
|
||||
attached_alert_groups = models.ManyToManyField(
|
||||
"alerts.AlertGroup",
|
||||
related_name="related_incidents",
|
||||
)
|
||||
|
||||
class Meta:
|
||||
unique_together = ("organization", "incident_id")
|
||||
|
||||
def get_incident_link(self) -> str:
|
||||
return get_incident_url(self.organization, self.incident_id)
|
||||
|
|
@ -5,6 +5,7 @@ from .alert_group_web_title_cache import ( # noqa:F401
|
|||
)
|
||||
from .check_escalation_finished import check_escalation_finished_task # noqa: F401
|
||||
from .custom_webhook_result import custom_webhook_result # noqa: F401
|
||||
from .declare_incident import declare_incident # noqa: F401
|
||||
from .delete_alert_group import delete_alert_group # noqa: F401
|
||||
from .delete_alert_group import finish_delete_alert_group # noqa: F401
|
||||
from .delete_alert_group import send_alert_group_signal_for_delete # noqa: F401
|
||||
|
|
|
|||
148
engine/apps/alerts/tasks/declare_incident.py
Normal file
148
engine/apps/alerts/tasks/declare_incident.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
|
||||
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
|
||||
from common.incident_api.client import (
|
||||
DEFAULT_INCIDENT_SEVERITY,
|
||||
DEFAULT_INCIDENT_STATUS,
|
||||
IncidentAPIClient,
|
||||
IncidentAPIException,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ATTACHMENT_CAPTION = "OnCall Alert Group"
|
||||
ERROR_SEVERITY_NOT_FOUND = "Severity.FindOne: not found"
|
||||
MAX_RETRIES = 1 if settings.DEBUG else 10
|
||||
MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT = 5
|
||||
|
||||
|
||||
def _attach_alert_group_to_incident(alert_group, incident_id, incident_title, escalation_policy, attached=False):
|
||||
from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy, RelatedIncident
|
||||
|
||||
declared_incident, _ = RelatedIncident.objects.get_or_create(
|
||||
incident_id=incident_id,
|
||||
organization=alert_group.channel.organization,
|
||||
defaults={
|
||||
"channel_filter": alert_group.channel_filter,
|
||||
},
|
||||
)
|
||||
declared_incident.attached_alert_groups.add(alert_group)
|
||||
reason = "attached to existing incident" if attached else "incident declared"
|
||||
AlertGroupLogRecord.objects.create(
|
||||
type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
|
||||
reason=reason,
|
||||
alert_group=alert_group,
|
||||
step_specific_info={"incident_id": incident_id, "incident_title": incident_title},
|
||||
escalation_policy=escalation_policy,
|
||||
escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
)
|
||||
|
||||
|
||||
def _create_error_log_record(alert_group, escalation_policy, reason=""):
|
||||
from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy
|
||||
|
||||
AlertGroupLogRecord.objects.create(
|
||||
type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
|
||||
escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
|
||||
reason=reason,
|
||||
alert_group=alert_group,
|
||||
escalation_policy=escalation_policy,
|
||||
escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
)
|
||||
|
||||
|
||||
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES)
|
||||
def declare_incident(alert_group_pk, escalation_policy_pk, severity=None):
|
||||
from apps.alerts.models import AlertGroup, EscalationPolicy, RelatedIncident
|
||||
|
||||
alert_group = AlertGroup.objects.get(pk=alert_group_pk)
|
||||
organization = alert_group.channel.organization
|
||||
escalation_policy = None
|
||||
if escalation_policy_pk:
|
||||
escalation_policy = EscalationPolicy.objects.filter(pk=escalation_policy_pk).first()
|
||||
|
||||
if alert_group.channel_filter.is_default:
|
||||
_create_error_log_record(
|
||||
alert_group, escalation_policy, reason="Declare incident step is not enabled for default routes"
|
||||
)
|
||||
return
|
||||
|
||||
if declare_incident.request.retries == MAX_RETRIES:
|
||||
_create_error_log_record(alert_group, escalation_policy)
|
||||
return
|
||||
|
||||
incident_client = IncidentAPIClient(organization.grafana_url, organization.api_token)
|
||||
|
||||
# check for currently active related incident in the same route (channel_filter)
|
||||
existing_incident = (
|
||||
RelatedIncident.objects.filter(
|
||||
organization=organization, channel_filter=alert_group.channel_filter, is_active=True
|
||||
)
|
||||
.order_by("-created_at")
|
||||
.first()
|
||||
)
|
||||
|
||||
if existing_incident:
|
||||
incident_id = existing_incident.incident_id
|
||||
try:
|
||||
# get existing incident details
|
||||
incident_data, _ = incident_client.get_incident(incident_id)
|
||||
except IncidentAPIException as e:
|
||||
logger.error(f"Error getting incident details: {e.msg}")
|
||||
if e.status == 404:
|
||||
# incident not found, mark as not opened
|
||||
existing_incident.is_active = False
|
||||
existing_incident.save(update_fields=["is_active"])
|
||||
else:
|
||||
# raise (and retry)
|
||||
raise
|
||||
else:
|
||||
# incident exists, check if it is still active
|
||||
if incident_data["status"] == DEFAULT_INCIDENT_STATUS:
|
||||
# attach to incident context
|
||||
incident_title = incident_data["title"]
|
||||
num_attached = existing_incident.attached_alert_groups.count()
|
||||
if num_attached < MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT:
|
||||
try:
|
||||
incident_data, _ = incident_client.add_activity(incident_id, alert_group.web_link)
|
||||
except IncidentAPIException as e:
|
||||
logger.error(f"Error attaching to existing incident: {e.msg}")
|
||||
# setup association between alert group and incident (even if not attached)
|
||||
_attach_alert_group_to_incident(
|
||||
alert_group, incident_id, incident_title, escalation_policy, attached=True
|
||||
)
|
||||
else:
|
||||
existing_incident.is_active = False
|
||||
existing_incident.save(update_fields=["is_active"])
|
||||
|
||||
if existing_incident is None or not existing_incident.is_active:
|
||||
# create new incident
|
||||
if severity == EscalationPolicy.SEVERITY_SET_FROM_LABEL:
|
||||
severity_label = alert_group.labels.filter(key_name="severity").first()
|
||||
severity = severity_label.value_name if severity_label else None
|
||||
severity = severity or DEFAULT_INCIDENT_SEVERITY
|
||||
try:
|
||||
incident_data, _ = incident_client.create_incident(
|
||||
alert_group.web_title_cache if alert_group.web_title_cache else DEFAULT_BACKUP_TITLE,
|
||||
severity=severity,
|
||||
attachCaption=ATTACHMENT_CAPTION,
|
||||
attachURL=alert_group.web_link,
|
||||
)
|
||||
except IncidentAPIException as e:
|
||||
logger.error(f"Error creating new incident: {e.msg}")
|
||||
if ERROR_SEVERITY_NOT_FOUND.lower() in e.msg.lower() and severity != DEFAULT_INCIDENT_SEVERITY:
|
||||
# invalid severity, retry with default severity
|
||||
declare_incident.apply_async(
|
||||
args=(alert_group_pk, escalation_policy_pk),
|
||||
kwargs={"severity": DEFAULT_INCIDENT_SEVERITY},
|
||||
)
|
||||
return
|
||||
# else raise (and retry)
|
||||
raise
|
||||
else:
|
||||
_attach_alert_group_to_incident(
|
||||
alert_group, incident_data["incidentID"], incident_data["title"], escalation_policy
|
||||
)
|
||||
|
|
@ -11,6 +11,7 @@ from apps.alerts.models import (
|
|||
EscalationChain,
|
||||
EscalationPolicy,
|
||||
Invitation,
|
||||
RelatedIncident,
|
||||
ResolutionNote,
|
||||
ResolutionNoteSlackMessage,
|
||||
UserNotificationBundle,
|
||||
|
|
@ -91,3 +92,8 @@ class InvitationFactory(factory.DjangoModelFactory):
|
|||
class UserNotificationBundleFactory(factory.DjangoModelFactory):
|
||||
class Meta:
|
||||
model = UserNotificationBundle
|
||||
|
||||
|
||||
class RelatedIncidentFactory(factory.DjangoModelFactory):
|
||||
class Meta:
|
||||
model = RelatedIncident
|
||||
|
|
|
|||
|
|
@ -690,3 +690,52 @@ def test_notify_team_members(
|
|||
(user_2.pk, alert_group.pk), expected_kwargs, immutable=True
|
||||
)
|
||||
assert mock_execute.signature.call_count == 2
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_escalation_step_declare_incident(
|
||||
escalation_step_test_setup,
|
||||
make_escalation_policy,
|
||||
):
|
||||
organization, _, _, channel_filter, alert_group, reason = escalation_step_test_setup
|
||||
|
||||
declare_incident_step = make_escalation_policy(
|
||||
escalation_chain=channel_filter.escalation_chain,
|
||||
escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
)
|
||||
escalation_policy_snapshot = get_escalation_policy_snapshot_from_model(declare_incident_step)
|
||||
expected_eta = timezone.now() + timezone.timedelta(seconds=NEXT_ESCALATION_DELAY)
|
||||
with patch.object(EscalationPolicySnapshot, "_execute_tasks") as mocked_execute_tasks:
|
||||
with patch(
|
||||
"apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot.is_declare_incident_step_enabled",
|
||||
return_value=True,
|
||||
):
|
||||
result = escalation_policy_snapshot.execute(alert_group, reason)
|
||||
expected_result = EscalationPolicySnapshot.StepExecutionResultData(
|
||||
eta=result.eta,
|
||||
stop_escalation=False,
|
||||
pause_escalation=False,
|
||||
start_from_beginning=False,
|
||||
)
|
||||
assert (
|
||||
expected_eta + timezone.timedelta(seconds=15)
|
||||
> result.eta
|
||||
> expected_eta - timezone.timedelta(seconds=15)
|
||||
)
|
||||
assert result == expected_result
|
||||
assert not alert_group.log_records.exists()
|
||||
mocked_execute_tasks.assert_called_once()
|
||||
with patch.object(EscalationPolicySnapshot, "_execute_tasks") as mocked_execute_tasks:
|
||||
with patch(
|
||||
"apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot.is_declare_incident_step_enabled",
|
||||
return_value=False,
|
||||
):
|
||||
escalation_policy_snapshot.execute(alert_group, reason)
|
||||
mocked_execute_tasks.assert_not_called()
|
||||
assert alert_group.log_records.exists()
|
||||
log_record = alert_group.log_records.get()
|
||||
assert log_record.type == AlertGroupLogRecord.TYPE_ESCALATION_FAILED
|
||||
assert (
|
||||
log_record.escalation_error_code
|
||||
== AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
|
||||
)
|
||||
|
|
|
|||
332
engine/apps/alerts/tests/test_related_incident.py
Normal file
332
engine/apps/alerts/tests/test_related_incident.py
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
import httpretty
|
||||
import pytest
|
||||
|
||||
from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy, RelatedIncident
|
||||
from apps.alerts.tasks.declare_incident import (
|
||||
ATTACHMENT_CAPTION,
|
||||
DEFAULT_BACKUP_TITLE,
|
||||
DEFAULT_INCIDENT_SEVERITY,
|
||||
ERROR_SEVERITY_NOT_FOUND,
|
||||
MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT,
|
||||
declare_incident,
|
||||
)
|
||||
from common.incident_api.client import IncidentAPIException
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_alert_group_and_escalation_step(
|
||||
make_organization,
|
||||
make_alert_receive_channel,
|
||||
make_alert_group,
|
||||
make_channel_filter,
|
||||
make_escalation_chain,
|
||||
make_escalation_policy,
|
||||
):
|
||||
def _setup_alert_group_and_escalation_step(is_default_route=False, already_declared_incident=False):
|
||||
organization = make_organization(grafana_url="https://stack.grafana.net", api_token="token")
|
||||
alert_receive_channel = make_alert_receive_channel(organization=organization)
|
||||
escalation_chain = make_escalation_chain(organization)
|
||||
declare_incident_step = make_escalation_policy(
|
||||
escalation_chain=escalation_chain,
|
||||
escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
|
||||
)
|
||||
channel_filter = make_channel_filter(
|
||||
alert_receive_channel,
|
||||
escalation_chain=escalation_chain,
|
||||
is_default=is_default_route,
|
||||
)
|
||||
alert_group = make_alert_group(
|
||||
alert_receive_channel=alert_receive_channel,
|
||||
channel_filter=channel_filter,
|
||||
)
|
||||
declared_incident = None
|
||||
if already_declared_incident:
|
||||
declared_incident = RelatedIncident.objects.create(
|
||||
incident_id="123",
|
||||
organization=organization,
|
||||
channel_filter=channel_filter,
|
||||
)
|
||||
|
||||
return alert_group, declare_incident_step, declared_incident
|
||||
|
||||
return _setup_alert_group_and_escalation_step
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_declare_incident_default_route(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(is_default_route=True)
|
||||
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
# check triggered log
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_FAILED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info is None
|
||||
assert log_record.reason == "Declare incident step is not enabled for default routes"
|
||||
assert log_record.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_ok(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.return_value = {"incidentID": "123", "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE,
|
||||
severity=DEFAULT_INCIDENT_SEVERITY,
|
||||
attachCaption=ATTACHMENT_CAPTION,
|
||||
attachURL=alert_group.web_link,
|
||||
)
|
||||
|
||||
alert_group.refresh_from_db()
|
||||
|
||||
# check declared incident
|
||||
new_incident = alert_group.related_incidents.get()
|
||||
assert new_incident.incident_id == "123"
|
||||
assert new_incident.organization == alert_group.channel.organization
|
||||
assert new_incident.channel_filter == alert_group.channel_filter
|
||||
# check triggered log
|
||||
log_record = alert_group.log_records.last()
|
||||
assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
|
||||
assert log_record.escalation_policy == declare_incident_step
|
||||
assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert log_record.step_specific_info == {"incident_id": "123", "incident_title": "Incident"}
|
||||
assert log_record.reason == "incident declared"
|
||||
assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_set_severity(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
severity = "critical"
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.return_value = {"incidentID": "123", "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk, severity=severity)
|
||||
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE, severity=severity, attachCaption=ATTACHMENT_CAPTION, attachURL=alert_group.web_link
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_set_severity_from_label(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
expected_severity = "minor"
|
||||
# set alert group label
|
||||
alert_group.labels.create(
|
||||
organization=alert_group.channel.organization, key_name="severity", value_name=expected_severity
|
||||
)
|
||||
severity = EscalationPolicy.SEVERITY_SET_FROM_LABEL
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
mock_create_incident.return_value = {"incidentID": "123", "title": "Incident"}, None
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk, severity=severity)
|
||||
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE,
|
||||
severity=expected_severity,
|
||||
attachCaption=ATTACHMENT_CAPTION,
|
||||
attachURL=alert_group.web_link,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_declare_incident_invalid_severity_fallback(setup_alert_group_and_escalation_step):
|
||||
alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
|
||||
severity = "INVALID"
|
||||
|
||||
with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
|
||||
with patch.object(declare_incident, "apply_async") as mock_declare_incident_apply_async:
|
||||
mock_create_incident.side_effect = IncidentAPIException(
|
||||
status=500, url="some-url", msg=ERROR_SEVERITY_NOT_FOUND
|
||||
)
|
||||
declare_incident(alert_group.pk, declare_incident_step.pk, severity=severity)
|
||||
|
||||
# create call failing with invalid severity
|
||||
mock_create_incident.assert_called_with(
|
||||
DEFAULT_BACKUP_TITLE, severity=severity, attachCaption=ATTACHMENT_CAPTION, attachURL=alert_group.web_link
|
||||
)
|
||||
# new task is queued with default severity instead
|
||||
mock_declare_incident_apply_async.assert_called_with(
|
||||
args=(alert_group.pk, declare_incident_step.pk), kwargs={"severity": DEFAULT_INCIDENT_SEVERITY}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_declare_incident_attach_alert_group(setup_alert_group_and_escalation_step):
    """When an open incident already exists for the route, the alert group is attached
    to it (via the incident API add_activity call) and the escalation log record
    reflects the attachment rather than a new declaration.
    """
    alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
        already_declared_incident=True
    )
    incident_id = existing_open_incident.incident_id

    with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
        with patch("common.incident_api.client.IncidentAPIClient.add_activity") as mock_add_activity:
            # existing incident is still active, so no new incident is created
            mock_get_incident.return_value = {"incidentID": incident_id, "title": "Incident", "status": "active"}, None
            mock_add_activity.return_value = {"activityItemID": "111"}, None
            declare_incident(alert_group.pk, declare_incident_step.pk)

    # check declared incident: attachment persisted and logged
    assert existing_open_incident.attached_alert_groups.filter(id=alert_group.id).exists()
    log_record = alert_group.log_records.last()
    assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
    assert log_record.escalation_policy == declare_incident_step
    assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
    assert log_record.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
    assert log_record.reason == "attached to existing incident"
    assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_declare_incident_resolved_update(setup_alert_group_and_escalation_step):
    """When the previously declared incident turns out to be resolved, it is marked
    inactive locally and a brand-new incident is declared for the alert group.
    """
    alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
        already_declared_incident=True
    )
    incident_id = existing_open_incident.incident_id
    new_incident_id = "333"
    assert new_incident_id != incident_id

    with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
        with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
            # the existing incident is reported as resolved by the incident API
            mock_get_incident.return_value = {
                "incidentID": incident_id,
                "title": "Incident1",
                "status": "resolved",
            }, None
            mock_create_incident.return_value = {"incidentID": new_incident_id, "title": "Incident2"}, None
            declare_incident(alert_group.pk, declare_incident_step.pk)

    existing_open_incident.refresh_from_db()

    # the stale incident is deactivated locally
    assert existing_open_incident.is_active is False
    # check declared incident: alert group now tracks the new incident only
    assert not existing_open_incident.attached_alert_groups.filter(id=alert_group.id).exists()
    assert alert_group.related_incidents.get().incident_id == new_incident_id
    log_record = alert_group.log_records.last()
    assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
    assert log_record.escalation_policy == declare_incident_step
    assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
    assert log_record.step_specific_info == {"incident_id": new_incident_id, "incident_title": "Incident2"}
    assert log_record.reason == "incident declared"
    assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_declare_incident_attach_alert_group_skip_incident_update(
    setup_alert_group_and_escalation_step, make_alert_group
):
    """Once an incident already holds the maximum number of attached alert groups,
    the incident API update (add_activity) is skipped — but the attachment is still
    recorded in the local DB and logged.
    """
    alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
        already_declared_incident=True
    )
    alert_receive_channel = alert_group.channel
    channel_filter = alert_group.channel_filter
    incident_id = existing_open_incident.incident_id

    # attach max alert groups to incident to reach the per-incident limit
    for _ in range(MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT):
        ag = make_alert_group(alert_receive_channel=alert_receive_channel, channel_filter=channel_filter)
        existing_open_incident.attached_alert_groups.add(ag)

    with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
        with patch("common.incident_api.client.IncidentAPIClient.add_activity") as mock_add_activity:
            mock_get_incident.return_value = {"incidentID": incident_id, "title": "Incident", "status": "active"}, None
            declare_incident(alert_group.pk, declare_incident_step.pk)

    # over the limit: no activity update was sent to the incident API
    assert not mock_add_activity.called

    # check declared incident: local attachment and log record still happen
    assert existing_open_incident.attached_alert_groups.filter(id=alert_group.id).exists()
    log_record = alert_group.log_records.last()
    assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
    assert log_record.escalation_policy == declare_incident_step
    assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
    assert log_record.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
    assert log_record.reason == "attached to existing incident"
    assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_get_existing_incident_error(setup_alert_group_and_escalation_step):
    """Errors while fetching the existing incident: a 500 propagates out of the task,
    while a 404 (incident gone) falls through to declaring a new incident.
    """
    alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
        already_declared_incident=True
    )

    # server error fetching the incident -> the task raises
    with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
        mock_get_incident.side_effect = IncidentAPIException(status=500, url="some-url")
        with pytest.raises(IncidentAPIException):
            declare_incident(alert_group.pk, declare_incident_step.pk)

    # but if incident was not found, a new one should be created
    incident_id = existing_open_incident.incident_id
    new_incident_id = "333"
    assert new_incident_id != incident_id

    with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
        with patch("common.incident_api.client.IncidentAPIClient.create_incident") as mock_create_incident:
            mock_get_incident.side_effect = IncidentAPIException(status=404, url="some-url")
            mock_create_incident.return_value = {"incidentID": new_incident_id, "title": "Incident"}, None
            declare_incident(alert_group.pk, declare_incident_step.pk)

    alert_group.refresh_from_db()

    # check declared incident: a fresh record replaces the missing one
    assert not existing_open_incident.attached_alert_groups.filter(id=alert_group.id).exists()
    new_incident = alert_group.related_incidents.get()
    assert new_incident != existing_open_incident
    assert new_incident.incident_id == new_incident_id
    assert new_incident.organization == alert_group.channel.organization
    assert new_incident.channel_filter == alert_group.channel_filter
|
||||
|
||||
|
||||
@pytest.mark.django_db
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_attach_alert_group_error(setup_alert_group_and_escalation_step):
    """A server error from add_activity does not abort the step: the attachment is
    still persisted locally and the log record is written as usual.
    """
    alert_group, declare_incident_step, existing_open_incident = setup_alert_group_and_escalation_step(
        already_declared_incident=True
    )
    incident_id = existing_open_incident.incident_id

    with patch("common.incident_api.client.IncidentAPIClient.get_incident") as mock_get_incident:
        with patch("common.incident_api.client.IncidentAPIClient.add_activity") as mock_add_activity:
            mock_get_incident.return_value = {"incidentID": incident_id, "title": "Incident", "status": "active"}, None
            # the incident-API side of the attachment fails
            mock_add_activity.side_effect = IncidentAPIException(status=500, url="some-url")
            declare_incident(alert_group.pk, declare_incident_step.pk)

    alert_group.refresh_from_db()

    # incident attachment failed remotely, but DB is still updated
    assert existing_open_incident.attached_alert_groups.filter(id=alert_group.id).exists()
    log_record = alert_group.log_records.last()
    assert log_record.type == log_record.TYPE_ESCALATION_TRIGGERED
    assert log_record.escalation_policy == declare_incident_step
    assert log_record.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
    assert log_record.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
    assert log_record.reason == "attached to existing incident"
    assert log_record.escalation_error_code is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_create_incident_error(setup_alert_group_and_escalation_step):
    """A server-side error from the incident create API propagates out of the task."""
    alert_group, declare_incident_step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)

    create_error = IncidentAPIException(status=500, url="some-url")
    patched_create = patch(
        "common.incident_api.client.IncidentAPIClient.create_incident",
        side_effect=create_error,
    )
    with patched_create, pytest.raises(IncidentAPIException):
        declare_incident(alert_group.pk, declare_incident_step.pk)
|
||||
|
|
@ -1,3 +1,11 @@
|
|||
import typing
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from apps.user_management.models import Organization
|
||||
|
||||
|
||||
def render_relative_timeline(log_created_at, alert_group_started_at):
|
||||
time_delta = log_created_at - alert_group_started_at
|
||||
seconds = int(time_delta.total_seconds())
|
||||
|
|
@ -12,3 +20,7 @@ def render_relative_timeline(log_created_at, alert_group_started_at):
|
|||
return "%dm%ds" % (minutes, seconds)
|
||||
else:
|
||||
return "%ds" % (seconds,)
|
||||
|
||||
|
||||
def is_declare_incident_step_enabled(organization: "Organization") -> bool:
    """Whether the "declare incident" escalation step may be used by this organization.

    Requires both the organization's Grafana Incident integration to be enabled and
    the global FEATURE_DECLARE_INCIDENT_STEP_ENABLED setting to be on.
    """
    org_has_incident = organization.is_grafana_incident_enabled
    feature_flag_on = settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED
    return org_has_incident and feature_flag_on
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from datetime import timedelta
|
|||
from rest_framework import serializers
|
||||
|
||||
from apps.alerts.models import EscalationChain, EscalationPolicy
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.schedules.models import OnCallSchedule
|
||||
from apps.slack.models import SlackUserGroup
|
||||
from apps.user_management.models import Team, User
|
||||
|
|
@ -24,6 +25,7 @@ TO_TIME = "to_time"
|
|||
NUM_ALERTS_IN_WINDOW = "num_alerts_in_window"
|
||||
NUM_MINUTES_IN_WINDOW = "num_minutes_in_window"
|
||||
CUSTOM_WEBHOOK_TRIGGER = "custom_webhook"
|
||||
SEVERITY = "severity"
|
||||
|
||||
STEP_TYPE_TO_RELATED_FIELD_MAP = {
|
||||
EscalationPolicy.STEP_WAIT: [WAIT_DELAY],
|
||||
|
|
@ -35,6 +37,7 @@ STEP_TYPE_TO_RELATED_FIELD_MAP = {
|
|||
EscalationPolicy.STEP_NOTIFY_IF_TIME: [FROM_TIME, TO_TIME],
|
||||
EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: [NUM_ALERTS_IN_WINDOW, NUM_MINUTES_IN_WINDOW],
|
||||
EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK: [CUSTOM_WEBHOOK_TRIGGER],
|
||||
EscalationPolicy.STEP_DECLARE_INCIDENT: [SEVERITY],
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -81,6 +84,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
allow_null=True,
|
||||
filter_field="organization",
|
||||
)
|
||||
severity = serializers.CharField(required=False, allow_null=True)
|
||||
|
||||
class Meta:
|
||||
model = EscalationPolicy
|
||||
|
|
@ -99,6 +103,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
"notify_schedule",
|
||||
"notify_to_group",
|
||||
"notify_to_team_members",
|
||||
"severity",
|
||||
"important",
|
||||
]
|
||||
|
||||
|
|
@ -123,6 +128,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
NUM_ALERTS_IN_WINDOW,
|
||||
NUM_MINUTES_IN_WINDOW,
|
||||
CUSTOM_WEBHOOK_TRIGGER,
|
||||
SEVERITY,
|
||||
]
|
||||
|
||||
step = data.get("step")
|
||||
|
|
@ -151,6 +157,8 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
|
|||
raise serializers.ValidationError("Invalid step value")
|
||||
if step_type in EscalationPolicy.SLACK_INTEGRATION_REQUIRED_STEPS and organization.slack_team_identity is None:
|
||||
raise serializers.ValidationError("Invalid escalation step type: step is Slack-specific")
|
||||
if step_type == EscalationPolicy.STEP_DECLARE_INCIDENT and not is_declare_incident_step_enabled(organization):
|
||||
raise serializers.ValidationError("Invalid escalation step type: step is not enabled")
|
||||
return step_type
|
||||
|
||||
def to_representation(self, instance):
|
||||
|
|
@ -214,6 +222,7 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer):
|
|||
NUM_ALERTS_IN_WINDOW,
|
||||
NUM_MINUTES_IN_WINDOW,
|
||||
CUSTOM_WEBHOOK_TRIGGER,
|
||||
SEVERITY,
|
||||
]
|
||||
|
||||
for f in STEP_TYPE_TO_RELATED_FIELD_MAP.get(step, []):
|
||||
|
|
|
|||
|
|
@ -975,6 +975,37 @@ def test_get_filter_labels(
|
|||
assert response.json()["results"][0]["pk"] == alert_groups[0].public_primary_key
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_get_filter_by_related_incident(
    alert_group_internal_api_setup, make_related_incident, make_alert_group, make_user_auth_headers
):
    """The has_related_incident filter splits alert groups by incident attachment:
    one attached group vs. the remaining three unattached ones.
    """
    user, token, alert_groups = alert_group_internal_api_setup

    attached_group = alert_groups[0]
    incident = make_related_incident("1", attached_group.channel.organization, attached_group.channel_filter)
    incident.attached_alert_groups.add(attached_group)

    client = APIClient()
    url = reverse("api-internal:alertgroup-list")

    # true -> only the attached group; false -> the other three
    for flag_value, expected_count in (("true", 1), ("false", 3)):
        response = client.get(
            url + "?has_related_incident=" + flag_value,
            format="json",
            **make_user_auth_headers(user, token),
        )
        assert response.status_code == status.HTTP_200_OK
        assert len(response.data["results"]) == expected_count
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_get_title_search(
|
||||
settings,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from rest_framework.test import APIClient
|
|||
|
||||
from apps.alerts.models import EscalationPolicy
|
||||
from apps.api.permissions import LegacyAccessControlRole
|
||||
from common.incident_api.client import DEFAULT_INCIDENT_SEVERITY, IncidentAPIException
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
|
@ -651,8 +652,13 @@ def test_create_escalation_policy_with_no_important_version(
|
|||
make_escalation_chain,
|
||||
step,
|
||||
make_user_auth_headers,
|
||||
settings,
|
||||
):
|
||||
organization, user, _, _ = make_organization_and_user_with_slack_identities()
|
||||
# make sure declare incident step is enabled
|
||||
settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
|
||||
organization.is_grafana_incident_enabled = True
|
||||
organization.save()
|
||||
_, token = make_token_for_organization(organization)
|
||||
escalation_chain = make_escalation_chain(organization)
|
||||
|
||||
|
|
@ -832,6 +838,7 @@ def test_escalation_policy_switch_importance(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": True,
|
||||
"wait_delay": None,
|
||||
}
|
||||
|
|
@ -889,6 +896,7 @@ def test_escalation_policy_filter_by_user(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": False,
|
||||
},
|
||||
{
|
||||
|
|
@ -906,6 +914,7 @@ def test_escalation_policy_filter_by_user(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": False,
|
||||
},
|
||||
]
|
||||
|
|
@ -971,6 +980,7 @@ def test_escalation_policy_filter_by_slack_channel(
|
|||
"notify_schedule": None,
|
||||
"notify_to_group": None,
|
||||
"notify_to_team_members": None,
|
||||
"severity": None,
|
||||
"important": False,
|
||||
},
|
||||
]
|
||||
|
|
@ -1001,3 +1011,88 @@ def test_escalation_policy_escalation_options_webhooks(
|
|||
returned_options = [option["value"] for option in response.json()]
|
||||
|
||||
assert EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK in returned_options
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_escalation_policy_severity_options(
    make_organization_and_user_with_plugin_token,
    make_user_auth_headers,
):
    """The severity-options endpoint returns the incident API's severities; it falls
    back to the default severity when the API call fails, and prepends the
    "set from label" option when the labels feature is enabled.
    """
    organization, user, token = make_organization_and_user_with_plugin_token()
    organization.is_grafana_labels_enabled = False
    organization.save()

    client = APIClient()
    url = reverse("api-internal:escalation_policy-severity-options")

    # without labels enabled: options mirror the API's severities
    available_severities = [
        {"severityID": "abc", "orgID": "1", "displayLabel": "Pending", "level": -1},
        {"severityID": "def", "orgID": "1", "displayLabel": "Critical", "level": 1},
    ]
    with patch("common.incident_api.client.IncidentAPIClient.get_severities") as mock_get_severities:
        mock_get_severities.return_value = available_severities, None
        response = client.get(url, format="json", **make_user_auth_headers(user, token))

    expected_options = [{"value": s["displayLabel"], "display_name": s["displayLabel"]} for s in available_severities]
    assert response.json() == expected_options

    # failing request does not break; fallback to the default option only
    with patch("common.incident_api.client.IncidentAPIClient.get_severities") as mock_get_severities:
        mock_get_severities.side_effect = IncidentAPIException(status=404, url="some-url")
        response = client.get(url, format="json", **make_user_auth_headers(user, token))

    fallback_options = [{"value": DEFAULT_INCIDENT_SEVERITY, "display_name": DEFAULT_INCIDENT_SEVERITY}]
    assert response.json() == fallback_options

    # labels enabled
    organization.is_grafana_labels_enabled = True
    organization.save()

    with patch("common.incident_api.client.IncidentAPIClient.get_severities") as mock_get_severities:
        mock_get_severities.return_value = available_severities, None
        response = client.get(url, format="json", **make_user_auth_headers(user, token))
    # the "set from label" option is prepended to the API-provided severities
    expected_options = [
        {
            "value": EscalationPolicy.SEVERITY_SET_FROM_LABEL,
            "display_name": EscalationPolicy.SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE,
        }
    ] + expected_options
    assert response.json() == expected_options
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_create_escalation_policy_declare_incident(
    escalation_policy_internal_api_setup, make_user_auth_headers, settings
):
    """Creating a declare-incident escalation step via the internal API is rejected
    until both the feature flag and the org's incident integration are enabled; once
    enabled, the step is created with its severity and round-trips via GET.
    """
    token, escalation_chain, _, user, _ = escalation_policy_internal_api_setup
    organization = escalation_chain.organization
    client = APIClient()
    url = reverse("api-internal:escalation_policy-list")

    data = {
        "step": EscalationPolicy.STEP_DECLARE_INCIDENT,
        "severity": "critical",
        "escalation_chain": escalation_chain.public_primary_key,
    }

    # step disabled by default -> rejected
    response = client.post(url, data, format="json", **make_user_auth_headers(user, token))
    assert response.status_code == status.HTTP_400_BAD_REQUEST

    # make sure declare incident step is enabled
    settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
    organization.is_grafana_incident_enabled = True
    organization.save()

    response = client.post(url, data, format="json", **make_user_auth_headers(user, token))
    assert response.status_code == status.HTTP_201_CREATED
    escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
    assert escalation_policy.step == EscalationPolicy.STEP_DECLARE_INCIDENT
    assert escalation_policy.severity == "critical"

    # the created policy round-trips through the detail endpoint
    url = reverse("api-internal:escalation_policy-detail", kwargs={"pk": escalation_policy.public_primary_key})
    response = client.get(url, format="json", **make_user_auth_headers(user, token))
    response_data = response.json()
    assert response_data["step"] == EscalationPolicy.STEP_DECLARE_INCIDENT
    assert response_data["severity"] == "critical"
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from apps.alerts.constants import ActionSource
|
|||
from apps.alerts.models import AlertGroup, AlertReceiveChannel, EscalationChain, ResolutionNote
|
||||
from apps.alerts.paging import unpage_user
|
||||
from apps.alerts.tasks import delete_alert_group, send_update_resolution_note_signal
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.api.errors import AlertGroupAPIError
|
||||
from apps.api.label_filtering import parse_label_query
|
||||
from apps.api.permissions import RBACPermission
|
||||
|
|
@ -120,6 +121,7 @@ class AlertGroupFilter(DateRangeFilterMixin, ModelFieldFilterMixin, filters.Filt
|
|||
)
|
||||
with_resolution_note = filters.BooleanFilter(method="filter_with_resolution_note")
|
||||
mine = filters.BooleanFilter(method="filter_mine")
|
||||
has_related_incident = filters.BooleanFilter(field_name="related_incidents", lookup_expr="isnull", exclude=True)
|
||||
|
||||
def filter_status(self, queryset, name, value):
|
||||
if not value:
|
||||
|
|
@ -719,6 +721,7 @@ class AlertGroupView(
|
|||
"""
|
||||
Retrieve a list of valid filter options that can be used to filter alert groups
|
||||
"""
|
||||
organization = self.request.auth.organization
|
||||
api_root = "/api/internal/v1/"
|
||||
default_day_range = 30
|
||||
|
||||
|
|
@ -804,7 +807,7 @@ class AlertGroupView(
|
|||
|
||||
filter_options = [{"name": "search", "type": "search", "description": description}] + filter_options
|
||||
|
||||
if is_labels_feature_enabled(self.request.auth.organization):
|
||||
if is_labels_feature_enabled(organization):
|
||||
filter_options.append(
|
||||
{
|
||||
"name": "label",
|
||||
|
|
@ -813,6 +816,15 @@ class AlertGroupView(
|
|||
}
|
||||
)
|
||||
|
||||
if is_declare_incident_step_enabled(organization):
|
||||
filter_options.append(
|
||||
{
|
||||
"name": "has_related_incident",
|
||||
"type": "boolean",
|
||||
"default": "true",
|
||||
}
|
||||
)
|
||||
|
||||
return Response(filter_options)
|
||||
|
||||
@extend_schema(
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.db.models import Q
|
||||
from rest_framework.decorators import action
|
||||
|
|
@ -5,6 +7,7 @@ from rest_framework.permissions import IsAuthenticated
|
|||
from rest_framework.response import Response
|
||||
|
||||
from apps.alerts.models import EscalationPolicy
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.api.permissions import RBACPermission
|
||||
from apps.api.serializers.escalation_policy import (
|
||||
EscalationPolicyCreateSerializer,
|
||||
|
|
@ -19,9 +22,12 @@ from common.api_helpers.mixins import (
|
|||
TeamFilteringMixin,
|
||||
UpdateSerializerMixin,
|
||||
)
|
||||
from common.incident_api.client import DEFAULT_INCIDENT_SEVERITY, IncidentAPIClient, IncidentAPIException
|
||||
from common.insight_log import EntityEvent, write_resource_insight_log
|
||||
from common.ordered_model.viewset import OrderedModelViewSet
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EscalationPolicyView(
|
||||
TeamFilteringMixin,
|
||||
|
|
@ -42,6 +48,7 @@ class EscalationPolicyView(
|
|||
"escalation_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"delay_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"num_minutes_in_window_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"severity_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
|
||||
"create": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
|
||||
"update": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
|
||||
"partial_update": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
|
||||
|
|
@ -116,6 +123,7 @@ class EscalationPolicyView(
|
|||
|
||||
@action(detail=False, methods=["get"])
|
||||
def escalation_options(self, request):
|
||||
grafana_declare_incident_enabled = is_declare_incident_step_enabled(organization=self.request.auth.organization)
|
||||
choices = []
|
||||
for step in EscalationPolicy.INTERNAL_API_STEPS:
|
||||
verbal = EscalationPolicy.INTERNAL_API_STEPS_TO_VERBAL_MAP[step]
|
||||
|
|
@ -126,7 +134,7 @@ class EscalationPolicyView(
|
|||
if slack_integration_required and not settings.FEATURE_SLACK_INTEGRATION_ENABLED:
|
||||
continue
|
||||
|
||||
if step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
if step == EscalationPolicy.STEP_DECLARE_INCIDENT and not grafana_declare_incident_enabled:
|
||||
continue
|
||||
|
||||
choices.append(
|
||||
|
|
@ -155,3 +163,25 @@ class EscalationPolicyView(
|
|||
{"value": choice[0], "display_name": choice[1]} for choice in EscalationPolicy.WEB_DURATION_CHOICES_MINUTES
|
||||
]
|
||||
return Response(choices)
|
||||
|
||||
    @action(detail=False, methods=["get"])
    def severity_options(self, request):
        """Return severity choices for the declare-incident escalation step.

        Starts with the "set from label" option when the org's labels feature is
        enabled, then appends the severities fetched from the Grafana Incident API.
        If that API call fails, only the default severity is appended as a fallback,
        so the endpoint never errors out to the client.
        """
        organization = self.request.auth.organization
        choices = []
        if organization.is_grafana_labels_enabled:
            choices = [
                {
                    "value": EscalationPolicy.SEVERITY_SET_FROM_LABEL,
                    "display_name": EscalationPolicy.SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE,
                }
            ]
        incident_client = IncidentAPIClient(organization.grafana_url, organization.api_token)
        try:
            severities, _ = incident_client.get_severities()
            choices += [
                {"value": severity["displayLabel"], "display_name": severity["displayLabel"]} for severity in severities
            ]
        except IncidentAPIException as e:
            # best-effort: log and fall back to the default severity only
            logger.error(f"Error getting severities: {e.msg}")
            choices += [{"value": DEFAULT_INCIDENT_SEVERITY, "display_name": DEFAULT_INCIDENT_SEVERITY}]
        return Response(choices)
|
||||
|
|
|
|||
25
engine/apps/grafana_plugin/tests/test_install_v2.py
Normal file
25
engine/apps/grafana_plugin/tests/test_install_v2.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from django.urls import reverse
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from apps.grafana_plugin.views.sync_v2 import SyncException
|
||||
from common.api_helpers.errors import INVALID_SELF_HOSTED_ID
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_install_v2_error_encoding(make_organization_and_user_with_plugin_token, make_user_auth_headers):
    """A SyncException raised during install-v2 is serialized into the 400 response
    body with the error's code and message (dataclass error_data must be encodable).
    """
    organization, user, token = make_organization_and_user_with_plugin_token()
    client = APIClient()

    auth_headers = make_user_auth_headers(user, token)

    exc = SyncException(INVALID_SELF_HOSTED_ID)

    with patch("apps.grafana_plugin.views.InstallV2View.do_sync", side_effect=exc):
        response = client.post(reverse("grafana-plugin:install-v2"), format="json", **auth_headers)
        assert response.data["code"] == INVALID_SELF_HOSTED_ID.code
        assert response.data["message"] == INVALID_SELF_HOSTED_ID.message
        assert response.status_code == status.HTTP_400_BAD_REQUEST
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import logging
|
||||
from dataclasses import asdict
|
||||
from dataclasses import asdict, is_dataclass
|
||||
|
||||
from django.conf import settings
|
||||
from rest_framework import status
|
||||
|
|
@ -23,7 +23,10 @@ class InstallV2View(SyncV2View):
|
|||
try:
|
||||
organization = self.do_sync(request)
|
||||
except SyncException as e:
|
||||
return Response(data=e.error_data, status=status.HTTP_400_BAD_REQUEST)
|
||||
return Response(
|
||||
data=asdict(e.error_data) if is_dataclass(e.error_data) else e.error_data,
|
||||
status=status.HTTP_400_BAD_REQUEST,
|
||||
)
|
||||
|
||||
organization.revoke_plugin()
|
||||
provisioned_data = organization.provision_plugin()
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import datetime
|
|||
import random
|
||||
import typing
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.utils import timezone
|
||||
|
||||
|
|
@ -50,7 +51,10 @@ def get_organization_ids():
|
|||
if not organizations_ids:
|
||||
organizations_ids = get_organization_ids_from_db()
|
||||
cache.set(organizations_ids, METRICS_ORGANIZATIONS_IDS, METRICS_ORGANIZATIONS_IDS_CACHE_TIMEOUT)
|
||||
return organizations_ids
|
||||
|
||||
group_id = settings.METRICS_EXPORTER_ORGANIZATION_GROUP_ID
|
||||
group_count = settings.METRICS_EXPORTER_TOTAL_ORGANIZATION_GROUPS
|
||||
return [i for i in organizations_ids if i % group_count == group_id]
|
||||
|
||||
|
||||
def is_allowed_to_start_metrics_calculation(organization_id, force=False) -> bool:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from django.utils.functional import cached_property
|
|||
from rest_framework import fields, serializers
|
||||
|
||||
from apps.alerts.models import EscalationChain, EscalationPolicy
|
||||
from apps.alerts.utils import is_declare_incident_step_enabled
|
||||
from apps.schedules.models import OnCallSchedule
|
||||
from apps.slack.models import SlackUserGroup
|
||||
from apps.user_management.models import Team, User
|
||||
|
|
@ -72,6 +73,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
required=False,
|
||||
source="custom_webhook",
|
||||
)
|
||||
severity = serializers.CharField(required=False)
|
||||
important = serializers.BooleanField(required=False)
|
||||
|
||||
TIME_FORMAT = "%H:%M:%SZ"
|
||||
|
|
@ -101,6 +103,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
"notify_if_time_to",
|
||||
"num_alerts_in_window",
|
||||
"num_minutes_in_window",
|
||||
"severity",
|
||||
]
|
||||
|
||||
PREFETCH_RELATED = ["notify_to_users_queue"]
|
||||
|
|
@ -120,6 +123,9 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
if step_type == EscalationPolicy.STEP_FINAL_NOTIFYALL and organization.slack_team_identity is None:
|
||||
raise BadRequest(detail="Invalid escalation step type: step is Slack-specific")
|
||||
|
||||
if step_type == EscalationPolicy.STEP_DECLARE_INCIDENT and not is_declare_incident_step_enabled(organization):
|
||||
raise BadRequest("Invalid escalation step type: step is not enabled")
|
||||
|
||||
return step_type
|
||||
|
||||
def create(self, validated_data):
|
||||
|
|
@ -163,6 +169,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
"notify_if_time_to",
|
||||
"num_alerts_in_window",
|
||||
"num_minutes_in_window",
|
||||
"severity",
|
||||
]
|
||||
if step == EscalationPolicy.STEP_WAIT:
|
||||
fields_to_remove.remove("duration")
|
||||
|
|
@ -190,6 +197,8 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
elif step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
|
||||
fields_to_remove.remove("num_alerts_in_window")
|
||||
fields_to_remove.remove("num_minutes_in_window")
|
||||
elif step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
fields_to_remove.remove("severity")
|
||||
|
||||
if (
|
||||
step in EscalationPolicy.DEFAULT_TO_IMPORTANT_STEP_MAPPING
|
||||
|
|
@ -213,6 +222,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
"to_time",
|
||||
"num_alerts_in_window",
|
||||
"num_minutes_in_window",
|
||||
"severity",
|
||||
]
|
||||
step = validated_data.get("step")
|
||||
important = validated_data.pop("important", None)
|
||||
|
|
@ -243,6 +253,8 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
|
|||
elif step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
|
||||
validated_data_fields_to_remove.remove("num_alerts_in_window")
|
||||
validated_data_fields_to_remove.remove("num_minutes_in_window")
|
||||
elif step == EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
validated_data_fields_to_remove.remove("severity")
|
||||
|
||||
for field in validated_data_fields_to_remove:
|
||||
validated_data.pop(field, None)
|
||||
|
|
@ -299,5 +311,7 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer):
|
|||
if step != EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
|
||||
instance.num_alerts_in_window = None
|
||||
instance.num_minutes_in_window = None
|
||||
if step != EscalationPolicy.STEP_DECLARE_INCIDENT:
|
||||
instance.severity = None
|
||||
|
||||
return super().update(instance, validated_data)
|
||||
|
|
|
|||
|
|
@ -463,3 +463,43 @@ def test_update_escalation_policy_using_notify_team_members(
|
|||
escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
|
||||
serializer = EscalationPolicySerializer(escalation_policy)
|
||||
assert response.data == serializer.data
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_create_escalation_policy_declare_incident(
|
||||
make_organization_and_user_with_token,
|
||||
escalation_policies_setup,
|
||||
settings,
|
||||
):
|
||||
organization, user, token = make_organization_and_user_with_token()
|
||||
escalation_chain, _, _ = escalation_policies_setup(organization, user)
|
||||
|
||||
data_for_create = {
|
||||
"escalation_chain_id": escalation_chain.public_primary_key,
|
||||
"type": "declare_incident",
|
||||
"position": 0,
|
||||
"severity": "critical",
|
||||
}
|
||||
|
||||
client = APIClient()
|
||||
url = reverse("api-public:escalation_policies-list")
|
||||
response = client.post(url, data=data_for_create, format="json", HTTP_AUTHORIZATION=token)
|
||||
assert response.status_code == status.HTTP_400_BAD_REQUEST
|
||||
|
||||
# make sure declare incident step is enabled
|
||||
settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
|
||||
organization.is_grafana_incident_enabled = True
|
||||
organization.save()
|
||||
|
||||
response = client.post(url, data=data_for_create, format="json", HTTP_AUTHORIZATION=token)
|
||||
assert response.status_code == status.HTTP_201_CREATED
|
||||
|
||||
escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
|
||||
assert escalation_policy.step == EscalationPolicy.STEP_DECLARE_INCIDENT
|
||||
assert escalation_policy.severity == "critical"
|
||||
|
||||
url = reverse("api-public:escalation_policies-detail", kwargs={"pk": escalation_policy.public_primary_key})
|
||||
response = client.get(url, format="json", HTTP_AUTHORIZATION=token)
|
||||
response_data = response.json()
|
||||
assert response_data["type"] == EscalationPolicy.PUBLIC_STEP_CHOICES_MAP[EscalationPolicy.STEP_DECLARE_INCIDENT]
|
||||
assert response_data["severity"] == "critical"
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ from apps.alerts.tests.factories import (
|
|||
EscalationChainFactory,
|
||||
EscalationPolicyFactory,
|
||||
InvitationFactory,
|
||||
RelatedIncidentFactory,
|
||||
ResolutionNoteFactory,
|
||||
ResolutionNoteSlackMessageFactory,
|
||||
UserNotificationBundleFactory,
|
||||
|
|
@ -1112,3 +1113,11 @@ def make_user_notification_bundle():
|
|||
)
|
||||
|
||||
return _make_user_notification_bundle
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def make_related_incident():
|
||||
def _make_related_incident(incident_id, organization, channel_filter):
|
||||
return RelatedIncidentFactory(incident_id=incident_id, organization=organization, channel_filter=channel_filter)
|
||||
|
||||
return _make_related_incident
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ FEATURE_LABELS_ENABLED_PER_ORG = getenv_list("FEATURE_LABELS_ENABLED_PER_ORG", d
|
|||
FEATURE_ALERT_GROUP_SEARCH_ENABLED = getenv_boolean("FEATURE_ALERT_GROUP_SEARCH_ENABLED", default=True)
|
||||
FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS = getenv_integer("FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS", default=None)
|
||||
FEATURE_NOTIFICATION_BUNDLE_ENABLED = getenv_boolean("FEATURE_NOTIFICATION_BUNDLE_ENABLED", default=True)
|
||||
FEATURE_DECLARE_INCIDENT_STEP_ENABLED = getenv_boolean("FEATURE_DECLARE_INCIDENT_STEP_ENABLED", default=False)
|
||||
|
||||
TWILIO_API_KEY_SID = os.environ.get("TWILIO_API_KEY_SID")
|
||||
TWILIO_API_KEY_SECRET = os.environ.get("TWILIO_API_KEY_SECRET")
|
||||
|
|
@ -120,6 +121,11 @@ METRICS_ALL = [
|
|||
# List of metrics to collect. Collect all available application metrics by default
|
||||
METRICS_TO_COLLECT = getenv_list("METRICS_TO_COLLECT", METRICS_ALL)
|
||||
|
||||
# Total number of exporters collecting the same set of metrics
|
||||
METRICS_EXPORTER_TOTAL_ORGANIZATION_GROUPS = getenv_integer("METRICS_EXPORTER_TOTAL_ORGANIZATION_GROUPS", 1)
|
||||
# ID of this exporter, used to filter which orgs to collect for
|
||||
METRICS_EXPORTER_ORGANIZATION_GROUP_ID = getenv_integer("METRICS_EXPORTER_ORGANIZATION_GROUP_ID", 0)
|
||||
|
||||
|
||||
# Database
|
||||
class DatabaseTypes:
|
||||
|
|
@ -829,9 +835,9 @@ SELF_HOSTED_SETTINGS = {
|
|||
|
||||
GRAFANA_INCIDENT_STATIC_API_KEY = os.environ.get("GRAFANA_INCIDENT_STATIC_API_KEY", None)
|
||||
|
||||
JINJA_TEMPLATE_MAX_LENGTH = os.getenv("JINJA_TEMPLATE_MAX_LENGTH", 50000)
|
||||
JINJA_RESULT_TITLE_MAX_LENGTH = os.getenv("JINJA_RESULT_TITLE_MAX_LENGTH", 500)
|
||||
JINJA_RESULT_MAX_LENGTH = os.getenv("JINJA_RESULT_MAX_LENGTH", 50000)
|
||||
JINJA_TEMPLATE_MAX_LENGTH = getenv_integer("JINJA_TEMPLATE_MAX_LENGTH", 50000)
|
||||
JINJA_RESULT_TITLE_MAX_LENGTH = getenv_integer("JINJA_RESULT_TITLE_MAX_LENGTH", 500)
|
||||
JINJA_RESULT_MAX_LENGTH = getenv_integer("JINJA_RESULT_MAX_LENGTH", 50000)
|
||||
|
||||
# Log inbound/outbound calls as slow=1 if they exceed threshold
|
||||
SLOW_THRESHOLD_SECONDS = getenv_float("SLOW_THRESHOLD_SECONDS", 2.0)
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ CELERY_TASK_ROUTES = {
|
|||
# CRITICAL
|
||||
"apps.alerts.tasks.acknowledge_reminder.acknowledge_reminder_task": {"queue": "critical"},
|
||||
"apps.alerts.tasks.acknowledge_reminder.unacknowledge_timeout_task": {"queue": "critical"},
|
||||
"apps.alerts.tasks.declare_incident.declare_incident": {"queue": "critical"},
|
||||
"apps.alerts.tasks.distribute_alert.send_alert_create_signal": {"queue": "critical"},
|
||||
"apps.alerts.tasks.escalate_alert_group.escalate_alert_group": {"queue": "critical"},
|
||||
"apps.alerts.tasks.invite_user_to_join_incident.invite_user_to_join_incident": {"queue": "critical"},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue