oncall-engine/engine/apps/alerts/models/alert.py
Yulya Artyukhina 3d4ce622cb
Add default service_name label for Alerting integrations (#5373)
# What this PR does
- The `service_name` label will be added to a Grafana Alerting integration
when it is created, if it wasn't already added by the user.
- Adds a Celery task that should be started manually and will add the
`service_name` dynamic label to all existing Grafana Alerting
integrations.

## Which issue(s) this PR closes

Related to https://github.com/grafana/oncall-private/issues/2975

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] Added the relevant release notes label (see labels prefixed w/
`release:`). These labels dictate how your PR will
    show up in the autogenerated release notes.

---------

Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com>
2025-01-14 10:02:23 +00:00

321 lines
13 KiB
Python

import hashlib
import logging
import typing
from functools import partial
from uuid import uuid4
from django.conf import settings
from django.core.validators import MinLengthValidator
from django.db import models, transaction
from django.db.models import JSONField
from apps.alerts import tasks
from apps.alerts.constants import TASK_DELAY_SECONDS
from apps.alerts.incident_appearance.templaters import TemplateLoader
from apps.alerts.signals import alert_group_escalation_snapshot_built
from apps.alerts.tasks.distribute_alert import send_alert_create_signal
from apps.labels.alert_group_labels import gather_alert_labels, save_alert_group_labels
from apps.labels.types import AlertLabels
from common.jinja_templater import apply_jinja_template_to_alert_payload_and_labels
from common.jinja_templater.apply_jinja_template import (
JinjaTemplateError,
JinjaTemplateWarning,
templated_value_is_truthy,
)
from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length
# These names are imported for static type checking only; the annotations visible in this
# module refer to them as strings, and `Alert.create` / `Alert.render_group_data` import the
# models they need at call time.
if typing.TYPE_CHECKING:
    from django.db.models.manager import RelatedManager

    from apps.alerts.models import AlertGroup, AlertReceiveChannel, ChannelFilter

# Module-level logger; its level is explicitly set to DEBUG here.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
def generate_public_primary_key_for_alert():
    """
    Return a public primary key for a new ``Alert`` that no existing alert uses.

    A candidate key is generated with the "A" prefix. While an ``Alert`` row with that key
    exists, a replacement candidate is requested from ``increase_public_primary_key_length``,
    passing the number of collisions seen so far.
    """
    prefix = "A"
    candidate = generate_public_primary_key(prefix)
    collisions = 0

    while True:
        if not Alert.objects.filter(public_primary_key=candidate).exists():
            return candidate
        # The counter is passed before being incremented, so the first retry reports 0.
        candidate = increase_public_primary_key_length(
            failure_counter=collisions, prefix=prefix, model_name="Alert"
        )
        collisions += 1
class Alert(models.Model):
    """
    Django model for a single alert.

    An alert keeps the raw request payload together with a few presentation fields
    (title, message, image and upstream link) and belongs to at most one alert group
    via ``group``. Use ``Alert.create`` to store an alert and get or create its group.
    """

    group: typing.Optional["AlertGroup"]
    # Presumably a reverse relation declared on AlertGroup; it is not defined in this module.
    resolved_alert_groups: "RelatedManager['AlertGroup']"

    public_primary_key = models.CharField(
        max_length=20,
        validators=[MinLengthValidator(settings.PUBLIC_PRIMARY_KEY_MIN_LENGTH + 1)],
        unique=True,
        default=generate_public_primary_key_for_alert,
    )

    is_resolve_signal = models.BooleanField(default=False)
    is_the_first_alert_in_group = models.BooleanField(default=False)
    message = models.TextField(max_length=3000, default=None, null=True)
    image_url = models.URLField(default=None, null=True, max_length=300)
    delivered = models.BooleanField(default=False)
    title = models.TextField(max_length=1500, default=None, null=True)

    created_at = models.DateTimeField(auto_now_add=True)
    link_to_upstream_details = models.URLField(max_length=500, default=None, null=True)
    integration_unique_data = JSONField(default=None, null=True)
    raw_request_data = JSONField()

    # This hash is for integration-specific needs
    integration_optimization_hash = models.CharField(max_length=100, db_index=True, default=None, null=True)

    group = models.ForeignKey(
        "alerts.AlertGroup", on_delete=models.CASCADE, null=True, default=None, related_name="alerts"
    )

    # Type of the raw payload accepted by the helpers below: a dict or a list.
    RawRequestData: typing.TypeAlias = typing.Union[typing.Dict, typing.List]

    def get_integration_optimization_hash(self):
        """
        Should be overloaded in child classes.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

    @classmethod
    def create(
        cls,
        title: typing.Optional[str],
        message: typing.Optional[str],
        image_url: typing.Optional[str],
        link_to_upstream_details: typing.Optional[str],
        alert_receive_channel: "AlertReceiveChannel",
        integration_unique_data: typing.Optional[typing.Dict],
        raw_request_data: RawRequestData,
        enable_autoresolve=True,
        is_demo: bool = False,
        channel_filter: typing.Optional["ChannelFilter"] = None,
        received_at: typing.Optional[str] = None,
    ) -> "Alert":
        """
        Creates an alert and a group if needed.

        Args:
            title: stored on the alert as-is.
            message: stored on the alert as-is.
            image_url: stored on the alert as-is.
            link_to_upstream_details: stored on the alert as-is.
            alert_receive_channel: channel the alert belongs to; used for label gathering,
                template rendering, route selection and grouping.
            integration_unique_data: stored on the alert as-is.
            raw_request_data: raw payload; stored on the alert and fed to the templates.
            enable_autoresolve: when False, the group is never resolved by this alert even if
                the resolve condition matches.
            is_demo: forwarded to ``render_group_data``, which then always randomizes the
                group distinction.
            channel_filter: route to use; when None it is chosen with
                ``ChannelFilter.select_filter``.
            received_at: forwarded to ``AlertGroup.objects.get_or_create_grouping``.

        Returns:
            The saved ``Alert`` instance.
        """
        # This import is here to avoid circular imports
        from apps.alerts.models import AlertGroup, AlertGroupLogRecord, AlertReceiveChannel, ChannelFilter

        alert_labels = gather_alert_labels(alert_receive_channel, raw_request_data)
        group_data = Alert.render_group_data(alert_receive_channel, raw_request_data, alert_labels, is_demo)

        if channel_filter is None:
            channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, alert_labels)

        # Get or create group
        group, group_created = AlertGroup.objects.get_or_create_grouping(
            channel=alert_receive_channel,
            channel_filter=channel_filter,
            group_data=group_data,
            received_at=received_at,
        )
        logger.debug(f"alert group {group.pk} created={group_created}")

        # Create alert
        alert = cls(
            is_resolve_signal=group_data.is_resolve_signal,
            title=title,
            message=message,
            image_url=image_url,
            link_to_upstream_details=link_to_upstream_details,
            group=group,
            integration_unique_data=integration_unique_data,
            raw_request_data=raw_request_data,
            is_the_first_alert_in_group=group_created,
        )
        alert.save()
        logger.debug(f"alert {alert.pk} created for alert group {group.pk}")

        # The task is scheduled from an on_commit hook rather than immediately.
        transaction.on_commit(partial(send_alert_create_signal.apply_async, (alert.pk,)))

        if group_created:
            save_alert_group_labels(group, alert_receive_channel, alert_labels)
            group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED)
            group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED)

        if group_created or alert.group.pause_escalation:
            # Build escalation snapshot if needed and start escalation
            alert.group.start_escalation_if_needed(countdown=TASK_DELAY_SECONDS)

        if group_created:
            # TODO: consider moving to start_escalation_if_needed
            # NOTE(review): `cls.__class__` is the metaclass of `cls`, not `cls` itself. Left
            # unchanged because receivers may depend on the current sender; confirm before changing.
            alert_group_escalation_snapshot_built.send(sender=cls.__class__, alert_group=alert.group)

        mark_as_acknowledged = group_data.is_acknowledge_signal
        if not group.acknowledged and mark_as_acknowledged:
            group.acknowledge_by_source()

        mark_as_resolved = (
            enable_autoresolve and group_data.is_resolve_signal and alert_receive_channel.allow_source_based_resolving
        )
        if not group.resolved and mark_as_resolved:
            group.resolve_by_source()

        if group_created:
            # all code below related to maintenance mode
            maintenance_uuid = None

            if alert_receive_channel.maintenance_mode == AlertReceiveChannel.MAINTENANCE:
                maintenance_uuid = alert_receive_channel.maintenance_uuid

            if maintenance_uuid is not None:
                try:
                    maintenance_incident = AlertGroup.objects.get(maintenance_uuid=maintenance_uuid)
                    group.root_alert_group = maintenance_incident
                    group.save(update_fields=["root_alert_group"])
                    log_record_for_root_incident = maintenance_incident.log_records.create(
                        type=AlertGroupLogRecord.TYPE_ATTACHED, dependent_alert_group=group, reason="Attach dropdown"
                    )
                    logger.debug(
                        f"call send_alert_group_signal for alert_group {maintenance_incident.pk} (maintenance), "
                        f"log record {log_record_for_root_incident.pk} with type "
                        f"'{log_record_for_root_incident.get_type_display()}'"
                    )
                    transaction.on_commit(partial(tasks.send_alert_group_signal.delay, log_record_for_root_incident.pk))
                except AlertGroup.DoesNotExist:
                    # Still best-effort (alert creation must not fail here), but no longer silent.
                    logger.warning(
                        f"AlertGroup.DoesNotExist while attaching alert group {group.pk} to the maintenance "
                        f"alert group (maintenance_uuid={maintenance_uuid})"
                    )

        return alert

    def wipe(self, wiped_by, wiped_at):
        """
        Erase the alert's payload and presentation fields and persist the change.

        The title is replaced with "Wiped by <username> at <YYYY-MM-DD>"; only the touched
        fields are written to the database.

        Args:
            wiped_by: object exposing ``username``, used in the replacement title.
            wiped_at: object exposing ``strftime``, used in the replacement title.
        """
        wiped_by_user_verbal = "by " + wiped_by.username

        self.integration_unique_data = {}
        self.raw_request_data = {}

        self.title = f"Wiped {wiped_by_user_verbal} at {wiped_at.strftime('%Y-%m-%d')}"
        self.message = ""
        self.image_url = None
        self.link_to_upstream_details = None

        self.save(
            update_fields=[
                "integration_unique_data",
                "raw_request_data",
                "title",
                "message",
                "image_url",
                "link_to_upstream_details",
            ]
        )

    @classmethod
    def _apply_jinja_template_to_alert_payload_and_labels(
        cls,
        template: str,
        template_name: str,
        alert_receive_channel: "AlertReceiveChannel",
        raw_request_data: RawRequestData,
        labels: typing.Optional[AlertLabels],
        use_error_msg_as_fallback=False,
        check_if_templated_value_is_truthy=False,
    ) -> typing.Union[str, None, bool]:
        """
        Render ``template`` against the payload and labels, tolerating template failures.

        Args:
            template: Jinja template source.
            template_name: name used in the warning logged on failure.
            alert_receive_channel: channel whose public primary key is logged on failure.
            raw_request_data: payload passed to the template.
            labels: labels passed to the template.
            use_error_msg_as_fallback: on failure, return the exception's fallback message.
            check_if_templated_value_is_truthy: return a bool computed by
                ``templated_value_is_truthy`` instead of the rendered value.

        Returns:
            The rendered value, or its truthiness when ``check_if_templated_value_is_truthy``
            is set. On ``JinjaTemplateError`` / ``JinjaTemplateWarning``: the fallback message
            if ``use_error_msg_as_fallback`` is set, otherwise ``False`` if
            ``check_if_templated_value_is_truthy`` is set, otherwise ``None``.
        """
        try:
            templated_value = apply_jinja_template_to_alert_payload_and_labels(template, raw_request_data, labels)
            return templated_value_is_truthy(templated_value) if check_if_templated_value_is_truthy else templated_value
        except (JinjaTemplateError, JinjaTemplateWarning) as e:
            fallback_msg = e.fallback_message

            logger.warning(
                f"{template_name} error on channel={alert_receive_channel.public_primary_key}: {fallback_msg}"
            )

            # The fallback message takes precedence over the truthiness default.
            if use_error_msg_as_fallback:
                return fallback_msg
            elif check_if_templated_value_is_truthy:
                return False
            return None

    @classmethod
    def render_group_data(
        cls,
        alert_receive_channel: "AlertReceiveChannel",
        raw_request_data: RawRequestData,
        labels: typing.Optional[AlertLabels],
        is_demo=False,
    ) -> "AlertGroup.GroupData":
        """
        Render the channel's templates into the data used to group an alert.

        Args:
            alert_receive_channel: channel whose templates are loaded.
            raw_request_data: payload passed to the templates.
            labels: labels passed to the templates.
            is_demo: when True, a random UUID is always appended to the group distinction.

        Returns:
            ``AlertGroup.GroupData`` holding the resolve/acknowledge flags, the MD5 hex digest
            of the group distinction, and the rendered web title.
        """
        from apps.alerts.models import AlertGroup

        template_manager = TemplateLoader()

        is_resolve_signal = False
        is_acknowledge_signal = False
        group_distinction: typing.Optional[str] = None
        web_title_cache: typing.Optional[str] = None

        # set web_title_cache to web title to allow alert group searching based on web_title_cache
        if (
            web_title_template := template_manager.get_attr_template("title", alert_receive_channel, render_for="web")
        ) is not None:
            web_title_cache = cls._apply_jinja_template_to_alert_payload_and_labels(
                web_title_template,
                "web_title_cache",
                alert_receive_channel,
                raw_request_data,
                labels,
                use_error_msg_as_fallback=True,
            )

        if (
            grouping_id_template := template_manager.get_attr_template("grouping_id", alert_receive_channel)
        ) is not None:
            group_distinction = cls._apply_jinja_template_to_alert_payload_and_labels(
                grouping_id_template, "grouping_id_template", alert_receive_channel, raw_request_data, labels
            )

        # Insert random uuid to prevent grouping of demo alerts or alerts with group_distinction=None
        if is_demo or not group_distinction:
            group_distinction = cls.insert_random_uuid(group_distinction)

        if group_distinction is not None:
            # MD5 only shortens the distinction into a fixed-size grouping fingerprint, so it is
            # flagged as non-security use; the digest itself is unchanged.
            group_distinction = hashlib.md5(str(group_distinction).encode(), usedforsecurity=False).hexdigest()

        if (
            resolve_condition_template := template_manager.get_attr_template("resolve_condition", alert_receive_channel)
        ) is not None:
            is_resolve_signal = cls._apply_jinja_template_to_alert_payload_and_labels(
                resolve_condition_template,
                "resolve_condition_template",
                alert_receive_channel,
                raw_request_data,
                labels,
                check_if_templated_value_is_truthy=True,
            )

        if (
            acknowledge_condition_template := template_manager.get_attr_template(
                "acknowledge_condition", alert_receive_channel
            )
        ) is not None:
            is_acknowledge_signal = cls._apply_jinja_template_to_alert_payload_and_labels(
                acknowledge_condition_template,
                "acknowledge_condition_template",
                alert_receive_channel,
                raw_request_data,
                labels,
                check_if_templated_value_is_truthy=True,
            )

        return AlertGroup.GroupData(
            is_resolve_signal=is_resolve_signal,
            is_acknowledge_signal=is_acknowledge_signal,
            group_distinction=group_distinction,
            web_title_cache=web_title_cache,
        )

    @staticmethod
    def insert_random_uuid(distinction: typing.Optional[str]) -> str:
        """
        Return ``distinction`` with a random UUID4 string appended.

        When ``distinction`` is None the UUID string alone is returned.
        """
        if distinction is not None:
            distinction += str(uuid4())
        else:
            distinction = str(uuid4())

        return distinction