Alert group search (#488)

* use web title template to render alert group verbose name

* remove group_verbose_name from tests

* clean up group_verbose_name

* remove verbose_name from API & plugin

* verbose_name migration

* update verbose name on web title template change

* use long queue for updating verbose name

* use first alert for updating verbose name

* improve batch_ids

* fix update_verbose_name

* post-review fixes

* post-review fixes
This commit is contained in:
Vadim Stepanov 2022-09-06 11:30:12 +01:00 committed by GitHub
parent 315a356fda
commit afe13550da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 138 additions and 49 deletions

View file

@ -23,7 +23,7 @@ class AlertAdmin(CustomModelAdmin):
@admin.register(AlertGroup)
class AlertGroupAdmin(CustomModelAdmin):
list_display = ("id", "public_primary_key", "verbose_name", "channel", "channel_filter", "state", "started_at")
list_display = ("id", "public_primary_key", "web_title_cache", "channel", "channel_filter", "state", "started_at")
list_filter = ("started_at",)
def get_queryset(self, request):

View file

@ -69,7 +69,6 @@ class IntegrationOptionsMixin:
"grouping_id",
"resolve_condition",
"acknowledge_condition",
"group_verbose_name",
"source_link",
]

View file

@ -0,0 +1,28 @@
# Generated by Django 3.2.15 on 2022-09-01 16:54
from django.db import migrations
from apps.alerts.models import AlertReceiveChannel
from apps.alerts.tasks import update_web_title_cache_for_alert_receive_channel
def populate_web_title_cache(apps, _):
    """Kick off an async backfill of AlertGroup.web_title_cache for every integration.

    NOTE(review): intentionally uses the live model (for the custom
    `objects_with_deleted` manager) and a celery task, not the historical
    `apps` registry — assumes the model and task still exist whenever this
    migration runs; confirm before squashing.
    """
    # Fan out one task per alert receive channel; each task batches its own groups.
    pks = AlertReceiveChannel.objects_with_deleted.values_list("pk", flat=True)
    for pk in pks:
        update_web_title_cache_for_alert_receive_channel.delay(pk)
class Migration(migrations.Migration):
    """Rename AlertGroup.verbose_name to web_title_cache and backfill it."""

    dependencies = [
        ('alerts', '0006_alertgroup_alerts_aler_channel_ee84a7_idx'),
    ]

    operations = [
        # Rename keeps the existing column data; old verbose_name values remain
        # until the async backfill below overwrites them with rendered web titles.
        migrations.RenameField(
            model_name='alertgroup',
            old_name='verbose_name',
            new_name='web_title_cache',
        ),
        # Backfill asynchronously via celery; reverse migration is a no-op.
        migrations.RunPython(populate_web_title_cache, migrations.RunPython.noop),
    ]

View file

@ -179,19 +179,19 @@ class Alert(models.Model):
is_resolve_signal = False
is_acknowledge_signal = False
group_distinction = None
group_verbose_name = "Incident"
acknowledge_condition_template = template_manager.get_attr_template(
"acknowledge_condition", alert_receive_channel
)
resolve_condition_template = template_manager.get_attr_template("resolve_condition", alert_receive_channel)
grouping_id_template = template_manager.get_attr_template("grouping_id", alert_receive_channel)
# use get_default_attr_template because there is no ability to customize group_verbose_name, only default value
group_verbose_name_template = template_manager.get_default_attr_template(
"group_verbose_name", alert_receive_channel
)
if group_verbose_name_template is not None:
group_verbose_name, _ = apply_jinja_template(group_verbose_name_template, raw_request_data)
# set web_title_cache to web title to allow alert group searching based on web_title_cache
web_title_template = template_manager.get_attr_template("title", alert_receive_channel, render_for="web")
if web_title_template:
web_title_cache = apply_jinja_template(web_title_template, raw_request_data)[0] or None
else:
web_title_cache = None
if grouping_id_template is not None:
group_distinction, _ = apply_jinja_template(grouping_id_template, raw_request_data)
@ -220,7 +220,7 @@ class Alert(models.Model):
is_resolve_signal=is_resolve_signal,
is_acknowledge_signal=is_acknowledge_signal,
group_distinction=group_distinction,
group_verbose_name=group_verbose_name,
web_title_cache=web_title_cache,
)
@staticmethod

View file

@ -82,7 +82,7 @@ class AlertGroupQuerySet(models.QuerySet):
# Create a new group if we couldn't group it to any existing ones
try:
return (
self.create(**search_params, is_open_for_grouping=True, verbose_name=group_data.group_verbose_name),
self.create(**search_params, is_open_for_grouping=True, web_title_cache=group_data.web_title_cache),
True,
)
except IntegrityError:
@ -134,7 +134,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
STATUS_CHOICES = ((NEW, "New"), (ACKNOWLEDGED, "Acknowledged"), (RESOLVED, "Resolved"), (SILENCED, "Silenced"))
GroupData = namedtuple(
"GroupData", ["is_resolve_signal", "group_distinction", "group_verbose_name", "is_acknowledge_signal"]
"GroupData", ["is_resolve_signal", "group_distinction", "web_title_cache", "is_acknowledge_signal"]
)
SOURCE, USER, NOT_YET, LAST_STEP, ARCHIVED, WIPED, DISABLE_MAINTENANCE = range(7)
@ -177,7 +177,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
# For example different types of alerts from the same channel should go to different groups.
# Distinction is what describes their difference.
distinction = models.CharField(max_length=100, null=True, default=None, db_index=True)
verbose_name = models.TextField(null=True, default=None)
web_title_cache = models.TextField(null=True, default=None)
inside_organization_number = models.IntegerField(default=0)
@ -357,7 +357,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
]
def __str__(self):
return f"{self.pk}: {self.verbose_name}"
return f"{self.pk}: {self.web_title_cache}"
@property
def is_maintenance_incident(self):
@ -899,13 +899,13 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
self.resolve(resolved_by=AlertGroup.WIPED)
self.stop_escalation()
self.distinction = ""
self.verbose_name = "Wiped incident"
self.web_title_cache = None
self.wiped_at = timezone.now()
self.wiped_by = user
for alert in self.alerts.all():
alert.wipe(wiped_by=self.wiped_by, wiped_at=self.wiped_at)
self.save(update_fields=["distinction", "verbose_name", "wiped_at", "wiped_by"])
self.save(update_fields=["distinction", "web_title_cache", "wiped_at", "wiped_by"])
log_record = self.log_records.create(
type=AlertGroupLogRecord.TYPE_WIPED,

View file

@ -131,7 +131,7 @@ class MaintainableObject(models.Model):
if mode == AlertReceiveChannel.MAINTENANCE:
group = AlertGroup.all_objects.create(
distinction=uuid4(),
verbose_name=f"Maintenance of {verbal} for {maintenance_duration}",
web_title_cache=f"Maintenance of {verbal} for {maintenance_duration}",
maintenance_uuid=maintenance_uuid,
channel_filter_id=maintenance_integration.default_channel_filter.pk,
channel=maintenance_integration,

View file

@ -1,4 +1,8 @@
from .acknowledge_reminder import acknowledge_reminder_task # noqa: F401
from .alert_group_web_title_cache import ( # noqa:F401
update_web_title_cache,
update_web_title_cache_for_alert_receive_channel,
)
from .calculcate_escalation_finish_time import calculate_escalation_finish_time # noqa
from .call_ack_url import call_ack_url # noqa: F401
from .check_escalation_finished import check_escalation_finished_task # noqa: F401

View file

@ -0,0 +1,87 @@
from django.db.models import Min
from apps.alerts.incident_appearance.templaters import TemplateLoader
from apps.alerts.tasks.task_logger import task_logger
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
from common.jinja_templater import apply_jinja_template
# BATCH_SIZE is how many alert groups will be processed per second (for every individual alert receive channel)
BATCH_SIZE = 1000


def batch_ids(queryset, cursor):
    """Return up to BATCH_SIZE ascending ids from *queryset* strictly greater than *cursor*."""
    page = queryset.filter(id__gt=cursor).order_by("id")
    return list(page.values_list("id", flat=True)[:BATCH_SIZE])
@shared_dedicated_queue_retry_task
def update_web_title_cache_for_alert_receive_channel(alert_receive_channel_pk):
    """
    Update the web_title_cache field for all alert groups of alert receive channel with pk = alert_receive_channel_pk.
    Note that it's not invoked on web title template change due to performance considerations.
    """
    task_logger.debug(
        f"Starting update_web_title_cache_for_alert_receive_channel, alert_receive_channel_pk: {alert_receive_channel_pk}"
    )

    from apps.alerts.models import AlertGroup

    queryset = AlertGroup.all_objects.filter(channel_id=alert_receive_channel_pk)

    # Walk the channel's alert groups in id order, one scheduled sub-task per
    # batch. The growing countdown spaces batches one second apart, i.e. roughly
    # BATCH_SIZE alert groups are processed per second per channel.
    cursor = 0
    countdown = 0
    while True:
        ids = batch_ids(queryset, cursor)
        if not ids:
            break
        update_web_title_cache.apply_async((alert_receive_channel_pk, ids), countdown=countdown)
        cursor = ids[-1]
        countdown += 1
@shared_dedicated_queue_retry_task
def update_web_title_cache(alert_receive_channel_pk, alert_group_pks):
    """
    Update the web_title_cache field for alert groups with pk in alert_group_pks,
    for alert receive channel with pk = alert_receive_channel_pk.
    """
    task_logger.debug(
        f"Starting update_web_title_cache, alert_receive_channel_pk: {alert_receive_channel_pk}, "
        f"first alert_group_pk: {alert_group_pks[0]}, last alert_group_pk: {alert_group_pks[-1]}"
    )

    from apps.alerts.models import Alert, AlertGroup, AlertReceiveChannel

    try:
        alert_receive_channel = AlertReceiveChannel.objects_with_deleted.get(pk=alert_receive_channel_pk)
    except AlertReceiveChannel.DoesNotExist:
        task_logger.warning(f"AlertReceiveChannel {alert_receive_channel_pk} doesn't exist")
        return

    alert_groups = AlertGroup.all_objects.filter(pk__in=alert_group_pks).only("pk")

    # Fetch every group's first alert in 2 SQL queries: one to find the minimal
    # alert id per group, one to load the corresponding raw payloads.
    # `.values("group_id").annotate(...)` already yields one row per group, so no
    # intermediate dict is needed to collect the first-alert ids.
    alerts_info = (
        Alert.objects.values("group_id").filter(group_id__in=alert_group_pks).annotate(first_alert_id=Min("id"))
    )
    first_alert_ids = [info["first_alert_id"] for info in alerts_info]
    first_alerts = Alert.objects.filter(pk__in=first_alert_ids).values("group_id", "raw_request_data")
    first_alert_map = {alert["group_id"]: alert for alert in first_alerts}

    template_manager = TemplateLoader()
    web_title_template = template_manager.get_attr_template("title", alert_receive_channel, render_for="web")

    for alert_group in alert_groups:
        # Groups with no alerts, or channels without a web title template,
        # get their cache cleared to None.
        web_title_cache = None
        if web_title_template and alert_group.pk in first_alert_map:
            raw_request_data = first_alert_map[alert_group.pk]["raw_request_data"]
            web_title_cache = apply_jinja_template(web_title_template, raw_request_data)[0] or None
        alert_group.web_title_cache = web_title_cache

    AlertGroup.all_objects.bulk_update(alert_groups, ["web_title_cache"])

View file

@ -92,7 +92,6 @@ def test_render_group_data_templates(
assert group_data.group_distinction == template_module.tests.get("group_distinction")
assert group_data.is_resolve_signal == template_module.tests.get("is_resolve_signal")
assert group_data.is_acknowledge_signal == template_module.tests.get("is_acknowledge_signal")
assert group_data.group_verbose_name == template_module.tests.get("group_verbose_name")
def test_default_templates_are_valid():

View file

@ -61,7 +61,6 @@ class AlertGroupListSerializer(EagerLoadingMixin, serializers.ModelSerializer):
"pk",
"alerts_count",
"inside_organization_number",
"verbose_name",
"alert_receive_channel",
"resolved",
"resolved_by",

View file

@ -191,8 +191,7 @@ class AlertGroupView(
pagination_class = TwentyFiveCursorPaginator
filter_backends = [SearchFilter, filters.DjangoFilterBackend]
# todo: add ability to search by templated title
search_fields = ["public_primary_key", "inside_organization_number"]
search_fields = ["public_primary_key", "inside_organization_number", "web_title_cache"]
filterset_class = AlertGroupFilter

View file

@ -116,8 +116,6 @@ resolve_condition = """\
acknowledge_condition = None
group_verbose_name = "Incident"
tests = {
"payload": {
"endsAt": "0001-01-01T00:00:00Z",

View file

@ -61,6 +61,4 @@ resolve_condition = """\
acknowledge_condition = None
group_verbose_name = "Incident"
example_payload = {"message": "This alert was sent by user for the demonstration purposes"}

View file

@ -50,8 +50,6 @@ resolve_condition = '{{ payload.get("state", "").upper() == "OK" }}'
acknowledge_condition = None
group_verbose_name = web_title
example_payload = {
"alert_uid": "08d6891a-835c-e661-39fa-96b6a9e26552",
"title": "TestAlert: The whole system is down",

View file

@ -143,10 +143,6 @@ resolve_condition = """\
acknowledge_condition = None
group_verbose_name = """\
{{ payload.get("ruleName", "Incident") }}
"""
tests = {
"payload": {
"endsAt": "0001-01-01T00:00:00Z",
@ -257,7 +253,6 @@ tests = {
"group_distinction": "c6bf5494a2d3052459b4dac837e41455",
"is_resolve_signal": False,
"is_acknowledge_signal": False,
"group_verbose_name": "Incident",
}
# Miscellaneous

View file

@ -120,8 +120,6 @@ resolve_condition = """\
acknowledge_condition = None
group_verbose_name = "Incident"
tests = {
"payload": {
"endsAt": "0001-01-01T00:00:00Z",

View file

@ -26,6 +26,4 @@ resolve_condition = '{{ payload.get("is_resolve", False) == True }}'
acknowledge_condition = None
group_verbose_name = '{{ payload.get("title", "Title") }}'
example_payload = {"foo": "bar"}

View file

@ -49,5 +49,3 @@ grouping_id = '{{ payload.get("title", "")}}'
resolve_condition = '{{ payload.get("state", "").upper() == "OK" }}'
acknowledge_condition = None
group_verbose_name = web_title

View file

@ -56,8 +56,6 @@ resolve_condition = '{{ payload.get("level", "").startswith("OK") }}'
acknowledge_condition = None
group_verbose_name = '{{ payload.get("id", "") }}'
example_payload = {
"id": "TestAlert",
"message": "This alert was sent by user for the demonstration purposes",

View file

@ -49,5 +49,3 @@ grouping_id = None
resolve_condition = None
acknowledge_condition = None
group_verbose_name = "Incident"

View file

@ -58,5 +58,3 @@ grouping_id = """{{ payload }}"""
resolve_condition = None
acknowledge_condition = None
group_verbose_name = web_title

View file

@ -39,6 +39,4 @@ resolve_condition = None
acknowledge_condition = None
group_verbose_name = '<#{{ payload.get("channel", "") }}>'
source_link = '{{ payload.get("amixr_mixin", {}).get("permalink", "")}}'

View file

@ -60,6 +60,4 @@ resolve_condition = """\
{%- endif %}"""
acknowledge_condition = None
group_verbose_name = web_title
example_payload = {"message": "This alert was sent by user for the demonstration purposes"}

View file

@ -139,6 +139,8 @@ CELERY_TASK_ROUTES = {
"apps.schedules.tasks.drop_cached_ical.drop_cached_ical_for_custom_events_for_organization": {"queue": "critical"},
"apps.schedules.tasks.drop_cached_ical.drop_cached_ical_task": {"queue": "critical"},
# LONG
"apps.alerts.tasks.alert_group_web_title_cache.update_web_title_cache_for_alert_receive_channel": {"queue": "long"},
"apps.alerts.tasks.alert_group_web_title_cache.update_web_title_cache": {"queue": "long"},
"apps.alerts.tasks.check_escalation_finished.check_escalation_finished_task": {"queue": "long"},
"apps.grafana_plugin.tasks.sync.start_sync_organizations": {"queue": "long"},
"apps.grafana_plugin.tasks.sync.sync_organization_async": {"queue": "long"},

View file

@ -72,7 +72,6 @@ export interface Alert {
silenced_until: string;
started_at: string;
last_alert_at: string;
verbose_name: string;
dependent_alert_groups: Alert[];
status: IncidentStatus;
short?: boolean;