Alert group search (#488)
* use web title template to render alert group verbose name
* remove group_verbose_name from tests
* clean up group_verbose_name
* remove verbose_name from API & plugin
* verbose_name migration
* update verbose name on web title template change
* use long queue for updating verbose name
* use first alert for updating verbose name
* improve batch_ids
* fix update_verbose_name
* post-review fixes
* post-review fixes
This commit is contained in:
parent
315a356fda
commit
afe13550da
25 changed files with 138 additions and 49 deletions
|
|
@ -23,7 +23,7 @@ class AlertAdmin(CustomModelAdmin):
|
|||
|
||||
@admin.register(AlertGroup)
|
||||
class AlertGroupAdmin(CustomModelAdmin):
|
||||
list_display = ("id", "public_primary_key", "verbose_name", "channel", "channel_filter", "state", "started_at")
|
||||
list_display = ("id", "public_primary_key", "web_title_cache", "channel", "channel_filter", "state", "started_at")
|
||||
list_filter = ("started_at",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
|
|
|
|||
|
|
@ -69,7 +69,6 @@ class IntegrationOptionsMixin:
|
|||
"grouping_id",
|
||||
"resolve_condition",
|
||||
"acknowledge_condition",
|
||||
"group_verbose_name",
|
||||
"source_link",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
# Generated by Django 3.2.15 on 2022-09-01 16:54
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from apps.alerts.models import AlertReceiveChannel
|
||||
from apps.alerts.tasks import update_web_title_cache_for_alert_receive_channel
|
||||
|
||||
|
||||
def populate_web_title_cache(apps, _):
    """
    Enqueue one update_web_title_cache_for_alert_receive_channel task per AlertReceiveChannel row.

    Neither argument supplied by RunPython is used: the model is imported directly
    instead of being resolved through the historical ``apps`` registry.
    """
    # NOTE(review): presumably ``objects_with_deleted`` also yields soft-deleted channels - confirm.
    for channel_pk in AlertReceiveChannel.objects_with_deleted.values_list("pk", flat=True):
        update_web_title_cache_for_alert_receive_channel.delay(channel_pk)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Rename AlertGroup.verbose_name to web_title_cache, then schedule repopulation of the renamed field."""

    dependencies = [("alerts", "0006_alertgroup_alerts_aler_channel_ee84a7_idx")]

    operations = [
        migrations.RenameField(model_name="alertgroup", old_name="verbose_name", new_name="web_title_cache"),
        # Forward: enqueue the cache refresh tasks. Backward: nothing to undo besides the rename.
        migrations.RunPython(code=populate_web_title_cache, reverse_code=migrations.RunPython.noop),
    ]
|
||||
|
|
@ -179,19 +179,19 @@ class Alert(models.Model):
|
|||
is_resolve_signal = False
|
||||
is_acknowledge_signal = False
|
||||
group_distinction = None
|
||||
group_verbose_name = "Incident"
|
||||
|
||||
acknowledge_condition_template = template_manager.get_attr_template(
|
||||
"acknowledge_condition", alert_receive_channel
|
||||
)
|
||||
resolve_condition_template = template_manager.get_attr_template("resolve_condition", alert_receive_channel)
|
||||
grouping_id_template = template_manager.get_attr_template("grouping_id", alert_receive_channel)
|
||||
# use get_default_attr_template because there is no ability to customize group_verbose_name, only default value
|
||||
group_verbose_name_template = template_manager.get_default_attr_template(
|
||||
"group_verbose_name", alert_receive_channel
|
||||
)
|
||||
if group_verbose_name_template is not None:
|
||||
group_verbose_name, _ = apply_jinja_template(group_verbose_name_template, raw_request_data)
|
||||
|
||||
# set web_title_cache to web title to allow alert group searching based on web_title_cache
|
||||
web_title_template = template_manager.get_attr_template("title", alert_receive_channel, render_for="web")
|
||||
if web_title_template:
|
||||
web_title_cache = apply_jinja_template(web_title_template, raw_request_data)[0] or None
|
||||
else:
|
||||
web_title_cache = None
|
||||
|
||||
if grouping_id_template is not None:
|
||||
group_distinction, _ = apply_jinja_template(grouping_id_template, raw_request_data)
|
||||
|
|
@ -220,7 +220,7 @@ class Alert(models.Model):
|
|||
is_resolve_signal=is_resolve_signal,
|
||||
is_acknowledge_signal=is_acknowledge_signal,
|
||||
group_distinction=group_distinction,
|
||||
group_verbose_name=group_verbose_name,
|
||||
web_title_cache=web_title_cache,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ class AlertGroupQuerySet(models.QuerySet):
|
|||
# Create a new group if we couldn't group it to any existing ones
|
||||
try:
|
||||
return (
|
||||
self.create(**search_params, is_open_for_grouping=True, verbose_name=group_data.group_verbose_name),
|
||||
self.create(**search_params, is_open_for_grouping=True, web_title_cache=group_data.web_title_cache),
|
||||
True,
|
||||
)
|
||||
except IntegrityError:
|
||||
|
|
@ -134,7 +134,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
STATUS_CHOICES = ((NEW, "New"), (ACKNOWLEDGED, "Acknowledged"), (RESOLVED, "Resolved"), (SILENCED, "Silenced"))
|
||||
|
||||
GroupData = namedtuple(
|
||||
"GroupData", ["is_resolve_signal", "group_distinction", "group_verbose_name", "is_acknowledge_signal"]
|
||||
"GroupData", ["is_resolve_signal", "group_distinction", "web_title_cache", "is_acknowledge_signal"]
|
||||
)
|
||||
|
||||
SOURCE, USER, NOT_YET, LAST_STEP, ARCHIVED, WIPED, DISABLE_MAINTENANCE = range(7)
|
||||
|
|
@ -177,7 +177,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
# For example different types of alerts from the same channel should go to different groups.
|
||||
# Distinction is what describes their difference.
|
||||
distinction = models.CharField(max_length=100, null=True, default=None, db_index=True)
|
||||
verbose_name = models.TextField(null=True, default=None)
|
||||
web_title_cache = models.TextField(null=True, default=None)
|
||||
|
||||
inside_organization_number = models.IntegerField(default=0)
|
||||
|
||||
|
|
@ -357,7 +357,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
]
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.pk}: {self.verbose_name}"
|
||||
return f"{self.pk}: {self.web_title_cache}"
|
||||
|
||||
@property
|
||||
def is_maintenance_incident(self):
|
||||
|
|
@ -899,13 +899,13 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
|
|||
self.resolve(resolved_by=AlertGroup.WIPED)
|
||||
self.stop_escalation()
|
||||
self.distinction = ""
|
||||
self.verbose_name = "Wiped incident"
|
||||
self.web_title_cache = None
|
||||
self.wiped_at = timezone.now()
|
||||
self.wiped_by = user
|
||||
for alert in self.alerts.all():
|
||||
alert.wipe(wiped_by=self.wiped_by, wiped_at=self.wiped_at)
|
||||
|
||||
self.save(update_fields=["distinction", "verbose_name", "wiped_at", "wiped_by"])
|
||||
self.save(update_fields=["distinction", "web_title_cache", "wiped_at", "wiped_by"])
|
||||
|
||||
log_record = self.log_records.create(
|
||||
type=AlertGroupLogRecord.TYPE_WIPED,
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ class MaintainableObject(models.Model):
|
|||
if mode == AlertReceiveChannel.MAINTENANCE:
|
||||
group = AlertGroup.all_objects.create(
|
||||
distinction=uuid4(),
|
||||
verbose_name=f"Maintenance of {verbal} for {maintenance_duration}",
|
||||
web_title_cache=f"Maintenance of {verbal} for {maintenance_duration}",
|
||||
maintenance_uuid=maintenance_uuid,
|
||||
channel_filter_id=maintenance_integration.default_channel_filter.pk,
|
||||
channel=maintenance_integration,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
from .acknowledge_reminder import acknowledge_reminder_task # noqa: F401
|
||||
from .alert_group_web_title_cache import ( # noqa:F401
|
||||
update_web_title_cache,
|
||||
update_web_title_cache_for_alert_receive_channel,
|
||||
)
|
||||
from .calculcate_escalation_finish_time import calculate_escalation_finish_time # noqa
|
||||
from .call_ack_url import call_ack_url # noqa: F401
|
||||
from .check_escalation_finished import check_escalation_finished_task # noqa: F401
|
||||
|
|
|
|||
87
engine/apps/alerts/tasks/alert_group_web_title_cache.py
Normal file
87
engine/apps/alerts/tasks/alert_group_web_title_cache.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
from django.db.models import Min
|
||||
|
||||
from apps.alerts.incident_appearance.templaters import TemplateLoader
|
||||
from apps.alerts.tasks.task_logger import task_logger
|
||||
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
|
||||
from common.jinja_templater import apply_jinja_template
|
||||
|
||||
# BATCH_SIZE is how many alert groups will be processed per second (for every individual alert receive channel):
# batches are scheduled one second of countdown apart.
BATCH_SIZE = 1000


def batch_ids(queryset, cursor, batch_size=BATCH_SIZE):
    """
    Return the next page of ids from ``queryset`` using keyset pagination.

    :param queryset: queryset (or compatible object) exposing an ``id`` field
    :param cursor: last id already processed; only ids strictly greater than it are returned
    :param batch_size: maximum number of ids to return, defaults to BATCH_SIZE
    :return: list of at most ``batch_size`` ids in ascending order, empty when nothing is left
    """
    remaining = queryset.filter(id__gt=cursor).order_by("id").values_list("id", flat=True)
    return list(remaining[:batch_size])
|
||||
|
||||
|
||||
@shared_dedicated_queue_retry_task
def update_web_title_cache_for_alert_receive_channel(alert_receive_channel_pk):
    """
    Refresh web_title_cache for every alert group of the alert receive channel with pk = alert_receive_channel_pk.

    The alert groups are split into batches of ids, and each batch is handed to a separate
    update_web_title_cache task; every following batch is delayed by one more second.
    Note that it's not invoked on web title template change due to performance considerations.
    """
    task_logger.debug(
        f"Starting update_web_title_cache_for_alert_receive_channel, alert_receive_channel_pk: {alert_receive_channel_pk}"
    )

    from apps.alerts.models import AlertGroup

    channel_alert_groups = AlertGroup.all_objects.filter(channel_id=alert_receive_channel_pk)

    delay_seconds = 0
    last_seen_id = 0
    while True:
        pks = batch_ids(channel_alert_groups, last_seen_id)
        if not pks:
            break

        update_web_title_cache.apply_async((alert_receive_channel_pk, pks), countdown=delay_seconds)

        # Continue after the highest id of the batch that was just scheduled.
        last_seen_id = pks[-1]
        delay_seconds += 1
|
||||
|
||||
|
||||
@shared_dedicated_queue_retry_task
def update_web_title_cache(alert_receive_channel_pk, alert_group_pks):
    """
    Update the web_title_cache field for alert groups with pk in alert_group_pks,
    for alert receive channel with pk = alert_receive_channel_pk.

    The title is rendered from the web title template of the channel and the raw request data
    of the first alert (lowest id) of each group. Groups without alerts, channels without a
    web title template, and empty render results all end up with web_title_cache = None.

    :param alert_receive_channel_pk: pk of the alert receive channel whose template is used
    :param alert_group_pks: list of alert group pks to update; an empty list is a no-op
    """
    if not alert_group_pks:
        # Nothing to update; also keeps the log line below from indexing into an empty list.
        task_logger.debug(
            f"Skipping update_web_title_cache, alert_receive_channel_pk: {alert_receive_channel_pk}, "
            f"no alert group pks passed"
        )
        return

    task_logger.debug(
        f"Starting update_web_title_cache, alert_receive_channel_pk: {alert_receive_channel_pk}, "
        f"first alert_group_pk: {alert_group_pks[0]}, last alert_group_pk: {alert_group_pks[-1]}"
    )

    from apps.alerts.models import Alert, AlertGroup, AlertReceiveChannel

    try:
        alert_receive_channel = AlertReceiveChannel.objects_with_deleted.get(pk=alert_receive_channel_pk)
    except AlertReceiveChannel.DoesNotExist:
        task_logger.warning(f"AlertReceiveChannel {alert_receive_channel_pk} doesn't exist")
        return

    web_title_template = TemplateLoader().get_attr_template("title", alert_receive_channel, render_for="web")

    # Materialize once so the instances mutated below are exactly the ones handed to bulk_update.
    alert_groups = list(AlertGroup.all_objects.filter(pk__in=alert_group_pks).only("pk"))

    # group pk -> raw_request_data of the group's first alert
    raw_request_data_by_group = {}
    if web_title_template:
        # get first alerts in 2 SQL queries (skipped without a template: every title is None anyway)
        first_alert_ids = [
            info["first_alert_id"]
            for info in Alert.objects.values("group_id")
            .filter(group_id__in=alert_group_pks)
            .annotate(first_alert_id=Min("id"))
        ]
        first_alerts = Alert.objects.filter(pk__in=first_alert_ids).values("group_id", "raw_request_data")
        raw_request_data_by_group = {alert["group_id"]: alert["raw_request_data"] for alert in first_alerts}

    for alert_group in alert_groups:
        web_title_cache = None
        if alert_group.pk in raw_request_data_by_group:
            raw_request_data = raw_request_data_by_group[alert_group.pk]
            # an empty render result is stored as None rather than as an empty string
            web_title_cache = apply_jinja_template(web_title_template, raw_request_data)[0] or None

        alert_group.web_title_cache = web_title_cache

    AlertGroup.all_objects.bulk_update(alert_groups, ["web_title_cache"])
|
||||
|
|
@ -92,7 +92,6 @@ def test_render_group_data_templates(
|
|||
assert group_data.group_distinction == template_module.tests.get("group_distinction")
|
||||
assert group_data.is_resolve_signal == template_module.tests.get("is_resolve_signal")
|
||||
assert group_data.is_acknowledge_signal == template_module.tests.get("is_acknowledge_signal")
|
||||
assert group_data.group_verbose_name == template_module.tests.get("group_verbose_name")
|
||||
|
||||
|
||||
def test_default_templates_are_valid():
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ class AlertGroupListSerializer(EagerLoadingMixin, serializers.ModelSerializer):
|
|||
"pk",
|
||||
"alerts_count",
|
||||
"inside_organization_number",
|
||||
"verbose_name",
|
||||
"alert_receive_channel",
|
||||
"resolved",
|
||||
"resolved_by",
|
||||
|
|
|
|||
|
|
@ -191,8 +191,7 @@ class AlertGroupView(
|
|||
pagination_class = TwentyFiveCursorPaginator
|
||||
|
||||
filter_backends = [SearchFilter, filters.DjangoFilterBackend]
|
||||
# todo: add ability to search by templated title
|
||||
search_fields = ["public_primary_key", "inside_organization_number"]
|
||||
search_fields = ["public_primary_key", "inside_organization_number", "web_title_cache"]
|
||||
|
||||
filterset_class = AlertGroupFilter
|
||||
|
||||
|
|
|
|||
|
|
@ -116,8 +116,6 @@ resolve_condition = """\
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = "Incident"
|
||||
|
||||
tests = {
|
||||
"payload": {
|
||||
"endsAt": "0001-01-01T00:00:00Z",
|
||||
|
|
|
|||
|
|
@ -61,6 +61,4 @@ resolve_condition = """\
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = "Incident"
|
||||
|
||||
example_payload = {"message": "This alert was sent by user for the demonstration purposes"}
|
||||
|
|
|
|||
|
|
@ -50,8 +50,6 @@ resolve_condition = '{{ payload.get("state", "").upper() == "OK" }}'
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = web_title
|
||||
|
||||
example_payload = {
|
||||
"alert_uid": "08d6891a-835c-e661-39fa-96b6a9e26552",
|
||||
"title": "TestAlert: The whole system is down",
|
||||
|
|
|
|||
|
|
@ -143,10 +143,6 @@ resolve_condition = """\
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = """\
|
||||
{{ payload.get("ruleName", "Incident") }}
|
||||
"""
|
||||
|
||||
tests = {
|
||||
"payload": {
|
||||
"endsAt": "0001-01-01T00:00:00Z",
|
||||
|
|
@ -257,7 +253,6 @@ tests = {
|
|||
"group_distinction": "c6bf5494a2d3052459b4dac837e41455",
|
||||
"is_resolve_signal": False,
|
||||
"is_acknowledge_signal": False,
|
||||
"group_verbose_name": "Incident",
|
||||
}
|
||||
|
||||
# Miscellaneous
|
||||
|
|
|
|||
|
|
@ -120,8 +120,6 @@ resolve_condition = """\
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = "Incident"
|
||||
|
||||
tests = {
|
||||
"payload": {
|
||||
"endsAt": "0001-01-01T00:00:00Z",
|
||||
|
|
|
|||
|
|
@ -26,6 +26,4 @@ resolve_condition = '{{ payload.get("is_resolve", False) == True }}'
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = '{{ payload.get("title", "Title") }}'
|
||||
|
||||
example_payload = {"foo": "bar"}
|
||||
|
|
|
|||
|
|
@ -49,5 +49,3 @@ grouping_id = '{{ payload.get("title", "")}}'
|
|||
resolve_condition = '{{ payload.get("state", "").upper() == "OK" }}'
|
||||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = web_title
|
||||
|
|
|
|||
|
|
@ -56,8 +56,6 @@ resolve_condition = '{{ payload.get("level", "").startswith("OK") }}'
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = '{{ payload.get("id", "") }}'
|
||||
|
||||
example_payload = {
|
||||
"id": "TestAlert",
|
||||
"message": "This alert was sent by user for the demonstration purposes",
|
||||
|
|
|
|||
|
|
@ -49,5 +49,3 @@ grouping_id = None
|
|||
resolve_condition = None
|
||||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = "Incident"
|
||||
|
|
|
|||
|
|
@ -58,5 +58,3 @@ grouping_id = """{{ payload }}"""
|
|||
resolve_condition = None
|
||||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = web_title
|
||||
|
|
|
|||
|
|
@ -39,6 +39,4 @@ resolve_condition = None
|
|||
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = '<#{{ payload.get("channel", "") }}>'
|
||||
|
||||
source_link = '{{ payload.get("amixr_mixin", {}).get("permalink", "")}}'
|
||||
|
|
|
|||
|
|
@ -60,6 +60,4 @@ resolve_condition = """\
|
|||
{%- endif %}"""
|
||||
acknowledge_condition = None
|
||||
|
||||
group_verbose_name = web_title
|
||||
|
||||
example_payload = {"message": "This alert was sent by user for the demonstration purposes"}
|
||||
|
|
|
|||
|
|
@ -139,6 +139,8 @@ CELERY_TASK_ROUTES = {
|
|||
"apps.schedules.tasks.drop_cached_ical.drop_cached_ical_for_custom_events_for_organization": {"queue": "critical"},
|
||||
"apps.schedules.tasks.drop_cached_ical.drop_cached_ical_task": {"queue": "critical"},
|
||||
# LONG
|
||||
"apps.alerts.tasks.alert_group_web_title_cache.update_web_title_cache_for_alert_receive_channel": {"queue": "long"},
|
||||
"apps.alerts.tasks.alert_group_web_title_cache.update_web_title_cache": {"queue": "long"},
|
||||
"apps.alerts.tasks.check_escalation_finished.check_escalation_finished_task": {"queue": "long"},
|
||||
"apps.grafana_plugin.tasks.sync.start_sync_organizations": {"queue": "long"},
|
||||
"apps.grafana_plugin.tasks.sync.sync_organization_async": {"queue": "long"},
|
||||
|
|
|
|||
|
|
@ -72,7 +72,6 @@ export interface Alert {
|
|||
silenced_until: string;
|
||||
started_at: string;
|
||||
last_alert_at: string;
|
||||
verbose_name: string;
|
||||
dependent_alert_groups: Alert[];
|
||||
status: IncidentStatus;
|
||||
short?: boolean;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue