From a1b1b83fd17572cf891dc850fb72b3e57e3597dc Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Wed, 27 Jul 2022 12:14:59 +0100 Subject: [PATCH] Optimize alert group list view (#299) * Revert "Revert "Alert list view & caching rework (#216)"" This reverts commit 730dccc3 * Revert "fix AlertGroupClassicMarkdownRenderer" This reverts commit 82e53d8e0c513feb8aaef332fc1d3800e90538fc. * optimize alert group list view * optimize alert group list view --- .../renderers/base_renderer.py | 7 +- .../renderers/classic_markdown_renderer.py | 8 +- .../renderers/web_renderer.py | 8 +- ...ertgroup_alerts_aler_channel_ee84a7_idx.py | 17 ++ engine/apps/alerts/models/alert.py | 5 +- engine/apps/alerts/models/alert_group.py | 93 ++------ .../alerts/models/alert_group_log_record.py | 6 +- .../alerts/models/alert_receive_channel.py | 21 +- engine/apps/alerts/tasks/__init__.py | 2 +- .../alerts/tasks/cache_alert_group_for_web.py | 43 +--- .../invalidate_web_cache_for_alert_group.py | 25 +-- engine/apps/api/serializers/alert_group.py | 136 +++++------- .../apps/api/serializers/resolution_note.py | 11 +- engine/apps/api/tasks.py | 55 ----- engine/apps/api/tests/test_alert_group.py | 52 ++--- engine/apps/api/views/alert_group.py | 200 +++++++----------- engine/apps/api/views/route_regex_debugger.py | 5 +- .../user_notification_policy_log_record.py | 1 - .../apps/public_api/tests/test_incidents.py | 2 +- .../slack/scenarios/alertgroup_appearance.py | 12 -- .../apps/slack/scenarios/resolution_note.py | 6 - engine/common/api_helpers/paginators.py | 9 +- .../use_random_readonly_db_manager_mixin.py | 21 -- engine/settings/dev.py | 4 - engine/settings/prod_without_db.py | 7 +- .../CursorPagination.module.css | 3 + .../CursorPagination/CursorPagination.tsx | 79 +++++++ .../IncidentsFilters/IncidentsFilters.tsx | 10 +- .../src/models/alertgroup/alertgroup.ts | 83 ++++---- .../src/models/alertgroup/alertgroup.types.ts | 11 +- .../src/pages/incident/Incident.tsx | 27 ++- .../src/pages/incidents/Incidents.module.css | 5 + .../src/pages/incidents/Incidents.tsx | 156 ++++++++------ 33 files changed, 471 insertions(+), 659 deletions(-) create mode 100644 engine/apps/alerts/migrations/0006_alertgroup_alerts_aler_channel_ee84a7_idx.py delete mode 100644 engine/apps/api/tasks.py delete mode 100644 engine/common/mixins/use_random_readonly_db_manager_mixin.py create mode 100644 grafana-plugin/src/components/CursorPagination/CursorPagination.module.css create mode 100644 grafana-plugin/src/components/CursorPagination/CursorPagination.tsx diff --git a/engine/apps/alerts/incident_appearance/renderers/base_renderer.py b/engine/apps/alerts/incident_appearance/renderers/base_renderer.py index 234c8038..f18fd6a3 100644 --- a/engine/apps/alerts/incident_appearance/renderers/base_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/base_renderer.py @@ -18,9 +18,12 @@ class AlertBaseRenderer(ABC): class AlertGroupBaseRenderer(ABC): - def __init__(self, alert_group): + def __init__(self, alert_group, alert=None): + if alert is None: + alert = alert_group.alerts.first() + self.alert_group = alert_group - self.alert_renderer = self.alert_renderer_class(self.alert_group.alerts.first()) + self.alert_renderer = self.alert_renderer_class(alert) @property @abstractmethod diff --git a/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py b/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py index 9759e86b..aa7a059e 100644 --- 
a/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/classic_markdown_renderer.py @@ -20,11 +20,11 @@ class AlertClassicMarkdownRenderer(AlertBaseRenderer): class AlertGroupClassicMarkdownRenderer(AlertGroupBaseRenderer): - def __init__(self, alert_group): - super().__init__(alert_group) + def __init__(self, alert_group, alert=None): + if alert is None: + alert = alert_group.alerts.last() - # use the last alert to render content - self.alert_renderer = self.alert_renderer_class(self.alert_group.alerts.last()) + super().__init__(alert_group, alert) @property def alert_renderer_class(self): diff --git a/engine/apps/alerts/incident_appearance/renderers/web_renderer.py b/engine/apps/alerts/incident_appearance/renderers/web_renderer.py index e68d453c..681f94f5 100644 --- a/engine/apps/alerts/incident_appearance/renderers/web_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/web_renderer.py @@ -20,11 +20,11 @@ class AlertWebRenderer(AlertBaseRenderer): class AlertGroupWebRenderer(AlertGroupBaseRenderer): - def __init__(self, alert_group): - super().__init__(alert_group) + def __init__(self, alert_group, alert=None): + if alert is None: + alert = alert_group.alerts.last() - # use the last alert to render content - self.alert_renderer = self.alert_renderer_class(self.alert_group.alerts.last()) + super().__init__(alert_group, alert) @property def alert_renderer_class(self): diff --git a/engine/apps/alerts/migrations/0006_alertgroup_alerts_aler_channel_ee84a7_idx.py b/engine/apps/alerts/migrations/0006_alertgroup_alerts_aler_channel_ee84a7_idx.py new file mode 100644 index 00000000..ada7b0da --- /dev/null +++ b/engine/apps/alerts/migrations/0006_alertgroup_alerts_aler_channel_ee84a7_idx.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.13 on 2022-07-27 10:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('alerts', '0005_alertgroup_cached_render_for_web'), + ] + + operations = [ + migrations.AddIndex( + model_name='alertgroup', + index=models.Index(fields=['channel_id', 'resolved', 'acknowledged', 'silenced', 'root_alert_group_id', 'is_archived'], name='alerts_aler_channel_ee84a7_idx'), + ), + ] diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py index 3e08e7b9..e5bd504d 100644 --- a/engine/apps/alerts/models/alert.py +++ b/engine/apps/alerts/models/alert.py @@ -5,7 +5,7 @@ from uuid import uuid4 from django.apps import apps from django.conf import settings from django.core.validators import MinLengthValidator -from django.db import models, transaction +from django.db import models from django.db.models import JSONField from django.db.models.signals import post_save @@ -261,9 +261,6 @@ def listen_for_alert_model_save(sender, instance, created, *args, **kwargs): else: distribute_alert.apply_async((instance.pk,), countdown=TASK_DELAY_SECONDS) - logger.info(f"Recalculate AG cache. 
Reason: save alert model {instance.pk}") - transaction.on_commit(instance.group.schedule_cache_for_web) - # Connect signal to base Alert class post_save.connect(listen_for_alert_model_save, Alert) diff --git a/engine/apps/alerts/models/alert_group.py b/engine/apps/alerts/models/alert_group.py index 16b2d19b..84a0a9aa 100644 --- a/engine/apps/alerts/models/alert_group.py +++ b/engine/apps/alerts/models/alert_group.py @@ -8,12 +8,9 @@ import pytz from celery import uuid as celery_uuid from django.apps import apps from django.conf import settings -from django.core.cache import cache from django.core.validators import MinLengthValidator -from django.db import IntegrityError, models, transaction +from django.db import IntegrityError, models from django.db.models import JSONField, Q, QuerySet -from django.db.models.signals import post_save -from django.dispatch import receiver from django.utils import timezone from django.utils.functional import cached_property @@ -22,16 +19,9 @@ from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_T from apps.alerts.incident_appearance.renderers.slack_renderer import AlertGroupSlackRenderer from apps.alerts.incident_log_builder import IncidentLogBuilder from apps.alerts.signals import alert_group_action_triggered_signal -from apps.alerts.tasks import ( - acknowledge_reminder_task, - call_ack_url, - schedule_cache_for_alert_group, - send_alert_group_signal, - unsilence_task, -) +from apps.alerts.tasks import acknowledge_reminder_task, call_ack_url, send_alert_group_signal, unsilence_task from apps.slack.slack_formatter import SlackFormatter from apps.user_management.models import User -from common.mixins.use_random_readonly_db_manager_mixin import UseRandomReadonlyDbManagerMixin from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length from common.utils import clean_markup, str_or_backup @@ -108,10 +98,6 @@ class UnarchivedAlertGroupQuerySet(models.QuerySet): return super().filter(*args, **kwargs, is_archived=False) -class AlertGroupManager(UseRandomReadonlyDbManagerMixin, models.Manager): - pass - - class AlertGroupSlackRenderingMixin: """ Ideally this mixin should not exist. Instead of this instance of AlertGroupSlackRenderer should be created and used @@ -134,8 +120,8 @@ class AlertGroupSlackRenderingMixin: class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.Model): - all_objects = AlertGroupManager.from_queryset(AlertGroupQuerySet)() - unarchived_objects = AlertGroupManager.from_queryset(UnarchivedAlertGroupQuerySet)() + all_objects = AlertGroupQuerySet.as_manager() + unarchived_objects = UnarchivedAlertGroupQuerySet.as_manager() ( NEW, @@ -242,8 +228,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. active_escalation_id = models.CharField(max_length=100, null=True, default=None) # ID generated by celery active_resolve_calculation_id = models.CharField(max_length=100, null=True, default=None) # ID generated by celery - # ID generated by celery - active_cache_for_web_calculation_id = models.CharField(max_length=100, null=True, default=None) SILENCE_DELAY_OPTIONS = ( (1800, "30 minutes"), @@ -315,7 +299,9 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. 
related_name="dependent_alert_groups", ) - cached_render_for_web = JSONField(default=dict) + # cached_render_for_web and active_cache_for_web_calculation_id are deprecated + cached_render_for_web = models.JSONField(default=dict) + active_cache_for_web_calculation_id = models.CharField(max_length=100, null=True, default=None) last_unique_unacknowledge_process_id = models.CharField(max_length=100, null=True, default=None) is_archived = models.BooleanField(default=False) @@ -364,6 +350,11 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. "distinction", "is_open_for_grouping", ] + indexes = [ + models.Index( + fields=["channel_id", "resolved", "acknowledged", "silenced", "root_alert_group_id", "is_archived"] + ), + ] def __str__(self): return f"{self.pk}: {self.verbose_name}" @@ -404,18 +395,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. def is_alert_a_resolve_signal(self, alert): raise NotImplementedError - def cache_for_web(self, organization): - from apps.api.serializers.alert_group import AlertGroupSerializer - - # Re-take object to switch connection from readonly db to master. - _self = AlertGroup.all_objects.get(pk=self.pk) - _self.cached_render_for_web = AlertGroupSerializer(self, context={"organization": organization}).data - self.cached_render_for_web = _self.cached_render_for_web - _self.save(update_fields=["cached_render_for_web"]) - - def schedule_cache_for_web(self): - schedule_cache_for_alert_group.apply_async((self.pk,)) - @property def permalink(self): if self.slack_message is not None: @@ -425,10 +404,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. def web_link(self): return urljoin(self.channel.organization.web_link, f"?page=incident&id={self.public_primary_key}") - @property - def alerts_count(self): - return self.alerts.count() - @property def happened_while_maintenance(self): return self.root_alert_group is not None and self.root_alert_group.maintenance_uuid is not None @@ -449,10 +424,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. self.unresolve() self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user, reason="Acknowledge button") - # clear resolve report cache - cache_key = "render_after_resolve_report_json_{}".format(self.pk) - cache.delete(cache_key) - self.acknowledge(acknowledged_by_user=user, acknowledged_by=AlertGroup.USER) self.stop_escalation() if self.is_root_alert_group: @@ -673,9 +644,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. self.unresolve() log_record = self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user) - # clear resolve report cache - self.drop_cached_after_resolve_report_json() - if self.is_root_alert_group: self.start_escalation_if_needed() @@ -848,10 +816,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. self.unresolve() self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_RESOLVED, author=user, reason="Silence button") - # clear resolve report cache - cache_key = "render_after_resolve_report_json_{}".format(self.pk) - cache.delete(cache_key) - if self.acknowledged: self.unacknowledge() self.log_records.create(type=AlertGroupLogRecord.TYPE_UN_ACK, author=user, reason="Silence button") @@ -1060,8 +1024,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. 
author=user, reason="Bulk action acknowledge", ) - # clear resolve report cache - alert_group.drop_cached_after_resolve_report_json() for alert_group in alert_groups_to_unsilence_before_acknowledge_list: alert_group.log_records.create( @@ -1194,8 +1156,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. reason="Bulk action restart", ) - alert_group.drop_cached_after_resolve_report_json() - if alert_group.is_root_alert_group: alert_group.start_escalation_if_needed() @@ -1293,7 +1253,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. author=user, reason="Bulk action silence", ) - alert_group.drop_cached_after_resolve_report_json() for alert_group in alert_groups_to_unsilence_before_silence_list: alert_group.log_records.create( @@ -1483,7 +1442,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. else: return "Acknowledged" - def non_cached_after_resolve_report_json(self): + def render_after_resolve_report_json(self): AlertGroupLogRecord = apps.get_model("alerts", "AlertGroupLogRecord") UserNotificationPolicyLogRecord = apps.get_model("base", "UserNotificationPolicyLogRecord") ResolutionNote = apps.get_model("alerts", "ResolutionNote") @@ -1501,21 +1460,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. result_log_report.append(log_record.render_log_line_json()) return result_log_report - def render_after_resolve_report_json(self): - cache_key = "render_after_resolve_report_json_{}".format(self.pk) - - # cache.get_or_set in some cases returns None, so use get and set cache methods separately - log_report = cache.get(cache_key) - if log_report is None: - log_report = self.non_cached_after_resolve_report_json() - cache.set(cache_key, log_report) - return log_report - - def drop_cached_after_resolve_report_json(self): - cache_key = "render_after_resolve_report_json_{}".format(self.pk) - if cache_key in cache: - cache.delete(cache_key) - @property def has_resolution_notes(self): return self.resolution_notes.exists() @@ -1595,14 +1539,3 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. 
) return stop_escalation_log - - -@receiver(post_save, sender=AlertGroup) -def listen_for_alert_group_model_save(sender, instance, created, *args, **kwargs): - if ( - kwargs is not None - and "update_fields" in kwargs - and kwargs["update_fields"] is dict - and "cached_render_for_web" not in kwargs["update_fields"] - ): - transaction.on_commit(instance.schedule_cache_for_alert_group) diff --git a/engine/apps/alerts/models/alert_group_log_record.py b/engine/apps/alerts/models/alert_group_log_record.py index 7e5e30c9..c2bacc7d 100644 --- a/engine/apps/alerts/models/alert_group_log_record.py +++ b/engine/apps/alerts/models/alert_group_log_record.py @@ -3,7 +3,7 @@ import logging import humanize from django.apps import apps -from django.db import models, transaction +from django.db import models from django.db.models import JSONField from django.db.models.signals import post_save from django.dispatch import receiver @@ -546,7 +546,6 @@ class AlertGroupLogRecord(models.Model): @receiver(post_save, sender=AlertGroupLogRecord) def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs): - instance.alert_group.drop_cached_after_resolve_report_json() if instance.type != AlertGroupLogRecord.TYPE_DELETED: if not instance.alert_group.is_maintenance_incident: alert_group_pk = instance.alert_group.pk @@ -555,6 +554,3 @@ def listen_for_alertgrouplogrecord(sender, instance, created, *args, **kwargs): f"alert group event: {instance.get_type_display()}" ) send_update_log_report_signal.apply_async(kwargs={"alert_group_pk": alert_group_pk}, countdown=8) - - logger.info(f"Recalculate AG cache. Reason: save alert_group_log_record model {instance.pk}") - transaction.on_commit(instance.alert_group.schedule_cache_for_web) diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index 2f0cc016..643f737e 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -19,11 +19,7 @@ from jinja2 import Template from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import GrafanaAlertingSyncManager from apps.alerts.integration_options_mixin import IntegrationOptionsMixin from apps.alerts.models.maintainable_object import MaintainableObject -from apps.alerts.tasks import ( - disable_maintenance, - invalidate_web_cache_for_alert_group, - sync_grafana_alerting_contact_points, -) +from apps.alerts.tasks import disable_maintenance, sync_grafana_alerting_contact_points from apps.base.messaging import get_messaging_backend_from_id from apps.base.utils import live_settings from apps.integrations.metadata import heartbeat @@ -693,21 +689,6 @@ def listen_for_alertreceivechannel_model_save(sender, instance, created, *args, create_organization_log( instance.organization, None, OrganizationLogType.TYPE_HEARTBEAT_CREATED, description ) - else: - if kwargs is not None: - if "update_fields" in kwargs: - if kwargs["update_fields"] is not None: - fields_to_not_to_invalidate_cache = [ - "rate_limit_message_task_id", - "rate_limited_in_slack_at", - "reason_to_skip_escalation", - ] - # Hack to not to invalidate web cache on AlertReceiveChannel.start_send_rate_limit_message_task - for f in fields_to_not_to_invalidate_cache: - if f in kwargs["update_fields"]: - return - logger.info(f"Drop AG cache. 
Reason: save alert_receive_channel {instance.pk}") - invalidate_web_cache_for_alert_group.apply_async(kwargs={"channel_pk": instance.pk}) if instance.integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING: if created: diff --git a/engine/apps/alerts/tasks/__init__.py b/engine/apps/alerts/tasks/__init__.py index 3ff8501e..79b8b0ed 100644 --- a/engine/apps/alerts/tasks/__init__.py +++ b/engine/apps/alerts/tasks/__init__.py @@ -9,7 +9,7 @@ from .custom_button_result import custom_button_result # noqa: F401 from .delete_alert_group import delete_alert_group # noqa: F401 from .distribute_alert import distribute_alert # noqa: F401 from .escalate_alert_group import escalate_alert_group # noqa: F401 -from .invalidate_web_cache_for_alert_group import invalidate_web_cache_for_alert_group # noqa: F401 +from .invalidate_web_cache_for_alert_group import invalidate_web_cache_for_alert_group # noqa: F401, todo: remove from .invite_user_to_join_incident import invite_user_to_join_incident # noqa: F401 from .maintenance import disable_maintenance # noqa: F401 from .notify_all import notify_all_task # noqa: F401 diff --git a/engine/apps/alerts/tasks/cache_alert_group_for_web.py b/engine/apps/alerts/tasks/cache_alert_group_for_web.py index 677e0a19..5f0c52d5 100644 --- a/engine/apps/alerts/tasks/cache_alert_group_for_web.py +++ b/engine/apps/alerts/tasks/cache_alert_group_for_web.py @@ -1,54 +1,19 @@ -from celery.utils.log import get_task_logger -from django.apps import apps from django.conf import settings -from django.core.cache import cache from common.custom_celery_tasks import shared_dedicated_queue_retry_task -logger = get_task_logger(__name__) - - -def get_cache_key_caching_alert_group_for_web(alert_group_pk): - CACHE_KEY_PREFIX = "cache_alert_group_for_web" - return f"{CACHE_KEY_PREFIX}_{alert_group_pk}" - @shared_dedicated_queue_retry_task( autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None ) def schedule_cache_for_alert_group(alert_group_pk): - CACHE_FOR_ALERT_GROUP_LIFETIME = 60 - START_CACHE_DELAY = 5 # we introduce delay to avoid recaching after each alert. - - task = cache_alert_group_for_web.apply_async(args=[alert_group_pk], countdown=START_CACHE_DELAY) - cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk) - cache.set(cache_key, task.id, timeout=CACHE_FOR_ALERT_GROUP_LIFETIME) + # todo: remove + pass @shared_dedicated_queue_retry_task( autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None ) def cache_alert_group_for_web(alert_group_pk): - """ - Async task to re-cache alert_group for web. 
- """ - cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk) - cached_task_id = cache.get(cache_key) - current_task_id = cache_alert_group_for_web.request.id - - if cached_task_id is None: - return ( - f"cache_alert_group_for_web skipped, because of current task_id ({current_task_id})" - f" for alert_group {alert_group_pk} doesn't exist in cache, which means this task is not" - f" relevant: cache was dropped by engine restart ot CACHE_FOR_ALERT_GROUP_LIFETIME" - ) - if not current_task_id == cached_task_id or cached_task_id is None: - return ( - f"cache_alert_group_for_web skipped, because of current task_id ({current_task_id})" - f" doesn't equal to cached task_id ({cached_task_id}) for alert_group {alert_group_pk}," - ) - else: - AlertGroup = apps.get_model("alerts", "AlertGroup") - alert_group = AlertGroup.all_objects.using_readonly_db.get(pk=alert_group_pk) - alert_group.cache_for_web(alert_group.channel.organization) - logger.info(f"cache_alert_group_for_web: cache refreshed for alert_group {alert_group_pk}") + # todo: remove + pass diff --git a/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py b/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py index d9c7c4f9..9c8786d9 100644 --- a/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py +++ b/engine/apps/alerts/tasks/invalidate_web_cache_for_alert_group.py @@ -1,32 +1,11 @@ -from django.apps import apps from django.conf import settings from common.custom_celery_tasks import shared_dedicated_queue_retry_task -from .task_logger import task_logger - @shared_dedicated_queue_retry_task( autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None ) def invalidate_web_cache_for_alert_group(org_pk=None, channel_pk=None, alert_group_pk=None, alert_group_pks=None): - AlertGroup = apps.get_model("alerts", "AlertGroup") - DynamicSetting = apps.get_model("base", "DynamicSetting") - - if channel_pk: - task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - alert_receive_channel {channel_pk}") - q = AlertGroup.all_objects.filter(channel__pk=channel_pk) - elif org_pk: - task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - organization {org_pk}") - q = AlertGroup.all_objects.filter(channel__organization__pk=org_pk) - elif alert_group_pk: - task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - alert_group {alert_group_pk}") - q = AlertGroup.all_objects.filter(pk=alert_group_pk) - elif alert_group_pks: - task_logger.debug(f"invalidate_web_cache_for_alert_group: Reason - alert_groups {alert_group_pks}") - q = AlertGroup.all_objects.filter(pk__in=alert_group_pks) - - skip_task = DynamicSetting.objects.get_or_create(name="skip_invalidate_web_cache_for_alert_group")[0] - if skip_task.boolean_value: - return "Task has been skipped because of skip_invalidate_web_cache_for_alert_group DynamicSetting" - q.update(cached_render_for_web={}) + # todo: remove + pass diff --git a/engine/apps/api/serializers/alert_group.py b/engine/apps/api/serializers/alert_group.py index a9e5f9f2..df5583c4 100644 --- a/engine/apps/api/serializers/alert_group.py +++ b/engine/apps/api/serializers/alert_group.py @@ -1,7 +1,5 @@ import logging -from datetime import datetime -import humanize from rest_framework import serializers from apps.alerts.incident_appearance.renderers.classic_markdown_renderer import AlertGroupClassicMarkdownRenderer @@ -29,51 +27,31 @@ class ShortAlertGroupSerializer(serializers.ModelSerializer): return AlertGroupWebRenderer(obj).render() 
-class AlertGroupSerializer(EagerLoadingMixin, serializers.ModelSerializer): - """ - Attention: It's heavily cached. Make sure to invalidate alertgroup's web cache if you update the format! - """ - +class AlertGroupListSerializer(EagerLoadingMixin, serializers.ModelSerializer): pk = serializers.CharField(read_only=True, source="public_primary_key") alert_receive_channel = FastAlertReceiveChannelSerializer(source="channel") - alerts = serializers.SerializerMethodField("get_limited_alerts") - resolved_by_verbose = serializers.CharField(source="get_resolved_by_display") + status = serializers.ReadOnlyField() resolved_by_user = FastUserSerializer(required=False) acknowledged_by_user = FastUserSerializer(required=False) silenced_by_user = FastUserSerializer(required=False) related_users = serializers.SerializerMethodField() - - last_alert_at = serializers.SerializerMethodField() - - started_at_verbose = serializers.SerializerMethodField() - acknowledged_at_verbose = serializers.SerializerMethodField() - resolved_at_verbose = serializers.SerializerMethodField() - silenced_at_verbose = serializers.SerializerMethodField() - dependent_alert_groups = ShortAlertGroupSerializer(many=True) root_alert_group = ShortAlertGroupSerializer() - alerts_count = serializers.ReadOnlyField() - - status = serializers.ReadOnlyField() + alerts_count = serializers.IntegerField(read_only=True) render_for_web = serializers.SerializerMethodField() render_for_classic_markdown = serializers.SerializerMethodField() PREFETCH_RELATED = [ - "alerts", "dependent_alert_groups", - "log_records", "log_records__author", - "log_records__escalation_policy", - "log_records__invitation__invitee", ] SELECT_RELATED = [ - "slack_message", "channel__organization", - "slack_message___slack_team_identity", - "acknowledged_by_user", + "root_alert_group", "resolved_by_user", + "acknowledged_by_user", "silenced_by_user", ] @@ -87,7 +65,6 @@ class AlertGroupSerializer(EagerLoadingMixin, serializers.ModelSerializer): "alert_receive_channel", "resolved", "resolved_by", - "resolved_by_verbose", "resolved_by_user", "resolved_at", "acknowledged_at", @@ -98,48 +75,30 @@ class AlertGroupSerializer(EagerLoadingMixin, serializers.ModelSerializer): "silenced", "silenced_by_user", "silenced_at", - "silenced_at_verbose", "silenced_until", "started_at", - "last_alert_at", "silenced_until", - "permalink", - "alerts", "related_users", - "started_at_verbose", - "acknowledged_at_verbose", - "resolved_at_verbose", "render_for_web", - "render_after_resolve_report_json", "render_for_classic_markdown", "dependent_alert_groups", "root_alert_group", "status", ] - def get_last_alert_at(self, obj): - last_alert = obj.alerts.last() - # TODO: This is a Hotfix for 0.0.27 - if last_alert is None: - logger.warning(f"obj {obj} doesn't have last_alert!") - return "" - return str(last_alert.created_at) + def get_render_for_web(self, obj): + # alert group has no alerts + if not obj.last_alert: + return {} - def get_limited_alerts(self, obj): - """ - Overriding default alerts because there are alert_groups with thousands of them. - It's just too slow, we need to cut here. - """ - alerts = obj.alerts.all()[:100] - - if len(alerts) > 90: - for alert in alerts: - alert.title = str(alert.title) + " Only last 100 alerts are shown. Use Amixr API to fetch all of them." 
- - return AlertSerializer(alerts, many=True).data + return AlertGroupWebRenderer(obj, obj.last_alert).render() def get_render_for_classic_markdown(self, obj): - return AlertGroupClassicMarkdownRenderer(obj).render() + # alert group has no alerts + if not obj.last_alert: + return {} + + return AlertGroupClassicMarkdownRenderer(obj, obj.last_alert).render() def get_related_users(self, obj): users_ids = set() @@ -165,37 +124,44 @@ class AlertGroupSerializer(EagerLoadingMixin, serializers.ModelSerializer): users_ids.add(log_record.author.public_primary_key) return users - def get_started_at_verbose(self, obj): - started_at_verbose = None - if obj.started_at is not None: - started_at_verbose = humanize.naturaltime( - datetime.now().replace(tzinfo=None) - obj.started_at.replace(tzinfo=None) - ) - return started_at_verbose - def get_acknowledged_at_verbose(self, obj): - acknowledged_at_verbose = None - if obj.acknowledged_at is not None: - acknowledged_at_verbose = humanize.naturaltime( - datetime.now().replace(tzinfo=None) - obj.acknowledged_at.replace(tzinfo=None) - ) # TODO: Deal with timezones - return acknowledged_at_verbose +class AlertGroupSerializer(AlertGroupListSerializer): + alerts = serializers.SerializerMethodField("get_limited_alerts") + last_alert_at = serializers.SerializerMethodField() - def get_resolved_at_verbose(self, obj): - resolved_at_verbose = None - if obj.resolved_at is not None: - resolved_at_verbose = humanize.naturaltime( - datetime.now().replace(tzinfo=None) - obj.resolved_at.replace(tzinfo=None) - ) # TODO: Deal with timezones - return resolved_at_verbose - - def get_silenced_at_verbose(self, obj): - silenced_at_verbose = None - if obj.silenced_at is not None: - silenced_at_verbose = humanize.naturaltime( - datetime.now().replace(tzinfo=None) - obj.silenced_at.replace(tzinfo=None) - ) # TODO: Deal with timezones - return silenced_at_verbose + class Meta(AlertGroupListSerializer.Meta): + fields = AlertGroupListSerializer.Meta.fields + [ + "alerts", + "render_after_resolve_report_json", + "permalink", + "last_alert_at", + ] def get_render_for_web(self, obj): + # alert group has no alerts + alert = obj.alerts.last() + if not alert: + return {} + return AlertGroupWebRenderer(obj).render() + + def get_last_alert_at(self, obj): + last_alert = obj.alerts.last() + + if not last_alert: + return obj.started_at + + return last_alert.created_at + + def get_limited_alerts(self, obj): + """ + Overriding default alerts because there are alert_groups with thousands of them. + It's just too slow, we need to cut here. + """ + alerts = obj.alerts.all()[:100] + + if len(alerts) > 90: + for alert in alerts: + alert.title = str(alert.title) + " Only last 100 alerts are shown. Use OnCall API to fetch all of them." 
+ + return AlertSerializer(alerts, many=True).data diff --git a/engine/apps/api/serializers/resolution_note.py b/engine/apps/api/serializers/resolution_note.py index 330259e3..00178685 100644 --- a/engine/apps/api/serializers/resolution_note.py +++ b/engine/apps/api/serializers/resolution_note.py @@ -1,7 +1,6 @@ from rest_framework import serializers from apps.alerts.models import AlertGroup, ResolutionNote -from apps.alerts.tasks import invalidate_web_cache_for_alert_group from apps.api.serializers.user import FastUserSerializer from common.api_helpers.custom_fields import OrganizationFilteredPrimaryKeyRelatedField from common.api_helpers.exceptions import BadRequest @@ -39,9 +38,6 @@ class ResolutionNoteSerializer(EagerLoadingMixin, serializers.ModelSerializer): validated_data["author"] = self.context["request"].user validated_data["source"] = ResolutionNote.Source.WEB created_instance = super().create(validated_data) - # Invalidate alert group cache because resolution notes shown in alert group's timeline - created_instance.alert_group.drop_cached_after_resolve_report_json() - invalidate_web_cache_for_alert_group(alert_group_pk=created_instance.alert_group.pk) return created_instance def to_representation(self, instance): @@ -57,8 +53,5 @@ class ResolutionNoteUpdateSerializer(ResolutionNoteSerializer): def update(self, instance, validated_data): if instance.source != ResolutionNote.Source.WEB: raise BadRequest(detail="Cannot update message with this source type") - updated_instance = super().update(instance, validated_data) - # Invalidate alert group cache because resolution notes shown in alert group's timeline - updated_instance.alert_group.drop_cached_after_resolve_report_json() - invalidate_web_cache_for_alert_group(alert_group_pk=updated_instance.alert_group.pk) - return updated_instance + + return super().update(instance, validated_data) diff --git a/engine/apps/api/tasks.py b/engine/apps/api/tasks.py deleted file mode 100644 index 4240178a..00000000 --- a/engine/apps/api/tasks.py +++ /dev/null @@ -1,55 +0,0 @@ -from celery.utils.log import get_task_logger -from django.apps import apps -from django.conf import settings -from django.core.cache import cache - -from common.custom_celery_tasks import shared_dedicated_queue_retry_task - -logger = get_task_logger(__name__) - - -def get_cache_key_caching_alert_group_for_web(alert_group_pk): - CACHE_KEY_PREFIX = "cache_alert_group_for_web" - return f"{CACHE_KEY_PREFIX}_{alert_group_pk}" - - -# TODO: remove this tasks after all of them will be processed in prod -@shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None -) -def schedule_cache_for_alert_group(alert_group_pk): - CACHE_FOR_ALERT_GROUP_LIFETIME = 60 - START_CACHE_DELAY = 5 # we introduce delay to avoid recaching after each alert. - - task = cache_alert_group_for_web.apply_async(args=[alert_group_pk], countdown=START_CACHE_DELAY) - cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk) - cache.set(cache_key, task.id, timeout=CACHE_FOR_ALERT_GROUP_LIFETIME) - - -@shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None -) -def cache_alert_group_for_web(alert_group_pk): - """ - Async task to re-cache alert_group for web. 
- """ - cache_key = get_cache_key_caching_alert_group_for_web(alert_group_pk) - cached_task_id = cache.get(cache_key) - current_task_id = cache_alert_group_for_web.request.id - - if cached_task_id is None: - return ( - f"cache_alert_group_for_web skipped, because of current task_id ({current_task_id})" - f" for alert_group {alert_group_pk} doesn't exist in cache, which means this task is not" - f" relevant: cache was dropped by engine restart ot CACHE_FOR_ALERT_GROUP_LIFETIME" - ) - if not current_task_id == cached_task_id or cached_task_id is None: - return ( - f"cache_alert_group_for_web skipped, because of current task_id ({current_task_id})" - f" doesn't equal to cached task_id ({cached_task_id}) for alert_group {alert_group_pk}," - ) - else: - AlertGroup = apps.get_model("alerts", "AlertGroup") - alert_group = AlertGroup.all_objects.using_readonly_db.get(pk=alert_group_pk) - alert_group.cache_for_web(alert_group.channel.organization) - logger.info(f"cache_alert_group_for_web: cache refreshed for alert_group {alert_group_pk}") diff --git a/engine/apps/api/tests/test_alert_group.py b/engine/apps/api/tests/test_alert_group.py index 983a22bf..6d4a0b9e 100644 --- a/engine/apps/api/tests/test_alert_group.py +++ b/engine/apps/api/tests/test_alert_group.py @@ -63,7 +63,7 @@ def test_get_filter_started_at(alert_group_internal_api_setup, make_user_auth_he ) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 4 + assert len(response.data["results"]) == 4 @pytest.mark.django_db @@ -78,7 +78,7 @@ def test_get_filter_resolved_at_alertgroup_empty_result(alert_group_internal_api **make_user_auth_headers(user, token), ) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 0 + assert len(response.data["results"]) == 0 @pytest.mark.django_db @@ -105,7 +105,7 @@ def test_get_filter_resolved_at(alert_group_internal_api_setup, make_user_auth_h **make_user_auth_headers(user, token), ) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 1 + assert len(response.data["results"]) == 1 @pytest.mark.django_db @@ -117,7 +117,7 @@ def test_status_new(alert_group_internal_api_setup, make_user_auth_headers): url = reverse("api-internal:alertgroup-list") response = client.get(url + "?status=0", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 1 + assert len(response.data["results"]) == 1 assert response.data["results"][0]["pk"] == new_alert_group.public_primary_key @@ -130,7 +130,7 @@ def test_status_ack(alert_group_internal_api_setup, make_user_auth_headers): url = reverse("api-internal:alertgroup-list") response = client.get(url + "?status=1", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 1 + assert len(response.data["results"]) == 1 assert response.data["results"][0]["pk"] == ack_alert_group.public_primary_key @@ -143,7 +143,7 @@ def test_status_resolved(alert_group_internal_api_setup, make_user_auth_headers) url = reverse("api-internal:alertgroup-list") response = client.get(url + "?status=2", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 1 + assert len(response.data["results"]) == 1 assert response.data["results"][0]["pk"] == resolved_alert_group.public_primary_key @@ -156,7 +156,7 @@ def test_status_silenced(alert_group_internal_api_setup, 
make_user_auth_headers) url = reverse("api-internal:alertgroup-list") response = client.get(url + "?status=3", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 1 + assert len(response.data["results"]) == 1 assert response.data["results"][0]["pk"] == silenced_alert_group.public_primary_key @@ -171,7 +171,7 @@ def test_all_statuses(alert_group_internal_api_setup, make_user_auth_headers): url + "?status=0&status=1&&status=2&status=3", format="json", **make_user_auth_headers(user, token) ) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 4 + assert len(response.data["results"]) == 4 @pytest.mark.django_db @@ -213,7 +213,7 @@ def test_get_filter_resolved_by( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 1 + assert len(first_response.data["results"]) == 1 second_response = client.get( url + f"?resolved_by={second_user.public_primary_key}", @@ -221,7 +221,7 @@ def test_get_filter_resolved_by( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert second_response.data["count"] == 0 + assert len(second_response.data["results"]) == 0 @pytest.mark.django_db @@ -269,7 +269,7 @@ def test_get_filter_resolved_by_multiple_values( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 2 + assert len(first_response.data["results"]) == 2 @pytest.mark.django_db @@ -309,7 +309,7 @@ def test_get_filter_acknowledged_by( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 1 + assert len(first_response.data["results"]) == 1 second_response = client.get( url + f"?acknowledged_by={second_user.public_primary_key}", @@ -317,7 +317,7 @@ def test_get_filter_acknowledged_by( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert second_response.data["count"] == 0 + assert len(second_response.data["results"]) == 0 @pytest.mark.django_db @@ -363,7 +363,7 @@ def test_get_filter_acknowledged_by_multiple_values( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 2 + assert len(first_response.data["results"]) == 2 @pytest.mark.django_db @@ -402,7 +402,7 @@ def test_get_filter_silenced_by( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 1 + assert len(first_response.data["results"]) == 1 second_response = client.get( url + f"?silenced_by={second_user.public_primary_key}", @@ -410,7 +410,7 @@ def test_get_filter_silenced_by( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert second_response.data["count"] == 0 + assert len(second_response.data["results"]) == 0 @pytest.mark.django_db @@ -455,7 +455,7 @@ def test_get_filter_silenced_by_multiple_values( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 2 + assert len(first_response.data["results"]) == 2 @pytest.mark.django_db @@ -494,7 +494,7 @@ def test_get_filter_invitees_are( **make_user_auth_headers(first_user, token), ) assert 
first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 1 + assert len(first_response.data["results"]) == 1 second_response = client.get( url + f"?invitees_are={second_user.public_primary_key}", @@ -502,7 +502,7 @@ def test_get_filter_invitees_are( **make_user_auth_headers(first_user, token), ) assert second_response.status_code == status.HTTP_200_OK - assert second_response.data["count"] == 0 + assert len(second_response.data["results"]) == 0 @pytest.mark.django_db @@ -548,7 +548,7 @@ def test_get_filter_invitees_are_multiple_values( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 2 + assert len(first_response.data["results"]) == 2 @pytest.mark.django_db @@ -593,7 +593,7 @@ def test_get_filter_invitees_are_ag_with_multiple_logs( **make_user_auth_headers(first_user, token), ) assert first_response.status_code == status.HTTP_200_OK - assert first_response.data["count"] == 1 + assert len(first_response.data["results"]) == 1 @pytest.mark.django_db @@ -611,11 +611,11 @@ def test_get_filter_with_resolution_note( # there are no alert groups with resolution_notes response = client.get(url + "?with_resolution_note=true", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 0 + assert len(response.data["results"]) == 0 response = client.get(url + "?with_resolution_note=false", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 4 + assert len(response.data["results"]) == 4 # add resolution_notes to two of four alert groups make_resolution_note(res_alert_group) @@ -623,11 +623,11 @@ def test_get_filter_with_resolution_note( response = client.get(url + "?with_resolution_note=true", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 2 + assert len(response.data["results"]) == 2 response = client.get(url + "?with_resolution_note=false", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 2 + assert len(response.data["results"]) == 2 @pytest.mark.django_db @@ -653,7 +653,7 @@ def test_get_filter_with_resolution_note_after_delete_resolution_note( response = client.get(url + "?with_resolution_note=true", format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - assert response.data["count"] == 1 + assert len(response.data["results"]) == 1 @pytest.mark.django_db diff --git a/engine/apps/api/views/alert_group.py b/engine/apps/api/views/alert_group.py index 5ea7e93b..838a372e 100644 --- a/engine/apps/api/views/alert_group.py +++ b/engine/apps/api/views/alert_group.py @@ -1,10 +1,6 @@ -from datetime import datetime, timedelta +from datetime import timedelta -from django import forms -from django.db import models -from django.db.models import CharField, Q -from django.db.models.constants import LOOKUP_SEP -from django.db.models.functions import Cast +from django.db.models import Count, Max, Q from django.utils import timezone from django_filters import rest_framework as filters from django_filters.widgets import RangeWidget @@ -15,16 +11,15 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from apps.alerts.constants import ActionSource -from 
apps.alerts.models import AlertGroup, AlertReceiveChannel -from apps.alerts.tasks import invalidate_web_cache_for_alert_group +from apps.alerts.models import Alert, AlertGroup, AlertReceiveChannel from apps.api.permissions import MODIFY_ACTIONS, READ_ACTIONS, ActionPermission, AnyRole, IsAdminOrEditor -from apps.api.serializers.alert_group import AlertGroupSerializer +from apps.api.serializers.alert_group import AlertGroupListSerializer, AlertGroupSerializer from apps.auth_token.auth import MobileAppAuthTokenAuthentication, PluginAuthentication from apps.user_management.models import User from common.api_helpers.exceptions import BadRequest from common.api_helpers.filters import DateRangeFilterMixin, ModelFieldFilterMixin from common.api_helpers.mixins import PreviewTemplateMixin, PublicPrimaryKeyMixin -from common.api_helpers.paginators import FiftyPageSizePaginator +from common.api_helpers.paginators import TwentyFiveCursorPaginator def get_integration_queryset(request): @@ -148,34 +143,6 @@ class AlertGroupFilter(DateRangeFilterMixin, ModelFieldFilterMixin, filters.Filt return queryset -class CustomSearchFilter(SearchFilter): - def must_call_distinct(self, queryset, search_fields): - """ - Return True if 'distinct()' should be used to query the given lookups. - """ - for search_field in search_fields: - opts = queryset.model._meta - if search_field[0] in self.lookup_prefixes: - search_field = search_field[1:] - - # From https://github.com/encode/django-rest-framework/pull/6240/files#diff-01f357e474dd8fd702e4951b9227bffcR88 - # Annotated fields do not need to be distinct - if isinstance(queryset, models.QuerySet) and search_field in queryset.query.annotations: - continue - - parts = search_field.split(LOOKUP_SEP) - for part in parts: - field = opts.get_field(part) - if hasattr(field, "get_path_info"): - # This field is a relation, update opts to follow the relation - path_info = field.get_path_info() - opts = path_info[-1].to_opts - if any(path.m2m for path in path_info): - # This field is a m2m relation so we know we need to call distinct - return True - return False - - class AlertGroupView( PreviewTemplateMixin, PublicPrimaryKeyMixin, @@ -216,90 +183,90 @@ class AlertGroupView( serializer_class = AlertGroupSerializer - pagination_class = FiftyPageSizePaginator + pagination_class = TwentyFiveCursorPaginator - filter_backends = [CustomSearchFilter, filters.DjangoFilterBackend] - search_fields = ["cached_render_for_web_str"] + filter_backends = [SearchFilter, filters.DjangoFilterBackend] + # todo: add ability to search by templated title + search_fields = ["public_primary_key", "inside_organization_number"] filterset_class = AlertGroupFilter - def list(self, request, *args, **kwargs): - """ - It's compute-heavy so we rely on cache here. - Attention: Make sure to invalidate cache if you update the format! - """ - queryset = self.filter_queryset(self.get_queryset(eager=False, readonly=True)) + def get_serializer_class(self): + if self.action == "list": + return AlertGroupListSerializer - page = self.paginate_queryset(queryset) - skip_slow_rendering = request.query_params.get("skip_slow_rendering") == "true" - data = [] + return super().get_serializer_class() - for alert_group in page: - if alert_group.cached_render_for_web == {}: - # We cannot give empty data to web. So caching synchronously here. - if skip_slow_rendering: - # We just return dummy data. 
- # Cache is not launched because after skip_slow_rendering request should come usual one - # which will start caching - data.append({"pk": alert_group.pk, "short": True}) - else: - # Synchronously cache and return. It could be slow. - alert_group.cache_for_web(alert_group.channel.organization) - data.append(alert_group.cached_render_for_web) - else: - data.append(alert_group.cached_render_for_web) - if not skip_slow_rendering: - # Cache is not launched because after skip_slow_rendering request should come usual one - # which will start caching - alert_group.schedule_cache_for_web() + def get_queryset(self): + # no select_related or prefetch_related is used at this point, it will be done on paginate_queryset. + queryset = AlertGroup.unarchived_objects.filter( + channel__organization=self.request.auth.organization, channel__team=self.request.user.current_team + ).only("id") - return self.get_paginated_response(data) - - def get_queryset(self, eager=True, readonly=False, order=True): - if readonly: - queryset = AlertGroup.unarchived_objects.using_readonly_db - else: - queryset = AlertGroup.unarchived_objects - - queryset = queryset.filter( - channel__organization=self.request.auth.organization, - channel__team=self.request.user.current_team, - ) - - if order: - queryset = queryset.order_by("-started_at") - - queryset = queryset.annotate(cached_render_for_web_str=Cast("cached_render_for_web", output_field=CharField())) - - if eager: - queryset = self.serializer_class.setup_eager_loading(queryset) return queryset - def get_alert_groups_and_days_for_previous_same_period(self): - prev_alert_groups = AlertGroup.unarchived_objects.none() - delta_days = None + def paginate_queryset(self, queryset): + """ + All SQL joins (select_related and prefetch_related) will be performed AFTER pagination, so it only joins tables + for 25 alert groups, not the whole table. + """ + alert_groups = super().paginate_queryset(queryset) + alert_groups = self.enrich(alert_groups) + return alert_groups - started_at = self.request.query_params.get("started_at", None) - if started_at is not None: - started_at_gte, started_at_lte = AlertGroupFilter.parse_custom_datetime_range(started_at) - delta_days = None - if started_at_lte is not None: - started_at_lte = forms.DateTimeField().to_python(started_at_lte) - else: - started_at_lte = datetime.now() + def get_object(self): + obj = super().get_object() + obj = self.enrich([obj])[0] + return obj - if started_at_gte is not None: - started_at_gte = forms.DateTimeField().to_python(value=started_at_gte) - delta = started_at_lte.replace(tzinfo=None) - started_at_gte.replace(tzinfo=None) - prev_alert_groups = self.get_queryset().filter( - started_at__range=[started_at_gte - delta, started_at_gte] - ) - delta_days = delta.days - return prev_alert_groups, delta_days + def enrich(self, alert_groups): + """ + This method performs select_related and prefetch_related (using setup_eager_loading) as well as in-memory joins + to add additional info like alert_count and last_alert for every alert group efficiently. + We need the last_alert because it's used by AlertGroupWebRenderer. 
+ """ + + # enrich alert groups with select_related and prefetch_related + alert_group_pks = [alert_group.pk for alert_group in alert_groups] + queryset = AlertGroup.all_objects.filter(pk__in=alert_group_pks).order_by("-pk") + + # do not load cached_render_for_web as it's deprecated and can be very large + queryset = queryset.defer("cached_render_for_web") + + queryset = self.get_serializer_class().setup_eager_loading(queryset) + alert_groups = list(queryset) + + # get info on alerts count and last alert ID for every alert group + alerts_info = ( + Alert.objects.values("group_id") + .filter(group_id__in=alert_group_pks) + .annotate(alerts_count=Count("group_id"), last_alert_id=Max("id")) + ) + alerts_info_map = {info["group_id"]: info for info in alerts_info} + + # fetch last alerts for every alert group + last_alert_ids = [info["last_alert_id"] for info in alerts_info_map.values()] + last_alerts = Alert.objects.filter(pk__in=last_alert_ids) + for alert in last_alerts: + # link group back to alert + alert.group = [alert_group for alert_group in alert_groups if alert_group.pk == alert.group_id][0] + alerts_info_map[alert.group_id].update({"last_alert": alert}) + + # add additional "alerts_count" and "last_alert" fields to every alert group + for alert_group in alert_groups: + try: + alert_group.last_alert = alerts_info_map[alert_group.pk]["last_alert"] + alert_group.alerts_count = alerts_info_map[alert_group.pk]["alerts_count"] + except KeyError: + # alert group has no alerts + alert_group.last_alert = None + alert_group.alerts_count = 0 + + return alert_groups @action(detail=False) def stats(self, *args, **kwargs): - alert_groups = self.filter_queryset(self.get_queryset(eager=False)) + alert_groups = self.filter_queryset(self.get_queryset()) # Only count field is used, other fields left just in case for the backward compatibility return Response( { @@ -324,7 +291,6 @@ class AlertGroupView( if alert_group.root_alert_group is not None: raise BadRequest(detail="Can't acknowledge an attached alert group") alert_group.acknowledge_by_user(self.request.user, action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @@ -344,7 +310,6 @@ class AlertGroupView( raise BadRequest(detail="Can't unacknowledge a resolved alert group") alert_group.un_acknowledge_by_user(self.request.user, action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @@ -365,7 +330,6 @@ class AlertGroupView( status=status.HTTP_400_BAD_REQUEST, ) alert_group.resolve_by_user(self.request.user, action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -381,7 +345,6 @@ class AlertGroupView( raise BadRequest(detail="The alert group is not resolved") alert_group.un_resolve_by_user(self.request.user, action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -404,8 +367,6 @@ class AlertGroupView( return Response(status=status.HTTP_400_BAD_REQUEST) alert_group.attach_by_user(self.request.user, root_alert_group, 
action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) - invalidate_web_cache_for_alert_group(alert_group_pk=root_alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -415,10 +376,8 @@ class AlertGroupView( raise BadRequest(detail="Can't unattach maintenance alert group") if alert_group.is_root_alert_group: raise BadRequest(detail="Can't unattach an alert group because it is not attached") - root_alert_group_pk = alert_group.root_alert_group_id + alert_group.un_attach_by_user(self.request.user, action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) - invalidate_web_cache_for_alert_group(alert_group_pk=root_alert_group_pk) return Response(AlertGroupSerializer(alert_group, context={"request": self.request}).data) @action(methods=["post"], detail=True) @@ -433,7 +392,6 @@ class AlertGroupView( raise BadRequest(detail="Can't silence an attached alert group") alert_group.silence_by_user(request.user, silence_delay=delay, action_source=ActionSource.WEB) - invalidate_web_cache_for_alert_group(alert_group_pk=alert_group.pk) return Response(AlertGroupSerializer(alert_group, context={"request": request}).data) @action(methods=["get"], detail=False) @@ -548,9 +506,9 @@ class AlertGroupView( raise BadRequest(detail="Please specify a delay for silence") kwargs["silence_delay"] = delay - alert_groups = self.get_queryset(eager=False).filter(public_primary_key__in=alert_group_public_pks) - alert_group_pks = list(alert_groups.values_list("id", flat=True)) - invalidate_web_cache_for_alert_group(alert_group_pks=alert_group_pks) + alert_groups = AlertGroup.unarchived_objects.filter( + channel__organization=self.request.auth.organization, public_primary_key__in=alert_group_public_pks + ) kwargs["user"] = self.request.user kwargs["alert_groups"] = alert_groups diff --git a/engine/apps/api/views/route_regex_debugger.py b/engine/apps/api/views/route_regex_debugger.py index 527684ac..ffa9cc71 100644 --- a/engine/apps/api/views/route_regex_debugger.py +++ b/engine/apps/api/views/route_regex_debugger.py @@ -43,10 +43,7 @@ class RouteRegexDebuggerView(APIView): if len(incidents_matching_regex) < MAX_INCIDENTS_TO_SHOW: first_alert = ag.alerts.all()[0] if re.search(regex, json.dumps(first_alert.raw_request_data)): - if ag.cached_render_for_web: - title = ag.cached_render_for_web["render_for_web"]["title"] - else: - title = AlertWebRenderer(first_alert).render()["title"] + title = AlertWebRenderer(first_alert).render()["title"] incidents_matching_regex.append( { "title": title, diff --git a/engine/apps/base/models/user_notification_policy_log_record.py b/engine/apps/base/models/user_notification_policy_log_record.py index d8afed2d..ed261b2b 100644 --- a/engine/apps/base/models/user_notification_policy_log_record.py +++ b/engine/apps/base/models/user_notification_policy_log_record.py @@ -315,7 +315,6 @@ class UserNotificationPolicyLogRecord(models.Model): @receiver(post_save, sender=UserNotificationPolicyLogRecord) def listen_for_usernotificationpolicylogrecord_model_save(sender, instance, created, *args, **kwargs): - instance.alert_group.drop_cached_after_resolve_report_json() alert_group_pk = instance.alert_group.pk if instance.type != UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FINISHED: logger.debug( diff --git a/engine/apps/public_api/tests/test_incidents.py b/engine/apps/public_api/tests/test_incidents.py index 
d43a1fb8..ea1198a0 100644 --- a/engine/apps/public_api/tests/test_incidents.py +++ b/engine/apps/public_api/tests/test_incidents.py @@ -32,7 +32,7 @@ def construct_expected_response_from_incidents(incidents): "id": incident.public_primary_key, "integration_id": incident.channel.public_primary_key, "route_id": incident.channel_filter.public_primary_key, - "alerts_count": incident.alerts_count, + "alerts_count": incident.alerts.count(), "state": incident.state, "created_at": created_at, "resolved_at": resolved_at, diff --git a/engine/apps/slack/scenarios/alertgroup_appearance.py b/engine/apps/slack/scenarios/alertgroup_appearance.py index 1ccba05f..588b70d0 100644 --- a/engine/apps/slack/scenarios/alertgroup_appearance.py +++ b/engine/apps/slack/scenarios/alertgroup_appearance.py @@ -247,10 +247,6 @@ class UpdateAppearanceStep(scenario_step.ScenarioStep): if new_value is None and old_value is not None: setattr(alert_receive_channel, attr_name, None) alert_receive_channel.save() - # Drop caches for current alert group - if notification_channel == "web": - setattr(alert_group, f"cached_render_for_web_{templatizable_attr}", None) - alert_group.save() elif new_value is not None: default_values = getattr( AlertReceiveChannel, @@ -265,18 +261,10 @@ class UpdateAppearanceStep(scenario_step.ScenarioStep): jinja_template_env.from_string(new_value) setattr(alert_receive_channel, attr_name, new_value) alert_receive_channel.save() - # Drop caches for current alert group - if notification_channel == "web": - setattr(alert_group, f"cached_render_for_web_{templatizable_attr}", None) - alert_group.save() elif default_value is not None and new_value.strip() == default_value.strip(): new_value = None setattr(alert_receive_channel, attr_name, new_value) alert_receive_channel.save() - # Drop caches for current alert group - if notification_channel == "web": - setattr(alert_group, f"cached_render_for_web_{templatizable_attr}", None) - alert_group.save() except TemplateSyntaxError: return Response( {"response_action": "errors", "errors": {attr_name: "Template has incorrect format"}}, diff --git a/engine/apps/slack/scenarios/resolution_note.py b/engine/apps/slack/scenarios/resolution_note.py index 364704b7..f6c78305 100644 --- a/engine/apps/slack/scenarios/resolution_note.py +++ b/engine/apps/slack/scenarios/resolution_note.py @@ -674,7 +674,6 @@ class AddRemoveThreadMessageStep(UpdateResolutionNoteStep, scenario_step.Scenari add_to_resolution_note = True if value["msg_value"].startswith("add") else False slack_thread_message = None resolution_note = None - drop_ag_cache = False alert_group = AlertGroup.all_objects.get(pk=alert_group_pk) @@ -695,7 +694,6 @@ class AddRemoveThreadMessageStep(UpdateResolutionNoteStep, scenario_step.Scenari else: resolution_note.recreate() self.add_resolution_note_reaction(slack_thread_message) - drop_ag_cache = True elif not add_to_resolution_note: # Check if resolution_note can be removed if ( @@ -720,13 +718,9 @@ class AddRemoveThreadMessageStep(UpdateResolutionNoteStep, scenario_step.Scenari slack_thread_message.added_to_resolution_note = False slack_thread_message.save(update_fields=["added_to_resolution_note"]) self.remove_resolution_note_reaction(slack_thread_message) - drop_ag_cache = True self.update_alert_group_resolution_note_button( alert_group, ) - if drop_ag_cache: - alert_group.drop_cached_after_resolve_report_json() - alert_group.schedule_cache_for_web() resolution_note_data = json.loads(payload["actions"][0]["value"]) 
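
For context on the list-endpoint rework earlier in this patch: instead of reading the deprecated cached_render_for_web blob, the view now derives alerts_count and last_alert for the whole page in one aggregate query and stitches the results onto the alert groups. A minimal sketch of that pattern, assuming the Alert/AlertGroup models from this repo (the helper name is illustrative):

from django.db.models import Count, Max

from apps.alerts.models import Alert


def attach_alert_stats(alert_groups):
    """Annotate a page of alert groups with alerts_count and last_alert in two extra queries."""
    pks = [ag.pk for ag in alert_groups]

    # Single GROUP BY over the page: number of alerts and newest alert id per group.
    info = {
        row["group_id"]: row
        for row in Alert.objects.values("group_id")
        .filter(group_id__in=pks)
        .annotate(alerts_count=Count("group_id"), last_alert_id=Max("id"))
    }

    # Pull the newest alerts in bulk, keyed by their primary key.
    last_alerts = Alert.objects.in_bulk([row["last_alert_id"] for row in info.values()])

    for ag in alert_groups:
        row = info.get(ag.pk)
        # Alert groups without alerts simply get empty stats.
        ag.alerts_count = row["alerts_count"] if row else 0
        ag.last_alert = last_alerts.get(row["last_alert_id"]) if row else None
    return alert_groups

The serializers can then read alerts_count and last_alert directly, so no per-group cache invalidation is needed when an alert group changes state.
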
resolution_note_data["resolution_note_window_action"] = "edit_update" ResolutionNoteModalStep(slack_team_identity, self.organization, self.user).process_scenario( diff --git a/engine/common/api_helpers/paginators.py b/engine/common/api_helpers/paginators.py index 023f2294..01ce2cc6 100644 --- a/engine/common/api_helpers/paginators.py +++ b/engine/common/api_helpers/paginators.py @@ -1,4 +1,4 @@ -from rest_framework.pagination import PageNumberPagination +from rest_framework.pagination import CursorPagination, PageNumberPagination class HundredPageSizePaginator(PageNumberPagination): @@ -11,3 +11,10 @@ class FiftyPageSizePaginator(PageNumberPagination): class TwentyFivePageSizePaginator(PageNumberPagination): page_size = 25 + + +class TwentyFiveCursorPaginator(CursorPagination): + page_size = 25 + max_page_size = 100 + page_size_query_param = "perpage" + ordering = "-pk" diff --git a/engine/common/mixins/use_random_readonly_db_manager_mixin.py b/engine/common/mixins/use_random_readonly_db_manager_mixin.py deleted file mode 100644 index 46559aa4..00000000 --- a/engine/common/mixins/use_random_readonly_db_manager_mixin.py +++ /dev/null @@ -1,21 +0,0 @@ -import random - -from django.conf import settings - - -class UseRandomReadonlyDbManagerMixin: - """ - Use this Mixin in ModelManagers, when you want to use the random readonly replica - """ - - @property - def using_readonly_db(self): - """Select one of the readonly databases this QuerySet should execute against.""" - if hasattr(settings, "READONLY_DATABASES") and len(settings.READONLY_DATABASES) > 0: - using_db = random.choice(list(settings.READONLY_DATABASES.keys())) - return self.using(using_db) - else: - # Use "default" database - # Django uses the database with the alias of default when no other database has been selected. 
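
On the pagination side, the new TwentyFiveCursorPaginator above moves the alert group list from page numbers to DRF cursor pagination, which keys pages off the "-pk" ordering and keeps deep pages cheap. A rough sketch of how a view could opt in (the serializer and viewset names here are illustrative, not the ones from this patch):

from rest_framework import serializers, viewsets

from apps.alerts.models import AlertGroup
from common.api_helpers.paginators import TwentyFiveCursorPaginator


class AlertGroupListSerializer(serializers.ModelSerializer):
    class Meta:
        model = AlertGroup
        fields = ["public_primary_key", "resolved", "acknowledged", "silenced"]


class AlertGroupListViewSet(viewsets.ReadOnlyModelViewSet):
    serializer_class = AlertGroupListSerializer
    # Responses carry opaque "next"/"previous" cursor links instead of page numbers;
    # clients may pass ?perpage=<n> up to the paginator's max_page_size of 100.
    pagination_class = TwentyFiveCursorPaginator

    def get_queryset(self):
        # Cursor pagination needs a stable ordering; the paginator enforces "-pk".
        return AlertGroup.unarchived_objects.all()

Because the cursor encodes a position rather than an offset, newly created alert groups do not shift items between pages while an operator is paging through the list.
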
-            # https://docs.djangoproject.com/en/3.2/topics/db/multi-db/#defining-your-databases
-            return self.using("default")
diff --git a/engine/settings/dev.py b/engine/settings/dev.py
index d63b6f74..b5e0e2f5 100644
--- a/engine/settings/dev.py
+++ b/engine/settings/dev.py
@@ -32,10 +32,6 @@ DATABASES = {
 
 TESTING = "pytest" in sys.modules or "unittest" in sys.modules
 
-READONLY_DATABASES = {}
-
-# Dictionaries concatenation, introduced in python3.9
-DATABASES = DATABASES | READONLY_DATABASES
 
 CACHES = {
     "default": {
diff --git a/engine/settings/prod_without_db.py b/engine/settings/prod_without_db.py
index 60b4cc28..5b8a83b4 100644
--- a/engine/settings/prod_without_db.py
+++ b/engine/settings/prod_without_db.py
@@ -84,12 +84,11 @@ CELERY_TASK_ROUTES = {
     "apps.alerts.tasks.create_contact_points_for_datasource.create_contact_points_for_datasource": {"queue": "default"},
     "apps.alerts.tasks.sync_grafana_alerting_contact_points.sync_grafana_alerting_contact_points": {"queue": "default"},
     "apps.alerts.tasks.delete_alert_group.delete_alert_group": {"queue": "default"},
-    "apps.alerts.tasks.invalidate_web_cache_for_alert_group.invalidate_web_cache_for_alert_group": {"queue": "default"},
+    "apps.alerts.tasks.invalidate_web_cache_for_alert_group.invalidate_web_cache_for_alert_group": {
+        "queue": "default"
+    },  # todo: remove
     "apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal": {"queue": "default"},
     "apps.alerts.tasks.wipe.wipe": {"queue": "default"},
-    # TODO: remove cache_alert_group_for_web and schedule_cache_for_alert_group once existing task will be processed
-    "apps.api.tasks.cache_alert_group_for_web": {"queue": "default"},
-    "apps.api.tasks.schedule_cache_for_alert_group": {"queue": "default"},
     "apps.heartbeat.tasks.heartbeat_checkup": {"queue": "default"},
     "apps.heartbeat.tasks.integration_heartbeat_checkup": {"queue": "default"},
     "apps.heartbeat.tasks.process_heartbeat_task": {"queue": "default"},
diff --git a/grafana-plugin/src/components/CursorPagination/CursorPagination.module.css b/grafana-plugin/src/components/CursorPagination/CursorPagination.module.css
new file mode 100644
index 00000000..63d08ecc
--- /dev/null
+++ b/grafana-plugin/src/components/CursorPagination/CursorPagination.module.css
@@ -0,0 +1,3 @@
+.root {
+  display: block;
+}
diff --git a/grafana-plugin/src/components/CursorPagination/CursorPagination.tsx b/grafana-plugin/src/components/CursorPagination/CursorPagination.tsx
new file mode 100644
index 00000000..33b228e1
--- /dev/null
+++ b/grafana-plugin/src/components/CursorPagination/CursorPagination.tsx
@@ -0,0 +1,79 @@
+import React, { FC, useCallback, useEffect, useState } from 'react';
+
+import { SelectableValue } from '@grafana/data';
+import { Button, HorizontalGroup, Icon, Select } from '@grafana/ui';
+import cn from 'classnames/bind';
+
+import Text from 'components/Text/Text';
+
+import styles from './CursorPagination.module.css';
+
+interface CursorPaginationProps {
+  current: string;
+  onChange: (cursor: string, direction: 'prev' | 'next') => void;
+  itemsPerPageOptions: Array<SelectableValue<number>>;
+  itemsPerPage: number;
+  onChangeItemsPerPage: (value: number) => void;
+  prev: string;
+  next: string;
+}
+
+const cx = cn.bind(styles);
+
+const CursorPagination: FC<CursorPaginationProps> = (props) => {
+  const { current, onChange, prev, next, itemsPerPage, itemsPerPageOptions, onChangeItemsPerPage } = props;
+
+  const [disabled, setDisabled] = useState(false);
+
+  useEffect(() => {
+    setDisabled(false);
+  }, [prev, next]);
+
+  const onChangeItemsPerPageCallback = useCallback((option) => {
+    setDisabled(true);
+    onChangeItemsPerPage(option.value);
+  }, []);
+
+  return (
+    <HorizontalGroup justify="flex-end">
+      <HorizontalGroup>
+        <Text type="secondary">Items per list</Text>