From b5aa53d3d684b2aaf7df6e271c472543c0463e5a Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Tue, 23 Jan 2024 10:36:58 +0000 Subject: [PATCH] Alertmanager V2 migration prep (#3722) # What this PR does - Adds a Django management command and database fields required for the Alertmanager V2 migration - Adds a post-migration warning alert Screenshot 2024-01-19 at 17 41 04 Related to https://github.com/grafana/oncall-private/issues/2260 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- ...ertmanager_v2_backup_templates_and_more.py | 23 +++ .../alerts/models/alert_receive_channel.py | 39 ++++- .../tests/test_alert_receiver_channel.py | 132 ++++++++++++++++ .../api/serializers/alert_receive_channel.py | 2 + .../commands/alertmanager_v2_migrate.py | 148 ++++++++++++++++++ .../alert_receive_channel.types.ts | 1 + .../src/pages/integration/Integration.tsx | 62 ++++++++ 7 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 engine/apps/alerts/migrations/0044_alertreceivechannel_alertmanager_v2_backup_templates_and_more.py create mode 100644 engine/engine/management/commands/alertmanager_v2_migrate.py diff --git a/engine/apps/alerts/migrations/0044_alertreceivechannel_alertmanager_v2_backup_templates_and_more.py b/engine/apps/alerts/migrations/0044_alertreceivechannel_alertmanager_v2_backup_templates_and_more.py new file mode 100644 index 00000000..c9dbdcdf --- /dev/null +++ b/engine/apps/alerts/migrations/0044_alertreceivechannel_alertmanager_v2_backup_templates_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.7 on 2024-01-19 13:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('alerts', '0043_remove_alertgroup_alerts_aler_channel_81aeec_idx_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='alertreceivechannel', + name='alertmanager_v2_backup_templates', + field=models.JSONField(default=None, null=True), + ), + migrations.AddField( + model_name='alertreceivechannel', + name='alertmanager_v2_migrated_at', + field=models.DateTimeField(default=None, null=True), + ), + ] diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index 02705ea1..cb605014 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -48,6 +48,34 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) +class MessagingBackendTemplatesItem: + title: str | None + message: str | None + image_url: str | None + + +MessagingBackendTemplates = dict[str, MessagingBackendTemplatesItem] + + +class AlertmanagerV2LegacyTemplates(typing.TypedDict): + web_title_template: str | None + web_message_template: str | None + web_image_url_template: str | None + sms_title_template: str | None + phone_call_title_template: str | None + source_link_template: str | None + grouping_id_template: str | None + resolve_condition_template: str | None + acknowledge_condition_template: str | None + slack_title_template: str | None + slack_message_template: str | None + slack_image_url_template: str | None + telegram_title_template: str | None + telegram_message_template: str | None + telegram_image_url_template: str | None + messaging_backends_templates: MessagingBackendTemplates | None + + def generate_public_primary_key_for_alert_receive_channel(): prefix = "C" new_public_primary_key = generate_public_primary_key(prefix) @@ -252,7 +280,16 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject): # additional messaging backends templates # e.g. {'': {'title': 'title template', 'message': 'message template', 'image_url': 'url template'}} - messaging_backends_templates = models.JSONField(null=True, default=None) + messaging_backends_templates: MessagingBackendTemplates | None = models.JSONField(null=True, default=None) + + alertmanager_v2_migrated_at = models.DateTimeField(null=True, default=None) + """ + Timestamp of when Alertmanager V2 migration was run for this integration using the 'alertmanager_v2_migrate' + Django management command. + """ + + alertmanager_v2_backup_templates: AlertmanagerV2LegacyTemplates | None = models.JSONField(null=True, default=None) + """Backing up templates before the Alertmanager V2 migration, so that they can be restored if needed.""" rate_limited_in_slack_at = models.DateTimeField(null=True, default=None) rate_limit_message_task_id = models.CharField(max_length=100, null=True, default=None) diff --git a/engine/apps/alerts/tests/test_alert_receiver_channel.py b/engine/apps/alerts/tests/test_alert_receiver_channel.py index 9d542b56..82c314d8 100644 --- a/engine/apps/alerts/tests/test_alert_receiver_channel.py +++ b/engine/apps/alerts/tests/test_alert_receiver_channel.py @@ -4,10 +4,12 @@ from unittest.mock import patch import pytest from django.db import IntegrityError from django.urls import reverse +from django.utils import timezone from apps.alerts.models import AlertReceiveChannel from common.api_helpers.utils import create_engine_url from common.exceptions import UnableToSendDemoAlert +from engine.management.commands import alertmanager_v2_migrate @pytest.mark.django_db @@ -281,3 +283,133 @@ def test_create_duplicate_direct_paging_integrations(make_organization, make_tea integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, ) super(AlertReceiveChannel, arc).save() # bypass the custom save method, so that IntegrityError is raised + + +@pytest.mark.django_db +def test_alertmanager_v2_migrate_forward(make_organization, make_alert_receive_channel): + organization = make_organization() + + legacy_alertmanager = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_LEGACY_ALERTMANAGER, + slack_title_template="slack_title_template", + web_title_template="web_title_template", + grouping_id_template="grouping_id_template", + resolve_condition_template="resolve_condition_template", + ) + + alertmanager = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_ALERTMANAGER, + slack_title_template="slack_title_template", + ) + legacy_grafana_alerting = make_alert_receive_channel( + organization, integration=AlertReceiveChannel.INTEGRATION_LEGACY_GRAFANA_ALERTING + ) + grafana_alerting = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + slack_title_template="slack_title_template", + ) + + alertmanager_v2_migrate.Command().handle(backward=False) + + legacy_alertmanager.refresh_from_db() + alertmanager.refresh_from_db() + legacy_grafana_alerting.refresh_from_db() + grafana_alerting.refresh_from_db() + + assert legacy_alertmanager.integration == AlertReceiveChannel.INTEGRATION_ALERTMANAGER + assert legacy_alertmanager.alertmanager_v2_migrated_at is not None + assert legacy_alertmanager.slack_title_template is None + assert legacy_alertmanager.web_title_template is None + assert legacy_alertmanager.grouping_id_template is None + assert legacy_alertmanager.resolve_condition_template is None + assert legacy_alertmanager.alertmanager_v2_backup_templates["slack_title_template"] == "slack_title_template" + assert legacy_alertmanager.alertmanager_v2_backup_templates["web_title_template"] == "web_title_template" + assert legacy_alertmanager.alertmanager_v2_backup_templates["grouping_id_template"] == "grouping_id_template" + assert ( + legacy_alertmanager.alertmanager_v2_backup_templates["resolve_condition_template"] + == "resolve_condition_template" + ) + assert legacy_alertmanager.alertmanager_v2_backup_templates["messaging_backends_templates"] is None + + assert legacy_grafana_alerting.integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING + assert legacy_grafana_alerting.alertmanager_v2_migrated_at is not None + assert legacy_grafana_alerting.alertmanager_v2_backup_templates is None + + assert alertmanager.integration == AlertReceiveChannel.INTEGRATION_ALERTMANAGER + assert alertmanager.alertmanager_v2_migrated_at is None + assert alertmanager.slack_title_template == "slack_title_template" + assert alertmanager.alertmanager_v2_backup_templates is None + + assert grafana_alerting.integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING + assert grafana_alerting.alertmanager_v2_migrated_at is None + assert grafana_alerting.slack_title_template == "slack_title_template" + assert grafana_alerting.alertmanager_v2_backup_templates is None + + +@pytest.mark.django_db +def test_alertmanager_v2_migrate_backward(make_organization, make_alert_receive_channel): + organization = make_organization() + + migrated_alertmanager = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_ALERTMANAGER, + alertmanager_v2_migrated_at=timezone.now(), + alertmanager_v2_backup_templates={ + "slack_title_template": "slack_title_template", + "web_title_template": "web_title_template", + "grouping_id_template": "grouping_id_template", + "resolve_condition_template": "resolve_condition_template", + }, + ) + + alertmanager = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_ALERTMANAGER, + slack_title_template="slack_title_template", + ) + migrated_grafana_alerting = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + alertmanager_v2_migrated_at=timezone.now(), + ) + grafana_alerting = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + slack_title_template="slack_title_template", + ) + + alertmanager_v2_migrate.Command().handle(backward=True) + + migrated_alertmanager.refresh_from_db() + alertmanager.refresh_from_db() + migrated_grafana_alerting.refresh_from_db() + grafana_alerting.refresh_from_db() + + assert migrated_alertmanager.integration == AlertReceiveChannel.INTEGRATION_LEGACY_ALERTMANAGER + assert migrated_alertmanager.alertmanager_v2_migrated_at is None + assert migrated_alertmanager.slack_title_template == "slack_title_template" + assert migrated_alertmanager.web_title_template == "web_title_template" + assert migrated_alertmanager.grouping_id_template == "grouping_id_template" + assert migrated_alertmanager.resolve_condition_template == "resolve_condition_template" + assert migrated_alertmanager.alertmanager_v2_backup_templates is None + + assert migrated_grafana_alerting.integration == AlertReceiveChannel.INTEGRATION_LEGACY_GRAFANA_ALERTING + assert migrated_grafana_alerting.alertmanager_v2_migrated_at is None + assert migrated_grafana_alerting.slack_title_template is None + assert migrated_grafana_alerting.web_title_template is None + assert migrated_grafana_alerting.grouping_id_template is None + assert migrated_grafana_alerting.resolve_condition_template is None + assert migrated_grafana_alerting.alertmanager_v2_backup_templates is None + + assert alertmanager.integration == AlertReceiveChannel.INTEGRATION_ALERTMANAGER + assert alertmanager.alertmanager_v2_migrated_at is None + assert alertmanager.slack_title_template == "slack_title_template" + assert alertmanager.alertmanager_v2_backup_templates is None + + assert grafana_alerting.integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING + assert grafana_alerting.alertmanager_v2_migrated_at is None + assert grafana_alerting.slack_title_template == "slack_title_template" + assert grafana_alerting.alertmanager_v2_backup_templates is None diff --git a/engine/apps/api/serializers/alert_receive_channel.py b/engine/apps/api/serializers/alert_receive_channel.py index 76fb3b75..a08baf2d 100644 --- a/engine/apps/api/serializers/alert_receive_channel.py +++ b/engine/apps/api/serializers/alert_receive_channel.py @@ -266,6 +266,7 @@ class AlertReceiveChannelSerializer( "is_legacy", "labels", "alert_group_labels", + "alertmanager_v2_migrated_at", ] read_only_fields = [ "created_at", @@ -282,6 +283,7 @@ class AlertReceiveChannelSerializer( "is_based_on_alertmanager", "inbound_email", "is_legacy", + "alertmanager_v2_migrated_at", ] extra_kwargs = {"integration": {"required": True}} diff --git a/engine/engine/management/commands/alertmanager_v2_migrate.py b/engine/engine/management/commands/alertmanager_v2_migrate.py new file mode 100644 index 00000000..6b96646c --- /dev/null +++ b/engine/engine/management/commands/alertmanager_v2_migrate.py @@ -0,0 +1,148 @@ +from django.core.management.base import BaseCommand +from django.db import transaction +from django.db.models import Q +from django.utils import timezone + +from apps.alerts.models import AlertReceiveChannel + +ALERTMANAGER = "alertmanager" +LEGACY_ALERTMANAGER = "legacy_alertmanager" +GRAFANA_ALERTING = "grafana_alerting" +LEGACY_GRAFANA_ALERTING = "legacy_grafana_alerting" +TEMPLATE_FIELDS = [ + "web_title_template", + "web_message_template", + "web_image_url_template", + "sms_title_template", + "phone_call_title_template", + "source_link_template", + "grouping_id_template", + "resolve_condition_template", + "acknowledge_condition_template", + "slack_title_template", + "slack_message_template", + "slack_image_url_template", + "telegram_title_template", + "telegram_message_template", + "telegram_image_url_template", + "messaging_backends_templates", +] + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument("--backward", action="store_true", help="Run the migration backward.") + + def handle(self, *args, **options): + if options["backward"]: + self.migrate_backward() + else: + self.migrate_forward() + + @transaction.atomic + def migrate_forward(self): + now = timezone.now() + self.stdout.write(f"Forward migration started at {now}.") + + self.stdout.write( + "Migrating legacy Alertmanager integrations " + "(updating fields 'integration' and 'alertmanager_v2_migrated_at')." + ) + num_updated = AlertReceiveChannel.objects.filter(integration=LEGACY_ALERTMANAGER).update( + integration=ALERTMANAGER, alertmanager_v2_migrated_at=now + ) + self.stdout.write(f"Migrated {num_updated} legacy Alertmanager integrations.") + + self.stdout.write( + "Migrating legacy Grafana Alerting integrations " + "(updating fields 'integration' and 'alertmanager_v2_migrated_at')." + ) + num_updated = AlertReceiveChannel.objects.filter(integration=LEGACY_GRAFANA_ALERTING).update( + integration=GRAFANA_ALERTING, alertmanager_v2_migrated_at=now + ) + self.stdout.write(f"Migrated {num_updated} legacy Grafana Alerting integrations.") + + self.stdout.write("Fetching integrations to back up & reset templates.") + alert_receive_channels = AlertReceiveChannel.objects.filter( + Q( + **{f"{field}__isnull": False for field in TEMPLATE_FIELDS}, + _connector=Q.OR, + ), + integration__in=[ALERTMANAGER, GRAFANA_ALERTING], + alertmanager_v2_migrated_at__isnull=False, + ) + self.stdout.write(f"Backing up & resetting templates for {len(alert_receive_channels)} integrations.") + + for alert_receive_channel in alert_receive_channels: + self.stdout.write( + f"Backing up & resetting templates for integration {alert_receive_channel.public_primary_key}." + ) + alert_receive_channel.alertmanager_v2_backup_templates = { + field: getattr(alert_receive_channel, field) for field in TEMPLATE_FIELDS + } + for field in TEMPLATE_FIELDS: + setattr(alert_receive_channel, field, None) + + self.stdout.write(f"Bulk updating templates for {len(alert_receive_channels)} integrations.") + num_updated = AlertReceiveChannel.objects.bulk_update( + alert_receive_channels, + fields=[ + *TEMPLATE_FIELDS, + "alertmanager_v2_backup_templates", + ], + batch_size=1000, + ) + self.stdout.write(f"Bulk updated templates for {num_updated} integrations.") + + self.stdout.write("Forward migration finished.") + + @transaction.atomic + def migrate_backward(self): + now = timezone.now() + self.stdout.write(f"Backward migration started at {now}.") + + self.stdout.write( + "Backward migrating Alertmanager integrations " + "(updating fields 'integration' and 'alertmanager_v2_migrated_at')." + ) + num_updated = AlertReceiveChannel.objects.filter( + integration=ALERTMANAGER, alertmanager_v2_migrated_at__isnull=False + ).update(integration=LEGACY_ALERTMANAGER, alertmanager_v2_migrated_at=None) + self.stdout.write(f"Backward migrated {num_updated} Alertmanager integrations.") + + self.stdout.write( + "Backward migrating Grafana Alerting integrations " + "(updating fields 'integration' and 'alertmanager_v2_migrated_at')." + ) + num_updated = AlertReceiveChannel.objects.filter( + integration=GRAFANA_ALERTING, alertmanager_v2_migrated_at__isnull=False + ).update(integration=LEGACY_GRAFANA_ALERTING, alertmanager_v2_migrated_at=None) + self.stdout.write(f"Backward migrated {num_updated} Grafana Alerting integrations.") + + self.stdout.write("Fetching integrations to restore templates from backup.") + alert_receive_channels = AlertReceiveChannel.objects.filter( + integration__in=[LEGACY_ALERTMANAGER, LEGACY_GRAFANA_ALERTING], + alertmanager_v2_backup_templates__isnull=False, + ) + self.stdout.write(f"Restoring templates for {len(alert_receive_channels)} integrations.") + + for alert_receive_channel in alert_receive_channels: + self.stdout.write(f"Restoring templates for integration {alert_receive_channel.public_primary_key}.") + if alert_receive_channel.alertmanager_v2_backup_templates is None: + continue + for field in TEMPLATE_FIELDS: + setattr(alert_receive_channel, field, alert_receive_channel.alertmanager_v2_backup_templates.get(field)) + alert_receive_channel.alertmanager_v2_backup_templates = None + + self.stdout.write(f"Bulk updating templates for {len(alert_receive_channels)} integrations.") + num_updated = AlertReceiveChannel.objects.bulk_update( + alert_receive_channels, + fields=[ + *TEMPLATE_FIELDS, + "alertmanager_v2_backup_templates", + ], + batch_size=1000, + ) + self.stdout.write(f"Bulk updated templates for {num_updated} integrations.") + + self.stdout.write("Backward migration finished.") diff --git a/grafana-plugin/src/models/alert_receive_channel/alert_receive_channel.types.ts b/grafana-plugin/src/models/alert_receive_channel/alert_receive_channel.types.ts index fd3ea806..3746921c 100644 --- a/grafana-plugin/src/models/alert_receive_channel/alert_receive_channel.types.ts +++ b/grafana-plugin/src/models/alert_receive_channel/alert_receive_channel.types.ts @@ -54,6 +54,7 @@ export interface AlertReceiveChannel { custom: LabelKeyValue[]; template: string; }; + alertmanager_v2_migrated_at?: string | null; } export interface AlertReceiveChannelChoice { diff --git a/grafana-plugin/src/pages/integration/Integration.tsx b/grafana-plugin/src/pages/integration/Integration.tsx index e0783449..c531d31a 100644 --- a/grafana-plugin/src/pages/integration/Integration.tsx +++ b/grafana-plugin/src/pages/integration/Integration.tsx @@ -15,6 +15,7 @@ import { import cn from 'classnames/bind'; import { get } from 'lodash-es'; import { observer } from 'mobx-react'; +import moment from 'moment-timezone'; import CopyToClipboard from 'react-copy-to-clipboard'; import Emoji from 'react-emoji-render'; import { RouteComponentProps, useHistory, withRouter } from 'react-router-dom'; @@ -69,6 +70,7 @@ import { getVar } from 'utils/DOM'; import LocationHelper from 'utils/LocationHelper'; import { UserActions } from 'utils/authorization'; import { PLUGIN_ROOT } from 'utils/consts'; +import { getItem, setItem } from 'utils/localStorage'; import sanitize from 'utils/sanitize'; const cx = cn.bind(styles); @@ -225,6 +227,7 @@ class Integration extends React.Component {
{this.renderDeprecatedHeaderMaybe(integration, isLegacyIntegration)} + {this.renderAlertmanagerV2MigrationHeaderMaybe(alertReceiveChannel)} {this.renderDescriptionMaybe(alertReceiveChannel)} @@ -327,6 +330,65 @@ class Integration extends React.Component { } } + renderAlertmanagerV2MigrationHeaderMaybe(alertReceiveChannel: AlertReceiveChannel) { + if (!alertReceiveChannel.alertmanager_v2_migrated_at) { + return null; + } + + const alertID = `alertmanager_v2_alert_hidden_${alertReceiveChannel.id}`; + if (getItem(alertID)) { + return null; + } + const onAlertRemove = () => { + setItem(alertID, true); + this.forceUpdate(); + }; + + const migratedAt = moment(alertReceiveChannel.alertmanager_v2_migrated_at).toString(); + const docsURL = `https://grafana.com/docs/oncall/latest/integrations/${alertReceiveChannel.integration.replace( + '_', + '-' + )}`; + + return ( +
+ + + This legacy integration was automatically migrated at {migratedAt}. It now relies on Alertmanager's + grouping and autoresolution mechanism. + + Here are the steps you need to take to ensure a smooth transition: + + 1. Check and adjust integration templates, as they were dropped back to default values during the + migration. + + + 2. Check and adjust integration routes so that they match the new payload shape. + + + 3. Check and adjust outgoing webhooks that use alerts from this integration so that they match the new + payload shape. + + + Refer to{' '} + + the docs + {' '} + for more information. + + + ) as any + } + /> +
+ ); + } + renderDescriptionMaybe(alertReceiveChannel: AlertReceiveChannel) { if (!alertReceiveChannel.description_short) { return null;