Alertmanager V2 migration prep (#3722)

# What this PR does

- Adds a Django management command and database fields required for the
Alertmanager V2 migration
- Adds a post-migration warning alert

<img width="1177" alt="Screenshot 2024-01-19 at 17 41 04"
src="https://github.com/grafana/oncall/assets/20116910/512ab22e-9a00-481e-883d-3dadfc95b587">


Related to https://github.com/grafana/oncall-private/issues/2260

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
This commit is contained in:
Vadim Stepanov 2024-01-23 10:36:58 +00:00 committed by GitHub
parent fdbccdac99
commit b5aa53d3d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 406 additions and 1 deletions

View file

@ -0,0 +1,23 @@
# Generated by Django 4.2.7 on 2024-01-19 13:16
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add two nullable columns to ``alertreceivechannel`` for the Alertmanager V2 migration."""

    # Must be applied after migration 0043 of the 'alerts' app.
    dependencies = [
        ('alerts', '0043_remove_alertgroup_alerts_aler_channel_81aeec_idx_and_more'),
    ]

    operations = [
        # JSON column for the template backup; nullable, defaults to None.
        migrations.AddField(
            model_name='alertreceivechannel',
            name='alertmanager_v2_backup_templates',
            field=models.JSONField(default=None, null=True),
        ),
        # Timestamp column for when the migration was run; nullable, defaults to None.
        migrations.AddField(
            model_name='alertreceivechannel',
            name='alertmanager_v2_migrated_at',
            field=models.DateTimeField(default=None, null=True),
        ),
    ]

View file

@ -48,6 +48,34 @@ if typing.TYPE_CHECKING:
logger = logging.getLogger(__name__)
class MessagingBackendTemplatesItem:
title: str | None
message: str | None
image_url: str | None
MessagingBackendTemplates = dict[str, MessagingBackendTemplatesItem]
class AlertmanagerV2LegacyTemplates(typing.TypedDict):
    """
    Shape of a template backup taken before the Alertmanager V2 migration.

    Each key is named after a template field; a value of ``None`` means no template was set for it.
    """

    # Web templates
    web_title_template: str | None
    web_message_template: str | None
    web_image_url_template: str | None
    # SMS / phone call templates
    sms_title_template: str | None
    phone_call_title_template: str | None
    # Behavioural templates (source link, grouping, resolve / acknowledge conditions)
    source_link_template: str | None
    grouping_id_template: str | None
    resolve_condition_template: str | None
    acknowledge_condition_template: str | None
    # Slack templates
    slack_title_template: str | None
    slack_message_template: str | None
    slack_image_url_template: str | None
    # Telegram templates
    telegram_title_template: str | None
    telegram_message_template: str | None
    telegram_image_url_template: str | None
    # Templates for additional messaging backends, keyed by backend ID
    messaging_backends_templates: MessagingBackendTemplates | None
def generate_public_primary_key_for_alert_receive_channel():
prefix = "C"
new_public_primary_key = generate_public_primary_key(prefix)
@ -252,7 +280,16 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject):
# additional messaging backends templates
# e.g. {'<BACKEND-ID>': {'title': 'title template', 'message': 'message template', 'image_url': 'url template'}}
messaging_backends_templates = models.JSONField(null=True, default=None)
messaging_backends_templates: MessagingBackendTemplates | None = models.JSONField(null=True, default=None)
alertmanager_v2_migrated_at = models.DateTimeField(null=True, default=None)
"""
Timestamp of when Alertmanager V2 migration was run for this integration using the 'alertmanager_v2_migrate'
Django management command.
"""
alertmanager_v2_backup_templates: AlertmanagerV2LegacyTemplates | None = models.JSONField(null=True, default=None)
"""Backing up templates before the Alertmanager V2 migration, so that they can be restored if needed."""
rate_limited_in_slack_at = models.DateTimeField(null=True, default=None)
rate_limit_message_task_id = models.CharField(max_length=100, null=True, default=None)

View file

@ -4,10 +4,12 @@ from unittest.mock import patch
import pytest
from django.db import IntegrityError
from django.urls import reverse
from django.utils import timezone
from apps.alerts.models import AlertReceiveChannel
from common.api_helpers.utils import create_engine_url
from common.exceptions import UnableToSendDemoAlert
from engine.management.commands import alertmanager_v2_migrate
@pytest.mark.django_db
@ -281,3 +283,133 @@ def test_create_duplicate_direct_paging_integrations(make_organization, make_tea
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
)
super(AlertReceiveChannel, arc).save() # bypass the custom save method, so that IntegrityError is raised
@pytest.mark.django_db
def test_alertmanager_v2_migrate_forward(make_organization, make_alert_receive_channel):
    """Forward run converts legacy integrations, backs up and resets their templates, leaves others alone."""
    org = make_organization()

    # Templates customised on the legacy Alertmanager integration before the migration.
    legacy_templates = {
        "slack_title_template": "slack_title_template",
        "web_title_template": "web_title_template",
        "grouping_id_template": "grouping_id_template",
        "resolve_condition_template": "resolve_condition_template",
    }

    legacy_alertmanager = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_LEGACY_ALERTMANAGER,
        **legacy_templates,
    )
    alertmanager = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_ALERTMANAGER,
        slack_title_template="slack_title_template",
    )
    legacy_grafana_alerting = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_LEGACY_GRAFANA_ALERTING,
    )
    grafana_alerting = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING,
        slack_title_template="slack_title_template",
    )

    alertmanager_v2_migrate.Command().handle(backward=False)

    for channel in (legacy_alertmanager, alertmanager, legacy_grafana_alerting, grafana_alerting):
        channel.refresh_from_db()

    # Legacy Alertmanager: converted, templates moved into the backup and reset.
    assert legacy_alertmanager.integration == AlertReceiveChannel.INTEGRATION_ALERTMANAGER
    assert legacy_alertmanager.alertmanager_v2_migrated_at is not None
    backup = legacy_alertmanager.alertmanager_v2_backup_templates
    for field_name, expected_value in legacy_templates.items():
        assert getattr(legacy_alertmanager, field_name) is None
        assert backup[field_name] == expected_value
    assert backup["messaging_backends_templates"] is None

    # Legacy Grafana Alerting: converted, but it had no templates so nothing is backed up.
    assert legacy_grafana_alerting.integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING
    assert legacy_grafana_alerting.alertmanager_v2_migrated_at is not None
    assert legacy_grafana_alerting.alertmanager_v2_backup_templates is None

    # Non-legacy integrations must be left untouched.
    untouched = (
        (alertmanager, AlertReceiveChannel.INTEGRATION_ALERTMANAGER),
        (grafana_alerting, AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING),
    )
    for channel, expected_integration in untouched:
        assert channel.integration == expected_integration
        assert channel.alertmanager_v2_migrated_at is None
        assert channel.slack_title_template == "slack_title_template"
        assert channel.alertmanager_v2_backup_templates is None
@pytest.mark.django_db
def test_alertmanager_v2_migrate_backward(make_organization, make_alert_receive_channel):
    """Backward run reverts migrated integrations, restores backed-up templates, leaves others alone."""
    org = make_organization()

    # Backup that the backward migration is expected to restore onto the integration.
    backed_up_templates = {
        "slack_title_template": "slack_title_template",
        "web_title_template": "web_title_template",
        "grouping_id_template": "grouping_id_template",
        "resolve_condition_template": "resolve_condition_template",
    }

    migrated_alertmanager = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_ALERTMANAGER,
        alertmanager_v2_migrated_at=timezone.now(),
        alertmanager_v2_backup_templates=backed_up_templates,
    )
    alertmanager = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_ALERTMANAGER,
        slack_title_template="slack_title_template",
    )
    migrated_grafana_alerting = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING,
        alertmanager_v2_migrated_at=timezone.now(),
    )
    grafana_alerting = make_alert_receive_channel(
        org,
        integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING,
        slack_title_template="slack_title_template",
    )

    alertmanager_v2_migrate.Command().handle(backward=True)

    for channel in (migrated_alertmanager, alertmanager, migrated_grafana_alerting, grafana_alerting):
        channel.refresh_from_db()

    # Migrated Alertmanager: reverted to legacy, templates restored, backup cleared.
    assert migrated_alertmanager.integration == AlertReceiveChannel.INTEGRATION_LEGACY_ALERTMANAGER
    assert migrated_alertmanager.alertmanager_v2_migrated_at is None
    for field_name, expected_value in backed_up_templates.items():
        assert getattr(migrated_alertmanager, field_name) == expected_value
    assert migrated_alertmanager.alertmanager_v2_backup_templates is None

    # Migrated Grafana Alerting: reverted to legacy; there was no backup, so templates stay empty.
    assert migrated_grafana_alerting.integration == AlertReceiveChannel.INTEGRATION_LEGACY_GRAFANA_ALERTING
    assert migrated_grafana_alerting.alertmanager_v2_migrated_at is None
    for field_name in backed_up_templates:
        assert getattr(migrated_grafana_alerting, field_name) is None
    assert migrated_grafana_alerting.alertmanager_v2_backup_templates is None

    # Integrations that were never migrated must be left untouched.
    untouched = (
        (alertmanager, AlertReceiveChannel.INTEGRATION_ALERTMANAGER),
        (grafana_alerting, AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING),
    )
    for channel, expected_integration in untouched:
        assert channel.integration == expected_integration
        assert channel.alertmanager_v2_migrated_at is None
        assert channel.slack_title_template == "slack_title_template"
        assert channel.alertmanager_v2_backup_templates is None

View file

@ -266,6 +266,7 @@ class AlertReceiveChannelSerializer(
"is_legacy",
"labels",
"alert_group_labels",
"alertmanager_v2_migrated_at",
]
read_only_fields = [
"created_at",
@ -282,6 +283,7 @@ class AlertReceiveChannelSerializer(
"is_based_on_alertmanager",
"inbound_email",
"is_legacy",
"alertmanager_v2_migrated_at",
]
extra_kwargs = {"integration": {"required": True}}

View file

@ -0,0 +1,148 @@
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Q
from django.utils import timezone
from apps.alerts.models import AlertReceiveChannel
# Values of AlertReceiveChannel.integration that this command switches between.
ALERTMANAGER = "alertmanager"
LEGACY_ALERTMANAGER = "legacy_alertmanager"
GRAFANA_ALERTING = "grafana_alerting"
LEGACY_GRAFANA_ALERTING = "legacy_grafana_alerting"

# AlertReceiveChannel attributes that the forward migration copies into
# 'alertmanager_v2_backup_templates' and resets to None, and that the backward
# migration restores from that backup.
TEMPLATE_FIELDS = [
    "web_title_template",
    "web_message_template",
    "web_image_url_template",
    "sms_title_template",
    "phone_call_title_template",
    "source_link_template",
    "grouping_id_template",
    "resolve_condition_template",
    "acknowledge_condition_template",
    "slack_title_template",
    "slack_message_template",
    "slack_image_url_template",
    "telegram_title_template",
    "telegram_message_template",
    "telegram_image_url_template",
    "messaging_backends_templates",
]
class Command(BaseCommand):
    """
    Switch legacy Alertmanager / Grafana Alerting integrations to their non-legacy types, or back.

    Forward (default): legacy integrations get the non-legacy ``integration`` value and an
    ``alertmanager_v2_migrated_at`` timestamp; any templates they had are copied into
    ``alertmanager_v2_backup_templates`` and reset to ``None``.

    Backward (``--backward``): integrations carrying a migration timestamp are switched back to the
    legacy types, the timestamp is cleared, and templates are restored from the backup (if any).

    Each direction runs inside a single database transaction.
    """

    help = (
        "Migrate legacy Alertmanager and legacy Grafana Alerting integrations to Alertmanager V2. "
        "Pass --backward to revert a previous migration."
    )

    def add_arguments(self, parser):
        """Register the ``--backward`` flag that selects the reverse direction."""
        parser.add_argument("--backward", action="store_true", help="Run the migration backward.")

    def handle(self, *args, **options):
        """Run the backward migration when ``backward`` is truthy, otherwise the forward one."""
        # .get() keeps direct calls such as Command().handle() working without the option.
        if options.get("backward"):
            self.migrate_backward()
        else:
            self.migrate_forward()

    @transaction.atomic
    def migrate_forward(self):
        """Convert legacy integrations to the non-legacy types and back up / reset their templates."""
        now = timezone.now()
        self.stdout.write(f"Forward migration started at {now}.")

        self.stdout.write(
            "Migrating legacy Alertmanager integrations "
            "(updating fields 'integration' and 'alertmanager_v2_migrated_at')."
        )
        num_updated = AlertReceiveChannel.objects.filter(integration=LEGACY_ALERTMANAGER).update(
            integration=ALERTMANAGER, alertmanager_v2_migrated_at=now
        )
        self.stdout.write(f"Migrated {num_updated} legacy Alertmanager integrations.")

        self.stdout.write(
            "Migrating legacy Grafana Alerting integrations "
            "(updating fields 'integration' and 'alertmanager_v2_migrated_at')."
        )
        num_updated = AlertReceiveChannel.objects.filter(integration=LEGACY_GRAFANA_ALERTING).update(
            integration=GRAFANA_ALERTING, alertmanager_v2_migrated_at=now
        )
        self.stdout.write(f"Migrated {num_updated} legacy Grafana Alerting integrations.")

        self.stdout.write("Fetching integrations to back up & reset templates.")
        # Only touch integrations stamped by THIS run (migrated_at == now). Matching on
        # "migrated_at IS NOT NULL" would also select integrations migrated by an earlier run,
        # overwrite their legacy backup with the templates users configured afterwards, and then
        # reset those templates.
        alert_receive_channels = list(
            AlertReceiveChannel.objects.filter(
                # At least one template field is set.
                Q(
                    **{f"{field}__isnull": False for field in TEMPLATE_FIELDS},
                    _connector=Q.OR,
                ),
                integration__in=[ALERTMANAGER, GRAFANA_ALERTING],
                alertmanager_v2_migrated_at=now,
            )
        )

        self.stdout.write(f"Backing up & resetting templates for {len(alert_receive_channels)} integrations.")
        for alert_receive_channel in alert_receive_channels:
            self.stdout.write(
                f"Backing up & resetting templates for integration {alert_receive_channel.public_primary_key}."
            )
            # Snapshot every template field first, then clear them.
            alert_receive_channel.alertmanager_v2_backup_templates = {
                field: getattr(alert_receive_channel, field) for field in TEMPLATE_FIELDS
            }
            for field in TEMPLATE_FIELDS:
                setattr(alert_receive_channel, field, None)

        self.stdout.write(f"Bulk updating templates for {len(alert_receive_channels)} integrations.")
        num_updated = AlertReceiveChannel.objects.bulk_update(
            alert_receive_channels,
            fields=[
                *TEMPLATE_FIELDS,
                "alertmanager_v2_backup_templates",
            ],
            batch_size=1000,
        )
        self.stdout.write(f"Bulk updated templates for {num_updated} integrations.")
        self.stdout.write("Forward migration finished.")

    @transaction.atomic
    def migrate_backward(self):
        """Revert migrated integrations to the legacy types and restore templates from the backup."""
        now = timezone.now()
        self.stdout.write(f"Backward migration started at {now}.")

        self.stdout.write(
            "Backward migrating Alertmanager integrations "
            "(updating fields 'integration' and 'alertmanager_v2_migrated_at')."
        )
        # Only integrations carrying a migration timestamp are reverted.
        num_updated = AlertReceiveChannel.objects.filter(
            integration=ALERTMANAGER, alertmanager_v2_migrated_at__isnull=False
        ).update(integration=LEGACY_ALERTMANAGER, alertmanager_v2_migrated_at=None)
        self.stdout.write(f"Backward migrated {num_updated} Alertmanager integrations.")

        self.stdout.write(
            "Backward migrating Grafana Alerting integrations "
            "(updating fields 'integration' and 'alertmanager_v2_migrated_at')."
        )
        num_updated = AlertReceiveChannel.objects.filter(
            integration=GRAFANA_ALERTING, alertmanager_v2_migrated_at__isnull=False
        ).update(integration=LEGACY_GRAFANA_ALERTING, alertmanager_v2_migrated_at=None)
        self.stdout.write(f"Backward migrated {num_updated} Grafana Alerting integrations.")

        self.stdout.write("Fetching integrations to restore templates from backup.")
        alert_receive_channels = list(
            AlertReceiveChannel.objects.filter(
                integration__in=[LEGACY_ALERTMANAGER, LEGACY_GRAFANA_ALERTING],
                alertmanager_v2_backup_templates__isnull=False,
            )
        )

        self.stdout.write(f"Restoring templates for {len(alert_receive_channels)} integrations.")
        for alert_receive_channel in alert_receive_channels:
            self.stdout.write(f"Restoring templates for integration {alert_receive_channel.public_primary_key}.")

            backup = alert_receive_channel.alertmanager_v2_backup_templates
            # Defensive: the query above already excludes rows without a backup.
            if backup is None:
                continue

            # Keys missing from the backup are restored as None.
            for field in TEMPLATE_FIELDS:
                setattr(alert_receive_channel, field, backup.get(field))
            alert_receive_channel.alertmanager_v2_backup_templates = None

        self.stdout.write(f"Bulk updating templates for {len(alert_receive_channels)} integrations.")
        num_updated = AlertReceiveChannel.objects.bulk_update(
            alert_receive_channels,
            fields=[
                *TEMPLATE_FIELDS,
                "alertmanager_v2_backup_templates",
            ],
            batch_size=1000,
        )
        self.stdout.write(f"Bulk updated templates for {num_updated} integrations.")
        self.stdout.write("Backward migration finished.")

View file

@ -54,6 +54,7 @@ export interface AlertReceiveChannel {
custom: LabelKeyValue[];
template: string;
};
alertmanager_v2_migrated_at?: string | null;
}
export interface AlertReceiveChannelChoice {

View file

@ -15,6 +15,7 @@ import {
import cn from 'classnames/bind';
import { get } from 'lodash-es';
import { observer } from 'mobx-react';
import moment from 'moment-timezone';
import CopyToClipboard from 'react-copy-to-clipboard';
import Emoji from 'react-emoji-render';
import { RouteComponentProps, useHistory, withRouter } from 'react-router-dom';
@ -69,6 +70,7 @@ import { getVar } from 'utils/DOM';
import LocationHelper from 'utils/LocationHelper';
import { UserActions } from 'utils/authorization';
import { PLUGIN_ROOT } from 'utils/consts';
import { getItem, setItem } from 'utils/localStorage';
import sanitize from 'utils/sanitize';
const cx = cn.bind(styles);
@ -225,6 +227,7 @@ class Integration extends React.Component<IntegrationProps, IntegrationState> {
<div className={cx('integration__subheading-container')}>
{this.renderDeprecatedHeaderMaybe(integration, isLegacyIntegration)}
{this.renderAlertmanagerV2MigrationHeaderMaybe(alertReceiveChannel)}
{this.renderDescriptionMaybe(alertReceiveChannel)}
@ -327,6 +330,65 @@ class Integration extends React.Component<IntegrationProps, IntegrationState> {
}
}
/**
 * Renders a dismissible warning for integrations that carry an `alertmanager_v2_migrated_at` value.
 *
 * Returns `null` when the integration has no migration timestamp, or when the per-integration
 * "hidden" flag has already been stored through `setItem` (i.e. the user dismissed the alert).
 */
renderAlertmanagerV2MigrationHeaderMaybe(alertReceiveChannel: AlertReceiveChannel) {
  if (!alertReceiveChannel.alertmanager_v2_migrated_at) {
    return null;
  }

  // Dismissal is remembered separately for every integration id.
  const alertID = `alertmanager_v2_alert_hidden_${alertReceiveChannel.id}`;
  if (getItem(alertID)) {
    return null;
  }

  const onAlertRemove = () => {
    setItem(alertID, true);
    // The stored flag is not component state, so trigger the re-render manually.
    this.forceUpdate();
  };

  const migratedAt = moment(alertReceiveChannel.alertmanager_v2_migrated_at).toString();
  // The docs slug uses dashes. A global regex is required here: a string pattern passed to
  // `replace` would only swap the first underscore.
  const docsURL = `https://grafana.com/docs/oncall/latest/integrations/${alertReceiveChannel.integration.replace(
    /_/g,
    '-'
  )}`;

  return (
    <div className="u-padding-top-md">
      <Alert
        severity="warning"
        onRemove={onAlertRemove}
        title={
          (
            <VerticalGroup>
              <Text type="secondary">
                This legacy integration was automatically migrated at {migratedAt}. It now relies on Alertmanager's
                grouping and autoresolution mechanism.
              </Text>
              <Text type="secondary">Here are the steps you need to take to ensure a smooth transition:</Text>
              <Text type="secondary">
                1. Check and adjust integration templates, as they were dropped back to default values during the
                migration.
              </Text>
              <Text type="secondary">
                2. Check and adjust integration routes so that they match the new payload shape.
              </Text>
              <Text type="secondary">
                3. Check and adjust outgoing webhooks that use alerts from this integration so that they match the new
                payload shape.
              </Text>
              <Text type="secondary">
                Refer to{' '}
                <a href={docsURL} target="_blank" rel="noreferrer">
                  <Text type="link">the docs</Text>
                </a>{' '}
                for more information.
              </Text>
            </VerticalGroup>
          ) as any
        }
      />
    </div>
  );
}
renderDescriptionMaybe(alertReceiveChannel: AlertReceiveChannel) {
if (!alertReceiveChannel.description_short) {
return null;