From f0f2e7c8c63cd3582f58172aeef937968ddf3029 Mon Sep 17 00:00:00 2001
From: Innokentii Konstantinov
Date: Tue, 13 Jun 2023 15:10:38 +0800
Subject: [PATCH] Draft AlertManager integration v2 (#2167)
# What this PR does
Introduces the AlertManagerV2 integration with improved grouping and
autoresolving. It is not yet intended for production use.
---------
Co-authored-by: Ildar Iskhakov
---
.../html/integration_alertmanager_v2.html | 20 ++
engine/apps/integrations/urls.py | 2 +
engine/apps/integrations/views.py | 36 +++
.../public_api/serializers/integrations.py | 3 +
engine/config_integrations/alertmanager.py | 12 +-
engine/config_integrations/alertmanager_v2.py | 281 ++++++++++++++++++
engine/settings/base.py | 1 +
7 files changed, 344 insertions(+), 11 deletions(-)
create mode 100644 engine/apps/integrations/templates/html/integration_alertmanager_v2.html
create mode 100644 engine/config_integrations/alertmanager_v2.py
diff --git a/engine/apps/integrations/templates/html/integration_alertmanager_v2.html b/engine/apps/integrations/templates/html/integration_alertmanager_v2.html
new file mode 100644
index 00000000..784bbd86
--- /dev/null
+++ b/engine/apps/integrations/templates/html/integration_alertmanager_v2.html
@@ -0,0 +1,20 @@
+How to start sending alerts to Grafana OnCall from AlertManager
+
+ -
+ 1. Add the new receiver to the AlertManager configuration file:
+
+ receivers:
+ - name: 'grafana_oncall'
+ webhook_configs:
+ - url: {{ alert_receive_channel.integration_url }}
+ max_alerts: 100
+
+ 2. Use receiver in route tree:
+
+ routes:
+ - matchers:
+ - severity="critical"
+ receiver: grafana_oncall
+
+
+
diff --git a/engine/apps/integrations/urls.py b/engine/apps/integrations/urls.py
index 79dd96cf..c99af177 100644
--- a/engine/apps/integrations/urls.py
+++ b/engine/apps/integrations/urls.py
@@ -8,6 +8,7 @@ from common.api_helpers.optional_slash_router import optional_slash_path
from .views import (
AlertManagerAPIView,
+ AlertManagerV2View,
AmazonSNS,
GrafanaAlertingAPIView,
GrafanaAPIView,
@@ -33,6 +34,7 @@ urlpatterns = [
path("alertmanager//", AlertManagerAPIView.as_view(), name="alertmanager"),
path("amazon_sns//", AmazonSNS.as_view(), name="amazon_sns"),
path("heartbeat//", HeartBeatAPIView.as_view(), name="heartbeat"),
+ path("alertmanager_testing//", AlertManagerV2View.as_view(), name="alertmanager_v2"),
path("//", UniversalAPIView.as_view(), name="universal"),
]
diff --git a/engine/apps/integrations/views.py b/engine/apps/integrations/views.py
index 856ca514..e78c3cf4 100644
--- a/engine/apps/integrations/views.py
+++ b/engine/apps/integrations/views.py
@@ -391,3 +391,39 @@ class IntegrationHeartBeatAPIView(AlertChannelDefiningMixin, IntegrationHeartBea
process_heartbeat_task.apply_async(
(alert_receive_channel.pk,),
)
+
+
+class AlertManagerV2View(BrowsableInstructionMixin, AlertChannelDefiningMixin, IntegrationRateLimitMixin, APIView):
+ """
+    AlertManagerV2View consumes alerts from AlertManager. It expects data in the format of the AlertManager webhook payload.
+ """
+
+ def post(self, request, *args, **kwargs):
+ alert_receive_channel = self.request.alert_receive_channel
+ if not alert_receive_channel.integration == AlertReceiveChannel.INTEGRATION_ALERTMANAGER_V2:
+ return HttpResponseBadRequest(
+                f"This url is for integration with {alert_receive_channel.config.title}. "
+                f"Key is for {alert_receive_channel.get_integration_display()}"
+ )
+ alerts = request.data.get("alerts", [])
+
+ data = request.data
+ if "numFiring" not in request.data:
+ # Count firing and resolved alerts manually if not present in payload
+ num_firing = len(list(filter(lambda a: a["status"] == "firing", alerts)))
+ num_resolved = len(list(filter(lambda a: a["status"] == "resolved", alerts)))
+ data = {**request.data, "numFiring": num_firing, "numResolved": num_resolved}
+
+ create_alert.apply_async(
+ [],
+ {
+ "title": None,
+ "message": None,
+ "image_url": None,
+ "link_to_upstream_details": None,
+ "alert_receive_channel_pk": alert_receive_channel.pk,
+ "integration_unique_data": None,
+ "raw_request_data": data,
+ },
+ )
+ return Response("Ok.")
diff --git a/engine/apps/public_api/serializers/integrations.py b/engine/apps/public_api/serializers/integrations.py
index b9ee7427..eb0b7da6 100644
--- a/engine/apps/public_api/serializers/integrations.py
+++ b/engine/apps/public_api/serializers/integrations.py
@@ -117,6 +117,9 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main
default_route_data = validated_data.pop("default_route", None)
organization = self.context["request"].auth.organization
integration = validated_data.get("integration")
+        # Temporary hack: reject creation of alertmanager_v2 integrations via the public API; remove once the integration is production-ready
+ if integration == "alertmanager_v2":
+ raise BadRequest
if integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING:
connection_error = GrafanaAlertingSyncManager.check_for_connection_errors(organization)
if connection_error:
diff --git a/engine/config_integrations/alertmanager.py b/engine/config_integrations/alertmanager.py
index a7b47527..4d94ed3c 100644
--- a/engine/config_integrations/alertmanager.py
+++ b/engine/config_integrations/alertmanager.py
@@ -8,17 +8,7 @@ is_featured = False
is_able_to_autoresolve = True
is_demo_alert_enabled = True
-description = """
-Alerts from Grafana Alertmanager are automatically routed to this integration.
-{% for dict_item in grafana_alerting_entities %}
-
Click here
- to open contact point, and
- here
- to open routes for {{dict_item.alertmanager_name}} Alertmanager.
-{% endfor %}
-{% if not is_finished_alerting_setup %}
-
Creating contact points and routes for other alertmanagers...
-{% endif %}"""
+description = None
# Web
web_title = """{{- payload.get("labels", {}).get("alertname", "No title (check Title Template)") -}}"""
diff --git a/engine/config_integrations/alertmanager_v2.py b/engine/config_integrations/alertmanager_v2.py
new file mode 100644
index 00000000..7fe88baa
--- /dev/null
+++ b/engine/config_integrations/alertmanager_v2.py
@@ -0,0 +1,281 @@
+# Main
+enabled = True
+title = "AlertManagerV2"
+slug = "alertmanager_v2"
+short_description = "Prometheus"
+is_displayed_on_web = True
+is_featured = False
+is_able_to_autoresolve = True
+is_demo_alert_enabled = True
+description = None
+
+
+# Behaviour
+source_link = "{{ payload.externalURL }}"
+
+grouping_id = "{{ payload.groupKey }}"
+
+resolve_condition = """{{ payload.status == "resolved" }}"""
+
+acknowledge_condition = None
+
+
+web_title = """\
+{%- set groupLabels = payload.groupLabels.copy() -%}
+{%- set alertname = groupLabels.pop('alertname') | default("") -%}
+
+
+[{{ payload.status }}{% if payload.status == 'firing' %}:{{ payload.numFiring }}{% endif %}] {{ alertname }} {% if groupLabels | length > 0 %}({{ groupLabels|join(", ") }}){% endif %}
+""" # noqa
+
+web_message = """\
+{%- set annotations = payload.commonAnnotations.copy() -%}
+
+{% set severity = payload.groupLabels.severity -%}
+{% if severity %}
+{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
+Severity: {{ severity }} {{ severity_emoji }}
+{% endif %}
+
+{%- set status = payload.status | default("Unknown") %}
+{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
+Status: {{ status }} {{ status_emoji }} (on the source)
+{% if status == "firing" %}
+Firing alerts – {{ payload.numFiring }}
+Resolved alerts – {{ payload.numResolved }}
+{% endif %}
+
+{% if "runbook_url" in annotations -%}
+[:book: Runbook:link:]({{ annotations.runbook_url }})
+{%- set _ = annotations.pop('runbook_url') -%}
+{%- endif %}
+
+{%- if "runbook_url_internal" in annotations -%}
+[:closed_book: Runbook (internal):link:]({{ annotations.runbook_url_internal }})
+{%- set _ = annotations.pop('runbook_url_internal') -%}
+{%- endif %}
+
+GroupLabels:
+{%- for k, v in payload["groupLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+
+{% if payload["commonLabels"] | length > 0 -%}
+CommonLabels:
+{%- for k, v in payload["commonLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+{% if annotations | length > 0 -%}
+Annotations:
+{%- for k, v in annotations.items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+[View in AlertManager]({{ source_link }})
+"""
+
+
+# Slack templates
+slack_title = """\
+{%- set groupLabels = payload.groupLabels.copy() -%}
+{%- set alertname = groupLabels.pop('alertname') | default("") -%}
+*<{{ grafana_oncall_link }}|#{{ grafana_oncall_incident_id }} {{ web_title }}>* via {{ integration_name }}
+{% if source_link %}
+ (*<{{ source_link }}|source>*)
+{%- endif %}
+"""
+
+# The default Slack message template is identical to the web message template, except for link formatting.
+# It could be derived from the web message template (see the commented example below), but that could affect existing templates.
+# slack_message = """
+# {% set mkdwn_link_regex = "\[([\w\s\d:]+)\]\((https?:\/\/[\w\d./?=#]+)\)" %}
+# {{ web_message
+# | regex_replace(mkdwn_link_regex, "<\\2|\\1>")
+# }}
+# """
+
+slack_message = """\
+{%- set annotations = payload.commonAnnotations.copy() -%}
+
+{% set severity = payload.groupLabels.severity -%}
+{% if severity %}
+{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
+Severity: {{ severity }} {{ severity_emoji }}
+{% endif %}
+
+{%- set status = payload.status | default("Unknown") %}
+{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
+Status: {{ status }} {{ status_emoji }} (on the source)
+{% if status == "firing" %}
+Firing alerts – {{ payload.numFiring }}
+Resolved alerts – {{ payload.numResolved }}
+{% endif %}
+
+{% if "runbook_url" in annotations -%}
+<{{ annotations.runbook_url }}|:book: Runbook:link:>
+{%- set _ = annotations.pop('runbook_url') -%}
+{%- endif %}
+
+{%- if "runbook_url_internal" in annotations -%}
+<{{ annotations.runbook_url_internal }}|:closed_book: Runbook (internal):link:>
+{%- set _ = annotations.pop('runbook_url_internal') -%}
+{%- endif %}
+
+GroupLabels:
+{%- for k, v in payload["groupLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+
+{% if payload["commonLabels"] | length > 0 -%}
+CommonLabels:
+{%- for k, v in payload["commonLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+{% if annotations | length > 0 -%}
+Annotations:
+{%- for k, v in annotations.items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+"""
+# noqa: W291
+
+
+slack_image_url = None
+
+web_image_url = None
+
+sms_title = web_title
+
+
+phone_call_title = """{{ payload.groupLabels|join(", ") }}"""
+
+telegram_title = web_title
+
+telegram_message = """\
+{%- set annotations = payload.commonAnnotations.copy() -%}
+
+{% set severity = payload.groupLabels.severity -%}
+{% if severity %}
+{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
+Severity: {{ severity }} {{ severity_emoji }}
+{% endif %}
+
+{%- set status = payload.status | default("Unknown") %}
+{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
+Status: {{ status }} {{ status_emoji }} (on the source)
+{% if status == "firing" %}
+Firing alerts – {{ payload.numFiring }}
+Resolved alerts – {{ payload.numResolved }}
+{% endif %}
+
+{% if "runbook_url" in annotations -%}
+:book: Runbook:link:
+{%- set _ = annotations.pop('runbook_url') -%}
+{%- endif %}
+
+{%- if "runbook_url_internal" in annotations -%}
+:closed_book: Runbook (internal):link:
+{%- set _ = annotations.pop('runbook_url_internal') -%}
+{%- endif %}
+
+GroupLabels:
+{%- for k, v in payload["groupLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+
+{% if payload["commonLabels"] | length > 0 -%}
+CommonLabels:
+{%- for k, v in payload["commonLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+{% if annotations | length > 0 -%}
+Annotations:
+{%- for k, v in annotations.items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+View in AlertManager
+"""
+
+telegram_image_url = None
+
+
+example_payload = {
+ "alerts": [
+ {
+ "endsAt": "0001-01-01T00:00:00Z",
+ "labels": {
+ "job": "node",
+ "group": "production",
+ "instance": "localhost:8081",
+ "severity": "critical",
+ "alertname": "InstanceDown",
+ },
+ "status": "firing",
+ "startsAt": "2023-06-12T08:24:38.326Z",
+ "annotations": {
+ "title": "Instance localhost:8081 down",
+ "description": "localhost:8081 of job node has been down for more than 1 minute.",
+ },
+ "fingerprint": "f404ecabc8dd5cd7",
+ "generatorURL": "",
+ },
+ {
+ "endsAt": "0001-01-01T00:00:00Z",
+ "labels": {
+ "job": "node",
+ "group": "canary",
+ "instance": "localhost:8082",
+ "severity": "critical",
+ "alertname": "InstanceDown",
+ },
+ "status": "firing",
+ "startsAt": "2023-06-12T08:24:38.326Z",
+ "annotations": {
+ "title": "Instance localhost:8082 down",
+ "description": "localhost:8082 of job node has been down for more than 1 minute.",
+ },
+ "fingerprint": "f8f08d4e32c61a9d",
+ "generatorURL": "",
+ },
+ {
+ "endsAt": "0001-01-01T00:00:00Z",
+ "labels": {
+ "job": "node",
+ "group": "production",
+ "instance": "localhost:8083",
+ "severity": "critical",
+ "alertname": "InstanceDown",
+ },
+ "status": "firing",
+ "startsAt": "2023-06-12T08:24:38.326Z",
+ "annotations": {
+ "title": "Instance localhost:8083 down",
+ "description": "localhost:8083 of job node has been down for more than 1 minute.",
+ },
+ "fingerprint": "39f38c0611ee7abd",
+ "generatorURL": "",
+ },
+ ],
+ "status": "firing",
+ "version": "4",
+ "groupKey": '{}:{alertname="InstanceDown"}',
+ "receiver": "combo",
+ "numFiring": 3,
+ "externalURL": "",
+ "groupLabels": {"alertname": "InstanceDown"},
+ "numResolved": 0,
+ "commonLabels": {"job": "node", "severity": "critical", "alertname": "InstanceDown"},
+ "truncatedBytes": 0,
+ "truncatedAlerts": 0,
+ "commonAnnotations": {},
+}
diff --git a/engine/settings/base.py b/engine/settings/base.py
index aa2adca4..f62acdb4 100644
--- a/engine/settings/base.py
+++ b/engine/settings/base.py
@@ -660,6 +660,7 @@ INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN")
INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET")
INSTALLED_ONCALL_INTEGRATIONS = [
+ "config_integrations.alertmanager_v2",
"config_integrations.alertmanager",
"config_integrations.grafana",
"config_integrations.grafana_alerting",