From f0f2e7c8c63cd3582f58172aeef937968ddf3029 Mon Sep 17 00:00:00 2001
From: Innokentii Konstantinov
Date: Tue, 13 Jun 2023 15:10:38 +0800
Subject: [PATCH] Draft AlertManager integration v2 (#2167)

# What this PR does

Introduces the AlertManagerV2 integration with better grouping and autoresolving. It is not intended for production use yet.

---------

Co-authored-by: Ildar Iskhakov
---
 .../html/integration_alertmanager_v2.html      |  20 ++
 engine/apps/integrations/urls.py               |   2 +
 engine/apps/integrations/views.py              |  36 +++
 .../public_api/serializers/integrations.py     |   3 +
 engine/config_integrations/alertmanager.py     |  12 +-
 engine/config_integrations/alertmanager_v2.py  | 281 ++++++++++++++++++
 engine/settings/base.py                        |   1 +
 7 files changed, 344 insertions(+), 11 deletions(-)
 create mode 100644 engine/apps/integrations/templates/html/integration_alertmanager_v2.html
 create mode 100644 engine/config_integrations/alertmanager_v2.py

diff --git a/engine/apps/integrations/templates/html/integration_alertmanager_v2.html b/engine/apps/integrations/templates/html/integration_alertmanager_v2.html
new file mode 100644
index 00000000..784bbd86
--- /dev/null
+++ b/engine/apps/integrations/templates/html/integration_alertmanager_v2.html
@@ -0,0 +1,20 @@
+<h3>How to start sending alerts to Grafana OnCall from AlertManager</h3>
+<ol>
+  <li>Add the new receiver to the AlertManager configuration file:
+    <pre>
+      receivers:
+      - name: 'grafana_oncall'
+        webhook_configs:
+        - url: {{ alert_receive_channel.integration_url }}
+          max_alerts: 100
+    </pre>
+  </li>
+  <li>Use the receiver in the route tree:
+    <pre>
+      routes:
+      - matchers:
+        - severity="critical"
+        receiver: grafana_oncall
+    </pre>
+  </li>
+</ol>
diff --git a/engine/apps/integrations/urls.py b/engine/apps/integrations/urls.py
index 79dd96cf..c99af177 100644
--- a/engine/apps/integrations/urls.py
+++ b/engine/apps/integrations/urls.py
@@ -8,6 +8,7 @@ from common.api_helpers.optional_slash_router import optional_slash_path
 from .views import (
     AlertManagerAPIView,
+    AlertManagerV2View,
     AmazonSNS,
     GrafanaAlertingAPIView,
     GrafanaAPIView,
@@ -33,6 +34,7 @@ urlpatterns = [
     path("alertmanager/<str:alert_channel_key>/", AlertManagerAPIView.as_view(), name="alertmanager"),
     path("amazon_sns/<str:alert_channel_key>/", AmazonSNS.as_view(), name="amazon_sns"),
     path("heartbeat/<str:alert_channel_key>/", HeartBeatAPIView.as_view(), name="heartbeat"),
+    path("alertmanager_testing/<str:alert_channel_key>/", AlertManagerV2View.as_view(), name="alertmanager_v2"),
     path("<str:integration_type>/<str:alert_channel_key>/", UniversalAPIView.as_view(), name="universal"),
 ]
diff --git a/engine/apps/integrations/views.py b/engine/apps/integrations/views.py
index 856ca514..e78c3cf4 100644
--- a/engine/apps/integrations/views.py
+++ b/engine/apps/integrations/views.py
@@ -391,3 +391,39 @@ class IntegrationHeartBeatAPIView(AlertChannelDefiningMixin, IntegrationHeartBea
         process_heartbeat_task.apply_async(
             (alert_receive_channel.pk,),
         )
+
+
+class AlertManagerV2View(BrowsableInstructionMixin, AlertChannelDefiningMixin, IntegrationRateLimitMixin, APIView):
+    """
+    AlertManagerV2View consumes alerts from AlertManager. It expects data in the format of the AlertManager webhook receiver.
+    """
+
+    def post(self, request, *args, **kwargs):
+        alert_receive_channel = self.request.alert_receive_channel
+        if alert_receive_channel.integration != AlertReceiveChannel.INTEGRATION_ALERTMANAGER_V2:
+            return HttpResponseBadRequest(
+                f"This URL is for integration with {alert_receive_channel.config.title}. "
+                f"The key is for {alert_receive_channel.get_integration_display()}."
+            )
+        alerts = request.data.get("alerts", [])
+
+        data = request.data
+        if "numFiring" not in request.data:
+            # Count firing and resolved alerts manually if the counts are not present in the payload
+            num_firing = len(list(filter(lambda a: a["status"] == "firing", alerts)))
+            num_resolved = len(list(filter(lambda a: a["status"] == "resolved", alerts)))
+            data = {**request.data, "numFiring": num_firing, "numResolved": num_resolved}
+
+        create_alert.apply_async(
+            [],
+            {
+                "title": None,
+                "message": None,
+                "image_url": None,
+                "link_to_upstream_details": None,
+                "alert_receive_channel_pk": alert_receive_channel.pk,
+                "integration_unique_data": None,
+                "raw_request_data": data,
+            },
+        )
+        return Response("Ok.")
diff --git a/engine/apps/public_api/serializers/integrations.py b/engine/apps/public_api/serializers/integrations.py
index b9ee7427..eb0b7da6 100644
--- a/engine/apps/public_api/serializers/integrations.py
+++ b/engine/apps/public_api/serializers/integrations.py
@@ -117,6 +117,9 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main
         default_route_data = validated_data.pop("default_route", None)
         organization = self.context["request"].auth.organization
         integration = validated_data.get("integration")
+        # Temporary hack to block creation of alertmanager_v2 integrations via the public API; will be removed
+        if integration == "alertmanager_v2":
+            raise BadRequest
         if integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING:
             connection_error = GrafanaAlertingSyncManager.check_for_connection_errors(organization)
             if connection_error:
diff --git a/engine/config_integrations/alertmanager.py b/engine/config_integrations/alertmanager.py
index a7b47527..4d94ed3c 100644
--- a/engine/config_integrations/alertmanager.py
+++ b/engine/config_integrations/alertmanager.py
@@ -8,17 +8,7 @@
 is_featured = False
 is_able_to_autoresolve = True
 is_demo_alert_enabled = True
-description = """
-Alerts from Grafana Alertmanager are automatically routed to this integration.
-{% for dict_item in grafana_alerting_entities %}
-<br>Click <a href="{{dict_item.contact_point_url}}">here</a>
- to open contact point, and
- <a href="{{dict_item.routes_url}}">here</a>
- to open routes for {{dict_item.alertmanager_name}} Alertmanager.
-{% endfor %}
-{% if not is_finished_alerting_setup %}
-<br>Creating contact points and routes for other alertmanagers...
-{% endif %}"""
+description = None
 
 # Web
 web_title = """{{- payload.get("labels", {}).get("alertname", "No title (check Title Template)") -}}"""
diff --git a/engine/config_integrations/alertmanager_v2.py b/engine/config_integrations/alertmanager_v2.py
new file mode 100644
index 00000000..7fe88baa
--- /dev/null
+++ b/engine/config_integrations/alertmanager_v2.py
@@ -0,0 +1,281 @@
+# Main
+enabled = True
+title = "AlertManagerV2"
+slug = "alertmanager_v2"
+short_description = "Prometheus"
+is_displayed_on_web = True
+is_featured = False
+is_able_to_autoresolve = True
+is_demo_alert_enabled = True
+description = None
+
+
+# Behaviour
+source_link = "{{ payload.externalURL }}"
+
+grouping_id = "{{ payload.groupKey }}"
+
+resolve_condition = """{{ payload.status == "resolved" }}"""
+
+acknowledge_condition = None
+
+
+web_title = """\
+{%- set groupLabels = payload.groupLabels.copy() -%}
+{%- set alertname = groupLabels.pop('alertname') | default("") -%}
+
+
+[{{ payload.status }}{% if payload.status == 'firing' %}:{{ payload.numFiring }}{% endif %}] {{ alertname }} {% if groupLabels | length > 0 %}({{ groupLabels|join(", ") }}){% endif %}
+"""  # noqa
+
+web_message = """\
+{%- set annotations = payload.commonAnnotations.copy() -%}
+
+{% set severity = payload.groupLabels.severity -%}
+{% if severity %}
+{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
+Severity: {{ severity }} {{ severity_emoji }}
+{% endif %}
+
+{%- set status = payload.status | default("Unknown") %}
+{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
+Status: {{ status }} {{ status_emoji }} (on the source)
+{% if status == "firing" %}
+Firing alerts – {{ payload.numFiring }}
+Resolved alerts – {{ payload.numResolved }}
+{% endif %}
+
+{% if "runbook_url" in annotations -%}
+[:book: Runbook:link:]({{ annotations.runbook_url }})
+{%- set _ = annotations.pop('runbook_url') -%}
+{%- endif %}
+
+{%- if "runbook_url_internal" in annotations -%}
+[:closed_book: Runbook (internal):link:]({{ annotations.runbook_url_internal }})
+{%- set _ = annotations.pop('runbook_url_internal') -%}
+{%- endif %}
+
+GroupLabels:
+{%- for k, v in payload["groupLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+
+{% if payload["commonLabels"] | length > 0 -%}
+CommonLabels:
+{%- for k, v in payload["commonLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+{% if annotations | length > 0 -%}
+Annotations:
+{%- for k, v in annotations.items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+[View in AlertManager]({{ source_link }})
+"""
+
+
+# Slack templates
+slack_title = """\
+{%- set groupLabels = payload.groupLabels.copy() -%}
+{%- set alertname = groupLabels.pop('alertname') | default("") -%}
+*<{{ grafana_oncall_link }}|#{{ grafana_oncall_incident_id }} {{ web_title }}>* via {{ integration_name }}
+{% if source_link %}
+ (*<{{ source_link }}|source>*)
+{%- endif %}
+"""
+
+# The default Slack message template is identical to the web message template, except for URLs.
+# It could be derived from the web message template (see the example below), but that might affect existing templates:
+# slack_message = """
+# {% set mkdwn_link_regex = "\[([\w\s\d:]+)\]\((https?:\/\/[\w\d./?=#]+)\)" %}
+# {{ web_message
+# | regex_replace(mkdwn_link_regex, "<\\2|\\1>")
+# }}
+# """
+
+slack_message = """\
+{%- set annotations = payload.commonAnnotations.copy() -%}
+
+{% set severity = payload.groupLabels.severity -%}
+{% if severity %}
+{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
+Severity: {{ severity }} {{ severity_emoji }}
+{% endif %}
+
+{%- set status = payload.status | default("Unknown") %}
+{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
+Status: {{ status }} {{ status_emoji }} (on the source)
+{% if status == "firing" %}
+Firing alerts – {{ payload.numFiring }}
+Resolved alerts – {{ payload.numResolved }}
+{% endif %}
+
+{% if "runbook_url" in annotations -%}
+<{{ annotations.runbook_url }}|:book: Runbook:link:>
+{%- set _ = annotations.pop('runbook_url') -%}
+{%- endif %}
+
+{%- if "runbook_url_internal" in annotations -%}
+<{{ annotations.runbook_url_internal }}|:closed_book: Runbook (internal):link:>
+{%- set _ = annotations.pop('runbook_url_internal') -%}
+{%- endif %}
+
+GroupLabels:
+{%- for k, v in payload["groupLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+
+{% if payload["commonLabels"] | length > 0 -%}
+CommonLabels:
+{%- for k, v in payload["commonLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+{% if annotations | length > 0 -%}
+Annotations:
+{%- for k, v in annotations.items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+"""  # noqa: W291
+
+
+slack_image_url = None
+
+web_image_url = None
+
+sms_title = web_title
+
+
+phone_call_title = """{{ payload.groupLabels|join(", ") }}"""
+
+telegram_title = web_title
+
+telegram_message = """\
+{%- set annotations = payload.commonAnnotations.copy() -%}
+
+{% set severity = payload.groupLabels.severity -%}
+{% if severity %}
+{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
+Severity: {{ severity }} {{ severity_emoji }}
+{% endif %}
+
+{%- set status = payload.status | default("Unknown") %}
+{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
+Status: {{ status }} {{ status_emoji }} (on the source)
+{% if status == "firing" %}
+Firing alerts – {{ payload.numFiring }}
+Resolved alerts – {{ payload.numResolved }}
+{% endif %}
+
+{% if "runbook_url" in annotations -%}
+<a href="{{ annotations.runbook_url }}">:book: Runbook:link:</a>
+{%- set _ = annotations.pop('runbook_url') -%}
+{%- endif %}
+
+{%- if "runbook_url_internal" in annotations -%}
+<a href="{{ annotations.runbook_url_internal }}">:closed_book: Runbook (internal):link:</a>
+{%- set _ = annotations.pop('runbook_url_internal') -%}
+{%- endif %}
+
+GroupLabels:
+{%- for k, v in payload["groupLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+
+{% if payload["commonLabels"] | length > 0 -%}
+CommonLabels:
+{%- for k, v in payload["commonLabels"].items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+{% if annotations | length > 0 -%}
+Annotations:
+{%- for k, v in annotations.items() %}
+- {{ k }}: {{ v }}
+{%- endfor %}
+{% endif %}
+
+<a href="{{ source_link }}">View in AlertManager</a>
+"""
+
+telegram_image_url = None
+
+
+example_payload = {
+    "alerts": [
+        {
+            "endsAt": "0001-01-01T00:00:00Z",
+            "labels": {
+                "job": "node",
+                "group": "production",
+                "instance": "localhost:8081",
+                "severity": "critical",
+                "alertname": "InstanceDown",
+            },
+            "status": "firing",
+            "startsAt": "2023-06-12T08:24:38.326Z",
+            "annotations": {
+                "title": "Instance localhost:8081 down",
+                "description": "localhost:8081 of job node has been down for more than 1 minute.",
+            },
+            "fingerprint": "f404ecabc8dd5cd7",
+            "generatorURL": "",
+        },
+        {
+            "endsAt": "0001-01-01T00:00:00Z",
+            "labels": {
+                "job": "node",
+                "group": "canary",
+                "instance": "localhost:8082",
+                "severity": "critical",
+                "alertname": "InstanceDown",
+            },
+            "status": "firing",
+            "startsAt": "2023-06-12T08:24:38.326Z",
+            "annotations": {
+                "title": "Instance localhost:8082 down",
+                "description": "localhost:8082 of job node has been down for more than 1 minute.",
+            },
+            "fingerprint": "f8f08d4e32c61a9d",
+            "generatorURL": "",
+        },
+        {
+            "endsAt": "0001-01-01T00:00:00Z",
+            "labels": {
+                "job": "node",
+                "group": "production",
+                "instance": "localhost:8083",
+                "severity": "critical",
+                "alertname": "InstanceDown",
+            },
+            "status": "firing",
+            "startsAt": "2023-06-12T08:24:38.326Z",
+            "annotations": {
+                "title": "Instance localhost:8083 down",
+                "description": "localhost:8083 of job node has been down for more than 1 minute.",
+            },
+            "fingerprint": "39f38c0611ee7abd",
+            "generatorURL": "",
+        },
+    ],
+    "status": "firing",
+    "version": "4",
+    "groupKey": '{}:{alertname="InstanceDown"}',
+    "receiver": "combo",
+    "numFiring": 3,
+    "externalURL": "",
+    "groupLabels": {"alertname": "InstanceDown"},
+    "numResolved": 0,
+    "commonLabels": {"job": "node", "severity": "critical", "alertname": "InstanceDown"},
+    "truncatedBytes": 0,
+    "truncatedAlerts": 0,
+    "commonAnnotations": {},
+}
diff --git a/engine/settings/base.py b/engine/settings/base.py
index aa2adca4..f62acdb4 100644
--- a/engine/settings/base.py
+++ b/engine/settings/base.py
@@ -660,6 +660,7 @@ INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN")
 INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET")
 
 INSTALLED_ONCALL_INTEGRATIONS = [
+    "config_integrations.alertmanager_v2",
     "config_integrations.alertmanager",
    "config_integrations.grafana",
     "config_integrations.grafana_alerting",