Draft AlertManager integration v2 (#2167)

# What this PR does
Introduces AlertManagerV2 integration with better grouping and
autoresolving, not intended for production use yet.

---------

Co-authored-by: Ildar Iskhakov <Ildar.iskhakov@grafana.com>
This commit is contained in:
Innokentii Konstantinov 2023-06-13 15:10:38 +08:00 committed by GitHub
parent 40890e26ba
commit f0f2e7c8c6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 344 additions and 11 deletions

View file

@ -0,0 +1,20 @@
<h4>How to start sending alerts to Grafana OnCall from AlertManager</h4>
<p><ol>
<li>
1. Add the new receiver to the AlertManager configuration file:
<pre>
receivers:
- name: 'grafana_oncall'
webhook_configs:
- url: {{ alert_receive_channel.integration_url }}
max_alerts: 100
</pre>
2. Use receiver in route tree:
<pre>
routes:
- matchers:
- severity="critical"
receiver: grafana_oncall
</pre>
</li>
</ol></p>

View file

@ -8,6 +8,7 @@ from common.api_helpers.optional_slash_router import optional_slash_path
from .views import (
AlertManagerAPIView,
AlertManagerV2View,
AmazonSNS,
GrafanaAlertingAPIView,
GrafanaAPIView,
@ -33,6 +34,7 @@ urlpatterns = [
path("alertmanager/<str:alert_channel_key>/", AlertManagerAPIView.as_view(), name="alertmanager"),
path("amazon_sns/<str:alert_channel_key>/", AmazonSNS.as_view(), name="amazon_sns"),
path("heartbeat/<str:alert_channel_key>/", HeartBeatAPIView.as_view(), name="heartbeat"),
path("alertmanager_testing/<str:alert_channel_key>/", AlertManagerV2View.as_view(), name="alertmanager_v2"),
path("<str:integration_type>/<str:alert_channel_key>/", UniversalAPIView.as_view(), name="universal"),
]

View file

@ -391,3 +391,39 @@ class IntegrationHeartBeatAPIView(AlertChannelDefiningMixin, IntegrationHeartBea
process_heartbeat_task.apply_async(
(alert_receive_channel.pk,),
)
class AlertManagerV2View(BrowsableInstructionMixin, AlertChannelDefiningMixin, IntegrationRateLimitMixin, APIView):
    """
    AlertManagerV2View consumes alerts from AlertManager. It expects data to be in format of AM webhook receiver.

    The whole webhook body is forwarded as a single alert (one `create_alert`
    task per request) rather than being split per entry of `alerts`.
    """

    def post(self, request, *args, **kwargs):
        """
        Accept one AlertManager webhook request and queue it for alert creation.

        Returns HttpResponseBadRequest when the alert receive channel attached
        to the request is not an AlertManagerV2 integration; otherwise queues
        `create_alert` with the payload as `raw_request_data` and returns
        `Response("Ok.")`.
        """
        # NOTE(review): `alert_receive_channel` is presumably attached to the request
        # by AlertChannelDefiningMixin — confirm against the mixin.
        alert_receive_channel = self.request.alert_receive_channel
        if alert_receive_channel.integration != AlertReceiveChannel.INTEGRATION_ALERTMANAGER_V2:
            # Trailing space keeps the two sentences from running together.
            return HttpResponseBadRequest(
                f"This url is for integration with {alert_receive_channel.config.title}. "
                f"Key is for {alert_receive_channel.get_integration_display()}"
            )

        data = request.data
        if "numFiring" not in data:
            # Count firing and resolved alerts manually if not present in payload.
            # Tolerate `"alerts": null` and entries without a "status" key (or that
            # are not objects at all) instead of failing the whole request.
            alerts = data.get("alerts") or []
            statuses = [alert.get("status") for alert in alerts if isinstance(alert, dict)]
            data = {
                **data,
                "numFiring": statuses.count("firing"),
                "numResolved": statuses.count("resolved"),
            }

        create_alert.apply_async(
            [],
            {
                "title": None,
                "message": None,
                "image_url": None,
                "link_to_upstream_details": None,
                "alert_receive_channel_pk": alert_receive_channel.pk,
                "integration_unique_data": None,
                "raw_request_data": data,
            },
        )
        return Response("Ok.")

View file

@ -117,6 +117,9 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main
default_route_data = validated_data.pop("default_route", None)
organization = self.context["request"].auth.organization
integration = validated_data.get("integration")
# hack to block alertmanager_v2 integration, will be removed
if integration == "alertmanager_v2":
raise BadRequest
if integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING:
connection_error = GrafanaAlertingSyncManager.check_for_connection_errors(organization)
if connection_error:

View file

@ -8,17 +8,7 @@ is_featured = False
is_able_to_autoresolve = True
is_demo_alert_enabled = True
description = """
Alerts from Grafana Alertmanager are automatically routed to this integration.
{% for dict_item in grafana_alerting_entities %}
<br>Click <a href='{{dict_item.contact_point_url}}' target='_blank'>here</a>
to open contact point, and
<a href='{{dict_item.routes_url}}' target='_blank'>here</a>
to open routes for {{dict_item.alertmanager_name}} Alertmanager.
{% endfor %}
{% if not is_finished_alerting_setup %}
<br>Creating contact points and routes for other alertmanagers...
{% endif %}"""
description = None
# Web
web_title = """{{- payload.get("labels", {}).get("alertname", "No title (check Title Template)") -}}"""

View file

@ -0,0 +1,281 @@
# Main
# Identity and feature flags of the AlertManagerV2 integration config.
enabled = True
title = "AlertManagerV2"
slug = "alertmanager_v2"
short_description = "Prometheus"
is_displayed_on_web = True
is_featured = False
is_able_to_autoresolve = True
is_demo_alert_enabled = True
description = None
# Behaviour
# Template strings below read fields of the incoming webhook body via `payload`.
# Link back to the sender, taken from the payload's externalURL field.
source_link = "{{ payload.externalURL }}"
# Grouping key is the payload's groupKey field.
grouping_id = "{{ payload.groupKey }}"
# Evaluates to true when the payload-level status is "resolved".
resolve_condition = """{{ payload.status == "resolved" }}"""
# No acknowledge condition is configured.
acknowledge_condition = None
# Title template: "[<status>[:<numFiring>]] <alertname> (<remaining group labels>)".
# `alertname` is popped from a copy of groupLabels so it is not repeated in the
# parenthesised list. The default is passed to `pop` itself: `pop('alertname')`
# raises KeyError when the group has no such label, and a trailing `| default`
# filter never gets a chance to run.
# NOTE(review): `groupLabels|join` iterates the mapping, so it presumably renders
# label names rather than values — confirm that is intended.
web_title = """\
{%- set groupLabels = payload.groupLabels.copy() -%}
{%- set alertname = groupLabels.pop('alertname', "") -%}
[{{ payload.status }}{% if payload.status == 'firing' %}:{{ payload.numFiring }}{% endif %}] {{ alertname }} {% if groupLabels | length > 0 %}({{ groupLabels|join(", ") }}){% endif %}
"""  # noqa
# Web message template. Sections, in order:
#   - severity line (only when groupLabels has a severity), with an emoji lookup
#   - payload status line, plus firing/resolved counts while status is "firing"
#   - runbook links as [text](url); both runbook keys are popped from a copy of
#     commonAnnotations so they are not listed again under "Annotations"
#   - GroupLabels, then CommonLabels and the remaining Annotations when non-empty
#   - a closing link built from `source_link`
web_message = """\
{%- set annotations = payload.commonAnnotations.copy() -%}
{% set severity = payload.groupLabels.severity -%}
{% if severity %}
{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
Severity: {{ severity }} {{ severity_emoji }}
{% endif %}
{%- set status = payload.status | default("Unknown") %}
{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
Status: {{ status }} {{ status_emoji }} (on the source)
{% if status == "firing" %}
Firing alerts {{ payload.numFiring }}
Resolved alerts {{ payload.numResolved }}
{% endif %}
{% if "runbook_url" in annotations -%}
[:book: Runbook:link:]({{ annotations.runbook_url }})
{%- set _ = annotations.pop('runbook_url') -%}
{%- endif %}
{%- if "runbook_url_internal" in annotations -%}
[:closed_book: Runbook (internal):link:]({{ annotations.runbook_url_internal }})
{%- set _ = annotations.pop('runbook_url_internal') -%}
{%- endif %}
GroupLabels:
{%- for k, v in payload["groupLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% if payload["commonLabels"] | length > 0 -%}
CommonLabels:
{%- for k, v in payload["commonLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
{% if annotations | length > 0 -%}
Annotations:
{%- for k, v in annotations.items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
[View in AlertManager]({{ source_link }})
"""
# Slack templates
# Title: a bold Slack link to the alert group, labelled with the incident id and
# the web title, followed by the integration name and, when `source_link` is
# set, a bold "source" link.
# The template does not compute `groupLabels` / `alertname` itself: nothing here
# reads them, and `pop('alertname')` would raise KeyError for groups that do
# not have that label.
slack_title = """\
*<{{ grafana_oncall_link }}|#{{ grafana_oncall_incident_id }} {{ web_title }}>* via {{ integration_name }}
{% if source_link %}
(*<{{ source_link }}|source>*)
{%- endif %}
"""
# default slack message template is identical to web message template, except urls
# It can be based on web message template (see example), but it can affect existing templates
# slack_message = """
# {% set mkdwn_link_regex = "\[([\w\s\d:]+)\]\((https?:\/\/[\w\d./?=#]+)\)" %}
# {{ web_message
# | regex_replace(mkdwn_link_regex, "<\\2|\\1>")
# }}
# """
# Slack message template: same sections as the web message, with runbook links
# written as <url|text>. It has no closing source link.
slack_message = """\
{%- set annotations = payload.commonAnnotations.copy() -%}
{% set severity = payload.groupLabels.severity -%}
{% if severity %}
{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
Severity: {{ severity }} {{ severity_emoji }}
{% endif %}
{%- set status = payload.status | default("Unknown") %}
{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
Status: {{ status }} {{ status_emoji }} (on the source)
{% if status == "firing" %}
Firing alerts {{ payload.numFiring }}
Resolved alerts {{ payload.numResolved }}
{% endif %}
{% if "runbook_url" in annotations -%}
<{{ annotations.runbook_url }}|:book: Runbook:link:>
{%- set _ = annotations.pop('runbook_url') -%}
{%- endif %}
{%- if "runbook_url_internal" in annotations -%}
<{{ annotations.runbook_url_internal }}|:closed_book: Runbook (internal):link:>
{%- set _ = annotations.pop('runbook_url_internal') -%}
{%- endif %}
GroupLabels:
{%- for k, v in payload["groupLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% if payload["commonLabels"] | length > 0 -%}
CommonLabels:
{%- for k, v in payload["commonLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
{% if annotations | length > 0 -%}
Annotations:
{%- for k, v in annotations.items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
"""
# noqa: W291
# No image templates are defined for Slack or web.
slack_image_url = None
web_image_url = None
# SMS reuses the web title template unchanged.
sms_title = web_title
# NOTE(review): `join` iterates payload.groupLabels directly; for a mapping that
# presumably yields the label names, not their values — confirm this is the
# text wanted in a phone call.
phone_call_title = """{{ payload.groupLabels|join(", ") }}"""
# Telegram reuses the web title template unchanged.
telegram_title = web_title
# Telegram message template: same sections as the web message, with the runbook
# links and the closing source link written as HTML <a href='...'> anchors.
telegram_message = """\
{%- set annotations = payload.commonAnnotations.copy() -%}
{% set severity = payload.groupLabels.severity -%}
{% if severity %}
{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
Severity: {{ severity }} {{ severity_emoji }}
{% endif %}
{%- set status = payload.status | default("Unknown") %}
{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
Status: {{ status }} {{ status_emoji }} (on the source)
{% if status == "firing" %}
Firing alerts {{ payload.numFiring }}
Resolved alerts {{ payload.numResolved }}
{% endif %}
{% if "runbook_url" in annotations -%}
<a href='{{ annotations.runbook_url }}'>:book: Runbook:link:</a>
{%- set _ = annotations.pop('runbook_url') -%}
{%- endif %}
{%- if "runbook_url_internal" in annotations -%}
<a href='{{ annotations.runbook_url_internal }}'>:closed_book: Runbook (internal):link:</a>
{%- set _ = annotations.pop('runbook_url_internal') -%}
{%- endif %}
GroupLabels:
{%- for k, v in payload["groupLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% if payload["commonLabels"] | length > 0 -%}
CommonLabels:
{%- for k, v in payload["commonLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
{% if annotations | length > 0 -%}
Annotations:
{%- for k, v in annotations.items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
<a href='{{ source_link }}'>View in AlertManager</a>
"""
# No image template is defined for Telegram.
telegram_image_url = None
# Sample webhook body: one "InstanceDown" group holding three firing alerts.
# The alerts differ only in group, instance and fingerprint, so they are
# generated from that table; every other field is shared.
example_payload = {
    "alerts": [
        {
            "endsAt": "0001-01-01T00:00:00Z",
            "labels": {
                "job": "node",
                "group": group,
                "instance": instance,
                "severity": "critical",
                "alertname": "InstanceDown",
            },
            "status": "firing",
            "startsAt": "2023-06-12T08:24:38.326Z",
            "annotations": {
                "title": f"Instance {instance} down",
                "description": f"{instance} of job node has been down for more than 1 minute.",
            },
            "fingerprint": fingerprint,
            "generatorURL": "",
        }
        for group, instance, fingerprint in (
            ("production", "localhost:8081", "f404ecabc8dd5cd7"),
            ("canary", "localhost:8082", "f8f08d4e32c61a9d"),
            ("production", "localhost:8083", "39f38c0611ee7abd"),
        )
    ],
    "status": "firing",
    "version": "4",
    "groupKey": '{}:{alertname="InstanceDown"}',
    "receiver": "combo",
    "numFiring": 3,
    "externalURL": "",
    "groupLabels": {"alertname": "InstanceDown"},
    "numResolved": 0,
    "commonLabels": {"job": "node", "severity": "critical", "alertname": "InstanceDown"},
    "truncatedBytes": 0,
    "truncatedAlerts": 0,
    "commonAnnotations": {},
}

View file

@ -660,6 +660,7 @@ INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN")
INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET")
INSTALLED_ONCALL_INTEGRATIONS = [
"config_integrations.alertmanager_v2",
"config_integrations.alertmanager",
"config_integrations.grafana",
"config_integrations.grafana_alerting",