Draft AlertManager integration v2 (#2167)
# What this PR does Introduces AlertManagerV2 integration with better grouping and autoresolving, not intended for production use yet. --------- Co-authored-by: Ildar Iskhakov <Ildar.iskhakov@grafana.com>
This commit is contained in:
parent
40890e26ba
commit
f0f2e7c8c6
7 changed files with 344 additions and 11 deletions
|
|
@ -0,0 +1,20 @@
|
|||
<h4>How to start sending alerts to Grafana OnCall from AlertManager</h4>
|
||||
<p><ol>
|
||||
<li>
|
||||
1. Add the new receiver to the AlertManager configuration file:
|
||||
<pre>
|
||||
receivers:
|
||||
- name: 'grafana_oncall'
|
||||
webhook_configs:
|
||||
- url: {{ alert_receive_channel.integration_url }}
|
||||
max_alerts: 100
|
||||
</pre>
|
||||
2. Use receiver in route tree:
|
||||
<pre>
|
||||
routes:
|
||||
- matchers:
|
||||
- severity="critical"
|
||||
receiver: grafana_oncall
|
||||
</pre>
|
||||
</li>
|
||||
</ol></p>
|
||||
|
|
@ -8,6 +8,7 @@ from common.api_helpers.optional_slash_router import optional_slash_path
|
|||
|
||||
from .views import (
|
||||
AlertManagerAPIView,
|
||||
AlertManagerV2View,
|
||||
AmazonSNS,
|
||||
GrafanaAlertingAPIView,
|
||||
GrafanaAPIView,
|
||||
|
|
@ -33,6 +34,7 @@ urlpatterns = [
|
|||
path("alertmanager/<str:alert_channel_key>/", AlertManagerAPIView.as_view(), name="alertmanager"),
|
||||
path("amazon_sns/<str:alert_channel_key>/", AmazonSNS.as_view(), name="amazon_sns"),
|
||||
path("heartbeat/<str:alert_channel_key>/", HeartBeatAPIView.as_view(), name="heartbeat"),
|
||||
path("alertmanager_testing/<str:alert_channel_key>/", AlertManagerV2View.as_view(), name="alertmanager_v2"),
|
||||
path("<str:integration_type>/<str:alert_channel_key>/", UniversalAPIView.as_view(), name="universal"),
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -391,3 +391,39 @@ class IntegrationHeartBeatAPIView(AlertChannelDefiningMixin, IntegrationHeartBea
|
|||
process_heartbeat_task.apply_async(
|
||||
(alert_receive_channel.pk,),
|
||||
)
|
||||
|
||||
|
||||
class AlertManagerV2View(BrowsableInstructionMixin, AlertChannelDefiningMixin, IntegrationRateLimitMixin, APIView):
    """
    AlertManagerV2View consumes alerts from AlertManager. It expects data to be in format of AM webhook receiver.
    """

    def post(self, request, *args, **kwargs):
        """Accept one AlertManager webhook payload and enqueue it as a single grouped alert.

        Returns HTTP 400 when the channel key belongs to a different integration type;
        otherwise schedules ``create_alert`` asynchronously and responds with "Ok.".
        """
        alert_receive_channel = self.request.alert_receive_channel
        # Reject keys issued for other integration types pointed at this endpoint.
        if alert_receive_channel.integration != AlertReceiveChannel.INTEGRATION_ALERTMANAGER_V2:
            return HttpResponseBadRequest(
                f"This url is for integration with {alert_receive_channel.config.title}. "
                f"Key is for {alert_receive_channel.get_integration_display()}"
            )
        alerts = request.data.get("alerts", [])

        data = request.data
        if "numFiring" not in request.data:
            # Count firing and resolved alerts manually if not present in payload
            # (older AlertManager versions may not send numFiring/numResolved).
            # .get() keeps a malformed alert entry from raising KeyError here.
            num_firing = sum(1 for a in alerts if a.get("status") == "firing")
            num_resolved = sum(1 for a in alerts if a.get("status") == "resolved")
            data = {**request.data, "numFiring": num_firing, "numResolved": num_resolved}

        create_alert.apply_async(
            [],
            {
                # Title/message/etc. are rendered later from the integration templates,
                # so nothing is precomputed here — only the raw payload is forwarded.
                "title": None,
                "message": None,
                "image_url": None,
                "link_to_upstream_details": None,
                "alert_receive_channel_pk": alert_receive_channel.pk,
                "integration_unique_data": None,
                "raw_request_data": data,
            },
        )
        return Response("Ok.")
|
||||
|
|
|
|||
|
|
@ -117,6 +117,9 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main
|
|||
default_route_data = validated_data.pop("default_route", None)
|
||||
organization = self.context["request"].auth.organization
|
||||
integration = validated_data.get("integration")
|
||||
# hack to block alertmanager_v2 integration, will be removed
|
||||
if integration == "alertmanager_v2":
|
||||
raise BadRequest
|
||||
if integration == AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING:
|
||||
connection_error = GrafanaAlertingSyncManager.check_for_connection_errors(organization)
|
||||
if connection_error:
|
||||
|
|
|
|||
|
|
@ -8,17 +8,7 @@ is_featured = False
|
|||
is_able_to_autoresolve = True
|
||||
is_demo_alert_enabled = True
|
||||
|
||||
description = """
|
||||
Alerts from Grafana Alertmanager are automatically routed to this integration.
|
||||
{% for dict_item in grafana_alerting_entities %}
|
||||
<br>Click <a href='{{dict_item.contact_point_url}}' target='_blank'>here</a>
|
||||
to open contact point, and
|
||||
<a href='{{dict_item.routes_url}}' target='_blank'>here</a>
|
||||
to open routes for {{dict_item.alertmanager_name}} Alertmanager.
|
||||
{% endfor %}
|
||||
{% if not is_finished_alerting_setup %}
|
||||
<br>Creating contact points and routes for other alertmanagers...
|
||||
{% endif %}"""
|
||||
description = None
|
||||
|
||||
# Web
|
||||
web_title = """{{- payload.get("labels", {}).get("alertname", "No title (check Title Template)") -}}"""
|
||||
|
|
|
|||
281
engine/config_integrations/alertmanager_v2.py
Normal file
281
engine/config_integrations/alertmanager_v2.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
"""Integration config for AlertManagerV2 — consumes AlertManager webhook payloads as a whole group."""
# Main
# Metadata consumed by the OnCall integrations registry.
enabled = True
title = "AlertManagerV2"
slug = "alertmanager_v2"
short_description = "Prometheus"
is_displayed_on_web = True
is_featured = False
is_able_to_autoresolve = True  # alert groups resolve automatically via resolve_condition below
is_demo_alert_enabled = True  # demo alerts use example_payload at the bottom of this file
description = None
|
||||
|
||||
|
||||
# Behaviour
# Jinja2 templates evaluated against the raw AlertManager webhook payload ("payload").
source_link = "{{ payload.externalURL }}"

# Group by AlertManager's own groupKey so one OnCall alert group maps to one AM group.
grouping_id = "{{ payload.groupKey }}"

# Top-level "status" flips to "resolved" when every alert in the AM group resolved.
resolve_condition = """{{ payload.status == "resolved" }}"""

acknowledge_condition = None


# Web title, e.g. "[firing:2] AlertName (extra_group_label, ...)".
# NOTE(review): groupLabels|join(", ") joins the remaining label *names*, not values — confirm intended.
web_title = """\
{%- set groupLabels = payload.groupLabels.copy() -%}
{%- set alertname = groupLabels.pop('alertname') | default("") -%}


[{{ payload.status }}{% if payload.status == 'firing' %}:{{ payload.numFiring }}{% endif %}] {{ alertname }} {% if groupLabels | length > 0 %}({{ groupLabels|join(", ") }}){% endif %}
"""  # noqa
|
||||
|
||||
web_message = """\
|
||||
{%- set annotations = payload.commonAnnotations.copy() -%}
|
||||
|
||||
{% set severity = payload.groupLabels.severity -%}
|
||||
{% if severity %}
|
||||
{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
|
||||
Severity: {{ severity }} {{ severity_emoji }}
|
||||
{% endif %}
|
||||
|
||||
{%- set status = payload.status | default("Unknown") %}
|
||||
{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
|
||||
Status: {{ status }} {{ status_emoji }} (on the source)
|
||||
{% if status == "firing" %}
|
||||
Firing alerts – {{ payload.numFiring }}
|
||||
Resolved alerts – {{ payload.numResolved }}
|
||||
{% endif %}
|
||||
|
||||
{% if "runbook_url" in annotations -%}
|
||||
[:book: Runbook:link:]({{ annotations.runbook_url }})
|
||||
{%- set _ = annotations.pop('runbook_url') -%}
|
||||
{%- endif %}
|
||||
|
||||
{%- if "runbook_url_internal" in annotations -%}
|
||||
[:closed_book: Runbook (internal):link:]({{ annotations.runbook_url_internal }})
|
||||
{%- set _ = annotations.pop('runbook_url_internal') -%}
|
||||
{%- endif %}
|
||||
|
||||
GroupLabels:
|
||||
{%- for k, v in payload["groupLabels"].items() %}
|
||||
- {{ k }}: {{ v }}
|
||||
{%- endfor %}
|
||||
|
||||
{% if payload["commonLabels"] | length > 0 -%}
|
||||
CommonLabels:
|
||||
{%- for k, v in payload["commonLabels"].items() %}
|
||||
- {{ k }}: {{ v }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
|
||||
{% if annotations | length > 0 -%}
|
||||
Annotations:
|
||||
{%- for k, v in annotations.items() %}
|
||||
- {{ k }}: {{ v }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
|
||||
[View in AlertManager]({{ source_link }})
|
||||
"""
|
||||
|
||||
|
||||
# Slack templates
# Slack title links the OnCall incident and, when present, the AlertManager source.
slack_title = """\
{%- set groupLabels = payload.groupLabels.copy() -%}
{%- set alertname = groupLabels.pop('alertname') | default("") -%}
*<{{ grafana_oncall_link }}|#{{ grafana_oncall_incident_id }} {{ web_title }}>* via {{ integration_name }}
{% if source_link %}
(*<{{ source_link }}|source>*)
{%- endif %}
"""

# default slack message template is identical to web message template, except urls
# It can be based on web message template (see example), but it can affect existing templates
# slack_message = """
# {% set mkdwn_link_regex = "\[([\w\s\d:]+)\]\((https?:\/\/[\w\d./?=#]+)\)" %}
# {{ web_message
# | regex_replace(mkdwn_link_regex, "<\\2|\\1>")
# }}
# """

# Same structure as web_message, but links use Slack's <url|text> syntax and the
# trailing "View in AlertManager" line is omitted (slack_title already links the source).
slack_message = """\
{%- set annotations = payload.commonAnnotations.copy() -%}

{% set severity = payload.groupLabels.severity -%}
{% if severity %}
{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
Severity: {{ severity }} {{ severity_emoji }}
{% endif %}

{%- set status = payload.status | default("Unknown") %}
{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
Status: {{ status }} {{ status_emoji }} (on the source)
{% if status == "firing" %}
Firing alerts – {{ payload.numFiring }}
Resolved alerts – {{ payload.numResolved }}
{% endif %}

{% if "runbook_url" in annotations -%}
<{{ annotations.runbook_url }}|:book: Runbook:link:>
{%- set _ = annotations.pop('runbook_url') -%}
{%- endif %}

{%- if "runbook_url_internal" in annotations -%}
<{{ annotations.runbook_url_internal }}|:closed_book: Runbook (internal):link:>
{%- set _ = annotations.pop('runbook_url_internal') -%}
{%- endif %}

GroupLabels:
{%- for k, v in payload["groupLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}

{% if payload["commonLabels"] | length > 0 -%}
CommonLabels:
{%- for k, v in payload["commonLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}

{% if annotations | length > 0 -%}
Annotations:
{%- for k, v in annotations.items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}
"""  # noqa: W291
||||
|
||||
slack_image_url = None

web_image_url = None

# SMS reuses the compact one-line web title.
sms_title = web_title


# Phone call title should stay short for text-to-speech.
# NOTE(review): dict|join yields the group label *names*, not values — confirm intended.
phone_call_title = """{{ payload.groupLabels|join(", ") }}"""

telegram_title = web_title
||||
# Same structure as web_message, but links use Telegram-compatible HTML anchors.
telegram_message = """\
{%- set annotations = payload.commonAnnotations.copy() -%}

{% set severity = payload.groupLabels.severity -%}
{% if severity %}
{%- set severity_emoji = {"critical": ":rotating_light:", "warning": ":warning:" }[severity] | default(":question:") -%}
Severity: {{ severity }} {{ severity_emoji }}
{% endif %}

{%- set status = payload.status | default("Unknown") %}
{%- set status_emoji = {"firing": ":fire:", "resolved": ":white_check_mark:"}[status] | default(":warning:") %}
Status: {{ status }} {{ status_emoji }} (on the source)
{% if status == "firing" %}
Firing alerts – {{ payload.numFiring }}
Resolved alerts – {{ payload.numResolved }}
{% endif %}

{% if "runbook_url" in annotations -%}
<a href='{{ annotations.runbook_url }}'>:book: Runbook:link:</a>
{%- set _ = annotations.pop('runbook_url') -%}
{%- endif %}

{%- if "runbook_url_internal" in annotations -%}
<a href='{{ annotations.runbook_url_internal }}'>:closed_book: Runbook (internal):link:</a>
{%- set _ = annotations.pop('runbook_url_internal') -%}
{%- endif %}

GroupLabels:
{%- for k, v in payload["groupLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}

{% if payload["commonLabels"] | length > 0 -%}
CommonLabels:
{%- for k, v in payload["commonLabels"].items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}

{% if annotations | length > 0 -%}
Annotations:
{%- for k, v in annotations.items() %}
- {{ k }}: {{ v }}
{%- endfor %}
{% endif %}

<a href='{{ source_link }}'>View in AlertManager</a>
"""

telegram_image_url = None
|
||||
# Sample AM webhook payload (3 firing alerts in one group) used for demo alerts
# and template previews; shape matches the AlertManager webhook receiver format.
example_payload = {
    "alerts": [
        {
            "endsAt": "0001-01-01T00:00:00Z",
            "labels": {
                "job": "node",
                "group": "production",
                "instance": "localhost:8081",
                "severity": "critical",
                "alertname": "InstanceDown",
            },
            "status": "firing",
            "startsAt": "2023-06-12T08:24:38.326Z",
            "annotations": {
                "title": "Instance localhost:8081 down",
                "description": "localhost:8081 of job node has been down for more than 1 minute.",
            },
            "fingerprint": "f404ecabc8dd5cd7",
            "generatorURL": "",
        },
        {
            "endsAt": "0001-01-01T00:00:00Z",
            "labels": {
                "job": "node",
                "group": "canary",
                "instance": "localhost:8082",
                "severity": "critical",
                "alertname": "InstanceDown",
            },
            "status": "firing",
            "startsAt": "2023-06-12T08:24:38.326Z",
            "annotations": {
                "title": "Instance localhost:8082 down",
                "description": "localhost:8082 of job node has been down for more than 1 minute.",
            },
            "fingerprint": "f8f08d4e32c61a9d",
            "generatorURL": "",
        },
        {
            "endsAt": "0001-01-01T00:00:00Z",
            "labels": {
                "job": "node",
                "group": "production",
                "instance": "localhost:8083",
                "severity": "critical",
                "alertname": "InstanceDown",
            },
            "status": "firing",
            "startsAt": "2023-06-12T08:24:38.326Z",
            "annotations": {
                "title": "Instance localhost:8083 down",
                "description": "localhost:8083 of job node has been down for more than 1 minute.",
            },
            "fingerprint": "39f38c0611ee7abd",
            "generatorURL": "",
        },
    ],
    "status": "firing",
    "version": "4",
    "groupKey": '{}:{alertname="InstanceDown"}',
    "receiver": "combo",
    "numFiring": 3,
    "externalURL": "",
    "groupLabels": {"alertname": "InstanceDown"},
    "numResolved": 0,
    "commonLabels": {"job": "node", "severity": "critical", "alertname": "InstanceDown"},
    "truncatedBytes": 0,
    "truncatedAlerts": 0,
    "commonAnnotations": {},
}
|
||||
|
|
@ -660,6 +660,7 @@ INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN")
|
|||
INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET")
|
||||
|
||||
INSTALLED_ONCALL_INTEGRATIONS = [
|
||||
"config_integrations.alertmanager_v2",
|
||||
"config_integrations.alertmanager",
|
||||
"config_integrations.grafana",
|
||||
"config_integrations.grafana_alerting",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue