From 3d4ce622cbd2597a905e7d293a42bf1b4845fb63 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Tue, 14 Jan 2025 11:02:23 +0100 Subject: [PATCH] Add default `service_name` label for Alerting integrations (#5373) # What this PR does - The `service_name` label will be added to Grafana Alerting integration when it is created, if it wasn't added by user. - Adds celery task that should be started manually and will add the `service_name` dynamic label to all existing Grafana Alerting integrations. ## Which issue(s) this PR closes Related to https://github.com/grafana/oncall-private/issues/2975 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --------- Co-authored-by: Innokentii Konstantinov --- engine/apps/alerts/constants.py | 4 + engine/apps/alerts/models/alert.py | 12 +- .../alerts/models/alert_receive_channel.py | 54 +++++ engine/apps/alerts/tests/test_alert.py | 4 +- .../api/serializers/alert_receive_channel.py | 196 ++++++++---------- .../api/tests/test_alert_receive_channel.py | 168 +++++++++++++-- engine/apps/api/tests/test_labels.py | 24 +++ engine/apps/api/urls.py | 5 + .../apps/api/views/alert_receive_channel.py | 2 +- engine/apps/api/views/labels.py | 16 +- engine/apps/labels/alert_group_labels.py | 9 +- engine/apps/labels/client.py | 9 + ...vechannelassociatedlabel_inheritable_db.py | 6 +- engine/apps/labels/models.py | 33 +++ engine/apps/labels/tasks.py | 74 ++++++- .../labels/tests/test_add_service_label.py | 40 ++++ engine/apps/labels/tests/test_labels_cache.py | 26 ++- .../public_api/serializers/integrations.py | 3 + engine/settings/celery_task_routes.py | 3 + 19 files changed, 535 insertions(+), 153 deletions(-) rename engine/apps/labels/{ => migrations}/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py (78%) create mode 100644 engine/apps/labels/tests/test_add_service_label.py diff --git a/engine/apps/alerts/constants.py b/engine/apps/alerts/constants.py index 496836ca..c91cabfa 100644 --- a/engine/apps/alerts/constants.py +++ b/engine/apps/alerts/constants.py @@ -25,3 +25,7 @@ class AlertGroupState(str, Enum): ACKNOWLEDGED = "acknowledged" RESOLVED = "resolved" SILENCED = "silenced" + + +SERVICE_LABEL = "service_name" +SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION = "{{ payload.common_labels.service_name }}" diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py index 844cbf67..2f1d1af6 100644 --- a/engine/apps/alerts/models/alert.py +++ b/engine/apps/alerts/models/alert.py @@ -14,7 +14,7 @@ from apps.alerts.constants import TASK_DELAY_SECONDS from apps.alerts.incident_appearance.templaters import TemplateLoader from apps.alerts.signals import alert_group_escalation_snapshot_built from apps.alerts.tasks.distribute_alert import send_alert_create_signal -from apps.labels.alert_group_labels import assign_labels, gather_labels_from_alert_receive_channel_and_raw_request_data +from apps.labels.alert_group_labels import gather_alert_labels, save_alert_group_labels from apps.labels.types import AlertLabels from common.jinja_templater import apply_jinja_template_to_alert_payload_and_labels from common.jinja_templater.apply_jinja_template import ( @@ -106,13 +106,11 @@ class Alert(models.Model): # This import is here to avoid circular imports from apps.alerts.models import AlertGroup, AlertGroupLogRecord, AlertReceiveChannel, ChannelFilter - parsed_labels = gather_labels_from_alert_receive_channel_and_raw_request_data( - alert_receive_channel, raw_request_data - ) - group_data = Alert.render_group_data(alert_receive_channel, raw_request_data, parsed_labels, is_demo) + alert_labels = gather_alert_labels(alert_receive_channel, raw_request_data) + group_data = Alert.render_group_data(alert_receive_channel, raw_request_data, alert_labels, is_demo) if channel_filter is None: - channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, parsed_labels) + channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, alert_labels) # Get or create group group, group_created = AlertGroup.objects.get_or_create_grouping( @@ -141,7 +139,7 @@ class Alert(models.Model): transaction.on_commit(partial(send_alert_create_signal.apply_async, (alert.pk,))) if group_created: - assign_labels(group, alert_receive_channel, parsed_labels) + save_alert_group_labels(group, alert_receive_channel, alert_labels) group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED) group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED) diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index a8089337..791162d8 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -14,6 +14,7 @@ from django.utils import timezone from django.utils.crypto import get_random_string from emoji import emojize +from apps.alerts.constants import SERVICE_LABEL, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import GrafanaAlertingSyncManager from apps.alerts.integration_options_mixin import IntegrationOptionsMixin from apps.alerts.models.maintainable_object import MaintainableObject @@ -24,6 +25,7 @@ from apps.grafana_plugin.ui_url_builder import UIURLBuilder from apps.integrations.legacy_prefix import remove_legacy_prefix from apps.integrations.metadata import heartbeat from apps.integrations.tasks import create_alert, create_alertmanager_alerts +from apps.labels.tasks import add_service_label_for_integration from apps.metrics_exporter.helpers import ( metrics_add_integrations_to_cache, metrics_remove_deleted_integration_from_cache, @@ -48,6 +50,10 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) +class CreatingServiceNameDynamicLabelFailed(Exception): + """Raised when failed to create a dynamic service name label""" + + class MessagingBackendTemplatesItem: title: str | None message: str | None @@ -790,6 +796,54 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject): result["team"] = "General" return result + def create_service_name_dynamic_label(self, is_called_async: bool = False): + """ + create_service_name_dynamic_label creates a dynamic label for service_name for Grafana Alerting integration. + Warning: It might make a request to the labels repo API. + That's why it's called in api handlers, not in post_save. + Once we will have labels operator & get rid of syncing labels from repo, this method should be moved + to post_save. + """ + from apps.labels.models import LabelKeyCache + + if not self.organization.is_grafana_labels_enabled: + return + if self.integration != AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING: + return + + # validate that service_name label doesn't exist in already + service_name_label = LabelKeyCache.objects.filter(organization=self.organization, name=SERVICE_LABEL).first() + + if service_name_label is not None and self.alert_group_labels_custom is not None: + for k, _, _ in self.alert_group_labels_custom: + if k == service_name_label.id: + return + + service_name_dynamic_label = self._build_service_name_label_custom(self.organization) + if service_name_dynamic_label is None: + # if this method was called from a celery task, raise exception to retry it + if is_called_async: + raise CreatingServiceNameDynamicLabelFailed + # otherwise start a celery task to retry the label creation async + add_service_label_for_integration.apply_async((self.id,)) + return + self.alert_group_labels_custom = [service_name_dynamic_label] + (self.alert_group_labels_custom or []) + self.save(update_fields=["alert_group_labels_custom"]) + + @staticmethod + def _build_service_name_label_custom(organization: "Organization") -> DynamicLabelsEntryDB | None: + """ + _build_service_name_label_custom returns `service_name` label template in dynamic label format: + [key_id, None, template]. + If there is no label key service_name in the cache - it tries to fetch it from the labels repo API. + """ + from apps.labels.models import LabelKeyCache + + service_label_key = LabelKeyCache.get_or_create_by_name(organization, SERVICE_LABEL) + return ( + [service_label_key.id, None, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION] if service_label_key else None + ) + @receiver(post_save, sender=AlertReceiveChannel) def listen_for_alertreceivechannel_model_save( diff --git a/engine/apps/alerts/tests/test_alert.py b/engine/apps/alerts/tests/test_alert.py index 33cee023..28839b20 100644 --- a/engine/apps/alerts/tests/test_alert.py +++ b/engine/apps/alerts/tests/test_alert.py @@ -56,8 +56,8 @@ def test_alert_create_custom_channel_filter(make_organization, make_alert_receiv assert alert.group.channel_filter == other_channel_filter -@patch("apps.alerts.models.alert.assign_labels") -@patch("apps.alerts.models.alert.gather_labels_from_alert_receive_channel_and_raw_request_data") +@patch("apps.alerts.models.alert.save_alert_group_labels") +@patch("apps.alerts.models.alert.gather_alert_labels") @patch("apps.alerts.models.ChannelFilter.select_filter", wraps=ChannelFilter.select_filter) @pytest.mark.django_db def test_alert_create_labels_are_assigned( diff --git a/engine/apps/api/serializers/alert_receive_channel.py b/engine/apps/api/serializers/alert_receive_channel.py index 41c0e979..c3fbc58e 100644 --- a/engine/apps/api/serializers/alert_receive_channel.py +++ b/engine/apps/api/serializers/alert_receive_channel.py @@ -13,7 +13,7 @@ from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import Graf from apps.alerts.models import AlertReceiveChannel from apps.base.messaging import get_messaging_backends from apps.integrations.legacy_prefix import has_legacy_prefix -from apps.labels.models import AlertReceiveChannelAssociatedLabel, LabelKeyCache, LabelValueCache +from apps.labels.models import LabelKeyCache, LabelValueCache from apps.labels.types import LabelKey from apps.user_management.models import Organization from common.api_helpers.custom_fields import TeamPrimaryKeyRelatedField @@ -33,7 +33,7 @@ def _additional_settings_serializer_from_type(integration_type: str) -> serializ return cls -# TODO: refactor this types as w no longer support storing static labels in this field. +# TODO: refactor this types as we no longer support storing static labels in this field. # AlertGroupCustomLabelValue represents custom alert group label value for API requests # It handles two types of label's value: # 1. Just Label Value from a label repo for a static label @@ -79,7 +79,10 @@ class AdditionalSettingsField(serializers.DictField): class CustomLabelSerializer(serializers.Serializer): - """This serializer is consistent with apps.api.serializers.labels.LabelPairSerializer, but allows null for value ID.""" + """ + This serializer is consistent with apps.api.serializers.labels.LabelPairSerializer, + but allows null for value ID to support templated labels. + """ class CustomLabelKeySerializer(serializers.Serializer): id = serializers.CharField() @@ -97,98 +100,12 @@ class CustomLabelSerializer(serializers.Serializer): class IntegrationAlertGroupLabelsSerializer(serializers.Serializer): - """Alert group labels configuration for the integration. See AlertReceiveChannel.alert_group_labels for details.""" - # todo: inheritable field is deprecated. Remove in a future release inheritable = serializers.DictField(child=serializers.BooleanField(), required=False) custom = CustomLabelSerializer(many=True) template = serializers.CharField(allow_null=True) - @staticmethod - def pop_alert_group_labels(validated_data: dict) -> IntegrationAlertGroupLabels | None: - """Get alert group labels from validated data.""" - - # the "alert_group_labels" field is optional, so either all 2 fields are present or none - # "inheritable" field is deprecated - if "custom" not in validated_data: - return None - - return { - "inheritable": validated_data.pop("inheritable", None), # deprecated - "custom": validated_data.pop("custom"), - "template": validated_data.pop("template"), - } - - @classmethod - def update( - cls, instance: AlertReceiveChannel, alert_group_labels: IntegrationAlertGroupLabels | None - ) -> AlertReceiveChannel: - if alert_group_labels is None: - return instance - - # update DB cache for custom labels - cls._create_custom_labels(instance.organization, alert_group_labels["custom"]) - # save static labels as integration labels - # todo: it's needed to cover delay between backend and frontend rollout, and can be removed later - cls._save_static_labels_as_integration_labels(instance, alert_group_labels["custom"]) - # update custom labels - instance.alert_group_labels_custom = cls._custom_labels_to_internal_value(alert_group_labels["custom"]) - - # update template - instance.alert_group_labels_template = alert_group_labels["template"] - - instance.save(update_fields=["alert_group_labels_custom", "alert_group_labels_template"]) - return instance - - @staticmethod - def _create_custom_labels(organization: Organization, labels: AlertGroupCustomLabelsAPI) -> None: - """Create LabelKeyCache and LabelValueCache objects for custom labels.""" - - label_keys = [ - LabelKeyCache( - id=label["key"]["id"], - name=label["key"]["name"], - prescribed=label["key"]["prescribed"], - organization=organization, - ) - for label in labels - ] - - label_values = [ - LabelValueCache( - id=label["value"]["id"], - name=label["value"]["name"], - prescribed=label["value"]["prescribed"], - key_id=label["key"]["id"], - ) - for label in labels - if label["value"]["id"] # don't create LabelValueCache objects for templated labels - ] - - LabelKeyCache.objects.bulk_create(label_keys, ignore_conflicts=True, batch_size=5000) - LabelValueCache.objects.bulk_create(label_values, ignore_conflicts=True, batch_size=5000) - - @staticmethod - def _save_static_labels_as_integration_labels(instance: AlertReceiveChannel, labels: AlertGroupCustomLabelsAPI): - labels_associations_to_create = [] - labels_copy = labels[:] - for label in labels_copy: - if label["value"]["id"] is not None: - labels_associations_to_create.append( - AlertReceiveChannelAssociatedLabel( - key_id=label["key"]["id"], - value_id=label["value"]["id"], - organization=instance.organization, - alert_receive_channel=instance, - ) - ) - labels.remove(label) - AlertReceiveChannelAssociatedLabel.objects.bulk_create( - labels_associations_to_create, ignore_conflicts=True, batch_size=5000 - ) - - @classmethod - def to_representation(cls, instance: AlertReceiveChannel) -> IntegrationAlertGroupLabels: + def to_representation(self, instance: AlertReceiveChannel) -> IntegrationAlertGroupLabels: """ The API representation of alert group labels is very different from the underlying model. @@ -200,20 +117,28 @@ class IntegrationAlertGroupLabelsSerializer(serializers.Serializer): return { # todo: "inheritable" field is deprecated, remove in a future release. "inheritable": {label.key_id: True for label in instance.labels.all()}, - "custom": cls._custom_labels_to_representation(instance.alert_group_labels_custom), + "custom": self._custom_labels_to_representation(instance.alert_group_labels_custom), "template": instance.alert_group_labels_template, } - @staticmethod - def _custom_labels_to_internal_value( - custom_labels: AlertGroupCustomLabelsAPI, - ) -> AlertReceiveChannel.DynamicLabelsConfigDB: - """Convert custom labels from API representation to the schema used by the JSONField on the model.""" + def to_internal_value(self, validated_data: dict) -> dict: + """ + to_internal_value converts dynamic labels from API format to internal format and updates labels cache + """ + alert_group_labels = self._pop_alert_group_labels(validated_data) + if alert_group_labels is None: + return validated_data - return [ - [label["key"]["id"], label["value"]["id"], None if label["value"]["id"] else label["value"]["name"]] - for label in custom_labels - ] + organization = self.context["request"].auth.organization + self._create_custom_labels(organization, alert_group_labels["custom"] if alert_group_labels else []) + + custom_labels = ( + self._custom_labels_to_internal_value(alert_group_labels["custom"]) if alert_group_labels else [] + ) + validated_data["alert_group_labels_custom"] = custom_labels or None + validated_data["alert_group_labels_template"] = alert_group_labels["template"] if alert_group_labels else None + + return validated_data @staticmethod def _custom_labels_to_representation( @@ -262,6 +187,63 @@ class IntegrationAlertGroupLabelsSerializer(serializers.Serializer): if key_id in label_key_index and (value_id in label_value_index or not value_id) ] + @staticmethod + def _custom_labels_to_internal_value( + custom_labels: AlertGroupCustomLabelsAPI, + ) -> AlertReceiveChannel.DynamicLabelsConfigDB: + """ + Convert dynamic labels from API representation to the schema used by the JSONField on the model: + [[key.id, None, template(stored in value.name here)]]. + """ + + return [ + [label["key"]["id"], None, label["value"]["name"]] + for label in custom_labels + if label["value"]["id"] is None + # value.id is not None for deprecated static labels, for dynamic labels it's always None + ] + + @staticmethod + def _pop_alert_group_labels(validated_data: dict) -> IntegrationAlertGroupLabels | None: + # the "alert_group_labels" field is optional, so either all 2 fields (custom and template) are present or none + # "inheritable" field is deprecated + if "custom" not in validated_data: + return None + + return { + "inheritable": validated_data.pop("inheritable", None), # deprecated + "custom": validated_data.pop("custom"), + "template": validated_data.pop("template"), + } + + @staticmethod + def _create_custom_labels(organization: Organization, labels: AlertGroupCustomLabelsAPI) -> None: + """Create LabelKeyCache and LabelValueCache objects for labels used in labelsSchema""" + + label_keys = [ + LabelKeyCache( + id=label["key"]["id"], + name=label["key"]["name"], + prescribed=label["key"]["prescribed"], + organization=organization, + ) + for label in labels + ] + + label_values = [ + LabelValueCache( + id=label["value"]["id"], + name=label["value"]["name"], + prescribed=label["value"]["prescribed"], + key_id=label["key"]["id"], + ) + for label in labels + if label["value"]["id"] # don't create LabelValueCache objects for templated labels + ] + + LabelKeyCache.objects.bulk_create(label_keys, ignore_conflicts=True, batch_size=5000) + LabelValueCache.objects.bulk_create(label_values, ignore_conflicts=True, batch_size=5000) + class AlertReceiveChannelSerializer( EagerLoadingMixin, LabelsSerializerMixin, serializers.ModelSerializer[AlertReceiveChannel] @@ -411,9 +393,8 @@ class AlertReceiveChannelSerializer( if _integration.slug == integration: is_able_to_autoresolve = _integration.is_able_to_autoresolve - # pop associated labels and alert group labels, so they are not passed to AlertReceiveChannel.create + # pop associated labels, so they are not passed to AlertReceiveChannel.create. They will be created later. labels = validated_data.pop("labels", None) - alert_group_labels = IntegrationAlertGroupLabelsSerializer.pop_alert_group_labels(validated_data) try: instance = AlertReceiveChannel.create( @@ -425,14 +406,16 @@ class AlertReceiveChannelSerializer( except AlertReceiveChannel.DuplicateDirectPagingError: raise BadRequest(detail=AlertReceiveChannel.DuplicateDirectPagingError.DETAIL) - # Create label associations first, then update alert group labels + # Create label associations self.update_labels_association_if_needed(labels, instance, organization) - instance = IntegrationAlertGroupLabelsSerializer.update(instance, alert_group_labels) # Create default webhooks if needed if create_default_webhooks and hasattr(instance.config, "create_default_webhooks"): instance.config.create_default_webhooks(instance) + # Create default service_name label + instance.create_service_name_dynamic_label() + return instance def update(self, instance, validated_data): @@ -440,11 +423,6 @@ class AlertReceiveChannelSerializer( labels = validated_data.pop("labels", None) self.update_labels_association_if_needed(labels, instance, self.context["request"].auth.organization) - # update alert group labels - instance = IntegrationAlertGroupLabelsSerializer.update( - instance, IntegrationAlertGroupLabelsSerializer.pop_alert_group_labels(validated_data) - ) - try: updated_instance = super().update(instance, validated_data) except AlertReceiveChannel.DuplicateDirectPagingError: diff --git a/engine/apps/api/tests/test_alert_receive_channel.py b/engine/apps/api/tests/test_alert_receive_channel.py index fabf3196..fa7d4748 100644 --- a/engine/apps/api/tests/test_alert_receive_channel.py +++ b/engine/apps/api/tests/test_alert_receive_channel.py @@ -7,10 +7,12 @@ from rest_framework import serializers, status from rest_framework.response import Response from rest_framework.test import APIClient +from apps.alerts.constants import SERVICE_LABEL, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION +from apps.alerts.grafana_alerting_sync_manager import GrafanaAlertingSyncManager from apps.alerts.models import AlertReceiveChannel, EscalationPolicy from apps.api.permissions import LegacyAccessControlRole from apps.base.messaging import load_backend -from apps.labels.models import LabelKeyCache, LabelValueCache +from apps.labels.models import LabelKeyCache from common.exceptions import BacksyncIntegrationRequestError @@ -1717,24 +1719,20 @@ def test_alert_group_labels_put( label_3 = make_static_label_config(organization, alert_receive_channel) custom = [ - # plain label + # static label (deprecated, will be skipped) { "key": {"id": label_2.key.id, "name": label_2.key.name, "prescribed": False}, "value": {"id": label_2.value.id, "name": label_2.value.name, "prescribed": False}, }, - # plain label not present in DB cache - { - "key": {"id": "hello", "name": "world", "prescribed": False}, - "value": {"id": "foo", "name": "bar", "prescribed": False}, - }, - # templated label + # dynamic label { "key": {"id": label_3.key.id, "name": label_3.key.name, "prescribed": False}, - "value": { - "id": None, - "name": "{{ payload.foo }}", - "prescribed": False, - }, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + }, + # dynamic label not present in DB cache + { + "key": {"id": "hello", "name": "world", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.bar }}", "prescribed": False}, }, ] template = "{{ payload.labels | tojson }}" # advanced template @@ -1751,31 +1749,31 @@ def test_alert_group_labels_put( response = client.put(url, data, format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - # check static labels were saved as integration labels assert response.json()["alert_group_labels"] == { - "inheritable": {label_1.key_id: True, label_2.key_id: True, label_3.key_id: True, "hello": True}, + "inheritable": {label_1.key_id: True, label_2.key_id: True, label_3.key_id: True}, "custom": [ { "key": {"id": label_3.key.id, "name": label_3.key.name, "prescribed": False}, "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, - } + }, + { + "key": {"id": "hello", "name": "world", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.bar }}", "prescribed": False}, + }, ], "template": template, } alert_receive_channel.refresh_from_db() - # check static labels are not in the custom labels list + # check deprecated static label is not in the custom labels list assert alert_receive_channel.alert_group_labels_custom == [ [label_3.key_id, None, "{{ payload.foo }}"], + ["hello", None, "{{ payload.bar }}"], ] assert alert_receive_channel.alert_group_labels_template == template - # check static labels were assigned to integration - assert alert_receive_channel.labels.filter(key_id__in=[label_2.key_id, "hello"]).count() == 2 - # check label keys & values are created - key = LabelKeyCache.objects.filter(id="hello", name="world", organization=organization).first() - assert key is not None - assert LabelValueCache.objects.filter(key=key, id="foo", name="bar").exists() + # check label key is created + assert LabelKeyCache.objects.filter(id="hello", name="world", organization=organization).exists() @pytest.mark.django_db @@ -1850,6 +1848,130 @@ def test_alert_group_labels_post(alert_receive_channel_internal_api_setup, make_ assert alert_receive_channel.alert_group_labels_template == "{{ payload.labels | tojson }}" +@patch.object(GrafanaAlertingSyncManager, "check_for_connection_errors", return_value=None) +@pytest.mark.django_db +def test_create_service_name_label_for_new_alerting_integration( + _, + make_organization_and_user_with_plugin_token, + make_label_key, + make_user_auth_headers, +): + """Test adding default `service_name` dynamic label for new alerting integration.""" + + organization, user, token = make_organization_and_user_with_plugin_token() + service_name_label_key = make_label_key( + organization=organization, key_id="test", key_name=SERVICE_LABEL, prescribed=True + ) + + client = APIClient() + url = reverse("api-internal:alert_receive_channel-list") + + data = { + "integration": AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + "team": None, + "labels": [], + "alert_group_labels": { + "inheritable": {}, + "custom": [ + { + "key": {"id": "testid", "name": "testname", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + } + ], + "template": None, + }, + } + expected_alert_group_labels_response = { + "inheritable": {}, + "custom": [ + { + "key": {"id": service_name_label_key.id, "name": SERVICE_LABEL, "prescribed": True}, + "value": {"id": None, "name": SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION, "prescribed": False}, + }, + { + "key": {"id": "testid", "name": "testname", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + }, + ], + "template": None, + } + expected_alert_group_labels = [ + [service_name_label_key.id, None, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION], + ["testid", None, "{{ payload.foo }}"], + ] + + response = client.post(url, data, format="json", **make_user_auth_headers(user, token)) + + assert response.status_code == status.HTTP_201_CREATED + assert response.json()["alert_group_labels"] == expected_alert_group_labels_response + + alert_receive_channel = organization.alert_receive_channels.filter(public_primary_key=response.json()["id"]).first() + + assert alert_receive_channel is not None + assert alert_receive_channel.alert_group_labels_custom == expected_alert_group_labels + + +@patch.object(GrafanaAlertingSyncManager, "check_for_connection_errors", return_value=None) +@pytest.mark.django_db +def test_skip_creating_service_name_label_for_new_alerting_integration( + _, + make_organization_and_user_with_plugin_token, + make_label_key, + make_user_auth_headers, +): + """ + Test skipping adding default `service_name` dynamic label for new alerting integration, + when this label was already added by user + """ + + organization, user, token = make_organization_and_user_with_plugin_token() + service_name_label_key = make_label_key( + organization=organization, key_id="test", key_name=SERVICE_LABEL, prescribed=True + ) + + client = APIClient() + url = reverse("api-internal:alert_receive_channel-list") + + data = { + "integration": AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + "team": None, + "labels": [], + "alert_group_labels": { + "inheritable": {}, + "custom": [ + { + "key": {"id": service_name_label_key.id, "name": SERVICE_LABEL, "prescribed": True}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + } + ], + "template": None, + }, + } + expected_alert_group_labels_response = { + "inheritable": {}, + "custom": [ + { + "key": {"id": service_name_label_key.id, "name": SERVICE_LABEL, "prescribed": True}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + } + ], + "template": None, + } + expected_alert_group_labels = [ + [service_name_label_key.id, None, "{{ payload.foo }}"], + ] + + response = client.post(url, data, format="json", **make_user_auth_headers(user, token)) + + assert response.status_code == status.HTTP_201_CREATED + assert response.json()["alert_group_labels"] == expected_alert_group_labels_response + + alert_receive_channel = organization.alert_receive_channels.filter(public_primary_key=response.json()["id"]).first() + + assert alert_receive_channel is not None + assert alert_receive_channel.alert_group_labels_custom == expected_alert_group_labels + + @pytest.mark.django_db def test_team_not_updated_if_not_in_data( make_organization_and_user_with_plugin_token, diff --git a/engine/apps/api/tests/test_labels.py b/engine/apps/api/tests/test_labels.py index 2c36363e..47fdb3c2 100644 --- a/engine/apps/api/tests/test_labels.py +++ b/engine/apps/api/tests/test_labels.py @@ -84,6 +84,30 @@ def test_get_update_key_put( assert response.json() == expected_result +@patch( + "apps.labels.client.LabelsAPIClient.get_label_by_key_name", + return_value=( + {"key": {"id": "keyid123", "name": "keyname12"}, "values": [{"id": "valueid123", "name": "yolo"}]}, + MockResponse(status_code=200), + ), +) +@pytest.mark.django_db +def test_get_key_by_name( + mocked_get_label_by_key_name, + make_organization_and_user_with_plugin_token, + make_user_auth_headers, +): + _, user, token = make_organization_and_user_with_plugin_token() + client = APIClient() + url = reverse("api-internal:get_key_by_name", kwargs={"key_name": "keyname12"}) + response = client.get(url, format="json", **make_user_auth_headers(user, token)) + expected_result = {"key": {"id": "keyid123", "name": "keyname12"}, "values": [{"id": "valueid123", "name": "yolo"}]} + + assert mocked_get_label_by_key_name.called + assert response.status_code == status.HTTP_200_OK + assert response.json() == expected_result + + @patch( "apps.labels.client.LabelsAPIClient.add_value", return_value=( diff --git a/engine/apps/api/urls.py b/engine/apps/api/urls.py index 46999cee..51cee7da 100644 --- a/engine/apps/api/urls.py +++ b/engine/apps/api/urls.py @@ -127,6 +127,11 @@ urlpatterns += [ LabelsViewSet.as_view({"get": "get_key", "put": "rename_key"}), name="get_update_key", ), + re_path( + r"^labels/name/(?P[\w\-]+)/?$", + LabelsViewSet.as_view({"get": "get_key_by_name"}), + name="get_key_by_name", + ), re_path( r"^labels/id/(?P[\w\-]+)/values/?$", LabelsViewSet.as_view({"post": "add_value"}), name="add_value" ), diff --git a/engine/apps/api/views/alert_receive_channel.py b/engine/apps/api/views/alert_receive_channel.py index 19c40a57..7c7ac102 100644 --- a/engine/apps/api/views/alert_receive_channel.py +++ b/engine/apps/api/views/alert_receive_channel.py @@ -312,7 +312,7 @@ class AlertReceiveChannelView( if instance is None: # pop extra fields so they are not passed to AlertReceiveChannel(**serializer.validated_data) serializer.validated_data.pop("create_default_webhooks", None) - IntegrationAlertGroupLabelsSerializer.pop_alert_group_labels(serializer.validated_data) + IntegrationAlertGroupLabelsSerializer._pop_alert_group_labels(serializer.validated_data) # create in-memory instance to test with the (possible) unsaved data instance = AlertReceiveChannel(**serializer.validated_data) diff --git a/engine/apps/api/views/labels.py b/engine/apps/api/views/labels.py index d27215f2..f290765b 100644 --- a/engine/apps/api/views/labels.py +++ b/engine/apps/api/views/labels.py @@ -17,6 +17,7 @@ from apps.api.serializers.labels import ( from apps.auth_token.auth import PluginAuthentication from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.tasks import update_instances_labels_cache, update_label_option_cache +from apps.labels.types import LabelOption from apps.labels.utils import is_labels_feature_enabled from common.api_helpers.exceptions import BadRequest @@ -44,6 +45,7 @@ class LabelsViewSet(LabelsFeatureFlagViewSet): "rename_value": [RBACPermission.Permissions.LABEL_WRITE], "get_keys": [RBACPermission.Permissions.LABEL_READ], "get_key": [RBACPermission.Permissions.LABEL_READ], + "get_key_by_name": [RBACPermission.Permissions.LABEL_READ], "get_value": [RBACPermission.Permissions.LABEL_READ], } @@ -66,6 +68,18 @@ class LabelsViewSet(LabelsFeatureFlagViewSet): self._update_labels_cache(label_option) return Response(label_option, status=response.status_code) + @extend_schema(responses=LabelOptionSerializer) + def get_key_by_name(self, request, key_name): + """ + get_key_by_name returns LabelOption – key with the list of values + """ + organization = self.request.auth.organization + label_option, response = LabelsAPIClient( + organization.grafana_url, + organization.api_token, + ).get_label_by_key_name(key_name) + return Response(label_option, status=response.status_code) + @extend_schema(responses=LabelValueSerializer) def get_value(self, request, key_id, value_id): """get_value returns a Value""" @@ -133,7 +147,7 @@ class LabelsViewSet(LabelsFeatureFlagViewSet): self._update_labels_cache(label_option) return Response(label_option, status=status) - def _update_labels_cache(self, label_option): + def _update_labels_cache(self, label_option: LabelOption): if not label_option: return serializer = LabelOptionSerializer(data=label_option) diff --git a/engine/apps/labels/alert_group_labels.py b/engine/apps/labels/alert_group_labels.py index 70dafead..8271d161 100644 --- a/engine/apps/labels/alert_group_labels.py +++ b/engine/apps/labels/alert_group_labels.py @@ -19,9 +19,14 @@ LABEL_VALUE_TYPES = (str, int, float, bool) MAX_LABELS_PER_ALERT_GROUP = 15 -def gather_labels_from_alert_receive_channel_and_raw_request_data( +def gather_alert_labels( alert_receive_channel: "AlertReceiveChannel", raw_request_data: "Alert.RawRequestData" ) -> typing.Optional[types.AlertLabels]: + """ + gather_alert_labels gathers labels for an alert received by the alert receive channel. + 1. static labels - inherits them from integration. + 2. dynamic labels and multi-label extraction template – templating the raw_request_data. + """ if not is_labels_feature_enabled(alert_receive_channel.organization): return None @@ -37,7 +42,7 @@ def gather_labels_from_alert_receive_channel_and_raw_request_data( return labels -def assign_labels( +def save_alert_group_labels( alert_group: "AlertGroup", alert_receive_channel: "AlertReceiveChannel", labels: typing.Optional[types.AlertLabels] ) -> None: from apps.labels.models import AlertGroupAssociatedLabel diff --git a/engine/apps/labels/client.py b/engine/apps/labels/client.py index 3310694d..645a09af 100644 --- a/engine/apps/labels/client.py +++ b/engine/apps/labels/client.py @@ -65,6 +65,15 @@ class LabelsAPIClient: self._check_response(response) return response.json(), response + def get_label_by_key_name( + self, key_name: str + ) -> typing.Tuple[typing.Optional["LabelOption"], requests.models.Response]: + url = urljoin(self.api_url, f"name/{key_name}") + + response = requests.get(url, timeout=TIMEOUT, headers=self._request_headers) + self._check_response(response) + return response.json(), response + def get_value( self, key_id: str, value_id: str ) -> typing.Tuple[typing.Optional["LabelValue"], requests.models.Response]: diff --git a/engine/apps/labels/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py b/engine/apps/labels/migrations/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py similarity index 78% rename from engine/apps/labels/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py rename to engine/apps/labels/migrations/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py index 91504bd9..aec1e6d7 100644 --- a/engine/apps/labels/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py +++ b/engine/apps/labels/migrations/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py @@ -1,8 +1,7 @@ -# TODO: MOVE IT TO /migrations DIRECTORY IN FUTURE RELEASE - # Generated by Django 4.2.15 on 2024-11-26 13:37 from django.db import migrations +import django_migration_linter as linter import common.migrations.remove_field @@ -13,9 +12,10 @@ class Migration(migrations.Migration): ] operations = [ + linter.IgnoreMigration(), common.migrations.remove_field.RemoveFieldDB( model_name="AlertReceiveChannelAssociatedLabel", name="inheritable", - remove_state_migration=("labels", "0007_remove_alertreceivechannelassociatedlabel_inheritable_state"), + remove_state_migration=("labels", "0006_remove_alertreceivechannelassociatedlabel_inheritable_state"), ), ] diff --git a/engine/apps/labels/models.py b/engine/apps/labels/models.py index ecd06c26..53a60ce3 100644 --- a/engine/apps/labels/models.py +++ b/engine/apps/labels/models.py @@ -1,8 +1,10 @@ +import logging import typing from django.db import models from django.utils import timezone +from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.tasks import update_label_pairs_cache from apps.labels.types import LabelPair from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES @@ -10,6 +12,7 @@ from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES if typing.TYPE_CHECKING: from apps.user_management.models import Organization +logger = logging.getLogger(__name__) MAX_KEY_NAME_LENGTH = 200 MAX_VALUE_NAME_LENGTH = 200 @@ -26,6 +29,36 @@ class LabelKeyCache(models.Model): def is_outdated(self) -> bool: return timezone.now() - self.last_synced > timezone.timedelta(minutes=LABEL_OUTDATED_TIMEOUT_MINUTES) + @classmethod + def get_or_create_by_name(cls, organization: "Organization", key_name: str) -> typing.Optional["LabelKeyCache"]: + """ + `get_or_create_by_name` tries to get label key with provided name from cache. + If there is no label key with this name in the cache - it tries to fetch it from the labels repo API. + """ + label_key = cls.objects.filter(organization=organization, name=key_name).first() + if label_key: + return label_key + + # fetch label key from labels repo + try: + label, _ = LabelsAPIClient(organization.grafana_url, organization.api_token).get_label_by_key_name( + label_key + ) + except LabelsRepoAPIException as e: + logger.error(f"Failed to get or create label key {key_name} for organization {organization.id}: {e}") + return None + + # save labels key in cache + label_key = LabelKeyCache( + id=label["key"]["id"], + name=label["key"]["name"], + organization=organization, + prescribed=label["key"]["prescribed"], + ) + label_key.save() + + return label_key + class LabelValueCache(models.Model): id = models.CharField(primary_key=True, editable=False, max_length=36) diff --git a/engine/apps/labels/tasks.py b/engine/apps/labels/tasks.py index 9ed1147f..89a3fc00 100644 --- a/engine/apps/labels/tasks.py +++ b/engine/apps/labels/tasks.py @@ -8,12 +8,13 @@ from django.utils import timezone from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.types import LabelOption, LabelPair from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES, get_associating_label_model -from apps.user_management.models import Organization from common.custom_celery_tasks import shared_dedicated_queue_retry_task logger = get_task_logger(__name__) logger.setLevel(logging.DEBUG) +MAX_RETRIES = 1 if settings.DEBUG else 10 + class KVPair(typing.TypedDict): value_name: str @@ -129,11 +130,10 @@ def _update_labels_cache(values_id_to_pair: typing.Dict[str, LabelPair]): LabelValueCache.objects.bulk_update(values, fields=["name", "last_synced", "prescribed"]) -@shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else 10 -) +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) def update_instances_labels_cache(organization_id: int, instance_ids: typing.List[int], instance_model_name: str): from apps.labels.models import LabelValueCache + from apps.user_management.models import Organization now = timezone.now() organization = Organization.objects.get(id=organization_id) @@ -162,3 +162,69 @@ def update_instances_labels_cache(organization_id: int, instance_ids: typing.Lis continue if label_option: update_label_option_cache.apply_async((label_option,)) + + +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) +def add_service_label_for_alerting_integrations(): + """ + This task should be called manually and only once. + Starts tasks that add `service_name` dynamic label for Alerting integrations + """ + + from apps.alerts.models import AlertReceiveChannel + + organization_ids = ( + AlertReceiveChannel.objects.filter( + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + organization__is_grafana_labels_enabled=True, + organization__deleted_at__isnull=True, + ) + .values_list("organization", flat=True) + .distinct() + ) + + for idx, organization_id in enumerate(organization_ids): + countdown = idx // 10 + add_service_label_per_org.apply_async((organization_id,), countdown=countdown) + + +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) +def add_service_label_per_org(organization_id: int): + """Add `service_name` dynamic label for all Alerting integrations per organization""" + + from apps.alerts.models import AlertReceiveChannel + from apps.user_management.models import Organization + + organization = Organization.objects.get(id=organization_id) + service_label_custom = AlertReceiveChannel._build_service_name_label_custom(organization) + if not service_label_custom: + return + integrations = AlertReceiveChannel.objects.filter( + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + organization=organization, + ) + integrations_to_update = [] + # add service label to integration custom labels if it's not already there + for integration in integrations: + dynamic_service_label_exists = False + dynamic_labels = integration.alert_group_labels_custom if integration.alert_group_labels_custom else [] + for label in dynamic_labels: + if label[0] == service_label_custom[0]: + dynamic_service_label_exists = True + break + if dynamic_service_label_exists: + continue + integration.alert_group_labels_custom = [service_label_custom] + dynamic_labels + integrations_to_update.append(integration) + + AlertReceiveChannel.objects.bulk_update(integrations_to_update, fields=["alert_group_labels_custom"]) + + +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) +def add_service_label_for_integration(alert_receive_channel_id: int): + """Add `service_name` dynamic label for Alerting integration""" + + from apps.alerts.models import AlertReceiveChannel + + alert_receive_channel = AlertReceiveChannel.objects.get(id=alert_receive_channel_id) + alert_receive_channel.create_service_name_dynamic_label(True) diff --git a/engine/apps/labels/tests/test_add_service_label.py b/engine/apps/labels/tests/test_add_service_label.py new file mode 100644 index 00000000..9fae32fd --- /dev/null +++ b/engine/apps/labels/tests/test_add_service_label.py @@ -0,0 +1,40 @@ +import pytest + +from apps.alerts.constants import SERVICE_LABEL, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION +from apps.alerts.models import AlertReceiveChannel +from apps.labels.tasks import add_service_label_per_org + + +@pytest.mark.django_db +def test_add_service_label_per_org(make_organization, make_alert_receive_channel, make_label_key): + organization = make_organization() + alert_receive_channel_alerting_no_labels = make_alert_receive_channel( + organization=organization, integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING + ) + alert_receive_channel_alerting_with_label = make_alert_receive_channel( + organization=organization, + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + alert_group_labels_custom=[["test", None, "test_template"]], + ) + alert_receive_channel_grafana = make_alert_receive_channel( + organization=organization, integration=AlertReceiveChannel.INTEGRATION_GRAFANA + ) + service_name_label_key = make_label_key(organization, key_id="service_label_id", key_name=SERVICE_LABEL) + + expected_service_name_label = [service_name_label_key.id, None, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION] + + add_service_label_per_org(organization.id) + + for alert_receive_channel in [ + alert_receive_channel_alerting_no_labels, + alert_receive_channel_alerting_with_label, + alert_receive_channel_grafana, + ]: + alert_receive_channel.refresh_from_db() + + assert alert_receive_channel_alerting_no_labels.alert_group_labels_custom == [expected_service_name_label] + assert alert_receive_channel_alerting_with_label.alert_group_labels_custom == [ + expected_service_name_label, + ["test", None, "test_template"], + ] + assert alert_receive_channel_grafana.alert_group_labels_custom is None diff --git a/engine/apps/labels/tests/test_labels_cache.py b/engine/apps/labels/tests/test_labels_cache.py index 870c76b9..f8653375 100644 --- a/engine/apps/labels/tests/test_labels_cache.py +++ b/engine/apps/labels/tests/test_labels_cache.py @@ -3,7 +3,7 @@ from unittest.mock import call, patch import pytest from django.utils import timezone -from apps.labels.client import LabelsRepoAPIException +from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.models import LabelKeyCache, LabelValueCache from apps.labels.tasks import update_instances_labels_cache, update_labels_cache from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES @@ -158,3 +158,27 @@ def test_update_instances_labels_cache_error(make_organization, make_alert_recei ) mock_get_label_by_key_id.assert_called_once_with(label_association.key_id) mock_update_cache.assert_not_called() + + +@pytest.mark.django_db +def test_get_or_create_label_key_cache_by_name(make_organization): + organization = make_organization() + label_key_data = {"id": "testid", "name": "testname", "prescribed": False} + + # test label does not exist in labels repo + with patch.object(LabelsAPIClient, "get_label_by_key_name", side_effect=LabelsRepoAPIException("test", "test")): + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + + assert label is None + + # test label does not exist in cache + with patch.object(LabelsAPIClient, "get_label_by_key_name", return_value=({"key": label_key_data}, None)): + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + + assert label is not None + assert LabelKeyCache.objects.filter(id=label.id).exists() + + # test label exists in cache + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + assert label is not None + assert LabelKeyCache.objects.filter(id=label.id).exists() diff --git a/engine/apps/public_api/serializers/integrations.py b/engine/apps/public_api/serializers/integrations.py index 704c7660..e35e588d 100644 --- a/engine/apps/public_api/serializers/integrations.py +++ b/engine/apps/public_api/serializers/integrations.py @@ -123,6 +123,7 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main connection_error = GrafanaAlertingSyncManager.check_for_connection_errors(organization) if connection_error: raise serializers.ValidationError(connection_error) + validated_data = self._add_service_label_if_needed(organization, validated_data) user = self.context["request"].user with transaction.atomic(): try: @@ -140,6 +141,8 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main ) serializer.is_valid(raise_exception=True) serializer.save() + # Create default service_name label + instance.create_service_name_dynamic_label() return instance def update(self, *args, **kwargs): diff --git a/engine/settings/celery_task_routes.py b/engine/settings/celery_task_routes.py index fff08a2a..de222f0a 100644 --- a/engine/settings/celery_task_routes.py +++ b/engine/settings/celery_task_routes.py @@ -17,6 +17,9 @@ CELERY_TASK_ROUTES = { "apps.labels.tasks.update_instances_labels_cache": {"queue": "default"}, "apps.labels.tasks.update_label_option_cache": {"queue": "default"}, "apps.labels.tasks.update_label_pairs_cache": {"queue": "default"}, + "apps.labels.tasks.add_service_label_for_alerting_integrations": {"queue": "default"}, + "apps.labels.tasks.add_service_label_per_org": {"queue": "default"}, + "apps.labels.tasks.add_service_label_for_integration": {"queue": "default"}, "apps.metrics_exporter.tasks.start_calculate_and_cache_metrics": {"queue": "default"}, "apps.metrics_exporter.tasks.update_metrics_for_alert_group": {"queue": "default"}, "apps.metrics_exporter.tasks.update_metrics_for_user": {"queue": "default"},