From 872f1e35e19523101b0fad2e5f431a8d991af6bc Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Mon, 6 Jan 2025 16:35:39 -0500 Subject: [PATCH 01/13] fix: update direct paging integration non-default route data (#5397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Which issue(s) this PR closes This is a quick db migration follow-up to https://github.com/grafana/oncall/pull/5382. It's mostly just an enhancement. Basically #5382 had created a new/non-default route for each Direct Paging integration. However, I overlooked actually setting the chatops/escalation chain data for this new route: ![Screenshot 2025-01-06 at 3 55 19 PM](https://github.com/user-attachments/assets/6c9e68c3-64b7-47e2-9de6-34edd151b505) This PR simply updates the recently created non-default direct paging integration route, such that, to start, direct paging a team has no escalation/notification difference whether the user doing the direct paging sets important = True or False. From here, teams can modify these routes to their needs (ex. setup and assign different escalation chains for these different routes). ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- ...t_paging_integration_non_default_routes.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py diff --git a/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py b/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py new file mode 100644 index 00000000..8696c6ad --- /dev/null +++ b/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py @@ -0,0 +1,56 @@ +# Generated by Django 4.2.17 on 2025-01-06 15:48 + +import logging + +from django.db import migrations +from django.db.models import Subquery, OuterRef + +logger = logging.getLogger(__name__) + + +def update_direct_paging_integration_non_default_routes(apps, schema_editor): + """ + If any of the original Direct Paging integration default routes had chatops settings or escalation chains + associated with them, simply set the non-default route to have the same settings. This will avoid any functional + differences should users start paging a team using the "important" functionality. + """ + ChannelFilter = apps.get_model("alerts", "ChannelFilter") + + logger.info("Starting update_direct_paging_integration_non_default_routes migration...") + + # Subquery that selects the matching "default" channel filter for each non-default filter + default_route_subquery = ChannelFilter.objects.filter( + alert_receive_channel=OuterRef("alert_receive_channel"), + is_default=True + ) + + # Perform a bulk update on all non-default routes in one go + updated_count = ( + ChannelFilter.objects + .filter( + alert_receive_channel__integration="direct_paging", + is_default=False + ) + .update( + escalation_chain_id=Subquery(default_route_subquery.values("escalation_chain_id")[:1]), + telegram_channel_id=Subquery(default_route_subquery.values("telegram_channel_id")[:1]), + notify_in_telegram=Subquery(default_route_subquery.values("notify_in_telegram")[:1]), + slack_channel_id=Subquery(default_route_subquery.values("slack_channel_id")[:1]), + notify_in_slack=Subquery(default_route_subquery.values("notify_in_slack")[:1]), + notification_backends=Subquery(default_route_subquery.values("notification_backends")[:1]), + ) + ) + + logger.info(f"Updated {updated_count} non-default direct paging integration routes") + logger.info("Finished update_direct_paging_integration_non_default_routes migration.") + + +class Migration(migrations.Migration): + + dependencies = [ + ("alerts", "0072_upsert_direct_paging_integration_routes"), + ] + + operations = [ + migrations.RunPython(update_direct_paging_integration_non_default_routes, migrations.RunPython.noop), + ] From 84c9a0cb0d8c11e55177244103e680a3fe37c19b Mon Sep 17 00:00:00 2001 From: Jack Baldry Date: Tue, 7 Jan 2025 15:37:20 +0000 Subject: [PATCH 02/13] Update publishing workflows to use GitHub App authentication (#5399) # What this PR does Use a centralized composite action that uses GitHub App authentication to publish documentation. The organization secrets used in the current workflows have expired. ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. Signed-off-by: Jack Baldry --- .../publish-technical-documentation-next.yml | 57 +++--------- ...ublish-technical-documentation-release.yml | 89 +++---------------- 2 files changed, 22 insertions(+), 124 deletions(-) diff --git a/.github/workflows/publish-technical-documentation-next.yml b/.github/workflows/publish-technical-documentation-next.yml index 80acbd23..53a5b7a7 100644 --- a/.github/workflows/publish-technical-documentation-next.yml +++ b/.github/workflows/publish-technical-documentation-next.yml @@ -1,58 +1,21 @@ -name: "Publish Technical Documentation (next)" +name: publish-technical-documentation-next on: push: branches: - - "main" + - main paths: - "docs/sources/**" workflow_dispatch: - jobs: - test: - runs-on: "ubuntu-latest" - steps: - - name: "Check out code" - uses: "actions/checkout@v4" - - name: "Build website" - # -e HUGO_REFLINKSERRORLEVEL=ERROR prevents merging broken refs with the downside - # that no refs to external content can be used as these refs will not resolve in the - # docs-base image. - run: > - docker run -v ${PWD}/docs/sources:/hugo/content/docs/oncall/latest - -e HUGO_REFLINKSERRORLEVEL=ERROR - --rm grafana/docs-base:latest /bin/bash - -c 'echo -e "---\\nredirectURL: /hugo/content/docs/oncall/latest/\\ntype: redirect\\nversioned: true\\n---\\n" - > /hugo/content/docs/oncall/_index.md; make hugo' - sync: - runs-on: "ubuntu-latest" - needs: "test" + if: github.repository == 'grafana/oncall' + permissions: + contents: read + id-token: write + runs-on: ubuntu-latest steps: - - name: "Check out code" - uses: "actions/checkout@v4" - - - name: "Clone website-sync Action" - # WEBSITE_SYNC_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. - run: | - git clone --single-branch --no-tags --depth 1 \ - -b master https://grafanabot:${{ secrets.WEBSITE_SYNC_TOKEN }}@github.com/grafana/website-sync \ - ./.github/actions/website-sync - - - name: "Publish to website repository (next)" - uses: "./.github/actions/website-sync" - id: "publish-next" + - uses: actions/checkout@v4 + - uses: grafana/writers-toolkit/publish-technical-documentation@publish-technical-documentation/v1 with: - repository: "grafana/website" - branch: "master" - host: "github.com" - # PUBLISH_TO_WEBSITE_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. - github_pat: "grafanabot:${{ secrets.PUBLISH_TO_WEBSITE_TOKEN }}" - source_folder: "docs/sources" - target_folder: "content/docs/oncall/next" + website_directory: content/docs/oncall/next diff --git a/.github/workflows/publish-technical-documentation-release.yml b/.github/workflows/publish-technical-documentation-release.yml index 5e8b1e00..e91b642a 100644 --- a/.github/workflows/publish-technical-documentation-release.yml +++ b/.github/workflows/publish-technical-documentation-release.yml @@ -1,4 +1,4 @@ -name: "Publish Technical Documentation (release)" +name: publish-technical-documentation-release on: push: @@ -9,85 +9,20 @@ on: paths: - "docs/sources/**" workflow_dispatch: - jobs: - test: - runs-on: "ubuntu-latest" - steps: - - name: "Check out code" - uses: "actions/checkout@v4" - - name: "Build website" - # -e HUGO_REFLINKSERRORLEVEL=ERROR prevents merging broken refs with the downside - # that no refs to external content can be used as these refs will not resolve in the - # docs-base image. - run: > - docker run -v ${PWD}/docs/sources:/hugo/content/docs/oncall/latest - -e HUGO_REFLINKSERRORLEVEL=ERROR - --rm grafana/docs-base:latest /bin/bash - -c 'echo -e "---\\nredirectURL: /hugo/content/docs/oncall/latest/\\ntype: redirect\\nversioned: true\\n---\\n" - > /hugo/content/docs/oncall/_index.md; make hugo' - sync: - runs-on: "ubuntu-latest" - needs: "test" + if: github.repository == 'grafana/oncall' + permissions: + contents: read + id-token: write + runs-on: ubuntu-latest steps: - - name: "Checkout code and tags" - uses: "actions/checkout@v4" + - uses: actions/checkout@v4 with: fetch-depth: 0 - - - name: "Checkout Actions library" - uses: "actions/checkout@v4" + - uses: grafana/writers-toolkit/publish-technical-documentation-release@publish-technical-documentation-release/v2 with: - repository: "grafana/grafana-github-actions" - path: "./actions" - - - name: "Install Actions from library" - run: "npm install --production --prefix ./actions" - - - name: "Determine if there is a matching release tag" - id: "has-matching-release-tag" - uses: "./actions/has-matching-release-tag" - with: - ref_name: "${{ github.ref_name }}" - release_tag_regexp: "^v(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)$" - release_branch_regexp: "^release-(0|[1-9]\\d*)\\.(0|[1-9]\\d*)$" - - - name: "Determine technical documentation version" - if: "steps.has-matching-release-tag.outputs.bool == 'true'" - uses: "./actions/docs-target" - id: "target" - with: - ref_name: "${{ github.ref_name }}" - - - name: "Clone website-sync Action" - if: "steps.has-matching-release-tag.outputs.bool == 'true'" - # WEBSITE_SYNC_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. - run: | - git clone --single-branch --no-tags --depth 1 \ - -b master https://grafanabot:${{ secrets.WEBSITE_SYNC_TOKEN }}@github.com/grafana/website-sync \ - ./.github/actions/website-sync - - - name: "Publish to website repository (release)" - if: "steps.has-matching-release-tag.outputs.bool == 'true'" - uses: "./.github/actions/website-sync" - id: "publish-release" - with: - repository: "grafana/website" - branch: "master" - host: "github.com" - # PUBLISH_TO_WEBSITE_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. - github_pat: "grafanabot:${{ secrets.PUBLISH_TO_WEBSITE_TOKEN }}" - source_folder: "docs/sources" - # Append ".x" to target to produce a v..x directory. - target_folder: "content/docs/oncall/${{ steps.target.outputs.target }}.x" - # Allow the workflow to succeed if there are no changes to commit. - # This is only going to be true on tags as those events ignore the path - # filter in the workflow `on.push` section. - allow_no_changes: "true" + release_tag_regexp: "^v(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" + release_branch_regexp: "^release-(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" + release_branch_with_patch_regexp: "^release-(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" + website_directory: content/docs/oncall From be10967883c760e75ed392e307cf5528d220ed82 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Tue, 7 Jan 2025 11:50:55 -0500 Subject: [PATCH 03/13] fix: patch direct paging routes migration (#5401) ## Which issue(s) this PR closes Patches some issues experienced on dev related to https://github.com/grafana/oncall/pull/5397. Previous migration was running into the following: ```bash File "/usr/local/lib/python3.12/site-packages/pymysql/connections.py", line 775, in _read_packet packet.raise_for_error() File "/usr/local/lib/python3.12/site-packages/pymysql/protocol.py", line 219, in raise_for_error err.raise_mysql_exception(self._data) File "/usr/local/lib/python3.12/site-packages/pymysql/err.py", line 150, in raise_mysql_exception raise errorclass(errno, errval) django.db.utils.OperationalError: (1093, "You can't specify target table 'alerts_channelfilter' for update in FROM clause") ``` --- ...t_paging_integration_non_default_routes.py | 64 +++++++++++++------ 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py b/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py index 8696c6ad..bd9c4070 100644 --- a/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py +++ b/engine/apps/alerts/migrations/0073_update_direct_paging_integration_non_default_routes.py @@ -18,30 +18,56 @@ def update_direct_paging_integration_non_default_routes(apps, schema_editor): logger.info("Starting update_direct_paging_integration_non_default_routes migration...") - # Subquery that selects the matching "default" channel filter for each non-default filter - default_route_subquery = ChannelFilter.objects.filter( - alert_receive_channel=OuterRef("alert_receive_channel"), + # 1) Gather all default route data in a dictionary keyed by alert_receive_channel_id + default_direct_paging_routes = ChannelFilter.objects.filter( + alert_receive_channel__integration="direct_paging", is_default=True + ).values( + "alert_receive_channel_id", + "escalation_chain_id", + "telegram_channel_id", + "notify_in_telegram", + "slack_channel_id", + "notify_in_slack", + "notification_backends", ) - # Perform a bulk update on all non-default routes in one go - updated_count = ( - ChannelFilter.objects - .filter( - alert_receive_channel__integration="direct_paging", - is_default=False - ) - .update( - escalation_chain_id=Subquery(default_route_subquery.values("escalation_chain_id")[:1]), - telegram_channel_id=Subquery(default_route_subquery.values("telegram_channel_id")[:1]), - notify_in_telegram=Subquery(default_route_subquery.values("notify_in_telegram")[:1]), - slack_channel_id=Subquery(default_route_subquery.values("slack_channel_id")[:1]), - notify_in_slack=Subquery(default_route_subquery.values("notify_in_slack")[:1]), - notification_backends=Subquery(default_route_subquery.values("notification_backends")[:1]), - ) + default_direct_paging_route_map = { + route["alert_receive_channel_id"]: route for route in default_direct_paging_routes + } + + # 2) Fetch all non-default direct paging routes we want to update + non_default_direct_paging_routes = list(ChannelFilter.objects.filter( + alert_receive_channel__integration="direct_paging", + is_default=False + )) + + # 3) Update them in Python memory + for non_default_route in non_default_direct_paging_routes: + default_route = default_direct_paging_route_map.get(non_default_route.alert_receive_channel_id) + if default_route: + non_default_route.escalation_chain_id = default_route["escalation_chain_id"] + non_default_route.telegram_channel_id = default_route["telegram_channel_id"] + non_default_route.notify_in_telegram = default_route["notify_in_telegram"] + non_default_route.slack_channel_id = default_route["slack_channel_id"] + non_default_route.notify_in_slack = default_route["notify_in_slack"] + non_default_route.notification_backends = default_route["notification_backends"] + + # 4) Bulk update all at once + ChannelFilter.objects.bulk_update( + non_default_direct_paging_routes, + [ + "escalation_chain_id", + "telegram_channel_id", + "notify_in_telegram", + "slack_channel_id", + "notify_in_slack", + "notification_backends", + ], + batch_size=5000, ) - logger.info(f"Updated {updated_count} non-default direct paging integration routes") + logger.info(f"Updated {len(non_default_direct_paging_routes)} non-default direct paging integration routes") logger.info("Finished update_direct_paging_integration_non_default_routes migration.") From 6e753742294e5d116c11149ebe67d3d066d6f130 Mon Sep 17 00:00:00 2001 From: Jack Baldry Date: Wed, 8 Jan 2025 14:36:01 +0000 Subject: [PATCH 04/13] Use tags only workflow behavior (#5404) # What this PR does Without this, the action assumes there is a release branch that a tag is made on because that is common to most other Grafana repositories. Those repositories maintain a long-lived release branch to facilitate backports to documentation and code. Since grafana/oncall doesn't follow this pattern, it doesn't make sense to try and discover the matching release branch. That action behavior is disabled by the `tags_only: true` setting. ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. Signed-off-by: Jack Baldry --- .github/workflows/publish-technical-documentation-release.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/publish-technical-documentation-release.yml b/.github/workflows/publish-technical-documentation-release.yml index e91b642a..d2626d72 100644 --- a/.github/workflows/publish-technical-documentation-release.yml +++ b/.github/workflows/publish-technical-documentation-release.yml @@ -25,4 +25,6 @@ jobs: release_tag_regexp: "^v(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" release_branch_regexp: "^release-(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" release_branch_with_patch_regexp: "^release-(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" + # This repository only releases from the tag reference and not from long-lived release branches. + tags_only: true website_directory: content/docs/oncall From 3d4ce622cbd2597a905e7d293a42bf1b4845fb63 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Tue, 14 Jan 2025 11:02:23 +0100 Subject: [PATCH 05/13] Add default `service_name` label for Alerting integrations (#5373) # What this PR does - The `service_name` label will be added to Grafana Alerting integration when it is created, if it wasn't added by user. - Adds celery task that should be started manually and will add the `service_name` dynamic label to all existing Grafana Alerting integrations. ## Which issue(s) this PR closes Related to https://github.com/grafana/oncall-private/issues/2975 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --------- Co-authored-by: Innokentii Konstantinov --- engine/apps/alerts/constants.py | 4 + engine/apps/alerts/models/alert.py | 12 +- .../alerts/models/alert_receive_channel.py | 54 +++++ engine/apps/alerts/tests/test_alert.py | 4 +- .../api/serializers/alert_receive_channel.py | 196 ++++++++---------- .../api/tests/test_alert_receive_channel.py | 168 +++++++++++++-- engine/apps/api/tests/test_labels.py | 24 +++ engine/apps/api/urls.py | 5 + .../apps/api/views/alert_receive_channel.py | 2 +- engine/apps/api/views/labels.py | 16 +- engine/apps/labels/alert_group_labels.py | 9 +- engine/apps/labels/client.py | 9 + ...vechannelassociatedlabel_inheritable_db.py | 6 +- engine/apps/labels/models.py | 33 +++ engine/apps/labels/tasks.py | 74 ++++++- .../labels/tests/test_add_service_label.py | 40 ++++ engine/apps/labels/tests/test_labels_cache.py | 26 ++- .../public_api/serializers/integrations.py | 3 + engine/settings/celery_task_routes.py | 3 + 19 files changed, 535 insertions(+), 153 deletions(-) rename engine/apps/labels/{ => migrations}/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py (78%) create mode 100644 engine/apps/labels/tests/test_add_service_label.py diff --git a/engine/apps/alerts/constants.py b/engine/apps/alerts/constants.py index 496836ca..c91cabfa 100644 --- a/engine/apps/alerts/constants.py +++ b/engine/apps/alerts/constants.py @@ -25,3 +25,7 @@ class AlertGroupState(str, Enum): ACKNOWLEDGED = "acknowledged" RESOLVED = "resolved" SILENCED = "silenced" + + +SERVICE_LABEL = "service_name" +SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION = "{{ payload.common_labels.service_name }}" diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py index 844cbf67..2f1d1af6 100644 --- a/engine/apps/alerts/models/alert.py +++ b/engine/apps/alerts/models/alert.py @@ -14,7 +14,7 @@ from apps.alerts.constants import TASK_DELAY_SECONDS from apps.alerts.incident_appearance.templaters import TemplateLoader from apps.alerts.signals import alert_group_escalation_snapshot_built from apps.alerts.tasks.distribute_alert import send_alert_create_signal -from apps.labels.alert_group_labels import assign_labels, gather_labels_from_alert_receive_channel_and_raw_request_data +from apps.labels.alert_group_labels import gather_alert_labels, save_alert_group_labels from apps.labels.types import AlertLabels from common.jinja_templater import apply_jinja_template_to_alert_payload_and_labels from common.jinja_templater.apply_jinja_template import ( @@ -106,13 +106,11 @@ class Alert(models.Model): # This import is here to avoid circular imports from apps.alerts.models import AlertGroup, AlertGroupLogRecord, AlertReceiveChannel, ChannelFilter - parsed_labels = gather_labels_from_alert_receive_channel_and_raw_request_data( - alert_receive_channel, raw_request_data - ) - group_data = Alert.render_group_data(alert_receive_channel, raw_request_data, parsed_labels, is_demo) + alert_labels = gather_alert_labels(alert_receive_channel, raw_request_data) + group_data = Alert.render_group_data(alert_receive_channel, raw_request_data, alert_labels, is_demo) if channel_filter is None: - channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, parsed_labels) + channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, alert_labels) # Get or create group group, group_created = AlertGroup.objects.get_or_create_grouping( @@ -141,7 +139,7 @@ class Alert(models.Model): transaction.on_commit(partial(send_alert_create_signal.apply_async, (alert.pk,))) if group_created: - assign_labels(group, alert_receive_channel, parsed_labels) + save_alert_group_labels(group, alert_receive_channel, alert_labels) group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED) group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED) diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index a8089337..791162d8 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -14,6 +14,7 @@ from django.utils import timezone from django.utils.crypto import get_random_string from emoji import emojize +from apps.alerts.constants import SERVICE_LABEL, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import GrafanaAlertingSyncManager from apps.alerts.integration_options_mixin import IntegrationOptionsMixin from apps.alerts.models.maintainable_object import MaintainableObject @@ -24,6 +25,7 @@ from apps.grafana_plugin.ui_url_builder import UIURLBuilder from apps.integrations.legacy_prefix import remove_legacy_prefix from apps.integrations.metadata import heartbeat from apps.integrations.tasks import create_alert, create_alertmanager_alerts +from apps.labels.tasks import add_service_label_for_integration from apps.metrics_exporter.helpers import ( metrics_add_integrations_to_cache, metrics_remove_deleted_integration_from_cache, @@ -48,6 +50,10 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) +class CreatingServiceNameDynamicLabelFailed(Exception): + """Raised when failed to create a dynamic service name label""" + + class MessagingBackendTemplatesItem: title: str | None message: str | None @@ -790,6 +796,54 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject): result["team"] = "General" return result + def create_service_name_dynamic_label(self, is_called_async: bool = False): + """ + create_service_name_dynamic_label creates a dynamic label for service_name for Grafana Alerting integration. + Warning: It might make a request to the labels repo API. + That's why it's called in api handlers, not in post_save. + Once we will have labels operator & get rid of syncing labels from repo, this method should be moved + to post_save. + """ + from apps.labels.models import LabelKeyCache + + if not self.organization.is_grafana_labels_enabled: + return + if self.integration != AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING: + return + + # validate that service_name label doesn't exist in already + service_name_label = LabelKeyCache.objects.filter(organization=self.organization, name=SERVICE_LABEL).first() + + if service_name_label is not None and self.alert_group_labels_custom is not None: + for k, _, _ in self.alert_group_labels_custom: + if k == service_name_label.id: + return + + service_name_dynamic_label = self._build_service_name_label_custom(self.organization) + if service_name_dynamic_label is None: + # if this method was called from a celery task, raise exception to retry it + if is_called_async: + raise CreatingServiceNameDynamicLabelFailed + # otherwise start a celery task to retry the label creation async + add_service_label_for_integration.apply_async((self.id,)) + return + self.alert_group_labels_custom = [service_name_dynamic_label] + (self.alert_group_labels_custom or []) + self.save(update_fields=["alert_group_labels_custom"]) + + @staticmethod + def _build_service_name_label_custom(organization: "Organization") -> DynamicLabelsEntryDB | None: + """ + _build_service_name_label_custom returns `service_name` label template in dynamic label format: + [key_id, None, template]. + If there is no label key service_name in the cache - it tries to fetch it from the labels repo API. + """ + from apps.labels.models import LabelKeyCache + + service_label_key = LabelKeyCache.get_or_create_by_name(organization, SERVICE_LABEL) + return ( + [service_label_key.id, None, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION] if service_label_key else None + ) + @receiver(post_save, sender=AlertReceiveChannel) def listen_for_alertreceivechannel_model_save( diff --git a/engine/apps/alerts/tests/test_alert.py b/engine/apps/alerts/tests/test_alert.py index 33cee023..28839b20 100644 --- a/engine/apps/alerts/tests/test_alert.py +++ b/engine/apps/alerts/tests/test_alert.py @@ -56,8 +56,8 @@ def test_alert_create_custom_channel_filter(make_organization, make_alert_receiv assert alert.group.channel_filter == other_channel_filter -@patch("apps.alerts.models.alert.assign_labels") -@patch("apps.alerts.models.alert.gather_labels_from_alert_receive_channel_and_raw_request_data") +@patch("apps.alerts.models.alert.save_alert_group_labels") +@patch("apps.alerts.models.alert.gather_alert_labels") @patch("apps.alerts.models.ChannelFilter.select_filter", wraps=ChannelFilter.select_filter) @pytest.mark.django_db def test_alert_create_labels_are_assigned( diff --git a/engine/apps/api/serializers/alert_receive_channel.py b/engine/apps/api/serializers/alert_receive_channel.py index 41c0e979..c3fbc58e 100644 --- a/engine/apps/api/serializers/alert_receive_channel.py +++ b/engine/apps/api/serializers/alert_receive_channel.py @@ -13,7 +13,7 @@ from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import Graf from apps.alerts.models import AlertReceiveChannel from apps.base.messaging import get_messaging_backends from apps.integrations.legacy_prefix import has_legacy_prefix -from apps.labels.models import AlertReceiveChannelAssociatedLabel, LabelKeyCache, LabelValueCache +from apps.labels.models import LabelKeyCache, LabelValueCache from apps.labels.types import LabelKey from apps.user_management.models import Organization from common.api_helpers.custom_fields import TeamPrimaryKeyRelatedField @@ -33,7 +33,7 @@ def _additional_settings_serializer_from_type(integration_type: str) -> serializ return cls -# TODO: refactor this types as w no longer support storing static labels in this field. +# TODO: refactor this types as we no longer support storing static labels in this field. # AlertGroupCustomLabelValue represents custom alert group label value for API requests # It handles two types of label's value: # 1. Just Label Value from a label repo for a static label @@ -79,7 +79,10 @@ class AdditionalSettingsField(serializers.DictField): class CustomLabelSerializer(serializers.Serializer): - """This serializer is consistent with apps.api.serializers.labels.LabelPairSerializer, but allows null for value ID.""" + """ + This serializer is consistent with apps.api.serializers.labels.LabelPairSerializer, + but allows null for value ID to support templated labels. + """ class CustomLabelKeySerializer(serializers.Serializer): id = serializers.CharField() @@ -97,98 +100,12 @@ class CustomLabelSerializer(serializers.Serializer): class IntegrationAlertGroupLabelsSerializer(serializers.Serializer): - """Alert group labels configuration for the integration. See AlertReceiveChannel.alert_group_labels for details.""" - # todo: inheritable field is deprecated. Remove in a future release inheritable = serializers.DictField(child=serializers.BooleanField(), required=False) custom = CustomLabelSerializer(many=True) template = serializers.CharField(allow_null=True) - @staticmethod - def pop_alert_group_labels(validated_data: dict) -> IntegrationAlertGroupLabels | None: - """Get alert group labels from validated data.""" - - # the "alert_group_labels" field is optional, so either all 2 fields are present or none - # "inheritable" field is deprecated - if "custom" not in validated_data: - return None - - return { - "inheritable": validated_data.pop("inheritable", None), # deprecated - "custom": validated_data.pop("custom"), - "template": validated_data.pop("template"), - } - - @classmethod - def update( - cls, instance: AlertReceiveChannel, alert_group_labels: IntegrationAlertGroupLabels | None - ) -> AlertReceiveChannel: - if alert_group_labels is None: - return instance - - # update DB cache for custom labels - cls._create_custom_labels(instance.organization, alert_group_labels["custom"]) - # save static labels as integration labels - # todo: it's needed to cover delay between backend and frontend rollout, and can be removed later - cls._save_static_labels_as_integration_labels(instance, alert_group_labels["custom"]) - # update custom labels - instance.alert_group_labels_custom = cls._custom_labels_to_internal_value(alert_group_labels["custom"]) - - # update template - instance.alert_group_labels_template = alert_group_labels["template"] - - instance.save(update_fields=["alert_group_labels_custom", "alert_group_labels_template"]) - return instance - - @staticmethod - def _create_custom_labels(organization: Organization, labels: AlertGroupCustomLabelsAPI) -> None: - """Create LabelKeyCache and LabelValueCache objects for custom labels.""" - - label_keys = [ - LabelKeyCache( - id=label["key"]["id"], - name=label["key"]["name"], - prescribed=label["key"]["prescribed"], - organization=organization, - ) - for label in labels - ] - - label_values = [ - LabelValueCache( - id=label["value"]["id"], - name=label["value"]["name"], - prescribed=label["value"]["prescribed"], - key_id=label["key"]["id"], - ) - for label in labels - if label["value"]["id"] # don't create LabelValueCache objects for templated labels - ] - - LabelKeyCache.objects.bulk_create(label_keys, ignore_conflicts=True, batch_size=5000) - LabelValueCache.objects.bulk_create(label_values, ignore_conflicts=True, batch_size=5000) - - @staticmethod - def _save_static_labels_as_integration_labels(instance: AlertReceiveChannel, labels: AlertGroupCustomLabelsAPI): - labels_associations_to_create = [] - labels_copy = labels[:] - for label in labels_copy: - if label["value"]["id"] is not None: - labels_associations_to_create.append( - AlertReceiveChannelAssociatedLabel( - key_id=label["key"]["id"], - value_id=label["value"]["id"], - organization=instance.organization, - alert_receive_channel=instance, - ) - ) - labels.remove(label) - AlertReceiveChannelAssociatedLabel.objects.bulk_create( - labels_associations_to_create, ignore_conflicts=True, batch_size=5000 - ) - - @classmethod - def to_representation(cls, instance: AlertReceiveChannel) -> IntegrationAlertGroupLabels: + def to_representation(self, instance: AlertReceiveChannel) -> IntegrationAlertGroupLabels: """ The API representation of alert group labels is very different from the underlying model. @@ -200,20 +117,28 @@ class IntegrationAlertGroupLabelsSerializer(serializers.Serializer): return { # todo: "inheritable" field is deprecated, remove in a future release. "inheritable": {label.key_id: True for label in instance.labels.all()}, - "custom": cls._custom_labels_to_representation(instance.alert_group_labels_custom), + "custom": self._custom_labels_to_representation(instance.alert_group_labels_custom), "template": instance.alert_group_labels_template, } - @staticmethod - def _custom_labels_to_internal_value( - custom_labels: AlertGroupCustomLabelsAPI, - ) -> AlertReceiveChannel.DynamicLabelsConfigDB: - """Convert custom labels from API representation to the schema used by the JSONField on the model.""" + def to_internal_value(self, validated_data: dict) -> dict: + """ + to_internal_value converts dynamic labels from API format to internal format and updates labels cache + """ + alert_group_labels = self._pop_alert_group_labels(validated_data) + if alert_group_labels is None: + return validated_data - return [ - [label["key"]["id"], label["value"]["id"], None if label["value"]["id"] else label["value"]["name"]] - for label in custom_labels - ] + organization = self.context["request"].auth.organization + self._create_custom_labels(organization, alert_group_labels["custom"] if alert_group_labels else []) + + custom_labels = ( + self._custom_labels_to_internal_value(alert_group_labels["custom"]) if alert_group_labels else [] + ) + validated_data["alert_group_labels_custom"] = custom_labels or None + validated_data["alert_group_labels_template"] = alert_group_labels["template"] if alert_group_labels else None + + return validated_data @staticmethod def _custom_labels_to_representation( @@ -262,6 +187,63 @@ class IntegrationAlertGroupLabelsSerializer(serializers.Serializer): if key_id in label_key_index and (value_id in label_value_index or not value_id) ] + @staticmethod + def _custom_labels_to_internal_value( + custom_labels: AlertGroupCustomLabelsAPI, + ) -> AlertReceiveChannel.DynamicLabelsConfigDB: + """ + Convert dynamic labels from API representation to the schema used by the JSONField on the model: + [[key.id, None, template(stored in value.name here)]]. + """ + + return [ + [label["key"]["id"], None, label["value"]["name"]] + for label in custom_labels + if label["value"]["id"] is None + # value.id is not None for deprecated static labels, for dynamic labels it's always None + ] + + @staticmethod + def _pop_alert_group_labels(validated_data: dict) -> IntegrationAlertGroupLabels | None: + # the "alert_group_labels" field is optional, so either all 2 fields (custom and template) are present or none + # "inheritable" field is deprecated + if "custom" not in validated_data: + return None + + return { + "inheritable": validated_data.pop("inheritable", None), # deprecated + "custom": validated_data.pop("custom"), + "template": validated_data.pop("template"), + } + + @staticmethod + def _create_custom_labels(organization: Organization, labels: AlertGroupCustomLabelsAPI) -> None: + """Create LabelKeyCache and LabelValueCache objects for labels used in labelsSchema""" + + label_keys = [ + LabelKeyCache( + id=label["key"]["id"], + name=label["key"]["name"], + prescribed=label["key"]["prescribed"], + organization=organization, + ) + for label in labels + ] + + label_values = [ + LabelValueCache( + id=label["value"]["id"], + name=label["value"]["name"], + prescribed=label["value"]["prescribed"], + key_id=label["key"]["id"], + ) + for label in labels + if label["value"]["id"] # don't create LabelValueCache objects for templated labels + ] + + LabelKeyCache.objects.bulk_create(label_keys, ignore_conflicts=True, batch_size=5000) + LabelValueCache.objects.bulk_create(label_values, ignore_conflicts=True, batch_size=5000) + class AlertReceiveChannelSerializer( EagerLoadingMixin, LabelsSerializerMixin, serializers.ModelSerializer[AlertReceiveChannel] @@ -411,9 +393,8 @@ class AlertReceiveChannelSerializer( if _integration.slug == integration: is_able_to_autoresolve = _integration.is_able_to_autoresolve - # pop associated labels and alert group labels, so they are not passed to AlertReceiveChannel.create + # pop associated labels, so they are not passed to AlertReceiveChannel.create. They will be created later. labels = validated_data.pop("labels", None) - alert_group_labels = IntegrationAlertGroupLabelsSerializer.pop_alert_group_labels(validated_data) try: instance = AlertReceiveChannel.create( @@ -425,14 +406,16 @@ class AlertReceiveChannelSerializer( except AlertReceiveChannel.DuplicateDirectPagingError: raise BadRequest(detail=AlertReceiveChannel.DuplicateDirectPagingError.DETAIL) - # Create label associations first, then update alert group labels + # Create label associations self.update_labels_association_if_needed(labels, instance, organization) - instance = IntegrationAlertGroupLabelsSerializer.update(instance, alert_group_labels) # Create default webhooks if needed if create_default_webhooks and hasattr(instance.config, "create_default_webhooks"): instance.config.create_default_webhooks(instance) + # Create default service_name label + instance.create_service_name_dynamic_label() + return instance def update(self, instance, validated_data): @@ -440,11 +423,6 @@ class AlertReceiveChannelSerializer( labels = validated_data.pop("labels", None) self.update_labels_association_if_needed(labels, instance, self.context["request"].auth.organization) - # update alert group labels - instance = IntegrationAlertGroupLabelsSerializer.update( - instance, IntegrationAlertGroupLabelsSerializer.pop_alert_group_labels(validated_data) - ) - try: updated_instance = super().update(instance, validated_data) except AlertReceiveChannel.DuplicateDirectPagingError: diff --git a/engine/apps/api/tests/test_alert_receive_channel.py b/engine/apps/api/tests/test_alert_receive_channel.py index fabf3196..fa7d4748 100644 --- a/engine/apps/api/tests/test_alert_receive_channel.py +++ b/engine/apps/api/tests/test_alert_receive_channel.py @@ -7,10 +7,12 @@ from rest_framework import serializers, status from rest_framework.response import Response from rest_framework.test import APIClient +from apps.alerts.constants import SERVICE_LABEL, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION +from apps.alerts.grafana_alerting_sync_manager import GrafanaAlertingSyncManager from apps.alerts.models import AlertReceiveChannel, EscalationPolicy from apps.api.permissions import LegacyAccessControlRole from apps.base.messaging import load_backend -from apps.labels.models import LabelKeyCache, LabelValueCache +from apps.labels.models import LabelKeyCache from common.exceptions import BacksyncIntegrationRequestError @@ -1717,24 +1719,20 @@ def test_alert_group_labels_put( label_3 = make_static_label_config(organization, alert_receive_channel) custom = [ - # plain label + # static label (deprecated, will be skipped) { "key": {"id": label_2.key.id, "name": label_2.key.name, "prescribed": False}, "value": {"id": label_2.value.id, "name": label_2.value.name, "prescribed": False}, }, - # plain label not present in DB cache - { - "key": {"id": "hello", "name": "world", "prescribed": False}, - "value": {"id": "foo", "name": "bar", "prescribed": False}, - }, - # templated label + # dynamic label { "key": {"id": label_3.key.id, "name": label_3.key.name, "prescribed": False}, - "value": { - "id": None, - "name": "{{ payload.foo }}", - "prescribed": False, - }, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + }, + # dynamic label not present in DB cache + { + "key": {"id": "hello", "name": "world", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.bar }}", "prescribed": False}, }, ] template = "{{ payload.labels | tojson }}" # advanced template @@ -1751,31 +1749,31 @@ def test_alert_group_labels_put( response = client.put(url, data, format="json", **make_user_auth_headers(user, token)) assert response.status_code == status.HTTP_200_OK - # check static labels were saved as integration labels assert response.json()["alert_group_labels"] == { - "inheritable": {label_1.key_id: True, label_2.key_id: True, label_3.key_id: True, "hello": True}, + "inheritable": {label_1.key_id: True, label_2.key_id: True, label_3.key_id: True}, "custom": [ { "key": {"id": label_3.key.id, "name": label_3.key.name, "prescribed": False}, "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, - } + }, + { + "key": {"id": "hello", "name": "world", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.bar }}", "prescribed": False}, + }, ], "template": template, } alert_receive_channel.refresh_from_db() - # check static labels are not in the custom labels list + # check deprecated static label is not in the custom labels list assert alert_receive_channel.alert_group_labels_custom == [ [label_3.key_id, None, "{{ payload.foo }}"], + ["hello", None, "{{ payload.bar }}"], ] assert alert_receive_channel.alert_group_labels_template == template - # check static labels were assigned to integration - assert alert_receive_channel.labels.filter(key_id__in=[label_2.key_id, "hello"]).count() == 2 - # check label keys & values are created - key = LabelKeyCache.objects.filter(id="hello", name="world", organization=organization).first() - assert key is not None - assert LabelValueCache.objects.filter(key=key, id="foo", name="bar").exists() + # check label key is created + assert LabelKeyCache.objects.filter(id="hello", name="world", organization=organization).exists() @pytest.mark.django_db @@ -1850,6 +1848,130 @@ def test_alert_group_labels_post(alert_receive_channel_internal_api_setup, make_ assert alert_receive_channel.alert_group_labels_template == "{{ payload.labels | tojson }}" +@patch.object(GrafanaAlertingSyncManager, "check_for_connection_errors", return_value=None) +@pytest.mark.django_db +def test_create_service_name_label_for_new_alerting_integration( + _, + make_organization_and_user_with_plugin_token, + make_label_key, + make_user_auth_headers, +): + """Test adding default `service_name` dynamic label for new alerting integration.""" + + organization, user, token = make_organization_and_user_with_plugin_token() + service_name_label_key = make_label_key( + organization=organization, key_id="test", key_name=SERVICE_LABEL, prescribed=True + ) + + client = APIClient() + url = reverse("api-internal:alert_receive_channel-list") + + data = { + "integration": AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + "team": None, + "labels": [], + "alert_group_labels": { + "inheritable": {}, + "custom": [ + { + "key": {"id": "testid", "name": "testname", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + } + ], + "template": None, + }, + } + expected_alert_group_labels_response = { + "inheritable": {}, + "custom": [ + { + "key": {"id": service_name_label_key.id, "name": SERVICE_LABEL, "prescribed": True}, + "value": {"id": None, "name": SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION, "prescribed": False}, + }, + { + "key": {"id": "testid", "name": "testname", "prescribed": False}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + }, + ], + "template": None, + } + expected_alert_group_labels = [ + [service_name_label_key.id, None, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION], + ["testid", None, "{{ payload.foo }}"], + ] + + response = client.post(url, data, format="json", **make_user_auth_headers(user, token)) + + assert response.status_code == status.HTTP_201_CREATED + assert response.json()["alert_group_labels"] == expected_alert_group_labels_response + + alert_receive_channel = organization.alert_receive_channels.filter(public_primary_key=response.json()["id"]).first() + + assert alert_receive_channel is not None + assert alert_receive_channel.alert_group_labels_custom == expected_alert_group_labels + + +@patch.object(GrafanaAlertingSyncManager, "check_for_connection_errors", return_value=None) +@pytest.mark.django_db +def test_skip_creating_service_name_label_for_new_alerting_integration( + _, + make_organization_and_user_with_plugin_token, + make_label_key, + make_user_auth_headers, +): + """ + Test skipping adding default `service_name` dynamic label for new alerting integration, + when this label was already added by user + """ + + organization, user, token = make_organization_and_user_with_plugin_token() + service_name_label_key = make_label_key( + organization=organization, key_id="test", key_name=SERVICE_LABEL, prescribed=True + ) + + client = APIClient() + url = reverse("api-internal:alert_receive_channel-list") + + data = { + "integration": AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + "team": None, + "labels": [], + "alert_group_labels": { + "inheritable": {}, + "custom": [ + { + "key": {"id": service_name_label_key.id, "name": SERVICE_LABEL, "prescribed": True}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + } + ], + "template": None, + }, + } + expected_alert_group_labels_response = { + "inheritable": {}, + "custom": [ + { + "key": {"id": service_name_label_key.id, "name": SERVICE_LABEL, "prescribed": True}, + "value": {"id": None, "name": "{{ payload.foo }}", "prescribed": False}, + } + ], + "template": None, + } + expected_alert_group_labels = [ + [service_name_label_key.id, None, "{{ payload.foo }}"], + ] + + response = client.post(url, data, format="json", **make_user_auth_headers(user, token)) + + assert response.status_code == status.HTTP_201_CREATED + assert response.json()["alert_group_labels"] == expected_alert_group_labels_response + + alert_receive_channel = organization.alert_receive_channels.filter(public_primary_key=response.json()["id"]).first() + + assert alert_receive_channel is not None + assert alert_receive_channel.alert_group_labels_custom == expected_alert_group_labels + + @pytest.mark.django_db def test_team_not_updated_if_not_in_data( make_organization_and_user_with_plugin_token, diff --git a/engine/apps/api/tests/test_labels.py b/engine/apps/api/tests/test_labels.py index 2c36363e..47fdb3c2 100644 --- a/engine/apps/api/tests/test_labels.py +++ b/engine/apps/api/tests/test_labels.py @@ -84,6 +84,30 @@ def test_get_update_key_put( assert response.json() == expected_result +@patch( + "apps.labels.client.LabelsAPIClient.get_label_by_key_name", + return_value=( + {"key": {"id": "keyid123", "name": "keyname12"}, "values": [{"id": "valueid123", "name": "yolo"}]}, + MockResponse(status_code=200), + ), +) +@pytest.mark.django_db +def test_get_key_by_name( + mocked_get_label_by_key_name, + make_organization_and_user_with_plugin_token, + make_user_auth_headers, +): + _, user, token = make_organization_and_user_with_plugin_token() + client = APIClient() + url = reverse("api-internal:get_key_by_name", kwargs={"key_name": "keyname12"}) + response = client.get(url, format="json", **make_user_auth_headers(user, token)) + expected_result = {"key": {"id": "keyid123", "name": "keyname12"}, "values": [{"id": "valueid123", "name": "yolo"}]} + + assert mocked_get_label_by_key_name.called + assert response.status_code == status.HTTP_200_OK + assert response.json() == expected_result + + @patch( "apps.labels.client.LabelsAPIClient.add_value", return_value=( diff --git a/engine/apps/api/urls.py b/engine/apps/api/urls.py index 46999cee..51cee7da 100644 --- a/engine/apps/api/urls.py +++ b/engine/apps/api/urls.py @@ -127,6 +127,11 @@ urlpatterns += [ LabelsViewSet.as_view({"get": "get_key", "put": "rename_key"}), name="get_update_key", ), + re_path( + r"^labels/name/(?P[\w\-]+)/?$", + LabelsViewSet.as_view({"get": "get_key_by_name"}), + name="get_key_by_name", + ), re_path( r"^labels/id/(?P[\w\-]+)/values/?$", LabelsViewSet.as_view({"post": "add_value"}), name="add_value" ), diff --git a/engine/apps/api/views/alert_receive_channel.py b/engine/apps/api/views/alert_receive_channel.py index 19c40a57..7c7ac102 100644 --- a/engine/apps/api/views/alert_receive_channel.py +++ b/engine/apps/api/views/alert_receive_channel.py @@ -312,7 +312,7 @@ class AlertReceiveChannelView( if instance is None: # pop extra fields so they are not passed to AlertReceiveChannel(**serializer.validated_data) serializer.validated_data.pop("create_default_webhooks", None) - IntegrationAlertGroupLabelsSerializer.pop_alert_group_labels(serializer.validated_data) + IntegrationAlertGroupLabelsSerializer._pop_alert_group_labels(serializer.validated_data) # create in-memory instance to test with the (possible) unsaved data instance = AlertReceiveChannel(**serializer.validated_data) diff --git a/engine/apps/api/views/labels.py b/engine/apps/api/views/labels.py index d27215f2..f290765b 100644 --- a/engine/apps/api/views/labels.py +++ b/engine/apps/api/views/labels.py @@ -17,6 +17,7 @@ from apps.api.serializers.labels import ( from apps.auth_token.auth import PluginAuthentication from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.tasks import update_instances_labels_cache, update_label_option_cache +from apps.labels.types import LabelOption from apps.labels.utils import is_labels_feature_enabled from common.api_helpers.exceptions import BadRequest @@ -44,6 +45,7 @@ class LabelsViewSet(LabelsFeatureFlagViewSet): "rename_value": [RBACPermission.Permissions.LABEL_WRITE], "get_keys": [RBACPermission.Permissions.LABEL_READ], "get_key": [RBACPermission.Permissions.LABEL_READ], + "get_key_by_name": [RBACPermission.Permissions.LABEL_READ], "get_value": [RBACPermission.Permissions.LABEL_READ], } @@ -66,6 +68,18 @@ class LabelsViewSet(LabelsFeatureFlagViewSet): self._update_labels_cache(label_option) return Response(label_option, status=response.status_code) + @extend_schema(responses=LabelOptionSerializer) + def get_key_by_name(self, request, key_name): + """ + get_key_by_name returns LabelOption – key with the list of values + """ + organization = self.request.auth.organization + label_option, response = LabelsAPIClient( + organization.grafana_url, + organization.api_token, + ).get_label_by_key_name(key_name) + return Response(label_option, status=response.status_code) + @extend_schema(responses=LabelValueSerializer) def get_value(self, request, key_id, value_id): """get_value returns a Value""" @@ -133,7 +147,7 @@ class LabelsViewSet(LabelsFeatureFlagViewSet): self._update_labels_cache(label_option) return Response(label_option, status=status) - def _update_labels_cache(self, label_option): + def _update_labels_cache(self, label_option: LabelOption): if not label_option: return serializer = LabelOptionSerializer(data=label_option) diff --git a/engine/apps/labels/alert_group_labels.py b/engine/apps/labels/alert_group_labels.py index 70dafead..8271d161 100644 --- a/engine/apps/labels/alert_group_labels.py +++ b/engine/apps/labels/alert_group_labels.py @@ -19,9 +19,14 @@ LABEL_VALUE_TYPES = (str, int, float, bool) MAX_LABELS_PER_ALERT_GROUP = 15 -def gather_labels_from_alert_receive_channel_and_raw_request_data( +def gather_alert_labels( alert_receive_channel: "AlertReceiveChannel", raw_request_data: "Alert.RawRequestData" ) -> typing.Optional[types.AlertLabels]: + """ + gather_alert_labels gathers labels for an alert received by the alert receive channel. + 1. static labels - inherits them from integration. + 2. dynamic labels and multi-label extraction template – templating the raw_request_data. + """ if not is_labels_feature_enabled(alert_receive_channel.organization): return None @@ -37,7 +42,7 @@ def gather_labels_from_alert_receive_channel_and_raw_request_data( return labels -def assign_labels( +def save_alert_group_labels( alert_group: "AlertGroup", alert_receive_channel: "AlertReceiveChannel", labels: typing.Optional[types.AlertLabels] ) -> None: from apps.labels.models import AlertGroupAssociatedLabel diff --git a/engine/apps/labels/client.py b/engine/apps/labels/client.py index 3310694d..645a09af 100644 --- a/engine/apps/labels/client.py +++ b/engine/apps/labels/client.py @@ -65,6 +65,15 @@ class LabelsAPIClient: self._check_response(response) return response.json(), response + def get_label_by_key_name( + self, key_name: str + ) -> typing.Tuple[typing.Optional["LabelOption"], requests.models.Response]: + url = urljoin(self.api_url, f"name/{key_name}") + + response = requests.get(url, timeout=TIMEOUT, headers=self._request_headers) + self._check_response(response) + return response.json(), response + def get_value( self, key_id: str, value_id: str ) -> typing.Tuple[typing.Optional["LabelValue"], requests.models.Response]: diff --git a/engine/apps/labels/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py b/engine/apps/labels/migrations/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py similarity index 78% rename from engine/apps/labels/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py rename to engine/apps/labels/migrations/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py index 91504bd9..aec1e6d7 100644 --- a/engine/apps/labels/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py +++ b/engine/apps/labels/migrations/0007_remove_alertreceivechannelassociatedlabel_inheritable_db.py @@ -1,8 +1,7 @@ -# TODO: MOVE IT TO /migrations DIRECTORY IN FUTURE RELEASE - # Generated by Django 4.2.15 on 2024-11-26 13:37 from django.db import migrations +import django_migration_linter as linter import common.migrations.remove_field @@ -13,9 +12,10 @@ class Migration(migrations.Migration): ] operations = [ + linter.IgnoreMigration(), common.migrations.remove_field.RemoveFieldDB( model_name="AlertReceiveChannelAssociatedLabel", name="inheritable", - remove_state_migration=("labels", "0007_remove_alertreceivechannelassociatedlabel_inheritable_state"), + remove_state_migration=("labels", "0006_remove_alertreceivechannelassociatedlabel_inheritable_state"), ), ] diff --git a/engine/apps/labels/models.py b/engine/apps/labels/models.py index ecd06c26..53a60ce3 100644 --- a/engine/apps/labels/models.py +++ b/engine/apps/labels/models.py @@ -1,8 +1,10 @@ +import logging import typing from django.db import models from django.utils import timezone +from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.tasks import update_label_pairs_cache from apps.labels.types import LabelPair from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES @@ -10,6 +12,7 @@ from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES if typing.TYPE_CHECKING: from apps.user_management.models import Organization +logger = logging.getLogger(__name__) MAX_KEY_NAME_LENGTH = 200 MAX_VALUE_NAME_LENGTH = 200 @@ -26,6 +29,36 @@ class LabelKeyCache(models.Model): def is_outdated(self) -> bool: return timezone.now() - self.last_synced > timezone.timedelta(minutes=LABEL_OUTDATED_TIMEOUT_MINUTES) + @classmethod + def get_or_create_by_name(cls, organization: "Organization", key_name: str) -> typing.Optional["LabelKeyCache"]: + """ + `get_or_create_by_name` tries to get label key with provided name from cache. + If there is no label key with this name in the cache - it tries to fetch it from the labels repo API. + """ + label_key = cls.objects.filter(organization=organization, name=key_name).first() + if label_key: + return label_key + + # fetch label key from labels repo + try: + label, _ = LabelsAPIClient(organization.grafana_url, organization.api_token).get_label_by_key_name( + label_key + ) + except LabelsRepoAPIException as e: + logger.error(f"Failed to get or create label key {key_name} for organization {organization.id}: {e}") + return None + + # save labels key in cache + label_key = LabelKeyCache( + id=label["key"]["id"], + name=label["key"]["name"], + organization=organization, + prescribed=label["key"]["prescribed"], + ) + label_key.save() + + return label_key + class LabelValueCache(models.Model): id = models.CharField(primary_key=True, editable=False, max_length=36) diff --git a/engine/apps/labels/tasks.py b/engine/apps/labels/tasks.py index 9ed1147f..89a3fc00 100644 --- a/engine/apps/labels/tasks.py +++ b/engine/apps/labels/tasks.py @@ -8,12 +8,13 @@ from django.utils import timezone from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.types import LabelOption, LabelPair from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES, get_associating_label_model -from apps.user_management.models import Organization from common.custom_celery_tasks import shared_dedicated_queue_retry_task logger = get_task_logger(__name__) logger.setLevel(logging.DEBUG) +MAX_RETRIES = 1 if settings.DEBUG else 10 + class KVPair(typing.TypedDict): value_name: str @@ -129,11 +130,10 @@ def _update_labels_cache(values_id_to_pair: typing.Dict[str, LabelPair]): LabelValueCache.objects.bulk_update(values, fields=["name", "last_synced", "prescribed"]) -@shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else 10 -) +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) def update_instances_labels_cache(organization_id: int, instance_ids: typing.List[int], instance_model_name: str): from apps.labels.models import LabelValueCache + from apps.user_management.models import Organization now = timezone.now() organization = Organization.objects.get(id=organization_id) @@ -162,3 +162,69 @@ def update_instances_labels_cache(organization_id: int, instance_ids: typing.Lis continue if label_option: update_label_option_cache.apply_async((label_option,)) + + +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) +def add_service_label_for_alerting_integrations(): + """ + This task should be called manually and only once. + Starts tasks that add `service_name` dynamic label for Alerting integrations + """ + + from apps.alerts.models import AlertReceiveChannel + + organization_ids = ( + AlertReceiveChannel.objects.filter( + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + organization__is_grafana_labels_enabled=True, + organization__deleted_at__isnull=True, + ) + .values_list("organization", flat=True) + .distinct() + ) + + for idx, organization_id in enumerate(organization_ids): + countdown = idx // 10 + add_service_label_per_org.apply_async((organization_id,), countdown=countdown) + + +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) +def add_service_label_per_org(organization_id: int): + """Add `service_name` dynamic label for all Alerting integrations per organization""" + + from apps.alerts.models import AlertReceiveChannel + from apps.user_management.models import Organization + + organization = Organization.objects.get(id=organization_id) + service_label_custom = AlertReceiveChannel._build_service_name_label_custom(organization) + if not service_label_custom: + return + integrations = AlertReceiveChannel.objects.filter( + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + organization=organization, + ) + integrations_to_update = [] + # add service label to integration custom labels if it's not already there + for integration in integrations: + dynamic_service_label_exists = False + dynamic_labels = integration.alert_group_labels_custom if integration.alert_group_labels_custom else [] + for label in dynamic_labels: + if label[0] == service_label_custom[0]: + dynamic_service_label_exists = True + break + if dynamic_service_label_exists: + continue + integration.alert_group_labels_custom = [service_label_custom] + dynamic_labels + integrations_to_update.append(integration) + + AlertReceiveChannel.objects.bulk_update(integrations_to_update, fields=["alert_group_labels_custom"]) + + +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES) +def add_service_label_for_integration(alert_receive_channel_id: int): + """Add `service_name` dynamic label for Alerting integration""" + + from apps.alerts.models import AlertReceiveChannel + + alert_receive_channel = AlertReceiveChannel.objects.get(id=alert_receive_channel_id) + alert_receive_channel.create_service_name_dynamic_label(True) diff --git a/engine/apps/labels/tests/test_add_service_label.py b/engine/apps/labels/tests/test_add_service_label.py new file mode 100644 index 00000000..9fae32fd --- /dev/null +++ b/engine/apps/labels/tests/test_add_service_label.py @@ -0,0 +1,40 @@ +import pytest + +from apps.alerts.constants import SERVICE_LABEL, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION +from apps.alerts.models import AlertReceiveChannel +from apps.labels.tasks import add_service_label_per_org + + +@pytest.mark.django_db +def test_add_service_label_per_org(make_organization, make_alert_receive_channel, make_label_key): + organization = make_organization() + alert_receive_channel_alerting_no_labels = make_alert_receive_channel( + organization=organization, integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING + ) + alert_receive_channel_alerting_with_label = make_alert_receive_channel( + organization=organization, + integration=AlertReceiveChannel.INTEGRATION_GRAFANA_ALERTING, + alert_group_labels_custom=[["test", None, "test_template"]], + ) + alert_receive_channel_grafana = make_alert_receive_channel( + organization=organization, integration=AlertReceiveChannel.INTEGRATION_GRAFANA + ) + service_name_label_key = make_label_key(organization, key_id="service_label_id", key_name=SERVICE_LABEL) + + expected_service_name_label = [service_name_label_key.id, None, SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION] + + add_service_label_per_org(organization.id) + + for alert_receive_channel in [ + alert_receive_channel_alerting_no_labels, + alert_receive_channel_alerting_with_label, + alert_receive_channel_grafana, + ]: + alert_receive_channel.refresh_from_db() + + assert alert_receive_channel_alerting_no_labels.alert_group_labels_custom == [expected_service_name_label] + assert alert_receive_channel_alerting_with_label.alert_group_labels_custom == [ + expected_service_name_label, + ["test", None, "test_template"], + ] + assert alert_receive_channel_grafana.alert_group_labels_custom is None diff --git a/engine/apps/labels/tests/test_labels_cache.py b/engine/apps/labels/tests/test_labels_cache.py index 870c76b9..f8653375 100644 --- a/engine/apps/labels/tests/test_labels_cache.py +++ b/engine/apps/labels/tests/test_labels_cache.py @@ -3,7 +3,7 @@ from unittest.mock import call, patch import pytest from django.utils import timezone -from apps.labels.client import LabelsRepoAPIException +from apps.labels.client import LabelsAPIClient, LabelsRepoAPIException from apps.labels.models import LabelKeyCache, LabelValueCache from apps.labels.tasks import update_instances_labels_cache, update_labels_cache from apps.labels.utils import LABEL_OUTDATED_TIMEOUT_MINUTES @@ -158,3 +158,27 @@ def test_update_instances_labels_cache_error(make_organization, make_alert_recei ) mock_get_label_by_key_id.assert_called_once_with(label_association.key_id) mock_update_cache.assert_not_called() + + +@pytest.mark.django_db +def test_get_or_create_label_key_cache_by_name(make_organization): + organization = make_organization() + label_key_data = {"id": "testid", "name": "testname", "prescribed": False} + + # test label does not exist in labels repo + with patch.object(LabelsAPIClient, "get_label_by_key_name", side_effect=LabelsRepoAPIException("test", "test")): + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + + assert label is None + + # test label does not exist in cache + with patch.object(LabelsAPIClient, "get_label_by_key_name", return_value=({"key": label_key_data}, None)): + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + + assert label is not None + assert LabelKeyCache.objects.filter(id=label.id).exists() + + # test label exists in cache + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + assert label is not None + assert LabelKeyCache.objects.filter(id=label.id).exists() diff --git a/engine/apps/public_api/serializers/integrations.py b/engine/apps/public_api/serializers/integrations.py index 704c7660..e35e588d 100644 --- a/engine/apps/public_api/serializers/integrations.py +++ b/engine/apps/public_api/serializers/integrations.py @@ -123,6 +123,7 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main connection_error = GrafanaAlertingSyncManager.check_for_connection_errors(organization) if connection_error: raise serializers.ValidationError(connection_error) + validated_data = self._add_service_label_if_needed(organization, validated_data) user = self.context["request"].user with transaction.atomic(): try: @@ -140,6 +141,8 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main ) serializer.is_valid(raise_exception=True) serializer.save() + # Create default service_name label + instance.create_service_name_dynamic_label() return instance def update(self, *args, **kwargs): diff --git a/engine/settings/celery_task_routes.py b/engine/settings/celery_task_routes.py index fff08a2a..de222f0a 100644 --- a/engine/settings/celery_task_routes.py +++ b/engine/settings/celery_task_routes.py @@ -17,6 +17,9 @@ CELERY_TASK_ROUTES = { "apps.labels.tasks.update_instances_labels_cache": {"queue": "default"}, "apps.labels.tasks.update_label_option_cache": {"queue": "default"}, "apps.labels.tasks.update_label_pairs_cache": {"queue": "default"}, + "apps.labels.tasks.add_service_label_for_alerting_integrations": {"queue": "default"}, + "apps.labels.tasks.add_service_label_per_org": {"queue": "default"}, + "apps.labels.tasks.add_service_label_for_integration": {"queue": "default"}, "apps.metrics_exporter.tasks.start_calculate_and_cache_metrics": {"queue": "default"}, "apps.metrics_exporter.tasks.update_metrics_for_alert_group": {"queue": "default"}, "apps.metrics_exporter.tasks.update_metrics_for_user": {"queue": "default"}, From 7287367646f79566d167ce91dae3af62b6be04eb Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Tue, 14 Jan 2025 12:21:52 +0100 Subject: [PATCH 06/13] Fix typo (#5413) # What this PR does fix typo ## Which issue(s) this PR closes Related to [issue link here] ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- engine/apps/alerts/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/apps/alerts/constants.py b/engine/apps/alerts/constants.py index c91cabfa..637f02a0 100644 --- a/engine/apps/alerts/constants.py +++ b/engine/apps/alerts/constants.py @@ -28,4 +28,4 @@ class AlertGroupState(str, Enum): SERVICE_LABEL = "service_name" -SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION = "{{ payload.common_labels.service_name }}" +SERVICE_LABEL_TEMPLATE_FOR_ALERTING_INTEGRATION = "{{ payload.commonLabels.service_name }}" From dcb37417efc67240deebd76bce8d415505cb9668 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Tue, 14 Jan 2025 14:40:32 +0100 Subject: [PATCH 07/13] Fix getting label by name (#5414) # What this PR does Handle JSONDecodeError on getting label key by name ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- engine/apps/labels/models.py | 3 ++- engine/apps/labels/tests/test_labels_cache.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/engine/apps/labels/models.py b/engine/apps/labels/models.py index 53a60ce3..ae81d0db 100644 --- a/engine/apps/labels/models.py +++ b/engine/apps/labels/models.py @@ -1,5 +1,6 @@ import logging import typing +from json import JSONDecodeError from django.db import models from django.utils import timezone @@ -44,7 +45,7 @@ class LabelKeyCache(models.Model): label, _ = LabelsAPIClient(organization.grafana_url, organization.api_token).get_label_by_key_name( label_key ) - except LabelsRepoAPIException as e: + except (LabelsRepoAPIException, JSONDecodeError) as e: logger.error(f"Failed to get or create label key {key_name} for organization {organization.id}: {e}") return None diff --git a/engine/apps/labels/tests/test_labels_cache.py b/engine/apps/labels/tests/test_labels_cache.py index f8653375..531b2a8f 100644 --- a/engine/apps/labels/tests/test_labels_cache.py +++ b/engine/apps/labels/tests/test_labels_cache.py @@ -1,3 +1,4 @@ +from json import JSONDecodeError from unittest.mock import call, patch import pytest @@ -165,6 +166,12 @@ def test_get_or_create_label_key_cache_by_name(make_organization): organization = make_organization() label_key_data = {"id": "testid", "name": "testname", "prescribed": False} + # test empty response from label repo (json decode error) + with patch.object(LabelsAPIClient, "get_label_by_key_name", side_effect=JSONDecodeError("test", "test", 0)): + label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) + + assert label is None + # test label does not exist in labels repo with patch.object(LabelsAPIClient, "get_label_by_key_name", side_effect=LabelsRepoAPIException("test", "test")): label = LabelKeyCache.get_or_create_by_name(organization, label_key_data["name"]) From 4c92826c2625e24e3e367ca0af39f249ed1298ee Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Thu, 16 Jan 2025 09:19:16 -0300 Subject: [PATCH 08/13] chore: update schedule checks notification period and improve wording (#5412) Related to https://github.com/grafana/oncall-private/issues/2994 - Extend gaps/empty shift checks to consider 30 days (customizable via param, eventually make it customizable per schedule?); ie. every week (per beat schedule), check the schedule next 30 days - Trigger checks via async task on schedule API updates (instead of a sync call) - Update notifications wording / link to schedule --- engine/apps/api/serializers/schedule_base.py | 13 +++++++--- .../apps/api/serializers/schedule_calendar.py | 8 ++++-- engine/apps/api/serializers/schedule_ical.py | 3 ++- engine/apps/api/serializers/schedule_web.py | 8 ++++-- engine/apps/api/views/schedule.py | 2 +- engine/apps/schedules/constants.py | 1 + .../apps/schedules/models/on_call_schedule.py | 13 +++++----- .../tasks/check_gaps_and_empty_shifts.py | 2 +- .../notify_about_empty_shifts_in_schedule.py | 9 +++---- .../tasks/notify_about_gaps_in_schedule.py | 9 +++---- .../tests/test_check_gaps_and_empty_shifts.py | 25 ++++++++++--------- .../test_notify_about_gaps_in_schedule.py | 7 +++--- 12 files changed, 58 insertions(+), 42 deletions(-) diff --git a/engine/apps/api/serializers/schedule_base.py b/engine/apps/api/serializers/schedule_base.py index 242d6b1a..441180d8 100644 --- a/engine/apps/api/serializers/schedule_base.py +++ b/engine/apps/api/serializers/schedule_base.py @@ -2,8 +2,13 @@ from rest_framework import serializers from apps.api.serializers.slack_channel import SlackChannelSerializer from apps.api.serializers.user_group import UserGroupSerializer +from apps.schedules.constants import SCHEDULE_CHECK_NEXT_DAYS from apps.schedules.models import OnCallSchedule -from apps.schedules.tasks import schedule_notify_about_empty_shifts_in_schedule, schedule_notify_about_gaps_in_schedule +from apps.schedules.tasks import ( + check_gaps_and_empty_shifts_in_schedule, + schedule_notify_about_empty_shifts_in_schedule, + schedule_notify_about_gaps_in_schedule, +) from common.api_helpers.custom_fields import TeamPrimaryKeyRelatedField from common.api_helpers.mixins import EagerLoadingMixin from common.api_helpers.utils import CurrentOrganizationDefault @@ -44,8 +49,8 @@ class ScheduleBaseSerializer(EagerLoadingMixin, serializers.ModelSerializer): "Cannot update the user group, make sure to grant user group modification rights to " "non-admin users in Slack workspace settings" ) - SCHEDULE_HAS_GAPS_WARNING = "Schedule has unassigned time periods during next 7 days" - SCHEDULE_HAS_EMPTY_SHIFTS_WARNING = "Schedule has empty shifts during next 7 days" + SCHEDULE_HAS_GAPS_WARNING = f"Schedule has unassigned time periods during next {SCHEDULE_CHECK_NEXT_DAYS} days" + SCHEDULE_HAS_EMPTY_SHIFTS_WARNING = f"Schedule has empty shifts during next {SCHEDULE_CHECK_NEXT_DAYS} days" def get_warnings(self, obj): can_update_user_groups = self.context.get("can_update_user_groups", False) @@ -81,7 +86,7 @@ class ScheduleBaseSerializer(EagerLoadingMixin, serializers.ModelSerializer): def create(self, validated_data): created_schedule = super().create(validated_data) - created_schedule.check_gaps_and_empty_shifts_for_next_week() + check_gaps_and_empty_shifts_in_schedule.apply_async((created_schedule.pk,)) schedule_notify_about_empty_shifts_in_schedule.apply_async((created_schedule.pk,)) schedule_notify_about_gaps_in_schedule.apply_async((created_schedule.pk,)) return created_schedule diff --git a/engine/apps/api/serializers/schedule_calendar.py b/engine/apps/api/serializers/schedule_calendar.py index d6c35ea4..69f962c1 100644 --- a/engine/apps/api/serializers/schedule_calendar.py +++ b/engine/apps/api/serializers/schedule_calendar.py @@ -2,7 +2,11 @@ from rest_framework import serializers from apps.api.serializers.schedule_base import ScheduleBaseSerializer from apps.schedules.models import OnCallScheduleCalendar -from apps.schedules.tasks import schedule_notify_about_empty_shifts_in_schedule, schedule_notify_about_gaps_in_schedule +from apps.schedules.tasks import ( + check_gaps_and_empty_shifts_in_schedule, + schedule_notify_about_empty_shifts_in_schedule, + schedule_notify_about_gaps_in_schedule, +) from apps.slack.models import SlackChannel, SlackUserGroup from common.api_helpers.custom_fields import OrganizationFilteredPrimaryKeyRelatedField, TimeZoneField from common.api_helpers.utils import validate_ical_url @@ -58,7 +62,7 @@ class ScheduleCalendarCreateSerializer(ScheduleCalendarSerializer): or old_enable_web_overrides != updated_enable_web_overrides ): updated_schedule.drop_cached_ical() - updated_schedule.check_gaps_and_empty_shifts_for_next_week() + check_gaps_and_empty_shifts_in_schedule.apply_async((instance.pk,)) schedule_notify_about_empty_shifts_in_schedule.apply_async((instance.pk,)) schedule_notify_about_gaps_in_schedule.apply_async((instance.pk,)) return updated_schedule diff --git a/engine/apps/api/serializers/schedule_ical.py b/engine/apps/api/serializers/schedule_ical.py index 4fb1b9f2..21315ece 100644 --- a/engine/apps/api/serializers/schedule_ical.py +++ b/engine/apps/api/serializers/schedule_ical.py @@ -1,6 +1,7 @@ from apps.api.serializers.schedule_base import ScheduleBaseSerializer from apps.schedules.models import OnCallScheduleICal from apps.schedules.tasks import ( + check_gaps_and_empty_shifts_in_schedule, refresh_ical_final_schedule, schedule_notify_about_empty_shifts_in_schedule, schedule_notify_about_gaps_in_schedule, @@ -87,7 +88,7 @@ class ScheduleICalUpdateSerializer(ScheduleICalCreateSerializer): if old_ical_url_primary != updated_ical_url_primary or old_ical_url_overrides != updated_ical_url_overrides: updated_schedule.drop_cached_ical() - updated_schedule.check_gaps_and_empty_shifts_for_next_week() + check_gaps_and_empty_shifts_in_schedule.apply_async((instance.pk,)) schedule_notify_about_empty_shifts_in_schedule.apply_async((instance.pk,)) schedule_notify_about_gaps_in_schedule.apply_async((instance.pk,)) # for iCal-based schedules we need to refresh final schedule information diff --git a/engine/apps/api/serializers/schedule_web.py b/engine/apps/api/serializers/schedule_web.py index 3a503041..e8aea128 100644 --- a/engine/apps/api/serializers/schedule_web.py +++ b/engine/apps/api/serializers/schedule_web.py @@ -1,6 +1,10 @@ from apps.api.serializers.schedule_base import ScheduleBaseSerializer from apps.schedules.models import OnCallScheduleWeb -from apps.schedules.tasks import schedule_notify_about_empty_shifts_in_schedule, schedule_notify_about_gaps_in_schedule +from apps.schedules.tasks import ( + check_gaps_and_empty_shifts_in_schedule, + schedule_notify_about_empty_shifts_in_schedule, + schedule_notify_about_gaps_in_schedule, +) from apps.slack.models import SlackChannel, SlackUserGroup from common.api_helpers.custom_fields import OrganizationFilteredPrimaryKeyRelatedField, TimeZoneField @@ -41,7 +45,7 @@ class ScheduleWebCreateSerializer(ScheduleWebSerializer): updated_time_zone = updated_schedule.time_zone if old_time_zone != updated_time_zone: updated_schedule.drop_cached_ical() - updated_schedule.check_gaps_and_empty_shifts_for_next_week() + check_gaps_and_empty_shifts_in_schedule.apply_async((instance.pk,)) schedule_notify_about_empty_shifts_in_schedule.apply_async((instance.pk,)) schedule_notify_about_gaps_in_schedule.apply_async((instance.pk,)) return updated_schedule diff --git a/engine/apps/api/views/schedule.py b/engine/apps/api/views/schedule.py index e30aa8cb..2d816211 100644 --- a/engine/apps/api/views/schedule.py +++ b/engine/apps/api/views/schedule.py @@ -481,7 +481,7 @@ class ScheduleView( def reload_ical(self, request, pk): schedule = self.get_object(annotate=False) schedule.drop_cached_ical() - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() if schedule.user_group is not None: update_slack_user_group_for_schedules.apply_async((schedule.user_group.pk,)) diff --git a/engine/apps/schedules/constants.py b/engine/apps/schedules/constants.py index 3f8f556e..26192c58 100644 --- a/engine/apps/schedules/constants.py +++ b/engine/apps/schedules/constants.py @@ -29,5 +29,6 @@ EXPORT_WINDOW_DAYS_BEFORE = 15 SCHEDULE_ONCALL_CACHE_KEY_PREFIX = "schedule_oncall_users_" SCHEDULE_ONCALL_CACHE_TTL = 15 * 60 # 15 minutes in seconds +SCHEDULE_CHECK_NEXT_DAYS = 30 PREFETCHED_SHIFT_SWAPS = "prefetched_shift_swaps" diff --git a/engine/apps/schedules/models/on_call_schedule.py b/engine/apps/schedules/models/on_call_schedule.py index e57cf4bc..691d172b 100644 --- a/engine/apps/schedules/models/on_call_schedule.py +++ b/engine/apps/schedules/models/on_call_schedule.py @@ -33,6 +33,7 @@ from apps.schedules.constants import ( ICAL_SUMMARY, ICAL_UID, PREFETCHED_SHIFT_SWAPS, + SCHEDULE_CHECK_NEXT_DAYS, ) from apps.schedules.ical_utils import ( EmptyShifts, @@ -293,9 +294,9 @@ class OnCallSchedule(PolymorphicModel): (self.prev_ical_file_overrides, self.cached_ical_file_overrides), ] - def check_gaps_and_empty_shifts_for_next_week(self) -> None: + def check_gaps_and_empty_shifts_for_next_days(self, days=SCHEDULE_CHECK_NEXT_DAYS) -> None: datetime_start = timezone.now() - datetime_end = datetime_start + datetime.timedelta(days=7) + datetime_end = datetime_start + datetime.timedelta(days=days) # get empty shifts from all events and gaps from final events events = self.filter_events( @@ -313,14 +314,14 @@ class OnCallSchedule(PolymorphicModel): self.has_empty_shifts = has_empty_shifts self.save(update_fields=["has_gaps", "has_empty_shifts"]) - def get_gaps_for_next_week(self) -> ScheduleEvents: + def get_gaps_for_next_days(self, days=SCHEDULE_CHECK_NEXT_DAYS) -> ScheduleEvents: today = timezone.now() - events = self.final_events(today, today + datetime.timedelta(days=7)) + events = self.final_events(today, today + datetime.timedelta(days=days)) return [event for event in events if event["is_gap"]] - def get_empty_shifts_for_next_week(self) -> EmptyShifts: + def get_empty_shifts_for_next_days(self, days=SCHEDULE_CHECK_NEXT_DAYS) -> EmptyShifts: today = timezone.now().date() - return list_of_empty_shifts_in_schedule(self, today, today + datetime.timedelta(days=7)) + return list_of_empty_shifts_in_schedule(self, today, today + datetime.timedelta(days=days)) def drop_cached_ical(self): self._drop_primary_ical_file() diff --git a/engine/apps/schedules/tasks/check_gaps_and_empty_shifts.py b/engine/apps/schedules/tasks/check_gaps_and_empty_shifts.py index 0a05471a..47042029 100644 --- a/engine/apps/schedules/tasks/check_gaps_and_empty_shifts.py +++ b/engine/apps/schedules/tasks/check_gaps_and_empty_shifts.py @@ -19,5 +19,5 @@ def check_gaps_and_empty_shifts_in_schedule(schedule_pk): task_logger.info(f"Tried to check_gaps_and_empty_shifts_in_schedule for non-existing schedule {schedule_pk}") return - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() task_logger.info(f"Finish check_gaps_and_empty_shifts_in_schedule {schedule_pk}") diff --git a/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py b/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py index ebe68eda..aba334b4 100644 --- a/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py +++ b/engine/apps/schedules/tasks/notify_about_empty_shifts_in_schedule.py @@ -48,15 +48,15 @@ def notify_about_empty_shifts_in_schedule_task(schedule_pk): task_logger.info(f"Tried to notify_about_empty_shifts_in_schedule_task for non-existing schedule {schedule_pk}") return - empty_shifts = schedule.get_empty_shifts_for_next_week() + empty_shifts = schedule.get_empty_shifts_for_next_days() schedule.empty_shifts_report_sent_at = timezone.now().date() if len(empty_shifts) != 0: schedule.has_empty_shifts = True text = ( - f'Tried to parse schedule *"{schedule.name}"* and found events without associated users.\n' - f"To ensure you don't miss any notifications, use a Grafana username as the event name in the calendar. " - f"The user should have Editor or Admin access.\n\n" + f"Reviewing *{schedule.slack_url}* on-call schedule found events without valid associated users.\n" + f"To ensure you don't miss any notifications, make sure user exists (or you provided a valid Grafana username). " + f"The user should have the right permissions, or be an Editor or Admin.\n\n" ) for idx, empty_shift in enumerate(empty_shifts): start_timestamp = empty_shift.start.astimezone(pytz.UTC).timestamp() @@ -80,7 +80,6 @@ def notify_about_empty_shifts_in_schedule_task(schedule_pk): text += '*All-day* event in "UTC" TZ\n' else: text += f"From {format_datetime_to_slack_with_time(start_timestamp)} to {format_datetime_to_slack_with_time(end_timestamp)} (your TZ)\n" - text += f"_From {OnCallSchedule.CALENDAR_TYPE_VERBAL[empty_shift.calendar_type]} calendar_\n" if idx != len(empty_shifts) - 1: text += "\n\n" post_message_to_channel(schedule.organization, schedule.slack_channel_slack_id, text) diff --git a/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py b/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py index 7a8b2c67..8bc00135 100644 --- a/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py +++ b/engine/apps/schedules/tasks/notify_about_gaps_in_schedule.py @@ -48,13 +48,13 @@ def notify_about_gaps_in_schedule_task(schedule_pk): task_logger.info(f"Tried to notify_about_gaps_in_schedule_task for non-existing schedule {schedule_pk}") return - gaps = schedule.get_gaps_for_next_week() + gaps = schedule.get_gaps_for_next_days() schedule.gaps_report_sent_at = timezone.now().date() if len(gaps) != 0: schedule.has_gaps = True - text = f"There are time periods that are unassigned in *{schedule.name}* on-call schedule.\n" - for idx, gap in enumerate(gaps): + text = f"There are time periods that are unassigned in *{schedule.slack_url}* on-call schedule.\n" + for gap in gaps: if gap["start"]: start_verbal = format_datetime_to_slack_with_time(gap["start"].astimezone(pytz.UTC).timestamp()) else: @@ -64,8 +64,7 @@ def notify_about_gaps_in_schedule_task(schedule_pk): else: end_verbal = "..." text += f"From {start_verbal} to {end_verbal} (your TZ)\n" - if idx != len(gaps) - 1: - text += "\n\n" + text += "\n\n" post_message_to_channel(schedule.organization, schedule.slack_channel_slack_id, text) else: schedule.has_gaps = False diff --git a/engine/apps/schedules/tests/test_check_gaps_and_empty_shifts.py b/engine/apps/schedules/tests/test_check_gaps_and_empty_shifts.py index 63ca428a..b5461771 100644 --- a/engine/apps/schedules/tests/test_check_gaps_and_empty_shifts.py +++ b/engine/apps/schedules/tests/test_check_gaps_and_empty_shifts.py @@ -4,6 +4,7 @@ import pytest from django.utils import timezone from apps.api.permissions import LegacyAccessControlRole +from apps.schedules.constants import SCHEDULE_CHECK_NEXT_DAYS from apps.schedules.models import CustomOnCallShift, OnCallScheduleWeb @@ -34,7 +35,7 @@ def test_no_empty_shifts_no_gaps( ) on_call_shift.add_rolling_users([[user1]]) schedule.refresh_ical_file() - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() schedule.refresh_from_db() assert schedule.has_gaps is False @@ -73,7 +74,7 @@ def test_no_empty_shifts_but_gaps_now( assert schedule.has_gaps is False assert schedule.has_empty_shifts is False - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() schedule.refresh_from_db() assert schedule.has_gaps is True @@ -111,7 +112,7 @@ def test_empty_shifts_no_gaps( assert schedule.has_gaps is False assert schedule.has_empty_shifts is False - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() schedule.refresh_from_db() assert schedule.has_gaps is False @@ -150,7 +151,7 @@ def test_empty_shifts_and_gaps( assert schedule.has_gaps is False assert schedule.has_empty_shifts is False - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() schedule.refresh_from_db() assert schedule.has_gaps is True @@ -206,7 +207,7 @@ def test_empty_shifts_and_gaps_in_the_past( assert schedule.has_gaps is False assert schedule.has_empty_shifts is False - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() schedule.refresh_from_db() assert schedule.has_gaps is False @@ -225,9 +226,9 @@ def test_empty_shifts_and_gaps_in_the_future( user2 = make_user(organization=organization, username="user2", role=LegacyAccessControlRole.ADMIN) schedule = make_schedule(organization, schedule_class=OnCallScheduleWeb, name="test_schedule") - # empty shift with gaps starts in 7 days 1 min + # empty shift with gaps starts in SCHEDULE_CHECK_NEXT_DAYS days 1 min now = timezone.now().replace(microsecond=0) - start_date = now + datetime.timedelta(days=7, minutes=1) + start_date = now + datetime.timedelta(days=SCHEDULE_CHECK_NEXT_DAYS, minutes=1) data = { "start": start_date, "rotation_start": start_date, @@ -241,9 +242,9 @@ def test_empty_shifts_and_gaps_in_the_future( organization=organization, shift_type=CustomOnCallShift.TYPE_ROLLING_USERS_EVENT, **data ) on_call_shift.add_rolling_users([[user1]]) - # normal shift ends in 7 days 1 min - start_date2 = now - datetime.timedelta(days=7, minutes=1) - until = now + datetime.timedelta(days=7, minutes=1) + # normal shift ends in SCHEDULE_CHECK_NEXT_DAYS days 1 min + start_date2 = now - datetime.timedelta(days=SCHEDULE_CHECK_NEXT_DAYS, minutes=1) + until = now + datetime.timedelta(days=SCHEDULE_CHECK_NEXT_DAYS, minutes=1) data2 = { "start": start_date2, "rotation_start": start_date2, @@ -262,8 +263,8 @@ def test_empty_shifts_and_gaps_in_the_future( assert schedule.has_gaps is False assert schedule.has_empty_shifts is False - schedule.check_gaps_and_empty_shifts_for_next_week() + schedule.check_gaps_and_empty_shifts_for_next_days() schedule.refresh_from_db() - # no gaps and empty shifts in the next 7 days + # no gaps and empty shifts in the next SCHEDULE_CHECK_NEXT_DAYS days assert schedule.has_gaps is False assert schedule.has_empty_shifts is False diff --git a/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py b/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py index 5e35cf06..6f2f827d 100644 --- a/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py +++ b/engine/apps/schedules/tests/test_notify_about_gaps_in_schedule.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest from django.utils import timezone +from apps.schedules.constants import SCHEDULE_CHECK_NEXT_DAYS from apps.schedules.models import CustomOnCallShift, OnCallScheduleCalendar, OnCallScheduleICal, OnCallScheduleWeb from apps.schedules.tasks import notify_about_gaps_in_schedule_task, start_notify_about_gaps_in_schedule @@ -236,7 +237,7 @@ def test_gaps_near_future_trigger_notification( @pytest.mark.django_db -def test_gaps_later_than_7_days_no_triggering_notification( +def test_gaps_later_than_days_no_triggering_notification( make_slack_team_identity, make_slack_channel, make_organization, @@ -259,8 +260,8 @@ def test_gaps_later_than_7_days_no_triggering_notification( prev_ical_file_overrides=None, cached_ical_file_overrides=None, ) - start_date = now - datetime.timedelta(days=7, minutes=1) - until_date = now + datetime.timedelta(days=8) + start_date = now - datetime.timedelta(days=SCHEDULE_CHECK_NEXT_DAYS, minutes=1) + until_date = now + datetime.timedelta(days=SCHEDULE_CHECK_NEXT_DAYS + 1) data = { "start": start_date, "rotation_start": start_date, From 2a87bea6ed686b88e3aecb8d51d8b56b8eb276a8 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Thu, 16 Jan 2025 09:19:32 -0300 Subject: [PATCH 09/13] feat: add filter affected services internal endpoint (#5415) Related to https://github.com/grafana/oncall-private/issues/2977 e.g. `GET /api/plugins/grafana-oncall-app/resources/alertgroups/filter_affected_services?service=service-a&service=service-b` ``` [ {"name": "service-a", "service_url": "http://localhost:3000/a/grafana-slo-app/service/service-a", "alert_groups_url": "http://localhost:3000/a/grafana-oncall-app/alert-groups?status=0&status=1&started_at=now-7d_now&label=service_name:service-a"} ] ``` --- engine/apps/api/tests/test_alert_group.py | 41 ++++++++ engine/apps/api/views/alert_group.py | 93 ++++++++++++++++--- .../tests/test_ui_url_builder.py | 6 ++ engine/apps/grafana_plugin/ui_url_builder.py | 3 + engine/common/constants/plugin_ids.py | 1 + 5 files changed, 129 insertions(+), 15 deletions(-) diff --git a/engine/apps/api/tests/test_alert_group.py b/engine/apps/api/tests/test_alert_group.py index 8ee438b6..d053e5fd 100644 --- a/engine/apps/api/tests/test_alert_group.py +++ b/engine/apps/api/tests/test_alert_group.py @@ -2413,3 +2413,44 @@ def test_filter_default_started_at( ) assert response.status_code == status.HTTP_200_OK assert response.json()["pk"] == old_alert_group.public_primary_key + + +@pytest.mark.django_db +def test_alert_group_affected_services( + alert_group_internal_api_setup, + make_user_for_organization, + make_user_auth_headers, + make_alert_group_label_association, +): + _, token, alert_groups = alert_group_internal_api_setup + resolved_ag, ack_ag, new_ag, silenced_ag = alert_groups + organization = new_ag.channel.organization + user = make_user_for_organization(organization) + + # set firing alert group service label + make_alert_group_label_association(organization, new_ag, key_name="service_name", value_name="service-a") + # set other service name labels for other alert groups + make_alert_group_label_association(organization, ack_ag, key_name="service_name", value_name="service-2") + make_alert_group_label_association(organization, resolved_ag, key_name="service_name", value_name="service-3") + make_alert_group_label_association(organization, silenced_ag, key_name="service_name", value_name="service-4") + + client = APIClient() + url = reverse("api-internal:alertgroup-filter-affected-services") + + url = f"{url}?service=service-1&service=service-2&service=service-3&service=service-a" + response = client.get(url, format="json", **make_user_auth_headers(user, token)) + + assert response.status_code == status.HTTP_200_OK + expected = [ + { + "name": "service-2", + "service_url": "a/grafana-slo-app/service/service-2", + "alert_groups_url": "a/grafana-oncall-app/alert-groups?status=0&status=1&started_at=now-7d_now&label=service_name:service-2", + }, + { + "name": "service-a", + "service_url": "a/grafana-slo-app/service/service-a", + "alert_groups_url": "a/grafana-oncall-app/alert-groups?status=0&status=1&started_at=now-7d_now&label=service_name:service-a", + }, + ] + assert response.json() == expected diff --git a/engine/apps/api/views/alert_group.py b/engine/apps/api/views/alert_group.py index 117fb9ce..eaa32dbf 100644 --- a/engine/apps/api/views/alert_group.py +++ b/engine/apps/api/views/alert_group.py @@ -26,6 +26,7 @@ from apps.api.serializers.alert_group_escalation_snapshot import AlertGroupEscal from apps.api.serializers.team import TeamSerializer from apps.auth_token.auth import PluginAuthentication from apps.base.models.user_notification_policy_log_record import UserNotificationPolicyLogRecord +from apps.grafana_plugin.ui_url_builder import UIURLBuilder from apps.labels.utils import is_labels_feature_enabled from apps.mobile_app.auth import MobileAppAuthTokenAuthentication from apps.user_management.models import Team, User @@ -283,6 +284,7 @@ class AlertGroupView( "bulk_action": [RBACPermission.Permissions.ALERT_GROUPS_WRITE], "preview_template": [RBACPermission.Permissions.INTEGRATIONS_TEST], "escalation_snapshot": [RBACPermission.Permissions.ALERT_GROUPS_READ], + "filter_affected_services": [RBACPermission.Permissions.ALERT_GROUPS_READ], } queryset = AlertGroup.objects.none() # needed for drf-spectacular introspection @@ -299,9 +301,18 @@ class AlertGroupView( return super().get_serializer_class() - def get_queryset(self, ignore_filtering_by_available_teams=False): - # no select_related or prefetch_related is used at this point, it will be done on paginate_queryset. - + def _get_queryset( + self, + action=None, + ignore_filtering_by_available_teams=False, + team_values=None, + started_at=None, + label_query=None, + ): + # make base get_queryset reusable via params + if action is None: + # assume stats by default + action = "stats" alert_receive_channels_qs = AlertReceiveChannel.objects_with_deleted.filter( organization_id=self.request.auth.organization.id ) @@ -310,7 +321,6 @@ class AlertGroupView( # Filter by team(s). Since we really filter teams from integrations, this is not an AlertGroup model filter. # This is based on the common.api_helpers.ByTeamModelFieldFilterMixin implementation - team_values = self.request.query_params.getlist("team", []) if team_values: null_team_lookup = Q(team__isnull=True) if NO_TEAM_VALUE in team_values else None teams_lookup = Q(team__public_primary_key__in=[ppk for ppk in team_values if ppk != NO_TEAM_VALUE]) @@ -321,10 +331,10 @@ class AlertGroupView( alert_receive_channels_ids = list(alert_receive_channels_qs.values_list("id", flat=True)) queryset = AlertGroup.objects.filter(channel__in=alert_receive_channels_ids) - if self.action in ("list", "stats") and not self.request.query_params.get("started_at"): + if action in ("list", "stats") and not started_at: queryset = queryset.filter(started_at__gte=timezone.now() - timezone.timedelta(days=30)) - if self.action in ("list", "stats") and settings.ALERT_GROUPS_DISABLE_PREFER_ORDERING_INDEX: + if action in ("list", "stats") and settings.ALERT_GROUPS_DISABLE_PREFER_ORDERING_INDEX: # workaround related to MySQL "ORDER BY LIMIT Query Optimizer Bug" # read more: https://hackmysql.com/infamous-order-by-limit-query-optimizer-bug/ from django_mysql.models import add_QuerySetMixin @@ -333,18 +343,28 @@ class AlertGroupView( queryset = queryset.force_index("alert_group_list_index") # Filter by labels. Since alert group labels are "static" filter by names, not IDs. - label_query = self.request.query_params.getlist("label", []) - kv_pairs = parse_label_query(label_query) - for key, value in kv_pairs: - # Utilize (organization, key_name, value_name, alert_group) index on AlertGroupAssociatedLabel - queryset = queryset.filter( - labels__organization=self.request.auth.organization, - labels__key_name=key, - labels__value_name=value, - ) + if label_query: + kv_pairs = parse_label_query(label_query) + for key, value in kv_pairs: + # Utilize (organization, key_name, value_name, alert_group) index on AlertGroupAssociatedLabel + queryset = queryset.filter( + labels__organization=self.request.auth.organization, + labels__key_name=key, + labels__value_name=value, + ) return queryset + def get_queryset(self, ignore_filtering_by_available_teams=False): + # no select_related or prefetch_related is used at this point, it will be done on paginate_queryset. + return self._get_queryset( + action=self.action, + ignore_filtering_by_available_teams=ignore_filtering_by_available_teams, + team_values=self.request.query_params.getlist("team", []), + started_at=self.request.query_params.get("started_at"), + label_query=self.request.query_params.getlist("label", []), + ) + def get_object(self): obj = super().get_object() obj = self.enrich([obj])[0] @@ -881,3 +901,46 @@ class AlertGroupView( escalation_snapshot = alert_group.escalation_snapshot result = AlertGroupEscalationSnapshotAPISerializer(escalation_snapshot).data if escalation_snapshot else {} return Response(result) + + @extend_schema( + responses=inline_serializer( + name="AffectedServices", + fields={ + "name": serializers.CharField(), + "service_url": serializers.CharField(), + "alert_groups_url": serializers.CharField(), + }, + many=True, + ) + ) + @action(methods=["get"], detail=False) + def filter_affected_services(self, request): + """Given a list of service names, return the ones that have active alerts.""" + organization = self.request.auth.organization + services = self.request.query_params.getlist("service", []) + url_builder = UIURLBuilder(organization) + affected_services = [] + days_to_check = 7 + for service_name in services: + is_affected = ( + self._get_queryset( + started_at=timezone.now() - timezone.timedelta(days=days_to_check), + label_query=[f"service_name:{service_name}"], + ) + .filter( + resolved=False, + silenced=False, + ) + .exists() + ) + if is_affected: + affected_services.append( + { + "name": service_name, + "service_url": url_builder.service_page(service_name), + "alert_groups_url": url_builder.alert_groups( + f"?status=0&status=1&started_at=now-{days_to_check}d_now&label=service_name:{service_name}" + ), + } + ) + return Response(affected_services) diff --git a/engine/apps/grafana_plugin/tests/test_ui_url_builder.py b/engine/apps/grafana_plugin/tests/test_ui_url_builder.py index dad96877..b55b1c4c 100644 --- a/engine/apps/grafana_plugin/tests/test_ui_url_builder.py +++ b/engine/apps/grafana_plugin/tests/test_ui_url_builder.py @@ -103,3 +103,9 @@ def test_build_url_overriden_base_url(org_setup): @pytest.mark.django_db def test_build_url_works_for_irm_and_oncall_plugins(org_setup, is_grafana_irm_enabled, expected_url): assert UIURLBuilder(org_setup(is_grafana_irm_enabled)).alert_group_detail(ALERT_GROUP_ID) == expected_url + + +@pytest.mark.django_db +def test_build_url_service_detail_page(org_setup): + builder = UIURLBuilder(org_setup()) + assert builder.service_page("service-a") == f"{GRAFANA_URL}/a/{PluginID.SLO}/service/service-a" diff --git a/engine/apps/grafana_plugin/ui_url_builder.py b/engine/apps/grafana_plugin/ui_url_builder.py index e37f8e75..5b4d6e64 100644 --- a/engine/apps/grafana_plugin/ui_url_builder.py +++ b/engine/apps/grafana_plugin/ui_url_builder.py @@ -56,3 +56,6 @@ class UIURLBuilder: def declare_incident(self, path_extra: str = "") -> str: return self._build_url("incidents/declare", path_extra, plugin_id=PluginID.INCIDENT) + + def service_page(self, service_name: str, path_extra: str = "") -> str: + return self._build_url(f"service/{service_name}", path_extra, plugin_id=PluginID.SLO) diff --git a/engine/common/constants/plugin_ids.py b/engine/common/constants/plugin_ids.py index 666c30e2..db6ff6a2 100644 --- a/engine/common/constants/plugin_ids.py +++ b/engine/common/constants/plugin_ids.py @@ -5,3 +5,4 @@ class PluginID: INCIDENT = "grafana-incident-app" LABELS = "grafana-labels-app" ML = "grafana-ml-app" + SLO = "grafana-slo-app" From 94e5d490b35b916920fc3c58dfff44ae18a8670f Mon Sep 17 00:00:00 2001 From: Julia Artyukhina Date: Mon, 20 Jan 2025 16:49:59 +0100 Subject: [PATCH 10/13] Add service dependencies feature flag (#5420) # What this PR does Adds `service_dependencies` feature flag ## Which issue(s) this PR closes Related to https://github.com/grafana/oncall-private/issues/2977 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- engine/apps/api/tests/test_alert_group.py | 28 +++++++++++++++++++++++ engine/apps/api/views/alert_group.py | 2 ++ engine/apps/api/views/features.py | 4 ++++ engine/settings/base.py | 1 + 4 files changed, 35 insertions(+) diff --git a/engine/apps/api/tests/test_alert_group.py b/engine/apps/api/tests/test_alert_group.py index d053e5fd..9ef2b76d 100644 --- a/engine/apps/api/tests/test_alert_group.py +++ b/engine/apps/api/tests/test_alert_group.py @@ -2421,7 +2421,9 @@ def test_alert_group_affected_services( make_user_for_organization, make_user_auth_headers, make_alert_group_label_association, + settings, ): + settings.FEATURE_SERVICE_DEPENDENCIES_ENABLED = True _, token, alert_groups = alert_group_internal_api_setup resolved_ag, ack_ag, new_ag, silenced_ag = alert_groups organization = new_ag.channel.organization @@ -2454,3 +2456,29 @@ def test_alert_group_affected_services( }, ] assert response.json() == expected + + +@pytest.mark.django_db +def test_alert_group_service_dependencies_feature_not_enabled( + alert_group_internal_api_setup, + make_user_for_organization, + make_user_auth_headers, + make_alert_group_label_association, + settings, +): + settings.FEATURE_SERVICE_DEPENDENCIES_ENABLED = False + _, token, alert_groups = alert_group_internal_api_setup + _, _, new_ag, _ = alert_groups + organization = new_ag.channel.organization + user = make_user_for_organization(organization) + + # set firing alert group service label + make_alert_group_label_association(organization, new_ag, key_name="service_name", value_name="service-a") + + client = APIClient() + url = reverse("api-internal:alertgroup-filter-affected-services") + + url = f"{url}?service=service-1" + response = client.get(url, format="json", **make_user_auth_headers(user, token)) + + assert response.status_code == status.HTTP_404_NOT_FOUND diff --git a/engine/apps/api/views/alert_group.py b/engine/apps/api/views/alert_group.py index eaa32dbf..2cf4f950 100644 --- a/engine/apps/api/views/alert_group.py +++ b/engine/apps/api/views/alert_group.py @@ -916,6 +916,8 @@ class AlertGroupView( @action(methods=["get"], detail=False) def filter_affected_services(self, request): """Given a list of service names, return the ones that have active alerts.""" + if not settings.FEATURE_SERVICE_DEPENDENCIES_ENABLED: + raise NotFound organization = self.request.auth.organization services = self.request.query_params.getlist("service", []) url_builder = UIURLBuilder(organization) diff --git a/engine/apps/api/views/features.py b/engine/apps/api/views/features.py index 7e35597a..597fd92c 100644 --- a/engine/apps/api/views/features.py +++ b/engine/apps/api/views/features.py @@ -26,6 +26,7 @@ class Feature(enum.StrEnum): GRAFANA_ALERTING_V2 = "grafana_alerting_v2" LABELS = "labels" GOOGLE_OAUTH2 = "google_oauth2" + SERVICE_DEPENDENCIES = "service_dependencies" class FeaturesAPIView(APIView): @@ -72,4 +73,7 @@ class FeaturesAPIView(APIView): if settings.GOOGLE_OAUTH2_ENABLED: enabled_features.append(Feature.GOOGLE_OAUTH2) + if settings.FEATURE_SERVICE_DEPENDENCIES_ENABLED: + enabled_features.append(Feature.SERVICE_DEPENDENCIES) + return enabled_features diff --git a/engine/settings/base.py b/engine/settings/base.py index 007779f1..d5be922b 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -76,6 +76,7 @@ FEATURE_ALERT_GROUP_SEARCH_ENABLED = getenv_boolean("FEATURE_ALERT_GROUP_SEARCH_ FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS = getenv_integer("FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS", default=None) FEATURE_NOTIFICATION_BUNDLE_ENABLED = getenv_boolean("FEATURE_NOTIFICATION_BUNDLE_ENABLED", default=True) FEATURE_DECLARE_INCIDENT_STEP_ENABLED = getenv_boolean("FEATURE_DECLARE_INCIDENT_STEP_ENABLED", default=False) +FEATURE_SERVICE_DEPENDENCIES_ENABLED = getenv_boolean("FEATURE_SERVICE_DEPENDENCIES_ENABLED", default=False) TWILIO_API_KEY_SID = os.environ.get("TWILIO_API_KEY_SID") TWILIO_API_KEY_SECRET = os.environ.get("TWILIO_API_KEY_SECRET") From 84411b7250e2815ef724a6b7a7f79baaf2432498 Mon Sep 17 00:00:00 2001 From: Julia Artyukhina Date: Tue, 21 Jan 2025 17:29:36 +0100 Subject: [PATCH 11/13] Add important version of round-robin escalation step (#5418) # What this PR does Adds `important` version of `Round-robin` escalation step Screenshot 2025-01-20 at 11 18 54 ## Which issue(s) this PR closes Related to https://github.com/grafana/oncall/issues/1184 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- .../escalation_policies.md | 32 +++++++++---------- .../escalation_policy_snapshot.py | 2 ++ .../incident_log_builder.py | 23 +++++++++---- .../0074_alter_escalationpolicy_step.py | 18 +++++++++++ .../apps/alerts/models/escalation_policy.py | 25 +++++++++------ .../tests/test_escalation_policy_snapshot.py | 6 +++- .../apps/api/tests/test_escalation_policy.py | 4 +++ .../serializers/escalation_policies.py | 7 +++- engine/apps/webhooks/utils.py | 8 +++-- 9 files changed, 90 insertions(+), 35 deletions(-) create mode 100644 engine/apps/alerts/migrations/0074_alter_escalationpolicy_step.py diff --git a/docs/sources/oncall-api-reference/escalation_policies.md b/docs/sources/oncall-api-reference/escalation_policies.md index 465b7f25..d50cf731 100644 --- a/docs/sources/oncall-api-reference/escalation_policies.md +++ b/docs/sources/oncall-api-reference/escalation_policies.md @@ -40,22 +40,22 @@ The above command returns JSON structured in the following way: } ``` -| Parameter | Required | Description | -| ---------------------------------- |:----------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `escalation_chain_id` | Yes | Each escalation policy is assigned to a specific escalation chain. | -| `position` | Optional | Escalation policies execute one after another starting from `position=0`. `Position=-1` will put the escalation policy to the end of the list. A new escalation policy created with a position of an existing escalation policy will move the old one (and all following) down in the list. | -| `type` | Yes | One of: `wait`, `notify_persons`, `notify_person_next_each_time`, `notify_on_call_from_schedule`, `notify_user_group`, `trigger_webhook`, `resolve`, `notify_whole_channel`, `notify_if_time_from_to`, `declare_incident`. | -| `important` | Optional | Default is `false`. Will assign "important" to personal notification rules if `true`. This can be used to distinguish alerts on which you want to be notified immediately by phone. Applicable for types `notify_persons`, `notify_team_members`, `notify_on_call_from_schedule`, and `notify_user_group`. | -| `duration` | If type = `wait` | The duration, in seconds, when type `wait` is chosen. Valid values are any number of seconds in the inclusive range `60 to 86400`. | -| `action_to_trigger` | If type = `trigger_webhook` | ID of a webhook. | -| `group_to_notify` | If type = `notify_user_group` | ID of a `User Group`. | -| `persons_to_notify` | If type = `notify_persons` | List of user IDs. | -| `persons_to_notify_next_each_time` | If type = `notify_person_next_each_time` | List of user IDs. | -| `notify_on_call _from_schedule` | If type = `notify_on_call_from_schedule` | ID of a Schedule. | -| `notify_if_time_from` | If type = `notify_if_time_from_to` | UTC time represents the beginning of the time period, for example `09:00:00Z`. | -| `notify_if_time_to` | If type = `notify_if_time_from_to` | UTC time represents the end of the time period, for example `18:00:00Z`. | -| `team_to_notify` | If type = `notify_team_members` | ID of a team. | -| `severity` | If type = `declare_incident` | Severity of the incident. | +| Parameter | Required | Description | +| ---------------------------------- |:----------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `escalation_chain_id` | Yes | Each escalation policy is assigned to a specific escalation chain. | +| `position` | Optional | Escalation policies execute one after another starting from `position=0`. `Position=-1` will put the escalation policy to the end of the list. A new escalation policy created with a position of an existing escalation policy will move the old one (and all following) down in the list. | +| `type` | Yes | One of: `wait`, `notify_persons`, `notify_person_next_each_time`, `notify_on_call_from_schedule`, `notify_user_group`, `trigger_webhook`, `resolve`, `notify_whole_channel`, `notify_if_time_from_to`, `declare_incident`. | +| `important` | Optional | Default is `false`. Will assign "important" to personal notification rules if `true`. This can be used to distinguish alerts on which you want to be notified immediately by phone. Applicable for types `notify_persons`, `notify_person_next_each_time`, `notify_team_members`, `notify_on_call_from_schedule`, and `notify_user_group`. | +| `duration` | If type = `wait` | The duration, in seconds, when type `wait` is chosen. Valid values are any number of seconds in the inclusive range `60 to 86400`. | +| `action_to_trigger` | If type = `trigger_webhook` | ID of a webhook. | +| `group_to_notify` | If type = `notify_user_group` | ID of a `User Group`. | +| `persons_to_notify` | If type = `notify_persons` | List of user IDs. | +| `persons_to_notify_next_each_time` | If type = `notify_person_next_each_time` | List of user IDs. | +| `notify_on_call _from_schedule` | If type = `notify_on_call_from_schedule` | ID of a Schedule. | +| `notify_if_time_from` | If type = `notify_if_time_from_to` | UTC time represents the beginning of the time period, for example `09:00:00Z`. | +| `notify_if_time_to` | If type = `notify_if_time_from_to` | UTC time represents the end of the time period, for example `18:00:00Z`. | +| `team_to_notify` | If type = `notify_team_members` | ID of a team. | +| `severity` | If type = `declare_incident` | Severity of the incident. | **HTTP request** diff --git a/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py b/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py index 1811e2e9..ede9e00a 100644 --- a/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py +++ b/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py @@ -137,6 +137,7 @@ class EscalationPolicySnapshot: EscalationPolicy.STEP_NOTIFY_SCHEDULE_IMPORTANT: self._escalation_step_notify_on_call_schedule, EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK: self._escalation_step_trigger_custom_webhook, EscalationPolicy.STEP_NOTIFY_USERS_QUEUE: self._escalation_step_notify_users_queue, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT: self._escalation_step_notify_users_queue, EscalationPolicy.STEP_NOTIFY_IF_TIME: self._escalation_step_notify_if_time, EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: self._escalation_step_notify_if_num_alerts_in_time_window, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS: self._escalation_step_notify_multiple_users, @@ -199,6 +200,7 @@ class EscalationPolicySnapshot: ), { "reason": reason, + "important": self.step == EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, }, immutable=True, ) diff --git a/engine/apps/alerts/incident_log_builder/incident_log_builder.py b/engine/apps/alerts/incident_log_builder/incident_log_builder.py index d291d4fa..70410d96 100644 --- a/engine/apps/alerts/incident_log_builder/incident_log_builder.py +++ b/engine/apps/alerts/incident_log_builder/incident_log_builder.py @@ -100,9 +100,10 @@ class IncidentLogBuilder: ] excluded_escalation_steps = [EscalationPolicy.STEP_WAIT, EscalationPolicy.STEP_FINAL_RESOLVE] not_excluded_steps_with_author = [ - EscalationPolicy.STEP_NOTIFY, - EscalationPolicy.STEP_NOTIFY_IMPORTANT, + EscalationPolicy._DEPRECATED_STEP_NOTIFY, + EscalationPolicy._DEPRECATED_STEP_NOTIFY_IMPORTANT, EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, ] # exclude logs that we don't want to see in after resolve report @@ -466,6 +467,7 @@ class IncidentLogBuilder: EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, ]: users_to_notify: UsersToNotify = escalation_policy_snapshot.sorted_users_queue @@ -473,7 +475,10 @@ class IncidentLogBuilder: if users_to_notify: plan_line = f'escalation step "{escalation_policy_snapshot.step_display}"' - if escalation_policy_snapshot.step == EscalationPolicy.STEP_NOTIFY_USERS_QUEUE: + if escalation_policy_snapshot.step in ( + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, + ): try: last_user_index = users_to_notify.index(escalation_policy_snapshot.last_notified_user) except ValueError: @@ -489,14 +494,21 @@ class IncidentLogBuilder: escalation_plan.setdefault(timedelta, []).append({"plan_lines": [plan_line]}) - elif escalation_policy_snapshot.step == EscalationPolicy.STEP_NOTIFY_USERS_QUEUE: + elif escalation_policy_snapshot.step in ( + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, + ): last_notified_user = escalation_policy_snapshot.last_notified_user users_to_notify = [last_notified_user] if last_notified_user else [] for user_to_notify in users_to_notify: notification_plan = self._get_notification_plan_for_user( user_to_notify, - important=escalation_policy_snapshot.step == EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, + important=escalation_policy_snapshot.step + in [ + EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, + ], for_slack=for_slack, future_step=future_step, ) @@ -534,7 +546,6 @@ class IncidentLogBuilder: for user_to_notify in final_notify_all_users_to_notify: notification_plan = self._get_notification_plan_for_user( user_to_notify, - important=escalation_policy_snapshot.step == EscalationPolicy.STEP_NOTIFY_IMPORTANT, for_slack=for_slack, future_step=future_step, ) diff --git a/engine/apps/alerts/migrations/0074_alter_escalationpolicy_step.py b/engine/apps/alerts/migrations/0074_alter_escalationpolicy_step.py new file mode 100644 index 00000000..93a32ac4 --- /dev/null +++ b/engine/apps/alerts/migrations/0074_alter_escalationpolicy_step.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.15 on 2025-01-20 10:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('alerts', '0073_update_direct_paging_integration_non_default_routes'), + ] + + operations = [ + migrations.AlterField( + model_name='escalationpolicy', + name='step', + field=models.IntegerField(choices=[(0, 'Wait'), (1, 'Notify User'), (2, 'Notify Whole Channel'), (3, 'Repeat Escalation (5 times max)'), (4, 'Resolve'), (5, 'Notify Group'), (6, 'Notify Schedule'), (7, 'Notify User (Important)'), (8, 'Notify Group (Important)'), (9, 'Notify Schedule (Important)'), (10, 'Trigger Outgoing Webhook'), (11, 'Notify User (next each time)'), (12, 'Continue escalation only if time is from'), (13, 'Notify multiple Users'), (14, 'Notify multiple Users (Important)'), (15, 'Continue escalation if >X alerts per Y minutes'), (16, 'Trigger Webhook'), (17, 'Notify all users in a Team'), (18, 'Notify all users in a Team (Important)'), (19, 'Declare Incident'), (20, 'Notify User (next each time) (Important)')], default=None, null=True), + ), + ] diff --git a/engine/apps/alerts/models/escalation_policy.py b/engine/apps/alerts/models/escalation_policy.py index e66a65c4..e4980fe1 100644 --- a/engine/apps/alerts/models/escalation_policy.py +++ b/engine/apps/alerts/models/escalation_policy.py @@ -29,13 +29,13 @@ class EscalationPolicy(OrderedModel): ( STEP_WAIT, - STEP_NOTIFY, + _DEPRECATED_STEP_NOTIFY, # only here to keep range intact STEP_FINAL_NOTIFYALL, STEP_REPEAT_ESCALATION_N_TIMES, STEP_FINAL_RESOLVE, STEP_NOTIFY_GROUP, STEP_NOTIFY_SCHEDULE, - STEP_NOTIFY_IMPORTANT, + _DEPRECATED_STEP_NOTIFY_IMPORTANT, # only here to keep range intact STEP_NOTIFY_GROUP_IMPORTANT, STEP_NOTIFY_SCHEDULE_IMPORTANT, _DEPRECATED_STEP_TRIGGER_CUSTOM_BUTTON, # only here to keep range intact @@ -48,18 +48,19 @@ class EscalationPolicy(OrderedModel): STEP_NOTIFY_TEAM_MEMBERS, STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT, STEP_DECLARE_INCIDENT, - ) = range(20) + STEP_NOTIFY_USERS_QUEUE_IMPORTANT, + ) = range(21) # Must be the same order as previous STEP_CHOICES = ( (STEP_WAIT, "Wait"), - (STEP_NOTIFY, "Notify User"), + (_DEPRECATED_STEP_NOTIFY, "Notify User"), (STEP_FINAL_NOTIFYALL, "Notify Whole Channel"), (STEP_REPEAT_ESCALATION_N_TIMES, "Repeat Escalation (5 times max)"), (STEP_FINAL_RESOLVE, "Resolve"), (STEP_NOTIFY_GROUP, "Notify Group"), (STEP_NOTIFY_SCHEDULE, "Notify Schedule"), - (STEP_NOTIFY_IMPORTANT, "Notify User (Important)"), + (_DEPRECATED_STEP_NOTIFY_IMPORTANT, "Notify User (Important)"), (STEP_NOTIFY_GROUP_IMPORTANT, "Notify Group (Important)"), (STEP_NOTIFY_SCHEDULE_IMPORTANT, "Notify Schedule (Important)"), (_DEPRECATED_STEP_TRIGGER_CUSTOM_BUTTON, "Trigger Outgoing Webhook"), @@ -72,6 +73,7 @@ class EscalationPolicy(OrderedModel): (STEP_NOTIFY_TEAM_MEMBERS, "Notify all users in a Team"), (STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT, "Notify all users in a Team (Important)"), (STEP_DECLARE_INCIDENT, "Declare Incident"), + (STEP_NOTIFY_USERS_QUEUE_IMPORTANT, "Notify User (next each time) (Important)"), ) # Ordered step choices available for internal api. @@ -114,6 +116,7 @@ class EscalationPolicy(OrderedModel): STEP_TRIGGER_CUSTOM_WEBHOOK, STEP_REPEAT_ESCALATION_N_TIMES, STEP_DECLARE_INCIDENT, + STEP_NOTIFY_USERS_QUEUE_IMPORTANT, ] # Maps internal api's steps choices to their verbal. First string in tuple is display name for existent step. @@ -142,7 +145,10 @@ class EscalationPolicy(OrderedModel): ), # Other STEP_TRIGGER_CUSTOM_WEBHOOK: ("Trigger webhook {{custom_webhook}}", "Trigger webhook"), - STEP_NOTIFY_USERS_QUEUE: ("Round robin notification for {{users}}", "Notify users one by one (round-robin)"), + STEP_NOTIFY_USERS_QUEUE: ( + "Round robin {{importance}} notification for {{users}}", + "Notify users one by one (round-robin)", + ), STEP_NOTIFY_IF_TIME: ( "Continue escalation if current UTC time is in {{timerange}}", "Continue escalation if current UTC time is in range", @@ -166,7 +172,6 @@ class EscalationPolicy(OrderedModel): STEP_FINAL_NOTIFYALL, STEP_FINAL_RESOLVE, STEP_TRIGGER_CUSTOM_WEBHOOK, - STEP_NOTIFY_USERS_QUEUE, STEP_NOTIFY_IF_TIME, STEP_REPEAT_ESCALATION_N_TIMES, STEP_DECLARE_INCIDENT, @@ -177,12 +182,14 @@ class EscalationPolicy(OrderedModel): STEP_NOTIFY_SCHEDULE: STEP_NOTIFY_SCHEDULE_IMPORTANT, STEP_NOTIFY_MULTIPLE_USERS: STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, STEP_NOTIFY_TEAM_MEMBERS: STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT, + STEP_NOTIFY_USERS_QUEUE: STEP_NOTIFY_USERS_QUEUE_IMPORTANT, } IMPORTANT_TO_DEFAULT_STEP_MAPPING = { STEP_NOTIFY_GROUP_IMPORTANT: STEP_NOTIFY_GROUP, STEP_NOTIFY_SCHEDULE_IMPORTANT: STEP_NOTIFY_SCHEDULE, STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT: STEP_NOTIFY_MULTIPLE_USERS, STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT: STEP_NOTIFY_TEAM_MEMBERS, + STEP_NOTIFY_USERS_QUEUE_IMPORTANT: STEP_NOTIFY_USERS_QUEUE, } # Default steps are just usual version of important steps. E.g. notify group - notify group important @@ -191,6 +198,7 @@ class EscalationPolicy(OrderedModel): STEP_NOTIFY_SCHEDULE, STEP_NOTIFY_MULTIPLE_USERS, STEP_NOTIFY_TEAM_MEMBERS, + STEP_NOTIFY_USERS_QUEUE, } IMPORTANT_STEPS_SET = { @@ -198,6 +206,7 @@ class EscalationPolicy(OrderedModel): STEP_NOTIFY_SCHEDULE_IMPORTANT, STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT, + STEP_NOTIFY_USERS_QUEUE_IMPORTANT, } SLACK_INTEGRATION_REQUIRED_STEPS = [ @@ -224,12 +233,10 @@ class EscalationPolicy(OrderedModel): PUBLIC_STEP_CHOICES_MAP = { STEP_WAIT: "wait", - STEP_NOTIFY: "notify_one_person", STEP_FINAL_NOTIFYALL: "notify_whole_channel", STEP_FINAL_RESOLVE: "resolve", STEP_NOTIFY_GROUP: "notify_user_group", STEP_NOTIFY_GROUP_IMPORTANT: "notify_user_group", - STEP_NOTIFY_IMPORTANT: "notify_one_person", STEP_NOTIFY_SCHEDULE: "notify_on_call_from_schedule", STEP_NOTIFY_SCHEDULE_IMPORTANT: "notify_on_call_from_schedule", STEP_TRIGGER_CUSTOM_WEBHOOK: "trigger_webhook", diff --git a/engine/apps/alerts/tests/test_escalation_policy_snapshot.py b/engine/apps/alerts/tests/test_escalation_policy_snapshot.py index 8882a370..a09d1ca2 100644 --- a/engine/apps/alerts/tests/test_escalation_policy_snapshot.py +++ b/engine/apps/alerts/tests/test_escalation_policy_snapshot.py @@ -93,9 +93,13 @@ def test_escalation_step_notify_all( @patch("apps.alerts.escalation_snapshot.snapshot_classes.EscalationPolicySnapshot._execute_tasks", return_value=None) +@pytest.mark.parametrize( + "step", [EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT] +) @pytest.mark.django_db def test_escalation_step_notify_users_queue( mocked_execute_tasks, + step, make_user_for_organization, escalation_step_test_setup, make_escalation_policy, @@ -105,7 +109,7 @@ def test_escalation_step_notify_users_queue( notify_queue_step = make_escalation_policy( escalation_chain=channel_filter.escalation_chain, - escalation_policy_step=EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + escalation_policy_step=step, ) notify_queue_step.notify_to_users_queue.set([user, user_2]) escalation_policy_snapshot = get_escalation_policy_snapshot_from_model(notify_queue_step) diff --git a/engine/apps/api/tests/test_escalation_policy.py b/engine/apps/api/tests/test_escalation_policy.py index 3ad2583c..a4027aea 100644 --- a/engine/apps/api/tests/test_escalation_policy.py +++ b/engine/apps/api/tests/test_escalation_policy.py @@ -511,6 +511,7 @@ def test_escalation_policy_move_to_position_permissions( (EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT, EscalationPolicy.STEP_NOTIFY_GROUP), (EscalationPolicy.STEP_NOTIFY_SCHEDULE_IMPORTANT, EscalationPolicy.STEP_NOTIFY_SCHEDULE), (EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS), + (EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, EscalationPolicy.STEP_NOTIFY_USERS_QUEUE), ], ) def test_escalation_policy_maps_default_to_important( @@ -545,6 +546,7 @@ def test_escalation_policy_maps_default_to_important( EscalationPolicy.STEP_NOTIFY_GROUP, EscalationPolicy.STEP_NOTIFY_SCHEDULE, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, ], ) def test_escalation_policy_default_steps_stay_default( @@ -578,6 +580,7 @@ def test_escalation_policy_default_steps_stay_default( (EscalationPolicy.STEP_NOTIFY_GROUP, EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT), (EscalationPolicy.STEP_NOTIFY_SCHEDULE, EscalationPolicy.STEP_NOTIFY_SCHEDULE_IMPORTANT), (EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT), + (EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT), ], ) def test_create_escalation_policy_important( @@ -615,6 +618,7 @@ def test_create_escalation_policy_important( EscalationPolicy.STEP_NOTIFY_GROUP, EscalationPolicy.STEP_NOTIFY_SCHEDULE, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, ], ) def test_create_escalation_policy_default( diff --git a/engine/apps/public_api/serializers/escalation_policies.py b/engine/apps/public_api/serializers/escalation_policies.py index 92796a5d..1977b210 100644 --- a/engine/apps/public_api/serializers/escalation_policies.py +++ b/engine/apps/public_api/serializers/escalation_policies.py @@ -191,7 +191,10 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer): EscalationPolicy.STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT, ]: fields_to_remove.remove("team_to_notify") - elif step == EscalationPolicy.STEP_NOTIFY_USERS_QUEUE: + elif step in [ + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, + ]: fields_to_remove.remove("persons_to_notify_next_each_time") elif step in [EscalationPolicy.STEP_NOTIFY_GROUP, EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT]: fields_to_remove.remove("group_to_notify") @@ -243,6 +246,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer): validated_data_fields_to_remove.remove("wait_delay") elif step in [ EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, ]: @@ -298,6 +302,7 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer): instance.wait_delay = None if step not in [ EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, ]: diff --git a/engine/apps/webhooks/utils.py b/engine/apps/webhooks/utils.py index f3b90e55..b3a100d3 100644 --- a/engine/apps/webhooks/utils.py +++ b/engine/apps/webhooks/utils.py @@ -132,13 +132,17 @@ def _extract_users_from_escalation_snapshot(escalation_snapshot): if escalation_snapshot: for policy_snapshot in escalation_snapshot.escalation_policies_snapshots: if policy_snapshot.step in [ - EscalationPolicy.STEP_NOTIFY, - EscalationPolicy.STEP_NOTIFY_IMPORTANT, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT, ]: for user in policy_snapshot.notify_to_users_queue: users.append(_serialize_event_user(user)) + elif policy_snapshot.step in [ + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE, + EscalationPolicy.STEP_NOTIFY_USERS_QUEUE_IMPORTANT, + ]: + if policy_snapshot.notify_to_users_queue: + users.append(_serialize_event_user(policy_snapshot.next_user_in_sorted_queue)) elif policy_snapshot.step in [ EscalationPolicy.STEP_NOTIFY_SCHEDULE, EscalationPolicy.STEP_NOTIFY_SCHEDULE_IMPORTANT, From cc356c9d5467e71e38f3257efe3cd989c7e2df56 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Tue, 21 Jan 2025 14:05:39 -0300 Subject: [PATCH 12/13] chore: capitalize Slack name references (#5421) Related to https://github.com/grafana/irm/issues/425 --- .../apps/alerts/incident_log_builder/incident_log_builder.py | 4 ++-- engine/apps/alerts/tasks/notify_user.py | 2 +- engine/apps/api/views/schedule.py | 2 +- engine/apps/slack/scenarios/resolution_note.py | 2 +- engine/apps/slack/scenarios/schedules.py | 4 ++-- engine/apps/slack/views.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/engine/apps/alerts/incident_log_builder/incident_log_builder.py b/engine/apps/alerts/incident_log_builder/incident_log_builder.py index 70410d96..a4f550ef 100644 --- a/engine/apps/alerts/incident_log_builder/incident_log_builder.py +++ b/engine/apps/alerts/incident_log_builder/incident_log_builder.py @@ -536,7 +536,7 @@ class IncidentLogBuilder: ) else: plan_line = ( - f'escalation step "{escalation_policy_snapshot.step_display}" is slack specific. ' f"Skipping" + f'escalation step "{escalation_policy_snapshot.step_display}" is Slack specific. ' f"Skipping" ) escalation_plan.setdefault(timedelta, []).append({"plan_lines": [plan_line]}) @@ -597,7 +597,7 @@ class IncidentLogBuilder: ) else: plan_line = ( - f'escalation step "{escalation_policy_snapshot.step_display}" is slack specific. Skipping' + f'escalation step "{escalation_policy_snapshot.step_display}" is Slack specific. Skipping' ) escalation_plan.setdefault(timedelta, []).append({"plan_lines": [plan_line]}) diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index 9e7e9e54..65ae1f44 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -252,7 +252,7 @@ def notify_user_task( type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, alert_group=alert_group, - reason="Alert group slack notifications are disabled", + reason="Alert group Slack notifications are disabled", slack_prevent_posting=prevent_posting_to_thread, notification_step=notification_policy.step, notification_channel=notification_policy.notify_by, diff --git a/engine/apps/api/views/schedule.py b/engine/apps/api/views/schedule.py index 2d816211..51ead8e1 100644 --- a/engine/apps/api/views/schedule.py +++ b/engine/apps/api/views/schedule.py @@ -567,7 +567,7 @@ class ScheduleView( }, { "value": True, - "display_name": "Mention person in slack", + "display_name": "Mention person in Slack", }, ] return Response(options) diff --git a/engine/apps/slack/scenarios/resolution_note.py b/engine/apps/slack/scenarios/resolution_note.py index f1a557be..67ad4137 100644 --- a/engine/apps/slack/scenarios/resolution_note.py +++ b/engine/apps/slack/scenarios/resolution_note.py @@ -558,7 +558,7 @@ class ResolutionNoteModalStep(AlertGroupActionsMixin, scenario_step.ScenarioStep user_verbal = resolution_note.author_verbal(mention=True) message_timestamp = datetime.datetime.timestamp(resolution_note.created_at) blocks.append(DIVIDER) - source = "web" if resolution_note.source == ResolutionNote.Source.WEB else "slack" + source = "web" if resolution_note.source == ResolutionNote.Source.WEB else "Slack" blocks.append( typing.cast( diff --git a/engine/apps/slack/scenarios/schedules.py b/engine/apps/slack/scenarios/schedules.py index 61c6da61..b630bc10 100644 --- a/engine/apps/slack/scenarios/schedules.py +++ b/engine/apps/slack/scenarios/schedules.py @@ -26,8 +26,8 @@ if typing.TYPE_CHECKING: class EditScheduleShiftNotifyStep(scenario_step.ScenarioStep): notify_empty_oncall_options = {choice[0]: choice[1] for choice in OnCallSchedule.NotifyEmptyOnCall.choices} notify_oncall_shift_freq_options = {choice[0]: choice[1] for choice in OnCallSchedule.NotifyOnCallShiftFreq.choices} - mention_oncall_start_options = {1: "Mention person in slack", 0: "Inform in channel without mention"} - mention_oncall_next_options = {1: "Mention person in slack", 0: "Inform in channel without mention"} + mention_oncall_start_options = {1: "Mention person in Slack", 0: "Inform in channel without mention"} + mention_oncall_next_options = {1: "Mention person in Slack", 0: "Inform in channel without mention"} def process_scenario( self, diff --git a/engine/apps/slack/views.py b/engine/apps/slack/views.py index 859567cb..64fba9bb 100644 --- a/engine/apps/slack/views.py +++ b/engine/apps/slack/views.py @@ -586,7 +586,7 @@ class ResetSlackView(APIView): # just a placeholder value to continute uninstallation until UNIFIED_SLACK_APP_ENABLED is not enabled removed = True if not removed: - return Response({"error": "Failed to uninstall slack integration"}, status=500) + return Response({"error": "Failed to uninstall Slack integration"}, status=500) try: uninstall_slack_integration(request.user.organization, request.user) From ca40a824fb60040fc64af9fc517fc2ad857da673 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Mon, 3 Feb 2025 13:12:42 -0300 Subject: [PATCH 13/13] chore: updating django-related deps (#5431) --- .github/workflows/e2e-tests.yml | 2 +- engine/requirements-dev.txt | 2 +- engine/requirements.in | 5 +++-- engine/requirements.txt | 14 ++++++++------ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 28b0f65c..417a4d33 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -179,7 +179,7 @@ jobs: - name: Upload artifact if: failure() - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: ./grafana-plugin/playwright-report/ diff --git a/engine/requirements-dev.txt b/engine/requirements-dev.txt index cfb600e9..9c30d48e 100644 --- a/engine/requirements-dev.txt +++ b/engine/requirements-dev.txt @@ -18,7 +18,7 @@ charset-normalizer==3.3.2 # requests distlib==0.3.8 # via virtualenv -django==4.2.17 +django==4.2.18 # via # -c requirements.txt # django-stubs diff --git a/engine/requirements.in b/engine/requirements.in index e8fa1d3e..99e3424a 100644 --- a/engine/requirements.in +++ b/engine/requirements.in @@ -1,8 +1,8 @@ babel==2.12.1 beautifulsoup4==4.12.2 -celery[redis]==5.3.1 +celery[redis]==5.3.6 cryptography==43.0.1 -django==4.2.17 +django==4.2.18 django-add-default-value==0.10.0 django-anymail[amazon-ses]==12.0 django-cors-headers==3.7.0 @@ -32,6 +32,7 @@ fcm-django @ https://github.com/grafana/fcm-django/archive/refs/tags/v1.0.12r1.t hiredis==2.2.3 humanize==4.10.0 icalendar==5.0.10 +jinja2==3.1.5 lxml==5.2.2 markdown2==2.4.10 opentelemetry-sdk==1.26.0 diff --git a/engine/requirements.txt b/engine/requirements.txt index a85334b2..09124c34 100644 --- a/engine/requirements.txt +++ b/engine/requirements.txt @@ -34,7 +34,7 @@ cachetools==4.2.2 # via # google-auth # python-telegram-bot -celery[redis]==5.3.1 +celery==5.3.6 # via -r requirements.in certifi==2024.7.4 # via @@ -75,7 +75,7 @@ deprecated==1.2.14 # opentelemetry-api # opentelemetry-exporter-otlp-proto-grpc # opentelemetry-semantic-conventions -django==4.2.17 +django==4.2.18 # via # -r requirements.in # django-add-default-value @@ -97,7 +97,7 @@ django==4.2.17 # social-auth-app-django django-add-default-value==0.10.0 # via -r requirements.in -django-anymail[amazon-ses]==12.0 +django-anymail==12.0 # via -r requirements.in django-cors-headers==3.7.0 # via -r requirements.in @@ -152,7 +152,7 @@ firebase-admin==5.4.0 # via fcm-django flask==3.0.2 # via slack-export-viewer -google-api-core[grpc]==2.17.0 +google-api-core==2.17.0 # via # firebase-admin # google-api-python-client @@ -229,8 +229,10 @@ inflection==0.5.1 # via drf-spectacular itsdangerous==2.1.2 # via flask -jinja2==3.1.4 - # via flask +jinja2==3.1.5 + # via + # -r requirements.in + # flask jmespath==1.0.1 # via # boto3