From 8188dd5dd2c70dbfb0f3d8aebd96b7ad6163db1a Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Thu, 30 Nov 2023 17:18:18 +0000 Subject: [PATCH] Create missing direct paging integrations (#3468) # What this PR does Makes organization sync create direct paging integrations for Grafana teams that don't have one. ## Which issue(s) this PR fixes Related to https://github.com/grafana/oncall-private/issues/2302 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 4 + .../alerts/models/alert_receive_channel.py | 57 +++++++++++++- .../tests/test_alert_receiver_channel.py | 34 ++++++++ engine/apps/metrics_exporter/helpers.py | 77 ++++++++++--------- ..._cache.py => test_update_metrics_cache.py} | 65 ++++++++++++++++ engine/apps/user_management/models/team.py | 50 ++---------- .../apps/user_management/tests/test_sync.py | 24 ++++-- 7 files changed, 223 insertions(+), 88 deletions(-) rename engine/apps/metrics_exporter/tests/{test_update_metics_cache.py => test_update_metrics_cache.py} (89%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fb7147f..1cad9d6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Fixed + +- Create missing direct paging integrations on organization sync by @vadimkerr ([#3468](https://github.com/grafana/oncall/pull/3468)) + ## v1.3.67 (2023-11-30) Minor bugfixes + dependency updates :) diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index 22b7c6f2..ec2bae66 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -25,7 +25,7 @@ from apps.integrations.legacy_prefix import remove_legacy_prefix from apps.integrations.metadata import heartbeat from apps.integrations.tasks import create_alert, create_alertmanager_alerts from apps.metrics_exporter.helpers import ( - metrics_add_integration_to_cache, + metrics_add_integrations_to_cache, metrics_remove_deleted_integration_from_cache, metrics_update_integration_cache, ) @@ -94,6 +94,59 @@ class AlertReceiveChannelQueryset(models.QuerySet): class AlertReceiveChannelManager(models.Manager): + @staticmethod + def create_missing_direct_paging_integrations(organization: "Organization") -> None: + from apps.alerts.models import ChannelFilter + + # fetch teams without direct paging integration + teams_missing_direct_paging = list( + organization.teams.exclude( + pk__in=organization.alert_receive_channels.filter( + team__isnull=False, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING + ).values_list("team_id", flat=True) + ) + ) + if not teams_missing_direct_paging: + return + + # create missing integrations + AlertReceiveChannel.objects.bulk_create( + [ + AlertReceiveChannel( + organization=organization, + team=team, + integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, + verbal_name=f"Direct paging ({team.name} team)", + ) + for team in teams_missing_direct_paging + ], + batch_size=5000, + ignore_conflicts=True, # ignore if direct paging integration already exists for team + ) + + # fetch integrations for teams (some of them are created above, but some may already exist previously) + alert_receive_channels = organization.alert_receive_channels.filter( + team__in=teams_missing_direct_paging, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING + ) + + # create default routes + ChannelFilter.objects.bulk_create( + [ + ChannelFilter( + alert_receive_channel=alert_receive_channel, + filtering_term=None, + is_default=True, + order=0, + ) + for alert_receive_channel in alert_receive_channels + ], + batch_size=5000, + ignore_conflicts=True, # ignore if default route already exists for integration + ) + + # add integrations to metrics cache + metrics_add_integrations_to_cache(list(alert_receive_channels), organization) + def get_queryset(self): return AlertReceiveChannelQueryset(self.model, using=self._db).filter( ~Q(integration=AlertReceiveChannel.INTEGRATION_MAINTENANCE), Q(deleted_at=None) @@ -678,7 +731,7 @@ def listen_for_alertreceivechannel_model_save( heartbeat = IntegrationHeartBeat.objects.create(alert_receive_channel=instance, timeout_seconds=TEN_MINUTES) write_resource_insight_log(instance=heartbeat, author=instance.author, event=EntityEvent.CREATED) - metrics_add_integration_to_cache(instance) + metrics_add_integrations_to_cache([instance], instance.organization) elif instance.deleted_at: if instance.is_alerting_integration: diff --git a/engine/apps/alerts/tests/test_alert_receiver_channel.py b/engine/apps/alerts/tests/test_alert_receiver_channel.py index d53513bc..9d542b56 100644 --- a/engine/apps/alerts/tests/test_alert_receiver_channel.py +++ b/engine/apps/alerts/tests/test_alert_receiver_channel.py @@ -232,6 +232,40 @@ def test_delete_duplicate_names(make_organization, make_alert_receive_channel): organization.alert_receive_channels.all().delete() +@patch("apps.alerts.models.alert_receive_channel.metrics_add_integrations_to_cache") +@pytest.mark.django_db +def test_create_missing_direct_paging_integrations( + mock_metrics_add_integrations_to_cache, + make_organization, + make_team, + make_alert_receive_channel, + make_channel_filter, +): + organization = make_organization() + + # team with no direct paging integration + team1 = make_team(organization) + + # team with direct paging integration + team2 = make_team(organization) + alert_receive_channel = make_alert_receive_channel( + organization, team=team2, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING + ) + make_channel_filter(alert_receive_channel, is_default=True, order=0) + + # create missing direct paging integration for organization + AlertReceiveChannel.objects.create_missing_direct_paging_integrations(organization) + + # check that missing integrations and default routes were created + assert organization.alert_receive_channels.count() == 2 + mock_metrics_add_integrations_to_cache.assert_called_once() + for team in [team1, team2]: + alert_receive_channel = organization.alert_receive_channels.get( + team=team, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING + ) + assert alert_receive_channel.channel_filters.get().is_default + + @pytest.mark.django_db def test_create_duplicate_direct_paging_integrations(make_organization, make_team, make_alert_receive_channel): """Check that it's not possible to have more than one active direct paging integration per team.""" diff --git a/engine/apps/metrics_exporter/helpers.py b/engine/apps/metrics_exporter/helpers.py index cf2d2c66..ec979f5b 100644 --- a/engine/apps/metrics_exporter/helpers.py +++ b/engine/apps/metrics_exporter/helpers.py @@ -25,6 +25,7 @@ from apps.metrics_exporter.constants import ( if typing.TYPE_CHECKING: from apps.alerts.models import AlertReceiveChannel + from apps.user_management.models import Organization def get_organization_ids_from_db(): @@ -156,50 +157,56 @@ def metrics_remove_deleted_integration_from_cache(integration: "AlertReceiveChan cache.set(metric_key, metric_cache, timeout=metrics_cache_timeout) -def metrics_add_integration_to_cache(integration: "AlertReceiveChannel"): - """Add new integration data to metrics cache""" - metrics_cache_timeout = get_metrics_cache_timeout(integration.organization_id) - metric_alert_groups_total_key = get_metric_alert_groups_total_key(integration.organization_id) +def metrics_add_integrations_to_cache(integrations: list["AlertReceiveChannel"], organization: "Organization"): + """ + Bulk add new integration data to metrics cache. This method is safe to call multiple times on the same integrations. + """ + metrics_cache_timeout = get_metrics_cache_timeout(organization.id) + metric_alert_groups_total_key = get_metric_alert_groups_total_key(organization.id) - instance_slug = integration.organization.stack_slug - instance_id = integration.organization.stack_id - grafana_org_id = integration.organization.org_id + instance_slug = organization.stack_slug + instance_id = organization.stack_id + grafana_org_id = organization.org_id metric_alert_groups_total: typing.Dict[int, AlertGroupsTotalMetricsDict] = cache.get( metric_alert_groups_total_key, {} ) - metric_alert_groups_total.setdefault( - integration.id, - { - "integration_name": integration.emojized_verbal_name, - "team_name": integration.team_name, - "team_id": integration.team_id_or_no_team, - "org_id": grafana_org_id, - "slug": instance_slug, - "id": instance_id, - AlertGroupState.FIRING.value: 0, - AlertGroupState.ACKNOWLEDGED.value: 0, - AlertGroupState.RESOLVED.value: 0, - AlertGroupState.SILENCED.value: 0, - }, - ) + + for integration in integrations: + metric_alert_groups_total.setdefault( + integration.id, + { + "integration_name": integration.emojized_verbal_name, + "team_name": integration.team_name, + "team_id": integration.team_id_or_no_team, + "org_id": grafana_org_id, + "slug": instance_slug, + "id": instance_id, + AlertGroupState.FIRING.value: 0, + AlertGroupState.ACKNOWLEDGED.value: 0, + AlertGroupState.RESOLVED.value: 0, + AlertGroupState.SILENCED.value: 0, + }, + ) cache.set(metric_alert_groups_total_key, metric_alert_groups_total, timeout=metrics_cache_timeout) - metric_alert_groups_response_time_key = get_metric_alert_groups_response_time_key(integration.organization_id) + metric_alert_groups_response_time_key = get_metric_alert_groups_response_time_key(organization.id) metric_alert_groups_response_time: typing.Dict[int, AlertGroupsResponseTimeMetricsDict] = cache.get( metric_alert_groups_response_time_key, {} ) - metric_alert_groups_response_time.setdefault( - integration.id, - { - "integration_name": integration.emojized_verbal_name, - "team_name": integration.team_name, - "team_id": integration.team_id_or_no_team, - "org_id": grafana_org_id, - "slug": instance_slug, - "id": instance_id, - "response_time": [], - }, - ) + + for integration in integrations: + metric_alert_groups_response_time.setdefault( + integration.id, + { + "integration_name": integration.emojized_verbal_name, + "team_name": integration.team_name, + "team_id": integration.team_id_or_no_team, + "org_id": grafana_org_id, + "slug": instance_slug, + "id": instance_id, + "response_time": [], + }, + ) cache.set(metric_alert_groups_response_time_key, metric_alert_groups_response_time, timeout=metrics_cache_timeout) diff --git a/engine/apps/metrics_exporter/tests/test_update_metics_cache.py b/engine/apps/metrics_exporter/tests/test_update_metrics_cache.py similarity index 89% rename from engine/apps/metrics_exporter/tests/test_update_metics_cache.py rename to engine/apps/metrics_exporter/tests/test_update_metrics_cache.py index fbc5f845..29fb52f7 100644 --- a/engine/apps/metrics_exporter/tests/test_update_metics_cache.py +++ b/engine/apps/metrics_exporter/tests/test_update_metrics_cache.py @@ -11,6 +11,7 @@ from apps.metrics_exporter.helpers import ( get_metric_alert_groups_response_time_key, get_metric_alert_groups_total_key, get_metric_user_was_notified_of_alert_groups_key, + metrics_add_integrations_to_cache, metrics_bulk_update_team_label_cache, ) from apps.metrics_exporter.metrics_cache_manager import MetricsCacheManager @@ -547,3 +548,67 @@ def test_update_metrics_cache_on_user_notification( # counter doesn't grow after the second notification of alert group notify_user_task(user.id, alert_group_2.id, previous_notification_policy_pk=notification_policy_1.id) arg_idx = get_called_arg_index_and_compare_results() + + +@pytest.mark.django_db +def test_metrics_add_integrations_to_cache(make_organization, make_alert_receive_channel): + organization = make_organization( + org_id=METRICS_TEST_ORG_ID, + stack_slug=METRICS_TEST_INSTANCE_SLUG, + stack_id=METRICS_TEST_INSTANCE_ID, + ) + alert_receive_channel1 = make_alert_receive_channel(organization) + alert_receive_channel2 = make_alert_receive_channel(organization) + + def _expected_alert_groups_total(alert_receive_channel, firing=0): + return { + "integration_name": alert_receive_channel.emojized_verbal_name, + "team_name": alert_receive_channel.team_name, + "team_id": alert_receive_channel.team_id_or_no_team, + "org_id": organization.org_id, + "slug": organization.stack_slug, + "id": organization.stack_id, + "firing": firing, + "silenced": 0, + "acknowledged": 0, + "resolved": 0, + } + + def _expected_alert_groups_response_time(alert_receive_channel, response_time=None): + if response_time is None: + response_time = [] + + return { + "integration_name": alert_receive_channel.emojized_verbal_name, + "team_name": alert_receive_channel.team_name, + "team_id": alert_receive_channel.team_id_or_no_team, + "org_id": organization.org_id, + "slug": organization.stack_slug, + "id": organization.stack_id, + "response_time": response_time, + } + + # clear cache, add some data + cache.set( + get_metric_alert_groups_total_key(organization.id), + {alert_receive_channel2.id: _expected_alert_groups_total(alert_receive_channel2, firing=42)}, + ) + cache.set( + get_metric_alert_groups_response_time_key(organization.id), + {alert_receive_channel2.id: _expected_alert_groups_response_time(alert_receive_channel2, response_time=[12])}, + ) + + # add integrations to cache + metrics_add_integrations_to_cache([alert_receive_channel1, alert_receive_channel2], organization) + + # check alert groups total + assert cache.get(get_metric_alert_groups_total_key(organization.id)) == { + alert_receive_channel1.id: _expected_alert_groups_total(alert_receive_channel1), + alert_receive_channel2.id: _expected_alert_groups_total(alert_receive_channel2, firing=42), + } + + # check alert groups response time + assert cache.get(get_metric_alert_groups_response_time_key(organization.id)) == { + alert_receive_channel1.id: _expected_alert_groups_response_time(alert_receive_channel1), + alert_receive_channel2.id: _expected_alert_groups_response_time(alert_receive_channel2, response_time=[12]), + } diff --git a/engine/apps/user_management/models/team.py b/engine/apps/user_management/models/team.py index 628e35d5..ff651a9d 100644 --- a/engine/apps/user_management/models/team.py +++ b/engine/apps/user_management/models/team.py @@ -2,10 +2,10 @@ import typing from django.conf import settings from django.core.validators import MinLengthValidator -from django.db import models, transaction +from django.db import models -from apps.alerts.models import AlertReceiveChannel, ChannelFilter -from apps.metrics_exporter.helpers import metrics_add_integration_to_cache, metrics_bulk_update_team_label_cache +from apps.alerts.models import AlertReceiveChannel +from apps.metrics_exporter.helpers import metrics_bulk_update_team_label_cache from apps.metrics_exporter.metrics_cache_manager import MetricsCacheManager from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length @@ -52,48 +52,10 @@ class TeamManager(models.Manager["Team"]): for team in grafana_teams.values() if team["id"] not in existing_team_ids ) + organization.teams.bulk_create(teams_to_create, batch_size=5000) - with transaction.atomic(): - organization.teams.bulk_create(teams_to_create, batch_size=5000) - # Retrieve primary keys for the newly created users - # - # If the model’s primary key is an AutoField, the primary key attribute can only be retrieved - # on certain databases (currently PostgreSQL, MariaDB 10.5+, and SQLite 3.35+). - # On other databases, it will not be set. - # https://docs.djangoproject.com/en/4.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create - created_teams = organization.teams.exclude(team_id__in=existing_team_ids) - direct_paging_integrations_to_create = [] - for team in created_teams: - alert_receive_channel = AlertReceiveChannel( - organization=organization, - team=team, - integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, - verbal_name=f"Direct paging ({team.name if team else 'No'} team)", - ) - direct_paging_integrations_to_create.append(alert_receive_channel) - AlertReceiveChannel.objects.bulk_create(direct_paging_integrations_to_create, batch_size=5000) - created_direct_paging_integrations = ( - AlertReceiveChannel.objects.filter( - organization=organization, - integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, - ) - .exclude(team__team_id__in=existing_team_ids) - .exclude(team__isnull=True) - ) - default_channel_filters_to_create = [] - for integration in created_direct_paging_integrations: - channel_filter = ChannelFilter( - alert_receive_channel=integration, - filtering_term=None, - is_default=True, - order=0, - ) - default_channel_filters_to_create.append(channel_filter) - ChannelFilter.objects.bulk_create(default_channel_filters_to_create, batch_size=5000) - - # Add direct paging integrations to metrics cache - for integration in direct_paging_integrations_to_create: - metrics_add_integration_to_cache(integration) + # create missing direct paging integrations + AlertReceiveChannel.objects.create_missing_direct_paging_integrations(organization) # delete excess teams and their direct paging integrations team_ids_to_delete = existing_team_ids - grafana_teams.keys() diff --git a/engine/apps/user_management/tests/test_sync.py b/engine/apps/user_management/tests/test_sync.py index df245138..505f6a3c 100644 --- a/engine/apps/user_management/tests/test_sync.py +++ b/engine/apps/user_management/tests/test_sync.py @@ -103,19 +103,20 @@ def test_sync_users_for_organization_role_none(make_organization, make_user_for_ @pytest.mark.django_db def test_sync_teams_for_organization(make_organization, make_team, make_alert_receive_channel): organization = make_organization() - teams = tuple(make_team(organization, team_id=team_id) for team_id in (1, 2)) + teams = tuple(make_team(organization, team_id=team_id) for team_id in (1, 2, 3)) direct_paging_integrations = tuple( make_alert_receive_channel(organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=team) - for team in teams + for team in teams[:2] ) api_teams = tuple( - {"id": team_id, "name": "Test", "email": "test@test.test", "avatarUrl": "test.test/test"} for team_id in (2, 3) + {"id": team_id, "name": "Test", "email": "test@test.test", "avatarUrl": "test.test/test"} + for team_id in (2, 3, 4) ) Team.objects.sync_for_organization(organization, api_teams=api_teams) - assert organization.teams.count() == 2 + assert organization.teams.count() == 3 # check that excess teams and direct paging integrations are deleted assert not organization.teams.filter(pk=teams[0].pk).exists() @@ -129,11 +130,12 @@ def test_sync_teams_for_organization(make_organization, make_team, make_alert_re assert organization.alert_receive_channels.filter(pk=direct_paging_integrations[1].pk).exists() # check that missing teams are created - created_team = organization.teams.filter(team_id=api_teams[1]["id"]).first() + created_team = organization.teams.filter(team_id=api_teams[2]["id"]).first() assert created_team is not None - assert created_team.team_id == api_teams[1]["id"] - assert created_team.name == api_teams[1]["name"] + assert created_team.team_id == api_teams[2]["id"] + assert created_team.name == api_teams[2]["name"] + # check that direct paging is created for created team direct_paging_integration = AlertReceiveChannel.objects.get( organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, @@ -143,6 +145,14 @@ def test_sync_teams_for_organization(make_organization, make_team, make_alert_re assert direct_paging_integration.channel_filters.first().order == 0 assert direct_paging_integration.channel_filters.first().is_default + # check that direct paging is created for existing team + direct_paging_integration = AlertReceiveChannel.objects.get( + organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=teams[2] + ) + assert direct_paging_integration.channel_filters.count() == 1 + assert direct_paging_integration.channel_filters.first().order == 0 + assert direct_paging_integration.channel_filters.first().is_default + @pytest.mark.django_db def test_sync_users_for_team(make_organization, make_user_for_organization, make_team):