From 032ced6fd0c85e051f0f50cb843e10ec356e5cf9 Mon Sep 17 00:00:00 2001 From: Michael Derynck Date: Tue, 23 Jan 2024 15:59:33 -0700 Subject: [PATCH 01/22] Add more logging to plugin sync and install (#3730) # What this PR does Add logging to process for syncing OnCall backend with Grafana to help troubleshoot issues in self-hosted setups. ## Which issue(s) this PR fixes ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 4 ++++ engine/apps/auth_token/auth.py | 3 +++ engine/apps/grafana_plugin/views/install.py | 9 +++++++++ engine/apps/grafana_plugin/views/status.py | 17 ++++++++++++++++- engine/apps/user_management/sync.py | 2 ++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e90d15f..51f7a19c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Improved logging during plugin sync and install with Grafana @mderynck ([#3730](https://github.com/grafana/oncall/pull/3730)) + ## v1.3.92 (2024-01-23) Maintenance release diff --git a/engine/apps/auth_token/auth.py b/engine/apps/auth_token/auth.py index 89a12560..d40807c8 100644 --- a/engine/apps/auth_token/auth.py +++ b/engine/apps/auth_token/auth.py @@ -104,9 +104,11 @@ class BasePluginAuthentication(BaseAuthentication): try: context = dict(json.loads(request.headers.get("X-Grafana-Context"))) except (ValueError, TypeError): + logger.info("auth request user not found - missing valid X-Grafana-Context") return None if "UserId" not in context and "UserID" not in context: + logger.info("auth request user not found - X-Grafana-Context missing UserID") return None try: @@ -117,6 +119,7 @@ class BasePluginAuthentication(BaseAuthentication): try: return organization.users.get(user_id=user_id) except User.DoesNotExist: + logger.info(f"auth request user not found - user_id={user_id}") return None diff --git a/engine/apps/grafana_plugin/views/install.py b/engine/apps/grafana_plugin/views/install.py index 44fefaec..234a8758 100644 --- a/engine/apps/grafana_plugin/views/install.py +++ b/engine/apps/grafana_plugin/views/install.py @@ -1,3 +1,5 @@ +import logging + from rest_framework import status from rest_framework.request import Request from rest_framework.response import Response @@ -8,6 +10,8 @@ from apps.user_management.models import Organization from apps.user_management.sync import sync_organization from common.api_helpers.mixins import GrafanaHeadersMixin +logger = logging.getLogger(__name__) + class InstallView(GrafanaHeadersMixin, APIView): authentication_classes = (BasePluginAuthentication,) @@ -21,6 +25,11 @@ class InstallView(GrafanaHeadersMixin, APIView): organization.deleted_at = None organization.api_token = self.instance_context["grafana_token"] organization.save(update_fields=["api_token", "deleted_at"]) + logger.info(f"install - grafana_token replaced org={organization.pk}") sync_organization(organization) + logger.info( + f"install - sync organization finished org={organization.pk} " + f"token_status={organization.api_token_status}" + ) return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/engine/apps/grafana_plugin/views/status.py b/engine/apps/grafana_plugin/views/status.py index 340c1774..8e1065cd 100644 --- a/engine/apps/grafana_plugin/views/status.py +++ b/engine/apps/grafana_plugin/views/status.py @@ -1,3 +1,5 @@ +import logging + from django.conf import settings from rest_framework.request import Request from rest_framework.response import Response @@ -11,6 +13,8 @@ from apps.user_management.models import Organization from common.api_helpers.mixins import GrafanaHeadersMixin from common.api_helpers.utils import create_engine_url +logger = logging.getLogger(__name__) + class StatusView(GrafanaHeadersMixin, APIView): authentication_classes = ( @@ -19,8 +23,13 @@ class StatusView(GrafanaHeadersMixin, APIView): ) def post(self, request: Request) -> Response: + logger.info( + f"authenticated via {type(request.successful_authenticator)}, user=[{request.user}] " + f"org=[{request.auth.organization.stack_slug if request.auth.organization else None}]" + ) + """ - Called asyncronounsly on each start of the plugin + Called asynchronously on each start of the plugin Checks if plugin is correctly installed and async runs a task to sync users, teams and org """ @@ -42,7 +51,12 @@ class StatusView(GrafanaHeadersMixin, APIView): if organization: is_installed = True token_ok = organization.api_token_status == Organization.API_TOKEN_STATUS_OK + logger.info( + f"Status - check token org={organization.pk} status={organization.api_token_status} " + f"token_ok={token_ok}" + ) if organization.is_moved: + logger.info(f"Organization Moved! org={organization.pk}") api_url = create_engine_url("", override_base=organization.migration_destination.oncall_backend_url) else: allow_signup = DynamicSetting.objects.get_or_create( @@ -51,6 +65,7 @@ class StatusView(GrafanaHeadersMixin, APIView): # If user is not present in OnCall database, set token_ok to False, which will trigger reinstall if not request.user: + logger.info(f"Status - user not found org={organization.pk} " f"setting token_status to PENDING") token_ok = False organization.api_token_status = Organization.API_TOKEN_STATUS_PENDING organization.save(update_fields=["api_token_status"]) diff --git a/engine/apps/user_management/sync.py b/engine/apps/user_management/sync.py index d3bd1d28..a06a7fc5 100644 --- a/engine/apps/user_management/sync.py +++ b/engine/apps/user_management/sync.py @@ -42,6 +42,7 @@ def _sync_organization(organization: Organization) -> None: rbac_is_enabled = grafana_api_client.is_rbac_enabled_for_organization() organization.is_rbac_permissions_enabled = rbac_is_enabled + logger.info(f"RBAC status org={organization.pk} rbac_enabled={organization.is_rbac_permissions_enabled}") _sync_instance_info(organization) @@ -59,6 +60,7 @@ def _sync_organization(organization: Organization) -> None: ) else: organization.api_token_status = Organization.API_TOKEN_STATUS_FAILED + logger.warning(f"Sync not successful org={organization.pk} token_status=FAILED") organization.save( update_fields=[ From a6b1ccd416d8697391a93d6bb2f47403076f80aa Mon Sep 17 00:00:00 2001 From: Innokentii Konstantinov Date: Wed, 24 Jan 2024 16:32:58 +0800 Subject: [PATCH 02/22] Remove unused const --- engine/common/api_helpers/mixins.py | 1 - 1 file changed, 1 deletion(-) diff --git a/engine/common/api_helpers/mixins.py b/engine/common/api_helpers/mixins.py index 9b06f537..4aacd63b 100644 --- a/engine/common/api_helpers/mixins.py +++ b/engine/common/api_helpers/mixins.py @@ -249,7 +249,6 @@ ACKNOWLEDGE_CONDITION = "acknowledge_condition" GROUPING_ID = "grouping_id" SOURCE_LINK = "source_link" ROUTE = "route" -ALERT_GROUP_LABELS = "alert_group_labels" ALERT_GROUP_MULTI_LABEL = "alert_group_multi_label" ALERT_GROUP_DYNAMIC_LABEL = "alert_group_dynamic_label" From 19cae8086e472779faff8878a64b0df7bb8b0a08 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Wed, 24 Jan 2024 16:31:56 +0100 Subject: [PATCH 03/22] Retry `perform_notification` with Telegram ratelimit countdown on `RetryAfter` error (#3744) # What this PR does Use Telegram ratelimit countdown when retry `perform_notification` task on `RetryAfter` error ## Which issue(s) this PR fixes https://github.com/grafana/oncall-private/issues/2451 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 4 +++ engine/apps/alerts/tasks/notify_user.py | 13 ++++++-- engine/apps/alerts/tests/test_notify_user.py | 34 ++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51f7a19c..ca254640 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Improved logging during plugin sync and install with Grafana @mderynck ([#3730](https://github.com/grafana/oncall/pull/3730)) +### Fixed + +- Fixed too frequent retry of `perform_notification` task on Telegram ratelimit error by @Ferril ([#3744](https://github.com/grafana/oncall/pull/3744)) + ## v1.3.92 (2024-01-23) Maintenance release diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index 277cf4b8..972f2033 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -1,10 +1,12 @@ import time from functools import partial +from celery.exceptions import Retry from django.conf import settings from django.db import transaction from django.utils import timezone from kombu.utils.uuid import uuid as celery_uuid +from telegram.error import RetryAfter from apps.alerts.constants import NEXT_ESCALATION_DELAY from apps.alerts.signals import user_notification_action_triggered_signal @@ -234,7 +236,10 @@ def notify_user_task( @shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None + autoretry_for=(Exception,), + retry_backoff=True, + dont_autoretry_for=(Retry,), + max_retries=1 if settings.DEBUG else None, ) def perform_notification(log_record_pk): from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord @@ -289,7 +294,11 @@ def perform_notification(log_record_pk): phone_backend.notify_by_call(user, alert_group, notification_policy) elif notification_channel == UserNotificationPolicy.NotificationChannel.TELEGRAM: - TelegramToUserConnector.notify_user(user, alert_group, notification_policy) + try: + TelegramToUserConnector.notify_user(user, alert_group, notification_policy) + except RetryAfter as e: + countdown = getattr(e, "retry_after", 3) + raise perform_notification.retry((log_record_pk,), countdown=countdown, exc=e) elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK: # TODO: refactor checking the possibility of sending a notification in slack diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py index e06585b1..e0c32d2c 100644 --- a/engine/apps/alerts/tests/test_notify_user.py +++ b/engine/apps/alerts/tests/test_notify_user.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pytest +from telegram.error import RetryAfter from apps.alerts.models import AlertGroup from apps.alerts.tasks.notify_user import notify_user_task, perform_notification @@ -8,6 +9,7 @@ from apps.api.permissions import LegacyAccessControlRole from apps.base.models.user_notification_policy import UserNotificationPolicy from apps.base.models.user_notification_policy_log_record import UserNotificationPolicyLogRecord from apps.slack.models import SlackMessage +from apps.telegram.models import TelegramToUserConnector NOTIFICATION_UNAUTHORIZED_MSG = "notification is not allowed for user" @@ -297,3 +299,35 @@ def test_perform_notification_missing_user_notification_policy_log_record(caplog "The alert group associated with this log record may have been deleted." ) in caplog.text assert f"perform_notification: found record for {invalid_pk}" not in caplog.text + + +@pytest.mark.django_db +def test_perform_notification_telegram_retryafter_error( + make_organization_and_user, + make_user_notification_policy, + make_alert_receive_channel, + make_alert_group, + make_user_notification_policy_log_record, +): + organization, user = make_organization_and_user() + user_notification_policy = make_user_notification_policy( + user=user, + step=UserNotificationPolicy.Step.NOTIFY, + notify_by=UserNotificationPolicy.NotificationChannel.TELEGRAM, + ) + alert_receive_channel = make_alert_receive_channel(organization=organization) + alert_group = make_alert_group(alert_receive_channel=alert_receive_channel) + log_record = make_user_notification_policy_log_record( + author=user, + alert_group=alert_group, + notification_policy=user_notification_policy, + type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, + ) + countdown = 15 + exc = RetryAfter(countdown) + with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user: + with pytest.raises(RetryAfter): + perform_notification(log_record.pk) + + mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy) + assert alert_group.personal_log_records.last() == log_record From 21f0e2db8960c4920c27f90dd3c05ba5dde7ef00 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 08:28:51 +0000 Subject: [PATCH 04/22] Update `make docs` procedure (#3749) Co-authored-by: grafanabot Co-authored-by: Jack Baldry --- docs/make-docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make-docs b/docs/make-docs index d4a8fd53..d5d861ca 100755 --- a/docs/make-docs +++ b/docs/make-docs @@ -717,7 +717,7 @@ case "${image}" in "${DOCS_IMAGE}" \ "--minAlertLevel=${VALE_MINALERTLEVEL}" \ '--glob=*.md' \ - --output=line \ + --output=/etc/vale/rdjsonl.tmpl \ /hugo/content/docs | sed "s#$(proj_dst "${proj}")#sources#" ;; *) From e18dafa650a79e407a4a169a661fa6ab53e7ffb9 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Thu, 25 Jan 2024 13:52:55 +0100 Subject: [PATCH 05/22] Fix `routes` and `schedules` public api endpoints (#3751) # What this PR does Add check whether organization has Slack connection on update Slack related field using public api endpoints ## Which issue(s) this PR fixes https://github.com/grafana/oncall-private/issues/1611 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 2 + engine/apps/public_api/serializers/routes.py | 2 + .../public_api/serializers/schedules_base.py | 3 + .../public_api/tests/test_integrations.py | 44 ++++++++++++++ engine/apps/public_api/tests/test_routes.py | 46 +++++++++++++++ .../apps/public_api/tests/test_schedules.py | 59 +++++++++++++++++++ 6 files changed, 156 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca254640..34c4efd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fixed too frequent retry of `perform_notification` task on Telegram ratelimit error by @Ferril ([#3744](https://github.com/grafana/oncall/pull/3744)) +- Add check whether organization has Slack connection on update Slack related field using public api endpoints + by @Ferril ([#3751](https://github.com/grafana/oncall/pull/3751)) ## v1.3.92 (2024-01-23) diff --git a/engine/apps/public_api/serializers/routes.py b/engine/apps/public_api/serializers/routes.py index 878722be..0b6468db 100644 --- a/engine/apps/public_api/serializers/routes.py +++ b/engine/apps/public_api/serializers/routes.py @@ -86,6 +86,8 @@ class BaseChannelFilterSerializer(OrderedModelSerializer): slack_channel_id = slack_channel_id.upper() organization = self.context["request"].auth.organization slack_team_identity = organization.slack_team_identity + if not slack_team_identity: + raise BadRequest(detail="Slack isn't connected to this workspace") try: slack_team_identity.get_cached_channels().get(slack_id=slack_channel_id) except SlackChannel.DoesNotExist: diff --git a/engine/apps/public_api/serializers/schedules_base.py b/engine/apps/public_api/serializers/schedules_base.py index 03e4ae60..5ea13cb4 100644 --- a/engine/apps/public_api/serializers/schedules_base.py +++ b/engine/apps/public_api/serializers/schedules_base.py @@ -45,6 +45,9 @@ class ScheduleBaseSerializer(serializers.ModelSerializer): organization = self.context["request"].auth.organization slack_team_identity = organization.slack_team_identity + if (slack_channel_id or user_group_id) and not slack_team_identity: + raise BadRequest(detail="Slack isn't connected to this workspace") + if slack_channel_id is not None: slack_channel_id = slack_channel_id.upper() try: diff --git a/engine/apps/public_api/tests/test_integrations.py b/engine/apps/public_api/tests/test_integrations.py index 0d7a3045..8f9e2473 100644 --- a/engine/apps/public_api/tests/test_integrations.py +++ b/engine/apps/public_api/tests/test_integrations.py @@ -819,6 +819,50 @@ def test_update_integration_default_route( assert response.data["default_route"]["escalation_chain_id"] == escalation_chain.public_primary_key +@pytest.mark.django_db +def test_create_integration_default_route_with_slack_field( + make_organization_and_user_with_token, + make_escalation_chain, +): + organization, _, token = make_organization_and_user_with_token() + escalation_chain = make_escalation_chain(organization) + + client = APIClient() + data_for_create = { + "type": "grafana", + "name": "grafana_created", + "team_id": None, + "default_route": { + "escalation_chain_id": escalation_chain.public_primary_key, + "slack": {"channel_id": "TEST_SLACK_ID"}, + }, + } + url = reverse("api-public:integrations-list") + response = client.post(url, data=data_for_create, format="json", HTTP_AUTHORIZATION=f"{token}") + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + +@pytest.mark.django_db +def test_update_integration_default_route_with_slack_field( + make_organization_and_user_with_token, make_alert_receive_channel, make_channel_filter +): + organization, _, token = make_organization_and_user_with_token() + integration = make_alert_receive_channel(organization) + make_channel_filter(integration, is_default=True) + + client = APIClient() + data_for_update = { + "default_route": {"slack": {"channel_id": "TEST_SLACK_ID"}}, + } + + url = reverse("api-public:integrations-detail", args=[integration.public_primary_key]) + response = client.put(url, data=data_for_update, format="json", HTTP_AUTHORIZATION=f"{token}") + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + @pytest.mark.django_db def test_cant_create_integrations_direct_paging( make_organization_and_user_with_token, make_team, make_alert_receive_channel, make_user_auth_headers diff --git a/engine/apps/public_api/tests/test_routes.py b/engine/apps/public_api/tests/test_routes.py index a03a238b..126408bc 100644 --- a/engine/apps/public_api/tests/test_routes.py +++ b/engine/apps/public_api/tests/test_routes.py @@ -282,6 +282,52 @@ def test_delete_route( new_channel_filter.refresh_from_db() +@pytest.mark.django_db +def test_create_route_slack_error( + route_public_api_setup, +): + _, _, token, alert_receive_channel, escalation_chain, _ = route_public_api_setup + + client = APIClient() + + url = reverse("api-public:routes-list") + data_for_create = { + "integration_id": alert_receive_channel.public_primary_key, + "routing_regex": "testreg", + "escalation_chain_id": escalation_chain.public_primary_key, + "slack": {"channel_id": "TEST_SLACK_ID"}, + } + response = client.post(url, format="json", HTTP_AUTHORIZATION=token, data=data_for_create) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + +@pytest.mark.django_db +def test_update_route_slack_error( + route_public_api_setup, + make_channel_filter, +): + _, _, token, alert_receive_channel, escalation_chain, _ = route_public_api_setup + new_channel_filter = make_channel_filter( + alert_receive_channel, + is_default=False, + filtering_term="testreg", + ) + + client = APIClient() + + url = reverse("api-public:routes-detail", kwargs={"pk": new_channel_filter.public_primary_key}) + data_to_update = { + "slack": {"channel_id": "TEST_SLACK_ID"}, + } + + response = client.put(url, format="json", HTTP_AUTHORIZATION=token, data=data_to_update) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + @pytest.mark.django_db def test_create_route_with_messaging_backend( route_public_api_setup, diff --git a/engine/apps/public_api/tests/test_schedules.py b/engine/apps/public_api/tests/test_schedules.py index 956f59c2..6d041bf9 100644 --- a/engine/apps/public_api/tests/test_schedules.py +++ b/engine/apps/public_api/tests/test_schedules.py @@ -844,6 +844,65 @@ def test_create_schedule_invalid_timezone(make_organization_and_user_with_token, assert response.json() == {"time_zone": ["Invalid timezone"]} +@pytest.mark.django_db +def test_create_calendar_schedule_slack_error(make_organization_and_user_with_token): + organization, user, token = make_organization_and_user_with_token() + client = APIClient() + + url = reverse("api-public:schedules-list") + # with slack channel id + data = { + "team_id": None, + "name": "schedule test name", + "time_zone": "Europe/Moscow", + "type": "calendar", + "slack": { + "channel_id": "TEST_SLACK_ID", + }, + } + + response = client.post(url, data=data, format="json", HTTP_AUTHORIZATION=f"{token}") + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + # with slack user group id + data = { + "team_id": None, + "name": "schedule test name", + "time_zone": "Europe/Moscow", + "type": "calendar", + "slack": { + "user_group_id": "TEST_SLACK_ID", + }, + } + + response = client.post(url, data=data, format="json", HTTP_AUTHORIZATION=f"{token}") + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + +@pytest.mark.django_db +def test_update_calendar_schedule_slack_error( + make_organization_and_user_with_token, + make_schedule, +): + organization, user, token = make_organization_and_user_with_token() + client = APIClient() + schedule = make_schedule(organization, schedule_class=OnCallScheduleCalendar) + url = reverse("api-public:schedules-detail", kwargs={"pk": schedule.public_primary_key}) + + data = {"slack": {"channel_id": "TEST_SLACK_ID"}} + + response = client.put(url, data=data, format="json", HTTP_AUTHORIZATION=f"{token}") + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + data = {"slack": {"user_group_id": "TEST_SLACK_ID"}} + + response = client.put(url, data=data, format="json", HTTP_AUTHORIZATION=f"{token}") + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.data["detail"] == "Slack isn't connected to this workspace" + + @pytest.mark.django_db def test_create_ical_schedule_without_ical_url(make_organization_and_user_with_token): _, _, token = make_organization_and_user_with_token() From 2abcc4563a692e4c7388ddd40283800d365c576a Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 25 Jan 2024 13:46:55 -0500 Subject: [PATCH 06/22] mobile app proxy - request auth token from cloud auth api (#3748) # What this PR does Related to https://github.com/grafana/oncall-private/issues/2071 See [this conversation](https://raintank-corp.slack.com/archives/C064R17Q1A8/p1706125615995019) for all the context ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- .../tests/test_mobile_app_gateway.py | 34 ++++---- engine/apps/mobile_app/views.py | 40 ++++++---- engine/common/cloud_auth_api/__init__.py | 0 engine/common/cloud_auth_api/client.py | 66 ++++++++++++++++ .../common/cloud_auth_api/tests/__init__.py | 0 .../cloud_auth_api/tests/test_client.py | 79 +++++++++++++++++++ engine/common/oncall_gateway/client.py | 2 +- engine/requirements.txt | 1 - engine/settings/base.py | 5 +- 9 files changed, 193 insertions(+), 34 deletions(-) create mode 100644 engine/common/cloud_auth_api/__init__.py create mode 100644 engine/common/cloud_auth_api/client.py create mode 100644 engine/common/cloud_auth_api/tests/__init__.py create mode 100644 engine/common/cloud_auth_api/tests/test_client.py diff --git a/engine/apps/mobile_app/tests/test_mobile_app_gateway.py b/engine/apps/mobile_app/tests/test_mobile_app_gateway.py index f38dabb4..5c259bf1 100644 --- a/engine/apps/mobile_app/tests/test_mobile_app_gateway.py +++ b/engine/apps/mobile_app/tests/test_mobile_app_gateway.py @@ -10,22 +10,23 @@ from rest_framework.test import APIClient from rest_framework.views import APIView from apps.mobile_app.views import MobileAppGatewayView +from common.cloud_auth_api.client import CloudAuthApiClient, CloudAuthApiException DOWNSTREAM_BACKEND = "incident" MOCK_DOWNSTREAM_URL = "https://mockdownstream.com" MOCK_DOWNSTREAM_INCIDENT_API_URL = "https://mockdownstreamincidentapi.com" -MOCK_DOWNSTREAM_HEADERS = {"X-OnCall-Mobile-Proxy-Authorization": "Bearer mock_jwt"} +MOCK_DOWNSTREAM_HEADERS = {"Authorization": "Bearer mock_auth_token"} MOCK_DOWNSTREAM_RESPONSE_DATA = {"foo": "bar"} MOCK_TIMEZONE_NOW = timezone.datetime(2021, 1, 1, 3, 4, 5, tzinfo=timezone.utc) -MOCK_JWT = "mncn,zxcnv,mznxcv" -MOCK_JWT_PRIVATE_KEY = "asd,mzcxn,vmnzxcv,mnzx,cvmnzaslkdjflaksjdf" +MOCK_AUTH_TOKEN = "mncn,zxcnv,mznxcv" @pytest.fixture(autouse=True) def enable_mobile_app_gateway(settings): settings.MOBILE_APP_GATEWAY_ENABLED = True - settings.MOBILE_APP_GATEWAY_RSA_PRIVATE_KEY = MOCK_JWT_PRIVATE_KEY + settings.GRAFANA_CLOUD_AUTH_API_URL = "asdfasdf" + settings.GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN = "zxcvzx" class MockResponse: @@ -209,6 +210,7 @@ def test_mobile_app_gateway_supported_downstream_backends( (requests.exceptions.TooManyRedirects, (), status.HTTP_502_BAD_GATEWAY), (requests.exceptions.Timeout, (), status.HTTP_502_BAD_GATEWAY), (requests.exceptions.JSONDecodeError, ("", "", 5), status.HTTP_400_BAD_REQUEST), + (CloudAuthApiException, (403, "http://example.com"), status.HTTP_502_BAD_GATEWAY), ], ) def test_mobile_app_gateway_catches_errors_from_downstream_server( @@ -290,11 +292,11 @@ def test_mobile_app_gateway_incident_api_url( @pytest.mark.django_db @patch("apps.mobile_app.views.requests") -@patch("apps.mobile_app.views.MobileAppGatewayView._construct_jwt", return_value=MOCK_JWT) +@patch("apps.mobile_app.views.MobileAppGatewayView._get_auth_token", return_value=MOCK_AUTH_TOKEN) @patch("apps.mobile_app.views.MobileAppGatewayView._get_downstream_url", return_value=MOCK_DOWNSTREAM_URL) def test_mobile_app_gateway_proxies_headers( _mock_get_downstream_url, - _mock_construct_jwt, + _mock_get_auth_token, mock_requests, make_organization_and_user_with_mobile_app_auth_token, ): @@ -313,16 +315,16 @@ def test_mobile_app_gateway_proxies_headers( MOCK_DOWNSTREAM_URL, data=b"", params={}, - headers={"X-OnCall-Mobile-Proxy-Authorization": f"Bearer {MOCK_JWT}", "Content-Type": content_type_header}, + headers={"Authorization": f"Bearer {MOCK_AUTH_TOKEN}", "Content-Type": content_type_header}, ) @pytest.mark.django_db -@patch("apps.mobile_app.views.jwt.encode", return_value=MOCK_JWT) +@patch("apps.mobile_app.views.CloudAuthApiClient.request_signed_token", return_value=MOCK_AUTH_TOKEN) @patch("apps.mobile_app.views.timezone.now", return_value=MOCK_TIMEZONE_NOW) -def test_mobile_app_gateway_properly_generates_a_jwt( +def test_mobile_app_gateway_properly_generates_an_auth_token( _mock_timezone_now, - mock_jwt_encode, + mock_request_signed_token, make_organization, make_user_for_organization, ): @@ -337,10 +339,14 @@ def test_mobile_app_gateway_properly_generates_a_jwt( ) user = make_user_for_organization(organization, user_id=user_id) - encoded_jwt = MobileAppGatewayView._construct_jwt(user) + auth_token = MobileAppGatewayView._get_auth_token(DOWNSTREAM_BACKEND, user) - assert encoded_jwt == MOCK_JWT - mock_jwt_encode.assert_called_once_with( + assert auth_token == f"{stack_id}:{MOCK_AUTH_TOKEN}" + + mock_request_signed_token.assert_called_once_with( + organization_id, + stack_id, + [CloudAuthApiClient.Scopes.INCIDENT_WRITE], { "iat": MOCK_TIMEZONE_NOW, "exp": MOCK_TIMEZONE_NOW + timezone.timedelta(minutes=1), @@ -351,6 +357,4 @@ def test_mobile_app_gateway_properly_generates_a_jwt( "stack_slug": organization.stack_slug, "org_slug": organization.org_slug, }, - MOCK_JWT_PRIVATE_KEY, - algorithm="RS256", ) diff --git a/engine/apps/mobile_app/views.py b/engine/apps/mobile_app/views.py index 8871fee5..fcd728d7 100644 --- a/engine/apps/mobile_app/views.py +++ b/engine/apps/mobile_app/views.py @@ -1,7 +1,7 @@ +import enum import logging import typing -import jwt import requests from django.conf import settings from django.core.exceptions import ObjectDoesNotExist @@ -17,6 +17,7 @@ from rest_framework.views import APIView from apps.mobile_app.auth import MobileAppAuthTokenAuthentication, MobileAppVerificationTokenAuthentication from apps.mobile_app.models import FCMDevice, MobileAppAuthToken, MobileAppUserSettings from apps.mobile_app.serializers import FCMDeviceSerializer, MobileAppUserSettingsSerializer +from common.cloud_auth_api.client import CloudAuthApiClient, CloudAuthApiException if typing.TYPE_CHECKING: from apps.user_management.models import Organization, User @@ -135,12 +136,10 @@ class MobileAppGatewayView(APIView): authentication_classes = (MobileAppAuthTokenAuthentication,) permission_classes = (IsAuthenticated,) - class SupportedDownstreamBackends: + class SupportedDownstreamBackends(enum.StrEnum): INCIDENT = "incident" - ALL_SUPPORTED_DOWNSTREAM_BACKENDS = [ - SupportedDownstreamBackends.INCIDENT, - ] + ALL_SUPPORTED_DOWNSTREAM_BACKENDS = list(SupportedDownstreamBackends) def initial(self, request: Request, *args, **kwargs): # If the mobile app gateway is not enabled, return a 404 @@ -149,11 +148,12 @@ class MobileAppGatewayView(APIView): super().initial(request, *args, **kwargs) @classmethod - def _construct_jwt_payload(cls, user: "User") -> typing.Dict[str, typing.Any]: + def _construct_token_claims(cls, user: "User") -> typing.Dict[str, typing.Any]: organization = user.organization now = timezone.now() return { + # TODO: do we need to specify iat and exp? # registered claim names "iat": now, "exp": now + timezone.timedelta(minutes=1), # jwt is short lived. expires in 1 minute. @@ -167,19 +167,28 @@ class MobileAppGatewayView(APIView): } @classmethod - def _construct_jwt(cls, user: "User") -> str: + def _get_auth_token(cls, downstream_backend: SupportedDownstreamBackends, user: "User") -> str: """ RS256 = asymmetric = public/private key pair HS256 = symmetric = shared secret (don't use this) """ - return jwt.encode( - cls._construct_jwt_payload(user), settings.MOBILE_APP_GATEWAY_RSA_PRIVATE_KEY, algorithm="RS256" - ) + token_claims = cls._construct_token_claims(user) + token_scopes = { + cls.SupportedDownstreamBackends.INCIDENT: [CloudAuthApiClient.Scopes.INCIDENT_WRITE], + }[downstream_backend] + + org = user.organization + stack_id = org.stack_id + token = CloudAuthApiClient().request_signed_token(org.org_id, stack_id, token_scopes, token_claims) + + return f"{stack_id}:{token}" @classmethod - def _get_downstream_headers(cls, request: Request, user: "User") -> typing.Dict[str, str]: + def _get_downstream_headers( + cls, request: Request, downstream_backend: SupportedDownstreamBackends, user: "User" + ) -> typing.Dict[str, str]: headers = { - "X-OnCall-Mobile-Proxy-Authorization": f"Bearer {cls._construct_jwt(user)}", + "Authorization": f"Bearer {cls._get_auth_token(downstream_backend, user)}", } if (v := request.META.get("CONTENT_TYPE", None)) is not None: @@ -188,7 +197,9 @@ class MobileAppGatewayView(APIView): return headers @classmethod - def _get_downstream_url(cls, organization: "Organization", downstream_backend: str, downstream_path: str) -> str: + def _get_downstream_url( + cls, organization: "Organization", downstream_backend: SupportedDownstreamBackends, downstream_path: str + ) -> str: downstream_url = { cls.SupportedDownstreamBackends.INCIDENT: organization.grafana_incident_backend_url, }[downstream_backend] @@ -217,13 +228,14 @@ class MobileAppGatewayView(APIView): downstream_url, data=request.body, params=request.query_params.dict(), - headers=self._get_downstream_headers(request, user), + headers=self._get_downstream_headers(request, downstream_backend, user), ) return Response(status=downstream_response.status_code, data=downstream_response.json()) except ( requests.exceptions.RequestException, requests.exceptions.JSONDecodeError, + CloudAuthApiException, ) as e: if isinstance(e, requests.exceptions.JSONDecodeError): final_status = status.HTTP_400_BAD_REQUEST diff --git a/engine/common/cloud_auth_api/__init__.py b/engine/common/cloud_auth_api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/engine/common/cloud_auth_api/client.py b/engine/common/cloud_auth_api/client.py new file mode 100644 index 00000000..21f760cc --- /dev/null +++ b/engine/common/cloud_auth_api/client.py @@ -0,0 +1,66 @@ +import enum +import typing +from urllib.parse import urljoin + +import requests +from django.conf import settings +from rest_framework import status + + +class CloudAuthApiException(Exception): + """A generic 400 or 500 level exception from the Cloud Auth API""" + + def __init__(self, status, url, msg="", method="GET"): + self.url = url + self.status = status + self.method = method + self.msg = msg + + def __str__(self): + return f"CloudAuthApiException: status={self.status} url={self.url} method={self.method} error={self.msg}" + + +class CloudAuthApiClient: + class Scopes(enum.StrEnum): + INCIDENT_WRITE = "incident:write" + + def __init__(self): + if settings.GRAFANA_CLOUD_AUTH_API_URL is None or settings.GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN is None: + raise RuntimeError( + "GRAFANA_CLOUD_AUTH_API_URL and GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN must be set" + "to use CloudAuthApiClient" + ) + + self.api_base_url = settings.GRAFANA_CLOUD_AUTH_API_URL + self.api_token = settings.GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN + + def request_signed_token( + self, org_id: int, stack_id: int, scopes: typing.List[Scopes], claims: typing.Dict[str, typing.Any] + ) -> str: + headers = { + "Authorization": f"Bearer {self.api_token}", + "X-Org-ID": org_id, + "X-Realms": [ + { + "type": "stack", + "identifier": stack_id, + }, + ], + } + + url = urljoin(self.api_base_url, "v1/sign") + response = requests.post( + url, + headers=headers, + json={ + "claims": claims, + "extra": { + "scopes": scopes, + "org_id": org_id, + }, + }, + ) + + if response.status_code != status.HTTP_200_OK: + raise CloudAuthApiException(response.status_code, url, response.text, method="POST") + return response.json()["data"]["token"] diff --git a/engine/common/cloud_auth_api/tests/__init__.py b/engine/common/cloud_auth_api/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/engine/common/cloud_auth_api/tests/test_client.py b/engine/common/cloud_auth_api/tests/test_client.py new file mode 100644 index 00000000..c52d6eb0 --- /dev/null +++ b/engine/common/cloud_auth_api/tests/test_client.py @@ -0,0 +1,79 @@ +from unittest.mock import patch + +import pytest +from rest_framework import status + +from common.cloud_auth_api.client import CloudAuthApiClient, CloudAuthApiException + +GRAFANA_CLOUD_AUTH_API_URL = "http://example.com" +GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN = "asdfasdfasdfasdf" + + +@pytest.fixture(autouse=True) +def configure_cloud_auth_api_client(settings): + settings.GRAFANA_CLOUD_AUTH_API_URL = GRAFANA_CLOUD_AUTH_API_URL + settings.GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN = GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN + + +@patch("common.cloud_auth_api.client.requests") +@pytest.mark.parametrize("response_status_code", [status.HTTP_200_OK, status.HTTP_401_UNAUTHORIZED]) +def test_request_signed_token(mock_requests, response_status_code): + mock_auth_token = ",mnasdlkjlakjoqwejroiqwejr" + mock_response_text = "error message" + + org_id = 1 + stack_id = 5 + scopes = ["incident:write", "foo:bar"] + claims = {"vegetable": "carrot", "fruit": "apple"} + + class MockResponse: + text = mock_response_text + + def __init__(self, status_code): + self.status_code = status_code + + def json(self): + return { + "data": { + "token": mock_auth_token, + }, + } + + mock_requests.post.return_value = MockResponse(response_status_code) + + def _make_request(): + return CloudAuthApiClient().request_signed_token(org_id, stack_id, scopes, claims) + + url = f"{GRAFANA_CLOUD_AUTH_API_URL}/v1/sign" + + if response_status_code != status.HTTP_200_OK: + with pytest.raises(CloudAuthApiException) as excinfo: + _make_request() + + assert excinfo.value.status == response_status_code + assert excinfo.value.method == "POST" + assert excinfo.value.msg == mock_response_text + assert excinfo.value.url == url + else: + assert _make_request() == mock_auth_token + + mock_requests.post.assert_called_once_with( + url, + headers={ + "Authorization": f"Bearer {GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN}", + "X-Org-ID": org_id, + "X-Realms": [ + { + "type": "stack", + "identifier": stack_id, + }, + ], + }, + json={ + "claims": claims, + "extra": { + "scopes": scopes, + "org_id": org_id, + }, + }, + ) diff --git a/engine/common/oncall_gateway/client.py b/engine/common/oncall_gateway/client.py index 947b7968..cd3eadaf 100644 --- a/engine/common/oncall_gateway/client.py +++ b/engine/common/oncall_gateway/client.py @@ -44,7 +44,7 @@ class ChatopsProxyAPIException(Exception): self.msg = msg def __str__(self): - return f"LabelsRepoAPIException: status={self.status} url={self.url} method={self.method} error={self.msg}" + return f"ChatopsProxyAPIException: status={self.status} url={self.url} method={self.method} error={self.msg}" class ChatopsProxyAPIClient: diff --git a/engine/requirements.txt b/engine/requirements.txt index b182216f..be789078 100644 --- a/engine/requirements.txt +++ b/engine/requirements.txt @@ -56,4 +56,3 @@ babel==2.12.1 drf-spectacular==0.26.5 grpcio==1.57.0 markdown2==2.4.10 -PyJWT==2.8.0 diff --git a/engine/settings/base.py b/engine/settings/base.py index c3bd695d..6621c0a6 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -726,9 +726,8 @@ FCM_DJANGO_SETTINGS = { } MOBILE_APP_GATEWAY_ENABLED = getenv_boolean("MOBILE_APP_GATEWAY_ENABLED", default=False) -MOBILE_APP_GATEWAY_RSA_PRIVATE_KEY = os.environ.get("MOBILE_APP_GATEWAY_RSA_PRIVATE_KEY", None) -if MOBILE_APP_GATEWAY_ENABLED and not MOBILE_APP_GATEWAY_RSA_PRIVATE_KEY: - raise RuntimeError("MOBILE_APP_GATEWAY_RSA_PRIVATE_KEY is required when MOBILE_APP_GATEWAY_ENABLED is True") +GRAFANA_CLOUD_AUTH_API_URL = os.environ.get("GRAFANA_CLOUD_AUTH_API_URL", None) +GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN = os.environ.get("GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN", None) SELF_HOSTED_SETTINGS = { "STACK_ID": 5, From add6df570f99362eac60f4fdd4848c077d0025b6 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 25 Jan 2024 14:19:28 -0500 Subject: [PATCH 07/22] address improperly formatted cloud auth api request headers --- .../mobile_app/tests/test_mobile_app_gateway.py | 3 +-- engine/apps/mobile_app/views.py | 5 +---- engine/common/cloud_auth_api/client.py | 14 +++++++++++--- engine/common/cloud_auth_api/tests/test_client.py | 12 ++++++++---- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/engine/apps/mobile_app/tests/test_mobile_app_gateway.py b/engine/apps/mobile_app/tests/test_mobile_app_gateway.py index 5c259bf1..a860a85a 100644 --- a/engine/apps/mobile_app/tests/test_mobile_app_gateway.py +++ b/engine/apps/mobile_app/tests/test_mobile_app_gateway.py @@ -344,8 +344,7 @@ def test_mobile_app_gateway_properly_generates_an_auth_token( assert auth_token == f"{stack_id}:{MOCK_AUTH_TOKEN}" mock_request_signed_token.assert_called_once_with( - organization_id, - stack_id, + organization, [CloudAuthApiClient.Scopes.INCIDENT_WRITE], { "iat": MOCK_TIMEZONE_NOW, diff --git a/engine/apps/mobile_app/views.py b/engine/apps/mobile_app/views.py index fcd728d7..46da4b5a 100644 --- a/engine/apps/mobile_app/views.py +++ b/engine/apps/mobile_app/views.py @@ -178,10 +178,7 @@ class MobileAppGatewayView(APIView): }[downstream_backend] org = user.organization - stack_id = org.stack_id - token = CloudAuthApiClient().request_signed_token(org.org_id, stack_id, token_scopes, token_claims) - - return f"{stack_id}:{token}" + return f"{org.stack_id}:{CloudAuthApiClient().request_signed_token(org, token_scopes, token_claims)}" @classmethod def _get_downstream_headers( diff --git a/engine/common/cloud_auth_api/client.py b/engine/common/cloud_auth_api/client.py index 21f760cc..193051a8 100644 --- a/engine/common/cloud_auth_api/client.py +++ b/engine/common/cloud_auth_api/client.py @@ -6,6 +6,9 @@ import requests from django.conf import settings from rest_framework import status +if typing.TYPE_CHECKING: + from apps.user_management.models import Organization + class CloudAuthApiException(Exception): """A generic 400 or 500 level exception from the Cloud Auth API""" @@ -35,15 +38,20 @@ class CloudAuthApiClient: self.api_token = settings.GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN def request_signed_token( - self, org_id: int, stack_id: int, scopes: typing.List[Scopes], claims: typing.Dict[str, typing.Any] + self, org: "Organization", scopes: typing.List[Scopes], claims: typing.Dict[str, typing.Any] ) -> str: + org_id = org.org_id + stack_id = org.stack_id + headers = { "Authorization": f"Bearer {self.api_token}", - "X-Org-ID": org_id, + # need to cast to str otherwise - requests.exceptions.InvalidHeader: Header part ... from ('X-Org-ID', 5000) + # must be of type str or bytes, not + "X-Org-ID": str(org_id), "X-Realms": [ { "type": "stack", - "identifier": stack_id, + "identifier": str(stack_id), }, ], } diff --git a/engine/common/cloud_auth_api/tests/test_client.py b/engine/common/cloud_auth_api/tests/test_client.py index c52d6eb0..8c0a9bd1 100644 --- a/engine/common/cloud_auth_api/tests/test_client.py +++ b/engine/common/cloud_auth_api/tests/test_client.py @@ -16,13 +16,17 @@ def configure_cloud_auth_api_client(settings): @patch("common.cloud_auth_api.client.requests") +@pytest.mark.django_db @pytest.mark.parametrize("response_status_code", [status.HTTP_200_OK, status.HTTP_401_UNAUTHORIZED]) -def test_request_signed_token(mock_requests, response_status_code): +def test_request_signed_token(mock_requests, make_organization, response_status_code): mock_auth_token = ",mnasdlkjlakjoqwejroiqwejr" mock_response_text = "error message" org_id = 1 stack_id = 5 + + organization = make_organization(stack_id=stack_id, org_id=org_id) + scopes = ["incident:write", "foo:bar"] claims = {"vegetable": "carrot", "fruit": "apple"} @@ -42,7 +46,7 @@ def test_request_signed_token(mock_requests, response_status_code): mock_requests.post.return_value = MockResponse(response_status_code) def _make_request(): - return CloudAuthApiClient().request_signed_token(org_id, stack_id, scopes, claims) + return CloudAuthApiClient().request_signed_token(organization, scopes, claims) url = f"{GRAFANA_CLOUD_AUTH_API_URL}/v1/sign" @@ -61,11 +65,11 @@ def test_request_signed_token(mock_requests, response_status_code): url, headers={ "Authorization": f"Bearer {GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN}", - "X-Org-ID": org_id, + "X-Org-ID": str(org_id), "X-Realms": [ { "type": "stack", - "identifier": stack_id, + "identifier": str(stack_id), }, ], }, From 4220199a86a05066752eeae4f2adc7337fefdb77 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 25 Jan 2024 14:41:35 -0500 Subject: [PATCH 08/22] cast X-Realms header to jsonified string --- engine/common/cloud_auth_api/client.py | 16 ++++++++++------ .../common/cloud_auth_api/tests/test_client.py | 15 +++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/engine/common/cloud_auth_api/client.py b/engine/common/cloud_auth_api/client.py index 193051a8..12b86961 100644 --- a/engine/common/cloud_auth_api/client.py +++ b/engine/common/cloud_auth_api/client.py @@ -1,4 +1,5 @@ import enum +import json import typing from urllib.parse import urljoin @@ -43,17 +44,20 @@ class CloudAuthApiClient: org_id = org.org_id stack_id = org.stack_id + # NOTE: header values must always be strings headers = { "Authorization": f"Bearer {self.api_token}", # need to cast to str otherwise - requests.exceptions.InvalidHeader: Header part ... from ('X-Org-ID', 5000) # must be of type str or bytes, not "X-Org-ID": str(org_id), - "X-Realms": [ - { - "type": "stack", - "identifier": str(stack_id), - }, - ], + "X-Realms": json.dumps( + [ + { + "type": "stack", + "identifier": str(stack_id), + }, + ] + ), } url = urljoin(self.api_base_url, "v1/sign") diff --git a/engine/common/cloud_auth_api/tests/test_client.py b/engine/common/cloud_auth_api/tests/test_client.py index 8c0a9bd1..2a10626c 100644 --- a/engine/common/cloud_auth_api/tests/test_client.py +++ b/engine/common/cloud_auth_api/tests/test_client.py @@ -1,3 +1,4 @@ +import json from unittest.mock import patch import pytest @@ -66,12 +67,14 @@ def test_request_signed_token(mock_requests, make_organization, response_status_ headers={ "Authorization": f"Bearer {GRAFANA_CLOUD_AUTH_API_SYSTEM_TOKEN}", "X-Org-ID": str(org_id), - "X-Realms": [ - { - "type": "stack", - "identifier": str(stack_id), - }, - ], + "X-Realms": json.dumps( + [ + { + "type": "stack", + "identifier": str(stack_id), + }, + ] + ), }, json={ "claims": claims, From baddc640925c818b16c9234493f6d5b3e00d403e Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 25 Jan 2024 15:14:08 -0500 Subject: [PATCH 09/22] remove iat and exp from auth api token claims --- .../tests/test_mobile_app_gateway.py | 6 ---- engine/apps/mobile_app/views.py | 32 ++++++------------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/engine/apps/mobile_app/tests/test_mobile_app_gateway.py b/engine/apps/mobile_app/tests/test_mobile_app_gateway.py index a860a85a..8dcd9766 100644 --- a/engine/apps/mobile_app/tests/test_mobile_app_gateway.py +++ b/engine/apps/mobile_app/tests/test_mobile_app_gateway.py @@ -4,7 +4,6 @@ from unittest.mock import patch import pytest import requests from django.urls import reverse -from django.utils import timezone from rest_framework import status from rest_framework.test import APIClient from rest_framework.views import APIView @@ -18,7 +17,6 @@ MOCK_DOWNSTREAM_INCIDENT_API_URL = "https://mockdownstreamincidentapi.com" MOCK_DOWNSTREAM_HEADERS = {"Authorization": "Bearer mock_auth_token"} MOCK_DOWNSTREAM_RESPONSE_DATA = {"foo": "bar"} -MOCK_TIMEZONE_NOW = timezone.datetime(2021, 1, 1, 3, 4, 5, tzinfo=timezone.utc) MOCK_AUTH_TOKEN = "mncn,zxcnv,mznxcv" @@ -321,9 +319,7 @@ def test_mobile_app_gateway_proxies_headers( @pytest.mark.django_db @patch("apps.mobile_app.views.CloudAuthApiClient.request_signed_token", return_value=MOCK_AUTH_TOKEN) -@patch("apps.mobile_app.views.timezone.now", return_value=MOCK_TIMEZONE_NOW) def test_mobile_app_gateway_properly_generates_an_auth_token( - _mock_timezone_now, mock_request_signed_token, make_organization, make_user_for_organization, @@ -347,8 +343,6 @@ def test_mobile_app_gateway_properly_generates_an_auth_token( organization, [CloudAuthApiClient.Scopes.INCIDENT_WRITE], { - "iat": MOCK_TIMEZONE_NOW, - "exp": MOCK_TIMEZONE_NOW + timezone.timedelta(minutes=1), "user_id": user.user_id, # grafana user ID "user_email": user.email, "stack_id": organization.stack_id, diff --git a/engine/apps/mobile_app/views.py b/engine/apps/mobile_app/views.py index 46da4b5a..0e5b172d 100644 --- a/engine/apps/mobile_app/views.py +++ b/engine/apps/mobile_app/views.py @@ -5,7 +5,6 @@ import typing import requests from django.conf import settings from django.core.exceptions import ObjectDoesNotExist -from django.utils import timezone from fcm_django.api.rest_framework import FCMDeviceAuthorizedViewSet as BaseFCMDeviceAuthorizedViewSet from rest_framework import mixins, status, viewsets from rest_framework.exceptions import NotFound, ParseError @@ -147,37 +146,26 @@ class MobileAppGatewayView(APIView): raise NotFound super().initial(request, *args, **kwargs) - @classmethod - def _construct_token_claims(cls, user: "User") -> typing.Dict[str, typing.Any]: - organization = user.organization - now = timezone.now() - - return { - # TODO: do we need to specify iat and exp? - # registered claim names - "iat": now, - "exp": now + timezone.timedelta(minutes=1), # jwt is short lived. expires in 1 minute. - # custom data - "user_id": user.user_id, # grafana user ID - "user_email": user.email, - "stack_id": organization.stack_id, - "organization_id": organization.org_id, # grafana org ID - "stack_slug": organization.stack_slug, - "org_slug": organization.org_slug, - } - @classmethod def _get_auth_token(cls, downstream_backend: SupportedDownstreamBackends, user: "User") -> str: """ RS256 = asymmetric = public/private key pair HS256 = symmetric = shared secret (don't use this) """ - token_claims = cls._construct_token_claims(user) + org = user.organization + token_claims = { + "user_id": user.user_id, # grafana user ID + "user_email": user.email, + "stack_id": org.stack_id, + "organization_id": org.org_id, # grafana org ID + "stack_slug": org.stack_slug, + "org_slug": org.org_slug, + } + token_scopes = { cls.SupportedDownstreamBackends.INCIDENT: [CloudAuthApiClient.Scopes.INCIDENT_WRITE], }[downstream_backend] - org = user.organization return f"{org.stack_id}:{CloudAuthApiClient().request_signed_token(org, token_scopes, token_claims)}" @classmethod From 19686a9cd65aa2725165f5e6c32f8caf6246d70e Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Fri, 26 Jan 2024 10:48:35 -0500 Subject: [PATCH 10/22] add some more logging to mobile app proxy --- engine/apps/mobile_app/views.py | 5 +++++ engine/common/cloud_auth_api/client.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/engine/apps/mobile_app/views.py b/engine/apps/mobile_app/views.py index 0e5b172d..18f53032 100644 --- a/engine/apps/mobile_app/views.py +++ b/engine/apps/mobile_app/views.py @@ -206,6 +206,10 @@ class MobileAppGatewayView(APIView): raise NotFound(f"Downstream backend {downstream_backend} not supported") downstream_url = self._get_downstream_url(user.organization, downstream_backend, downstream_path) + + log_msg_common = f"{downstream_backend} request to {method} {downstream_url}" + logger.info(f"Proxying {log_msg_common}") + downstream_request_handler = getattr(requests, method.lower()) try: @@ -216,6 +220,7 @@ class MobileAppGatewayView(APIView): headers=self._get_downstream_headers(request, downstream_backend, user), ) + logger.info(f"Successfully proxied {log_msg_common}") return Response(status=downstream_response.status_code, data=downstream_response.json()) except ( requests.exceptions.RequestException, diff --git a/engine/common/cloud_auth_api/client.py b/engine/common/cloud_auth_api/client.py index 12b86961..8cfc3598 100644 --- a/engine/common/cloud_auth_api/client.py +++ b/engine/common/cloud_auth_api/client.py @@ -1,5 +1,6 @@ import enum import json +import logging import typing from urllib.parse import urljoin @@ -11,6 +12,9 @@ if typing.TYPE_CHECKING: from apps.user_management.models import Organization +logger = logging.getLogger(__name__) + + class CloudAuthApiException(Exception): """A generic 400 or 500 level exception from the Cloud Auth API""" @@ -61,6 +65,9 @@ class CloudAuthApiClient: } url = urljoin(self.api_base_url, "v1/sign") + common_log_msg = f"org_id={org_id} stack_id={stack_id} url={url}" + logger.info(f"Requesting signed token from Cloud Auth API {common_log_msg}") + response = requests.post( url, headers=headers, @@ -74,5 +81,11 @@ class CloudAuthApiClient: ) if response.status_code != status.HTTP_200_OK: + logger.warning( + f"Got non-HTTP 200 when attempting to request signed token from Cloud Auth API {common_log_msg} " + f"status_code={response.status_code} response={response.text}" + ) raise CloudAuthApiException(response.status_code, url, response.text, method="POST") + + logger.info(f"Successfully requested signed token from Cloud Auth API {common_log_msg}") return response.json()["data"]["token"] From c5917f4d32290c3d26697f2025e3670aed4ef14a Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Fri, 26 Jan 2024 14:38:16 -0500 Subject: [PATCH 11/22] pass str(org_id) to cloud auth api --- engine/common/cloud_auth_api/client.py | 9 +++++---- engine/common/cloud_auth_api/tests/test_client.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/engine/common/cloud_auth_api/client.py b/engine/common/cloud_auth_api/client.py index 8cfc3598..0694944a 100644 --- a/engine/common/cloud_auth_api/client.py +++ b/engine/common/cloud_auth_api/client.py @@ -45,20 +45,21 @@ class CloudAuthApiClient: def request_signed_token( self, org: "Organization", scopes: typing.List[Scopes], claims: typing.Dict[str, typing.Any] ) -> str: - org_id = org.org_id - stack_id = org.stack_id + # The Cloud Auth API expects the org_id and stack_id to be strings + org_id = str(org.org_id) + stack_id = str(org.stack_id) # NOTE: header values must always be strings headers = { "Authorization": f"Bearer {self.api_token}", # need to cast to str otherwise - requests.exceptions.InvalidHeader: Header part ... from ('X-Org-ID', 5000) # must be of type str or bytes, not - "X-Org-ID": str(org_id), + "X-Org-ID": org_id, "X-Realms": json.dumps( [ { "type": "stack", - "identifier": str(stack_id), + "identifier": stack_id, }, ] ), diff --git a/engine/common/cloud_auth_api/tests/test_client.py b/engine/common/cloud_auth_api/tests/test_client.py index 2a10626c..4ef028f4 100644 --- a/engine/common/cloud_auth_api/tests/test_client.py +++ b/engine/common/cloud_auth_api/tests/test_client.py @@ -80,7 +80,7 @@ def test_request_signed_token(mock_requests, make_organization, response_status_ "claims": claims, "extra": { "scopes": scopes, - "org_id": org_id, + "org_id": str(org_id), }, }, ) From bb7ce3d1337aee5e96181c3a17a108744de7e805 Mon Sep 17 00:00:00 2001 From: Maxim Mordasov Date: Mon, 29 Jan 2024 12:23:21 +0300 Subject: [PATCH 12/22] Fix dynamic labels & multi-label extraction labels (#3753) # What this PR does Fix dynamic labels & multi-label extraction labels ## Which issue(s) this PR fixes https://github.com/grafana/oncall/issues/3750 ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 1 + .../IntegrationLabelsForm/IntegrationLabelsForm.tsx | 9 +++++---- .../OutgoingWebhookForm/OutgoingWebhookForm.tsx | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34c4efd2..e981536f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fixed too frequent retry of `perform_notification` task on Telegram ratelimit error by @Ferril ([#3744](https://github.com/grafana/oncall/pull/3744)) +- Dynamic labels & multi-label extraction label are broken ([#3750](https://github.com/grafana/oncall/issues/3750)) - Add check whether organization has Slack connection on update Slack related field using public api endpoints by @Ferril ([#3751](https://github.com/grafana/oncall/pull/3751)) diff --git a/grafana-plugin/src/containers/IntegrationLabelsForm/IntegrationLabelsForm.tsx b/grafana-plugin/src/containers/IntegrationLabelsForm/IntegrationLabelsForm.tsx index 73f54d1e..ac53937d 100644 --- a/grafana-plugin/src/containers/IntegrationLabelsForm/IntegrationLabelsForm.tsx +++ b/grafana-plugin/src/containers/IntegrationLabelsForm/IntegrationLabelsForm.tsx @@ -206,9 +206,10 @@ const IntegrationLabelsForm = observer((props: IntegrationLabelsFormProps) => { templates={templates} templateBody={alertGroupLabels.custom[customLabelIndexToShowTemplateEditor].value.name} onHide={() => setCustomLabelIndexToShowTemplateEditor(undefined)} - onUpdateTemplates={({ alert_group_labels }) => { + onUpdateTemplates={(templates) => { const newCustom = [...alertGroupLabels.custom]; - newCustom[customLabelIndexToShowTemplateEditor].value.name = alert_group_labels; + newCustom[customLabelIndexToShowTemplateEditor].value.name = + templates[LabelTemplateOptions.AlertGroupDynamicLabel.key]; setAlertGroupLabels({ ...alertGroupLabels, @@ -229,10 +230,10 @@ const IntegrationLabelsForm = observer((props: IntegrationLabelsFormProps) => { templates={templates} templateBody={alertGroupLabels.template} onHide={() => setShowTemplateEditor(false)} - onUpdateTemplates={({ alert_group_labels }) => { + onUpdateTemplates={(templates) => { setAlertGroupLabels({ ...alertGroupLabels, - template: alert_group_labels, + template: templates[LabelTemplateOptions.AlertGroupMultiLabel.key], }); setShowTemplateEditor(undefined); diff --git a/grafana-plugin/src/containers/OutgoingWebhookForm/OutgoingWebhookForm.tsx b/grafana-plugin/src/containers/OutgoingWebhookForm/OutgoingWebhookForm.tsx index c2714029..f3fb474a 100644 --- a/grafana-plugin/src/containers/OutgoingWebhookForm/OutgoingWebhookForm.tsx +++ b/grafana-plugin/src/containers/OutgoingWebhookForm/OutgoingWebhookForm.tsx @@ -177,6 +177,7 @@ const OutgoingWebhookForm = observer((props: OutgoingWebhookFormProps) => { preset: selectedPreset?.id, trigger_type: null, http_method: 'POST', + forward_all: true, }; } else if (isNewOrCopy) { data = { ...outgoingWebhookStore.items[id], is_legacy: false, name: '' }; From 29dadcc07fca347f6eb1f88dd43b807cbebe40be Mon Sep 17 00:00:00 2001 From: Maxim Mordasov Date: Mon, 29 Jan 2024 17:15:56 +0300 Subject: [PATCH 13/22] Show warning when edit AM-based resolve templates (#3764) # What this PR does show autoresolve template for ALL integrations when autoresolve is ON show modal on edit button click for alertmanager based integrations Screenshot 2024-01-29 at 13 37 08 ## Which issue(s) this PR fixes Frontend part of https://github.com/grafana/oncall-private/issues/2260 ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --------- Co-authored-by: Vadim Stepanov --- CHANGELOG.md | 1 + .../Integrations/IntegrationTemplateBlock.tsx | 27 +++++++--- .../components/WithConfirm/WithConfirm.tsx | 18 +++++-- .../IntegrationTemplatesList.tsx | 52 ++++++------------- 4 files changed, 52 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e981536f..bdf2b257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Improved logging during plugin sync and install with Grafana @mderynck ([#3730](https://github.com/grafana/oncall/pull/3730)) +- Add a modal for autoresolve and grouping templates for Alertmanager-based integrations ([#3764](https://github.com/grafana/oncall/pull/3764)) ### Fixed diff --git a/grafana-plugin/src/components/Integrations/IntegrationTemplateBlock.tsx b/grafana-plugin/src/components/Integrations/IntegrationTemplateBlock.tsx index 889eb9d2..ff77ac4f 100644 --- a/grafana-plugin/src/components/Integrations/IntegrationTemplateBlock.tsx +++ b/grafana-plugin/src/components/Integrations/IntegrationTemplateBlock.tsx @@ -1,8 +1,9 @@ import React from 'react'; -import { Button, InlineLabel, LoadingPlaceholder, Tooltip } from '@grafana/ui'; +import { Button, InlineLabel, LoadingPlaceholder } from '@grafana/ui'; import cn from 'classnames/bind'; +import WithConfirm from 'components/WithConfirm/WithConfirm'; import { WithPermissionControlTooltip } from 'containers/WithPermissionControl/WithPermissionControlTooltip'; import { UserActions } from 'utils/authorization'; @@ -17,6 +18,7 @@ interface IntegrationTemplateBlockProps { renderInput: () => React.ReactNode; showHelp?: boolean; isLoading?: boolean; + warningOnEdit?: string; onEdit: (templateName) => void; onRemove?: () => void; @@ -31,6 +33,7 @@ const IntegrationTemplateBlock: React.FC = ({ onEdit, onRemove, isLoading, + warningOnEdit, }) => { let tooltip = labelTooltip; let inlineLabelProps = { tooltip }; @@ -48,14 +51,24 @@ const IntegrationTemplateBlock: React.FC = ({ {isTemplateEditable && ( <> - -