From 9c550af7218e7ca8f2e7bbd871adde41326cdc57 Mon Sep 17 00:00:00 2001 From: Innokentii Konstantinov Date: Tue, 8 Nov 2022 14:43:22 +0800 Subject: [PATCH] Support of oncall-gw (#741) * Draft support of oncall-gw * Clean up * Create oncall connector on org create in gcom * Naming fixes * Rework oncall-gateway package. \nMove it from apps. * Fix typo --- engine/apps/slack/views.py | 3 + engine/apps/social_auth/pipeline.py | 12 +- .../user_management/models/organization.py | 20 +++ engine/common/oncall_gateway/__init__.py | 6 + .../oncall_gateway/oncall_gateway_client.py | 148 ++++++++++++++++++ engine/common/oncall_gateway/tasks.py | 96 ++++++++++++ engine/common/oncall_gateway/utils.py | 40 +++++ engine/settings/base.py | 6 + 8 files changed, 328 insertions(+), 3 deletions(-) create mode 100644 engine/common/oncall_gateway/__init__.py create mode 100644 engine/common/oncall_gateway/oncall_gateway_client.py create mode 100644 engine/common/oncall_gateway/tasks.py create mode 100644 engine/common/oncall_gateway/utils.py diff --git a/engine/apps/slack/views.py b/engine/apps/slack/views.py index 8d4ca3c1..3f312cb2 100644 --- a/engine/apps/slack/views.py +++ b/engine/apps/slack/views.py @@ -52,6 +52,7 @@ from apps.slack.slack_client import SlackClientWithErrorHandling from apps.slack.slack_client.exceptions import SlackAPIException, SlackAPITokenException from apps.slack.tasks import clean_slack_integration_leftovers, unpopulate_slack_user_identities from common.insight_log import ChatOpsEvent, ChatOpsType, write_chatops_insight_log +from common.oncall_gateway import delete_slack_connector_async from .models import SlackActionRecord, SlackMessage, SlackTeamIdentity, SlackUserIdentity @@ -537,6 +538,8 @@ class ResetSlackView(APIView): slack_team_identity = organization.slack_team_identity if slack_team_identity is not None: clean_slack_integration_leftovers.apply_async((organization.pk,)) + if settings.FEATURE_MULTIREGION_ENABLED: + delete_slack_connector_async.apply_async((slack_team_identity.slack_id,)) write_chatops_insight_log( author=request.user, event_name=ChatOpsEvent.WORKSPACE_DISCONNECTED, diff --git a/engine/apps/social_auth/pipeline.py b/engine/apps/social_auth/pipeline.py index a271d056..4aaf9339 100644 --- a/engine/apps/social_auth/pipeline.py +++ b/engine/apps/social_auth/pipeline.py @@ -2,7 +2,8 @@ import logging from urllib.parse import urljoin from django.apps import apps -from django.http import HttpResponse +from django.conf import settings +from django.http import HttpResponse, JsonResponse from rest_framework import status from social_core.exceptions import AuthForbidden @@ -13,6 +14,7 @@ from common.constants.slack_auth import ( SLACK_AUTH_WRONG_WORKSPACE_ERROR, ) from common.insight_log import ChatOpsEvent, ChatOpsType, write_chatops_insight_log +from common.oncall_gateway import check_slack_installation_backend, create_slack_connector logger = logging.getLogger(__name__) @@ -93,13 +95,17 @@ def populate_slack_identities(response, backend, user, organization, **kwargs): return HttpResponse(status=status.HTTP_400_BAD_REQUEST) slack_team_id = response["team"]["id"] + if settings.FEATURE_MULTIREGION_ENABLED and not check_slack_installation_backend( + slack_team_id, settings.ONCALL_BACKEND_REGION + ): + return JsonResponse(status=status.HTTP_400_BAD_REQUEST, json={"detail": "error about regions"}) slack_team_identity, is_slack_team_identity_created = SlackTeamIdentity.objects.get_or_create( slack_id=slack_team_id, ) - # update slack oauth fields by data from response slack_team_identity.update_oauth_fields(user, organization, response) - + if settings.FEATURE_MULTIREGION_ENABLED: + create_slack_connector(slack_team_id, settings.ONCALL_BACKEND_REGION) populate_slack_channels_for_team.apply_async((slack_team_identity.pk,)) user.slack_user_identity.update_profile_info() # todo slack: do we need update info for all existing slack users in slack team? diff --git a/engine/apps/user_management/models/organization.py b/engine/apps/user_management/models/organization.py index 2561e3cb..3f35a22a 100644 --- a/engine/apps/user_management/models/organization.py +++ b/engine/apps/user_management/models/organization.py @@ -12,6 +12,7 @@ from apps.alerts.tasks import disable_maintenance from apps.slack.utils import post_message_to_channel from apps.user_management.subscription_strategy import FreePublicBetaSubscriptionStrategy from common.insight_log import ChatOpsEvent, ChatOpsType, write_chatops_insight_log +from common.oncall_gateway import create_oncall_connector, delete_oncall_connector_async from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length logger = logging.getLogger(__name__) @@ -31,7 +32,26 @@ def generate_public_primary_key_for_organization(): return new_public_primary_key +class OrganizationQuerySet(models.QuerySet): + def create(self, **kwargs): + instance = super().create(**kwargs) + if settings.FEATURE_MULTIREGION_ENABLED: + create_oncall_connector(instance.public_primary_key, settings.ONCALL_BACKEND_REGION) + return instance + + def delete(self): + org_id = self.public_primary_key + super().delete(self) + if settings.FEATURE_MULTIREGION_ENABLED: + delete_oncall_connector_async.apply_async( + (org_id), + ) + + class Organization(MaintainableObject): + + objects = OrganizationQuerySet.as_manager() + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.subscription_strategy = self._get_subscription_strategy() diff --git a/engine/common/oncall_gateway/__init__.py b/engine/common/oncall_gateway/__init__.py new file mode 100644 index 00000000..6b3e6523 --- /dev/null +++ b/engine/common/oncall_gateway/__init__.py @@ -0,0 +1,6 @@ +""" +This package is for interaction with OnCall-Gateway, service to provide multiregional chatops. +""" + +from .tasks import delete_oncall_connector_async, delete_slack_connector_async # noqa: F401 +from .utils import check_slack_installation_backend, create_oncall_connector, create_slack_connector # noqa: F401 diff --git a/engine/common/oncall_gateway/oncall_gateway_client.py b/engine/common/oncall_gateway/oncall_gateway_client.py new file mode 100644 index 00000000..7682c9ce --- /dev/null +++ b/engine/common/oncall_gateway/oncall_gateway_client.py @@ -0,0 +1,148 @@ +import json +from dataclasses import dataclass +from urllib.parse import urljoin + +import requests +from django.conf import settings + + +@dataclass +class OnCallConnector: + """ + OnCallConnector represents connection between oncall org and oncall-gateway + """ + + oncall_org_id: str + backend: str + + +@dataclass +class SlackConnector: + """ + SlackConnector represents connection between slack team with installed oncall app and oncall-gateway + """ + + slack_id: str + backend: str + + +DEFAULT_TIMEOUT = 5 + + +class OnCallGatewayAPIClient: + def __init__(self, url: str, token: str): + self.base_url = url + self.api_base_url = urljoin(self.base_url, "api/v1/") + self.api_token = token + + # OnCall Connector + @property + def _oncall_connectors_url(self) -> str: + return urljoin(self.api_base_url, "oncall_org_connectors") + + def post_oncall_connector( + self, oncall_org_id: str, backend: str + ) -> tuple[OnCallConnector, requests.models.Response]: + d = {"oncall_org_id": oncall_org_id, "backend": backend} + response = self._post(url=self._oncall_connectors_url, json=d) + response_data = response.json() + + return OnCallConnector(oncall_org_id=response_data["oncall_org_id"], backend=response_data["backend"]), response + + def delete_oncall_connector(self, oncall_org_id: str) -> requests.models.Response: + url = urljoin(f"{self._oncall_connectors_url}/", oncall_org_id) + response = self._delete(url=url) + response_data = response.json() + return ( + OnCallConnector( + response_data["oncall_org_id"], + response_data["backend"], + ), + response, + ) + + # Slack Connector + @property + def _slack_connectors_url(self) -> str: + return urljoin(self.api_base_url, "slack_team_connectors") + + def post_slack_connector(self, slack_id: str, backend: str) -> tuple[SlackConnector, requests.models.Response]: + d = {"slack_id": slack_id, "backend": backend} + response = self._post(url=self._slack_connectors_url, json=d) + response_data = response.json() + return ( + OnCallConnector( + response_data["oncall_org_id"], + response_data["backend"], + ), + response, + ) + + def get_slack_connector(self, slack_id: str) -> tuple[SlackConnector, requests.models.Response]: + url = urljoin(f"{self._slack_connectors_url}/", slack_id) + response = self._get(url=url) + response_data = response.json() + return ( + SlackConnector( + response_data["slack_id"], + response_data["backend"], + ), + response, + ) + + def delete_slack_connector(self, slack_id: str) -> requests.models.Response: + url = urljoin(f"{self._slack_connectors_url}/", slack_id) + response = self._delete(url=url) + return response + + def _get(self, url, params=None, **kwargs) -> requests.models.Response: + kwargs["params"] = params + response = self._call_api(method=requests.get, url=url, **kwargs) + return response + + def _post(self, url, data=None, json=None, **kwargs) -> requests.models.Response: + kwargs["data"] = data + kwargs["json"] = json + response = self._call_api(method=requests.post, url=url, **kwargs) + return response + + def _delete(self, url, **kwargs) -> requests.models.Response: + response = self._call_api(method=requests.delete, url=url, **kwargs) + return response + + def _call_api(self, method, url, **kwargs) -> requests.models.Response: + kwargs["headers"] = self._headers | kwargs.get("headers", {}) + response = method(url, **kwargs) + self._check_response(response) + return response + + @property + def _headers(self) -> dict: + return { + "User-Agent": settings.GRAFANA_COM_USER_AGENT, + "Authorization": f"Bearer {self.api_token}", + "Content-Type": "application/json", + } + + @classmethod + def _check_response(cls, response: requests.models.Response): + if response.status_code not in [200, 201, 202, 204]: + err_msg = cls._get_error_msg_from_response(response) + if 400 <= response.status_code < 500: + print(1) + err_msg = "%s Client Error: %s for url: %s" % (response.status_code, err_msg, response.url) + + elif 500 <= response.status_code < 600: + print(2) + err_msg = "%s Server Error: %s for url: %s" % (response.status_code, err_msg, response.url) + print(err_msg) + raise requests.exceptions.HTTPError(err_msg, response=response) + + @classmethod + def _get_error_msg_from_response(cls, response: requests.models.Response) -> str: + error_msg = "" + try: + error_msg = response.json()["message"] + except (json.JSONDecodeError, KeyError): + error_msg = response.text if response.text else response.reason + return error_msg diff --git a/engine/common/oncall_gateway/tasks.py b/engine/common/oncall_gateway/tasks.py new file mode 100644 index 00000000..ecc15841 --- /dev/null +++ b/engine/common/oncall_gateway/tasks.py @@ -0,0 +1,96 @@ +import requests +from celery.utils.log import get_task_logger +from django.conf import settings + +from common.custom_celery_tasks import shared_dedicated_queue_retry_task + +from .oncall_gateway_client import OnCallGatewayAPIClient + +task_logger = get_task_logger(__name__) + + +@shared_dedicated_queue_retry_task( + autoretry_for=(Exception,), + retry_backoff=True, + max_retries=None, +) +def create_oncall_connector_async(oncall_org_id, backend): + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + client.post_oncall_connector(oncall_org_id, backend) + except requests.exceptions.HTTPError as http_exc: + # TODO: decide which http codes to retry + print(http_exc.response) + if http_exc.response.status_code == 409: + task_logger.error( + f"Failed to create OnCallConnector oncall_org_id={oncall_org_id} backend={backend} exc={http_exc}" + ) + else: + raise http_exc + except Exception as e: + task_logger.error(f"Failed to create OnCallConnector oncall_org_id={oncall_org_id} backend={backend} exc={e}") + raise e + + +@shared_dedicated_queue_retry_task( + autoretry_for=(Exception,), + retry_backoff=True, + max_retries=None, +) +def delete_oncall_connector_async(oncall_org_id): + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + client.delete_slack_connector(oncall_org_id) + except requests.exceptions.HTTPError as http_exc: + if http_exc.response.status_code == 404: + # 404 indicates than resourse was deleted already + return + else: + task_logger.error(f"Failed to delete OnCallConnector oncall_org_id={oncall_org_id} exc={http_exc}") + raise http_exc + except Exception as e: + task_logger.error(f"Failed to delete OnCallConnector oncall_org_id={oncall_org_id} exc={e}") + raise e + + +@shared_dedicated_queue_retry_task( + autoretry_for=(Exception,), + retry_backoff=True, + max_retries=None, +) +def create_slack_connector_async(slack_id, backend): + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + client.post_slack_connector(slack_id, backend) + except requests.exceptions.HTTPError as http_exc: + # TODO: decide which http codes to retry + if http_exc.response.status_code == 409: + task_logger.error( + f"Failed to create SlackConnector oncall_org_id={slack_id} backend={backend} exc={http_exc}" + ) + else: + raise http_exc + except Exception as e: + task_logger.error(f"Failed to create SlackConnector slack_id={slack_id} backend={backend} exc={e}") + raise e + + +@shared_dedicated_queue_retry_task( + autoretry_for=(Exception,), + retry_backoff=True, + max_retries=None, +) +def delete_slack_connector_async(slack_id): + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + client.delete_slack_connector(slack_id) + except requests.exceptions.HTTPError as http_exc: + if http_exc.response.status_code == 404: + # 404 indicates than resourse was deleted already + return + else: + task_logger.error(f"Failed to delete OnCallConnector slack_id={slack_id} exc={http_exc}") + raise http_exc + except Exception as e: + task_logger.error(f"Failed to delete OnCallConnector slack_id={slack_id} exc={e}") + raise e diff --git a/engine/common/oncall_gateway/utils.py b/engine/common/oncall_gateway/utils.py new file mode 100644 index 00000000..28802e15 --- /dev/null +++ b/engine/common/oncall_gateway/utils.py @@ -0,0 +1,40 @@ +import logging + +import requests +from django.conf import settings + +from .oncall_gateway_client import OnCallGatewayAPIClient +from .tasks import create_oncall_connector_async, create_slack_connector_async + +logger = logging.getLogger(__name__) + + +def create_oncall_connector(oncall_org_id: str, backend: str): + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + client.post_oncall_connector(oncall_org_id, backend) + except Exception as e: + logger.error(f"Failed to create_oncall_connector oncall_org_id={oncall_org_id} backend={backend} exc={e}") + create_oncall_connector_async.apply_async((oncall_org_id, backend), countdown=2) + + +def check_slack_installation_backend(slack_id: str, backend: str) -> bool: + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + slack_connector, _ = client.get_slack_connector(slack_id) + if slack_connector.backend == backend: + return True + else: + return False + except requests.exceptions.HTTPError as http_exc: + if http_exc.response.status_code == 404: + return True + + +def create_slack_connector(slack_id: str, backend: str): + client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN) + try: + client.post_slack_connector(slack_id, backend) + except Exception as e: + logger.error(f"Failed to create_oncall_connector slack_id={slack_id} backend={backend} exc={e}") + create_slack_connector_async.apply_async((slack_id, backend), countdown=2) diff --git a/engine/settings/base.py b/engine/settings/base.py index b673f779..2164ea80 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -53,6 +53,7 @@ FEATURE_TELEGRAM_INTEGRATION_ENABLED = getenv_boolean("FEATURE_TELEGRAM_INTEGRAT FEATURE_EMAIL_INTEGRATION_ENABLED = getenv_boolean("FEATURE_EMAIL_INTEGRATION_ENABLED", default=True) FEATURE_SLACK_INTEGRATION_ENABLED = getenv_boolean("FEATURE_SLACK_INTEGRATION_ENABLED", default=True) FEATURE_WEB_SCHEDULES_ENABLED = getenv_boolean("FEATURE_WEB_SCHEDULES_ENABLED", default=False) +FEATURE_MULTIREGION_ENABLED = getenv_boolean("FEATURE_MULTIREGION_ENABLED", default=False) GRAFANA_CLOUD_ONCALL_HEARTBEAT_ENABLED = getenv_boolean("GRAFANA_CLOUD_ONCALL_HEARTBEAT_ENABLED", default=True) GRAFANA_CLOUD_NOTIFICATIONS_ENABLED = getenv_boolean("GRAFANA_CLOUD_NOTIFICATIONS_ENABLED", default=True) @@ -73,6 +74,11 @@ GRAFANA_CLOUD_ONCALL_TOKEN = os.environ.get("GRAFANA_CLOUD_ONCALL_TOKEN", None) # Outgoing webhook settings DANGEROUS_WEBHOOKS_ENABLED = getenv_boolean("DANGEROUS_WEBHOOKS_ENABLED", default=False) +# Multiregion settings +ONCALL_GATEWAY_URL = os.environ.get("ONCALL_GATEWAY_URL") +ONCALL_GATEWAY_API_TOKEN = os.environ.get("ONCALL_GATEWAY_API_TOKEN") +ONCALL_BACKEND_REGION = os.environ.get("ONCALL_BACKEND_REGION") + # Database class DatabaseTypes: