Merge pull request #1641 from grafana/dev

Dev to Main
This commit is contained in:
Innokentii Konstantinov 2023-03-28 14:27:17 +08:00 committed by GitHub
commit a32d46482e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 153 additions and 73 deletions

View file

@ -54,7 +54,7 @@ from apps.slack.slack_client import SlackClientWithErrorHandling
from apps.slack.slack_client.exceptions import SlackAPIException, SlackAPITokenException
from apps.slack.tasks import clean_slack_integration_leftovers, unpopulate_slack_user_identities
from common.insight_log import ChatOpsEvent, ChatOpsType, write_chatops_insight_log
from common.oncall_gateway import delete_slack_connector_async
from common.oncall_gateway import delete_slack_connector
from .models import SlackMessage, SlackTeamIdentity, SlackUserIdentity
@ -551,7 +551,7 @@ class ResetSlackView(APIView):
if slack_team_identity is not None:
clean_slack_integration_leftovers.apply_async((organization.pk,))
if settings.FEATURE_MULTIREGION_ENABLED:
delete_slack_connector_async.apply_async((slack_team_identity.slack_id,))
delete_slack_connector(str(organization.uuid))
write_chatops_insight_log(
author=request.user,
event_name=ChatOpsEvent.WORKSPACE_DISCONNECTED,

View file

@ -0,0 +1,2 @@
class InstallMultiRegionSlackException(Exception):
pass

View file

@ -7,6 +7,7 @@ from social_core import exceptions
from social_django.middleware import SocialAuthExceptionMiddleware
from apps.social_auth.backends import LoginSlackOAuth2V2
from apps.social_auth.exceptions import InstallMultiRegionSlackException
from common.constants.slack_auth import REDIRECT_AFTER_SLACK_INSTALL, SLACK_AUTH_FAILED
@ -30,3 +31,9 @@ class SocialAuthAuthCanceledExceptionMiddleware(SocialAuthExceptionMiddleware):
return redirect(url_to_redirect)
elif isinstance(exception, KeyError) and REDIRECT_AFTER_SLACK_INSTALL in exception.args:
return HttpResponse(status=status.HTTP_401_UNAUTHORIZED)
elif isinstance(exception, InstallMultiRegionSlackException):
REGION_ERROR = "region_error"
url_to_redirect = urljoin(
request.user.organization.grafana_url, f"{redirect_to}?tab=Slack&slack_error={REGION_ERROR}"
)
return redirect(url_to_redirect)

View file

@ -3,18 +3,19 @@ from urllib.parse import urljoin
from django.apps import apps
from django.conf import settings
from django.http import HttpResponse, JsonResponse
from django.http import HttpResponse
from rest_framework import status
from social_core.exceptions import AuthForbidden
from apps.slack.tasks import populate_slack_channels_for_team, populate_slack_usergroups_for_team
from apps.social_auth.exceptions import InstallMultiRegionSlackException
from common.constants.slack_auth import (
REDIRECT_AFTER_SLACK_INSTALL,
SLACK_AUTH_SLACK_USER_ALREADY_CONNECTED_ERROR,
SLACK_AUTH_WRONG_WORKSPACE_ERROR,
)
from common.insight_log import ChatOpsEvent, ChatOpsType, write_chatops_insight_log
from common.oncall_gateway import check_slack_installation_backend, create_slack_connector
from common.oncall_gateway import check_slack_installation_possible, create_slack_connector
logger = logging.getLogger(__name__)
@ -91,21 +92,22 @@ def populate_slack_identities(response, backend, user, organization, **kwargs):
return
if organization.slack_team_identity is not None:
# means that organization already has slack integration
# means that organization already has Slack integration
return HttpResponse(status=status.HTTP_400_BAD_REQUEST)
slack_team_id = response["team"]["id"]
if settings.FEATURE_MULTIREGION_ENABLED and not check_slack_installation_backend(
slack_team_id, settings.ONCALL_BACKEND_REGION
if settings.FEATURE_MULTIREGION_ENABLED and not check_slack_installation_possible(
str(organization.uuid), slack_team_id, settings.ONCALL_BACKEND_REGION
):
return JsonResponse(status=status.HTTP_400_BAD_REQUEST, json={"detail": "error about regions"})
raise InstallMultiRegionSlackException
slack_team_identity, is_slack_team_identity_created = SlackTeamIdentity.objects.get_or_create(
slack_id=slack_team_id,
)
# update slack oauth fields by data from response
slack_team_identity.update_oauth_fields(user, organization, response)
if settings.FEATURE_MULTIREGION_ENABLED:
create_slack_connector(slack_team_id, settings.ONCALL_BACKEND_REGION)
create_slack_connector(str(organization.uuid), slack_team_id, settings.ONCALL_BACKEND_REGION)
populate_slack_channels_for_team.apply_async((slack_team_identity.pk,))
user.slack_user_identity.update_profile_info()
# todo slack: do we need update info for all existing slack users in slack team?

View file

@ -15,7 +15,7 @@ from apps.alerts.tasks import disable_maintenance
from apps.slack.utils import post_message_to_channel
from apps.user_management.subscription_strategy import FreePublicBetaSubscriptionStrategy
from common.insight_log import ChatOpsEvent, ChatOpsType, write_chatops_insight_log
from common.oncall_gateway import create_oncall_connector, delete_oncall_connector_async, delete_slack_connector_async
from common.oncall_gateway import create_oncall_connector, delete_oncall_connector, delete_slack_connector
from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length
logger = logging.getLogger(__name__)
@ -47,7 +47,7 @@ class OrganizationQuerySet(models.QuerySet):
def create(self, **kwargs):
instance = super().create(**kwargs)
if settings.FEATURE_MULTIREGION_ENABLED:
create_oncall_connector(instance.uuid, settings.ONCALL_BACKEND_REGION)
create_oncall_connector(str(instance.uuid), settings.ONCALL_BACKEND_REGION)
return instance
def delete(self):
@ -74,9 +74,9 @@ class Organization(MaintainableObject):
def delete(self):
if settings.FEATURE_MULTIREGION_ENABLED:
delete_oncall_connector_async.apply_async((self.public_primary_key,))
delete_oncall_connector(str(self.uuid))
if self.slack_team_identity:
delete_slack_connector_async.apply_async((self.slack_team_identity.slack_id,))
delete_slack_connector(str(self.uuid))
self.deleted_at = timezone.now()
self.save(update_fields=["deleted_at"])

View file

@ -2,5 +2,10 @@
This package is for interaction with OnCall-Gateway, service to provide multiregional chatops.
"""
from .tasks import delete_oncall_connector_async, delete_slack_connector_async # noqa: F401
from .utils import check_slack_installation_backend, create_oncall_connector, create_slack_connector # noqa: F401
from .utils import ( # noqa: F401
check_slack_installation_possible,
create_oncall_connector,
create_slack_connector,
delete_oncall_connector,
delete_slack_connector,
)

View file

@ -22,6 +22,7 @@ class SlackConnector:
SlackConnector represents connection between slack team with installed oncall app and oncall-gateway
"""
oncall_org_id: str
slack_team_id: str
backend: str
@ -59,35 +60,31 @@ class OnCallGatewayAPIClient:
def _slack_connectors_url(self) -> str:
return urljoin(self.api_base_url, "slack_team_connectors")
def post_slack_connector(self, slack_id: str, backend: str) -> tuple[SlackConnector, requests.models.Response]:
d = {"slack_team_id": slack_id, "backend": backend}
def post_slack_connector(
self, oncall_org_id: str, slack_id: str, backend: str
) -> tuple[SlackConnector, requests.models.Response]:
d = {"oncall_org_id": oncall_org_id, "slack_team_id": slack_id, "backend": backend}
response = self._post(url=self._slack_connectors_url, json=d)
response_data = response.json()
return (
SlackConnector(
response_data["oncall_org_id"],
response_data["slack_team_id"],
response_data["backend"],
),
response,
)
def get_slack_connector(self, slack_id: str) -> tuple[SlackConnector, requests.models.Response]:
url = urljoin(f"{self._slack_connectors_url}/", slack_id)
response = self._get(url=url)
response_data = response.json()
return (
SlackConnector(
response_data["slack_team_id"],
response_data["backend"],
),
response,
)
def delete_slack_connector(self, slack_id: str) -> requests.models.Response:
url = urljoin(f"{self._slack_connectors_url}/", slack_id)
def delete_slack_connector(self, oncall_org_id: str) -> requests.models.Response:
url = urljoin(f"{self._slack_connectors_url}/", oncall_org_id)
response = self._delete(url=url)
return response
def check_slack_installation_possible(self, oncall_org_id, backend, slack_id: str) -> requests.models.Response:
url = urljoin(f"{self._slack_connectors_url}/", "check_installation_possible")
url += f"?slack_team_id={slack_id}&oncall_org_id={oncall_org_id}&backend={backend}"
return self._get(url=url)
def _get(self, url, params=None, **kwargs) -> requests.models.Response:
kwargs["params"] = params
response = self._call_api(method=requests.get, url=url, **kwargs)
@ -125,7 +122,6 @@ class OnCallGatewayAPIClient:
err_msg = "%s Client Error: %s for url: %s" % (response.status_code, err_msg, response.url)
elif 500 <= response.status_code < 600:
err_msg = "%s Server Error: %s for url: %s" % (response.status_code, err_msg, response.url)
print(err_msg)
raise requests.exceptions.HTTPError(err_msg, response=response)
@classmethod

View file

@ -12,16 +12,16 @@ task_logger = get_task_logger(__name__)
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,),
retry_backoff=True,
max_retries=None,
max_retries=100,
)
def create_oncall_connector_async(oncall_org_id, backend):
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
try:
client.post_oncall_connector(oncall_org_id, backend)
except requests.exceptions.HTTPError as http_exc:
# TODO: decide which http codes to retry
print(http_exc.response)
if http_exc.response.status_code == 409:
# 409 Indicates that it's impossible to create such connector.
# More likely because it already exists.
task_logger.error(
f"Failed to create OnCallConnector oncall_org_id={oncall_org_id} backend={backend} exc={http_exc}"
)
@ -35,7 +35,7 @@ def create_oncall_connector_async(oncall_org_id, backend):
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,),
retry_backoff=True,
max_retries=None,
max_retries=100,
)
def delete_oncall_connector_async(oncall_org_id):
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
@ -43,7 +43,7 @@ def delete_oncall_connector_async(oncall_org_id):
client.delete_slack_connector(oncall_org_id)
except requests.exceptions.HTTPError as http_exc:
if http_exc.response.status_code == 404:
# 404 indicates than resourse was deleted already
# 404 indicates that connector was deleted already
return
else:
task_logger.error(f"Failed to delete OnCallConnector oncall_org_id={oncall_org_id} exc={http_exc}")
@ -53,44 +53,68 @@ def delete_oncall_connector_async(oncall_org_id):
raise e
# deprecated
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,),
retry_backoff=True,
max_retries=None,
)
def create_slack_connector_async(slack_id, backend):
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
try:
client.post_slack_connector(slack_id, backend)
except requests.exceptions.HTTPError as http_exc:
# TODO: decide which http codes to retry
if http_exc.response.status_code == 409:
task_logger.error(
f"Failed to create SlackConnector oncall_org_id={slack_id} backend={backend} exc={http_exc}"
)
else:
raise http_exc
except Exception as e:
task_logger.error(f"Failed to create SlackConnector slack_id={slack_id} backend={backend} exc={e}")
raise e
pass
# deprecated
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,),
retry_backoff=True,
max_retries=None,
)
def delete_slack_connector_async(slack_id):
pass
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,),
retry_backoff=True,
max_retries=100,
)
def create_slack_connector_async_v2(**kwargs):
oncall_org_id = kwargs.get("oncall_org_id")
slack_team_id = kwargs.get("slack_team_id")
backend = kwargs.get("backend")
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
try:
client.delete_slack_connector(slack_id)
client.post_slack_connector(oncall_org_id, slack_team_id, backend)
except requests.exceptions.HTTPError as http_exc:
if http_exc.response.status_code == 404:
# 404 indicates than resourse was deleted already
return
if http_exc.response.status_code == 409:
# 409 Indicates that it's impossible to create such connector.
# More likely because it already exists.
task_logger.error(
f"Failed to create SlackConnector oncall_org_id={oncall_org_id} backend={backend} exc={http_exc}"
)
else:
task_logger.error(f"Failed to delete OnCallConnector slack_id={slack_id} exc={http_exc}")
raise http_exc
except Exception as e:
task_logger.error(f"Failed to delete OnCallConnector slack_id={slack_id} exc={e}")
task_logger.error(f"Failed to create SlackConnector slack_id={oncall_org_id} backend={backend} exc={e}")
raise e
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,),
retry_backoff=True,
max_retries=100,
)
def delete_slack_connector_async_v2(**kwargs):
oncall_org_id = kwargs.get("oncall_org_id")
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
try:
client.delete_slack_connector(oncall_org_id)
except requests.exceptions.HTTPError as http_exc:
if http_exc.response.status_code == 404:
# 404 indicates that connector was deleted already
return
else:
raise http_exc
except Exception as e:
task_logger.error(f"Failed to delete SlackConnectorV2 oncall_org_id={oncall_org_id} exc={e}")
raise e

View file

@ -4,7 +4,12 @@ import requests
from django.conf import settings
from .oncall_gateway_client import OnCallGatewayAPIClient
from .tasks import create_oncall_connector_async, create_slack_connector_async
from .tasks import (
create_oncall_connector_async,
create_slack_connector_async_v2,
delete_oncall_connector_async,
delete_slack_connector_async_v2,
)
logger = logging.getLogger(__name__)
@ -14,27 +19,43 @@ def create_oncall_connector(oncall_org_id: str, backend: str):
try:
client.post_oncall_connector(oncall_org_id, backend)
except Exception as e:
logger.error(f"Failed to create_oncall_connector oncall_org_id={oncall_org_id} backend={backend} exc={e}")
logger.error(f"create_oncall_connector: failed " f"oncall_org_id={oncall_org_id} backend={backend} exc={e}")
create_oncall_connector_async.apply_async((oncall_org_id, backend), countdown=2)
def check_slack_installation_backend(slack_id: str, backend: str) -> bool:
def delete_oncall_connector(oncall_org_id: str):
delete_oncall_connector_async.delay(oncall_org_id)
def check_slack_installation_possible(oncall_org_id: str, slack_id: str, backend: str) -> bool:
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
try:
slack_connector, _ = client.get_slack_connector(slack_id)
if slack_connector.backend == backend:
return True
else:
return False
response = client.check_slack_installation_possible(
oncall_org_id=oncall_org_id, slack_id=slack_id, backend=backend
)
return response.status_code == 200
except requests.exceptions.HTTPError as http_exc:
if http_exc.response.status_code == 404:
return True
logger.error(
f"check_slack_installation_backend: slack installation impossible "
f"oncall_org_id={oncall_org_id} slack_id={slack_id} backend={backend} exc={http_exc}"
)
return False
def create_slack_connector(slack_id: str, backend: str):
def create_slack_connector(oncall_org_id: str, slack_id: str, backend: str):
client = OnCallGatewayAPIClient(settings.ONCALL_GATEWAY_URL, settings.ONCALL_GATEWAY_API_TOKEN)
try:
client.post_slack_connector(slack_id, backend)
client.post_slack_connector(oncall_org_id, slack_id, backend)
except Exception as e:
logger.error(f"Failed to create_oncall_connector slack_id={slack_id} backend={backend} exc={e}")
create_slack_connector_async.apply_async((slack_id, backend), countdown=2)
logger.error(
f"create_slack_connector: failed "
f"oncall_org_id={oncall_org_id} slack_id={slack_id} backend={backend} exc={e}"
)
create_slack_connector_async_v2.apply_async(
kwargs={"oncall_org_id": oncall_org_id, "slack_id": slack_id, "backend": backend}, countdown=2
)
def delete_slack_connector(oncall_org_id: str):
delete_slack_connector_async_v2.delay(oncall_org_id=oncall_org_id)

View file

@ -79,3 +79,18 @@ if settings.DEBUG_CELERY_TASKS_PROFILING:
logger.info("ended: {} of {} with cpu={} at {}".format(task_id, task.name, time.perf_counter(), time.time()))
sample_mem()
memdump()
if settings.PYROSCOPE_PROFILER_ENABLED:
@celery.signals.worker_process_init.connect(weak=False)
def init_pyroscope(*args, **kwargs):
import pyroscope
pyroscope.configure(
application_name=settings.PYROSCOPE_APPLICATION_NAME,
server_address=settings.PYROSCOPE_SERVER_ADDRESS,
auth_token=settings.PYROSCOPE_AUTH_TOKEN,
detect_subprocesses=True, # detect subprocesses started by the main process; default is False
tags={"type": "celery", "celery_worker": os.environ.get("CELERY_WORKER_QUEUE", "no_queue_specified")},
)

View file

@ -53,7 +53,7 @@ if settings.PYROSCOPE_PROFILER_ENABLED:
server_address=settings.PYROSCOPE_SERVER_ADDRESS,
auth_token=settings.PYROSCOPE_AUTH_TOKEN,
detect_subprocesses=True,
tags={"celery_worker": settings.PYROSCOPE_CELERY_WORKER_QUEUE},
tags={"type": "uwsgi"},
)
except ModuleNotFoundError:

View file

@ -681,4 +681,3 @@ PYROSCOPE_PROFILER_ENABLED = getenv_boolean("PYROSCOPE_PROFILER_ENABLED", defaul
PYROSCOPE_APPLICATION_NAME = os.getenv("PYROSCOPE_APPLICATION_NAME", "oncall")
PYROSCOPE_SERVER_ADDRESS = os.getenv("PYROSCOPE_SERVER_ADDRESS", "http://pyroscope:4040")
PYROSCOPE_AUTH_TOKEN = os.getenv("PYROSCOPE_AUTH_TOKEN", "")
PYROSCOPE_CELERY_WORKER_QUEUE = os.getenv("CELERY_WORKER_QUEUE", None)

View file

@ -1,4 +1,5 @@
[uwsgi]
strict=true
chdir=/etc/app
module=engine.wsgi:application
master=True
@ -14,6 +15,7 @@ buffer-size=65535
http-auto-chunked=True
http-timeout=620
post-buffering=1
enable-threads=true
logger=stdio
log-format=source=engine:uwsgi status=%(status) method=%(method) path=%(uri) latency=%(secs) google_trace_id=%(var.HTTP_X_CLOUD_TRACE_CONTEXT) protocol=%(proto) resp_size=%(size) req_body_size=%(cl)

View file

@ -40,5 +40,11 @@ export function getSlackMessage(slackError: SlackError, team: Team, hasLiveSetti
);
}
if (slackError === SlackError.REGION_ERROR) {
return (
<>Couldn't connect to Slack. Slack workspace has already been connected to OnCall instance in another region.</>
);
}
return <>Couldn't connect Slack.</>;
}

View file

@ -2,4 +2,5 @@ export enum SlackError {
WRONG_WORKSPACE = 'wrong_workspace',
USER_ALREADY_CONNECTED = 'user_already_connected',
AUTH_FAILED = 'auth_failed',
REGION_ERROR = 'region_error',
}