oncall-engine/engine/apps/webhooks/tasks/trigger_webhook.py

318 lines
13 KiB
Python
Raw Normal View History

import json
import logging
import typing
from datetime import datetime
from json import JSONDecodeError
import requests
from celery.utils.log import get_task_logger
from django.conf import settings
from django.db.models import Prefetch
from django.utils import timezone
from apps.alerts.models import AlertGroup, AlertGroupLogRecord, EscalationPolicy
from apps.base.models import UserNotificationPolicyLogRecord
from apps.user_management.models import User
from apps.webhooks.models import Webhook, WebhookResponse
from apps.webhooks.models.webhook import WEBHOOK_FIELD_PLACEHOLDER
Add webhook presets (#2996) # What this PR does Add a system similar to how we select integrations when creating webhooks so that the user has a description of what webhookds do and does not have to write complex templates for common webhook use cases. Presets allow us to create the contents of the webhooks in code and define which fields are controlled by the preset. Some specifics: - Newly created webhooks must choose between Simple, Advanced or another predefined system - Simple is always an escalation step and will post the entire payload to the given URL - Advanced is the same as no preset which is our current view where all fields are available - There are no changes for all existing webhooks with empty preset fields - Once a webhook is created with a preset the preset cannot be changed - Fields in the webhook that are populated by code will give a validation error if they are modified - In the public API webhooks with presets are returned for viewing but cannot be created or modified. This restriction is in place because the Web UI provides the context for which fields to use with a preset. The public API is for interacting with webhooks where all fields are defined. To define a preset create a file with metadata and an override function. The metadata drives validation and what to display in the UI. There are two functions one is connected to the pre_save hook of the Webhook model for persistent changes, the other replaces parameters at execution time for ephemeral changes. See the simple and advanced presets as an example. The file must be listed in settings in `INSTALLED_WEBHOOK_PRESETS` to be enabled at runtime.. ## Which issue(s) this PR fixes ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --------- Co-authored-by: Joey Orlando <joey.orlando@grafana.com>
2023-09-27 07:22:52 -06:00
from apps.webhooks.presets.preset_options import WebhookPresetOptions
from apps.webhooks.utils import (
InvalidWebhookData,
InvalidWebhookHeaders,
InvalidWebhookTrigger,
InvalidWebhookUrl,
serialize_event,
)
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
from settings.base import WEBHOOK_RESPONSE_LIMIT
NOT_FROM_SELECTED_INTEGRATION = "Alert group was not from a selected integration"
logger = get_task_logger(__name__)
logger.setLevel(logging.DEBUG)
EXECUTE_WEBHOOK_RETRIES = 3
# these exceptions are fully out of our control (e.g. customer's network issues)
# let's manually retry them without raising an exception
EXECUTE_WEBHOOK_EXCEPTIONS_TO_MANUALLY_RETRY = (requests.exceptions.Timeout,)
TRIGGER_TYPE_TO_LABEL = {
Webhook.TRIGGER_ALERT_GROUP_CREATED: "alert group created",
Webhook.TRIGGER_ACKNOWLEDGE: "acknowledge",
Webhook.TRIGGER_RESOLVE: "resolve",
Webhook.TRIGGER_SILENCE: "silence",
Webhook.TRIGGER_UNSILENCE: "unsilence",
Webhook.TRIGGER_UNRESOLVE: "unresolve",
Webhook.TRIGGER_MANUAL: "escalation",
Webhook.TRIGGER_UNACKNOWLEDGE: "unacknowledge",
Webhook.TRIGGER_STATUS_CHANGE: "status change",
}
class WebhookRequestStatus(typing.TypedDict):
url: typing.Optional[str]
request_trigger: typing.Optional[str]
request_headers: typing.Optional[str]
request_data: typing.Optional[str]
status_code: typing.Optional[int]
content: typing.Optional[str]
webhook: Webhook
event_data: str
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def send_webhook_event(trigger_type, alert_group_id, organization_id=None, user_id=None, is_backsync=False):
`apps.get_model` -> `import` (#2619) # What this PR does Remove [`apps.get_model`](https://docs.djangoproject.com/en/3.2/ref/applications/#django.apps.apps.get_model) invocations and use inline `import` statements in places where models are imported within functions/methods to avoid circular imports. I believe `import` statements are more appropriate for most use cases as they allow for better static code analysis & formatting, and solve the issue of circular imports without being unnecessarily dynamic as `apps.get_model`. With `import` statements, it's possible to: - Jump to model definitions in most IDEs - Automatically sort inline imports with `isort` - Find import errors faster/easier (most IDEs highlight broken imports) - Have more consistency across regular & inline imports when importing models This PR also adds a flake8 rule to ban imports of `django.apps.apps`, so it's harder to use `apps.get_model` by mistake (it's possible to ignore this rule by using `# noqa: I251`). The rule is not enforced on directories with migration files, because `apps.get_model` is often used to get a historical state of a model, which is useful when writing migrations ([see this SO answer for more details](https://stackoverflow.com/a/37769213)). So `apps.get_model` is considered OK in migrations (even necessary in some cases). ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
2023-07-25 10:43:23 +01:00
from apps.webhooks.models import Webhook
webhooks_qs = Webhook.objects.filter(
trigger_type=trigger_type,
organization_id=organization_id,
).exclude(is_webhook_enabled=False)
# include status change triggered webhooks if needed
if trigger_type in Webhook.STATUS_CHANGE_TRIGGERS:
webhooks_qs |= Webhook.objects.filter(
trigger_type=Webhook.TRIGGER_STATUS_CHANGE,
organization_id=organization_id,
).exclude(is_webhook_enabled=False)
if is_backsync:
webhooks_qs = webhooks_qs.filter(is_from_connected_integration=False)
for webhook in webhooks_qs:
execute_webhook.apply_async((webhook.pk, alert_group_id, user_id, None), kwargs={"trigger_type": trigger_type})
def _isoformat_date(date_value: datetime) -> typing.Optional[str]:
return date_value.isoformat() if date_value else None
def _build_payload(
webhook: Webhook, alert_group: AlertGroup, user: User, trigger_type: int | None
) -> typing.Dict[str, typing.Any]:
payload_trigger_type = webhook.trigger_type
if payload_trigger_type == Webhook.TRIGGER_STATUS_CHANGE and trigger_type is not None:
# use original trigger type when generating the payload if status change is set
payload_trigger_type = trigger_type
event = {
"type": TRIGGER_TYPE_TO_LABEL[payload_trigger_type],
}
if payload_trigger_type == Webhook.TRIGGER_ALERT_GROUP_CREATED:
event["time"] = _isoformat_date(alert_group.started_at)
elif payload_trigger_type == Webhook.TRIGGER_ACKNOWLEDGE:
event["time"] = _isoformat_date(alert_group.acknowledged_at)
elif payload_trigger_type == Webhook.TRIGGER_RESOLVE:
event["time"] = _isoformat_date(alert_group.resolved_at)
elif payload_trigger_type == Webhook.TRIGGER_SILENCE:
event["time"] = _isoformat_date(alert_group.silenced_at)
event["until"] = _isoformat_date(alert_group.silenced_until)
elif payload_trigger_type == Webhook.TRIGGER_MANUAL:
event["time"] = _isoformat_date(timezone.now())
# include latest response data per webhook in the event input data
# exclude past responses from webhook being executed
responses_data = {}
responses = (
alert_group.webhook_responses.all()
.exclude(webhook__public_primary_key=webhook.public_primary_key)
.order_by("-timestamp")
)
for r in responses:
if r.webhook.public_primary_key not in responses_data:
try:
response_data = r.json()
except JSONDecodeError:
response_data = r.content
responses_data[r.webhook.public_primary_key] = response_data
return serialize_event(event, alert_group, user, webhook, responses_data)
def mask_authorization_header(
headers: typing.Dict[str, str], header_keys_to_mask: typing.List[str]
) -> typing.Dict[str, str]:
masked_headers = headers.copy()
lower_keys = set(k.lower() for k in header_keys_to_mask)
for k in headers.keys():
if k.lower() in lower_keys:
masked_headers[k] = WEBHOOK_FIELD_PLACEHOLDER
return masked_headers
def make_request(
webhook: Webhook, alert_group: AlertGroup, data: typing.Dict[str, typing.Any]
) -> typing.Tuple[bool, WebhookRequestStatus, typing.Optional[str], typing.Optional[Exception]]:
status: WebhookRequestStatus = {
"url": None,
"request_trigger": None,
"request_headers": None,
"request_data": None,
"status_code": None,
"content": None,
"webhook": webhook,
"event_data": json.dumps(data),
}
masked_header_keys = ["Authorization"]
exception = error = None
try:
Add webhook presets (#2996) # What this PR does Add a system similar to how we select integrations when creating webhooks so that the user has a description of what webhookds do and does not have to write complex templates for common webhook use cases. Presets allow us to create the contents of the webhooks in code and define which fields are controlled by the preset. Some specifics: - Newly created webhooks must choose between Simple, Advanced or another predefined system - Simple is always an escalation step and will post the entire payload to the given URL - Advanced is the same as no preset which is our current view where all fields are available - There are no changes for all existing webhooks with empty preset fields - Once a webhook is created with a preset the preset cannot be changed - Fields in the webhook that are populated by code will give a validation error if they are modified - In the public API webhooks with presets are returned for viewing but cannot be created or modified. This restriction is in place because the Web UI provides the context for which fields to use with a preset. The public API is for interacting with webhooks where all fields are defined. To define a preset create a file with metadata and an override function. The metadata drives validation and what to display in the UI. There are two functions one is connected to the pre_save hook of the Webhook model for persistent changes, the other replaces parameters at execution time for ephemeral changes. See the simple and advanced presets as an example. The file must be listed in settings in `INSTALLED_WEBHOOK_PRESETS` to be enabled at runtime.. ## Which issue(s) this PR fixes ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --------- Co-authored-by: Joey Orlando <joey.orlando@grafana.com>
2023-09-27 07:22:52 -06:00
if webhook.preset:
if webhook.preset not in WebhookPresetOptions.WEBHOOK_PRESETS:
raise Exception(f"Invalid preset {webhook.preset}")
else:
preset = WebhookPresetOptions.WEBHOOK_PRESETS[webhook.preset]
preset.override_parameters_at_runtime(webhook)
masked_header_keys.extend(preset.get_masked_headers())
Add webhook presets (#2996) # What this PR does Add a system similar to how we select integrations when creating webhooks so that the user has a description of what webhookds do and does not have to write complex templates for common webhook use cases. Presets allow us to create the contents of the webhooks in code and define which fields are controlled by the preset. Some specifics: - Newly created webhooks must choose between Simple, Advanced or another predefined system - Simple is always an escalation step and will post the entire payload to the given URL - Advanced is the same as no preset which is our current view where all fields are available - There are no changes for all existing webhooks with empty preset fields - Once a webhook is created with a preset the preset cannot be changed - Fields in the webhook that are populated by code will give a validation error if they are modified - In the public API webhooks with presets are returned for viewing but cannot be created or modified. This restriction is in place because the Web UI provides the context for which fields to use with a preset. The public API is for interacting with webhooks where all fields are defined. To define a preset create a file with metadata and an override function. The metadata drives validation and what to display in the UI. There are two functions one is connected to the pre_save hook of the Webhook model for persistent changes, the other replaces parameters at execution time for ephemeral changes. See the simple and advanced presets as an example. The file must be listed in settings in `INSTALLED_WEBHOOK_PRESETS` to be enabled at runtime.. ## Which issue(s) this PR fixes ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --------- Co-authored-by: Joey Orlando <joey.orlando@grafana.com>
2023-09-27 07:22:52 -06:00
if not webhook.check_integration_filter(alert_group):
status["request_trigger"] = NOT_FROM_SELECTED_INTEGRATION
return False, status, None, None
triggered, status["request_trigger"] = webhook.check_trigger(data)
if triggered:
status["url"] = webhook.build_url(data)
request_kwargs = webhook.build_request_kwargs(data, raise_data_errors=True)
display_headers = mask_authorization_header(request_kwargs.get("headers", {}), masked_header_keys)
status["request_headers"] = json.dumps(display_headers)
if "json" in request_kwargs:
status["request_data"] = json.dumps(request_kwargs["json"])
else:
status["request_data"] = request_kwargs.get("data")
response = webhook.make_request(status["url"], request_kwargs)
status["status_code"] = response.status_code
content_length = len(response.content)
if content_length <= WEBHOOK_RESPONSE_LIMIT:
try:
status["content"] = json.dumps(response.json())
except JSONDecodeError:
status["content"] = response.content.decode("utf-8")
else:
status[
"content"
] = f"Response content {content_length} exceeds {WEBHOOK_RESPONSE_LIMIT} character limit"
return triggered, status, None, None
except InvalidWebhookUrl as e:
status["url"] = error = e.message
except InvalidWebhookTrigger as e:
status["request_trigger"] = error = e.message
except InvalidWebhookHeaders as e:
status["request_headers"] = error = e.message
except InvalidWebhookData as e:
status["request_data"] = error = e.message
except requests.exceptions.SSLError as e:
# Don't raise an exception for SSL errors, as they are out of our control and retrying
# isn't going to help. Just show the error to the user and give up
#
# from the docs (https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification)
# "Requests will throw a SSLError if its unable to verify the certificate"
status["content"] = error = str(e)
except Exception as e:
status["content"] = error = str(e)
exception = e
return True, status, error, exception
@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else EXECUTE_WEBHOOK_RETRIES
)
def execute_webhook(webhook_pk, alert_group_id, user_id, escalation_policy_id, trigger_type=None, manual_retry_num=0):
`apps.get_model` -> `import` (#2619) # What this PR does Remove [`apps.get_model`](https://docs.djangoproject.com/en/3.2/ref/applications/#django.apps.apps.get_model) invocations and use inline `import` statements in places where models are imported within functions/methods to avoid circular imports. I believe `import` statements are more appropriate for most use cases as they allow for better static code analysis & formatting, and solve the issue of circular imports without being unnecessarily dynamic as `apps.get_model`. With `import` statements, it's possible to: - Jump to model definitions in most IDEs - Automatically sort inline imports with `isort` - Find import errors faster/easier (most IDEs highlight broken imports) - Have more consistency across regular & inline imports when importing models This PR also adds a flake8 rule to ban imports of `django.apps.apps`, so it's harder to use `apps.get_model` by mistake (it's possible to ignore this rule by using `# noqa: I251`). The rule is not enforced on directories with migration files, because `apps.get_model` is often used to get a historical state of a model, which is useful when writing migrations ([see this SO answer for more details](https://stackoverflow.com/a/37769213)). So `apps.get_model` is considered OK in migrations (even necessary in some cases). ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
2023-07-25 10:43:23 +01:00
from apps.webhooks.models import Webhook
try:
`apps.get_model` -> `import` (#2619) # What this PR does Remove [`apps.get_model`](https://docs.djangoproject.com/en/3.2/ref/applications/#django.apps.apps.get_model) invocations and use inline `import` statements in places where models are imported within functions/methods to avoid circular imports. I believe `import` statements are more appropriate for most use cases as they allow for better static code analysis & formatting, and solve the issue of circular imports without being unnecessarily dynamic as `apps.get_model`. With `import` statements, it's possible to: - Jump to model definitions in most IDEs - Automatically sort inline imports with `isort` - Find import errors faster/easier (most IDEs highlight broken imports) - Have more consistency across regular & inline imports when importing models This PR also adds a flake8 rule to ban imports of `django.apps.apps`, so it's harder to use `apps.get_model` by mistake (it's possible to ignore this rule by using `# noqa: I251`). The rule is not enforced on directories with migration files, because `apps.get_model` is often used to get a historical state of a model, which is useful when writing migrations ([see this SO answer for more details](https://stackoverflow.com/a/37769213)). So `apps.get_model` is considered OK in migrations (even necessary in some cases). ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
2023-07-25 10:43:23 +01:00
webhook = Webhook.objects.get(pk=webhook_pk)
except Webhook.DoesNotExist:
Add webhook presets (#2996) # What this PR does Add a system similar to how we select integrations when creating webhooks so that the user has a description of what webhookds do and does not have to write complex templates for common webhook use cases. Presets allow us to create the contents of the webhooks in code and define which fields are controlled by the preset. Some specifics: - Newly created webhooks must choose between Simple, Advanced or another predefined system - Simple is always an escalation step and will post the entire payload to the given URL - Advanced is the same as no preset which is our current view where all fields are available - There are no changes for all existing webhooks with empty preset fields - Once a webhook is created with a preset the preset cannot be changed - Fields in the webhook that are populated by code will give a validation error if they are modified - In the public API webhooks with presets are returned for viewing but cannot be created or modified. This restriction is in place because the Web UI provides the context for which fields to use with a preset. The public API is for interacting with webhooks where all fields are defined. To define a preset create a file with metadata and an override function. The metadata drives validation and what to display in the UI. There are two functions one is connected to the pre_save hook of the Webhook model for persistent changes, the other replaces parameters at execution time for ephemeral changes. See the simple and advanced presets as an example. The file must be listed in settings in `INSTALLED_WEBHOOK_PRESETS` to be enabled at runtime.. ## Which issue(s) this PR fixes ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --------- Co-authored-by: Joey Orlando <joey.orlando@grafana.com>
2023-09-27 07:22:52 -06:00
logger.warning(f"Webhook {webhook_pk} does not exist")
return
try:
personal_log_records = UserNotificationPolicyLogRecord.objects.filter(
alert_group_id=alert_group_id,
author__isnull=False,
type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_SUCCESS,
).select_related("author")
alert_group = (
AlertGroup.objects.prefetch_related(
Prefetch("personal_log_records", queryset=personal_log_records, to_attr="sent_notifications")
)
.select_related("channel")
.get(pk=alert_group_id)
)
except AlertGroup.DoesNotExist:
return
user = None
if user_id is not None:
user = User.objects.filter(pk=user_id).first()
data = _build_payload(webhook, alert_group, user, trigger_type)
triggered, status, error, exception = make_request(webhook, alert_group, data)
# create response entry only if webhook was triggered
response = None
if triggered:
response = WebhookResponse.objects.create(
alert_group=alert_group,
trigger_type=trigger_type or webhook.trigger_type,
**status,
)
else:
reason = status.get("request_trigger", "Unknown")
logger.info(f"Webhook {webhook_pk} was not triggered: {reason}")
escalation_policy = step = None
if escalation_policy_id:
escalation_policy = EscalationPolicy.objects.filter(pk=escalation_policy_id).first()
step = EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK
# create log record
error_code = None
log_type = AlertGroupLogRecord.TYPE_CUSTOM_WEBHOOK_TRIGGERED
trigger_log = TRIGGER_TYPE_TO_LABEL[webhook.trigger_type]
if webhook.trigger_type == Webhook.TRIGGER_MANUAL and escalation_policy is None:
trigger_log = None # triggered manually
reason = str(status["status_code"])
if error is not None:
log_type = AlertGroupLogRecord.TYPE_ESCALATION_FAILED
error_code = AlertGroupLogRecord.ERROR_ESCALATION_TRIGGER_CUSTOM_WEBHOOK_ERROR
reason = error
if triggered:
AlertGroupLogRecord.objects.create(
type=log_type,
alert_group=alert_group,
author=user,
reason=reason,
step_specific_info={
"webhook_name": webhook.name,
"webhook_id": webhook.public_primary_key,
"trigger": trigger_log,
"response_id": response.pk if response else None,
},
escalation_policy=escalation_policy,
escalation_policy_step=step,
escalation_error_code=error_code,
)
if isinstance(exception, EXECUTE_WEBHOOK_EXCEPTIONS_TO_MANUALLY_RETRY):
msg_details = (
f"webhook={webhook_pk} alert_group={alert_group_id} user={user_id} escalation_policy={escalation_policy_id}"
)
if manual_retry_num < EXECUTE_WEBHOOK_RETRIES:
retry_num = manual_retry_num + 1
logger.warning(f"Manually retrying execute_webhook for {msg_details} manual_retry_num={retry_num}")
execute_webhook.apply_async(
(webhook_pk, alert_group_id, user_id, escalation_policy_id),
kwargs={"trigger_type": trigger_type, "manual_retry_num": retry_num},
countdown=10,
)
else:
# don't raise an exception if we've exhausted retries for
# exceptions within EXECUTE_WEBHOOK_EXCEPTIONS_TO_MANUALLY_RETRY, simply give up trying
logger.warning(f"Exhausted execute_webhook retries for {msg_details}")
elif exception:
raise exception