* Modify plugin.json to support RBAC role registration * defines 26 new custom roles in plugin.json. The main roles are: - Admin: read/write access to everything in OnCall - Reader: read access to everything in OnCall - OnCaller : read access to everything in OnCall + edit access to Alert Groups and Schedules - <object-type> Editor: read/write access to everything related to <object-type> - <object-type> Reader: read access for <object-type> - User Settings Admin: read/write access to all users' settings, not just own settings. This is in comparison to User Settings Editor which can only read/write own settings * update changelog and documentation (#686) * implement RBAC for OnCall backend This commit refactors backend authorization. It tries to use RBAC authorization if the org's grafana instance supports it, otherwise it falls back to basic role authorization. * update RBAC backend tests * add tests for RBAC changes - run backend tests as matrix where RBAC is enabled/disabled. When RBAC is enabled, the permissions granted are read from the role grants in the frontend's plugin.json file (instead of relying on what we specify in RBACPermission.Permissions) - remove --reuse-db --nomigrations flags from engine/tox.ini - minor autoformatting changes to docker-compose-developer.yml * remove --ds=settings.ci-test from pytest CI command DJANGO_SETTINGS_MODULE is already specified as an env var so this is just unnecessary duplication * update gitignore * update github action job name for "test" * RBAC frontend changes * refactors the use of basic roles (ex. Viewer, Editor, Admin) to use RBAC permissions (when supported), or falling back to basic roles when RBAC is not supported. - updates the UserAction enum in grafana-plugin/src/state/userAction.ts. Previously this was hardcoded to a list of strings that were being returned by the OnCall API. 
Now the values here correspond to the permissions in plugin.json (plus a fallback role) * changes per Gabriel's comments: - get rid of group attribute in rbac roles - remove displayName role attribute - remove hidden role attribute - add back role to includes section * don't try to update user timezone if they don't have permission
385 lines
19 KiB
Python
385 lines
19 KiB
Python
import time
|
|
|
|
from django.apps import apps
|
|
from django.conf import settings
|
|
from django.db import transaction
|
|
from django.utils import timezone
|
|
from kombu import uuid as celery_uuid
|
|
|
|
from apps.alerts.constants import NEXT_ESCALATION_DELAY
|
|
from apps.alerts.signals import user_notification_action_triggered_signal
|
|
from apps.base.messaging import get_messaging_backend_from_id
|
|
from apps.base.utils import live_settings
|
|
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
|
|
|
|
from .task_logger import task_logger
|
|
|
|
|
|
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def notify_user_task(
    user_pk,
    alert_group_pk,
    previous_notification_policy_pk=None,
    reason=None,
    prevent_posting_to_thread=False,
    notify_even_acknowledged=False,
    important=False,
    notify_anyway=False,
):
    """Execute one step of a user's personal notification chain for an alert group.

    On the first call (``previous_notification_policy_pk is None``) the first
    ``UserNotificationPolicy`` matching ``user`` and ``important`` is used, and
    ``reason`` is extended with a summary of the channels of the following
    NOTIFY steps. On later calls the task first checks that its own Celery
    request id equals ``UserHasNotification.active_notification_policy_id``
    (otherwise it exits), then moves to ``.next()`` of the previous policy.

    For the selected step a ``UserNotificationPolicyLogRecord`` is built and
    saved. After the surrounding transaction commits, the task schedules
    ``send_user_notification_signal`` (only on the first attempt),
    ``perform_notification`` (for non-WAIT steps that produced a log record) and
    the next ``notify_user_task`` run.

    Args:
        user_pk: primary key of the user to notify.
        alert_group_pk: primary key of the alert group.
        previous_notification_policy_pk: primary key of the policy handled by the
            previous run, or ``None`` for the first run.
        reason: optional text stored on the log record of NOTIFY steps.
        prevent_posting_to_thread: stored as ``slack_prevent_posting`` on log records.
        notify_even_acknowledged: do not stop when the alert group is acknowledged.
        important: selects which set of notification policies is used.
        notify_anyway: do not stop when the alert group is silenced.

    Returns:
        A short explanatory string for some early exits (missing alert group or
        user, alert group no longer eligible); ``None`` otherwise.
    """
    UserNotificationPolicy = apps.get_model("base", "UserNotificationPolicy")
    UserNotificationPolicyLogRecord = apps.get_model("base", "UserNotificationPolicyLogRecord")
    User = apps.get_model("user_management", "User")
    AlertGroup = apps.get_model("alerts", "AlertGroup")
    UserHasNotification = apps.get_model("alerts", "UserHasNotification")

    try:
        alert_group = AlertGroup.all_objects.get(pk=alert_group_pk)
    except AlertGroup.DoesNotExist:
        return f"notify_user_task: alert_group {alert_group_pk} doesn't exist"

    countdown = 0
    stop_escalation = False
    log_record = None

    with transaction.atomic():
        try:
            user = User.objects.get(pk=user_pk)
        except User.DoesNotExist:
            return f"notify_user_task: user {user_pk} doesn't exist"

        organization = alert_group.channel.organization

        if not user.is_notification_allowed:
            task_logger.info(f"notify_user_task: user {user.pk} notification is not allowed")
            UserNotificationPolicyLogRecord(
                author=user,
                type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
                reason="notification is not allowed for user",
                alert_group=alert_group,
                notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_FORBIDDEN,
            ).save()
            return

        user_has_notification, _ = UserHasNotification.objects.get_or_create(
            user=user,
            alert_group=alert_group,
        )

        # Re-read the row with SELECT ... FOR UPDATE so the active task id is
        # checked and rewritten under a row lock for the rest of the transaction.
        user_has_notification = UserHasNotification.objects.filter(pk=user_has_notification.pk).select_for_update()[0]

        if previous_notification_policy_pk is None:
            notification_policy = UserNotificationPolicy.objects.filter(user=user, important=important).first()
            if notification_policy is None:
                task_logger.info(
                    f"notify_user_task: Failed to notify. No notification policies. "
                    f"user_id={user_pk} alert_group_id={alert_group_pk} important={important}"
                )
                return
            # Here we collect a brief overview of notification steps configured for user to send it to thread.
            collected_steps_ids = []
            next_notification_policy = notification_policy.next()
            while next_notification_policy is not None:
                if next_notification_policy.step == UserNotificationPolicy.Step.NOTIFY:
                    if next_notification_policy.notify_by not in collected_steps_ids:
                        collected_steps_ids.append(next_notification_policy.notify_by)
                next_notification_policy = next_notification_policy.next()
            collected_steps = ", ".join(
                UserNotificationPolicy.NotificationChannel(step_id).label for step_id in collected_steps_ids
            )
            reason = ("Reason: " + reason + "\n") if reason is not None else ""
            reason += ("Further notification plan: " + collected_steps) if collected_steps_ids else ""
        else:
            # Only the task whose id was recorded as active may continue the chain.
            if notify_user_task.request.id != user_has_notification.active_notification_policy_id:
                task_logger.info(
                    f"notify_user_task: active_notification_policy_id mismatch. "
                    f"Duplication or non-active escalation triggered. "
                    f"Active: {user_has_notification.active_notification_policy_id}"
                )
                return

            try:
                notification_policy = UserNotificationPolicy.objects.get(pk=previous_notification_policy_pk)
                if notification_policy.user.organization != organization:
                    # The previous policy belongs to a different organization than the alert
                    # group: switch to this user's policy with the same order.
                    notification_policy = UserNotificationPolicy.objects.get(
                        order=notification_policy.order, user=user, important=important
                    )
                notification_policy = notification_policy.next()
            except UserNotificationPolicy.DoesNotExist:
                task_logger.info(
                    f"notify_user_task: Notification policy {previous_notification_policy_pk} has been deleted"
                )
                return
            reason = None

        if notification_policy is None:
            # No further steps: record the end of the chain and clear the active task id below.
            stop_escalation = True
            log_record = UserNotificationPolicyLogRecord(
                author=user,
                type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FINISHED,
                notification_policy=notification_policy,
                alert_group=alert_group,
                slack_prevent_posting=prevent_posting_to_thread,
            )
        else:
            if (
                (alert_group.acknowledged and not notify_even_acknowledged)
                or alert_group.resolved
                or alert_group.is_archived
                or alert_group.wiped_at
                or alert_group.root_alert_group
            ):
                return "Acknowledged, resolved, archived, attached or wiped."

            if alert_group.silenced and not notify_anyway:
                task_logger.info(
                    f"notify_user_task: skip notification user {user.pk} because alert_group {alert_group.pk} is silenced"
                )
                return

            active_invitations_count = alert_group.invitations.filter(invitee=user, is_active=True).count()
            if (notify_even_acknowledged or notify_anyway) and active_invitations_count == 0:
                task_logger.info(f"notify_user_task: skip notification user {user.pk} invitation exceeded")
                return

            if notification_policy.step == UserNotificationPolicy.Step.WAIT:
                if notification_policy.wait_delay is not None:
                    delay_in_seconds = notification_policy.wait_delay.total_seconds()
                else:
                    delay_in_seconds = 0
                countdown = delay_in_seconds
                log_record = UserNotificationPolicyLogRecord(
                    author=user,
                    type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED,
                    notification_policy=notification_policy,
                    alert_group=alert_group,
                    slack_prevent_posting=prevent_posting_to_thread,
                    notification_step=notification_policy.step,
                )
                task_logger.info(f"notify_user_task: Waiting {delay_in_seconds} to notify user {user.pk}")
            elif notification_policy.step == UserNotificationPolicy.Step.NOTIFY:
                user_to_be_notified_in_slack = (
                    notification_policy.notify_by == UserNotificationPolicy.NotificationChannel.SLACK
                )
                if user_to_be_notified_in_slack and alert_group.notify_in_slack_enabled is False:
                    log_record = UserNotificationPolicyLogRecord(
                        author=user,
                        type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
                        notification_policy=notification_policy,
                        alert_group=alert_group,
                        reason=reason,
                        slack_prevent_posting=prevent_posting_to_thread,
                        notification_step=notification_policy.step,
                        notification_channel=notification_policy.notify_by,
                        notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_POSTING_TO_SLACK_IS_DISABLED,
                    )
                else:
                    log_record = UserNotificationPolicyLogRecord(
                        author=user,
                        type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED,
                        notification_policy=notification_policy,
                        alert_group=alert_group,
                        reason=reason,
                        slack_prevent_posting=prevent_posting_to_thread,
                        notification_step=notification_policy.step,
                        notification_channel=notification_policy.notify_by,
                    )

        if log_record:  # log_record is None if user notification policy step is unspecified
            log_record.save()
            # Emit the signal only on the first attempt so retries do not duplicate it.
            if notify_user_task.request.retries == 0:
                transaction.on_commit(lambda: send_user_notification_signal.apply_async((log_record.pk,)))

        if not stop_escalation:
            # A step that is neither WAIT nor NOTIFY produces no log record. Scheduling
            # perform_notification with `log_record.pk` would then raise AttributeError in
            # the on-commit callback and keep the next-step callback below from running.
            if log_record is not None and notification_policy.step != UserNotificationPolicy.Step.WAIT:
                transaction.on_commit(lambda: perform_notification.apply_async((log_record.pk,)))

            delay = NEXT_ESCALATION_DELAY + countdown
            task_id = celery_uuid()

            # Store the id of the follow-up task before scheduling it; the follow-up run
            # compares its own request id against this value.
            user_has_notification.active_notification_policy_id = task_id
            user_has_notification.save(update_fields=["active_notification_policy_id"])

            transaction.on_commit(
                lambda: notify_user_task.apply_async(
                    (user.pk, alert_group.pk, notification_policy.pk, reason),
                    {
                        "notify_even_acknowledged": notify_even_acknowledged,
                        "notify_anyway": notify_anyway,
                        "prevent_posting_to_thread": prevent_posting_to_thread,
                    },
                    countdown=delay,
                    task_id=task_id,
                )
            )

        else:
            user_has_notification.active_notification_policy_id = None
            user_has_notification.save(update_fields=["active_notification_policy_id"])
|
|
|
|
|
|
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def perform_notification(log_record_pk):
    """Deliver the notification described by a ``UserNotificationPolicyLogRecord``.

    The channel is taken from the record's notification policy (``notify_by``)
    and dispatched to SMS, phone call, Telegram, Slack, or a messaging backend
    resolved by ``get_messaging_backend_from_id``. When delivery cannot be
    attempted, a ``TYPE_PERSONAL_NOTIFICATION_FAILED`` log record is saved instead.

    For Slack, if the alert group has no Slack message yet, the task re-schedules
    itself every 60 seconds until one hour after the log record was created, and
    then records a failure.

    Args:
        log_record_pk: primary key of the ``UserNotificationPolicyLogRecord``
            that triggered this notification.
    """
    # Function-scope import: `timezone.timedelta` only works because django.utils.timezone
    # happens to import `timedelta`; use the stdlib name directly.
    from datetime import timedelta

    SMSMessage = apps.get_model("twilioapp", "SMSMessage")
    PhoneCall = apps.get_model("twilioapp", "PhoneCall")
    UserNotificationPolicy = apps.get_model("base", "UserNotificationPolicy")
    TelegramToUserConnector = apps.get_model("telegram", "TelegramToUserConnector")
    UserNotificationPolicyLogRecord = apps.get_model("base", "UserNotificationPolicyLogRecord")

    try:
        log_record = UserNotificationPolicyLogRecord.objects.get(pk=log_record_pk)
    except UserNotificationPolicyLogRecord.DoesNotExist:
        # The task auto-retries on any exception, so letting this propagate for a
        # record that no longer exists would only re-queue the task again and again.
        task_logger.info(f"perform_notification: log_record {log_record_pk} doesn't exist, skipping")
        return

    user = log_record.author
    alert_group = log_record.alert_group
    notification_policy = log_record.notification_policy
    notification_channel = notification_policy.notify_by if notification_policy else None

    def _save_failure(reason, error_code):
        """Persist a FAILED log record for this user, alert group and policy."""
        UserNotificationPolicyLogRecord(
            author=user,
            type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
            notification_policy=notification_policy,
            reason=reason,
            alert_group=alert_group,
            notification_step=notification_policy.step if notification_policy else None,
            notification_channel=notification_channel,
            notification_error_code=error_code,
        ).save()

    if user is None or notification_policy is None:
        _save_failure("Expected data is missing", None)
        return

    if not user.is_notification_allowed:
        # This record deliberately carries no policy/step/channel, so it is built explicitly.
        UserNotificationPolicyLogRecord(
            author=user,
            type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
            reason="notification is not allowed for user",
            alert_group=alert_group,
            notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_FORBIDDEN,
        ).save()
        return

    if notification_channel == UserNotificationPolicy.NotificationChannel.SMS:
        SMSMessage.send_sms(
            user,
            alert_group,
            notification_policy,
            is_cloud_notification=live_settings.GRAFANA_CLOUD_NOTIFICATIONS_ENABLED,
        )

    elif notification_channel == UserNotificationPolicy.NotificationChannel.PHONE_CALL:
        PhoneCall.make_call(
            user,
            alert_group,
            notification_policy,
            is_cloud_notification=live_settings.GRAFANA_CLOUD_NOTIFICATIONS_ENABLED,
        )

    elif notification_channel == UserNotificationPolicy.NotificationChannel.TELEGRAM:
        TelegramToUserConnector.notify_user(user, alert_group, notification_policy)

    elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK:
        # TODO: refactor checking the possibility of sending a notification in slack
        # Code below is not consistent.
        # We check various slack reasons to skip escalation in this task, in send_slack_notification,
        # before and after posting of slack message.
        if alert_group.reason_to_skip_escalation == alert_group.RATE_LIMITED:
            task_logger.debug(
                f"send_slack_notification for alert_group {alert_group.pk} failed because of slack ratelimit."
            )
            _save_failure(
                "Slack ratelimit",
                UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_SLACK_RATELIMIT,
            )
            return

        if alert_group.notify_in_slack_enabled is True and not log_record.slack_prevent_posting:
            # we cannot notify users in Slack if their team does not have Slack integration
            if alert_group.channel.organization.slack_team_identity is None:
                task_logger.debug(
                    f"send_slack_notification for alert_group {alert_group.pk} failed because slack team identity "
                    f"does not exist."
                )
                _save_failure(
                    "Slack team identity does not exist",
                    UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_SLACK_TOKEN_ERROR,
                )
                return

            retry_timeout_hours = 1
            slack_message = alert_group.get_slack_message()
            if slack_message is not None:
                slack_message.send_slack_notification(user, alert_group, notification_policy)
                task_logger.debug(f"Finished send_slack_notification for alert_group {alert_group.pk}.")
            # check how much time has passed since log record was created
            # to prevent eternal loop of restarting perform_notification task
            elif timezone.now() < log_record.created_at + timedelta(hours=retry_timeout_hours):
                task_logger.debug(
                    f"send_slack_notification for alert_group {alert_group.pk} failed because slack message "
                    f"does not exist. Restarting perform_notification."
                )
                restart_delay_seconds = 60
                perform_notification.apply_async((log_record_pk,), countdown=restart_delay_seconds)
            else:
                task_logger.debug(
                    f"send_slack_notification for alert_group {alert_group.pk} failed because slack message "
                    f"after {retry_timeout_hours} hours still does not exist"
                )
                _save_failure(
                    "Slack message does not exist",
                    UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_SLACK,
                )
    else:
        # Any other channel is resolved to a messaging backend by the channel's enum name.
        try:
            backend_id = UserNotificationPolicy.NotificationChannel(notification_policy.notify_by).name
            backend = get_messaging_backend_from_id(backend_id)
        except ValueError:
            backend = None

        if backend is None:
            task_logger.debug("notify_user failed because messaging backend is not available")
            _save_failure(
                "Messaging backend not available",
                UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_MESSAGING_BACKEND_ERROR,
            )
            return
        backend.notify_user(user, alert_group, notification_policy)
|
|
|
|
|
|
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=0 if settings.DEBUG else None
)
def send_user_notification_signal(log_record_pk):
    """Load a notification log record and send it through ``user_notification_action_triggered_signal``.

    Args:
        log_record_pk: primary key of the ``UserNotificationPolicyLogRecord`` to send.

    Debug-level log lines report the requested pk, the last stored log record
    and how long the task took.
    """
    started_at = time.time()

    LogRecordModel = apps.get_model("base", "UserNotificationPolicyLogRecord")
    task_logger.debug(f"LOG RECORD PK: {log_record_pk}")
    latest_record = LogRecordModel.objects.last()
    task_logger.debug(f"LOG RECORD LAST: {latest_record}")

    record = LogRecordModel.objects.get(pk=log_record_pk)
    user_notification_action_triggered_signal.send(
        sender=send_user_notification_signal,
        log_record=record,
    )

    elapsed_seconds = time.time() - started_at
    task_logger.debug("--- USER SIGNAL TOOK %s seconds ---" % elapsed_seconds)
|