oncall-engine/engine/apps/alerts/tasks/maintenance.py
Michael Derynck 6b40f95033 World, meet OnCall!
Co-authored-by: Eve832 <eve.meelan@grafana.com>
    Co-authored-by: Francisco Montes de Oca <nevermind89x@gmail.com>
    Co-authored-by: Ildar Iskhakov <ildar.iskhakov@grafana.com>
    Co-authored-by: Innokentii Konstantinov <innokenty.konstantinov@grafana.com>
    Co-authored-by: Julia <ferril.darkdiver@gmail.com>
    Co-authored-by: maskin25 <kengurek@gmail.com>
    Co-authored-by: Matias Bordese <mbordese@gmail.com>
    Co-authored-by: Matvey Kukuy <motakuk@gmail.com>
    Co-authored-by: Michael Derynck <michael.derynck@grafana.com>
    Co-authored-by: Richard Hartmann <richih@richih.org>
    Co-authored-by: Robby Milo <robbymilo@fastmail.com>
    Co-authored-by: Timur Olzhabayev <timur.olzhabayev@grafana.com>
    Co-authored-by: Vadim Stepanov <vadimkerr@gmail.com>
    Co-authored-by: Yulia Shanyrova <yulia.shanyrova@grafana.com>
2022-06-03 08:09:47 -06:00

138 lines
6.1 KiB
Python

from django.apps import apps
from django.conf import settings
from django.db import transaction
from django.db.models import ExpressionWrapper, F, fields
from django.utils import timezone
from apps.user_management.organization_log_creator import create_organization_log
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
from .task_logger import task_logger
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def disable_maintenance(*args, **kwargs):
    """Stop maintenance for an alert receive channel or an organization.

    Recognised keyword arguments:
        alert_receive_channel_id: pk of the AlertReceiveChannel to take out of
            maintenance (checked first).
        organization_id: pk of the Organization to take out of maintenance
            (used only when ``alert_receive_channel_id`` is absent).
        user_id: optional pk of the User who stopped maintenance; only affects
            the organization log entry.
        force: when truthy, stop maintenance even if this task's request id
            does not match the object's ``maintenance_uuid``.

    The object is locked with ``select_for_update`` inside a transaction; the
    maintenance alert group resolution and the maintenance notification are
    deferred with ``transaction.on_commit``.
    """
    AlertGroup = apps.get_model("alerts", "AlertGroup")
    OrganizationLogRecord = apps.get_model("base", "OrganizationLogRecord")
    User = apps.get_model("user_management", "User")
    Organization = apps.get_model("user_management", "Organization")

    target = None
    user_id = kwargs.get("user_id")
    user = User.objects.get(pk=user_id) if user_id else None
    force = kwargs.get("force", False)

    with transaction.atomic():
        # Resolve (and row-lock) the object whose maintenance should be stopped.
        if "alert_receive_channel_id" in kwargs:
            AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel")
            channel_id = kwargs["alert_receive_channel_id"]
            try:
                target = AlertReceiveChannel.objects.select_for_update().get(pk=channel_id)
            except AlertReceiveChannel.DoesNotExist:
                task_logger.info(f"AlertReceiveChannel for disable_maintenance does not exists. Id: {channel_id}")
        elif "organization_id" in kwargs:
            org_id = kwargs["organization_id"]
            try:
                target = Organization.objects.select_for_update().get(pk=org_id)
            except Organization.DoesNotExist:
                task_logger.info(f"Organization for disable_maintenance does not exists. Id: {org_id}")
        else:
            task_logger.info(f"Invalid instance id passed in disable_maintenance. Got: {kwargs}")

        if target is None:
            return

        # Only the task that owns the current maintenance window (same uuid as
        # the task request id) may stop it, unless the caller forces it.
        if not (disable_maintenance.request.id == target.maintenance_uuid or force):
            return

        verbal = target.get_verbal()
        log_type, object_verbal = OrganizationLogRecord.get_log_type_and_maintainable_object_verbal(
            target,
            target.maintenance_mode,
            verbal,
            stopped=True,
        )
        stopped_by = " by user" if user else ""
        description = f"{target.get_maintenance_mode_display()} of {object_verbal} stopped{stopped_by}"

        if isinstance(target, Organization):
            organization = target
        else:
            organization = target.organization
        create_organization_log(organization, user, log_type, description)

        mode = target.maintenance_mode
        if mode == target.MAINTENANCE:
            mode_verbal = "Maintenance"
            maintenance_incident = AlertGroup.all_objects.get(maintenance_uuid=target.maintenance_uuid)
            transaction.on_commit(maintenance_incident.resolve_by_disable_maintenance)
        if mode == target.DEBUG_MAINTENANCE:
            mode_verbal = "Debug"
        # NOTE(review): mode_verbal stays unbound if the mode is neither of the
        # two values above; the callback below would then fail at commit time.
        # Confirm that cannot happen for objects reaching this point.

        # The message is built from mode_verbal rather than
        # get_maintenance_mode_display() because by the time the callback runs
        # maintenance_mode has already been reset to None.
        def _notify_finished():
            return target.notify_about_maintenance_action(f"{mode_verbal} of {verbal} finished.")

        if organization.slack_team_identity:
            transaction.on_commit(_notify_finished)

        # Wipe every maintenance-related field and persist only those columns.
        cleared_fields = (
            "maintenance_uuid",
            "maintenance_duration",
            "maintenance_mode",
            "maintenance_started_at",
            "maintenance_author",
        )
        for field_name in cleared_fields:
            setattr(target, field_name, None)
        target.save(update_fields=list(cleared_fields))
@shared_dedicated_queue_retry_task(
    autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def check_maintenance_finished(*args, **kwargs):
    """Schedule ``disable_maintenance`` for every object whose maintenance has expired.

    An object counts as expired when ``maintenance_started_at`` is set and
    ``maintenance_started_at + maintenance_duration`` is earlier than the
    current time. Alert receive channels are processed first, then
    organizations; each match gets its own forced ``disable_maintenance`` task.
    """
    AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel")
    Organization = apps.get_model("user_management", "Organization")

    # One timestamp is shared by both queries.
    current_time = timezone.now()
    finish_at_expression = ExpressionWrapper(
        (F("maintenance_started_at") + F("maintenance_duration")),
        output_field=fields.DateTimeField(),
    )

    # (model, name of the kwarg disable_maintenance expects for that model)
    maintainable_models = (
        (AlertReceiveChannel, "alert_receive_channel_id"),
        (Organization, "organization_id"),
    )
    for model, id_kwarg in maintainable_models:
        expired_pks = (
            model.objects.filter(maintenance_started_at__isnull=False)
            .annotate(maintenance_finish_at=finish_at_expression)
            .filter(maintenance_finish_at__lt=current_time)
            .values_list("pk", flat=True)
        )
        for expired_pk in expired_pks:
            disable_maintenance.apply_async(
                args=(),
                kwargs={id_kwarg: expired_pk, "force": True},
            )