Fix too many tasks being created for create contact points (#12)
Fix too many tasks being created for create_contact_points_for_datasource task Co-authored-by: Julia <ferril.darkdiver@gmail.com>
This commit is contained in:
parent
dbaffd86f5
commit
583835f8d4
3 changed files with 43 additions and 9 deletions
|
|
@ -5,7 +5,7 @@ from typing import Optional
|
|||
from django.apps import apps
|
||||
from rest_framework import status
|
||||
|
||||
from apps.alerts.tasks import create_contact_points_for_datasource
|
||||
from apps.alerts.tasks import schedule_create_contact_points_for_datasource
|
||||
from apps.grafana_plugin.helpers import GrafanaAPIClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -77,16 +77,15 @@ class GrafanaAlertingSyncManager:
|
|||
# sync other datasource
|
||||
for datasource in datasources:
|
||||
if datasource["type"] == GrafanaAlertingSyncManager.ALERTING_DATASOURCE:
|
||||
if self.create_contact_point(datasource) is None:
|
||||
contact_point = self.create_contact_point(datasource)
|
||||
if contact_point is None:
|
||||
# Failed to create contact point duo to getting wrong alerting config. It is expected behaviour.
|
||||
# Add datasource to list and retry to create contact point for it async
|
||||
datasources_to_create.append(datasource)
|
||||
|
||||
if datasources_to_create:
|
||||
# create other contact points async
|
||||
create_contact_points_for_datasource.apply_async(
|
||||
(self.alert_receive_channel.pk, datasources_to_create),
|
||||
)
|
||||
schedule_create_contact_points_for_datasource(self.alert_receive_channel.pk, datasources_to_create)
|
||||
else:
|
||||
self.alert_receive_channel.is_finished_alerting_setup = True
|
||||
self.alert_receive_channel.save(update_fields=["is_finished_alerting_setup"])
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from .calculcate_escalation_finish_time import calculate_escalation_finish_time
|
|||
from .call_ack_url import call_ack_url # noqa: F401
|
||||
from .check_escalation_finished import check_escalation_finished_task # noqa: F401
|
||||
from .create_contact_points_for_datasource import create_contact_points_for_datasource # noqa: F401
|
||||
from .create_contact_points_for_datasource import schedule_create_contact_points_for_datasource # noqa: F401
|
||||
from .custom_button_result import custom_button_result # noqa: F401
|
||||
from .delete_alert_group import delete_alert_group # noqa: F401
|
||||
from .distribute_alert import distribute_alert # noqa: F401
|
||||
|
|
|
|||
|
|
@ -1,9 +1,32 @@
|
|||
import logging
|
||||
|
||||
from celery.utils.log import get_task_logger
|
||||
from django.apps import apps
|
||||
from django.core.cache import cache
|
||||
from rest_framework import status
|
||||
|
||||
from apps.grafana_plugin.helpers import GrafanaAPIClient
|
||||
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
def get_cache_key_create_contact_points_for_datasource(alert_receive_channel_id):
|
||||
CACHE_KEY_PREFIX = "create_contact_points_for_datasource"
|
||||
return f"{CACHE_KEY_PREFIX}_{alert_receive_channel_id}"
|
||||
|
||||
|
||||
@shared_dedicated_queue_retry_task
|
||||
def schedule_create_contact_points_for_datasource(alert_receive_channel_id, datasource_list):
|
||||
CACHE_LIFETIME = 600
|
||||
START_TASK_DELAY = 3
|
||||
task = create_contact_points_for_datasource.apply_async(
|
||||
args=[alert_receive_channel_id, datasource_list], countdown=START_TASK_DELAY
|
||||
)
|
||||
cache_key = get_cache_key_create_contact_points_for_datasource(alert_receive_channel_id)
|
||||
cache.set(cache_key, task.id, timeout=CACHE_LIFETIME)
|
||||
|
||||
|
||||
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=10)
|
||||
def create_contact_points_for_datasource(alert_receive_channel_id, datasource_list):
|
||||
|
|
@ -11,6 +34,11 @@ def create_contact_points_for_datasource(alert_receive_channel_id, datasource_li
|
|||
Try to create contact points for other datasource.
|
||||
Restart task for datasource, for which contact point was not created.
|
||||
"""
|
||||
cache_key = get_cache_key_create_contact_points_for_datasource(alert_receive_channel_id)
|
||||
cached_task_id = cache.get(cache_key)
|
||||
current_task_id = create_contact_points_for_datasource.request.id
|
||||
if cached_task_id is not None and current_task_id != cached_task_id:
|
||||
return
|
||||
|
||||
AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel")
|
||||
|
||||
|
|
@ -21,7 +49,7 @@ def create_contact_points_for_datasource(alert_receive_channel_id, datasource_li
|
|||
api_token=alert_receive_channel.organization.api_token,
|
||||
)
|
||||
# list of datasource for which contact point creation was failed
|
||||
datasource_to_create = []
|
||||
datasources_to_create = []
|
||||
for datasource in datasource_list:
|
||||
contact_point = None
|
||||
config, response_info = client.get_alerting_config(datasource["id"])
|
||||
|
|
@ -29,16 +57,22 @@ def create_contact_points_for_datasource(alert_receive_channel_id, datasource_li
|
|||
if response_info.get("status_code") == status.HTTP_404_NOT_FOUND:
|
||||
client.get_alertmanager_status_with_config(datasource["id"])
|
||||
contact_point = alert_receive_channel.grafana_alerting_sync_manager.create_contact_point(datasource)
|
||||
elif response_info.get("status_code") == status.HTTP_400_BAD_REQUEST:
|
||||
logger.warning(
|
||||
f"Failed to create contact point for integration {alert_receive_channel_id}, "
|
||||
f"datasource info: {datasource}; response: {response_info}"
|
||||
)
|
||||
continue
|
||||
else:
|
||||
contact_point = alert_receive_channel.grafana_alerting_sync_manager.create_contact_point(datasource)
|
||||
if contact_point is None:
|
||||
# Failed to create contact point duo to getting wrong alerting config.
|
||||
# Add datasource to list and retry to create contact point for it again
|
||||
datasource_to_create.append(datasource)
|
||||
datasources_to_create.append(datasource)
|
||||
|
||||
# if some contact points were not created, restart task for them
|
||||
if datasource_to_create:
|
||||
create_contact_points_for_datasource.apply_async((alert_receive_channel_id, datasource_to_create), countdown=5)
|
||||
if datasources_to_create:
|
||||
schedule_create_contact_points_for_datasource(alert_receive_channel_id, datasources_to_create)
|
||||
else:
|
||||
alert_receive_channel.is_finished_alerting_setup = True
|
||||
alert_receive_channel.save(update_fields=["is_finished_alerting_setup"])
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue