oncall-engine/engine/apps/grafana_plugin/tasks/sync_v2.py
Michael Derynck d1cb862125
Make sync settings configurable (#5002)
# What this PR does
Add settings for how sync jobs get split up to control throughput of
requests.
- `SYNC_V2_MAX_TASKS ` controls how many tasks can run concurrently
- `SYNC_V2_PERIOD_SECONDS` controls the time offset before starting
another set of tasks each time `SYNC_V2_MAX_TASKS` is reached
- `SYNC_V2_BATCH_SIZE` controls how many organizations will be sync'd
per task

## Which issue(s) this PR closes

Related to [issue link here]

<!--
*Note*: If you want the issue to be auto-closed once the PR is merged,
change "Related to" to "Closes" in the line above.
If you have more than one GitHub issue that this PR closes, be sure to
preface
each issue link with a [closing
keyword](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/using-keywords-in-issues-and-pull-requests#linking-a-pull-request-to-an-issue).
This ensures that the issue(s) are auto-closed once the PR has been
merged.
-->

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] Added the relevant release notes label (see labels prefixed w/
`release:`). These labels dictate how your PR will
    show up in the autogenerated release notes.
2024-09-10 14:17:46 +00:00

56 lines
2.4 KiB
Python

import logging
from celery.utils.log import get_task_logger
from django.conf import settings
from apps.grafana_plugin.helpers.client import GrafanaAPIClient
from apps.grafana_plugin.helpers.gcom import get_active_instance_ids
from apps.user_management.models import Organization
from common.custom_celery_tasks import shared_dedicated_queue_retry_task
logger = get_task_logger(__name__)
logger.setLevel(logging.DEBUG)
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=0)
def start_sync_organizations_v2():
organization_qs = Organization.objects.all()
active_instance_ids, is_cloud_configured = get_active_instance_ids()
if is_cloud_configured:
if not active_instance_ids:
logger.warning("Did not find any active instances!")
return
else:
logger.debug(f"Found {len(active_instance_ids)} active instances")
organization_qs = organization_qs.filter(stack_id__in=active_instance_ids)
logger.info(f"Found {len(organization_qs)} active organizations")
batch = []
batch_index = 0
task_countdown_seconds = 0
for org in organization_qs:
if GrafanaAPIClient.validate_grafana_token_format(org.api_token):
batch.append(org.pk)
if len(batch) == settings.SYNC_V2_BATCH_SIZE:
sync_organizations_v2.apply_async((batch,), countdown=task_countdown_seconds)
batch = []
batch_index += 1
if batch_index == settings.SYNC_V2_MAX_TASKS:
batch_index = 0
task_countdown_seconds += settings.SYNC_V2_PERIOD_SECONDS
else:
logger.info(f"Skipping stack_slug={org.stack_slug}, api_token format is invalid or not set")
if batch:
sync_organizations_v2.apply_async((batch,), countdown=task_countdown_seconds)
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=0)
def sync_organizations_v2(org_ids=None):
organization_qs = Organization.objects.filter(id__in=org_ids)
for org in organization_qs:
client = GrafanaAPIClient(api_url=org.grafana_url, api_token=org.api_token)
_, status = client.sync()
if status["status_code"] != 200:
logger.error(
f"Failed to request sync org_id={org.pk} stack_slug={org.stack_slug} status_code={status['status_code']} url={status['url']} message={status['message']}"
)