Merge pull request #4820 from grafana/dev

v1.8.11
This commit is contained in:
Vadim Stepanov 2024-08-14 11:05:32 +01:00 committed by GitHub
commit 22c644e89f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1357 additions and 722 deletions

View file

@ -23,5 +23,5 @@ runs:
if: ${{ inputs.install-dependencies == 'true' }}
shell: bash
run: |
pip install uv
pip install uv setuptools
uv pip sync --system ${{ inputs.python-requirements-paths }}

View file

@ -5,6 +5,7 @@ on:
env:
DJANGO_SETTINGS_MODULE: settings.ci_test
SKIP_SLACK_SDK_WARNING: True
DATABASE_HOST: localhost
RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
SLACK_CLIENT_OAUTH_ID: 1

View file

@ -6,4 +6,4 @@ registry: ctlptl-registry
kindV1Alpha4Cluster:
nodes:
- role: control-plane
image: kindest/node:v1.27.3
image: kindest/node:v1.27.11

View file

@ -1,3 +1,3 @@
aiohttp==3.9.4
aiohttp==3.10.2
Faker==16.4.0
tqdm==4.66.3

View file

@ -14,6 +14,15 @@ canonical: https://grafana.com/docs/oncall/latest/configure/live-call-routing/
aliases:
- /docs/grafana-cloud/alerting-and-irm/oncall/configure/escalation-chains-and-routes/
- ../live-call-routing/ # /docs/oncall/<ONCALL_VERSION>/escalation-chains-and-routes/
refs:
open-source:
- pattern: /docs/oncall/
destination: /docs/oncall/<ONCALL_VERSION>/set-up/open-source/
- pattern: /docs/grafana-cloud/
destination: /docs/grafana-cloud/alerting-and-irm/oncall/set-up/open-source/
irm-invoice:
- pattern: /docs/grafana-cloud/
destination: /docs/grafana-cloud/cost-management-and-billing/understand-your-invoice/irm-invoice/
---
# Configure SMS & call routing with Grafana OnCall
@ -27,10 +36,18 @@ You can further customize your configuration to send different alerts to differe
To complete the steps in this guide, ensure you have the following:
- Grafana Cloud account: If you haven't already, [sign up for Grafana Cloud](https://grafana.com/auth/sign-up/create-user).
- For Grafana Cloud users: A Grafana Cloud account. If you haven't already, [sign up for Grafana Cloud](https://grafana.com/auth/sign-up/create-user).
- For OSS users: Notification routing must be configured using either Grafana Cloud or a third-party provider, such as Twilio.
Refer to the [Grafana OnCall open source guide](ref:open-source) for more information.
- Grafana OnCall user with administrator privileges and notification settings configured.
- Twilio account: [Sign up for Twilio](https://www.twilio.com/try-twilio).
{{< admonition type="note" >}}
While OSS users have the option to use Grafana Cloud for phone and SMS routing, it is not required.
If you decide to use Grafana Cloud for notification delivery, be aware that charges may apply.
For more information, refer to our [billing documentation](ref:irm-invoice).
{{< /admonition >}}
## Basic set up
In the basic set up, you'll create an integration in OnCall and configure a phone number in Twilio.

View file

@ -27,7 +27,7 @@ RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
&& rm grpcio-1.64.1-cp312-cp312-linux_aarch64.whl; \
fi
RUN pip install uv
RUN pip install uv setuptools
# TODO: figure out how to get this to work.. see comment in .github/workflows/e2e-tests.yml
# https://stackoverflow.com/a/71846527

View file

@ -8,11 +8,10 @@ from drf_spectacular.utils import PolymorphicProxySerializer, extend_schema_fiel
from jinja2 import TemplateSyntaxError
from rest_framework import serializers
from rest_framework.exceptions import ValidationError
from rest_framework.fields import SerializerMethodField, set_value
from rest_framework.fields import SerializerMethodField
from apps.alerts.grafana_alerting_sync_manager.grafana_alerting_sync import GrafanaAlertingSyncManager
from apps.alerts.models import AlertReceiveChannel
from apps.alerts.models.channel_filter import ChannelFilter
from apps.base.messaging import get_messaging_backends
from apps.integrations.legacy_prefix import has_legacy_prefix
from apps.labels.models import LabelKeyCache, LabelValueCache
@ -277,7 +276,7 @@ class AlertReceiveChannelSerializer(
# With using of select_related ORM builds strange join
# which leads to incorrect heartbeat-alert_receive_channel binding in result
PREFETCH_RELATED = ["channel_filters", "integration_heartbeat", "labels", "labels__key", "labels__value"]
SELECT_RELATED = ["organization", "author"]
SELECT_RELATED = ["organization", "author", "team"]
class Meta:
model = AlertReceiveChannel
@ -490,11 +489,12 @@ class AlertReceiveChannelSerializer(
return has_legacy_prefix(obj.integration)
def get_connected_escalations_chains_count(self, obj: "AlertReceiveChannel") -> int:
return (
ChannelFilter.objects.filter(alert_receive_channel=obj, escalation_chain__isnull=False)
.values("escalation_chain")
.distinct()
.count()
return len(
set(
channel_filter.escalation_chain_id
for channel_filter in obj.channel_filters.all()
if channel_filter.escalation_chain_id is not None
)
)
@ -632,7 +632,7 @@ class AlertReceiveChannelTemplatesSerializer(EagerLoadingMixin, serializers.Mode
backend_updates[field] = value
# update backend templates
backend_templates.update(backend_updates)
set_value(ret, ["messaging_backends_templates", backend_id], backend_templates)
self.set_value(ret, ["messaging_backends_templates", backend_id], backend_templates)
return errors
@ -651,7 +651,7 @@ class AlertReceiveChannelTemplatesSerializer(EagerLoadingMixin, serializers.Mode
errors[field_name] = "invalid template"
except DjangoValidationError:
errors[field_name] = "invalid URL"
set_value(ret, [field_name], value)
self.set_value(ret, [field_name], value)
return errors
def to_representation(self, obj: "AlertReceiveChannel"):

View file

@ -1,5 +1,3 @@
from datetime import timedelta
import pytest
from django.utils import timezone
@ -29,8 +27,8 @@ def make_resolved_ack_new_silenced_alert_groups(make_alert_group, make_alert_rec
resolved_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=channel_filter,
acknowledged_at=timezone.now() + timedelta(hours=1),
resolved_at=timezone.now() + timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
resolved=True,
acknowledged=True,
)
@ -39,7 +37,7 @@ def make_resolved_ack_new_silenced_alert_groups(make_alert_group, make_alert_rec
ack_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=channel_filter,
acknowledged_at=timezone.now() + timedelta(hours=1),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
acknowledged=True,
)
make_alert(alert_group=ack_alert_group, raw_request_data=alert_raw_request_data)
@ -51,7 +49,7 @@ def make_resolved_ack_new_silenced_alert_groups(make_alert_group, make_alert_rec
alert_receive_channel,
channel_filter=channel_filter,
silenced=True,
silenced_at=timezone.now() + timedelta(hours=1),
silenced_at=timezone.now() + timezone.timedelta(hours=1),
)
make_alert(alert_group=silenced_alert_group, raw_request_data=alert_raw_request_data)

View file

@ -1,4 +1,3 @@
import datetime
from unittest.mock import Mock, patch
import pytest
@ -250,8 +249,8 @@ def test_get_filter_resolved_by(
resolved_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
acknowledged_at=timezone.now() + datetime.timedelta(hours=1),
resolved_at=timezone.now() + datetime.timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
resolved=True,
acknowledged=True,
resolved_by_user=first_user,
@ -302,8 +301,8 @@ def test_get_filter_resolved_by_multiple_values(
resolved_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
acknowledged_at=timezone.now() + datetime.timedelta(hours=1),
resolved_at=timezone.now() + datetime.timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
resolved=True,
acknowledged=True,
resolved_by_user=user,
@ -348,8 +347,8 @@ def test_get_filter_acknowledged_by(
acknowledged_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
acknowledged_at=timezone.now() + datetime.timedelta(hours=1),
resolved_at=timezone.now() + datetime.timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
acknowledged=True,
acknowledged_by_user=first_user,
)
@ -398,8 +397,8 @@ def test_get_filter_acknowledged_by_multiple_values(
acknowledged_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
acknowledged_at=timezone.now() + datetime.timedelta(hours=1),
resolved_at=timezone.now() + datetime.timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
acknowledged=True,
acknowledged_by_user=user,
)
@ -442,7 +441,7 @@ def test_get_filter_silenced_by(
silenced_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
silenced_at=timezone.now() + datetime.timedelta(hours=1),
silenced_at=timezone.now() + timezone.timedelta(hours=1),
silenced=True,
silenced_by_user=first_user,
)
@ -491,7 +490,7 @@ def test_get_filter_silenced_by_multiple_values(
acknowledged_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
silenced_at=timezone.now() + datetime.timedelta(hours=1),
silenced_at=timezone.now() + timezone.timedelta(hours=1),
silenced=True,
silenced_by_user=user,
)
@ -670,8 +669,8 @@ def test_get_filter_mine(
acknowledged_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
acknowledged_at=timezone.now() + datetime.timedelta(hours=1),
resolved_at=timezone.now() + datetime.timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
acknowledged=True,
acknowledged_by_user=first_user,
)
@ -724,8 +723,8 @@ def test_get_filter_involved_users(
acknowledged_alert_group = make_alert_group(
alert_receive_channel,
channel_filter=default_channel_filter,
acknowledged_at=timezone.now() + datetime.timedelta(hours=1),
resolved_at=timezone.now() + datetime.timedelta(hours=2),
acknowledged_at=timezone.now() + timezone.timedelta(hours=1),
resolved_at=timezone.now() + timezone.timedelta(hours=2),
acknowledged=True,
acknowledged_by_user=first_user,
)
@ -999,7 +998,7 @@ def test_get_title_search(
alert_receive_channel, channel_filter=channel_filter, web_title_cache=f"testing {i+1}"
)
# alert groups starting every months going back
alert_group.started_at = timezone.now() - datetime.timedelta(days=10 + 30 * i)
alert_group.started_at = timezone.now() - timezone.timedelta(days=10 + 30 * i)
alert_group.save(update_fields=["started_at"])
make_alert(alert_group=alert_group, raw_request_data=alert_raw_request_data)
alert_groups.append(alert_group)
@ -1021,8 +1020,8 @@ def test_get_title_search(
response = client.get(
url
+ "?search=testing&started_at={}_{}".format(
(timezone.now() - datetime.timedelta(days=500)).strftime(DateRangeFilterMixin.DATE_FORMAT),
(timezone.now() - datetime.timedelta(days=30)).strftime(DateRangeFilterMixin.DATE_FORMAT),
(timezone.now() - timezone.timedelta(days=500)).strftime(DateRangeFilterMixin.DATE_FORMAT),
(timezone.now() - timezone.timedelta(days=30)).strftime(DateRangeFilterMixin.DATE_FORMAT),
),
format="json",
**make_user_auth_headers(user, token),

View file

@ -242,7 +242,7 @@ class AlertReceiveChannelView(
)
# distinct to remove duplicates after alert_receive_channels X labels join
queryset = queryset.distinct()
queryset = queryset.distinct().order_by("id")
return queryset

View file

@ -65,7 +65,9 @@ class BaseShiftSwapViewSet(ModelViewSet):
return ShiftSwapRequestListSerializer if self.action == "list" else super().get_serializer_class()
def get_queryset(self):
queryset = ShiftSwapRequest.objects.filter(schedule__organization=self.request.auth.organization)
queryset = ShiftSwapRequest.objects.filter(schedule__organization=self.request.auth.organization).order_by(
"-created_at"
)
return self.serializer_class.setup_eager_loading(queryset)
def perform_destroy(self, instance: ShiftSwapRequest) -> None:

View file

@ -20,6 +20,7 @@ from apps.email.inbound import InboundEmailWebhookView
],
)
@pytest.mark.django_db
@pytest.mark.filterwarnings("ignore:::anymail.*") # ignore missing WEBHOOK_SECRET in amazon ses test setup
def test_amazon_ses_provider_load(
settings, make_organization_and_user_with_token, make_alert_receive_channel, recipients, expected
):
@ -128,7 +129,10 @@ def test_mailgun_provider_load(
"sender_value,expected_result",
[
("'Alex Smith' <test@example.com>", "test@example.com"),
("'Alex Smith' via [TEST] mail <test@example.com>", "'Alex Smith' via [TEST] mail <test@example.com>"),
# double quotes required when including special characters
("\"'Alex Smith' via [TEST] mail\" <test@example.com>", "test@example.com"),
# missing double quotes
("'Alex Smith' via [TEST] mail <test@example.com>", "\"'Alex Smith' via\""),
],
)
def test_get_sender_from_email_message(sender_value, expected_result):

View file

@ -118,5 +118,5 @@ def sync_out_of_office_calendar_events_for_user(google_oauth2_user_pk: int) -> N
@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True)
def sync_out_of_office_calendar_events_for_all_users() -> None:
for google_oauth2_user in GoogleOAuth2User.objects.all():
for google_oauth2_user in GoogleOAuth2User.objects.filter(user__organization__deleted_at__isnull=True):
sync_out_of_office_calendar_events_for_user.apply_async(args=(google_oauth2_user.pk,))

View file

@ -372,3 +372,21 @@ def test_sync_out_of_office_calendar_events_for_user_preexisting_shift_swap_requ
ssrs.first().delete()
tasks.sync_out_of_office_calendar_events_for_user(google_oauth2_user_pk)
assert _fetch_shift_swap_requests().count() == 1
@patch("apps.google.tasks.sync_out_of_office_calendar_events_for_user.apply_async")
@pytest.mark.django_db
def test_sync_out_of_office_calendar_events_for_all_users(
    mock_sync_out_of_office_calendar_events_for_user,
    make_organization_and_user,
    make_google_oauth2_user_for_user,
):
    """Only Google users whose organization is still live get a per-user sync sub-task scheduled."""
    organization, user = make_organization_and_user()
    google_oauth2_user = make_google_oauth2_user_for_user(user)
    # Second organization is deleted below (presumably a soft delete setting
    # `deleted_at` — confirm against the Organization model); its user must be skipped.
    deleted_organization, deleted_user = make_organization_and_user()
    make_google_oauth2_user_for_user(deleted_user)
    deleted_organization.delete()
    tasks.sync_out_of_office_calendar_events_for_all_users()
    # Exactly one sub-task: the user belonging to the live organization.
    mock_sync_out_of_office_calendar_events_for_user.assert_called_once_with(args=(google_oauth2_user.pk,))

View file

@ -2,9 +2,10 @@ import logging
import re
import typing
from django.conf import settings
from django.core.cache import cache
from prometheus_client import CollectorRegistry
from prometheus_client.metrics_core import CounterMetricFamily, GaugeMetricFamily, HistogramMetricFamily
from prometheus_client.metrics_core import CounterMetricFamily, GaugeMetricFamily, HistogramMetricFamily, Metric
from apps.alerts.constants import AlertGroupState
from apps.metrics_exporter.constants import (
@ -26,6 +27,11 @@ from apps.metrics_exporter.helpers import (
get_organization_ids,
)
from apps.metrics_exporter.tasks import start_calculate_and_cache_metrics, start_recalculation_for_new_metric
from settings.base import (
METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME,
METRIC_ALERT_GROUPS_TOTAL_NAME,
METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME,
)
application_metrics_registry = CollectorRegistry()
@ -42,6 +48,8 @@ RE_USER_WAS_NOTIFIED_OF_ALERT_GROUPS = re.compile(_RE_BASE_PATTERN.format(USER_W
# https://github.com/prometheus/client_python#custom-collectors
class ApplicationMetricsCollector:
GetMetricFunc = typing.Callable[[set], typing.Tuple[Metric, set]]
def __init__(self):
self._buckets = (60, 300, 600, 3600, "+Inf")
self._stack_labels = [
@ -61,29 +69,33 @@ class ApplicationMetricsCollector:
self._user_labels = ["username"] + self._stack_labels
def collect(self):
"""
Collects metrics listed in METRICS_TO_COLLECT settings var
"""
metrics_map: typing.Dict[str, ApplicationMetricsCollector.GetMetricFunc] = {
METRIC_ALERT_GROUPS_TOTAL_NAME: self._get_alert_groups_total_metric,
METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME: self._get_response_time_metric,
METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME: self._get_user_was_notified_of_alert_groups_metric,
}
org_ids = set(get_organization_ids())
metrics: typing.List[Metric] = []
missing_org_ids: typing.Set[int] = set()
# alert groups total metric: gauge
alert_groups_total, missing_org_ids_1 = self._get_alert_groups_total_metric(org_ids)
# alert groups response time metrics: histogram
alert_groups_response_time_seconds, missing_org_ids_2 = self._get_response_time_metric(org_ids)
# user was notified of alert groups metrics: counter
user_was_notified, missing_org_ids_3 = self._get_user_was_notified_of_alert_groups_metric(org_ids)
# This part is used for releasing new metrics to avoid recalculation for every metric.
# Uncomment with metric name when needed.
# # update new metric gradually
# missing_org_ids_3 = self._update_new_metric(USER_WAS_NOTIFIED_OF_ALERT_GROUPS, org_ids, missing_org_ids_3)
for metric_name in settings.METRICS_TO_COLLECT:
if metric_name not in metrics_map:
logger.error(f"Invalid metric name {metric_name} in `METRICS_TO_COLLECT` var")
continue
metric, missing_org_ids_temp = metrics_map[metric_name](org_ids)
metrics.append(metric)
missing_org_ids |= missing_org_ids_temp
# check for orgs missing any of the metrics or needing a refresh, start recalculation task for missing org ids
missing_org_ids = missing_org_ids_1 | missing_org_ids_2 | missing_org_ids_3
self.recalculate_cache_for_missing_org_ids(org_ids, missing_org_ids)
yield alert_groups_total
yield alert_groups_response_time_seconds
yield user_was_notified
for metric in metrics:
yield metric
def _get_alert_groups_total_metric(self, org_ids):
def _get_alert_groups_total_metric(self, org_ids: set[int]) -> typing.Tuple[Metric, set[int]]:
alert_groups_total = GaugeMetricFamily(
ALERT_GROUPS_TOTAL, "All alert groups", labels=self._integration_labels_with_state
)
@ -98,15 +110,7 @@ class ApplicationMetricsCollector:
logger.warning(f"Deleting stale metrics cache for {org_key}")
cache.delete(org_key)
break
# Labels values should have the same order as _integration_labels_with_state
labels_values = [
integration_data["integration_name"], # integration
integration_data["team_name"], # team
integration_data["org_id"], # grafana org_id
integration_data["slug"], # grafana instance slug
integration_data["id"], # grafana instance id
]
labels_values = list(map(str, labels_values))
labels_values: typing.List[str] = self._get_labels_from_integration_data(integration_data)
for service_name in integration_data["services"]:
for state in AlertGroupState:
alert_groups_total.add_metric(
@ -118,7 +122,25 @@ class ApplicationMetricsCollector:
missing_org_ids = org_ids - processed_org_ids
return alert_groups_total, missing_org_ids
def _get_response_time_metric(self, org_ids):
def _get_user_was_notified_of_alert_groups_metric(self, org_ids: set[int]) -> typing.Tuple[Metric, set[int]]:
    """Build the per-user "was notified of alert groups" counter from cached per-org data.

    Returns the counter metric plus the set of org ids that had no cache entry,
    so the caller can trigger recalculation for them.
    """
    user_was_notified = CounterMetricFamily(
        USER_WAS_NOTIFIED_OF_ALERT_GROUPS, "Number of alert groups user was notified of", labels=self._user_labels
    )
    processed_org_ids = set()
    user_was_notified_keys = [get_metric_user_was_notified_of_alert_groups_key(org_id) for org_id in org_ids]
    # Single bulk cache round-trip for all orgs; each key maps to per-user metric dicts.
    org_users: typing.Dict[str, typing.Dict[int, UserWasNotifiedOfAlertGroupsMetricsDict]] = cache.get_many(
        user_was_notified_keys
    )
    for org_key, users in org_users.items():
        for _, user_data in users.items():
            # Label values must follow the order of self._user_labels.
            labels_values: typing.List[str] = self._get_labels_from_user_data(user_data)
            user_was_notified.add_metric(labels_values, user_data["counter"])
        # The org id is embedded in the cache key; recover it via the compiled regex.
        org_id_from_key = RE_USER_WAS_NOTIFIED_OF_ALERT_GROUPS.match(org_key).groups()[0]
        processed_org_ids.add(int(org_id_from_key))
    # Orgs without a cached entry are reported back for recalculation.
    missing_org_ids = org_ids - processed_org_ids
    return user_was_notified, missing_org_ids
def _get_response_time_metric(self, org_ids: set[int]) -> typing.Tuple[Metric, set[int]]:
alert_groups_response_time_seconds = HistogramMetricFamily(
ALERT_GROUPS_RESPONSE_TIME,
"Users response time to alert groups in 7 days (seconds)",
@ -135,21 +157,12 @@ class ApplicationMetricsCollector:
logger.warning(f"Deleting stale metrics cache for {org_key}")
cache.delete(org_key)
break
# Labels values should have the same order as _integration_labels
labels_values = [
integration_data["integration_name"], # integration
integration_data["team_name"], # team
integration_data["org_id"], # grafana org_id
integration_data["slug"], # grafana instance slug
integration_data["id"], # grafana instance id
]
labels_values = list(map(str, labels_values))
labels_values: typing.List[str] = self._get_labels_from_integration_data(integration_data)
for service_name, response_time in integration_data["services"].items():
if not response_time:
continue
buckets, sum_value = self.get_buckets_with_sum(response_time)
buckets = sorted(list(buckets.items()), key=lambda x: float(x[0]))
buckets_values, sum_value = self._get_buckets_with_sum(response_time)
buckets: list = sorted(list(buckets_values.items()), key=lambda x: float(x[0]))
alert_groups_response_time_seconds.add_metric(
labels_values + [service_name],
buckets=buckets,
@ -160,55 +173,7 @@ class ApplicationMetricsCollector:
missing_org_ids = org_ids - processed_org_ids
return alert_groups_response_time_seconds, missing_org_ids
def _get_user_was_notified_of_alert_groups_metric(self, org_ids):
user_was_notified = CounterMetricFamily(
USER_WAS_NOTIFIED_OF_ALERT_GROUPS, "Number of alert groups user was notified of", labels=self._user_labels
)
processed_org_ids = set()
user_was_notified_keys = [get_metric_user_was_notified_of_alert_groups_key(org_id) for org_id in org_ids]
org_users: typing.Dict[str, typing.Dict[int, UserWasNotifiedOfAlertGroupsMetricsDict]] = cache.get_many(
user_was_notified_keys
)
for org_key, users in org_users.items():
for _, user_data in users.items():
# Labels values should have the same order as _user_labels
labels_values = [
user_data["user_username"], # username
user_data["org_id"], # grafana org_id
user_data["slug"], # grafana instance slug
user_data["id"], # grafana instance id
]
labels_values = list(map(str, labels_values))
user_was_notified.add_metric(labels_values, user_data["counter"])
org_id_from_key = RE_USER_WAS_NOTIFIED_OF_ALERT_GROUPS.match(org_key).groups()[0]
processed_org_ids.add(int(org_id_from_key))
missing_org_ids = org_ids - processed_org_ids
return user_was_notified, missing_org_ids
def _update_new_metric(self, metric_name, org_ids, missing_org_ids):
"""
This method is used for new metrics to calculate metrics gradually and avoid force recalculation for all orgs
"""
calculation_started_key = get_metric_calculation_started_key(metric_name)
is_calculation_started = cache.get(calculation_started_key)
if len(missing_org_ids) == len(org_ids) or is_calculation_started:
missing_org_ids = set()
if not is_calculation_started:
start_recalculation_for_new_metric.apply_async((metric_name,))
return missing_org_ids
def recalculate_cache_for_missing_org_ids(self, org_ids, missing_org_ids):
cache_timer_for_org_keys = [get_metrics_cache_timer_key(org_id) for org_id in org_ids]
cache_timers_for_org = cache.get_many(cache_timer_for_org_keys)
recalculate_orgs: typing.List[RecalculateOrgMetricsDict] = []
for org_id in org_ids:
force_task = org_id in missing_org_ids
if force_task or not cache_timers_for_org.get(get_metrics_cache_timer_key(org_id)):
recalculate_orgs.append({"organization_id": org_id, "force": force_task})
if recalculate_orgs:
start_calculate_and_cache_metrics.apply_async((recalculate_orgs,))
def get_buckets_with_sum(self, values):
def _get_buckets_with_sum(self, values: typing.List[int]) -> typing.Tuple[typing.Dict[str, float], int]:
"""Put values in correct buckets and count values sum"""
buckets_values = {str(key): 0 for key in self._buckets}
sum_value = 0
@ -219,5 +184,51 @@ class ApplicationMetricsCollector:
sum_value += value
return buckets_values, sum_value
def _get_labels_from_integration_data(
    self, integration_data: AlertGroupsTotalMetricsDict | AlertGroupsResponseTimeMetricsDict
) -> typing.List[str]:
    """Return stringified label values for an integration-scoped metric sample.

    Values must have the same order as self._integration_labels_with_state.
    """
    labels_values = [
        integration_data["integration_name"],  # integration
        integration_data["team_name"],  # team
        integration_data["org_id"],  # grafana org_id
        integration_data["slug"],  # grafana instance slug
        integration_data["id"],  # grafana instance id
    ]
    # Prometheus label values must be strings.
    return list(map(str, labels_values))
application_metrics_registry.register(ApplicationMetricsCollector())
def _get_labels_from_user_data(self, user_data: UserWasNotifiedOfAlertGroupsMetricsDict) -> typing.List[str]:
    """Return stringified label values for a user-scoped metric sample.

    Values must have the same order as self._user_labels.
    """
    labels_values = [
        user_data["user_username"],  # username
        user_data["org_id"],  # grafana org_id
        user_data["slug"],  # grafana instance slug
        user_data["id"],  # grafana instance id
    ]
    # Prometheus label values must be strings.
    return list(map(str, labels_values))
def _update_new_metric(self, metric_name: str, org_ids: set[int], missing_org_ids: set[int]) -> set[int]:
    """
    This method is used for new metrics to calculate metrics gradually and avoid force recalculation for all orgs.
    Add to collect() method the following code with metric name when needed:
    # update new metric gradually
    missing_org_ids_X = self._update_new_metric(<NEW_METRIC_NAME>, org_ids, missing_org_ids_X)
    """
    calculation_started_key = get_metric_calculation_started_key(metric_name)
    is_calculation_started = cache.get(calculation_started_key)
    # A brand-new metric is "missing" for every org; in that case (or while a
    # gradual recalculation is already running) report nothing as missing so the
    # caller does not force per-org recalculation.
    if len(missing_org_ids) == len(org_ids) or is_calculation_started:
        missing_org_ids = set()
    if not is_calculation_started:
        # Kick off the one-time gradual recalculation task for this metric.
        start_recalculation_for_new_metric.apply_async((metric_name,))
    return missing_org_ids
def recalculate_cache_for_missing_org_ids(self, org_ids: set[int], missing_org_ids: set[int]) -> None:
    """Schedule metrics recalculation for orgs whose metric cache is missing or whose cache timer has expired."""
    cache_timer_for_org_keys = [get_metrics_cache_timer_key(org_id) for org_id in org_ids]
    # Bulk-fetch all timers at once; absent entry means the cache needs refreshing.
    cache_timers_for_org = cache.get_many(cache_timer_for_org_keys)
    recalculate_orgs: typing.List[RecalculateOrgMetricsDict] = []
    for org_id in org_ids:
        # Force recalculation when the org had no cached metric data at all.
        force_task = org_id in missing_org_ids
        if force_task or not cache_timers_for_org.get(get_metrics_cache_timer_key(org_id)):
            recalculate_orgs.append({"organization_id": org_id, "force": force_task})
    if recalculate_orgs:
        start_calculate_and_cache_metrics.apply_async((recalculate_orgs,))

View file

@ -15,16 +15,44 @@ from apps.metrics_exporter.constants import (
from apps.metrics_exporter.helpers import get_metric_alert_groups_response_time_key, get_metric_alert_groups_total_key
from apps.metrics_exporter.metrics_collectors import ApplicationMetricsCollector
from apps.metrics_exporter.tests.conftest import METRICS_TEST_SERVICE_NAME
from settings.base import (
METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME,
METRIC_ALERT_GROUPS_TOTAL_NAME,
METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME,
)
# redis cluster usage modifies the cache keys for some operations, so we need to test both cases
# see common.cache.ensure_cache_key_allocates_to_the_same_hash_slot for more details
@pytest.mark.parametrize("use_redis_cluster", [True, False])
@pytest.mark.parametrize(
"metric_base_names_and_metric_names",
[
[
[METRIC_ALERT_GROUPS_TOTAL_NAME, METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME],
[ALERT_GROUPS_TOTAL, USER_WAS_NOTIFIED_OF_ALERT_GROUPS],
],
[[METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME], [ALERT_GROUPS_RESPONSE_TIME]],
[
[
METRIC_ALERT_GROUPS_TOTAL_NAME,
METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME,
METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME,
],
[ALERT_GROUPS_TOTAL, USER_WAS_NOTIFIED_OF_ALERT_GROUPS, ALERT_GROUPS_RESPONSE_TIME],
],
],
)
@patch("apps.metrics_exporter.metrics_collectors.get_organization_ids", return_value=[1])
@patch("apps.metrics_exporter.metrics_collectors.start_calculate_and_cache_metrics.apply_async")
@pytest.mark.django_db
def test_application_metrics_collector(
mocked_org_ids, mocked_start_calculate_and_cache_metrics, mock_cache_get_metrics_for_collector, use_redis_cluster
def test_application_metrics_collectors(
mocked_org_ids,
mocked_start_calculate_and_cache_metrics,
mock_cache_get_metrics_for_collector,
use_redis_cluster,
metric_base_names_and_metric_names,
settings,
):
"""Test that ApplicationMetricsCollector generates expected metrics from cache"""
@ -41,10 +69,16 @@ def test_application_metrics_collector(
return labels
with override_settings(USE_REDIS_CLUSTER=use_redis_cluster):
settings.METRICS_TO_COLLECT = metric_base_names_and_metric_names[0]
collector = ApplicationMetricsCollector()
test_metrics_registry = CollectorRegistry()
test_metrics_registry.register(collector)
for metric in test_metrics_registry.collect():
metrics = [i for i in test_metrics_registry.collect()]
assert len(metrics) == len(metric_base_names_and_metric_names[1])
for metric in metrics:
assert metric.name in metric_base_names_and_metric_names[1]
if metric.name == ALERT_GROUPS_TOTAL:
# 2 integrations with labels for each alert group state per service
assert len(metric.samples) == len(AlertGroupState) * 3 # 2 from 1st integration and 1 from 2nd
@ -71,6 +105,8 @@ def test_application_metrics_collector(
elif metric.name == USER_WAS_NOTIFIED_OF_ALERT_GROUPS:
# metric with labels for each notified user
assert len(metric.samples) == 1
else:
raise AssertionError
result = generate_latest(test_metrics_registry).decode("utf-8")
assert result is not None
assert mocked_org_ids.called
@ -91,7 +127,9 @@ def test_application_metrics_collector_with_old_metrics_without_services(
collector = ApplicationMetricsCollector()
test_metrics_registry = CollectorRegistry()
test_metrics_registry.register(collector)
for metric in test_metrics_registry.collect():
metrics = [i for i in test_metrics_registry.collect()]
assert len(metrics) == 3
for metric in metrics:
if metric.name == ALERT_GROUPS_TOTAL:
alert_groups_total_metrics_cache = cache.get(get_metric_alert_groups_total_key(org_id))
assert alert_groups_total_metrics_cache and "services" not in alert_groups_total_metrics_cache[1]
@ -106,6 +144,8 @@ def test_application_metrics_collector_with_old_metrics_without_services(
elif metric.name == USER_WAS_NOTIFIED_OF_ALERT_GROUPS:
# metric with labels for each notified user
assert len(metric.samples) == 1
else:
raise AssertionError
result = generate_latest(test_metrics_registry).decode("utf-8")
assert result is not None
assert mocked_org_ids.called

View file

@ -256,5 +256,5 @@ def conditionally_send_going_oncall_push_notifications_for_schedule(schedule_pk)
@shared_dedicated_queue_retry_task()
def conditionally_send_going_oncall_push_notifications_for_all_schedules() -> None:
for schedule in OnCallSchedule.objects.all():
for schedule in OnCallSchedule.objects.filter(organization__deleted_at__isnull=True):
conditionally_send_going_oncall_push_notifications_for_schedule.apply_async((schedule.pk,))

View file

@ -470,13 +470,16 @@ def test_conditionally_send_going_oncall_push_notifications_for_schedule(
@pytest.mark.django_db
def test_conditionally_send_going_oncall_push_notifications_for_all_schedules(
mocked_conditionally_send_going_oncall_push_notifications_for_schedule,
make_organization_and_user,
make_organization,
make_schedule,
):
organization, _ = make_organization_and_user()
organization = make_organization()
deleted_organization = make_organization(deleted_at=timezone.now())
schedule1 = make_schedule(organization, schedule_class=OnCallScheduleCalendar)
schedule2 = make_schedule(organization, schedule_class=OnCallScheduleICal)
schedule3 = make_schedule(organization, schedule_class=OnCallScheduleWeb)
make_schedule(deleted_organization, schedule_class=OnCallScheduleWeb)
conditionally_send_going_oncall_push_notifications_for_all_schedules()

View file

@ -5,7 +5,7 @@ import typing
import requests
from django.conf import settings
from firebase_admin.exceptions import FirebaseError
from firebase_admin.messaging import AndroidConfig, APNSConfig, APNSPayload, Message, UnregisteredError
from firebase_admin.messaging import AndroidConfig, APNSConfig, APNSPayload, Message, SendResponse, UnregisteredError
from requests import HTTPError
from rest_framework import status
@ -49,9 +49,12 @@ def send_message_to_fcm_device(device: "FCMDevice", message: Message) -> bool:
https://firebase.google.com/docs/cloud-messaging/http-server-ref#interpret-downstream
"""
response = device.send_message(message)
logger.debug(f"FCM response: {response}")
if isinstance(response, FirebaseError):
if isinstance(response, SendResponse):
logger.debug(
f"FCM response: success={response.success} message_id={response.message_id} exception={response.exception}"
)
elif isinstance(response, FirebaseError):
logger.exception(
f"FCM error occured in mobile_app.utils.send_message_to_fcm_device fcm_device_info={device} "
f"firebase_error_code={response._code} firebase_error_cause={response._cause} "
@ -63,6 +66,9 @@ def send_message_to_fcm_device(device: "FCMDevice", message: Message) -> bool:
return False
raise response
else:
logger.debug(f"FCM response: {response}")
return True

View file

@ -1,3 +1,4 @@
import datetime
import time
from rest_framework import fields, serializers
@ -345,7 +346,9 @@ class CustomOnCallShiftSerializer(EagerLoadingMixin, serializers.ModelSerializer
if isinstance(validated_data.get(field), list) and len(validated_data[field]) == 0:
validated_data[field] = None
if validated_data.get("start") is not None:
validated_data["start"] = validated_data["start"].replace(tzinfo=None)
# store start date as UTC, TZ is really given by the time_zone field
# (see apps/schedules/models/custom_on_call_shift.py::convert_dt_to_schedule_timezone)
validated_data["start"] = validated_data["start"].replace(tzinfo=datetime.timezone.utc)
if validated_data.get("frequency") is not None and "interval" not in validated_data:
# if there is frequency but no interval is given, default to 1
validated_data["interval"] = 1

View file

@ -1,4 +1,3 @@
import datetime
from unittest.mock import patch
import pytest
@ -69,7 +68,7 @@ def test_refresh_ical_files_filter_orgs(
make_schedule,
):
organization = make_organization()
deleted_organization = make_organization(deleted_at=datetime.datetime.now())
deleted_organization = make_organization(deleted_at=timezone.now())
schedule_from_deleted_org = make_schedule(deleted_organization, schedule_class=OnCallScheduleWeb)
schedule = make_schedule(organization, schedule_class=OnCallScheduleWeb)

View file

@ -1,6 +1,6 @@
import datetime
import enum
import typing
from datetime import datetime
from apps.slack.client import SlackClient
from apps.slack.errors import (
@ -83,7 +83,7 @@ def post_message_to_channel(organization: "Organization", channel_id: str, text:
def _format_datetime_to_slack(timestamp: float, format: str) -> str:
fallback = datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M (UTC)")
fallback = datetime.datetime.fromtimestamp(timestamp, datetime.UTC).strftime("%Y-%m-%d %H:%M (UTC)")
return f"<!date^{int(timestamp)}^{format}|{fallback}>"

View file

@ -433,8 +433,8 @@ class SlackEventApiEndpointView(APIView):
step_was_found = True
if not step_was_found:
raise Exception("Step is undefined" + str(payload))
logger.warning("SlackEventApiEndpointView: Step is undefined" + str(payload))
return Response(status=422)
return Response(status=200)
@staticmethod

View file

@ -136,9 +136,11 @@ def test_button_press_handler_non_existing_alert_group(
make_telegram_user_connector(user_1, telegram_chat_id=chat_id)
update = generate_button_press_ack_message(chat_id, 1234)
update_data = update.callback_query.data
handler = ButtonPressHandler(update=update)
with patch.object(update.callback_query, "answer") as mock_answer:
with patch.object(update, "callback_query", autospec=True) as mock_callback_query:
mock_callback_query.data = update_data
handler.process_update()
mock_answer.assert_called_once_with(NOT_FOUND_ERROR, show_alert=True)
mock_callback_query.answer.assert_called_once_with(NOT_FOUND_ERROR, show_alert=True)

View file

@ -1,8 +1,9 @@
from datetime import datetime
from django.conf import settings
from django.db.models import Q
from django.utils import timezone
from django_filters import rest_framework as filters
from django_filters.utils import handle_timezone
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers
@ -12,6 +13,14 @@ from common.api_helpers.exceptions import BadRequest
NO_TEAM_VALUE = "null"
def _handle_timezone(value):
if settings.USE_TZ and timezone.is_naive(value):
return timezone.make_aware(value, timezone.get_current_timezone())
elif not settings.USE_TZ and timezone.is_aware(value):
return timezone.make_naive(value, timezone.utc)
return value
class DateRangeFilterMixin:
DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"
@ -44,8 +53,8 @@ class DateRangeFilterMixin:
if start_date > end_date:
raise BadRequest(detail="Invalid range value")
start_date = handle_timezone(start_date, False)
end_date = handle_timezone(end_date, False)
start_date = _handle_timezone(start_date)
end_date = _handle_timezone(end_date)
return start_date, end_date

View file

@ -439,7 +439,7 @@ def test_ordered_model_create_swap_and_delete_concurrent():
assert list(TestOrderedModel.objects.values_list("extra_field", flat=True)) == expected_extra_field_values
class TestOrderedModelSerializer(OrderedModelSerializer):
class OrderedModelSerializerForTests(OrderedModelSerializer):
class Meta:
model = TestOrderedModel
fields = OrderedModelSerializer.Meta.fields + ["test_field", "extra_field"]
@ -461,7 +461,7 @@ def test_ordered_model_swap_all_to_zero_via_serializer():
def update_order_to_zero(idx):
try:
instance = instances[idx]
serializer = TestOrderedModelSerializer(instance, data={"order": 0, "extra_field": idx}, partial=True)
serializer = OrderedModelSerializerForTests(instance, data={"order": 0, "extra_field": idx}, partial=True)
serializer.is_valid(raise_exception=True)
serializer.save()
instance.swap(positions[idx])

View file

@ -10,7 +10,7 @@ logger = logging.getLogger(__name__)
class RequestTimeLoggingMiddleware(MiddlewareMixin):
@staticmethod
def log_message(request, response, tag, message=""):
dt = datetime.datetime.utcnow()
dt = datetime.datetime.now(datetime.UTC)
if not hasattr(request, "_logging_start_dt"):
request._logging_start_dt = dt
if request.path.startswith("/integrations/v1"):

View file

@ -18,21 +18,21 @@ charset-normalizer==3.3.2
# requests
distlib==0.3.8
# via virtualenv
django==4.2.11
django==4.2.15
# via
# -c requirements.txt
# django-stubs
# django-stubs-ext
django-filter-stubs==0.1.3
# via -r requirements-dev.in
django-stubs==4.2.2
django-stubs[compatible-mypy]==4.2.2
# via
# -r requirements-dev.in
# django-filter-stubs
# djangorestframework-stubs
django-stubs-ext==4.2.7
# via django-stubs
djangorestframework-stubs==3.14.2
djangorestframework-stubs[compatible-mypy]==3.14.2
# via
# -r requirements-dev.in
# django-filter-stubs
@ -96,7 +96,7 @@ pytest-django==4.8.0
# via -r requirements-dev.in
pytest-factoryboy==2.7.0
# via -r requirements-dev.in
pytest-xdist==3.6.1
pytest-xdist[psutil]==3.6.1
# via -r requirements-dev.in
python-dateutil==2.8.2
# via
@ -106,14 +106,10 @@ pyyaml==6.0.1
# via
# -c requirements.txt
# pre-commit
requests==2.32.0
requests==2.32.3
# via
# -c requirements.txt
# djangorestframework-stubs
setuptools==70.0.0
# via
# -c requirements.txt
# nodeenv
six==1.16.0
# via
# -c requirements.txt
@ -156,7 +152,7 @@ typing-extensions==4.9.0
# djangorestframework-stubs
# mypy
# pytest-factoryboy
urllib3==1.26.18
urllib3==1.26.19
# via
# -c requirements.txt
# requests

View file

@ -2,10 +2,10 @@ babel==2.12.1
beautifulsoup4==4.12.2
celery[redis]==5.3.1
cryptography==42.0.8
django==4.2.11
django==4.2.15
django-add-default-value==0.10.0
django-amazon-ses==4.0.1
django-anymail==8.6
django-anymail==11.1
django-cors-headers==3.7.0
# pyroscope-io==0.8.1
django-dbconn-retry==0.1.7
@ -23,7 +23,7 @@ django-redis==5.4.0
django-rest-polymorphic==0.1.10
django-silk==5.0.3
django-sns-view==0.1.2
djangorestframework==3.14.0
djangorestframework==3.15.2
factory-boy<3.0
drf-spectacular==0.26.5
emoji==2.4.0
@ -32,7 +32,7 @@ emoji==2.4.0
grpcio==1.64.1
fcm-django @ https://github.com/grafana/fcm-django/archive/refs/tags/v1.0.12r1.tar.gz#sha256=7ec7cd9d353fc9edf19a4acd4fa14090a31d83d02ac986c5e5e081dea29f564f
hiredis==2.2.3
humanize==0.5.1
humanize==4.10.0
icalendar==5.0.10
lxml==5.2.2
markdown2==2.4.10
@ -53,23 +53,14 @@ python-telegram-bot==13.13
recurring-ical-events==2.1.0
redis==5.0.1
regex==2021.11.2
requests==2.32.0
requests==2.32.3
slack-export-viewer==1.1.4
slack_sdk==3.21.3
social-auth-app-django==5.4.1
twilio~=6.37.0
urllib3==1.26.18
urllib3==1.26.19
uwsgi==2.0.26
whitenoise==5.3.0
google-api-python-client==2.122.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.0
# see the following resources as to why we need to install setuptools manually
#
# Python 3.12 release notes https://docs.python.org/3/whatsnew/3.12.html
#
# python/cpython#95299: Do not pre-install setuptools in virtual environments
# created with venv. This means that distutils, setuptools, pkg_resources, and
# easy_install will no longer available by default; to access these run pip
# install setuptools in the activated virtual environment.
setuptools==70.0.0

View file

@ -74,7 +74,7 @@ deprecated==1.2.14
# via
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-grpc
django==4.2.11
django==4.2.15
# via
# -r requirements.in
# django-add-default-value
@ -100,7 +100,7 @@ django-add-default-value==0.10.0
# via -r requirements.in
django-amazon-ses==4.0.1
# via -r requirements.in
django-anymail==8.6
django-anymail==11.1
# via -r requirements.in
django-cors-headers==3.7.0
# via -r requirements.in
@ -136,7 +136,7 @@ django-silk==5.0.3
# via -r requirements.in
django-sns-view==0.1.2
# via -r requirements.in
djangorestframework==3.14.0
djangorestframework==3.15.2
# via
# -r requirements.in
# django-rest-polymorphic
@ -217,7 +217,7 @@ httplib2==0.22.0
# via
# google-api-python-client
# google-auth-httplib2
humanize==0.5.1
humanize==4.10.0
# via -r requirements.in
icalendar==5.0.10
# via
@ -370,7 +370,6 @@ python3-openid==3.2.0
pytz==2024.1
# via
# apscheduler
# djangorestframework
# icalendar
# python-telegram-bot
# recurring-ical-events
@ -393,7 +392,7 @@ referencing==0.33.0
# jsonschema-specifications
regex==2021.11.2
# via -r requirements.in
requests==2.32.0
requests==2.32.3
# via
# -r requirements.in
# cachecontrol
@ -455,10 +454,11 @@ uritemplate==4.1.1
# via
# drf-spectacular
# google-api-python-client
urllib3==1.26.18
urllib3==1.26.19
# via
# -r requirements.in
# botocore
# django-anymail
# requests
uwsgi==2.0.26
# via -r requirements.in

View file

@ -107,6 +107,17 @@ CHATOPS_SIGNING_SECRET = os.environ.get("CHATOPS_SIGNING_SECRET", None)
# Prometheus exporter metrics endpoint auth
PROMETHEUS_EXPORTER_SECRET = os.environ.get("PROMETHEUS_EXPORTER_SECRET")
# Application metric names without prefixes
METRIC_ALERT_GROUPS_TOTAL_NAME = "alert_groups_total"
METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME = "alert_groups_response_time"
METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME = "user_was_notified_of_alert_groups"
METRICS_ALL = [
METRIC_ALERT_GROUPS_TOTAL_NAME,
METRIC_ALERT_GROUPS_RESPONSE_TIME_NAME,
METRIC_USER_WAS_NOTIFIED_OF_ALERT_GROUPS_NAME,
]
# List of metrics to collect. Collect all available application metrics by default
METRICS_TO_COLLECT = os.environ.get("METRICS_TO_COLLECT", METRICS_ALL)
# Database
@ -457,8 +468,6 @@ TIME_ZONE = "UTC"
USE_I18N = True
USE_L10N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)

View file

@ -17,3 +17,9 @@ banned-modules =
addopts = --dist no --no-migrations --color=yes --showlocals
# https://pytest-django.readthedocs.io/en/latest/faq.html#my-tests-are-not-being-found-why
python_files = tests.py test_*.py *_tests.py
filterwarnings =
ignore:::django_filters.*
ignore:::httpretty.*
ignore:::polymorphic.*
ignore:::telegram.*

View file

@ -12,7 +12,7 @@ module.exports = {
{
files: ['src/**/*.{ts,tsx}'],
rules: {
'deprecation/deprecation': 'warn',
'deprecation/deprecation': 'off',
},
parserOptions: {
project: './tsconfig.json',

View file

@ -135,14 +135,14 @@
"@dnd-kit/sortable": "^7.0.2",
"@dnd-kit/utilities": "^3.2.1",
"@emotion/css": "11.10.6",
"@grafana/data": "^10.2.3",
"@grafana/data": "^11.1.3",
"@grafana/faro-web-sdk": "^1.4.2",
"@grafana/faro-web-tracing": "^1.4.2",
"@grafana/labels": "~1.5.1",
"@grafana/runtime": "^10.2.2",
"@grafana/runtime": "^11.1.3",
"@grafana/scenes": "^1.28.0",
"@grafana/schema": "^10.2.2",
"@grafana/ui": "10.2.0",
"@grafana/schema": "^11.1.3",
"@grafana/ui": "^11.1.3",
"@lifeomic/attempt": "^3.0.3",
"array-move": "^4.0.0",
"axios": "^1.6.7",

View file

@ -1,7 +1,11 @@
import React, { ReactElement, useMemo, useState } from 'react';
import { PluginExtensionLink } from '@grafana/data';
import { getPluginLinkExtensions } from '@grafana/runtime';
import {
type GetPluginExtensionsOptions,
getPluginLinkExtensions,
usePluginLinks as originalUsePluginLinks,
} from '@grafana/runtime';
import { Dropdown, ToolbarButton } from '@grafana/ui';
import { OnCallPluginExtensionPoints } from 'types';
@ -16,6 +20,9 @@ interface Props {
grafanaIncidentId: string | null;
}
// `usePluginLinks()` is only available in Grafana>=11.1.0, so we have a fallback for older versions
const usePluginLinks = originalUsePluginLinks === undefined ? usePluginLinksFallback : originalUsePluginLinks;
export function ExtensionLinkDropdown({
incident,
extensionPointId,
@ -24,15 +31,15 @@ export function ExtensionLinkDropdown({
}: Props): ReactElement | null {
const [isOpen, setIsOpen] = useState(false);
const context = useExtensionPointContext(incident);
const extensions = useExtensionLinks(context, extensionPointId);
const { links, isLoading } = usePluginLinks({ context, extensionPointId, limitPerPlugin: 3 });
if (extensions.length === 0) {
if (links.length === 0 || isLoading) {
return null;
}
const menu = (
<ExtensionLinkMenu
extensions={extensions}
extensions={links}
declareIncidentLink={declareIncidentLink}
grafanaIncidentId={grafanaIncidentId}
/>
@ -51,24 +58,31 @@ function useExtensionPointContext(incident: ApiSchemas['AlertGroup']): PluginExt
return { alertGroup: incident };
}
function useExtensionLinks<T extends object>(
context: T,
extensionPointId: OnCallPluginExtensionPoints
): PluginExtensionLink[] {
function usePluginLinksFallback({ context, extensionPointId, limitPerPlugin }: GetPluginExtensionsOptions): {
links: PluginExtensionLink[];
isLoading: boolean;
} {
return useMemo(() => {
// getPluginLinkExtensions is available in Grafana>=10.0,
// so will be undefined in earlier versions. Just return an
// empty list of extensions in this case.
if (getPluginLinkExtensions === undefined) {
return [];
return {
links: [],
isLoading: false,
};
}
const { extensions } = getPluginLinkExtensions({
extensionPointId,
context,
limitPerPlugin: 3,
limitPerPlugin,
});
return extensions;
return {
links: extensions,
isLoading: false,
};
}, [context]);
}

View file

@ -30,12 +30,10 @@ exports[`AddResponders should properly display the add responders button when hi
>
<div>
<button
aria-disabled="false"
class="css-8b29hm-button"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
<span
class="css-1riaxdn"
>
@ -117,12 +115,10 @@ exports[`AddResponders should render properly in create mode 1`] = `
>
<div>
<button
aria-disabled="false"
class="css-8b29hm-button"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
<span
class="css-1riaxdn"
>
@ -170,12 +166,10 @@ exports[`AddResponders should render properly in update mode 1`] = `
>
<div>
<button
aria-disabled="false"
class="css-8b29hm-button"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
<span
class="css-1riaxdn"
>
@ -223,12 +217,10 @@ exports[`AddResponders should render selected team and users properly 1`] = `
>
<div>
<button
aria-disabled="false"
class="css-8b29hm-button"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
<span
class="css-1riaxdn"
>
@ -282,15 +274,11 @@ exports[`AddResponders should render selected team and users properly 1`] = `
>
<button
aria-label="Remove responder"
class="css-17584xm"
class="css-a2noi1"
data-testid="team-responder-delete-icon"
tabindex="0"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
</button>
/>
</div>
</div>
</li>
@ -352,6 +340,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
aria-live="polite"
aria-relevant="additions text"
class="css-1f43avz-a11yText-A11yText"
role="log"
/>
<div
class="css-1i88p6p"
@ -366,6 +355,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
Select...
</div>
<input
aria-activedescendant=""
aria-autocomplete="list"
aria-describedby="react-select-2-placeholder"
aria-expanded="false"
@ -382,11 +372,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
</div>
<div
class="css-zyjsuv-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
</div>
@ -395,15 +381,11 @@ exports[`AddResponders should render selected team and users properly 1`] = `
>
<button
aria-label="Remove responder"
class="css-17584xm"
class="css-a2noi1"
data-testid="user-responder-delete-icon"
tabindex="0"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
</button>
/>
</div>
</div>
</div>
@ -467,6 +449,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
aria-live="polite"
aria-relevant="additions text"
class="css-1f43avz-a11yText-A11yText"
role="log"
/>
<div
class="css-1i88p6p"
@ -481,6 +464,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
Select...
</div>
<input
aria-activedescendant=""
aria-autocomplete="list"
aria-describedby="react-select-3-placeholder"
aria-expanded="false"
@ -496,11 +480,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
</div>
<div
class="css-zyjsuv-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
</div>
@ -509,15 +489,11 @@ exports[`AddResponders should render selected team and users properly 1`] = `
>
<button
aria-label="Remove responder"
class="css-17584xm"
class="css-a2noi1"
data-testid="user-responder-delete-icon"
tabindex="0"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
</button>
/>
</div>
</div>
</div>
@ -581,6 +557,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
aria-live="polite"
aria-relevant="additions text"
class="css-1f43avz-a11yText-A11yText"
role="log"
/>
<div
class="css-1i88p6p"
@ -595,6 +572,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
Select...
</div>
<input
aria-activedescendant=""
aria-autocomplete="list"
aria-describedby="react-select-4-placeholder"
aria-expanded="false"
@ -610,11 +588,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
</div>
<div
class="css-zyjsuv-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
</div>
@ -623,15 +597,11 @@ exports[`AddResponders should render selected team and users properly 1`] = `
>
<button
aria-label="Remove responder"
class="css-17584xm"
class="css-a2noi1"
data-testid="user-responder-delete-icon"
tabindex="0"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
</button>
/>
</div>
</div>
</div>
@ -640,28 +610,24 @@ exports[`AddResponders should render selected team and users properly 1`] = `
<div
aria-label="[object Object]"
class="css-10yjoiw css-182y09v"
data-testid="data-testid Alert info"
role="status"
>
<div
class="css-1td7znu"
class="css-1ewk8v0"
data-testid="data-testid Alert info"
>
<div
class="css-ufgc62"
class="css-9n8jpb"
>
<div
class="css-tluiue"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
<div
class="css-1gmwkrf"
class="css-vjkmk1"
>
<span
class="css-9om60p"
class="css-b9x8ok"
>
<span
class="css-77ouhj--primary css-77ouhj--medium css-1287p17"
@ -686,11 +652,7 @@ exports[`AddResponders should render selected team and users properly 1`] = `
</div>
<div
class="css-12kn7ff-layoutChildrenWrapper"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</span>
</a>

View file

@ -22,11 +22,7 @@ exports[`AddRespondersPopup it shows a loading message initially 1`] = `
/>
<div
class="css-7099m8-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
<div
@ -35,16 +31,17 @@ exports[`AddRespondersPopup it shows a loading message initially 1`] = `
>
<div
class="css-1hvl7lx"
data-testid="data-testid radio-button"
>
<input
checked=""
class="css-1f9hgw3"
class="css-18nv6l3"
id="option-teams-radiogroup-1"
name="radiogroup-1"
type="radio"
/>
<label
class="css-10bka4u"
class="css-18zk0h1"
for="option-teams-radiogroup-1"
>
Teams
@ -53,15 +50,16 @@ exports[`AddRespondersPopup it shows a loading message initially 1`] = `
</div>
<div
class="css-1hvl7lx"
data-testid="data-testid radio-button"
>
<input
class="css-1f9hgw3"
class="css-18nv6l3"
id="option-users-radiogroup-1"
name="radiogroup-1"
type="radio"
/>
<label
class="css-10bka4u"
class="css-18zk0h1"
for="option-users-radiogroup-1"
>
Users
@ -75,14 +73,9 @@ exports[`AddRespondersPopup it shows a loading message initially 1`] = `
Loading...
<div
class="css-1tqtz24"
class="css-1baulvz"
data-testid="Spinner"
>
<i
aria-label="loading spinner"
class="fa fa-spinner fa-spin fa-spin"
/>
</div>
/>
</div>
</div>
</div>

View file

@ -14,6 +14,7 @@ exports[`NotificationPoliciesSelect disabled state 1`] = `
aria-live="polite"
aria-relevant="additions text"
class="css-1f43avz-a11yText-A11yText"
role="log"
/>
<div
class="css-1i88p6p"
@ -27,6 +28,7 @@ exports[`NotificationPoliciesSelect disabled state 1`] = `
Default
</div>
<input
aria-activedescendant=""
aria-autocomplete="list"
aria-expanded="false"
aria-haspopup="true"
@ -42,11 +44,7 @@ exports[`NotificationPoliciesSelect disabled state 1`] = `
</div>
<div
class="css-zyjsuv-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
</div>
@ -66,6 +64,7 @@ exports[`NotificationPoliciesSelect it renders properly 1`] = `
aria-live="polite"
aria-relevant="additions text"
class="css-1f43avz-a11yText-A11yText"
role="log"
/>
<div
class="css-1i88p6p"
@ -79,6 +78,7 @@ exports[`NotificationPoliciesSelect it renders properly 1`] = `
Default
</div>
<input
aria-activedescendant=""
aria-autocomplete="list"
aria-expanded="false"
aria-haspopup="true"
@ -93,11 +93,7 @@ exports[`NotificationPoliciesSelect it renders properly 1`] = `
</div>
<div
class="css-zyjsuv-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
</div>

View file

@ -43,15 +43,11 @@ exports[`TeamResponder it renders data properly 1`] = `
>
<button
aria-label="Remove responder"
class="css-17584xm"
class="css-a2noi1"
data-testid="team-responder-delete-icon"
tabindex="0"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
</button>
/>
</div>
</div>
</li>

View file

@ -60,6 +60,7 @@ exports[`UserResponder it renders data properly 1`] = `
aria-live="polite"
aria-relevant="additions text"
class="css-1f43avz-a11yText-A11yText"
role="log"
/>
<div
class="css-1i88p6p"
@ -73,6 +74,7 @@ exports[`UserResponder it renders data properly 1`] = `
Important
</div>
<input
aria-activedescendant=""
aria-autocomplete="list"
aria-expanded="false"
aria-haspopup="true"
@ -87,11 +89,7 @@ exports[`UserResponder it renders data properly 1`] = `
</div>
<div
class="css-zyjsuv-input-suffix"
>
<div
class="css-1j2891d-Icon"
/>
</div>
/>
</div>
</div>
</div>
@ -100,15 +98,11 @@ exports[`UserResponder it renders data properly 1`] = `
>
<button
aria-label="Remove responder"
class="css-17584xm"
class="css-a2noi1"
data-testid="user-responder-delete-icon"
tabindex="0"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
</button>
/>
</div>
</div>
</div>

View file

@ -24,14 +24,9 @@ exports[`MobileAppConnection it shows a QR code if the app isn't already connect
Loading...
<div
class="css-1tqtz24"
class="css-1baulvz"
data-testid="Spinner"
>
<i
aria-label="loading spinner"
class="fa fa-spinner fa-spin fa-spin"
/>
</div>
/>
</div>
</div>
<div
@ -150,14 +145,9 @@ exports[`MobileAppConnection it shows a loading message if it is currently disco
Loading...
<div
class="css-1tqtz24"
class="css-1baulvz"
data-testid="Spinner"
>
<i
aria-label="loading spinner"
class="fa fa-spinner fa-spin fa-spin"
/>
</div>
/>
</div>
</div>
<div
@ -276,14 +266,9 @@ exports[`MobileAppConnection it shows a loading message if it is currently fetch
Loading...
<div
class="css-1tqtz24"
class="css-1baulvz"
data-testid="Spinner"
>
<i
aria-label="loading spinner"
class="fa fa-spinner fa-spin fa-spin"
/>
</div>
/>
</div>
</div>
<div
@ -401,12 +386,10 @@ exports[`MobileAppConnection it shows a warning when cloud is not connected 1`]
href="/a/grafana-oncall-app/cloud"
>
<button
aria-disabled="false"
class="css-8b29hm-button"
type="button"
>
<div
class="css-1j2891d-Icon"
/>
<span
class="css-1riaxdn"
>

View file

@ -3,6 +3,7 @@
exports[`DisconnectButton it renders properly 1`] = `
<div>
<button
aria-disabled="false"
class="css-ttl745-button disconnect-button"
data-testid="test__disconnect"
type="button"

View file

@ -28,6 +28,7 @@ exports[`LinkLoginButton it renders properly 1`] = `
class="css-12oo3x0-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-td06pi-button"
type="button"
>

View file

@ -90,6 +90,7 @@ exports[`PluginConfigPage If onCallApiUrl is not set in the plugin's meta jsonDa
</div>
</div>
<button
aria-disabled="false"
class="css-td06pi-button"
type="submit"
>
@ -130,6 +131,7 @@ exports[`PluginConfigPage If onCallApiUrl is set, and updatePluginStatus returns
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-td06pi-button"
type="button"
>
@ -144,6 +146,7 @@ exports[`PluginConfigPage If onCallApiUrl is set, and updatePluginStatus returns
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-ttl745-button"
type="button"
>
@ -198,6 +201,7 @@ exports[`PluginConfigPage It doesn't make any network calls if the plugin config
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-ttl745-button"
type="button"
>
@ -253,6 +257,7 @@ exports[`PluginConfigPage OnCallApiUrl is set, and checkTokenAndIfPluginIsConnec
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-ttl745-button"
type="button"
>
@ -349,6 +354,7 @@ exports[`PluginConfigPage OnCallApiUrl is set, and checkTokenAndIfPluginIsConnec
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-td06pi-button"
type="button"
>
@ -363,6 +369,7 @@ exports[`PluginConfigPage OnCallApiUrl is set, and checkTokenAndIfPluginIsConnec
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-ttl745-button"
type="button"
>
@ -404,6 +411,7 @@ exports[`PluginConfigPage Plugin reset: successful - false 1`] = `
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-td06pi-button"
type="button"
>
@ -418,6 +426,7 @@ exports[`PluginConfigPage Plugin reset: successful - false 1`] = `
class="css-18qv8yz-layoutChildrenWrapper"
>
<button
aria-disabled="false"
class="css-ttl745-button"
type="button"
>
@ -522,6 +531,7 @@ exports[`PluginConfigPage Plugin reset: successful - true 1`] = `
</div>
</div>
<button
aria-disabled="false"
class="css-td06pi-button"
type="submit"
>

View file

@ -87,16 +87,14 @@ exports[`ConfigurationForm It doesn't allow the user to submit if the URL is inv
class="css-9z7wq3"
role="alert"
>
<div
class="css-1j2891d-Icon"
/>
Must be a valid URL
</div>
</div>
</div>
</div>
<button
class="css-td06pi-button"
aria-disabled="false"
class="css-9hybrt-button"
disabled=""
type="submit"
>
@ -255,6 +253,7 @@ exports[`ConfigurationForm It shows an error message if the self hosted plugin A
</span>
</div>
<button
aria-disabled="false"
class="css-td06pi-button"
type="submit"
>

View file

@ -4,7 +4,8 @@ exports[`RemoveCurrentConfigurationButton It renders properly when disabled 1`]
<body>
<div>
<button
class="css-ttl745-button"
aria-disabled="false"
class="css-mgdi0l-button"
disabled=""
type="button"
>
@ -22,6 +23,7 @@ exports[`RemoveCurrentConfigurationButton It renders properly when enabled 1`] =
<body>
<div>
<button
aria-disabled="false"
class="css-ttl745-button"
type="button"
>

File diff suppressed because it is too large Load diff

View file

@ -87,7 +87,7 @@ for t in results:
teams[t["id"]] = t["name"]
# fetch users (TODO: handle pagination)
# fetch users
# https://grafana.com/docs/grafana-cloud/alerting-and-irm/oncall/oncall-api-reference/users/#list-users
# GET {{API_URL}}/api/v1/users/

View file

@ -0,0 +1,127 @@
# requires requests (pip install requests)
# You can run it like this:
# $ ONCALL_API_TOKEN=<api-token> python page_each_user.py
# This script will create an escalation chain, an escalation policy and a webhook integration
# to trigger alerts to each user in the organization. It will iterate over all users and update
# the escalation policy to notify each user, then trigger an alert group to page that user.
# By default the escalation chain will be named "Page each user", the integration will be named "Page each user".
# You can customize these names by setting the environment variables ESCALATION_NAME and INTEGRATION_NAME.
# NOTE: You need to remove the existing escalation chain and integration if you want to run this script again.
import os
import time
import requests
# Base URL of the Grafana OnCall API; override via ONCALL_API_BASE_URL for
# stacks hosted in other regions (defaults to the US-central production stack).
ONCALL_API_BASE_URL = os.environ.get(
    "ONCALL_API_BASE_URL",
    "https://oncall-prod-us-central-0.grafana.net/oncall",
)
# API token used to authenticate every request (required; no default).
ONCALL_API_TOKEN = os.environ.get("ONCALL_API_TOKEN")
# Names given to the escalation chain and integration created by this script.
ESCALATION_NAME = os.environ.get("ESCALATION_NAME", "Page each user")
INTEGRATION_NAME = os.environ.get("INTEGRATION_NAME", "Page each user")
# Auth header attached to all OnCall API requests below.
headers = {
    "Authorization": ONCALL_API_TOKEN,
}
def setup_escalation():
    """Create the escalation chain used by the paging integration.

    Returns the decoded JSON body describing the created escalation chain.
    Raises requests.HTTPError if the OnCall API rejects the request
    (e.g. a chain with the same name already exists).
    """
    url = f"{ONCALL_API_BASE_URL}/api/v1/escalation_chains"
    payload = {"name": ESCALATION_NAME}
    resp = requests.post(url, headers=headers, json=payload)
    resp.raise_for_status()
    return resp.json()
def setup_escalation_policy(escalation_chain):
    """Create a base escalation policy attached to *escalation_chain*.

    The policy starts as a 60-second "wait" step; it is later rewritten per
    user by update_escalation_to_notify_user(). Returns the decoded JSON body
    of the created policy. Raises requests.HTTPError on API failure.
    """
    url = f"{ONCALL_API_BASE_URL}/api/v1/escalation_policies"
    payload = {
        "escalation_chain_id": escalation_chain["id"],
        "type": "wait",
        "duration": 60,
    }
    resp = requests.post(url, headers=headers, json=payload)
    resp.raise_for_status()
    return resp.json()
def update_escalation_to_notify_user(escalation_policy, user):
    """Rewrite *escalation_policy* so it notifies only the given *user*.

    Issues a PUT that changes the policy type to "notify_persons" targeting
    the user's ID. Returns None; raises requests.HTTPError on API failure.
    """
    url = f"{ONCALL_API_BASE_URL}/api/v1/escalation_policies/{escalation_policy['id']}"
    payload = {
        "type": "notify_persons",
        "persons_to_notify": [user["id"]],
    }
    resp = requests.put(url, headers=headers, json=payload)
    resp.raise_for_status()
def setup_integration(escalation_chain):
    """Create a webhook integration whose default route uses *escalation_chain*.

    Alerts posted to the integration's link will follow the given chain.
    Returns the decoded JSON body of the created integration (including its
    "link" URL). Raises requests.HTTPError on API failure.
    """
    body = {
        "name": INTEGRATION_NAME,
        "type": "webhook",
        "default_route": {
            "escalation_chain_id": escalation_chain["id"],
        },
        "templates": {
            "web": {
                # Render the alert title from the webhook payload's "title" field.
                "title": "{{ payload.title }}",
            }
        },
    }
    resp = requests.post(
        f"{ONCALL_API_BASE_URL}/api/v1/integrations",
        headers=headers,
        json=body,
    )
    resp.raise_for_status()
    return resp.json()
# setup escalation chain, escalation policy and integration
escalation_chain = setup_escalation()
escalation_policy = setup_escalation_policy(escalation_chain)
integration = setup_integration(escalation_chain)

# iterate users, update escalation policy and trigger alert group
page = 1
while True:
    url = ONCALL_API_BASE_URL + "/api/v1/users/"
    r = requests.get(url, params={"page": page}, headers=headers)
    r.raise_for_status()
    response_data = r.json()
    # guard against a missing/None "results" key so a malformed page
    # doesn't raise TypeError when iterating
    for u in response_data.get("results") or []:
        print("Updating escalation for user", u["username"])
        update_escalation_to_notify_user(escalation_policy, u)
        print("Triggering alert group for user", u["username"])
        # NOTE(review): the trigger response is deliberately not checked;
        # a failed page for one user should not abort the whole run
        response = requests.post(
            integration["link"],
            headers=headers,
            json={
                "title": f"Paging user {u['username']}",
                "message": "Please acknowledge this alert",
            },
        )
        # wait a bit to avoid rate limiting (and allow alert processing before next one)
        time.sleep(5)
    page += 1
    # default to 0 when "total_pages" is absent/None so int() never
    # receives None (original code crashed with TypeError in that case)
    total_pages = int(response_data.get("total_pages") or 0)
    if page > total_pages:
        break

View file

@ -1,19 +1,22 @@
# Sample scripts using public API
- [oncall_hours_reports.py](oncall_hours_reports.py)
- [oncall_hours_reports.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/oncall_hours_report.py)
Generate per-user on-call hours report
- [oncall_reports.py](oncall_reports.py)
- [oncall_reports.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/oncall_reports.py)
Generate CSV user reports using public API
- [shift_shifts.py](shift_shifts.py)
- [page_each_user.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/page_each_user.py)
Create an integration and trigger an alert group per user targeting each user
- [shift_shifts.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/shift_shifts.py)
Shift schedule shifts by a given delta
- [mattermost_webhooks.py](mattermost_webhooks.py)
- [mattermost_webhooks.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/mattermost_webhooks.py)
Setup Mattermost webhooks for alert group notifications
- [discord_webhooks.py](discord_webhooks.py)
- [discord_webhooks.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/discord_webhooks.py)
Setup Discord webhooks for alert group notifications
- [swap_requests_workday.py](swap_requests_workday.py)
- [swap_requests_workday.py](https://github.com/grafana/oncall/blob/dev/tools/scripts/swap_requests_workday.py)
Create shift swap requests using Workday absences information