Add service_name label to insight metrics (#4300)
# What this PR does

Adds `service_name` label to insight metrics.

NOTE: It is related to [this PR](https://github.com/grafana/oncall/pull/4227) and should be merged no sooner than two days after the next release (current release version is 1.4.4), because we need to wait for the metrics cache to be updated for all organizations (it uses the new cache structure with `services`).

## Which issue(s) this PR closes

Related to https://github.com/grafana/oncall-private/issues/2610

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not required)
- [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes.
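For context, the cache-structure change this rollout constraint refers to looks roughly like the sketch below. This is a minimal reconstruction inferred from the test fixtures in this diff; field values are illustrative, and `NO_SERVICE_VALUE` stands in for the real constant from `apps.metrics_exporter.constants`:

```python
NO_SERVICE_VALUE = "None"  # assumed placeholder; the actual value is defined in the constants module

# Old per-integration cache entry: state counters (and response times) live at the top level.
old_alert_groups_entry = {
    "integration_name": "Test metrics integration",
    "firing": 2,
    "acknowledged": 3,
    "silenced": 4,
    "resolved": 5,
}
old_response_time_entry = {"response_time": [2, 10, 200, 650]}

# New per-integration cache entry: the same data is nested under "services",
# keyed by the alert group's service_name label value.
new_alert_groups_entry = {
    "integration_name": "Test metrics integration",
    "services": {
        NO_SERVICE_VALUE: {"firing": 2, "acknowledged": 3, "silenced": 4, "resolved": 5},
        "test_service": {"firing": 12, "acknowledged": 13, "silenced": 14, "resolved": 15},
    },
}
new_response_time_entry = {"services": {NO_SERVICE_VALUE: [2, 10, 200, 650], "test_service": [4, 12, 20]}}
```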
parent a2859553ce
commit f583da5b56

6 changed files with 73 additions and 403 deletions

@@ -65,6 +65,7 @@ This metric has the following labels:

 | `org_id` | ID of Grafana organization |
 | `team` | Team name |
 | `integration` | OnCall Integration name |
+| `service_name`| Value of Alert group `service_name` label |
 | `state` | Alert groups state. May be `firing`, `acknowledged`, `resolved` and `silenced`|

 **Query example:**

@@ -86,6 +87,7 @@ This metric has the following labels:

 | `org_id` | ID of Grafana organization |
 | `team` | Team name |
 | `integration` | OnCall Integration name |
+| `service_name`| Value of Alert group `service_name` label |
 | `le` | Histogram bucket value in seconds. May be `60`, `300`, `600`, `3600` and `+Inf`|

 **Query example:**

@@ -287,14 +287,7 @@ def metrics_update_alert_groups_state_cache(states_diff: dict, organization_id:
         if not integration_alert_groups:
             continue
         for service_name, service_state_diff in service_data.items():
-            if "services" in integration_alert_groups:
-                states_to_update = integration_alert_groups["services"].setdefault(
-                    service_name, get_default_states_dict()
-                )
-            else:
-                # support version of metrics cache without service name. This clause can be removed when all metrics
-                # cache is updated on prod (~2 days after release)
-                states_to_update = integration_alert_groups
+            states_to_update = integration_alert_groups["services"].setdefault(service_name, get_default_states_dict())
             for previous_state, counter in service_state_diff["previous_states"].items():
                 if states_to_update[previous_state] - counter > 0:
                     states_to_update[previous_state] -= counter
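The simplified branch leans on `dict.setdefault` to lazily create a per-service state counter. A minimal, self-contained illustration of that behavior (the body of `get_default_states_dict` and the "payments" service name are assumptions, not code from the repo):

```python
def get_default_states_dict():
    # assumed shape: one zeroed counter per alert group state
    return {"firing": 0, "acknowledged": 0, "silenced": 0, "resolved": 0}

integration_alert_groups = {"services": {}}
# setdefault returns the existing per-service dict, or inserts and returns a fresh default one
states_to_update = integration_alert_groups["services"].setdefault("payments", get_default_states_dict())
states_to_update["firing"] += 1
assert integration_alert_groups["services"]["payments"]["firing"] == 1
```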

@@ -329,13 +322,8 @@ def metrics_update_alert_groups_response_time_cache(integrations_response_time:
         if not integration_response_time_metrics:
             continue
         for service_name, response_time_values in service_data.items():
-            if "services" in integration_response_time_metrics:
-                integration_response_time_metrics["services"].setdefault(service_name, [])
-                integration_response_time_metrics["services"][service_name].extend(response_time_values)
-            else:
-                # support version of metrics cache without service name. This clause can be removed when all metrics
-                # cache is updated on prod (~2 days after release)
-                integration_response_time_metrics["response_time"].extend(response_time_values)
+            integration_response_time_metrics["services"].setdefault(service_name, [])
+            integration_response_time_metrics["services"][service_name].extend(response_time_values)
     cache.set(metric_alert_groups_response_time_key, metric_alert_groups_response_time, timeout=metrics_cache_timeout)

@@ -9,6 +9,7 @@ from apps.alerts.constants import AlertGroupState
 from apps.metrics_exporter.constants import (
     ALERT_GROUPS_RESPONSE_TIME,
     ALERT_GROUPS_TOTAL,
+    SERVICE_LABEL,
     USER_WAS_NOTIFIED_OF_ALERT_GROUPS,
     AlertGroupsResponseTimeMetricsDict,
     AlertGroupsTotalMetricsDict,

@@ -52,7 +53,7 @@ class ApplicationMetricsCollector:
                 "team",
             ]
             + self._stack_labels
-            # + [SERVICE_LABEL]  # todo:metrics: uncomment when all metric cache is updated (~2 after release)
+            + [SERVICE_LABEL]
         )
         self._integration_labels_with_state = self._integration_labels + ["state"]
         self._user_labels = ["username"] + self._stack_labels
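With the previously commented-out line enabled, the collector's integration label set always includes the service label. An illustrative reconstruction of the resulting label layout (label names are taken from the tests in this PR; the exact contents of `self._stack_labels` are an assumption):

```python
SERVICE_LABEL = "service_name"
stack_labels = ["org_id", "slug", "id"]  # assumed contents of self._stack_labels
integration_labels = ["integration", "team"] + stack_labels + [SERVICE_LABEL]
integration_labels_with_state = integration_labels + ["state"]
assert integration_labels_with_state == ["integration", "team", "org_id", "slug", "id", "service_name", "state"]
```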

@@ -100,24 +101,12 @@ class ApplicationMetricsCollector:
                 integration_data["id"],  # grafana instance id
             ]
             labels_values = list(map(str, labels_values))
-            # clause below is needed for compatibility with old metric cache during rollout metrics with services
-            if "services" in integration_data:
-                count_per_state = {state.value: 0 for state in AlertGroupState}
-                for service_name in integration_data["services"]:
-                    for state in AlertGroupState:
-                        count_per_state[state.value] += integration_data["services"][service_name][state.value]
-                # todo:metrics: with enabling service_name label move "add_metric" under
-                # "for service_name..." iteration
-                for state_name, counter in count_per_state.items():
-                    alert_groups_total.add_metric(
-                        labels_values + [state_name],
-                        # todo:metrics: replace [state.value] when all metric cache is updated
-                        # + [service_name, state.value],
-                        counter,
-                    )
-            else:
-                for state in AlertGroupState:
-                    alert_groups_total.add_metric(labels_values + [state.value], integration_data[state.value])
+            for service_name in integration_data["services"]:
+                for state in AlertGroupState:
+                    alert_groups_total.add_metric(
+                        labels_values + [service_name, state.value],
+                        integration_data["services"][service_name][state.value],
+                    )
             org_id_from_key = RE_ALERT_GROUPS_TOTAL.match(org_key).groups()[0]
             processed_org_ids.add(int(org_id_from_key))
         missing_org_ids = org_ids - processed_org_ids

@@ -146,30 +135,16 @@ class ApplicationMetricsCollector:
             ]
             labels_values = list(map(str, labels_values))
 
-            # clause below is needed for compatibility with old metric cache during rollout metrics with services
-            if "services" in integration_data:
-                response_time_values = []
-                # todo:metrics: for service_name, response_time
-                for _, response_time in integration_data["services"].items():
-                    if not response_time:
-                        continue
-                    response_time_values.extend(response_time)
-            else:
-                response_time_values = integration_data["response_time"]
-
-            if not response_time_values:
-                # ignore empty response_time_values
-                continue
-
-            # todo:metrics: with enabling service_name label move "add_metric" under
-            # "for service_name, response_time..." iteration
-            buckets, sum_value = self.get_buckets_with_sum(response_time_values)
-            buckets = sorted(list(buckets.items()), key=lambda x: float(x[0]))
-            alert_groups_response_time_seconds.add_metric(
-                labels_values,  # + [service_name] todo:metrics: uncomment when all metric cache is updated
-                buckets=buckets,
-                sum_value=sum_value,
-            )
+            for service_name, response_time in integration_data["services"].items():
+                if not response_time:
+                    continue
+                buckets, sum_value = self.get_buckets_with_sum(response_time)
+                buckets = sorted(list(buckets.items()), key=lambda x: float(x[0]))
+                alert_groups_response_time_seconds.add_metric(
+                    labels_values + [service_name],
+                    buckets=buckets,
+                    sum_value=sum_value,
+                )
             org_id_from_key = RE_ALERT_GROUPS_RESPONSE_TIME.match(org_key).groups()[0]
             processed_org_ids.add(int(org_id_from_key))
         missing_org_ids = org_ids - processed_org_ids
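Each service now gets its own histogram series instead of one combined series per integration. A rough sketch of the bucketing step, assuming cumulative Prometheus-style buckets with the bounds listed in the docs table above (`get_buckets_with_sum` internals are assumed, not copied from the collector):

```python
def get_buckets_with_sum(values, bounds=(60, 300, 600, 3600)):
    # cumulative histogram: each bucket counts values less than or equal to its bound
    buckets = {str(b): 0.0 for b in bounds}
    buckets["+Inf"] = 0.0
    for v in values:
        for b in bounds:
            if v <= b:
                buckets[str(b)] += 1
        buckets["+Inf"] += 1
    return buckets, sum(values)

buckets, sum_value = get_buckets_with_sum([4, 12, 20])  # response times for the test service fixture
buckets = sorted(buckets.items(), key=lambda x: float(x[0]))  # float("+Inf") sorts last
assert sum_value == 36 and buckets[0] == ("60", 3.0)
```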

@@ -18,6 +18,7 @@ METRICS_TEST_ORG_ID = 123  # random number
 METRICS_TEST_INSTANCE_SLUG = "test_instance"
 METRICS_TEST_INSTANCE_ID = 292  # random number
 METRICS_TEST_USER_USERNAME = "Alex"
+METRICS_TEST_SERVICE_NAME = "test_service"
 
 
 @pytest.fixture()

@@ -45,6 +46,12 @@ def mock_cache_get_metrics_for_collector(monkeypatch):
                     "acknowledged": 3,
                     "resolved": 5,
                 },
+                METRICS_TEST_SERVICE_NAME: {
+                    "firing": 12,
+                    "silenced": 14,
+                    "acknowledged": 13,
+                    "resolved": 15,
+                },
             },
         },
         2: {

@@ -72,9 +79,7 @@ def mock_cache_get_metrics_for_collector(monkeypatch):
                 "org_id": 1,
                 "slug": "Test stack",
                 "id": 1,
-                "services": {
-                    NO_SERVICE_VALUE: [2, 10, 200, 650],
-                },
+                "services": {NO_SERVICE_VALUE: [2, 10, 200, 650], METRICS_TEST_SERVICE_NAME: [4, 12, 20]},
             },
             2: {
                 "integration_name": "Empty integration",

@@ -108,92 +113,6 @@ def mock_cache_get_metrics_for_collector(monkeypatch):
     monkeypatch.setattr(cache, "get_many", _mock_cache_get_many)
 
 
-# todo:metrics: remove later when all cache is updated
-@pytest.fixture()  # used for test backwards compatibility with old version of metrics
-def mock_cache_get_metrics_for_collector_mixed_versions(monkeypatch):
-    def _mock_cache_get(key, *args, **kwargs):
-        if ALERT_GROUPS_TOTAL in key:
-            key = ALERT_GROUPS_TOTAL
-        elif ALERT_GROUPS_RESPONSE_TIME in key:
-            key = ALERT_GROUPS_RESPONSE_TIME
-        elif USER_WAS_NOTIFIED_OF_ALERT_GROUPS in key:
-            key = USER_WAS_NOTIFIED_OF_ALERT_GROUPS
-        test_metrics = {
-            ALERT_GROUPS_TOTAL: {
-                1: {
-                    "integration_name": "Test metrics integration",
-                    "team_name": "Test team",
-                    "team_id": 1,
-                    "org_id": 1,
-                    "slug": "Test stack",
-                    "id": 1,
-                    "firing": 2,
-                    "acknowledged": 3,
-                    "silenced": 4,
-                    "resolved": 5,
-                },
-                2: {
-                    "integration_name": "Test metrics integration 2",
-                    "team_name": "Test team",
-                    "team_id": 1,
-                    "org_id": 1,
-                    "slug": "Test stack",
-                    "id": 1,
-                    "services": {
-                        NO_SERVICE_VALUE: {
-                            "firing": 2,
-                            "silenced": 4,
-                            "acknowledged": 3,
-                            "resolved": 5,
-                        },
-                        "test_service": {
-                            "firing": 10,
-                            "silenced": 10,
-                            "acknowledged": 10,
-                            "resolved": 10,
-                        },
-                    },
-                },
-            },
-            ALERT_GROUPS_RESPONSE_TIME: {
-                1: {
-                    "integration_name": "Test metrics integration",
-                    "team_name": "Test team",
-                    "team_id": 1,
-                    "org_id": 1,
-                    "slug": "Test stack",
-                    "id": 1,
-                    "response_time": [2, 10, 200, 650],
-                },
-                2: {
-                    "integration_name": "Test metrics integration 2",
-                    "team_name": "Test team",
-                    "team_id": 1,
-                    "org_id": 1,
-                    "slug": "Test stack",
-                    "id": 1,
-                    "services": {NO_SERVICE_VALUE: [2, 10, 200, 650], "test_service": [4, 8, 12]},
-                },
-            },
-            USER_WAS_NOTIFIED_OF_ALERT_GROUPS: {
-                1: {
-                    "org_id": 1,
-                    "slug": "Test stack",
-                    "id": 1,
-                    "user_username": "Alex",
-                    "counter": 4,
-                }
-            },
-        }
-        return test_metrics.get(key)
-
-    def _mock_cache_get_many(keys, *args, **kwargs):
-        return {key: _mock_cache_get(key) for key in keys if _mock_cache_get(key)}
-
-    monkeypatch.setattr(cache, "get", _mock_cache_get)
-    monkeypatch.setattr(cache, "get_many", _mock_cache_get_many)
-
-
 @pytest.fixture()
 def mock_get_metrics_cache(monkeypatch):
     def _mock_cache_get(key, *args, **kwargs):

@@ -255,50 +174,6 @@ def make_metrics_cache_params(monkeypatch):
     return _make_cache_params
 
 
-# todo:metrics: remove later when all cache is updated
-@pytest.fixture
-def make_metrics_cache_params_old_version(monkeypatch):
-    def _make_cache_params(integration_id, organization_id, team_name=None, team_id=None):
-        team_name = team_name or "No team"
-        team_id = team_id or "no_team"
-        metric_alert_groups_total_key = get_metric_alert_groups_total_key(organization_id)
-        metric_alert_groups_response_time_key = get_metric_alert_groups_response_time_key(organization_id)
-
-        def cache_get(key, *args, **kwargs):
-            metrics_data = {
-                metric_alert_groups_response_time_key: {
-                    integration_id: {
-                        "integration_name": METRICS_TEST_INTEGRATION_NAME,
-                        "team_name": team_name,
-                        "team_id": team_id,
-                        "org_id": METRICS_TEST_ORG_ID,
-                        "slug": METRICS_TEST_INSTANCE_SLUG,
-                        "id": METRICS_TEST_INSTANCE_ID,
-                        "response_time": [],
-                    }
-                },
-                metric_alert_groups_total_key: {
-                    integration_id: {
-                        "integration_name": METRICS_TEST_INTEGRATION_NAME,
-                        "team_name": team_name,
-                        "team_id": team_id,
-                        "org_id": METRICS_TEST_ORG_ID,
-                        "slug": METRICS_TEST_INSTANCE_SLUG,
-                        "id": METRICS_TEST_INSTANCE_ID,
-                        "firing": 0,
-                        "acknowledged": 0,
-                        "silenced": 0,
-                        "resolved": 0,
-                    }
-                },
-            }
-            return metrics_data.get(key, {})
-
-        return cache_get
-
-    return _make_cache_params
-
-
 @pytest.fixture
 def make_user_was_notified_metrics_cache_params(monkeypatch):
     def _make_cache_params(user_id, organization_id):

@@ -8,9 +8,11 @@ from apps.alerts.constants import AlertGroupState
 from apps.metrics_exporter.constants import (
     ALERT_GROUPS_RESPONSE_TIME,
     ALERT_GROUPS_TOTAL,
+    NO_SERVICE_VALUE,
     USER_WAS_NOTIFIED_OF_ALERT_GROUPS,
 )
 from apps.metrics_exporter.metrics_collectors import ApplicationMetricsCollector
+from apps.metrics_exporter.tests.conftest import METRICS_TEST_SERVICE_NAME
 
 
 # redis cluster usage modifies the cache keys for some operations, so we need to test both cases

@@ -24,17 +26,46 @@ def test_application_metrics_collector(
 ):
     """Test that ApplicationMetricsCollector generates expected metrics from cache"""
 
+    def get_expected_labels(service_name=NO_SERVICE_VALUE, **kwargs):
+        labels = {
+            "integration": "Test metrics integration",
+            "team": "Test team",
+            "org_id": "1",
+            "slug": "Test stack",
+            "id": "1",
+            "service_name": service_name,
+        }
+        labels.update(kwargs)
+        return labels
+
     with override_settings(USE_REDIS_CLUSTER=use_redis_cluster):
         collector = ApplicationMetricsCollector()
         test_metrics_registry = CollectorRegistry()
         test_metrics_registry.register(collector)
         for metric in test_metrics_registry.collect():
             if metric.name == ALERT_GROUPS_TOTAL:
-                # integration with labels for each alert group state
-                assert len(metric.samples) == len(AlertGroupState) * 2
+                # 2 integrations with labels for each alert group state per service
+                assert len(metric.samples) == len(AlertGroupState) * 3  # 2 from 1st integration and 1 from 2nd
+                assert {0, 2, 3, 4, 5, 12, 13, 14, 15} == set(sample.value for sample in metric.samples)
+                # check that labels were set correctly
+                expected_labels_no_service = get_expected_labels(state="firing")
+                expected_labels_test_service = get_expected_labels(METRICS_TEST_SERVICE_NAME, state="firing")
+                metric_labels = [sample.labels for sample in metric.samples]
+                for expected_labels in [expected_labels_no_service, expected_labels_test_service]:
+                    assert expected_labels in metric_labels
             elif metric.name == ALERT_GROUPS_RESPONSE_TIME:
-                # integration with labels for each value in collector's bucket + _count and _sum histogram values
-                assert len(metric.samples) == len(collector._buckets) + 2
+                # integration with labels for each of 2 service_name values in collector's bucket + _count and _sum
+                # histogram values
+                # ignore integration without response_time data
+                assert len(metric.samples) == (len(collector._buckets) + 2) * 2  # 2 from 1st integration, ignore 2nd
+                # check that `_sum` values for both services are presented
+                assert {36, 862}.issubset(set(sample.value for sample in metric.samples))
+                # check that labels were set correctly
+                expected_labels_no_service = get_expected_labels()
+                expected_labels_test_service = get_expected_labels(METRICS_TEST_SERVICE_NAME)
+                metric_labels = [sample.labels for sample in metric.samples]
+                for expected_labels in [expected_labels_no_service, expected_labels_test_service]:
+                    assert expected_labels in metric_labels
             elif metric.name == USER_WAS_NOTIFIED_OF_ALERT_GROUPS:
                 # metric with labels for each notified user
                 assert len(metric.samples) == 1
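The asserted `_sum` values follow directly from the response-time lists in the conftest fixture above:

```python
# one histogram _sum per service series in the fixture
assert sum([2, 10, 200, 650]) == 862  # NO_SERVICE_VALUE series
assert sum([4, 12, 20]) == 36  # METRICS_TEST_SERVICE_NAME series
```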

@@ -44,37 +75,3 @@ def test_application_metrics_collector(
         # Since there is no recalculation timer for test org in cache, start_calculate_and_cache_metrics must be called
         assert mocked_start_calculate_and_cache_metrics.called
         test_metrics_registry.unregister(collector)
-
-
-# todo:metrics: remove later when all cache is updated
-@patch("apps.metrics_exporter.metrics_collectors.get_organization_ids", return_value=[1])
-@patch("apps.metrics_exporter.metrics_collectors.start_calculate_and_cache_metrics.apply_async")
-@pytest.mark.django_db
-def test_application_metrics_collector_mixed_cache(
-    mocked_org_ids, mocked_start_calculate_and_cache_metrics, mock_cache_get_metrics_for_collector_mixed_versions
-):
-    """Test that ApplicationMetricsCollector generates expected metrics from previous and new versions of cache"""
-
-    collector = ApplicationMetricsCollector()
-    test_metrics_registry = CollectorRegistry()
-    test_metrics_registry.register(collector)
-    for metric in test_metrics_registry.collect():
-        if metric.name == ALERT_GROUPS_TOTAL:
-            # integration with labels for each alert group state
-            assert len(metric.samples) == len(AlertGroupState) * 2
-            # check that values from different services were combined to one sample
-            assert {2, 3, 4, 5, 12, 13, 14, 15} == set(sample.value for sample in metric.samples)
-        elif metric.name == ALERT_GROUPS_RESPONSE_TIME:
-            # integration with labels for each value in collector's bucket + _count and _sum histogram values
-            assert len(metric.samples) == (len(collector._buckets) + 2) * 2
-            # check that values from different services were combined to one sample
-            assert 7.0 in set(sample.value for sample in metric.samples)
-        elif metric.name == USER_WAS_NOTIFIED_OF_ALERT_GROUPS:
-            # metric with labels for each notified user
-            assert len(metric.samples) == 1
-    result = generate_latest(test_metrics_registry).decode("utf-8")
-    assert result is not None
-    assert mocked_org_ids.called
-    # Since there is no recalculation timer for test org in cache, start_calculate_and_cache_metrics must be called
-    assert mocked_start_calculate_and_cache_metrics.called
-    test_metrics_registry.unregister(collector)

@@ -21,11 +21,10 @@ from apps.metrics_exporter.tests.conftest import (
     METRICS_TEST_INSTANCE_SLUG,
     METRICS_TEST_INTEGRATION_NAME,
     METRICS_TEST_ORG_ID,
+    METRICS_TEST_SERVICE_NAME,
     METRICS_TEST_USER_USERNAME,
 )
 
-TEST_SERVICE_VALUE = "Test_service"
-
 
 @pytest.fixture
 def mock_apply_async(monkeypatch):

@@ -159,23 +158,23 @@ def test_update_metric_alert_groups_total_cache_on_action(
         alert_group.un_silence_by_user_or_backsync(user)
         arg_idx = get_called_arg_index_and_compare_results(expected_result_firing)
 
-        # create alert group with service label and check metric cache is updated properly
         # set state values to default
         expected_result_metric_alert_groups_total[alert_receive_channel.id]["services"][NO_SERVICE_VALUE].update(
             default_state
         )
 
+        # create alert group with service label and check metric cache is updated properly
         alert_group_with_service = make_alert_group(alert_receive_channel)
         make_alert(alert_group=alert_group_with_service, raw_request_data={})
         make_alert_group_label_association(
-            organization, alert_group_with_service, key_name=SERVICE_LABEL, value_name=TEST_SERVICE_VALUE
+            organization, alert_group_with_service, key_name=SERVICE_LABEL, value_name=METRICS_TEST_SERVICE_NAME
        )
         alert_group_created_signal.send(sender=alert_group_with_service.__class__, alert_group=alert_group_with_service)
         # check alert_groups_total metric cache, get called args
-        arg_idx = get_called_arg_index_and_compare_results(expected_result_firing, TEST_SERVICE_VALUE)
+        arg_idx = get_called_arg_index_and_compare_results(expected_result_firing, METRICS_TEST_SERVICE_NAME)
 
         alert_group_with_service.resolve_by_user_or_backsync(user)
-        get_called_arg_index_and_compare_results(expected_result_resolved, TEST_SERVICE_VALUE)
+        get_called_arg_index_and_compare_results(expected_result_resolved, METRICS_TEST_SERVICE_NAME)
 
 
 @patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async")

@@ -282,11 +281,11 @@ def test_update_metric_alert_groups_response_time_cache_on_action(
         alert_group_with_service = make_alert_group(alert_receive_channel)
         make_alert(alert_group=alert_group_with_service, raw_request_data={})
         make_alert_group_label_association(
-            organization, alert_group_with_service, key_name=SERVICE_LABEL, value_name=TEST_SERVICE_VALUE
+            organization, alert_group_with_service, key_name=SERVICE_LABEL, value_name=METRICS_TEST_SERVICE_NAME
         )
         assert_cache_was_not_changed_by_response_time_metric()
         alert_group_with_service.acknowledge_by_user_or_backsync(user)
-        get_called_arg_index_and_compare_results(TEST_SERVICE_VALUE)
+        get_called_arg_index_and_compare_results(METRICS_TEST_SERVICE_NAME)
 
 
 @pytest.mark.django_db

@@ -676,169 +675,3 @@ def test_metrics_add_integrations_to_cache(make_organization, make_alert_receive
         alert_receive_channel1.id: _expected_alert_groups_response_time(alert_receive_channel1),
         alert_receive_channel2.id: _expected_alert_groups_response_time(alert_receive_channel2, response_time=[12]),
     }
-
-
-# todo:metrics: remove later when all cache is updated
-@patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async")
-@patch("apps.alerts.tasks.send_alert_group_signal.alert_group_action_triggered_signal.send")
-@pytest.mark.django_db
-@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
-def test_update_metric_alert_groups_total_cache_on_action_backward_compatability(
-    mocked_send_log_signal,
-    mocked_action_signal_send,
-    mock_apply_async,
-    make_organization,
-    make_user_for_organization,
-    make_alert_receive_channel,
-    make_alert_group,
-    make_alert,
-    make_metrics_cache_params_old_version,
-    monkeypatch,
-):
-    """Test update metric cache works properly with previous version of cache"""
-    organization = make_organization(
-        org_id=METRICS_TEST_ORG_ID,
-        stack_slug=METRICS_TEST_INSTANCE_SLUG,
-        stack_id=METRICS_TEST_INSTANCE_ID,
-    )
-    user = make_user_for_organization(organization)
-    alert_receive_channel = make_alert_receive_channel(organization, verbal_name=METRICS_TEST_INTEGRATION_NAME)
-
-    metric_alert_groups_total_key = get_metric_alert_groups_total_key(organization.id)
-
-    expected_result_metric_alert_groups_total = {
-        alert_receive_channel.id: {
-            "integration_name": alert_receive_channel.verbal_name,
-            "team_name": "No team",
-            "team_id": "no_team",
-            "org_id": organization.org_id,
-            "slug": organization.stack_slug,
-            "id": organization.stack_id,
-            "firing": 0,
-            "silenced": 0,
-            "acknowledged": 0,
-            "resolved": 0,
-        }
-    }
-
-    expected_result_firing = {
-        "firing": 1,
-        "silenced": 0,
-        "acknowledged": 0,
-        "resolved": 0,
-    }
-
-    expected_result_acked = {
-        "firing": 0,
-        "silenced": 0,
-        "acknowledged": 1,
-        "resolved": 0,
-    }
-
-    expected_result_resolved = {
-        "firing": 0,
-        "silenced": 0,
-        "acknowledged": 0,
-        "resolved": 1,
-    }
-
-    metrics_cache = make_metrics_cache_params_old_version(alert_receive_channel.id, organization.id)
-    monkeypatch.setattr(cache, "get", metrics_cache)
-
-    def get_called_arg_index_and_compare_results(update_expected_result):
-        """find index for the metric argument, that was set in cache"""
-        for idx, called_arg in enumerate(mock_cache_set_called_args):
-            if idx >= arg_idx and called_arg.args[0] == metric_alert_groups_total_key:
-                expected_result_metric_alert_groups_total[alert_receive_channel.id].update(update_expected_result)
-                assert called_arg.args[1] == expected_result_metric_alert_groups_total
-                return idx + 1
-        raise AssertionError
-
-    with patch("apps.metrics_exporter.tasks.cache.set") as mock_cache_set:
-        arg_idx = 0
-        alert_group = make_alert_group(alert_receive_channel)
-        make_alert(alert_group=alert_group, raw_request_data={})
-        # this signal is normally called in get_or_create_grouping on create alert
-        alert_group_created_signal.send(sender=alert_group.__class__, alert_group=alert_group)
-
-        # check alert_groups_total metric cache, get called args
-        mock_cache_set_called_args = mock_cache_set.call_args_list
-        arg_idx = get_called_arg_index_and_compare_results(expected_result_firing)
-
-        alert_group.acknowledge_by_user_or_backsync(user)
-        arg_idx = get_called_arg_index_and_compare_results(expected_result_acked)
-
-        alert_group.resolve_by_user_or_backsync(user)
-        arg_idx = get_called_arg_index_and_compare_results(expected_result_resolved)
-
-        alert_group.un_resolve_by_user_or_backsync(user)
-        arg_idx = get_called_arg_index_and_compare_results(expected_result_firing)
-
-
-# todo:metrics: remove later when all cache is updated
-@patch("apps.alerts.models.alert_group_log_record.tasks.send_update_log_report_signal.apply_async")
-@patch("apps.alerts.tasks.send_alert_group_signal.alert_group_action_triggered_signal.send")
-@pytest.mark.django_db
-@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
-def test_update_metric_alert_groups_response_time_cache_on_action_backward_compatability(
-    mocked_send_log_signal,
-    mocked_action_signal_send,
-    mock_apply_async,
-    make_organization,
-    make_user_for_organization,
-    make_alert_receive_channel,
-    make_alert_group,
-    make_alert,
-    monkeypatch,
-    make_metrics_cache_params_old_version,
-):
-    """Test update metric cache works properly with previous version of cache"""
-    organization = make_organization(
-        org_id=METRICS_TEST_ORG_ID,
-        stack_slug=METRICS_TEST_INSTANCE_SLUG,
-        stack_id=METRICS_TEST_INSTANCE_ID,
-    )
-    user = make_user_for_organization(organization)
-    alert_receive_channel = make_alert_receive_channel(organization, verbal_name=METRICS_TEST_INTEGRATION_NAME)
-
-    metric_alert_groups_response_time_key = get_metric_alert_groups_response_time_key(organization.id)
-
-    expected_result_metric_alert_groups_response_time = {
-        alert_receive_channel.id: {
-            "integration_name": alert_receive_channel.verbal_name,
-            "team_name": "No team",
-            "team_id": "no_team",
-            "org_id": organization.org_id,
-            "slug": organization.stack_slug,
-            "id": organization.stack_id,
-            "response_time": [],
-        }
-    }
-
-    metrics_cache = make_metrics_cache_params_old_version(alert_receive_channel.id, organization.id)
-    monkeypatch.setattr(cache, "get", metrics_cache)
-
-    def get_called_arg_index_and_compare_results():
-        """find index for related to the metric argument, that was set in cache"""
-        for idx, called_arg in enumerate(mock_cache_set_called_args):
-            if idx >= arg_idx and called_arg.args[0] == metric_alert_groups_response_time_key:
-                response_time_values = called_arg.args[1][alert_receive_channel.id]["response_time"]
-                expected_result_metric_alert_groups_response_time[alert_receive_channel.id].update(
-                    {"response_time": response_time_values}
-                )
-                # response time values len always will be 1 here since cache is mocked and refreshed on every call
-                assert len(response_time_values) == 1
-                assert called_arg.args[1] == expected_result_metric_alert_groups_response_time
-                return idx + 1
-        raise AssertionError
-
-    with patch("apps.metrics_exporter.tasks.cache.set") as mock_cache_set:
-        arg_idx = 0
-        alert_group = make_alert_group(alert_receive_channel)
-        make_alert(alert_group=alert_group, raw_request_data={})
-
-        # check alert_groups_response_time metric cache, get called args
-        mock_cache_set_called_args = mock_cache_set.call_args_list
-
-        alert_group.acknowledge_by_user_or_backsync(user)
-        arg_idx = get_called_arg_index_and_compare_results()