From fbfcf7b7205a55fcee0e9f7882550e8ccdb2fd9b Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Fri, 25 Aug 2023 17:18:37 +0100 Subject: [PATCH 1/8] Prevent uWSGI from consuming too much memory (#2884) # What this PR does Fixes https://github.com/grafana/oncall/issues/1521 (see https://github.com/unbit/uwsgi/issues/2299) ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- engine/uwsgi.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/engine/uwsgi.ini b/engine/uwsgi.ini index 2103d899..66d4892a 100644 --- a/engine/uwsgi.ini +++ b/engine/uwsgi.ini @@ -95,6 +95,9 @@ die-on-term=true ; every request. It is not so slow, but with some kind of app/extensions that could be overkill. single-interpreter=true +; Prevent uWSGI from consuming too much memory: https://github.com/grafana/oncall/issues/1521 +max-fd=1048576 + logger=stdio log-format=source=engine:uwsgi status=%(status) method=%(method) path=%(uri) latency=%(secs) google_trace_id=%(var.HTTP_X_CLOUD_TRACE_CONTEXT) protocol=%(proto) resp_size=%(size) req_body_size=%(cl) log-encoder=format ${strftime:%%Y-%%m-%%d %%H:%%M:%%S} ${msgnl} From d5e1e9ade2fee5120a2a659fdab883982b6149b9 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Mon, 28 Aug 2023 10:02:55 +0200 Subject: [PATCH 2/8] add interactive api endpoint integration test for accepting shift swap requests --- .../tests/test_interactive_api_endpoint.py | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/engine/apps/slack/tests/test_interactive_api_endpoint.py b/engine/apps/slack/tests/test_interactive_api_endpoint.py index 74bd473e..fdb6c4c3 100644 --- a/engine/apps/slack/tests/test_interactive_api_endpoint.py +++ b/engine/apps/slack/tests/test_interactive_api_endpoint.py @@ -8,6 +8,7 @@ from rest_framework.test import APIClient from apps.slack.scenarios.manage_responders import ManageRespondersUserChange from apps.slack.scenarios.paging import OnPagingTeamChange +from apps.slack.scenarios.shift_swap_requests import AcceptShiftSwapRequestStep from apps.slack.types import PayloadType EVENT_TRIGGER_ID = "5333959822612.4122782784722.4734ff484b2ac4d36a185bb242ee9932" @@ -125,7 +126,7 @@ def test_organization_not_found_scenario_doesnt_break_slash_commands( "command": settings.SLACK_DIRECT_PAGING_SLASH_COMMAND, "text": "potato", "api_app_id": "A0909234092340293402934234234234234234", - "is_enterprise_install": "false", + "is_enterprise_install": "False", "response_url": "https://hooks.slack.com/commands/cvcv/cvcv/cvcv", "trigger_id": "asdfasdf.4122782784722.cvcv", } @@ -197,3 +198,45 @@ def test_organization_not_found_scenario_doesnt_break_manage_responders( assert response.status_code == status.HTTP_200_OK mock_process_scenario.assert_called_once() + + +@patch("apps.slack.views.SlackEventApiEndpointView.verify_signature", return_value=True) +@patch.object(AcceptShiftSwapRequestStep, "process_scenario") +@pytest.mark.django_db +def test_accept_shift_swap_request( + mock_process_scenario, + _mock_verify_signature, + make_organization, + make_slack_user_identity, + make_user, + slack_team_identity, +): + organization = make_organization(slack_team_identity=slack_team_identity) + slack_user_identity = make_slack_user_identity(slack_team_identity=slack_team_identity, slack_id=SLACK_USER_ID) + make_user(organization=organization, slack_user_identity=slack_user_identity) + + payload = { + "type": "block_actions", + "user": { + "id": SLACK_USER_ID, + }, + "team": { + "id": SLACK_TEAM_ID, + }, + "actions": [ + { + "action_id": "AcceptShiftSwapRequestStep", + "block_id": "G0ec", + "text": {"type": "plain_text", "text": ":heavy_check_mark: Accept Shift Swap Request", "emoji": True}, + "value": '{"shift_swap_request_pk": 5, "organization_id": 1}', + "style": "primary", + "type": "button", + "action_ts": "1693208812.474860", + } + ], + } + + response = _make_request(payload) + + assert response.status_code == status.HTTP_200_OK + mock_process_scenario.assert_called_once_with(slack_user_identity, slack_team_identity, payload) From c811a2ad156d6e8498d719d0f7c682c283f83dd6 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Mon, 28 Aug 2023 11:55:17 +0200 Subject: [PATCH 3/8] Address bug when taking a Shift Swap Request and the Slack message is not updated (#2886) # What this PR does Address bug when a Shift Swap Request is accepted either via the web or mobile UI, and the Slack message is not updated to reflect the latest state. ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 5 +++ engine/apps/api/tests/test_shift_swaps.py | 36 ++++++++++++++----- engine/apps/api/views/shift_swap.py | 2 ++ .../apps/public_api/tests/test_shift_swap.py | 12 +++++++ .../tests/test_interactive_api_endpoint.py | 2 +- 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fdceacc7..fbc80b9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Fixed + +- Address bug when a Shift Swap Request is accepted either via the web or mobile UI, and the Slack message is not + updated to reflect the latest state by @joeyorlando ([#2886](https://github.com/grafana/oncall/pull/2886)) + ## v1.3.27 (2023-08-25) ### Added diff --git a/engine/apps/api/tests/test_shift_swaps.py b/engine/apps/api/tests/test_shift_swaps.py index 145c79ca..05a17750 100644 --- a/engine/apps/api/tests/test_shift_swaps.py +++ b/engine/apps/api/tests/test_shift_swaps.py @@ -669,8 +669,9 @@ def test_delete_others_ssr_permissions(ssr_setup, make_user_auth_headers): assert response.status_code == status.HTTP_403_FORBIDDEN +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db -def test_take(ssr_setup, make_user_auth_headers): +def test_take(mock_update_shift_swap_request_message, ssr_setup, make_user_auth_headers): ssr, _, token, benefactor = ssr_setup() client = APIClient() url = reverse("api-internal:shift_swap-take", kwargs={"pk": ssr.public_primary_key}) @@ -696,9 +697,14 @@ def test_take(ssr_setup, make_user_auth_headers): assert response.status_code == status.HTTP_200_OK assert response_json == expected_response + mock_update_shift_swap_request_message.apply_async.assert_called_once_with((ssr.pk,)) + +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db -def test_benficiary_tries_to_take_their_own_ssr(ssr_setup, make_user_auth_headers): +def test_benficiary_tries_to_take_their_own_ssr( + mock_update_shift_swap_request_message, ssr_setup, make_user_auth_headers +): ssr, beneficiary, token, _ = ssr_setup() client = APIClient() url = reverse("api-internal:shift_swap-take", kwargs={"pk": ssr.public_primary_key}) @@ -706,6 +712,8 @@ def test_benficiary_tries_to_take_their_own_ssr(ssr_setup, make_user_auth_header response = client.post(url, format="json", **make_user_auth_headers(beneficiary, token)) assert response.status_code == status.HTTP_400_BAD_REQUEST + mock_update_shift_swap_request_message.apply_async.assert_not_called() + @pytest.mark.django_db def test_take_already_taken_ssr(ssr_setup, make_user_auth_headers): @@ -713,15 +721,22 @@ def test_take_already_taken_ssr(ssr_setup, make_user_auth_headers): client = APIClient() url = reverse("api-internal:shift_swap-take", kwargs={"pk": ssr.public_primary_key}) - response = client.post(url, format="json", **make_user_auth_headers(benefactor, token)) - assert response.status_code == status.HTTP_200_OK + with patch("apps.api.views.shift_swap.update_shift_swap_request_message") as mock_update_shift_swap_request_message: + response = client.post(url, format="json", **make_user_auth_headers(benefactor, token)) + assert response.status_code == status.HTTP_200_OK - response = client.post(url, format="json", **make_user_auth_headers(benefactor, token)) - assert response.status_code == status.HTTP_400_BAD_REQUEST + mock_update_shift_swap_request_message.apply_async.assert_called_once_with((ssr.pk,)) + + with patch("apps.api.views.shift_swap.update_shift_swap_request_message") as mock_update_shift_swap_request_message: + response = client.post(url, format="json", **make_user_auth_headers(benefactor, token)) + assert response.status_code == status.HTTP_400_BAD_REQUEST + + mock_update_shift_swap_request_message.apply_async.assert_not_called() +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db -def test_take_past_due_ssr(ssr_setup, make_user_auth_headers): +def test_take_past_due_ssr(mock_update_shift_swap_request_message, ssr_setup, make_user_auth_headers): ssr, _, token, benefactor = ssr_setup() client = APIClient() url = reverse("api-internal:shift_swap-take", kwargs={"pk": ssr.public_primary_key}) @@ -732,9 +747,12 @@ def test_take_past_due_ssr(ssr_setup, make_user_auth_headers): response = client.post(url, format="json", **make_user_auth_headers(benefactor, token)) assert response.status_code == status.HTTP_400_BAD_REQUEST + mock_update_shift_swap_request_message.apply_async.assert_not_called() + +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db -def test_take_deleted_ssr(ssr_setup, make_user_auth_headers): +def test_take_deleted_ssr(mock_update_shift_swap_request_message, ssr_setup, make_user_auth_headers): ssr, _, token, benefactor = ssr_setup() client = APIClient() url = reverse("api-internal:shift_swap-take", kwargs={"pk": ssr.public_primary_key}) @@ -744,6 +762,8 @@ def test_take_deleted_ssr(ssr_setup, make_user_auth_headers): response = client.post(url, format="json", **make_user_auth_headers(benefactor, token)) assert response.status_code == status.HTTP_404_NOT_FOUND + mock_update_shift_swap_request_message.apply_async.assert_not_called() + @patch("apps.api.views.shift_swap.ShiftSwapViewSet.take", return_value=mock_success_response) @pytest.mark.django_db diff --git a/engine/apps/api/views/shift_swap.py b/engine/apps/api/views/shift_swap.py index 8e89d68c..5a4c01af 100644 --- a/engine/apps/api/views/shift_swap.py +++ b/engine/apps/api/views/shift_swap.py @@ -45,6 +45,8 @@ class BaseShiftSwapViewSet(ModelViewSet): except exceptions.BeneficiaryCannotTakeOwnShiftSwapRequest: raise BadRequest(detail="A shift swap request cannot be created and taken by the same user") + update_shift_swap_request_message.apply_async((shift_swap.pk,)) + return ShiftSwapRequestSerializer(shift_swap).data def get_serializer_class(self): diff --git a/engine/apps/public_api/tests/test_shift_swap.py b/engine/apps/public_api/tests/test_shift_swap.py index 913144f7..a4593320 100644 --- a/engine/apps/public_api/tests/test_shift_swap.py +++ b/engine/apps/public_api/tests/test_shift_swap.py @@ -330,8 +330,10 @@ def test_delete( mock_update_shift_swap_request_message.apply_async.assert_called_once_with((swap.pk,)) +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db def test_take( + mock_update_shift_swap_request_message, make_organization_and_user_with_token, make_user_for_organization, setup_swap, @@ -352,9 +354,13 @@ def test_take( assert swap.status == ShiftSwapRequest.Statuses.TAKEN assert swap.benefactor == another_user + mock_update_shift_swap_request_message.apply_async.assert_called_once_with((swap.pk,)) + +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db def test_take_requires_benefactor( + mock_update_shift_swap_request_message, make_organization_and_user_with_token, setup_swap, ): @@ -372,9 +378,13 @@ def test_take_requires_benefactor( assert swap.status == ShiftSwapRequest.Statuses.OPEN assert swap.benefactor is None + mock_update_shift_swap_request_message.apply_async.assert_not_called() + +@patch("apps.api.views.shift_swap.update_shift_swap_request_message") @pytest.mark.django_db def test_take_errors( + mock_update_shift_swap_request_message, make_organization_and_user_with_token, make_user_for_organization, setup_swap, @@ -411,3 +421,5 @@ def test_take_errors( data = {"benefactor": another_user.public_primary_key} response = client.post(url, data, format="json", HTTP_AUTHORIZATION=f"{token}") assert response.status_code == status.HTTP_400_BAD_REQUEST + + mock_update_shift_swap_request_message.apply_async.assert_not_called() diff --git a/engine/apps/slack/tests/test_interactive_api_endpoint.py b/engine/apps/slack/tests/test_interactive_api_endpoint.py index fdb6c4c3..62d2026c 100644 --- a/engine/apps/slack/tests/test_interactive_api_endpoint.py +++ b/engine/apps/slack/tests/test_interactive_api_endpoint.py @@ -228,7 +228,7 @@ def test_accept_shift_swap_request( "action_id": "AcceptShiftSwapRequestStep", "block_id": "G0ec", "text": {"type": "plain_text", "text": ":heavy_check_mark: Accept Shift Swap Request", "emoji": True}, - "value": '{"shift_swap_request_pk": 5, "organization_id": 1}', + "value": f'{{"shift_swap_request_pk": 5, "organization_id": {organization.pk}}}', "style": "primary", "type": "button", "action_ts": "1693208812.474860", From 5bc7351671f57c56d2b908d68b4c549e48d3eab3 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Mon, 28 Aug 2023 14:13:01 +0200 Subject: [PATCH 4/8] Fix next step eta for silenced alert groups (#2887) # What this PR does Update `next_step_eta` in alert group escalation snapshot when alert group is silenced for period ## Which issue(s) this PR fixes Fixes the issue related to [this one](https://github.com/grafana/oncall-private/issues/2028) ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --------- Co-authored-by: Joey Orlando Co-authored-by: Joey Orlando --- .../escalation_snapshot_mixin.py | 13 ++ engine/apps/alerts/models/alert_group.py | 12 +- .../alerts/tasks/check_escalation_finished.py | 6 +- engine/apps/alerts/tests/test_alert_group.py | 162 +++++++++++++++++- .../test_check_escalation_finished_task.py | 6 +- .../tests/test_escalation_snapshot_mixin.py | 27 +++ 6 files changed, 222 insertions(+), 4 deletions(-) diff --git a/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py b/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py index afccd39c..c7d17e8f 100644 --- a/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py +++ b/engine/apps/alerts/escalation_snapshot/escalation_snapshot_mixin.py @@ -223,6 +223,19 @@ class EscalationSnapshotMixin: raw_next_step_eta = self.raw_escalation_snapshot.get("next_step_eta") return None if not raw_next_step_eta else parse(raw_next_step_eta).replace(tzinfo=pytz.UTC) + def update_next_step_eta(self, increase_by_timedelta: datetime.timedelta) -> typing.Optional[dict]: + """ + update next_step_eta field directly to avoid serialization overhead + """ + if not self.raw_escalation_snapshot: + return None + + raw_next_step_eta = self.raw_escalation_snapshot.get("next_step_eta") + next_step_eta = parse(raw_next_step_eta).replace(tzinfo=pytz.UTC) + updated_next_step_eta = next_step_eta + increase_by_timedelta + self.raw_escalation_snapshot["next_step_eta"] = updated_next_step_eta.strftime("%Y-%m-%dT%H:%M:%S.%fZ") + return self.raw_escalation_snapshot + def start_escalation_if_needed(self, countdown=START_ESCALATION_DELAY, eta=None): """ :type self:AlertGroup diff --git a/engine/apps/alerts/models/alert_group.py b/engine/apps/alerts/models/alert_group.py index d7965159..2bf9be0c 100644 --- a/engine/apps/alerts/models/alert_group.py +++ b/engine/apps/alerts/models/alert_group.py @@ -17,6 +17,7 @@ from django.utils.functional import cached_property from apps.alerts.constants import AlertGroupState from apps.alerts.escalation_snapshot import EscalationSnapshotMixin +from apps.alerts.escalation_snapshot.escalation_snapshot_mixin import START_ESCALATION_DELAY from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE from apps.alerts.incident_appearance.renderers.slack_renderer import AlertGroupSlackRenderer from apps.alerts.incident_log_builder import IncidentLogBuilder @@ -977,12 +978,18 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. silence_delay_timedelta = datetime.timedelta(seconds=silence_delay) silenced_until = now + silence_delay_timedelta if self.is_root_alert_group: + self.update_next_step_eta(datetime.timedelta(seconds=silence_delay + START_ESCALATION_DELAY)) self.start_unsilence_task(countdown=silence_delay) else: silence_delay_timedelta = None silenced_until = None - self.silence(silenced_at=now, silenced_until=silenced_until, silenced_by_user=user) + self.silence( + silenced_at=now, + silenced_until=silenced_until, + silenced_by_user=user, + raw_escalation_snapshot=self.raw_escalation_snapshot, + ) # Update alert group state and response time metrics cache self._update_metrics(organization_id=user.organization_id, previous_state=initial_state, state=self.state) @@ -1520,6 +1527,8 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. alert_group.silenced_by_user = user if not silence_for_period: alert_group.is_escalation_finished = True + else: + alert_group.update_next_step_eta(datetime.timedelta(seconds=silence_delay + START_ESCALATION_DELAY)) if alert_group.response_time is None: alert_group.response_time = alert_group._get_response_time() @@ -1537,6 +1546,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models. "silenced_until", "silenced_by_user", "is_escalation_finished", + "raw_escalation_snapshot", "response_time", ] AlertGroup.objects.bulk_update(alert_groups_to_silence_list, fields=fields_to_update, batch_size=100) diff --git a/engine/apps/alerts/tasks/check_escalation_finished.py b/engine/apps/alerts/tasks/check_escalation_finished.py index 7726f95f..b38693fd 100644 --- a/engine/apps/alerts/tasks/check_escalation_finished.py +++ b/engine/apps/alerts/tasks/check_escalation_finished.py @@ -60,9 +60,13 @@ def audit_alert_group_escalation(alert_group: "AlertGroup") -> None: ) if escalation_snapshot.next_step_eta_is_valid() is False: - raise AlertGroupEscalationPolicyExecutionAuditException( + msg = ( f"{base_msg}'s escalation snapshot does not have a valid next_step_eta: {escalation_snapshot.next_step_eta}" ) + + task_logger.warning(msg) + raise AlertGroupEscalationPolicyExecutionAuditException(msg) + task_logger.info(f"{base_msg}'s escalation snapshot has a valid next_step_eta: {escalation_snapshot.next_step_eta}") executed_escalation_policy_snapshots = escalation_snapshot.executed_escalation_policy_snapshots diff --git a/engine/apps/alerts/tests/test_alert_group.py b/engine/apps/alerts/tests/test_alert_group.py index 07456981..450665e7 100644 --- a/engine/apps/alerts/tests/test_alert_group.py +++ b/engine/apps/alerts/tests/test_alert_group.py @@ -1,7 +1,9 @@ +from unittest.mock import patch + import pytest from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer -from apps.alerts.models import AlertGroup +from apps.alerts.models import AlertGroup, AlertGroupLogRecord from apps.alerts.tasks.delete_alert_group import delete_alert_group from apps.slack.models import SlackMessage @@ -116,3 +118,161 @@ def test_alerts_count_gt( assert alert_group.alerts_count_gt(1) is True assert alert_group.alerts_count_gt(2) is False assert alert_group.alerts_count_gt(3) is False + + +@patch("apps.alerts.models.AlertGroup.start_unsilence_task", return_value=None) +@pytest.mark.django_db +def test_silence_by_user_for_period( + mocked_start_unsilence_task, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, user = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + + alert_group = make_alert_group(alert_receive_channel) + + raw_next_step_eta = "2023-08-28T09:27:26.627047Z" + silence_delay = 120 * 60 + updated_raw_next_step_eta = "2023-08-28T11:27:36.627047Z" # silence_delay + START_ESCALATION_DELAY + + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + alert_group.raw_escalation_snapshot["next_step_eta"] = raw_next_step_eta + + assert not alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + alert_group.silence_by_user(user, silence_delay=silence_delay) + + assert alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + alert_group.refresh_from_db() + + assert alert_group.silenced + assert alert_group.raw_escalation_snapshot["next_step_eta"] == updated_raw_next_step_eta + assert mocked_start_unsilence_task.called + + +@patch("apps.alerts.models.AlertGroup.start_unsilence_task", return_value=None) +@pytest.mark.django_db +def test_silence_by_user_forever( + mocked_start_unsilence_task, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, user = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + + alert_group = make_alert_group(alert_receive_channel) + + raw_next_step_eta = "2023-08-28T09:27:26.627047Z" + + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + alert_group.raw_escalation_snapshot["next_step_eta"] = raw_next_step_eta + + assert not alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + alert_group.silence_by_user(user, silence_delay=None) + + assert alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + alert_group.refresh_from_db() + + assert alert_group.silenced + assert alert_group.raw_escalation_snapshot["next_step_eta"] == raw_next_step_eta + assert not mocked_start_unsilence_task.called + + +@patch("apps.alerts.models.AlertGroup.start_unsilence_task", return_value=None) +@pytest.mark.django_db +def test_bulk_silence_for_period( + mocked_start_unsilence_task, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, user = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + + alert_group = make_alert_group(alert_receive_channel) + + raw_next_step_eta = "2023-08-28T09:27:26.627047Z" + silence_delay = 120 * 60 + updated_raw_next_step_eta = "2023-08-28T11:27:36.627047Z" # silence_delay + START_ESCALATION_DELAY + + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + alert_group.raw_escalation_snapshot["next_step_eta"] = raw_next_step_eta + alert_group.save() + + alert_groups = AlertGroup.objects.filter(pk__in=[alert_group.id]) + + assert not alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + AlertGroup.bulk_silence(user, alert_groups, silence_delay=silence_delay) + + assert alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + alert_group.refresh_from_db() + + assert alert_group.silenced + assert alert_group.raw_escalation_snapshot["next_step_eta"] == updated_raw_next_step_eta + assert mocked_start_unsilence_task.called + + +@patch("apps.alerts.models.AlertGroup.start_unsilence_task", return_value=None) +@pytest.mark.django_db +def test_bulk_silence_forever( + mocked_start_unsilence_task, + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + organization, user = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + + alert_group = make_alert_group(alert_receive_channel) + + raw_next_step_eta = "2023-08-28T09:27:26.627047Z" + + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + alert_group.raw_escalation_snapshot["next_step_eta"] = raw_next_step_eta + alert_group.save() + + alert_groups = AlertGroup.objects.filter(pk__in=[alert_group.id]) + + assert not alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + AlertGroup.bulk_silence(user, alert_groups, silence_delay=0) + + assert alert_group.log_records.filter( + type=AlertGroupLogRecord.TYPE_SILENCE, + author=user, + ).exists() + + alert_group.refresh_from_db() + + assert alert_group.silenced + assert alert_group.raw_escalation_snapshot["next_step_eta"] == raw_next_step_eta + assert not mocked_start_unsilence_task.called diff --git a/engine/apps/alerts/tests/test_check_escalation_finished_task.py b/engine/apps/alerts/tests/test_check_escalation_finished_task.py index 8e947210..f6a9aeeb 100644 --- a/engine/apps/alerts/tests/test_check_escalation_finished_task.py +++ b/engine/apps/alerts/tests/test_check_escalation_finished_task.py @@ -364,7 +364,11 @@ def test_check_escalation_finished_task_calls_audit_alert_group_escalation_for_e with pytest.raises(AlertGroupEscalationPolicyExecutionAuditException) as exc: check_escalation_finished_task() - assert str(exc.value) == f"The following alert group id(s) failed auditing: {alert_group1.id}, {alert_group2.id}" + error_msg = str(exc.value) + + assert "The following alert group id(s) failed auditing:" in error_msg + assert str(alert_group1.id) in error_msg + assert str(alert_group2.id) in error_msg mocked_audit_alert_group_escalation.assert_any_call(alert_group1) mocked_audit_alert_group_escalation.assert_any_call(alert_group2) diff --git a/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py b/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py index db229db9..1be7b2fb 100644 --- a/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py +++ b/engine/apps/alerts/tests/test_escalation_snapshot_mixin.py @@ -1,3 +1,4 @@ +import datetime from unittest.mock import PropertyMock, patch import pytest @@ -657,3 +658,29 @@ def test_next_step_eta( mock_dateutil_parser.assert_called_once_with(mocked_raw_date) mock_dateutil_parser.return_value.replace.assert_called_once_with(tzinfo=pytz.UTC) + + +@pytest.mark.django_db +def test_update_next_step_eta( + make_organization_and_user, + make_alert_receive_channel, + make_alert_group, +): + raw_next_step_eta = "2023-08-28T09:27:26.627047Z" + updated_raw_next_step_eta = "2023-08-28T11:27:26.627047Z" + increase_by_timedelta = datetime.timedelta(minutes=120) + + organization, _ = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + alert_group.raw_escalation_snapshot = alert_group.build_raw_escalation_snapshot() + alert_group.raw_escalation_snapshot["next_step_eta"] = raw_next_step_eta + + assert alert_group.raw_escalation_snapshot is not None + assert alert_group.raw_escalation_snapshot["next_step_eta"] == raw_next_step_eta + + alert_group.update_next_step_eta(increase_by_timedelta) + alert_group.save() + alert_group.refresh_from_db() + + assert alert_group.raw_escalation_snapshot["next_step_eta"] == updated_raw_next_step_eta From 5d4f96ec96c49fefac548ff35631474cd248b507 Mon Sep 17 00:00:00 2001 From: Michael Derynck Date: Mon, 28 Aug 2023 20:01:45 -0600 Subject: [PATCH 5/8] Alerting integration use get for receiver key (#2894) # What this PR does Fixes an issue where OnCall could not parse Alerting configuration if there was a route created without a receiver. The case where this comes up is a notification policy matcher is defined for the purpose of ignoring certain label values before continuing on to other matching rules. ## Which issue(s) this PR fixes ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 2 ++ .../grafana_alerting_sync_manager/grafana_alerting_sync.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fbc80b9c..d28cedbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Address bug when a Shift Swap Request is accepted either via the web or mobile UI, and the Slack message is not updated to reflect the latest state by @joeyorlando ([#2886](https://github.com/grafana/oncall/pull/2886)) +- Fix issue where Grafana integration would fail to parse alerting config for routes without receivers @mderynck + ([#2894](https://github.com/grafana/oncall/pull/2894)) ## v1.3.27 (2023-08-25) diff --git a/engine/apps/alerts/grafana_alerting_sync_manager/grafana_alerting_sync.py b/engine/apps/alerts/grafana_alerting_sync_manager/grafana_alerting_sync.py index 70658b2a..b0ed1558 100644 --- a/engine/apps/alerts/grafana_alerting_sync_manager/grafana_alerting_sync.py +++ b/engine/apps/alerts/grafana_alerting_sync_manager/grafana_alerting_sync.py @@ -374,7 +374,7 @@ class GrafanaAlertingSyncManager: return True routes = route_config.get("routes", []) for route in routes: - if route["receiver"] == receiver_name: + if route.get("receiver") == receiver_name: return True if route.get("routes"): if self._recursive_check_contact_point_is_in_routes(route, receiver_name): From e0e1f4b02144f79f665bc36a69453db2bc46bda8 Mon Sep 17 00:00:00 2001 From: Michael Derynck Date: Mon, 28 Aug 2023 20:19:28 -0600 Subject: [PATCH 6/8] Always update last_heartbeat_time async (#2892) # What this PR does If the same heartbeat is requested at a high rate it can create lock contention when updating the timestamp in the DB. Moving to always run update in task should free up the connection on the API server faster, although the task might still see some lock wait time. ## Which issue(s) this PR fixes ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- engine/apps/integrations/views.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/engine/apps/integrations/views.py b/engine/apps/integrations/views.py index c694d87e..638df507 100644 --- a/engine/apps/integrations/views.py +++ b/engine/apps/integrations/views.py @@ -3,7 +3,6 @@ import logging from django.conf import settings from django.core.exceptions import PermissionDenied -from django.db import OperationalError from django.http import HttpResponseBadRequest, JsonResponse from django.utils.decorators import method_decorator from django.views.decorators.csrf import csrf_exempt @@ -325,10 +324,6 @@ class IntegrationHeartBeatAPIView(AlertChannelDefiningMixin, IntegrationHeartBea return Response(status=200) def _process_heartbeat_signal(self, request, alert_receive_channel): - try: - process_heartbeat_task(alert_receive_channel.pk) - # If database is not ready, fallback to celery task - except OperationalError: - process_heartbeat_task.apply_async( - (alert_receive_channel.pk,), - ) + process_heartbeat_task.apply_async( + (alert_receive_channel.pk,), + ) From 69661f19861d87f8546e1d42fad7149c24573f0c Mon Sep 17 00:00:00 2001 From: Ildar Iskhakov Date: Tue, 29 Aug 2023 15:03:32 +0800 Subject: [PATCH 7/8] Switch to alpine base image (#2872) # What this PR does ## Which issue(s) this PR fixes ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required) --- CHANGELOG.md | 4 ++++ engine/Dockerfile | 29 ++++++++++++++--------------- engine/requirements.txt | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d28cedbb..5395ca41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Changed + +- Switch engine to alpine base image ([2872](https://github.com/grafana/oncall/pull/2872)) + ### Fixed - Address bug when a Shift Swap Request is accepted either via the web or mobile UI, and the Slack message is not diff --git a/engine/Dockerfile b/engine/Dockerfile index 9ba6b4e0..73f76a8b 100644 --- a/engine/Dockerfile +++ b/engine/Dockerfile @@ -1,25 +1,24 @@ -FROM python:3.11.4-slim-bookworm AS base +FROM python:3.11.4-alpine3.18 AS base # Create a group and user to run an app ENV APP_USER=appuser -RUN groupadd --system --gid 2000 ${APP_USER} && \ - useradd --no-log-init --system --uid 1000 --gid ${APP_USER} ${APP_USER} +RUN addgroup --system --gid 2000 ${APP_USER} && \ + adduser --system --uid 1000 --ingroup ${APP_USER} ${APP_USER} -RUN apt-get update && apt-get install -y \ - python3-dev \ - gcc \ - libmariadb-dev \ - libpq-dev \ - netcat-traditional \ - curl \ - bash \ - git \ - libpcre3 \ - libpcre3-dev +RUN apk add bash \ + python3-dev \ + build-base \ + linux-headers \ + pcre-dev \ + mariadb-connector-c-dev \ + libffi-dev \ + git \ + postgresql-dev WORKDIR /etc/app COPY ./requirements.txt ./ RUN pip install --upgrade pip +RUN pip install --upgrade setuptools wheel RUN pip install -r requirements.txt # we intentionally have two COPY commands, this is to have the requirements.txt in a separate build step @@ -48,7 +47,7 @@ RUN chown -R ${APP_USER}:${APP_USER} /tmp/prometheus_django_metrics ENV prometheus_multiproc_dir "/tmp/prometheus_django_metrics" FROM base AS dev -RUN apt-get install -y sqlite3 default-mysql-client postgresql-client +RUN apk add sqlite mysql-client postgresql-client RUN pip install -r requirements-dev.txt FROM dev AS dev-enterprise diff --git a/engine/requirements.txt b/engine/requirements.txt index 6ac758a8..952d1dac 100644 --- a/engine/requirements.txt +++ b/engine/requirements.txt @@ -46,7 +46,7 @@ opentelemetry-instrumentation-celery==0.36b0 opentelemetry-instrumentation-pymysql==0.36b0 opentelemetry-instrumentation-wsgi==0.36b0 opentelemetry-exporter-otlp-proto-grpc==1.15.0 -pyroscope-io==0.8.1 +# pyroscope-io==0.8.1 django-dbconn-retry==0.1.7 django-ipware==4.0.2 django-anymail==8.6 From d5248534eb8a5796a3ba15ed6a01dc36f48d920c Mon Sep 17 00:00:00 2001 From: Ildar Iskhakov Date: Tue, 29 Aug 2023 15:48:04 +0800 Subject: [PATCH 8/8] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5395ca41..f9644bc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased +## v1.3.28 (2023-08-29) ### Changed