Reworked declare incident escalation step (#5130)

Reworked https://github.com/grafana/oncall/pull/5047. The main update is the switch from an FK to an [M2M relation](https://docs.google.com/document/d/1HeulqxoFShSHtInQrZNJLL5MDlHPNT50rVGaK3zZWvw/edit?disco=AAABVLjV4W8). This doesn't really change the original/intended behavior, besides not needing to alter the alert group table, and it is a bit more flexible. The extra table shouldn't introduce issues because it is used only for tracking purposes and the information needed in the log record is already there.

This avoids a db migration involving the alert group table. The resulting migration SQL is:

```
--
-- Create model RelatedIncident
--
CREATE TABLE `alerts_relatedincident` (`id` bigint AUTO_INCREMENT NOT NULL PRIMARY KEY, `incident_id` varchar(50) NOT NULL, `created_at` datetime(6) NOT NULL, `is_active` bool NOT NULL, `channel_filter_id` bigint NULL, `organization_id` bigint NOT NULL);
CREATE TABLE `alerts_relatedincident_attached_alert_groups` (`id` bigint AUTO_INCREMENT NOT NULL PRIMARY KEY, `relatedincident_id` bigint NOT NULL, `alertgroup_id` bigint NOT NULL);
ALTER TABLE `alerts_relatedincident` ADD CONSTRAINT `alerts_relatedincident_organization_id_incident_id_d7fc9a4f_uniq` UNIQUE (`organization_id`, `incident_id`);
ALTER TABLE `alerts_relatedincident` ADD CONSTRAINT `alerts_relatedincide_channel_filter_id_9556c836_fk_alerts_ch` FOREIGN KEY (`channel_filter_id`) REFERENCES `alerts_channelfilter` (`id`);
ALTER TABLE `alerts_relatedincident` ADD CONSTRAINT `alerts_relatedincide_organization_id_74ed6bed_fk_user_mana` FOREIGN KEY (`organization_id`) REFERENCES `user_management_organization` (`id`);
CREATE INDEX `alerts_relatedincident_incident_id_8356a799` ON `alerts_relatedincident` (`incident_id`);
ALTER TABLE `alerts_relatedincident_attached_alert_groups` ADD CONSTRAINT `alerts_relatedincident_a_relatedincident_id_alert_3d683baa_uniq` UNIQUE (`relatedincident_id`, `alertgroup_id`);
ALTER TABLE `alerts_relatedincident_attached_alert_groups` ADD CONSTRAINT `alerts_relatedincide_relatedincident_id_3e5e7a23_fk_alerts_re` FOREIGN KEY (`relatedincident_id`) REFERENCES `alerts_relatedincident` (`id`);
ALTER TABLE `alerts_relatedincident_attached_alert_groups` ADD CONSTRAINT `alerts_relatedincide_alertgroup_id_0125deca_fk_alerts_al` FOREIGN KEY (`alertgroup_id`) REFERENCES `alerts_alertgroup` (`id`);
```
2024-10-07 16:26:10 -03:00 · 2024-10-07 16:26:10 -03:00 · fa815b7ecd
commit fa815b7ecd
parent ac7dc97cc3
23 changed files with 957 additions and 5 deletions
--- a/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py
+++ b/engine/apps/alerts/escalation_snapshot/snapshot_classes/escalation_policy_snapshot.py
@ -12,11 +12,13 @@ from apps.alerts.models.alert_group_log_record import AlertGroupLogRecord
 from apps.alerts.models.escalation_policy import EscalationPolicy
 from apps.alerts.tasks import (
    custom_webhook_result,
+    declare_incident,
    notify_all_task,
    notify_group_task,
    notify_user_task,
    resolve_by_last_step_task,
 )
+from apps.alerts.utils import is_declare_incident_step_enabled
 from apps.schedules.ical_utils import list_users_to_notify_from_ical
 from apps.user_management.models import User

@ -136,6 +138,7 @@ class EscalationPolicySnapshot:
            EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: self._escalation_step_notify_if_num_alerts_in_time_window,
            EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS: self._escalation_step_notify_multiple_users,
            EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT: self._escalation_step_notify_multiple_users,
+            EscalationPolicy.STEP_DECLARE_INCIDENT: self._escalation_step_declare_incident,
            None: self._escalation_step_not_configured,
        }
        result = action_map[self.step](alert_group, reason)
@ -410,6 +413,32 @@ class EscalationPolicySnapshot:

        self._execute_tasks(tasks)

+    def _escalation_step_declare_incident(self, alert_group: "AlertGroup", _reason: str) -> None:
+        grafana_declare_incident_enabled = is_declare_incident_step_enabled(
+            organization=alert_group.channel.organization
+        )
+        if not grafana_declare_incident_enabled:
+            AlertGroupLogRecord(
+                type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
+                alert_group=alert_group,
+                reason="Declare Incident step is not enabled",
+                escalation_policy=self.escalation_policy,
+                escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
+                escalation_policy_step=self.step,
+            ).save()
+            return
+        tasks = []
+        declare_incident_task = declare_incident.signature(
+            args=(alert_group.pk,),
+            kwargs={
+                "escalation_policy_pk": self.id,
+                "severity": self.severity,
+            },
+            immutable=True,
+        )
+        tasks.append(declare_incident_task)
+        self._execute_tasks(tasks)
+
    def _escalation_step_notify_if_time(self, alert_group: "AlertGroup", _reason: str) -> StepExecutionResultData:
        eta = None

--- a/engine/apps/alerts/migrations/0060_relatedincident.py
+++ b/engine/apps/alerts/migrations/0060_relatedincident.py
@ -0,0 +1,30 @@
+# Generated by Django 4.2.15 on 2024-10-04 16:38
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # Creates the RelatedIncident model. The only operation is a CreateModel, so existing
+    # models (including the alert group one) are not modified by this migration.
+
+    dependencies = [
+        ('user_management', '0022_alter_team_unique_together'),
+        ('alerts', '0059_escalationpolicy_severity_and_more'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RelatedIncident',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('incident_id', models.CharField(db_index=True, max_length=50)),
+                ('created_at', models.DateTimeField(auto_now_add=True)),
+                ('is_active', models.BooleanField(default=True)),
+                # many-to-many link to alert groups, reachable from an alert group as `related_incidents`
+                ('attached_alert_groups', models.ManyToManyField(related_name='related_incidents', to='alerts.alertgroup')),
+                # SET_NULL: the row is kept (with a null channel filter) when the channel filter is deleted
+                ('channel_filter', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='related_incidents', to='alerts.channelfilter')),
+                # CASCADE: rows are removed together with their organization
+                ('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_incidents', to='user_management.organization')),
+            ],
+            options={
+                # an incident id may appear only once per organization
+                'unique_together': {('organization', 'incident_id')},
+            },
+        ),
+    ]
--- a/engine/apps/alerts/models/init.py
+++ b/engine/apps/alerts/models/init.py
@ -13,6 +13,7 @@ from .escalation_policy import EscalationPolicy  # noqa: F401
 from .grafana_alerting_contact_point import GrafanaAlertingContactPoint  # noqa: F401
 from .invitation import Invitation  # noqa: F401
 from .maintainable_object import MaintainableObject  # noqa: F401
+from .related_incident import RelatedIncident  # noqa: F401
 from .resolution_note import ResolutionNote, ResolutionNoteSlackMessage  # noqa: F401
 from .user_has_notification import UserHasNotification  # noqa: F401
 from .user_notification_bundle import BundledNotification, UserNotificationBundle  # noqa: F401
--- a/engine/apps/alerts/models/alert_group.py
+++ b/engine/apps/alerts/models/alert_group.py
@ -44,6 +44,7 @@ if typing.TYPE_CHECKING:
        AlertGroupLogRecord,
        AlertReceiveChannel,
        BundledNotification,
+        RelatedIncident,
        ResolutionNote,
        ResolutionNoteSlackMessage,
    )
@ -193,6 +194,7 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
    acknowledged_by_user: typing.Optional["User"]
    alerts: "RelatedManager['Alert']"
    bundled_notifications: "RelatedManager['BundledNotification']"
+    related_incidents: "RelatedManager['RelatedIncident']"
    dependent_alert_groups: "RelatedManager['AlertGroup']"
    channel: "AlertReceiveChannel"
    log_records: "RelatedManager['AlertGroupLogRecord']"
--- a/engine/apps/alerts/models/alert_group_log_record.py
+++ b/engine/apps/alerts/models/alert_group_log_record.py
@ -11,18 +11,24 @@ from rest_framework.fields import DateTimeField

 from apps.alerts import tasks
 from apps.alerts.constants import ActionSource
+from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
 from apps.alerts.utils import render_relative_timeline
 from apps.slack.slack_formatter import SlackFormatter
 from common.utils import clean_markup

 if typing.TYPE_CHECKING:
    from apps.alerts.models import AlertGroup, CustomButton, EscalationPolicy, Invitation
-    from apps.user_management.models import User
+    from apps.user_management.models import Organization, User

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)


+class RelatedIncidentData(typing.TypedDict):
+    """Incident details rendered from a log record's step specific info."""
+
+    # URL of the incident; None when the stored incident id is empty
+    incident_link: typing.Optional[str]
+    # stored incident title, or DEFAULT_BACKUP_TITLE when the stored title is empty
+    incident_title: str
+
+
 class AlertGroupLogRecord(models.Model):
    alert_group: "AlertGroup"
    author: typing.Optional["User"]
@ -161,7 +167,9 @@ class AlertGroupLogRecord(models.Model):
        ERROR_ESCALATION_TRIGGER_CUSTOM_WEBHOOK_ERROR,
        ERROR_ESCALATION_NOTIFY_TEAM_MEMBERS_STEP_IS_NOT_CONFIGURED,
        ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED,
-    ) = range(20)
+        ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
+        ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
+    ) = range(22)

    type = models.IntegerField(choices=TYPE_CHOICES)

@ -225,7 +233,14 @@ class AlertGroupLogRecord(models.Model):
    escalation_policy_step = models.IntegerField(null=True, default=None)
    step_specific_info = JSONField(null=True, default=None)

-    STEP_SPECIFIC_INFO_KEYS = ["schedule_name", "custom_button_name", "usergroup_handle", "source_integration_name"]
+    STEP_SPECIFIC_INFO_KEYS = [
+        "schedule_name",
+        "custom_button_name",
+        "usergroup_handle",
+        "source_integration_name",
+        "incident_id",
+        "incident_title",
+    ]

    def _make_log_line_link(self, url, title, html=False, for_slack=False, substitute_with_tag=False):
        if html and url:
@ -244,6 +259,7 @@ class AlertGroupLogRecord(models.Model):
        author = self.author.short(organization) if self.author is not None else None
        escalation_chain = self.alert_group.channel_filter.escalation_chain if self.alert_group.channel_filter else None
        step_info = self.get_step_specific_info()
+        related_incident = self.render_incident_data_from_step_info(organization, step_info)
        escalation_chain_data = (
            {
                "pk": escalation_chain.public_primary_key,
@ -280,6 +296,7 @@ class AlertGroupLogRecord(models.Model):
            "type": self.type,
            "created_at": created_at,
            "author": author,
+            "incident": related_incident,
            "escalation_chain": escalation_chain_data,
            "schedule": schedule,
            "webhook": webhook,
@ -425,6 +442,14 @@ class AlertGroupLogRecord(models.Model):
                result += f'triggered step "Notify on-call from Schedule {schedule_text}{important_text}"'
            elif escalation_policy_step == EscalationPolicy.STEP_REPEAT_ESCALATION_N_TIMES:
                result += "escalation started from the beginning"
+            elif escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT:
+                organization = self.alert_group.channel.organization
+                incident_data = self.render_incident_data_from_step_info(organization, step_specific_info)
+                incident_link = incident_data["incident_link"]
+                incident_title = incident_data["incident_title"]
+                tag = "related_incident" if substitute_with_tag else False
+                incident_text = self._make_log_line_link(incident_link, incident_title, html, for_slack, tag)
+                result += self.reason + f": {incident_text}"
            else:
                result += f'triggered step "{EscalationPolicy.get_step_display_name(escalation_policy_step)}"'
        elif self.type == AlertGroupLogRecord.TYPE_SILENCE:
@ -640,8 +665,32 @@ class AlertGroupLogRecord(models.Model):
                    result += f"failed to notify User Group{usergroup_handle_text} in Slack"
            elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED:
                result += 'skipped escalation step "Trigger Outgoing Webhook" because it is disabled'
+            elif (
+                self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
+            ):
+                result += 'skipped escalation step "Declare Incident": step is not enabled'
+            elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED:
+                result += "failed to declare an Incident"
+                if self.reason:
+                    result += f": {self.reason}"
        return result

+    def render_incident_data_from_step_info(
+        self, organization: "Organization", step_specific_info: dict
+    ) -> RelatedIncidentData | None:
+        from apps.alerts.models.related_incident import get_incident_url
+
+        if not step_specific_info or not all(key in step_specific_info for key in ["incident_title", "incident_id"]):
+            return None
+
+        incident_link = (
+            get_incident_url(organization, step_specific_info["incident_id"])
+            if step_specific_info["incident_id"]
+            else None
+        )
+        incident_title = step_specific_info["incident_title"] or DEFAULT_BACKUP_TITLE
+        return {"incident_link": incident_link, "incident_title": incident_title}
+
    def get_step_specific_info(self):
        step_specific_info = None
        # in some cases step_specific_info was saved with using json.dumps
--- a/engine/apps/alerts/models/escalation_policy.py
+++ b/engine/apps/alerts/models/escalation_policy.py
@ -92,6 +92,7 @@ class EscalationPolicy(OrderedModel):
        STEP_NOTIFY_IF_TIME,
        STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
        STEP_REPEAT_ESCALATION_N_TIMES,
+        STEP_DECLARE_INCIDENT,
    ]
    # Steps can be stored in db while interacting with internal api
    # Includes important versions of default steps
@ -218,6 +219,7 @@ class EscalationPolicy(OrderedModel):
        STEP_NOTIFY_IF_TIME,
        STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
        STEP_REPEAT_ESCALATION_N_TIMES,
+        STEP_DECLARE_INCIDENT,
    ]

    PUBLIC_STEP_CHOICES_MAP = {
@ -239,6 +241,7 @@ class EscalationPolicy(OrderedModel):
        STEP_NOTIFY_IF_TIME: "notify_if_time_from_to",
        STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: "notify_if_num_alerts_in_window",
        STEP_REPEAT_ESCALATION_N_TIMES: "repeat_escalation",
+        STEP_DECLARE_INCIDENT: "declare_incident",
    }

    public_primary_key = models.CharField(
--- a/engine/apps/alerts/models/related_incident.py
+++ b/engine/apps/alerts/models/related_incident.py
@ -0,0 +1,48 @@
+import typing
+from urllib.parse import urljoin
+
+from django.db import models
+
+from common.constants.plugin_ids import PluginID
+
+if typing.TYPE_CHECKING:
+    from django.db.models.manager import RelatedManager
+
+    from apps.alerts.models import AlertGroup, ChannelFilter
+    from apps.user_management.models import Organization
+
+
+def get_incident_url(organization, incident_id) -> str:
+    return urljoin(organization.grafana_url, f"a/{PluginID.INCIDENT}/incidents/{incident_id}")
+
+
+class RelatedIncident(models.Model):
+    """
+    Incident associated with one or more alert groups of an organization.
+
+    An incident id can be stored only once per organization (see `Meta.unique_together`).
+    """
+
+    attached_alert_groups: "RelatedManager['AlertGroup']"
+    channel_filter: typing.Optional["ChannelFilter"]
+    organization: "Organization"
+
+    # identifier of the incident; the `declare_incident` task stores the "incidentID" value
+    # returned by the incident API client here
+    incident_id = models.CharField(db_index=True, max_length=50)
+    # rows are deleted together with their organization
+    organization = models.ForeignKey(
+        "user_management.Organization",
+        on_delete=models.CASCADE,
+        related_name="related_incidents",
+    )
+    # set to NULL (instead of deleting the row) when the channel filter is deleted
+    channel_filter = models.ForeignKey(
+        "alerts.ChannelFilter",
+        on_delete=models.SET_NULL,
+        null=True,
+        related_name="related_incidents",
+    )
+    created_at = models.DateTimeField(auto_now_add=True)
+    # the `declare_incident` task sets this to False when the incident is not found (404)
+    # or its status is no longer DEFAULT_INCIDENT_STATUS
+    is_active = models.BooleanField(default=True)
+
+    # alert groups linked to this incident; reverse accessor on alert groups is `related_incidents`
+    attached_alert_groups = models.ManyToManyField(
+        "alerts.AlertGroup",
+        related_name="related_incidents",
+    )
+
+    class Meta:
+        unique_together = ("organization", "incident_id")
+
+    def get_incident_link(self) -> str:
+        """Return the URL of this incident in the organization's Grafana instance."""
+        return get_incident_url(self.organization, self.incident_id)
--- a/engine/apps/alerts/tasks/init.py
+++ b/engine/apps/alerts/tasks/init.py
@ -5,6 +5,7 @@ from .alert_group_web_title_cache import (  # noqa:F401
 )
 from .check_escalation_finished import check_escalation_finished_task  # noqa: F401
 from .custom_webhook_result import custom_webhook_result  # noqa: F401
+from .declare_incident import declare_incident  # noqa: F401
 from .delete_alert_group import delete_alert_group  # noqa: F401
 from .delete_alert_group import finish_delete_alert_group  # noqa: F401
 from .delete_alert_group import send_alert_group_signal_for_delete  # noqa: F401
--- a/engine/apps/alerts/tasks/declare_incident.py
+++ b/engine/apps/alerts/tasks/declare_incident.py
@ -0,0 +1,148 @@
+import logging
+
+from django.conf import settings
+
+from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
+from common.custom_celery_tasks import shared_dedicated_queue_retry_task
+from common.incident_api.client import (
+    DEFAULT_INCIDENT_SEVERITY,
+    DEFAULT_INCIDENT_STATUS,
+    IncidentAPIClient,
+    IncidentAPIException,
+)
+
+logger = logging.getLogger(__name__)
+
+ATTACHMENT_CAPTION = "OnCall Alert Group"
+ERROR_SEVERITY_NOT_FOUND = "Severity.FindOne: not found"
+MAX_RETRIES = 1 if settings.DEBUG else 10
+MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT = 5
+
+
+def _attach_alert_group_to_incident(alert_group, incident_id, incident_title, escalation_policy, attached=False):
+    from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy, RelatedIncident
+
+    declared_incident, _ = RelatedIncident.objects.get_or_create(
+        incident_id=incident_id,
+        organization=alert_group.channel.organization,
+        defaults={
+            "channel_filter": alert_group.channel_filter,
+        },
+    )
+    declared_incident.attached_alert_groups.add(alert_group)
+    reason = "attached to existing incident" if attached else "incident declared"
+    AlertGroupLogRecord.objects.create(
+        type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
+        reason=reason,
+        alert_group=alert_group,
+        step_specific_info={"incident_id": incident_id, "incident_title": incident_title},
+        escalation_policy=escalation_policy,
+        escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
+    )
+
+
+def _create_error_log_record(alert_group, escalation_policy, reason=""):
+    from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy
+
+    AlertGroupLogRecord.objects.create(
+        type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
+        escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
+        reason=reason,
+        alert_group=alert_group,
+        escalation_policy=escalation_policy,
+        escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
+    )
+
+
+@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), retry_backoff=True, max_retries=MAX_RETRIES)
+def declare_incident(alert_group_pk, escalation_policy_pk, severity=None):
+    from apps.alerts.models import AlertGroup, EscalationPolicy, RelatedIncident
+
+    alert_group = AlertGroup.objects.get(pk=alert_group_pk)
+    organization = alert_group.channel.organization
+    escalation_policy = None
+    if escalation_policy_pk:
+        escalation_policy = EscalationPolicy.objects.filter(pk=escalation_policy_pk).first()
+
+    if alert_group.channel_filter.is_default:
+        _create_error_log_record(
+            alert_group, escalation_policy, reason="Declare incident step is not enabled for default routes"
+        )
+        return
+
+    if declare_incident.request.retries == MAX_RETRIES:
+        _create_error_log_record(alert_group, escalation_policy)
+        return
+
+    incident_client = IncidentAPIClient(organization.grafana_url, organization.api_token)
+
+    # check for currently active related incident in the same route (channel_filter)
+    existing_incident = (
+        RelatedIncident.objects.filter(
+            organization=organization, channel_filter=alert_group.channel_filter, is_active=True
+        )
+        .order_by("-created_at")
+        .first()
+    )
+
+    if existing_incident:
+        incident_id = existing_incident.incident_id
+        try:
+            # get existing incident details
+            incident_data, _ = incident_client.get_incident(incident_id)
+        except IncidentAPIException as e:
+            logger.error(f"Error getting incident details: {e.msg}")
+            if e.status == 404:
+                # incident not found, mark as not opened
+                existing_incident.is_active = False
+                existing_incident.save(update_fields=["is_active"])
+            else:
+                # raise (and retry)
+                raise
+        else:
+            # incident exists, check if it is still active
+            if incident_data["status"] == DEFAULT_INCIDENT_STATUS:
+                # attach to incident context
+                incident_title = incident_data["title"]
+                num_attached = existing_incident.attached_alert_groups.count()
+                if num_attached < MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT:
+                    try:
+                        incident_data, _ = incident_client.add_activity(incident_id, alert_group.web_link)
+                    except IncidentAPIException as e:
+                        logger.error(f"Error attaching to existing incident: {e.msg}")
+                # setup association between alert group and incident (even if not attached)
+                _attach_alert_group_to_incident(
+                    alert_group, incident_id, incident_title, escalation_policy, attached=True
+                )
+            else:
+                existing_incident.is_active = False
+                existing_incident.save(update_fields=["is_active"])
+
+    if existing_incident is None or not existing_incident.is_active:
+        # create new incident
+        if severity == EscalationPolicy.SEVERITY_SET_FROM_LABEL:
+            severity_label = alert_group.labels.filter(key_name="severity").first()
+            severity = severity_label.value_name if severity_label else None
+        severity = severity or DEFAULT_INCIDENT_SEVERITY
+        try:
+            incident_data, _ = incident_client.create_incident(
+                alert_group.web_title_cache if alert_group.web_title_cache else DEFAULT_BACKUP_TITLE,
+                severity=severity,
+                attachCaption=ATTACHMENT_CAPTION,
+                attachURL=alert_group.web_link,
+            )
+        except IncidentAPIException as e:
+            logger.error(f"Error creating new incident: {e.msg}")
+            if ERROR_SEVERITY_NOT_FOUND.lower() in e.msg.lower() and severity != DEFAULT_INCIDENT_SEVERITY:
+                # invalid severity, retry with default severity
+                declare_incident.apply_async(
+                    args=(alert_group_pk, escalation_policy_pk),
+                    kwargs={"severity": DEFAULT_INCIDENT_SEVERITY},
+                )
+                return
+            # else raise (and retry)
+            raise
+        else:
+            _attach_alert_group_to_incident(
+                alert_group, incident_data["incidentID"], incident_data["title"], escalation_policy
+            )
--- a/engine/apps/alerts/tests/factories.py
+++ b/engine/apps/alerts/tests/factories.py
@ -11,6 +11,7 @@ from apps.alerts.models import (
    EscalationChain,
    EscalationPolicy,
    Invitation,
+    RelatedIncident,
    ResolutionNote,
    ResolutionNoteSlackMessage,
    UserNotificationBundle,
@ -91,3 +92,8 @@ class InvitationFactory(factory.DjangoModelFactory):
 class UserNotificationBundleFactory(factory.DjangoModelFactory):
    class Meta:
        model = UserNotificationBundle
+
+
+class RelatedIncidentFactory(factory.DjangoModelFactory):
+    # Test factory for RelatedIncident; declares no field defaults, so callers supply
+    # whatever values the model requires.
+    class Meta:
+        model = RelatedIncident
--- a/engine/apps/alerts/tests/test_escalation_policy_snapshot.py
+++ b/engine/apps/alerts/tests/test_escalation_policy_snapshot.py
@ -690,3 +690,52 @@ def test_notify_team_members(
        (user_2.pk, alert_group.pk), expected_kwargs, immutable=True
    )
    assert mock_execute.signature.call_count == 2
+
+
+@pytest.mark.django_db
+def test_escalation_step_declare_incident(
+    escalation_step_test_setup,
+    make_escalation_policy,
+):
+    organization, _, _, channel_filter, alert_group, reason = escalation_step_test_setup
+
+    declare_incident_step = make_escalation_policy(
+        escalation_chain=channel_filter.escalation_chain,
+        escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
+    )
+    escalation_policy_snapshot = get_escalation_policy_snapshot_from_model(declare_incident_step)
+    expected_eta = timezone.now() + timezone.timedelta(seconds=NEXT_ESCALATION_DELAY)
+    with patch.object(EscalationPolicySnapshot, "_execute_tasks") as mocked_execute_tasks:
+        with patch(
+            "apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot.is_declare_incident_step_enabled",
+            return_value=True,
+        ):
+            result = escalation_policy_snapshot.execute(alert_group, reason)
+            expected_result = EscalationPolicySnapshot.StepExecutionResultData(
+                eta=result.eta,
+                stop_escalation=False,
+                pause_escalation=False,
+                start_from_beginning=False,
+            )
+            assert (
+                expected_eta + timezone.timedelta(seconds=15)
+                > result.eta
+                > expected_eta - timezone.timedelta(seconds=15)
+            )
+            assert result == expected_result
+            assert not alert_group.log_records.exists()
+            mocked_execute_tasks.assert_called_once()
+    with patch.object(EscalationPolicySnapshot, "_execute_tasks") as mocked_execute_tasks:
+        with patch(
+            "apps.alerts.escalation_snapshot.snapshot_classes.escalation_policy_snapshot.is_declare_incident_step_enabled",
+            return_value=False,
+        ):
+            escalation_policy_snapshot.execute(alert_group, reason)
+            mocked_execute_tasks.assert_not_called()
+            assert alert_group.log_records.exists()
+            log_record = alert_group.log_records.get()
+            assert log_record.type == AlertGroupLogRecord.TYPE_ESCALATION_FAILED
+            assert (
+                log_record.escalation_error_code
+                == AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
+            )
--- a/engine/apps/alerts/tests/test_related_incident.py
+++ b/engine/apps/alerts/tests/test_related_incident.py
@ -0,0 +1,332 @@
+from unittest.mock import patch
+
+import httpretty
+import pytest
+
+from apps.alerts.models import AlertGroupLogRecord, EscalationPolicy, RelatedIncident
+from apps.alerts.tasks.declare_incident import (
+    ATTACHMENT_CAPTION,
+    DEFAULT_BACKUP_TITLE,
+    DEFAULT_INCIDENT_SEVERITY,
+    ERROR_SEVERITY_NOT_FOUND,
+    MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT,
+    declare_incident,
+)
+from common.incident_api.client import IncidentAPIException
+
+
+@pytest.fixture
+def setup_alert_group_and_escalation_step(
+    make_organization,
+    make_alert_receive_channel,
+    make_alert_group,
+    make_channel_filter,
+    make_escalation_chain,
+    make_escalation_policy,
+):
+    """Return a factory building an alert group routed to a chain that has a declare incident step."""
+
+    def _build(is_default_route=False, already_declared_incident=False):
+        """Create the objects and return ``(alert_group, declare_incident_step, related_incident_or_None)``."""
+        org = make_organization(grafana_url="https://stack.grafana.net", api_token="token")
+        integration = make_alert_receive_channel(organization=org)
+        chain = make_escalation_chain(org)
+        step = make_escalation_policy(
+            escalation_chain=chain,
+            escalation_policy_step=EscalationPolicy.STEP_DECLARE_INCIDENT,
+        )
+        route = make_channel_filter(integration, escalation_chain=chain, is_default=is_default_route)
+        group = make_alert_group(alert_receive_channel=integration, channel_filter=route)
+
+        if not already_declared_incident:
+            return group, step, None
+
+        # an incident already tracked for the same organization and route
+        incident = RelatedIncident.objects.create(incident_id="123", organization=org, channel_filter=route)
+        return group, step, incident
+
+    return _build
+
+
+@pytest.mark.django_db
+def test_declare_incident_default_route(setup_alert_group_and_escalation_step):
+    """Running the task for an alert group on a default route is expected to log an escalation failure."""
+    group, step, _ = setup_alert_group_and_escalation_step(is_default_route=True)
+
+    declare_incident(group.pk, step.pk)
+    group.refresh_from_db()
+
+    # the latest log record describes the failed step
+    entry = group.log_records.last()
+    assert entry.type == entry.TYPE_ESCALATION_FAILED
+    assert entry.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED
+    assert entry.reason == "Declare incident step is not enabled for default routes"
+    assert entry.escalation_policy == step
+    assert entry.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
+    assert entry.step_specific_info is None
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_ok(setup_alert_group_and_escalation_step):
+    """With no incident tracked yet, the task creates one, links it and logs the step as triggered."""
+    group, step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
+
+    with patch(
+        "common.incident_api.client.IncidentAPIClient.create_incident",
+        return_value=({"incidentID": "123", "title": "Incident"}, None),
+    ) as create_mock:
+        declare_incident(group.pk, step.pk)
+
+    expected_kwargs = {
+        "severity": DEFAULT_INCIDENT_SEVERITY,
+        "attachCaption": ATTACHMENT_CAPTION,
+        "attachURL": group.web_link,
+    }
+    create_mock.assert_called_with(DEFAULT_BACKUP_TITLE, **expected_kwargs)
+
+    group.refresh_from_db()
+
+    # the created incident is tracked and linked to the alert group
+    tracked = group.related_incidents.get()
+    assert tracked.incident_id == "123"
+    assert tracked.organization == group.channel.organization
+    assert tracked.channel_filter == group.channel_filter
+
+    # the latest log record reports the step as triggered
+    entry = group.log_records.last()
+    assert entry.type == entry.TYPE_ESCALATION_TRIGGERED
+    assert entry.escalation_error_code is None
+    assert entry.reason == "incident declared"
+    assert entry.step_specific_info == {"incident_id": "123", "incident_title": "Incident"}
+    assert entry.escalation_policy == step
+    assert entry.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_set_severity(setup_alert_group_and_escalation_step):
+    """A severity passed explicitly to the task is forwarded to the create incident call."""
+    group, step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
+    requested_severity = "critical"
+
+    with patch(
+        "common.incident_api.client.IncidentAPIClient.create_incident",
+        return_value=({"incidentID": "123", "title": "Incident"}, None),
+    ) as create_mock:
+        declare_incident(group.pk, step.pk, severity=requested_severity)
+
+    create_mock.assert_called_with(
+        DEFAULT_BACKUP_TITLE,
+        severity=requested_severity,
+        attachCaption=ATTACHMENT_CAPTION,
+        attachURL=group.web_link,
+    )
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_set_severity_from_label(setup_alert_group_and_escalation_step):
+    """With the set-from-label severity, the value of the alert group's ``severity`` label is used."""
+    group, step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
+    label_severity = "minor"
+    # attach a "severity" label to the alert group
+    group.labels.create(organization=group.channel.organization, key_name="severity", value_name=label_severity)
+
+    with patch(
+        "common.incident_api.client.IncidentAPIClient.create_incident",
+        return_value=({"incidentID": "123", "title": "Incident"}, None),
+    ) as create_mock:
+        declare_incident(group.pk, step.pk, severity=EscalationPolicy.SEVERITY_SET_FROM_LABEL)
+
+    expected_kwargs = {
+        "severity": label_severity,
+        "attachCaption": ATTACHMENT_CAPTION,
+        "attachURL": group.web_link,
+    }
+    create_mock.assert_called_with(DEFAULT_BACKUP_TITLE, **expected_kwargs)
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_invalid_severity_fallback(setup_alert_group_and_escalation_step):
+    """A severity-not-found API error makes the task re-queue itself with the default severity."""
+    group, step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
+    bad_severity = "INVALID"
+    severity_error = IncidentAPIException(status=500, url="some-url", msg=ERROR_SEVERITY_NOT_FOUND)
+
+    with patch(
+        "common.incident_api.client.IncidentAPIClient.create_incident", side_effect=severity_error
+    ) as create_mock:
+        with patch.object(declare_incident, "apply_async") as requeue_mock:
+            declare_incident(group.pk, step.pk, severity=bad_severity)
+
+    # the create call was attempted with the invalid severity
+    create_mock.assert_called_with(
+        DEFAULT_BACKUP_TITLE,
+        severity=bad_severity,
+        attachCaption=ATTACHMENT_CAPTION,
+        attachURL=group.web_link,
+    )
+    # a new task is queued using the default severity instead
+    requeue_mock.assert_called_with(args=(group.pk, step.pk), kwargs={"severity": DEFAULT_INCIDENT_SEVERITY})
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_attach_alert_group(setup_alert_group_and_escalation_step):
+    """When an active incident is already tracked, the alert group is attached to it."""
+    group, step, open_incident = setup_alert_group_and_escalation_step(already_declared_incident=True)
+    incident_id = open_incident.incident_id
+    active_incident = ({"incidentID": incident_id, "title": "Incident", "status": "active"}, None)
+
+    with patch("common.incident_api.client.IncidentAPIClient.get_incident", return_value=active_incident):
+        with patch(
+            "common.incident_api.client.IncidentAPIClient.add_activity",
+            return_value=({"activityItemID": "111"}, None),
+        ):
+            declare_incident(group.pk, step.pk)
+
+    # the alert group is linked to the existing incident
+    assert open_incident.attached_alert_groups.filter(id=group.id).exists()
+
+    entry = group.log_records.last()
+    assert entry.type == entry.TYPE_ESCALATION_TRIGGERED
+    assert entry.escalation_error_code is None
+    assert entry.reason == "attached to existing incident"
+    assert entry.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
+    assert entry.escalation_policy == step
+    assert entry.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_resolved_update(setup_alert_group_and_escalation_step):
+    """A tracked incident reported as resolved is deactivated and a new incident is declared."""
+    group, step, tracked_incident = setup_alert_group_and_escalation_step(already_declared_incident=True)
+    old_id = tracked_incident.incident_id
+    replacement_id = "333"
+    assert replacement_id != old_id
+
+    resolved_incident = ({"incidentID": old_id, "title": "Incident1", "status": "resolved"}, None)
+    created_incident = ({"incidentID": replacement_id, "title": "Incident2"}, None)
+    with patch("common.incident_api.client.IncidentAPIClient.get_incident", return_value=resolved_incident):
+        with patch("common.incident_api.client.IncidentAPIClient.create_incident", return_value=created_incident):
+            declare_incident(group.pk, step.pk)
+
+    tracked_incident.refresh_from_db()
+    assert tracked_incident.is_active is False
+
+    # the alert group is linked to the new incident only
+    assert not tracked_incident.attached_alert_groups.filter(id=group.id).exists()
+    assert group.related_incidents.get().incident_id == replacement_id
+
+    entry = group.log_records.last()
+    assert entry.type == entry.TYPE_ESCALATION_TRIGGERED
+    assert entry.escalation_error_code is None
+    assert entry.reason == "incident declared"
+    assert entry.step_specific_info == {"incident_id": replacement_id, "incident_title": "Incident2"}
+    assert entry.escalation_policy == step
+    assert entry.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_declare_incident_attach_alert_group_skip_incident_update(
+    setup_alert_group_and_escalation_step, make_alert_group
+):
+    """With the maximum number of alert groups already attached, no activity is added to the incident."""
+    group, step, open_incident = setup_alert_group_and_escalation_step(already_declared_incident=True)
+    integration, route = group.channel, group.channel_filter
+    incident_id = open_incident.incident_id
+
+    # fill the incident up to the attachment limit
+    for _ in range(MAX_ATTACHED_ALERT_GROUPS_PER_INCIDENT):
+        open_incident.attached_alert_groups.add(
+            make_alert_group(alert_receive_channel=integration, channel_filter=route)
+        )
+
+    active_incident = ({"incidentID": incident_id, "title": "Incident", "status": "active"}, None)
+    with patch("common.incident_api.client.IncidentAPIClient.get_incident", return_value=active_incident):
+        with patch("common.incident_api.client.IncidentAPIClient.add_activity") as add_activity_mock:
+            declare_incident(group.pk, step.pk)
+
+    add_activity_mock.assert_not_called()
+
+    # the alert group is still linked to the incident in the DB
+    assert open_incident.attached_alert_groups.filter(id=group.id).exists()
+
+    entry = group.log_records.last()
+    assert entry.type == entry.TYPE_ESCALATION_TRIGGERED
+    assert entry.escalation_error_code is None
+    assert entry.reason == "attached to existing incident"
+    assert entry.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
+    assert entry.escalation_policy == step
+    assert entry.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_get_existing_incident_error(setup_alert_group_and_escalation_step):
+    """A 500 while fetching the tracked incident propagates; a 404 leads to declaring a new incident."""
+    group, step, tracked_incident = setup_alert_group_and_escalation_step(already_declared_incident=True)
+
+    server_error = IncidentAPIException(status=500, url="some-url")
+    with patch("common.incident_api.client.IncidentAPIClient.get_incident", side_effect=server_error):
+        with pytest.raises(IncidentAPIException):
+            declare_incident(group.pk, step.pk)
+
+    # when the incident is not found instead, a new one should be created
+    old_id = tracked_incident.incident_id
+    replacement_id = "333"
+    assert replacement_id != old_id
+
+    not_found = IncidentAPIException(status=404, url="some-url")
+    created_incident = ({"incidentID": replacement_id, "title": "Incident"}, None)
+    with patch("common.incident_api.client.IncidentAPIClient.get_incident", side_effect=not_found):
+        with patch("common.incident_api.client.IncidentAPIClient.create_incident", return_value=created_incident):
+            declare_incident(group.pk, step.pk)
+
+    group.refresh_from_db()
+
+    # the alert group is linked to a brand new incident, not to the previous one
+    assert not tracked_incident.attached_alert_groups.filter(id=group.id).exists()
+    declared = group.related_incidents.get()
+    assert declared != tracked_incident
+    assert declared.incident_id == replacement_id
+    assert declared.organization == group.channel.organization
+    assert declared.channel_filter == group.channel_filter
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_attach_alert_group_error(setup_alert_group_and_escalation_step):
+    """A failing add-activity call does not prevent the alert group from being attached in the DB."""
+    group, step, open_incident = setup_alert_group_and_escalation_step(already_declared_incident=True)
+    incident_id = open_incident.incident_id
+
+    active_incident = ({"incidentID": incident_id, "title": "Incident", "status": "active"}, None)
+    activity_error = IncidentAPIException(status=500, url="some-url")
+    with patch("common.incident_api.client.IncidentAPIClient.get_incident", return_value=active_incident):
+        with patch("common.incident_api.client.IncidentAPIClient.add_activity", side_effect=activity_error):
+            declare_incident(group.pk, step.pk)
+
+    group.refresh_from_db()
+
+    # the API attachment failed, but the DB relation is still recorded
+    assert open_incident.attached_alert_groups.filter(id=group.id).exists()
+
+    entry = group.log_records.last()
+    assert entry.type == entry.TYPE_ESCALATION_TRIGGERED
+    assert entry.escalation_error_code is None
+    assert entry.reason == "attached to existing incident"
+    assert entry.step_specific_info == {"incident_id": incident_id, "incident_title": "Incident"}
+    assert entry.escalation_policy == step
+    assert entry.escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT
+
+
+@pytest.mark.django_db
+@httpretty.activate(verbose=True, allow_net_connect=False)
+def test_create_incident_error(setup_alert_group_and_escalation_step):
+    """A 500 from the create incident call is re-raised by the task."""
+    group, step, _ = setup_alert_group_and_escalation_step(already_declared_incident=False)
+    server_error = IncidentAPIException(status=500, url="some-url")
+
+    with patch("common.incident_api.client.IncidentAPIClient.create_incident", side_effect=server_error):
+        with pytest.raises(IncidentAPIException):
+            declare_incident(group.pk, step.pk)
--- a/engine/apps/alerts/utils.py
+++ b/engine/apps/alerts/utils.py
@ -1,3 +1,11 @@
+import typing
+
+from django.conf import settings
+
+if typing.TYPE_CHECKING:
+    from apps.user_management.models import Organization
+
+
 def render_relative_timeline(log_created_at, alert_group_started_at):
    time_delta = log_created_at - alert_group_started_at
    seconds = int(time_delta.total_seconds())
@ -12,3 +20,7 @@ def render_relative_timeline(log_created_at, alert_group_started_at):
        return "%dm%ds" % (minutes, seconds)
    else:
        return "%ds" % (seconds,)
+
+
+def is_declare_incident_step_enabled(organization: "Organization") -> bool:
+    """Tell whether the declare incident escalation step may be used by ``organization``.
+
+    Both the organization's ``is_grafana_incident_enabled`` flag and the
+    ``FEATURE_DECLARE_INCIDENT_STEP_ENABLED`` setting must be truthy.
+    """
+    # ``and`` evaluates to one of its operands; coerce so the annotated ``bool`` always holds
+    return bool(organization.is_grafana_incident_enabled and settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED)
--- a/engine/apps/api/serializers/escalation_policy.py
+++ b/engine/apps/api/serializers/escalation_policy.py
@ -3,6 +3,7 @@ from datetime import timedelta
 from rest_framework import serializers

 from apps.alerts.models import EscalationChain, EscalationPolicy
+from apps.alerts.utils import is_declare_incident_step_enabled
 from apps.schedules.models import OnCallSchedule
 from apps.slack.models import SlackUserGroup
 from apps.user_management.models import Team, User
@ -24,6 +25,7 @@ TO_TIME = "to_time"
 NUM_ALERTS_IN_WINDOW = "num_alerts_in_window"
 NUM_MINUTES_IN_WINDOW = "num_minutes_in_window"
 CUSTOM_WEBHOOK_TRIGGER = "custom_webhook"
+SEVERITY = "severity"

 STEP_TYPE_TO_RELATED_FIELD_MAP = {
    EscalationPolicy.STEP_WAIT: [WAIT_DELAY],
@ -35,6 +37,7 @@ STEP_TYPE_TO_RELATED_FIELD_MAP = {
    EscalationPolicy.STEP_NOTIFY_IF_TIME: [FROM_TIME, TO_TIME],
    EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: [NUM_ALERTS_IN_WINDOW, NUM_MINUTES_IN_WINDOW],
    EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK: [CUSTOM_WEBHOOK_TRIGGER],
+    EscalationPolicy.STEP_DECLARE_INCIDENT: [SEVERITY],
 }


@ -81,6 +84,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
        allow_null=True,
        filter_field="organization",
    )
+    severity = serializers.CharField(required=False, allow_null=True)

    class Meta:
        model = EscalationPolicy
@ -99,6 +103,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
            "notify_schedule",
            "notify_to_group",
            "notify_to_team_members",
+            "severity",
            "important",
        ]

@ -123,6 +128,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
            NUM_ALERTS_IN_WINDOW,
            NUM_MINUTES_IN_WINDOW,
            CUSTOM_WEBHOOK_TRIGGER,
+            SEVERITY,
        ]

        step = data.get("step")
@ -151,6 +157,8 @@ class EscalationPolicySerializer(EagerLoadingMixin, serializers.ModelSerializer)
            raise serializers.ValidationError("Invalid step value")
        if step_type in EscalationPolicy.SLACK_INTEGRATION_REQUIRED_STEPS and organization.slack_team_identity is None:
            raise serializers.ValidationError("Invalid escalation step type: step is Slack-specific")
+        if step_type == EscalationPolicy.STEP_DECLARE_INCIDENT and not is_declare_incident_step_enabled(organization):
+            raise serializers.ValidationError("Invalid escalation step type: step is not enabled")
        return step_type

    def to_representation(self, instance):
@ -214,6 +222,7 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer):
            NUM_ALERTS_IN_WINDOW,
            NUM_MINUTES_IN_WINDOW,
            CUSTOM_WEBHOOK_TRIGGER,
+            SEVERITY,
        ]

        for f in STEP_TYPE_TO_RELATED_FIELD_MAP.get(step, []):
--- a/engine/apps/api/tests/test_alert_group.py
+++ b/engine/apps/api/tests/test_alert_group.py
@ -975,6 +975,37 @@ def test_get_filter_labels(
    assert response.json()["results"][0]["pk"] == alert_groups[0].public_primary_key


+@pytest.mark.django_db
+def test_get_filter_by_related_incident(
+    alert_group_internal_api_setup, make_related_incident, make_alert_group, make_user_auth_headers
+):
+    """The ``has_related_incident`` query param splits alert groups by whether an incident is attached."""
+    user, token, alert_groups = alert_group_internal_api_setup
+
+    # attach a related incident to the first alert group only
+    linked_group = alert_groups[0]
+    incident = make_related_incident("1", linked_group.channel.organization, linked_group.channel_filter)
+    incident.attached_alert_groups.add(linked_group)
+
+    client = APIClient()
+    url = reverse("api-internal:alertgroup-list")
+
+    expected_counts = (
+        ("?has_related_incident=true", 1),
+        ("?has_related_incident=false", 3),
+    )
+    for query, expected_count in expected_counts:
+        response = client.get(url + query, format="json", **make_user_auth_headers(user, token))
+
+        assert response.status_code == status.HTTP_200_OK
+        assert len(response.data["results"]) == expected_count
+
+
@pytest.mark.django_db
 def test_get_title_search(
    settings,
--- a/engine/apps/api/tests/test_escalation_policy.py
+++ b/engine/apps/api/tests/test_escalation_policy.py
@ -10,6 +10,7 @@ from rest_framework.test import APIClient

 from apps.alerts.models import EscalationPolicy
 from apps.api.permissions import LegacyAccessControlRole
+from common.incident_api.client import DEFAULT_INCIDENT_SEVERITY, IncidentAPIException


@pytest.fixture()
@ -651,8 +652,13 @@ def test_create_escalation_policy_with_no_important_version(
    make_escalation_chain,
    step,
    make_user_auth_headers,
+    settings,
 ):
    organization, user, _, _ = make_organization_and_user_with_slack_identities()
+    # make sure declare incident step is enabled
+    settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
+    organization.is_grafana_incident_enabled = True
+    organization.save()
    _, token = make_token_for_organization(organization)
    escalation_chain = make_escalation_chain(organization)

@ -832,6 +838,7 @@ def test_escalation_policy_switch_importance(
        "notify_schedule": None,
        "notify_to_group": None,
        "notify_to_team_members": None,
+        "severity": None,
        "important": True,
        "wait_delay": None,
    }
@ -889,6 +896,7 @@ def test_escalation_policy_filter_by_user(
            "notify_schedule": None,
            "notify_to_group": None,
            "notify_to_team_members": None,
+            "severity": None,
            "important": False,
        },
        {
@ -906,6 +914,7 @@ def test_escalation_policy_filter_by_user(
            "notify_schedule": None,
            "notify_to_group": None,
            "notify_to_team_members": None,
+            "severity": None,
            "important": False,
        },
    ]
@ -971,6 +980,7 @@ def test_escalation_policy_filter_by_slack_channel(
            "notify_schedule": None,
            "notify_to_group": None,
            "notify_to_team_members": None,
+            "severity": None,
            "important": False,
        },
    ]
@ -1001,3 +1011,88 @@ def test_escalation_policy_escalation_options_webhooks(
    returned_options = [option["value"] for option in response.json()]

    assert EscalationPolicy.STEP_TRIGGER_CUSTOM_WEBHOOK in returned_options
+
+
+@pytest.mark.django_db
+def test_escalation_policy_severity_options(
+    make_organization_and_user_with_plugin_token,
+    make_user_auth_headers,
+):
+    """Severity options mirror the incident API, fall back to the default on errors and honour labels."""
+    organization, user, token = make_organization_and_user_with_plugin_token()
+    organization.is_grafana_labels_enabled = False
+    organization.save()
+
+    client = APIClient()
+    url = reverse("api-internal:escalation_policy-severity-options")
+
+    def fetch_options(**mock_config):
+        # request the options while the severities API call is replaced by a configured mock
+        with patch("common.incident_api.client.IncidentAPIClient.get_severities", **mock_config):
+            response = client.get(url, format="json", **make_user_auth_headers(user, token))
+        return response.json()
+
+    available_severities = [
+        {"severityID": "abc", "orgID": "1", "displayLabel": "Pending", "level": -1},
+        {"severityID": "def", "orgID": "1", "displayLabel": "Critical", "level": 1},
+    ]
+    severity_options = [
+        {"value": item["displayLabel"], "display_name": item["displayLabel"]} for item in available_severities
+    ]
+
+    # without labels enabled: only the severities returned by the API
+    assert fetch_options(return_value=(available_severities, None)) == severity_options
+
+    # a failing request does not break; only the default option is offered
+    default_only = [{"value": DEFAULT_INCIDENT_SEVERITY, "display_name": DEFAULT_INCIDENT_SEVERITY}]
+    assert fetch_options(side_effect=IncidentAPIException(status=404, url="some-url")) == default_only
+
+    # labels enabled: the set-from-label option comes first
+    organization.is_grafana_labels_enabled = True
+    organization.save()
+
+    from_label_option = {
+        "value": EscalationPolicy.SEVERITY_SET_FROM_LABEL,
+        "display_name": EscalationPolicy.SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE,
+    }
+    assert fetch_options(return_value=(available_severities, None)) == [from_label_option] + severity_options
+
+
+@pytest.mark.django_db
+def test_create_escalation_policy_declare_incident(
+    escalation_policy_internal_api_setup, make_user_auth_headers, settings
+):
+    """Creating a declare incident step is expected to fail first, then succeed once the feature is enabled."""
+    token, escalation_chain, _, user, _ = escalation_policy_internal_api_setup
+    organization = escalation_chain.organization
+    client = APIClient()
+    list_url = reverse("api-internal:escalation_policy-list")
+
+    payload = {
+        "step": EscalationPolicy.STEP_DECLARE_INCIDENT,
+        "severity": "critical",
+        "escalation_chain": escalation_chain.public_primary_key,
+    }
+
+    # first attempt, before the feature is switched on below
+    rejected = client.post(list_url, payload, format="json", **make_user_auth_headers(user, token))
+    assert rejected.status_code == status.HTTP_400_BAD_REQUEST
+
+    # make sure declare incident step is enabled
+    settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
+    organization.is_grafana_incident_enabled = True
+    organization.save()
+
+    created = client.post(list_url, payload, format="json", **make_user_auth_headers(user, token))
+    assert created.status_code == status.HTTP_201_CREATED
+    policy = EscalationPolicy.objects.get(public_primary_key=created.data["id"])
+    assert policy.step == EscalationPolicy.STEP_DECLARE_INCIDENT
+    assert policy.severity == "critical"
+
+    # the stored step and severity are exposed by the detail endpoint
+    detail_url = reverse("api-internal:escalation_policy-detail", kwargs={"pk": policy.public_primary_key})
+    detail = client.get(detail_url, format="json", **make_user_auth_headers(user, token)).json()
+    assert detail["step"] == EscalationPolicy.STEP_DECLARE_INCIDENT
+    assert detail["severity"] == "critical"
--- a/engine/apps/api/views/alert_group.py
+++ b/engine/apps/api/views/alert_group.py
@ -17,6 +17,7 @@ from apps.alerts.constants import ActionSource
 from apps.alerts.models import AlertGroup, AlertReceiveChannel, EscalationChain, ResolutionNote
 from apps.alerts.paging import unpage_user
 from apps.alerts.tasks import delete_alert_group, send_update_resolution_note_signal
+from apps.alerts.utils import is_declare_incident_step_enabled
 from apps.api.errors import AlertGroupAPIError
 from apps.api.label_filtering import parse_label_query
 from apps.api.permissions import RBACPermission
@ -120,6 +121,7 @@ class AlertGroupFilter(DateRangeFilterMixin, ModelFieldFilterMixin, filters.Filt
    )
    with_resolution_note = filters.BooleanFilter(method="filter_with_resolution_note")
    mine = filters.BooleanFilter(method="filter_mine")
+    has_related_incident = filters.BooleanFilter(field_name="related_incidents", lookup_expr="isnull", exclude=True)

    def filter_status(self, queryset, name, value):
        if not value:
@ -719,6 +721,7 @@ class AlertGroupView(
        """
        Retrieve a list of valid filter options that can be used to filter alert groups
        """
+        organization = self.request.auth.organization
        api_root = "/api/internal/v1/"
        default_day_range = 30

@ -804,7 +807,7 @@ class AlertGroupView(

            filter_options = [{"name": "search", "type": "search", "description": description}] + filter_options

-        if is_labels_feature_enabled(self.request.auth.organization):
+        if is_labels_feature_enabled(organization):
            filter_options.append(
                {
                    "name": "label",
@ -813,6 +816,15 @@ class AlertGroupView(
                }
            )

+        if is_declare_incident_step_enabled(organization):
+            filter_options.append(
+                {
+                    "name": "has_related_incident",
+                    "type": "boolean",
+                    "default": "true",
+                }
+            )
+
        return Response(filter_options)

    @extend_schema(
--- a/engine/apps/api/views/escalation_policy.py
+++ b/engine/apps/api/views/escalation_policy.py
@ -1,3 +1,5 @@
+import logging
+
 from django.conf import settings
 from django.db.models import Q
 from rest_framework.decorators import action
@ -5,6 +7,7 @@ from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response

 from apps.alerts.models import EscalationPolicy
+from apps.alerts.utils import is_declare_incident_step_enabled
 from apps.api.permissions import RBACPermission
 from apps.api.serializers.escalation_policy import (
    EscalationPolicyCreateSerializer,
@ -19,9 +22,12 @@ from common.api_helpers.mixins import (
    TeamFilteringMixin,
    UpdateSerializerMixin,
 )
+from common.incident_api.client import DEFAULT_INCIDENT_SEVERITY, IncidentAPIClient, IncidentAPIException
 from common.insight_log import EntityEvent, write_resource_insight_log
 from common.ordered_model.viewset import OrderedModelViewSet

+logger = logging.getLogger(__name__)
+

 class EscalationPolicyView(
    TeamFilteringMixin,
@ -42,6 +48,7 @@ class EscalationPolicyView(
        "escalation_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
        "delay_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
        "num_minutes_in_window_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
+        "severity_options": [RBACPermission.Permissions.ESCALATION_CHAINS_READ],
        "create": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
        "update": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
        "partial_update": [RBACPermission.Permissions.ESCALATION_CHAINS_WRITE],
@ -116,6 +123,7 @@ class EscalationPolicyView(

    @action(detail=False, methods=["get"])
    def escalation_options(self, request):
+        grafana_declare_incident_enabled = is_declare_incident_step_enabled(organization=self.request.auth.organization)
        choices = []
        for step in EscalationPolicy.INTERNAL_API_STEPS:
            verbal = EscalationPolicy.INTERNAL_API_STEPS_TO_VERBAL_MAP[step]
@ -126,7 +134,7 @@ class EscalationPolicyView(
            if slack_integration_required and not settings.FEATURE_SLACK_INTEGRATION_ENABLED:
                continue

-            if step == EscalationPolicy.STEP_DECLARE_INCIDENT:
+            if step == EscalationPolicy.STEP_DECLARE_INCIDENT and not grafana_declare_incident_enabled:
                continue

            choices.append(
@ -155,3 +163,25 @@ class EscalationPolicyView(
            {"value": choice[0], "display_name": choice[1]} for choice in EscalationPolicy.WEB_DURATION_CHOICES_MINUTES
        ]
        return Response(choices)
+
+    @action(detail=False, methods=["get"])
+    def severity_options(self, request):
+        """
+        List the selectable severities as ``{"value": ..., "display_name": ...}`` entries.
+
+        The "set from label" entry comes first when the organization has Grafana labels enabled.
+        It is followed by the ``displayLabel`` of every severity returned by the Incident API client,
+        or by ``DEFAULT_INCIDENT_SEVERITY`` alone when that request raises ``IncidentAPIException``.
+        """
+        org = self.request.auth.organization
+        options = []
+        if org.is_grafana_labels_enabled:
+            label_option = {
+                "value": EscalationPolicy.SEVERITY_SET_FROM_LABEL,
+                "display_name": EscalationPolicy.SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE,
+            }
+            options.append(label_option)
+
+        api_client = IncidentAPIClient(org.grafana_url, org.api_token)
+        try:
+            severities, _ = api_client.get_severities()
+        except IncidentAPIException as e:
+            # the severities request failed: log it and still offer the default severity
+            logger.error(f"Error getting severities: {e.msg}")
+            options.append({"value": DEFAULT_INCIDENT_SEVERITY, "display_name": DEFAULT_INCIDENT_SEVERITY})
+        else:
+            for severity in severities:
+                label = severity["displayLabel"]
+                options.append({"value": label, "display_name": label})
+        return Response(options)
--- a/engine/apps/public_api/serializers/escalation_policies.py
+++ b/engine/apps/public_api/serializers/escalation_policies.py
@ -5,6 +5,7 @@ from django.utils.functional import cached_property
 from rest_framework import fields, serializers

 from apps.alerts.models import EscalationChain, EscalationPolicy
+from apps.alerts.utils import is_declare_incident_step_enabled
 from apps.schedules.models import OnCallSchedule
 from apps.slack.models import SlackUserGroup
 from apps.user_management.models import Team, User
@ -72,6 +73,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
        required=False,
        source="custom_webhook",
    )
+    severity = serializers.CharField(required=False)
    important = serializers.BooleanField(required=False)

    TIME_FORMAT = "%H:%M:%SZ"
@ -101,6 +103,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
            "notify_if_time_to",
            "num_alerts_in_window",
            "num_minutes_in_window",
+            "severity",
        ]

    PREFETCH_RELATED = ["notify_to_users_queue"]
@ -120,6 +123,9 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
        if step_type == EscalationPolicy.STEP_FINAL_NOTIFYALL and organization.slack_team_identity is None:
            raise BadRequest(detail="Invalid escalation step type: step is Slack-specific")

+        if step_type == EscalationPolicy.STEP_DECLARE_INCIDENT and not is_declare_incident_step_enabled(organization):
+            raise BadRequest("Invalid escalation step type: step is not enabled")
+
        return step_type

    def create(self, validated_data):
@ -163,6 +169,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
            "notify_if_time_to",
            "num_alerts_in_window",
            "num_minutes_in_window",
+            "severity",
        ]
        if step == EscalationPolicy.STEP_WAIT:
            fields_to_remove.remove("duration")
@ -190,6 +197,8 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
        elif step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
            fields_to_remove.remove("num_alerts_in_window")
            fields_to_remove.remove("num_minutes_in_window")
+        elif step == EscalationPolicy.STEP_DECLARE_INCIDENT:
+            fields_to_remove.remove("severity")

        if (
            step in EscalationPolicy.DEFAULT_TO_IMPORTANT_STEP_MAPPING
@ -213,6 +222,7 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
            "to_time",
            "num_alerts_in_window",
            "num_minutes_in_window",
+            "severity",
        ]
        step = validated_data.get("step")
        important = validated_data.pop("important", None)
@ -243,6 +253,8 @@ class EscalationPolicySerializer(EagerLoadingMixin, OrderedModelSerializer):
        elif step == EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
            validated_data_fields_to_remove.remove("num_alerts_in_window")
            validated_data_fields_to_remove.remove("num_minutes_in_window")
+        elif step == EscalationPolicy.STEP_DECLARE_INCIDENT:
+            validated_data_fields_to_remove.remove("severity")

        for field in validated_data_fields_to_remove:
            validated_data.pop(field, None)
@ -299,5 +311,7 @@ class EscalationPolicyUpdateSerializer(EscalationPolicySerializer):
                if step != EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW:
                    instance.num_alerts_in_window = None
                    instance.num_minutes_in_window = None
+                if step != EscalationPolicy.STEP_DECLARE_INCIDENT:
+                    instance.severity = None

        return super().update(instance, validated_data)
--- a/engine/apps/public_api/tests/test_escalation_policies.py
+++ b/engine/apps/public_api/tests/test_escalation_policies.py
@ -463,3 +463,43 @@ def test_update_escalation_policy_using_notify_team_members(
    escalation_policy = EscalationPolicy.objects.get(public_primary_key=response.data["id"])
    serializer = EscalationPolicySerializer(escalation_policy)
    assert response.data == serializer.data
+
+
+@pytest.mark.django_db
+def test_create_escalation_policy_declare_incident(
+    make_organization_and_user_with_token,
+    escalation_policies_setup,
+    settings,
+):
+    """Creating a declare incident step returns 400 until the step is enabled; afterwards it is stored with severity."""
+    organization, user, token = make_organization_and_user_with_token()
+    escalation_chain, _, _ = escalation_policies_setup(organization, user)
+
+    client = APIClient()
+    list_url = reverse("api-public:escalation_policies-list")
+    payload = {
+        "escalation_chain_id": escalation_chain.public_primary_key,
+        "type": "declare_incident",
+        "position": 0,
+        "severity": "critical",
+    }
+
+    def post_policy():
+        # same create request is issued before and after enabling the step
+        return client.post(list_url, data=payload, format="json", HTTP_AUTHORIZATION=token)
+
+    # with the default settings the step type is rejected
+    rejected = post_policy()
+    assert rejected.status_code == status.HTTP_400_BAD_REQUEST
+
+    # make sure declare incident step is enabled
+    settings.FEATURE_DECLARE_INCIDENT_STEP_ENABLED = True
+    organization.is_grafana_incident_enabled = True
+    organization.save()
+
+    created = post_policy()
+    assert created.status_code == status.HTTP_201_CREATED
+
+    # the stored policy carries the requested step and severity
+    policy = EscalationPolicy.objects.get(public_primary_key=created.data["id"])
+    assert policy.step == EscalationPolicy.STEP_DECLARE_INCIDENT
+    assert policy.severity == "critical"
+
+    # and the detail endpoint exposes both of them
+    detail_url = reverse("api-public:escalation_policies-detail", kwargs={"pk": policy.public_primary_key})
+    detail = client.get(detail_url, format="json", HTTP_AUTHORIZATION=token).json()
+    assert detail["type"] == EscalationPolicy.PUBLIC_STEP_CHOICES_MAP[EscalationPolicy.STEP_DECLARE_INCIDENT]
+    assert detail["severity"] == "critical"
--- a/engine/conftest.py
+++ b/engine/conftest.py
@ -35,6 +35,7 @@ from apps.alerts.tests.factories import (
    EscalationChainFactory,
    EscalationPolicyFactory,
    InvitationFactory,
+    RelatedIncidentFactory,
    ResolutionNoteFactory,
    ResolutionNoteSlackMessageFactory,
    UserNotificationBundleFactory,
@ -1112,3 +1113,11 @@ def make_user_notification_bundle():
        )

    return _make_user_notification_bundle
+
+
+@pytest.fixture
+def make_related_incident():
+    """Fixture returning a function that builds an object through ``RelatedIncidentFactory``."""
+
+    def _make_related_incident(incident_id, organization, channel_filter):
+        factory_kwargs = {
+            "incident_id": incident_id,
+            "organization": organization,
+            "channel_filter": channel_filter,
+        }
+        return RelatedIncidentFactory(**factory_kwargs)
+
+    return _make_related_incident
--- a/engine/settings/base.py
+++ b/engine/settings/base.py
@ -75,6 +75,7 @@ FEATURE_LABELS_ENABLED_PER_ORG = getenv_list("FEATURE_LABELS_ENABLED_PER_ORG", d
 FEATURE_ALERT_GROUP_SEARCH_ENABLED = getenv_boolean("FEATURE_ALERT_GROUP_SEARCH_ENABLED", default=True)
 FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS = getenv_integer("FEATURE_ALERT_GROUP_SEARCH_CUTOFF_DAYS", default=None)
 FEATURE_NOTIFICATION_BUNDLE_ENABLED = getenv_boolean("FEATURE_NOTIFICATION_BUNDLE_ENABLED", default=True)
+FEATURE_DECLARE_INCIDENT_STEP_ENABLED = getenv_boolean("FEATURE_DECLARE_INCIDENT_STEP_ENABLED", default=False)

 TWILIO_API_KEY_SID = os.environ.get("TWILIO_API_KEY_SID")
 TWILIO_API_KEY_SECRET = os.environ.get("TWILIO_API_KEY_SECRET")
--- a/engine/settings/celery_task_routes.py
+++ b/engine/settings/celery_task_routes.py
@ -94,6 +94,7 @@ CELERY_TASK_ROUTES = {
    # CRITICAL
    "apps.alerts.tasks.acknowledge_reminder.acknowledge_reminder_task": {"queue": "critical"},
    "apps.alerts.tasks.acknowledge_reminder.unacknowledge_timeout_task": {"queue": "critical"},
+    "apps.alerts.tasks.declare_incident.declare_incident": {"queue": "critical"},
    "apps.alerts.tasks.distribute_alert.send_alert_create_signal": {"queue": "critical"},
    "apps.alerts.tasks.escalate_alert_group.escalate_alert_group": {"queue": "critical"},
    "apps.alerts.tasks.invite_user_to_join_incident.invite_user_to_join_incident": {"queue": "critical"},