feat(sf): producer side of mid-execution escalation (PDD)

Closes the producer half of ADR-011 P2. With this commit a task agent
can call buildEscalationArtifact + writeEscalationArtifact and the
escalation goes end-to-end: artifact persisted to disk, DB flag set,
state derivation picks it up, dispatch returns 'stop'.

PDD spec for this change:

Purpose: let a task agent file an escalation when it hits a decision
  the user must make (overwrite vs fail, model A vs model B, etc.)
  rather than continue past undocumented ambiguity.
Consumer: future sf_task_escalate tool, and direct callers of
  escalation.ts (e.g., resolve-time DB tools).
Contract:
  1. buildEscalationArtifact validates options (2-4 entries, unique
     ids, recommendation must reference a real option id) and throws
     a descriptive Error before any IO. Verified via smoke test:
     unknown recommendation id → "is not one of the option ids: …"
  2. writeEscalationArtifact atomically writes the JSON to
     .sf/milestones/{M}/slices/{S}/tasks/{T}-ESCALATION.json,
     auto-creating the tasks/ subdirectory.
  3. continueWithDefault=false → setTaskEscalationPending → loop
     pauses on next dispatch (verified end-to-end).
  4. continueWithDefault=true → setTaskEscalationAwaitingReview →
     loop continues; artifact recorded for human review later
     (verified — detectPendingEscalation returns null for awaiting).
  5. clearTaskEscalationFlags zeros both pending+awaiting but
     preserves escalation_artifact_path so the audit trail survives.
  6. Emits a UOK audit event 'escalation-manual-attention-created'
     with traceId 'escalation:{M}:{S}:{T}' for cross-system trace.
Failure boundary:
  - Validation throws BEFORE any DB or FS write — partial state
    impossible.
  - resolveSlicePath returns null when the slice doesn't exist;
    writeEscalationArtifact throws with a clear /sf doctor hint.
  - atomicWriteSync is the same temp+rename pattern used by every
    other SF artifact write.
Evidence:
  - typecheck clean
  - smoke test runs 7 checks covering the 6 contract items end-to-end
    (build, write, pending detection, awaiting-review skip,
    clear, validation rejection, audit trail traceId)
Non-goals:
  - sf_task_escalate MCP tool registration (separate fire — small,
    just exposing buildEscalationArtifact+writeEscalationArtifact
    via the tool surface).
  - resolveEscalation (apply user's choice → clear flags → carry
    forward as override) — bigger; later fire.
  - listActionableEscalations / listAllEscalations helpers — for
    /sf escalate list, later fire.
  - /sf escalate user command itself.
Invariants:
  - Safety: builder validates BEFORE writer commits anything. The
    two phases never partially succeed.
  - Liveness: the two flags are mutually exclusive (the set helpers
    write both in a single UPDATE) — there is no state in which both are 1.
DB schema gains escalation_awaiting_review column (v24 migration).
The two helpers setTaskEscalationPending and
setTaskEscalationAwaitingReview write the mutually-exclusive flag
pair in one UPDATE so a reader can never observe both = 1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 20:16:15 +02:00
parent a558ff6c64
commit 14efcd7734
2 changed files with 227 additions and 6 deletions

View file

@ -1,15 +1,143 @@
// SF Extension — ADR-011 Phase 2 Mid-Execution Escalation (gsd-2 ADR)
//
// Currently scoped to detection only. The wider gsd-2 module (build/write/
// resolve/list) requires several DB helpers SF doesn't yet have; those land
// in subsequent fires. This file covers what state derivation + dispatch
// need today: reading an artifact and detecting whether any task in a slice
// is paused waiting for a user response.
// Owns: artifact I/O (read/build/write), detection, and the producer-side
// flag flips. Resolution + listing land when the user-facing /sf escalate
// command lands.
import { existsSync, readFileSync } from "node:fs";
import { existsSync, mkdirSync, readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { atomicWriteSync } from "./atomic-write.js";
import { resolveSlicePath } from "./paths.js";
import type { TaskRow } from "./sf-db.js";
import {
setTaskEscalationAwaitingReview,
setTaskEscalationPending,
} from "./sf-db.js";
import type { EscalationArtifact, EscalationOption } from "./types.js";
import { buildAuditEnvelope, emitUokAuditEvent } from "./uok/audit.js";
// ─── Paths ────────────────────────────────────────────────────────────────
/**
 * Compute where a task's escalation artifact lives:
 *   {slice dir}/tasks/{taskId}-ESCALATION.json
 *
 * The slice directory is looked up through `resolveSlicePath`; when that
 * lookup yields nothing, no path can be formed.
 *
 * @param basePath - Project root handed through to `resolveSlicePath`.
 * @param milestoneId - Milestone that owns the slice.
 * @param sliceId - Slice that owns the task.
 * @param taskId - Task id used as the artifact file-name prefix.
 * @returns The artifact path, or `null` when the slice directory cannot be
 *   resolved.
 */
export function escalationArtifactPath(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): string | null {
  const sliceRoot = resolveSlicePath(basePath, milestoneId, sliceId);
  return sliceRoot
    ? join(sliceRoot, "tasks", `${taskId}-ESCALATION.json`)
    : null;
}
// ─── Artifact builder ──────────────────────────────────────────────────────
/**
 * Assemble a version-1 EscalationArtifact from a task agent's payload.
 *
 * All checks run before the artifact object is created, and this function
 * performs no file or database access, so a rejected payload leaves nothing
 * behind.
 *
 * Checks, in order:
 *   1. `options` is an array holding between 2 and 4 entries.
 *   2. every option id is distinct.
 *   3. `recommendation` equals one of the option ids.
 *
 * @param params - Escalation payload; fields are copied onto the artifact
 *   as-is (the `options` array is passed through by reference).
 * @returns The artifact, stamped with the current time as an ISO-8601
 *   `createdAt`.
 * @throws Error with a descriptive message when any check above fails.
 */
export function buildEscalationArtifact(params: {
  taskId: string;
  sliceId: string;
  milestoneId: string;
  question: string;
  options: EscalationOption[];
  recommendation: string;
  recommendationRationale: string;
  continueWithDefault: boolean;
}): EscalationArtifact {
  const { options, recommendation } = params;

  const hasValidCount =
    Array.isArray(options) && options.length >= 2 && options.length <= 4;
  if (!hasValidCount) {
    throw new Error(
      `escalation.options must have between 2 and 4 entries (got ${options?.length ?? 0})`,
    );
  }

  // A Set drops repeats, so a size mismatch means at least one id is reused.
  const knownIds = new Set(options.map(({ id }) => id));
  if (knownIds.size !== options.length) {
    throw new Error("escalation.options must have unique ids");
  }

  if (!knownIds.has(recommendation)) {
    const listedIds = Array.from(knownIds).join(", ");
    throw new Error(
      `escalation.recommendation "${recommendation}" is not one of the option ids: ${listedIds}`,
    );
  }

  const createdAt = new Date().toISOString();
  return {
    version: 1,
    taskId: params.taskId,
    sliceId: params.sliceId,
    milestoneId: params.milestoneId,
    question: params.question,
    options,
    recommendation,
    recommendationRationale: params.recommendationRationale,
    continueWithDefault: params.continueWithDefault,
    createdAt,
  };
}
// ─── Artifact writer ───────────────────────────────────────────────────────
/**
 * Persist an escalation artifact and record it against its task.
 *
 * Steps, in order:
 *   1. resolve the artifact path (throws if the slice cannot be resolved),
 *   2. create the parent `tasks/` directory if needed,
 *   3. write the artifact as pretty-printed JSON via `atomicWriteSync`,
 *   4. set the task's DB flag — awaiting-review when
 *      `continueWithDefault` is true, pending otherwise,
 *   5. emit an `escalation-manual-attention-created` UOK audit event whose
 *      traceId is `escalation:{milestone}:{slice}:{task}`.
 *
 * @param basePath - Project root used for path resolution and audit output.
 * @param artifact - The artifact to persist.
 * @returns The path the artifact was written to.
 * @throws Error when the artifact path cannot be resolved.
 */
export function writeEscalationArtifact(
  basePath: string,
  artifact: EscalationArtifact,
): string {
  const { milestoneId, sliceId, taskId } = artifact;

  const target = escalationArtifactPath(basePath, milestoneId, sliceId, taskId);
  if (!target) {
    throw new Error(
      `escalation: cannot resolve tasks dir for ${milestoneId}/${sliceId} — run /sf doctor`,
    );
  }

  // The file goes to disk before the DB flag is flipped.
  mkdirSync(dirname(target), { recursive: true });
  const serialized = JSON.stringify(artifact, null, 2);
  atomicWriteSync(target, serialized);

  // Exactly one of the two flag setters runs, chosen by continueWithDefault.
  const recordFlag = artifact.continueWithDefault
    ? setTaskEscalationAwaitingReview
    : setTaskEscalationPending;
  recordFlag(milestoneId, sliceId, taskId, target);

  const envelope = buildAuditEnvelope({
    traceId: `escalation:${milestoneId}:${sliceId}:${taskId}`,
    category: "gate",
    type: "escalation-manual-attention-created",
    payload: {
      milestoneId,
      sliceId,
      taskId,
      continueWithDefault: artifact.continueWithDefault,
      optionCount: artifact.options.length,
      recommendation: artifact.recommendation,
    },
  });
  emitUokAuditEvent(basePath, envelope);

  return target;
}
/** Read an escalation artifact by path. Returns null when missing or malformed.
*

View file

@ -368,6 +368,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
verification_status TEXT NOT NULL DEFAULT '',
sequence INTEGER DEFAULT 0, -- Ordering hint: tools may set this to control execution order
escalation_pending INTEGER NOT NULL DEFAULT 0, -- ADR-011 P2 (gsd-2): pause-on-escalation flag
escalation_awaiting_review INTEGER NOT NULL DEFAULT 0, -- ADR-011 P2 (gsd-2): continueWithDefault=true marker (no pause)
escalation_artifact_path TEXT DEFAULT NULL, -- ADR-011 P2 (gsd-2): path to T##-ESCALATION.json
PRIMARY KEY (milestone_id, slice_id, id),
FOREIGN KEY (milestone_id, slice_id) REFERENCES slices(milestone_id, id)
@ -1465,6 +1466,25 @@ function migrateSchema(db: DbAdapter): void {
});
}
if (currentVersion < 24) {
// ADR-011 P2 (gsd-2 ADR): the third escalation flag for the
// continueWithDefault=true case — an artifact is recorded for human
// review later, but the loop is NOT paused. Mutually exclusive with
// escalation_pending (the writer flips one or the other).
ensureColumn(
db,
"tasks",
"escalation_awaiting_review",
`ALTER TABLE tasks ADD COLUMN escalation_awaiting_review INTEGER NOT NULL DEFAULT 0`,
);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 24,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -2330,6 +2350,75 @@ export function updateTaskStatus(
});
}
/**
 * ADR-011 P2: mark a task as paused on an escalation.
 *
 * One UPDATE sets escalation_pending = 1, resets
 * escalation_awaiting_review to 0 and stores the artifact path, so the two
 * flags are written together.
 *
 * NOTE(review): the result of the UPDATE is not inspected — presumably a
 * task id that matches no row is a silent no-op; confirm that is intended.
 *
 * @param milestoneId - Milestone part of the task key.
 * @param sliceId - Slice part of the task key.
 * @param taskId - Task id.
 * @param artifactPath - Path stored in escalation_artifact_path.
 * @throws SFError (SF_STALE_STATE) when no database is open.
 */
export function setTaskEscalationPending(
  milestoneId: string,
  sliceId: string,
  taskId: string,
  artifactPath: string,
): void {
  const db = currentDb;
  if (!db) throw new SFError(SF_STALE_STATE, "sf-db: No database open");

  const statement = db.prepare(
    `UPDATE tasks
SET escalation_pending = 1,
escalation_awaiting_review = 0,
escalation_artifact_path = :path
WHERE milestone_id = :mid AND slice_id = :sid AND id = :tid`,
  );
  const bindings = {
    ":path": artifactPath,
    ":mid": milestoneId,
    ":sid": sliceId,
    ":tid": taskId,
  };
  statement.run(bindings);
}
/**
 * ADR-011 P2: mark a task as having an escalation recorded for later
 * review (the continueWithDefault=true case — an artifact exists, but the
 * pending flag is not set).
 *
 * One UPDATE sets escalation_awaiting_review = 1, resets
 * escalation_pending to 0 and stores the artifact path, so the two flags
 * are written together.
 *
 * NOTE(review): the result of the UPDATE is not inspected — presumably a
 * task id that matches no row is a silent no-op; confirm that is intended.
 *
 * @param milestoneId - Milestone part of the task key.
 * @param sliceId - Slice part of the task key.
 * @param taskId - Task id.
 * @param artifactPath - Path stored in escalation_artifact_path.
 * @throws SFError (SF_STALE_STATE) when no database is open.
 */
export function setTaskEscalationAwaitingReview(
  milestoneId: string,
  sliceId: string,
  taskId: string,
  artifactPath: string,
): void {
  const db = currentDb;
  if (!db) throw new SFError(SF_STALE_STATE, "sf-db: No database open");

  const statement = db.prepare(
    `UPDATE tasks
SET escalation_awaiting_review = 1,
escalation_pending = 0,
escalation_artifact_path = :path
WHERE milestone_id = :mid AND slice_id = :sid AND id = :tid`,
  );
  const bindings = {
    ":path": artifactPath,
    ":mid": milestoneId,
    ":sid": sliceId,
    ":tid": taskId,
  };
  statement.run(bindings);
}
/**
 * ADR-011 P2: reset both escalation flags on a task.
 *
 * Sets escalation_pending and escalation_awaiting_review to 0 in one
 * UPDATE. escalation_artifact_path is deliberately not touched, so the
 * reference to the artifact remains after the flags are cleared.
 *
 * @param milestoneId - Milestone part of the task key.
 * @param sliceId - Slice part of the task key.
 * @param taskId - Task id.
 * @throws SFError (SF_STALE_STATE) when no database is open.
 */
export function clearTaskEscalationFlags(
  milestoneId: string,
  sliceId: string,
  taskId: string,
): void {
  const db = currentDb;
  if (!db) throw new SFError(SF_STALE_STATE, "sf-db: No database open");

  const statement = db.prepare(
    `UPDATE tasks
SET escalation_pending = 0,
escalation_awaiting_review = 0
WHERE milestone_id = :mid AND slice_id = :sid AND id = :tid`,
  );
  const bindings = { ":mid": milestoneId, ":sid": sliceId, ":tid": taskId };
  statement.run(bindings);
}
export function setTaskBlockerDiscovered(
milestoneId: string,
sliceId: string,
@ -2550,6 +2639,8 @@ export interface TaskRow {
verification_status?: string;
/** ADR-011 P2: 1 = task is paused waiting for the user to resolve an escalation. */
escalation_pending?: number;
/** ADR-011 P2: 1 = continueWithDefault=true marker — artifact recorded but loop not paused. */
escalation_awaiting_review?: number;
/** ADR-011 P2: relative path to the T##-ESCALATION.json artifact next to T##-PLAN.md. */
escalation_artifact_path?: string | null;
}
@ -2644,6 +2735,8 @@ function rowToTask(row: Record<string, unknown>): TaskRow {
sequence: (row["sequence"] as number) ?? 0,
verification_status: (row["verification_status"] as string) ?? "",
escalation_pending: (row["escalation_pending"] as number) ?? 0,
escalation_awaiting_review:
(row["escalation_awaiting_review"] as number) ?? 0,
escalation_artifact_path:
(row["escalation_artifact_path"] as string | null) ?? null,
};