fix: make auto recovery evidence-driven

This commit is contained in:
Mikael Hugo 2026-04-29 19:45:43 +02:00
parent 2ed1638153
commit aa70e1db56
5 changed files with 304 additions and 24 deletions

View file

@ -54,9 +54,11 @@ export function mapStatusToExitCode(status: string): number {
/**
* Detect genuine auto-mode termination notifications.
*
* Only matches the actual stop signals emitted by stopAuto():
* Only matches the actual stop/pause signals emitted by stopAuto()/pauseAuto():
* "Auto-mode stopped..."
* "Step-mode stopped..."
* "Auto-mode paused..."
* "Step-mode paused..."
*
* Does NOT match progress notifications that happen to contain words like
* "complete" or "stopped" (e.g., "Override resolved — rewrite-docs completed",
@ -64,7 +66,12 @@ export function mapStatusToExitCode(status: string): number {
*
* Blocked detection is separate checked via isBlockedNotification.
*/
export const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"];
export const TERMINAL_PREFIXES = [
"auto-mode stopped",
"step-mode stopped",
"auto-mode paused",
"step-mode paused",
];
/**
* Idle timeout for short, single-shot commands (status, queue, history, etc.).
@ -115,7 +122,11 @@ export function isBlockedNotification(event: Record<string, unknown>): boolean {
return false;
const message = String(event.message ?? "").toLowerCase();
// Blocked notifications come through stopAuto as "Auto-mode stopped (Blocked: ...)"
return message.includes("blocked:");
return (
message.includes("blocked:") ||
message.startsWith("auto-mode paused") ||
message.startsWith("step-mode paused")
);
}
export function isMilestoneReadyNotification(

View file

@ -4,7 +4,8 @@
* and blocker placeholder generation.
*/
import { existsSync } from "node:fs";
import { existsSync, readFileSync } from "node:fs";
import { relative } from "node:path";
import type {
ExtensionAPI,
ExtensionContext,
@ -13,8 +14,18 @@ import { resolveAgentEnd } from "./auto-loop.js";
import {
diagnoseExpectedArtifact,
resolveExpectedArtifactPath,
verifyExpectedArtifact,
writeBlockerPlaceholder,
} from "./auto-recovery.js";
import { parseRoadmap as parseLegacyRoadmap } from "./parsers-legacy.js";
import {
relMilestoneFile,
relSliceFile,
resolveMilestoneFile,
resolveSliceFile,
} from "./paths.js";
import { getSlice, isDbAvailable } from "./sf-db.js";
import { parseUnitId } from "./unit-id.js";
import {
formatExecuteTaskRecoveryStatus,
inspectExecuteTaskDurability,
@ -29,6 +40,92 @@ export interface RecoveryContext {
unitRecoveryCount: Map<string, number>;
}
export interface UnitRecoveryStatus {
expected: string;
verified: boolean;
missing: string[];
}
function relToBase(basePath: string, path: string): string {
const rel = relative(basePath, path);
return rel && !rel.startsWith("..") ? rel : path;
}
function formatSliceDbStatus(status: string | undefined): string {
return status ? `DB slice status is "${status}" (expected complete)` : "";
}
export function inspectUnitRecoveryStatus(
unitType: string,
unitId: string,
basePath: string,
): UnitRecoveryStatus {
const expected =
diagnoseExpectedArtifact(unitType, unitId, basePath) ??
"required durable artifact";
const missing: string[] = [];
const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
if (!artifactPath) {
missing.push(`artifact path could not be resolved for ${expected}`);
} else if (!existsSync(artifactPath)) {
missing.push(`artifact missing (${relToBase(basePath, artifactPath)})`);
}
if (unitType === "complete-slice") {
const { milestone: mid, slice: sid } = parseUnitId(unitId);
if (mid && sid) {
const uatPath = resolveSliceFile(basePath, mid, sid, "UAT");
if (!uatPath || !existsSync(uatPath)) {
missing.push(
`UAT missing (${relSliceFile(basePath, mid, sid, "UAT")})`,
);
}
const dbSlice = getSlice(mid, sid);
if (dbSlice) {
const dbStatus = formatSliceDbStatus(dbSlice.status);
if (dbStatus && dbSlice.status !== "complete") missing.push(dbStatus);
} else if (!isDbAvailable()) {
const roadmapPath = resolveMilestoneFile(basePath, mid, "ROADMAP");
if (!roadmapPath || !existsSync(roadmapPath)) {
missing.push(
`roadmap missing (${relMilestoneFile(basePath, mid, "ROADMAP")})`,
);
} else {
try {
const roadmap = parseLegacyRoadmap(
readFileSync(roadmapPath, "utf-8"),
);
const slice = roadmap.slices.find((s) => s.id === sid);
if (slice && !slice.done) {
missing.push(
`roadmap checkbox not marked [x] (${relMilestoneFile(basePath, mid, "ROADMAP")})`,
);
}
} catch {
missing.push(
`roadmap could not be parsed (${relMilestoneFile(basePath, mid, "ROADMAP")})`,
);
}
}
}
}
}
const verified = verifyExpectedArtifact(unitType, unitId, basePath);
if (!verified && missing.length === 0) {
missing.push(`artifact verification failed for ${expected}`);
}
return { expected, verified, missing };
}
export function formatUnitRecoveryStatus(status: UnitRecoveryStatus): string {
return status.missing.length > 0
? status.missing.join("; ")
: "all durable artifacts present";
}
export async function recoverTimedOutUnit(
ctx: ExtensionContext,
pi: ExtensionAPI,
@ -169,21 +266,19 @@ export async function recoverTimedOutUnit(
return "paused";
}
const expected =
diagnoseExpectedArtifact(unitType, unitId, basePath) ??
"required durable artifact";
const status = inspectUnitRecoveryStatus(unitType, unitId, basePath);
const diagnostic = formatUnitRecoveryStatus(status);
// Check if the artifact already exists on disk — agent may have written it
// without signaling completion.
const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
if (artifactPath && existsSync(artifactPath)) {
// Check full unit durability — the primary artifact alone is not enough for
// units such as complete-slice, which also require UAT and state transition.
if (status.verified) {
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
phase: "finalized",
recoveryAttempts: recoveryAttempts + 1,
lastRecoveryReason: reason,
});
ctx.ui.notify(
`${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} artifact already exists on disk. Advancing. (attempt ${attemptNumber})`,
`${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} durable state already verified. Advancing. (attempt ${attemptNumber})`,
"info",
);
unitRecoveryCount.delete(recoveryKey);
@ -208,19 +303,21 @@ export async function recoverTimedOutUnit(
`**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before skip.**`,
`You are still executing ${unitType} ${unitId}.`,
`Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts} — next failure skips this unit.`,
`Expected durable output: ${expected}.`,
"You MUST write the artifact file NOW, even if incomplete.",
"Write whatever you have — partial research, preliminary findings, best-effort analysis.",
"A partial artifact is infinitely better than no artifact.",
`Expected durable output: ${status.expected}.`,
`Current durability status: ${diagnostic}.`,
"Repair only the missing durability items listed above.",
"You MUST write the missing artifact/state NOW, even if incomplete.",
"Write whatever you have — partial research, preliminary findings, best-effort analysis — and record the remaining blocker explicitly.",
"If you are truly blocked, write the file with a BLOCKER section explaining why.",
]
: [
`**${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — stay in auto-mode.**`,
`You are still executing ${unitType} ${unitId}.`,
`Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`,
`Expected durable output: ${expected}.`,
"Stop broad exploration.",
"Write the required artifact now.",
`Expected durable output: ${status.expected}.`,
`Current durability status: ${diagnostic}.`,
"Stop broad exploration and do not redo work that is already durable.",
"Repair only the missing durability items listed above.",
"If blocked, write the partial artifact and explicitly record the blocker instead of going silent.",
];
@ -233,7 +330,7 @@ export async function recoverTimedOutUnit(
{ triggerTurn: true, deliverAs: "steer" },
);
ctx.ui.notify(
`${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to produce ${expected} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`,
`${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to repair ${diagnostic} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`,
"warning",
);
return "recovered";
@ -250,7 +347,7 @@ export async function recoverTimedOutUnit(
lastRecoveryReason: reason,
});
ctx.ui.notify(
`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s) — worktree branch preserved. Re-run /sf auto once blockers are resolved.`,
`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s): ${diagnostic}. Worktree branch preserved. Re-run /sf auto once blockers are resolved.`,
"error",
);
return "paused";
@ -262,7 +359,7 @@ export async function recoverTimedOutUnit(
unitType,
unitId,
basePath,
`${reason} recovery exhausted ${maxRecoveryAttempts} attempts without producing the artifact.`,
`${reason} recovery exhausted ${maxRecoveryAttempts} attempts. Status: ${diagnostic}.`,
);
if (placeholder) {

View file

@ -0,0 +1,117 @@
import assert from "node:assert/strict";
import {
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "node:test";
import {
formatUnitRecoveryStatus,
inspectUnitRecoveryStatus,
type RecoveryContext,
recoverTimedOutUnit,
} from "../auto-timeout-recovery.ts";
import { closeDatabase, isDbAvailable } from "../sf-db.ts";
const ROADMAP_INCOMPLETE = `# M001: Test Milestone
## Slices
- [ ] **S01: Test Slice** \`risk:low\`
> After this: something works
`;
function createCompleteSliceFixture(): string {
const base = mkdtempSync(join(tmpdir(), "sf-timeout-recovery-test-"));
const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
mkdirSync(sliceDir, { recursive: true });
writeFileSync(
join(base, ".sf", "milestones", "M001", "M001-ROADMAP.md"),
ROADMAP_INCOMPLETE,
"utf-8",
);
writeFileSync(join(sliceDir, "S01-SUMMARY.md"), "# Summary\n", "utf-8");
return base;
}
function makeRecoveryContext(base: string): RecoveryContext {
return {
basePath: base,
verbose: false,
currentUnitStartedAt: Date.now(),
unitRecoveryCount: new Map(),
};
}
test("inspectUnitRecoveryStatus reports complete-slice gaps even when SUMMARY exists", () => {
if (isDbAvailable()) closeDatabase();
const base = createCompleteSliceFixture();
try {
const status = inspectUnitRecoveryStatus(
"complete-slice",
"M001/S01",
base,
);
const diagnostic = formatUnitRecoveryStatus(status);
assert.equal(status.verified, false);
assert.ok(
!diagnostic.includes("S01-SUMMARY.md"),
"existing SUMMARY must not be reported as missing",
);
assert.match(diagnostic, /UAT missing/);
assert.match(diagnostic, /roadmap checkbox not marked \[x\]/);
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("recoverTimedOutUnit steers complete-slice with exact missing durability items", async () => {
if (isDbAvailable()) closeDatabase();
const base = createCompleteSliceFixture();
const sent: string[] = [];
const notifications: string[] = [];
try {
const result = await recoverTimedOutUnit(
{
ui: {
notify: (message: string) => notifications.push(message),
},
} as any,
{
sendMessage: (message: { content: string }) => {
sent.push(message.content);
},
} as any,
"complete-slice",
"M001/S01",
"idle",
makeRecoveryContext(base),
);
assert.equal(result, "recovered");
assert.equal(sent.length, 1, "should steer instead of finalizing");
assert.match(sent[0], /Current durability status:/);
assert.match(sent[0], /UAT missing/);
assert.match(sent[0], /roadmap checkbox not marked \[x\]/);
assert.ok(
notifications.some((n) => n.includes("to repair")),
"notification should explain the exact repair target",
);
assert.ok(
notifications.every((n) => !n.includes("durable state already verified")),
"summary-only state must not be treated as recovered",
);
const runtimeRecord = readFileSync(
join(base, ".sf", "runtime", "units", "complete-slice-M001-S01.json"),
"utf-8",
);
assert.match(runtimeRecord, /"phase": "recovered"/);
} finally {
rmSync(base, { recursive: true, force: true });
}
});

View file

@ -15,7 +15,12 @@ import test from "node:test";
// ─── Extracted detection logic (mirrors headless.ts) ────────────────────────
const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"];
const TERMINAL_PREFIXES = [
"auto-mode stopped",
"step-mode stopped",
"auto-mode paused",
"step-mode paused",
];
function isTerminalNotification(event: Record<string, unknown>): boolean {
if (event.type !== "extension_ui_request" || event.method !== "notify")
@ -28,7 +33,11 @@ function isBlockedNotification(event: Record<string, unknown>): boolean {
if (event.type !== "extension_ui_request" || event.method !== "notify")
return false;
const message = String(event.message ?? "").toLowerCase();
return message.includes("blocked:");
return (
message.includes("blocked:") ||
message.startsWith("auto-mode paused") ||
message.startsWith("step-mode paused")
);
}
function makeNotify(message: string): Record<string, unknown> {
@ -71,6 +80,16 @@ test("detects 'Step-mode stopped.' as terminal", () => {
assert.ok(isTerminalNotification(makeNotify("Step-mode stopped.")));
});
test("detects provider-error auto pause as terminal", () => {
assert.ok(
isTerminalNotification(
makeNotify(
"Auto-mode paused due to provider error: Schema overload: consecutive tool validation failures exceeded cap",
),
),
);
});
// ─── False positives that previously triggered early exit (#879) ────────────
test("does NOT match 'All slices are complete — nothing to discuss.'", () => {
@ -150,6 +169,17 @@ test("detects inline 'Blocked:' message", () => {
);
});
test("detects auto pause as blocked for headless callers", () => {
assert.ok(
isBlockedNotification(
makeNotify(
"Auto-mode paused due to provider error: Schema overload: consecutive tool validation failures exceeded cap",
),
),
);
assert.ok(isBlockedNotification(makeNotify("Auto-mode paused (Escape).")));
});
test("does NOT match 'blocked' without colon (avoids false positives)", () => {
assert.ok(
!isBlockedNotification(

View file

@ -194,7 +194,9 @@ import {
EXIT_CANCELLED,
EXIT_ERROR,
EXIT_SUCCESS,
isBlockedNotification,
isInteractiveHeadlessTool,
isTerminalNotification,
mapStatusToExitCode,
shouldArmHeadlessIdleTimeout,
} from "../headless-events.js";
@ -246,6 +248,29 @@ test("isInteractiveHeadlessTool: non-interactive tools stay false", () => {
assert.equal(isInteractiveHeadlessTool(undefined), false);
});
test("isTerminalNotification: auto pause is terminal for headless callers", () => {
assert.equal(
isTerminalNotification({
type: "extension_ui_request",
method: "notify",
message:
"Auto-mode paused due to provider error: Schema overload: consecutive tool validation failures exceeded cap",
}),
true,
);
});
test("isBlockedNotification: auto pause exits as blocked", () => {
assert.equal(
isBlockedNotification({
type: "extension_ui_request",
method: "notify",
message: "Auto-mode paused (Escape).",
}),
true,
);
});
test("shouldArmHeadlessIdleTimeout: arms after tool calls when no interactive tool is in flight", () => {
assert.equal(shouldArmHeadlessIdleTimeout(1, 0), true);
assert.equal(shouldArmHeadlessIdleTimeout(3, 0), true);