fix: make auto recovery evidence-driven
This commit is contained in:
parent
2ed1638153
commit
aa70e1db56
5 changed files with 304 additions and 24 deletions
|
|
@ -54,9 +54,11 @@ export function mapStatusToExitCode(status: string): number {
|
|||
/**
|
||||
* Detect genuine auto-mode termination notifications.
|
||||
*
|
||||
* Only matches the actual stop signals emitted by stopAuto():
|
||||
* Only matches the actual stop/pause signals emitted by stopAuto()/pauseAuto():
|
||||
* "Auto-mode stopped..."
|
||||
* "Step-mode stopped..."
|
||||
* "Auto-mode paused..."
|
||||
* "Step-mode paused..."
|
||||
*
|
||||
* Does NOT match progress notifications that happen to contain words like
|
||||
* "complete" or "stopped" (e.g., "Override resolved — rewrite-docs completed",
|
||||
|
|
@ -64,7 +66,12 @@ export function mapStatusToExitCode(status: string): number {
|
|||
*
|
||||
* Blocked detection is separate — checked via isBlockedNotification.
|
||||
*/
|
||||
export const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"];
|
||||
export const TERMINAL_PREFIXES = [
|
||||
"auto-mode stopped",
|
||||
"step-mode stopped",
|
||||
"auto-mode paused",
|
||||
"step-mode paused",
|
||||
];
|
||||
|
||||
/**
|
||||
* Idle timeout for short, single-shot commands (status, queue, history, etc.).
|
||||
|
|
@ -115,7 +122,11 @@ export function isBlockedNotification(event: Record<string, unknown>): boolean {
|
|||
return false;
|
||||
const message = String(event.message ?? "").toLowerCase();
|
||||
// Blocked notifications come through stopAuto as "Auto-mode stopped (Blocked: ...)"
|
||||
return message.includes("blocked:");
|
||||
return (
|
||||
message.includes("blocked:") ||
|
||||
message.startsWith("auto-mode paused") ||
|
||||
message.startsWith("step-mode paused")
|
||||
);
|
||||
}
|
||||
|
||||
export function isMilestoneReadyNotification(
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
* and blocker placeholder generation.
|
||||
*/
|
||||
|
||||
import { existsSync } from "node:fs";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { relative } from "node:path";
|
||||
import type {
|
||||
ExtensionAPI,
|
||||
ExtensionContext,
|
||||
|
|
@ -13,8 +14,18 @@ import { resolveAgentEnd } from "./auto-loop.js";
|
|||
import {
|
||||
diagnoseExpectedArtifact,
|
||||
resolveExpectedArtifactPath,
|
||||
verifyExpectedArtifact,
|
||||
writeBlockerPlaceholder,
|
||||
} from "./auto-recovery.js";
|
||||
import { parseRoadmap as parseLegacyRoadmap } from "./parsers-legacy.js";
|
||||
import {
|
||||
relMilestoneFile,
|
||||
relSliceFile,
|
||||
resolveMilestoneFile,
|
||||
resolveSliceFile,
|
||||
} from "./paths.js";
|
||||
import { getSlice, isDbAvailable } from "./sf-db.js";
|
||||
import { parseUnitId } from "./unit-id.js";
|
||||
import {
|
||||
formatExecuteTaskRecoveryStatus,
|
||||
inspectExecuteTaskDurability,
|
||||
|
|
@ -29,6 +40,92 @@ export interface RecoveryContext {
|
|||
unitRecoveryCount: Map<string, number>;
|
||||
}
|
||||
|
||||
export interface UnitRecoveryStatus {
|
||||
expected: string;
|
||||
verified: boolean;
|
||||
missing: string[];
|
||||
}
|
||||
|
||||
function relToBase(basePath: string, path: string): string {
|
||||
const rel = relative(basePath, path);
|
||||
return rel && !rel.startsWith("..") ? rel : path;
|
||||
}
|
||||
|
||||
function formatSliceDbStatus(status: string | undefined): string {
|
||||
return status ? `DB slice status is "${status}" (expected complete)` : "";
|
||||
}
|
||||
|
||||
export function inspectUnitRecoveryStatus(
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
basePath: string,
|
||||
): UnitRecoveryStatus {
|
||||
const expected =
|
||||
diagnoseExpectedArtifact(unitType, unitId, basePath) ??
|
||||
"required durable artifact";
|
||||
const missing: string[] = [];
|
||||
const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
|
||||
if (!artifactPath) {
|
||||
missing.push(`artifact path could not be resolved for ${expected}`);
|
||||
} else if (!existsSync(artifactPath)) {
|
||||
missing.push(`artifact missing (${relToBase(basePath, artifactPath)})`);
|
||||
}
|
||||
|
||||
if (unitType === "complete-slice") {
|
||||
const { milestone: mid, slice: sid } = parseUnitId(unitId);
|
||||
if (mid && sid) {
|
||||
const uatPath = resolveSliceFile(basePath, mid, sid, "UAT");
|
||||
if (!uatPath || !existsSync(uatPath)) {
|
||||
missing.push(
|
||||
`UAT missing (${relSliceFile(basePath, mid, sid, "UAT")})`,
|
||||
);
|
||||
}
|
||||
|
||||
const dbSlice = getSlice(mid, sid);
|
||||
if (dbSlice) {
|
||||
const dbStatus = formatSliceDbStatus(dbSlice.status);
|
||||
if (dbStatus && dbSlice.status !== "complete") missing.push(dbStatus);
|
||||
} else if (!isDbAvailable()) {
|
||||
const roadmapPath = resolveMilestoneFile(basePath, mid, "ROADMAP");
|
||||
if (!roadmapPath || !existsSync(roadmapPath)) {
|
||||
missing.push(
|
||||
`roadmap missing (${relMilestoneFile(basePath, mid, "ROADMAP")})`,
|
||||
);
|
||||
} else {
|
||||
try {
|
||||
const roadmap = parseLegacyRoadmap(
|
||||
readFileSync(roadmapPath, "utf-8"),
|
||||
);
|
||||
const slice = roadmap.slices.find((s) => s.id === sid);
|
||||
if (slice && !slice.done) {
|
||||
missing.push(
|
||||
`roadmap checkbox not marked [x] (${relMilestoneFile(basePath, mid, "ROADMAP")})`,
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
missing.push(
|
||||
`roadmap could not be parsed (${relMilestoneFile(basePath, mid, "ROADMAP")})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const verified = verifyExpectedArtifact(unitType, unitId, basePath);
|
||||
if (!verified && missing.length === 0) {
|
||||
missing.push(`artifact verification failed for ${expected}`);
|
||||
}
|
||||
|
||||
return { expected, verified, missing };
|
||||
}
|
||||
|
||||
export function formatUnitRecoveryStatus(status: UnitRecoveryStatus): string {
|
||||
return status.missing.length > 0
|
||||
? status.missing.join("; ")
|
||||
: "all durable artifacts present";
|
||||
}
|
||||
|
||||
export async function recoverTimedOutUnit(
|
||||
ctx: ExtensionContext,
|
||||
pi: ExtensionAPI,
|
||||
|
|
@ -169,21 +266,19 @@ export async function recoverTimedOutUnit(
|
|||
return "paused";
|
||||
}
|
||||
|
||||
const expected =
|
||||
diagnoseExpectedArtifact(unitType, unitId, basePath) ??
|
||||
"required durable artifact";
|
||||
const status = inspectUnitRecoveryStatus(unitType, unitId, basePath);
|
||||
const diagnostic = formatUnitRecoveryStatus(status);
|
||||
|
||||
// Check if the artifact already exists on disk — agent may have written it
|
||||
// without signaling completion.
|
||||
const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
|
||||
if (artifactPath && existsSync(artifactPath)) {
|
||||
// Check full unit durability — the primary artifact alone is not enough for
|
||||
// units such as complete-slice, which also require UAT and state transition.
|
||||
if (status.verified) {
|
||||
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
|
||||
phase: "finalized",
|
||||
recoveryAttempts: recoveryAttempts + 1,
|
||||
lastRecoveryReason: reason,
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} artifact already exists on disk. Advancing. (attempt ${attemptNumber})`,
|
||||
`${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} durable state already verified. Advancing. (attempt ${attemptNumber})`,
|
||||
"info",
|
||||
);
|
||||
unitRecoveryCount.delete(recoveryKey);
|
||||
|
|
@ -208,19 +303,21 @@ export async function recoverTimedOutUnit(
|
|||
`**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before skip.**`,
|
||||
`You are still executing ${unitType} ${unitId}.`,
|
||||
`Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts} — next failure skips this unit.`,
|
||||
`Expected durable output: ${expected}.`,
|
||||
"You MUST write the artifact file NOW, even if incomplete.",
|
||||
"Write whatever you have — partial research, preliminary findings, best-effort analysis.",
|
||||
"A partial artifact is infinitely better than no artifact.",
|
||||
`Expected durable output: ${status.expected}.`,
|
||||
`Current durability status: ${diagnostic}.`,
|
||||
"Repair only the missing durability items listed above.",
|
||||
"You MUST write the missing artifact/state NOW, even if incomplete.",
|
||||
"Write whatever you have — partial research, preliminary findings, best-effort analysis — and record the remaining blocker explicitly.",
|
||||
"If you are truly blocked, write the file with a BLOCKER section explaining why.",
|
||||
]
|
||||
: [
|
||||
`**${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — stay in auto-mode.**`,
|
||||
`You are still executing ${unitType} ${unitId}.`,
|
||||
`Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`,
|
||||
`Expected durable output: ${expected}.`,
|
||||
"Stop broad exploration.",
|
||||
"Write the required artifact now.",
|
||||
`Expected durable output: ${status.expected}.`,
|
||||
`Current durability status: ${diagnostic}.`,
|
||||
"Stop broad exploration and do not redo work that is already durable.",
|
||||
"Repair only the missing durability items listed above.",
|
||||
"If blocked, write the partial artifact and explicitly record the blocker instead of going silent.",
|
||||
];
|
||||
|
||||
|
|
@ -233,7 +330,7 @@ export async function recoverTimedOutUnit(
|
|||
{ triggerTurn: true, deliverAs: "steer" },
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to produce ${expected} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`,
|
||||
`${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to repair ${diagnostic} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`,
|
||||
"warning",
|
||||
);
|
||||
return "recovered";
|
||||
|
|
@ -250,7 +347,7 @@ export async function recoverTimedOutUnit(
|
|||
lastRecoveryReason: reason,
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s) — worktree branch preserved. Re-run /sf auto once blockers are resolved.`,
|
||||
`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s): ${diagnostic}. Worktree branch preserved. Re-run /sf auto once blockers are resolved.`,
|
||||
"error",
|
||||
);
|
||||
return "paused";
|
||||
|
|
@ -262,7 +359,7 @@ export async function recoverTimedOutUnit(
|
|||
unitType,
|
||||
unitId,
|
||||
basePath,
|
||||
`${reason} recovery exhausted ${maxRecoveryAttempts} attempts without producing the artifact.`,
|
||||
`${reason} recovery exhausted ${maxRecoveryAttempts} attempts. Status: ${diagnostic}.`,
|
||||
);
|
||||
|
||||
if (placeholder) {
|
||||
|
|
|
|||
117
src/resources/extensions/sf/tests/auto-timeout-recovery.test.ts
Normal file
117
src/resources/extensions/sf/tests/auto-timeout-recovery.test.ts
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
import assert from "node:assert/strict";
|
||||
import {
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { test } from "node:test";
|
||||
import {
|
||||
formatUnitRecoveryStatus,
|
||||
inspectUnitRecoveryStatus,
|
||||
type RecoveryContext,
|
||||
recoverTimedOutUnit,
|
||||
} from "../auto-timeout-recovery.ts";
|
||||
import { closeDatabase, isDbAvailable } from "../sf-db.ts";
|
||||
|
||||
const ROADMAP_INCOMPLETE = `# M001: Test Milestone
|
||||
|
||||
## Slices
|
||||
|
||||
- [ ] **S01: Test Slice** \`risk:low\`
|
||||
> After this: something works
|
||||
`;
|
||||
|
||||
function createCompleteSliceFixture(): string {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-timeout-recovery-test-"));
|
||||
const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
|
||||
mkdirSync(sliceDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(base, ".sf", "milestones", "M001", "M001-ROADMAP.md"),
|
||||
ROADMAP_INCOMPLETE,
|
||||
"utf-8",
|
||||
);
|
||||
writeFileSync(join(sliceDir, "S01-SUMMARY.md"), "# Summary\n", "utf-8");
|
||||
return base;
|
||||
}
|
||||
|
||||
function makeRecoveryContext(base: string): RecoveryContext {
|
||||
return {
|
||||
basePath: base,
|
||||
verbose: false,
|
||||
currentUnitStartedAt: Date.now(),
|
||||
unitRecoveryCount: new Map(),
|
||||
};
|
||||
}
|
||||
|
||||
test("inspectUnitRecoveryStatus reports complete-slice gaps even when SUMMARY exists", () => {
|
||||
if (isDbAvailable()) closeDatabase();
|
||||
const base = createCompleteSliceFixture();
|
||||
try {
|
||||
const status = inspectUnitRecoveryStatus(
|
||||
"complete-slice",
|
||||
"M001/S01",
|
||||
base,
|
||||
);
|
||||
const diagnostic = formatUnitRecoveryStatus(status);
|
||||
|
||||
assert.equal(status.verified, false);
|
||||
assert.ok(
|
||||
!diagnostic.includes("S01-SUMMARY.md"),
|
||||
"existing SUMMARY must not be reported as missing",
|
||||
);
|
||||
assert.match(diagnostic, /UAT missing/);
|
||||
assert.match(diagnostic, /roadmap checkbox not marked \[x\]/);
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("recoverTimedOutUnit steers complete-slice with exact missing durability items", async () => {
|
||||
if (isDbAvailable()) closeDatabase();
|
||||
const base = createCompleteSliceFixture();
|
||||
const sent: string[] = [];
|
||||
const notifications: string[] = [];
|
||||
try {
|
||||
const result = await recoverTimedOutUnit(
|
||||
{
|
||||
ui: {
|
||||
notify: (message: string) => notifications.push(message),
|
||||
},
|
||||
} as any,
|
||||
{
|
||||
sendMessage: (message: { content: string }) => {
|
||||
sent.push(message.content);
|
||||
},
|
||||
} as any,
|
||||
"complete-slice",
|
||||
"M001/S01",
|
||||
"idle",
|
||||
makeRecoveryContext(base),
|
||||
);
|
||||
|
||||
assert.equal(result, "recovered");
|
||||
assert.equal(sent.length, 1, "should steer instead of finalizing");
|
||||
assert.match(sent[0], /Current durability status:/);
|
||||
assert.match(sent[0], /UAT missing/);
|
||||
assert.match(sent[0], /roadmap checkbox not marked \[x\]/);
|
||||
assert.ok(
|
||||
notifications.some((n) => n.includes("to repair")),
|
||||
"notification should explain the exact repair target",
|
||||
);
|
||||
assert.ok(
|
||||
notifications.every((n) => !n.includes("durable state already verified")),
|
||||
"summary-only state must not be treated as recovered",
|
||||
);
|
||||
const runtimeRecord = readFileSync(
|
||||
join(base, ".sf", "runtime", "units", "complete-slice-M001-S01.json"),
|
||||
"utf-8",
|
||||
);
|
||||
assert.match(runtimeRecord, /"phase": "recovered"/);
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
|
@ -15,7 +15,12 @@ import test from "node:test";
|
|||
|
||||
// ─── Extracted detection logic (mirrors headless.ts) ────────────────────────
|
||||
|
||||
const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"];
|
||||
const TERMINAL_PREFIXES = [
|
||||
"auto-mode stopped",
|
||||
"step-mode stopped",
|
||||
"auto-mode paused",
|
||||
"step-mode paused",
|
||||
];
|
||||
|
||||
function isTerminalNotification(event: Record<string, unknown>): boolean {
|
||||
if (event.type !== "extension_ui_request" || event.method !== "notify")
|
||||
|
|
@ -28,7 +33,11 @@ function isBlockedNotification(event: Record<string, unknown>): boolean {
|
|||
if (event.type !== "extension_ui_request" || event.method !== "notify")
|
||||
return false;
|
||||
const message = String(event.message ?? "").toLowerCase();
|
||||
return message.includes("blocked:");
|
||||
return (
|
||||
message.includes("blocked:") ||
|
||||
message.startsWith("auto-mode paused") ||
|
||||
message.startsWith("step-mode paused")
|
||||
);
|
||||
}
|
||||
|
||||
function makeNotify(message: string): Record<string, unknown> {
|
||||
|
|
@ -71,6 +80,16 @@ test("detects 'Step-mode stopped.' as terminal", () => {
|
|||
assert.ok(isTerminalNotification(makeNotify("Step-mode stopped.")));
|
||||
});
|
||||
|
||||
test("detects provider-error auto pause as terminal", () => {
|
||||
assert.ok(
|
||||
isTerminalNotification(
|
||||
makeNotify(
|
||||
"Auto-mode paused due to provider error: Schema overload: consecutive tool validation failures exceeded cap",
|
||||
),
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
// ─── False positives that previously triggered early exit (#879) ────────────
|
||||
|
||||
test("does NOT match 'All slices are complete — nothing to discuss.'", () => {
|
||||
|
|
@ -150,6 +169,17 @@ test("detects inline 'Blocked:' message", () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("detects auto pause as blocked for headless callers", () => {
|
||||
assert.ok(
|
||||
isBlockedNotification(
|
||||
makeNotify(
|
||||
"Auto-mode paused due to provider error: Schema overload: consecutive tool validation failures exceeded cap",
|
||||
),
|
||||
),
|
||||
);
|
||||
assert.ok(isBlockedNotification(makeNotify("Auto-mode paused (Escape).")));
|
||||
});
|
||||
|
||||
test("does NOT match 'blocked' without colon (avoids false positives)", () => {
|
||||
assert.ok(
|
||||
!isBlockedNotification(
|
||||
|
|
|
|||
|
|
@ -194,7 +194,9 @@ import {
|
|||
EXIT_CANCELLED,
|
||||
EXIT_ERROR,
|
||||
EXIT_SUCCESS,
|
||||
isBlockedNotification,
|
||||
isInteractiveHeadlessTool,
|
||||
isTerminalNotification,
|
||||
mapStatusToExitCode,
|
||||
shouldArmHeadlessIdleTimeout,
|
||||
} from "../headless-events.js";
|
||||
|
|
@ -246,6 +248,29 @@ test("isInteractiveHeadlessTool: non-interactive tools stay false", () => {
|
|||
assert.equal(isInteractiveHeadlessTool(undefined), false);
|
||||
});
|
||||
|
||||
test("isTerminalNotification: auto pause is terminal for headless callers", () => {
|
||||
assert.equal(
|
||||
isTerminalNotification({
|
||||
type: "extension_ui_request",
|
||||
method: "notify",
|
||||
message:
|
||||
"Auto-mode paused due to provider error: Schema overload: consecutive tool validation failures exceeded cap",
|
||||
}),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("isBlockedNotification: auto pause exits as blocked", () => {
|
||||
assert.equal(
|
||||
isBlockedNotification({
|
||||
type: "extension_ui_request",
|
||||
method: "notify",
|
||||
message: "Auto-mode paused (Escape).",
|
||||
}),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("shouldArmHeadlessIdleTimeout: arms after tool calls when no interactive tool is in flight", () => {
|
||||
assert.equal(shouldArmHeadlessIdleTimeout(1, 0), true);
|
||||
assert.equal(shouldArmHeadlessIdleTimeout(3, 0), true);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue