Add runaway recovery handoff artifacts

This commit is contained in:
Mikael Hugo 2026-04-30 08:07:44 +02:00
parent 6aa631c17a
commit f76504a038
3 changed files with 267 additions and 1 deletions

View file

@ -40,6 +40,7 @@ import {
} from "./context-budget.js";
import type { SFPreferences } from "./preferences.js";
import { resolveAutoSupervisorConfig } from "./preferences.js";
import { writeRunawayRecoveryArtifact } from "./runaway-recovery.js";
import { recordSelfFeedback } from "./self-feedback.js";
import { getMilestoneSlices, getSliceTasks, isDbAvailable } from "./sf-db.js";
import {
@ -332,6 +333,10 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
runawayGuardPause: decision.metadata,
},
);
const recoveryArtifact = writeRunawayRecoveryArtifact(
s.basePath,
decision.metadata,
);
const unitParts = unitId.split("/");
recordSelfFeedback(
{
@ -340,7 +345,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
summary: decision.reason,
evidence: JSON.stringify(decision.metadata, null, 2),
suggestedFix:
"Review the paused unit's warning responses and runtime metrics to distinguish legitimate scope from loop/churn.",
recoveryArtifact
? `Resume from ${recoveryArtifact.markdownPath}; use its dirty-file list and resume prompt to split or finish the smallest verifiable unit.`
: "Review the paused unit's warning responses and runtime metrics to distinguish legitimate scope from loop/churn.",
occurredIn: {
unitType,
milestone: unitParts[0],

View file

@ -0,0 +1,181 @@
import { execFileSync } from "node:child_process";
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import type { RunawayGuardPauseMetadata } from "./auto-runaway-guard.js";
import { sfRoot } from "./paths.js";
/**
 * Handoff record written when the runaway guard pauses a unit.
 *
 * The same object is serialized to a JSON file and rendered to a Markdown
 * file by `writeRunawayRecoveryArtifact`.
 */
export interface RunawayRecoveryArtifact {
/** Schema version of the artifact; currently always `1`. */
version: 1;
/** Fixed discriminator identifying this record as a runaway-recovery handoff. */
kind: "runaway-recovery";
/** Creation timestamp; the writer fills it with `Date.prototype.toISOString()` output. */
createdAt: string;
/** Unit type copied from the pause metadata. */
unitType: string;
/** Unit id copied from the pause metadata. */
unitId: string;
/** Pause reason copied from the pause metadata. */
reason: string;
/** Metrics copied from the pause metadata. */
metrics: RunawayGuardPauseMetadata["metrics"];
/** Threshold reasons copied from the pause metadata. */
thresholdReasons: string[];
/** Last-warning metrics copied from the pause metadata. */
lastWarningMetrics: RunawayGuardPauseMetadata["lastWarningMetrics"];
/** Tool name to count map; the writer falls back to `{}` when the metrics carry none. */
topTools: Record<string, number>;
/** Paths reported by `git status` for the base path (the writer caps the list at 200 entries). */
changedFiles: string[];
/** Human-readable suggestion for how to proceed with the paused unit. */
recommendedAction: string;
/** Prompt text intended for resuming the paused unit. */
resumePrompt: string;
/** Location of the JSON file; `.sf/`-prefixed when it lies under the sf root, otherwise the path as written. */
jsonPath: string;
/** Location of the Markdown file; `.sf/`-prefixed when it lies under the sf root, otherwise the path as written. */
markdownPath: string;
}
/**
 * Persist a runaway-recovery handoff (JSON plus Markdown) for a paused unit.
 *
 * Both files land in `<sfRoot>/runtime/runaway-recovery/` and share a file
 * stem built from the timestamp, the unit type and the unit id. This is a
 * best-effort operation: any exception raised along the way is swallowed and
 * reported as `null`.
 *
 * @param basePath - Directory handed to `sfRoot` and used as the git working directory.
 * @param metadata - Pause metadata describing the unit that was stopped.
 * @param now - Timestamp for `createdAt` and the file stem; defaults to the current time.
 * @returns The artifact that was written, or `null` when any step threw.
 */
export function writeRunawayRecoveryArtifact(
  basePath: string,
  metadata: RunawayGuardPauseMetadata,
  now = new Date(),
): RunawayRecoveryArtifact | null {
  try {
    const outputDir = join(sfRoot(basePath), "runtime", "runaway-recovery");
    mkdirSync(outputDir, { recursive: true });

    // ':' and '.' from the ISO timestamp are replaced with '-' in the file name.
    const isoStamp = now.toISOString();
    const stem = [
      isoStamp.replace(/[:.]/g, "-"),
      safeSegment(metadata.unitType),
      safeSegment(metadata.unitId),
    ].join("-");
    const jsonFile = join(outputDir, stem + ".json");
    const markdownFile = join(outputDir, stem + ".md");

    const dirtyFiles = listChangedFiles(basePath);

    const handoff: RunawayRecoveryArtifact = {
      version: 1,
      kind: "runaway-recovery",
      createdAt: isoStamp,
      unitType: metadata.unitType,
      unitId: metadata.unitId,
      reason: metadata.reason,
      metrics: metadata.metrics,
      thresholdReasons: metadata.thresholdReasons,
      lastWarningMetrics: metadata.lastWarningMetrics,
      topTools: metadata.metrics.topTools ?? {},
      changedFiles: dirtyFiles,
      recommendedAction: buildRecommendedAction(metadata, dirtyFiles),
      resumePrompt: buildResumePrompt(metadata, dirtyFiles),
      jsonPath: relativeToSfRoot(jsonFile, basePath),
      markdownPath: relativeToSfRoot(markdownFile, basePath),
    };

    writeFileSync(jsonFile, `${JSON.stringify(handoff, null, 2)}\n`, "utf-8");
    writeFileSync(markdownFile, renderMarkdown(handoff), "utf-8");
    return handoff;
  } catch {
    // Deliberate best-effort: callers treat null as "no artifact available".
    return null;
  }
}
/**
 * List the paths git reports as changed (staged, unstaged or untracked) in `basePath`.
 *
 * Uses the NUL-delimited porcelain format (`-z`) so that paths are returned
 * verbatim (no C-style quoting) and rename/copy entries contribute only their
 * destination path instead of an `old -> new` pseudo-path.
 *
 * @param basePath - Working directory in which `git status` is executed.
 * @returns At most 200 changed paths; an empty list when git fails, times out
 *   (2 s) or `basePath` is not inside a repository.
 */
function listChangedFiles(basePath: string): string[] {
  const maxFiles = 200;
  try {
    const raw = execFileSync("git", ["status", "--porcelain=v1", "-z"], {
      cwd: basePath,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 2000,
    });
    const fields = raw.split("\0");
    const files: string[] = [];
    for (let i = 0; i < fields.length && files.length < maxFiles; i++) {
      const entry = fields[i] ?? "";
      // Each entry is "XY <path>"; anything shorter carries no path.
      if (entry.length <= 3) continue;
      files.push(entry.slice(3));
      // With -z a rename/copy entry is followed by a separate field holding
      // the source path; skip it so it is not mistaken for a status entry.
      if (/[RC]/.test(entry.slice(0, 2))) i++;
    }
    return files;
  } catch {
    // Best-effort: a missing git binary or non-repository yields "no changes".
    return [];
  }
}
/**
 * Choose the recovery recommendation for a paused unit.
 *
 * - No dirty files: suggest creating a narrower follow-up unit.
 * - An `execute-task` unit with at most 8 dirty files: suggest a
 *   verification-first resume.
 * - Anything else: suggest splitting the remaining work.
 *
 * @param metadata - Pause metadata; only `unitType` is consulted.
 * @param changedFiles - Dirty files detected in the working tree.
 * @returns The recommendation sentence.
 */
function buildRecommendedAction(
  metadata: RunawayGuardPauseMetadata,
  changedFiles: string[],
): string {
  const dirtyCount = changedFiles.length;
  if (dirtyCount === 0) {
    return "Create a narrower follow-up unit before resuming. The paused unit consumed budget without durable file changes.";
  }

  const isSmallTask = metadata.unitType === "execute-task" && dirtyCount <= 8;
  return isSmallTask
    ? "Resume with a verification-first prompt: inspect the listed dirty files, finish the failing tests, and avoid new exploration."
    : "Split the remaining work into smaller units with disjoint write sets before resuming. Keep the listed dirty files as the handoff boundary.";
}
/**
 * Compose the plain-text prompt used to resume a unit after a runaway pause.
 *
 * The prompt names the unit, gives fixed guidance lines, states the pause
 * reason and ends with a bullet list of dirty files (or a sentence saying
 * none were detected).
 *
 * @param metadata - Pause metadata; `unitType`, `unitId` and `reason` are used.
 * @param changedFiles - Dirty files to list at the end of the prompt.
 * @returns The newline-joined prompt, without a trailing newline.
 */
function buildResumePrompt(
  metadata: RunawayGuardPauseMetadata,
  changedFiles: string[],
): string {
  const lines: string[] = [];
  lines.push(
    `Resume paused ${metadata.unitType} ${metadata.unitId} after runaway guard pause.`,
  );
  lines.push("");
  lines.push("First read the runtime recovery artifact and the dirty files below.");
  lines.push("Do not broaden scope. Do not restart research from scratch.");
  lines.push(
    "Classify the previous run as one of: legitimately large, blocked, or stuck/churning.",
  );
  lines.push(
    "Then either finish the smallest verifiable slice or create a narrower follow-up unit.",
  );
  lines.push("");
  lines.push(`Pause reason: ${metadata.reason}`);
  lines.push("");
  lines.push("Dirty files:");

  if (changedFiles.length === 0) {
    lines.push("No changed files were detected.");
  } else {
    const bullets: string[] = [];
    for (const file of changedFiles) {
      bullets.push(`- ${file}`);
    }
    lines.push(bullets.join("\n"));
  }

  return lines.join("\n");
}
/**
 * Render a recovery artifact as a Markdown handoff document.
 *
 * Sections, in order: title and header lines, Reason, Recommended Action,
 * Metrics, Threshold Reasons, Top Tools (ten highest counts, descending),
 * Dirty Files and the Resume Prompt inside a fenced `text` block. Every list
 * section prints an explicit placeholder bullet when it has no entries, so no
 * heading is ever followed by an empty body.
 *
 * @param artifact - The artifact to render.
 * @returns The Markdown text, ending with a single newline.
 */
function renderMarkdown(artifact: RunawayRecoveryArtifact): string {
  const changedFiles =
    artifact.changedFiles.length === 0
      ? "- none"
      : artifact.changedFiles.map((file) => `- \`${file}\``).join("\n");
  const topTools =
    Object.entries(artifact.topTools)
      .sort(([, a], [, b]) => b - a)
      .slice(0, 10)
      .map(([tool, count]) => `- \`${tool}\`: ${count}`)
      .join("\n") || "- none recorded";
  // Mirror the fallbacks above: an empty list would otherwise leave the
  // "Threshold Reasons" heading with nothing beneath it.
  const thresholdReasons =
    artifact.thresholdReasons.length === 0
      ? "- none recorded"
      : artifact.thresholdReasons.map((reason) => `- ${reason}`).join("\n");
  return [
    "# Runaway Recovery Handoff",
    "",
    `Created: ${artifact.createdAt}`,
    `Unit: ${artifact.unitType} ${artifact.unitId}`,
    "",
    "## Reason",
    "",
    artifact.reason,
    "",
    "## Recommended Action",
    "",
    artifact.recommendedAction,
    "",
    "## Metrics",
    "",
    `- Tool calls: ${artifact.metrics.toolCalls}`,
    `- Session tokens: ${artifact.metrics.sessionTokens}`,
    `- Elapsed ms: ${artifact.metrics.elapsedMs}`,
    // The changed-file count is optional in the metrics.
    `- Changed files: ${artifact.metrics.changedFiles ?? "unknown"}`,
    "",
    "## Threshold Reasons",
    "",
    thresholdReasons,
    "",
    "## Top Tools",
    "",
    topTools,
    "",
    "## Dirty Files",
    "",
    changedFiles,
    "",
    "## Resume Prompt",
    "",
    "```text",
    artifact.resumePrompt,
    "```",
    "",
  ].join("\n");
}
/**
 * Turn an arbitrary string into a file-name-safe segment.
 *
 * Each run of characters outside `A-Z a-z 0-9 . _ -` collapses to a single
 * `-`, the result is cut to 80 characters, and an empty result becomes
 * `"unit"`.
 *
 * @param value - Raw text such as a unit type or unit id.
 * @returns The sanitized, non-empty segment.
 */
function safeSegment(value: string): string {
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "-");
  const truncated = sanitized.slice(0, 80);
  if (truncated.length === 0) {
    return "unit";
  }
  return truncated;
}
/**
 * Express a path under the sf root as a `.sf/`-prefixed path.
 *
 * The path is returned unchanged when the sf root does not exist on disk or
 * when the path does not start with `<root>/`.
 *
 * @param path - Path to shorten.
 * @param basePath - Directory from which the sf root is resolved via `sfRoot`.
 * @returns The `.sf/…` form, or `path` as given.
 */
function relativeToSfRoot(path: string, basePath: string): string {
  const root = sfRoot(basePath);
  const prefix = `${root}/`;
  if (existsSync(root) && path.startsWith(prefix)) {
    return `.sf/${path.slice(prefix.length)}`;
  }
  return path;
}

View file

@ -0,0 +1,78 @@
import assert from "node:assert/strict";
import { execFileSync } from "node:child_process";
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import test from "node:test";
import type { RunawayGuardPauseMetadata } from "../auto-runaway-guard.ts";
import { writeRunawayRecoveryArtifact } from "../runaway-recovery.ts";
/**
 * Build a fresh pause-metadata fixture for an `execute-task` unit that
 * exceeded the tool-call warning threshold (36 calls against a warning of 25).
 */
function metadata(): RunawayGuardPauseMetadata {
  const current = {
    toolCalls: 36,
    sessionTokens: 1_680_000,
    elapsedMs: 311_000,
    changedFiles: 2,
    topTools: { read: 12, edit: 4 },
  };
  const atLastWarning = {
    toolCalls: 25,
    sessionTokens: 1_000_000,
    elapsedMs: 240_000,
  };
  const limits = {
    toolCallWarning: 25,
    tokenWarning: 1_000_000,
    elapsedMs: 1_200_000,
    changedFilesWarning: 75,
    minIntervalMs: 120_000,
  };
  const fixture: RunawayGuardPauseMetadata = {
    reason:
      "Runaway guard paused execute-task M004/S01/T04: budget kept growing after 2 diagnostic turn(s).",
    pausedAt: 1714447889000,
    unitType: "execute-task",
    unitId: "M004/S01/T04",
    diagnosticTurns: 2,
    warningsSent: 2,
    thresholdReasons: ["36 tool calls (warning 25)"],
    metrics: current,
    lastWarningMetrics: atLastWarning,
    thresholds: limits,
  };
  return fixture;
}
// End-to-end check: in a scratch git repository with one untracked file, the
// writer must return an artifact and leave matching JSON and Markdown on disk.
test("writeRunawayRecoveryArtifact creates durable json and markdown handoff", () => {
  const dir = mkdtempSync(join(tmpdir(), "sf-runaway-recovery-"));
  try {
    execFileSync("git", ["init"], {
      cwd: dir,
      stdio: ["ignore", "ignore", "ignore"],
    });
    // An untracked file is enough for `git status` to report a dirty tree.
    writeFileSync(join(dir, "changed.go"), "package main\n");
    const artifact = writeRunawayRecoveryArtifact(
      dir,
      metadata(),
      new Date("2026-04-30T06:00:00.000Z"),
    );
    assert.ok(artifact);
    assert.equal(artifact.kind, "runaway-recovery");
    assert.equal(artifact.unitId, "M004/S01/T04");
    assert.deepEqual(artifact.changedFiles, ["changed.go"]);
    assert.match(artifact.resumePrompt, /Do not broaden scope/);
    assert.match(artifact.recommendedAction, /verification-first/);
    // The returned paths are joined onto the repository directory to locate the files.
    assert.ok(existsSync(join(dir, artifact.jsonPath)));
    assert.ok(existsSync(join(dir, artifact.markdownPath)));
    const raw = JSON.parse(readFileSync(join(dir, artifact.jsonPath), "utf-8"));
    assert.equal(raw.version, 1);
    assert.equal(raw.metrics.toolCalls, 36);
    const markdown = readFileSync(join(dir, artifact.markdownPath), "utf-8");
    assert.match(markdown, /Runaway Recovery Handoff/);
    // The dot is escaped so only the literal file name satisfies the match.
    assert.match(markdown, /changed\.go/);
    assert.match(markdown, /Resume Prompt/);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});