Add runaway recovery handoff artifacts
This commit is contained in:
parent
6aa631c17a
commit
f76504a038
3 changed files with 267 additions and 1 deletions
|
|
@ -40,6 +40,7 @@ import {
|
|||
} from "./context-budget.js";
|
||||
import type { SFPreferences } from "./preferences.js";
|
||||
import { resolveAutoSupervisorConfig } from "./preferences.js";
|
||||
import { writeRunawayRecoveryArtifact } from "./runaway-recovery.js";
|
||||
import { recordSelfFeedback } from "./self-feedback.js";
|
||||
import { getMilestoneSlices, getSliceTasks, isDbAvailable } from "./sf-db.js";
|
||||
import {
|
||||
|
|
@ -332,6 +333,10 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
|
|||
runawayGuardPause: decision.metadata,
|
||||
},
|
||||
);
|
||||
const recoveryArtifact = writeRunawayRecoveryArtifact(
|
||||
s.basePath,
|
||||
decision.metadata,
|
||||
);
|
||||
const unitParts = unitId.split("/");
|
||||
recordSelfFeedback(
|
||||
{
|
||||
|
|
@ -340,7 +345,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
|
|||
summary: decision.reason,
|
||||
evidence: JSON.stringify(decision.metadata, null, 2),
|
||||
suggestedFix:
|
||||
"Review the paused unit's warning responses and runtime metrics to distinguish legitimate scope from loop/churn.",
|
||||
recoveryArtifact
|
||||
? `Resume from ${recoveryArtifact.markdownPath}; use its dirty-file list and resume prompt to split or finish the smallest verifiable unit.`
|
||||
: "Review the paused unit's warning responses and runtime metrics to distinguish legitimate scope from loop/churn.",
|
||||
occurredIn: {
|
||||
unitType,
|
||||
milestone: unitParts[0],
|
||||
|
|
|
|||
181
src/resources/extensions/sf/runaway-recovery.ts
Normal file
181
src/resources/extensions/sf/runaway-recovery.ts
Normal file
|
|
@ -0,0 +1,181 @@
|
|||
import { execFileSync } from "node:child_process";
|
||||
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import type { RunawayGuardPauseMetadata } from "./auto-runaway-guard.js";
|
||||
import { sfRoot } from "./paths.js";
|
||||
|
||||
/**
 * Handoff record produced when the runaway guard pauses a unit.
 *
 * `writeRunawayRecoveryArtifact` serializes this object to a JSON file and
 * renders the same data to a markdown file next to it.
 */
export interface RunawayRecoveryArtifact {
  /** Schema version of the record; always the literal `1`. */
  version: 1;

  /** Record discriminator; always the literal `"runaway-recovery"`. */
  kind: "runaway-recovery";

  /** ISO-8601 timestamp (`Date#toISOString`) taken when the record was built. */
  createdAt: string;

  /** Unit type copied from the pause metadata. */
  unitType: string;

  /** Unit id copied from the pause metadata. */
  unitId: string;

  /** Pause reason copied from the pause metadata. */
  reason: string;

  /** Metrics snapshot copied from the pause metadata. */
  metrics: RunawayGuardPauseMetadata["metrics"];

  /** Threshold descriptions copied from the pause metadata. */
  thresholdReasons: string[];

  /** Last-warning metrics copied from the pause metadata. */
  lastWarningMetrics: RunawayGuardPauseMetadata["lastWarningMetrics"];

  /** Per-tool counts taken from `metrics.topTools`, or `{}` when absent. */
  topTools: Record<string, number>;

  /** Paths reported by `git status` for the base path at write time. */
  changedFiles: string[];

  /** Suggested next step derived from the unit type and the dirty-file count. */
  recommendedAction: string;

  /** Ready-to-use prompt text for resuming the paused unit. */
  resumePrompt: string;

  /** Location of the JSON file, as returned by `relativeToSfRoot`. */
  jsonPath: string;

  /** Location of the markdown file, as returned by `relativeToSfRoot`. */
  markdownPath: string;
}
|
||||
|
||||
/**
 * Write a runaway-recovery handoff (one JSON file, one markdown file) for a
 * paused unit under `<sfRoot>/runtime/runaway-recovery/`.
 *
 * The file stem combines the timestamp (with `:` and `.` replaced by `-`)
 * and sanitized forms of the unit type and unit id.
 *
 * This is best-effort: any error raised while building or writing the
 * artifact is swallowed and reported to the caller as `null`.
 *
 * @param basePath - Project directory used to resolve the sf root and to run `git status`.
 * @param metadata - Pause metadata supplied by the runaway guard.
 * @param now - Timestamp for the artifact; defaults to the current time.
 * @returns The artifact that was written, or `null` if anything failed.
 */
export function writeRunawayRecoveryArtifact(
  basePath: string,
  metadata: RunawayGuardPauseMetadata,
  now = new Date(),
): RunawayRecoveryArtifact | null {
  try {
    const outputDir = join(sfRoot(basePath), "runtime", "runaway-recovery");
    mkdirSync(outputDir, { recursive: true });

    const createdAt = now.toISOString();
    const stem = [
      createdAt.replace(/[:.]/g, "-"),
      safeSegment(metadata.unitType),
      safeSegment(metadata.unitId),
    ].join("-");
    const jsonFile = join(outputDir, `${stem}.json`);
    const markdownFile = join(outputDir, `${stem}.md`);

    const dirtyFiles = listChangedFiles(basePath);

    const artifact: RunawayRecoveryArtifact = {
      version: 1,
      kind: "runaway-recovery",
      createdAt,
      unitType: metadata.unitType,
      unitId: metadata.unitId,
      reason: metadata.reason,
      metrics: metadata.metrics,
      thresholdReasons: metadata.thresholdReasons,
      lastWarningMetrics: metadata.lastWarningMetrics,
      topTools: metadata.metrics.topTools ?? {},
      changedFiles: dirtyFiles,
      recommendedAction: buildRecommendedAction(metadata, dirtyFiles),
      resumePrompt: buildResumePrompt(metadata, dirtyFiles),
      // The record stores the display form of each path, not the absolute one.
      jsonPath: relativeToSfRoot(jsonFile, basePath),
      markdownPath: relativeToSfRoot(markdownFile, basePath),
    };

    writeFileSync(jsonFile, `${JSON.stringify(artifact, null, 2)}\n`, "utf-8");
    writeFileSync(markdownFile, renderMarkdown(artifact), "utf-8");
    return artifact;
  } catch {
    // Deliberately silent: callers treat a missing artifact as "no handoff".
    return null;
  }
}
|
||||
|
||||
/**
 * List the paths `git status` reports as changed or untracked in `basePath`.
 *
 * Uses the NUL-delimited `-z` form of the porcelain v1 format so that paths
 * containing spaces, quotes or non-ASCII characters come back verbatim
 * instead of C-quoted, and so that rename/copy entries yield only the
 * destination path rather than an `old -> new` string.
 *
 * Best-effort: if git is missing, the directory is not a repository, or the
 * command exceeds the 2 second timeout, an empty list is returned.
 *
 * @param basePath - Directory in which to run `git status`.
 * @returns At most 200 paths, in the order git reported them.
 */
function listChangedFiles(basePath: string): string[] {
  const maxFiles = 200;
  try {
    const out = execFileSync("git", ["status", "--porcelain=v1", "-z"], {
      cwd: basePath,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 2000,
    });

    const files: string[] = [];
    let skipOriginalPath = false;
    for (const entry of out.split("\0")) {
      if (skipOriginalPath) {
        // In -z mode a rename/copy entry is followed by its source path as a
        // separate NUL-terminated field; it is not a status entry of its own.
        skipOriginalPath = false;
        continue;
      }
      // Each entry is "XY <path>": two status columns, a space, then the path.
      if (entry.length <= 3) continue;
      const status = entry.slice(0, 2);
      skipOriginalPath = status.includes("R") || status.includes("C");
      files.push(entry.slice(3));
      if (files.length >= maxFiles) break;
    }
    return files;
  } catch {
    return [];
  }
}
|
||||
|
||||
/**
 * Choose the recommended next step for a paused unit.
 *
 * - No dirty files: recommend creating a narrower follow-up unit.
 * - An `execute-task` unit with at most 8 dirty files: recommend resuming
 *   with a verification-first prompt.
 * - Anything else: recommend splitting the remaining work.
 *
 * @param metadata - Pause metadata; only `unitType` is consulted.
 * @param changedFiles - Dirty files detected at pause time.
 * @returns One of three fixed recommendation sentences.
 */
function buildRecommendedAction(
  metadata: RunawayGuardPauseMetadata,
  changedFiles: string[],
): string {
  const dirtyCount = changedFiles.length;

  if (dirtyCount === 0) {
    return "Create a narrower follow-up unit before resuming. The paused unit consumed budget without durable file changes.";
  }

  const isSmallTaskRun =
    metadata.unitType === "execute-task" && dirtyCount <= 8;

  return isSmallTaskRun
    ? "Resume with a verification-first prompt: inspect the listed dirty files, finish the failing tests, and avoid new exploration."
    : "Split the remaining work into smaller units with disjoint write sets before resuming. Keep the listed dirty files as the handoff boundary.";
}
|
||||
|
||||
/**
 * Compose the plain-text prompt used to resume a paused unit.
 *
 * The prompt has four paragraphs separated by blank lines: a header naming
 * the unit, fixed guidance lines, the pause reason, and the dirty-file list
 * (or a sentence stating that none were detected).
 *
 * @param metadata - Pause metadata; `unitType`, `unitId` and `reason` are used.
 * @param changedFiles - Dirty files detected at pause time.
 * @returns The multi-line prompt, without a trailing newline.
 */
function buildResumePrompt(
  metadata: RunawayGuardPauseMetadata,
  changedFiles: string[],
): string {
  const dirtySection =
    changedFiles.length > 0
      ? changedFiles.map((file) => `- ${file}`).join("\n")
      : "No changed files were detected.";

  const header = `Resume paused ${metadata.unitType} ${metadata.unitId} after runaway guard pause.`;

  const guidance = [
    "First read the runtime recovery artifact and the dirty files below.",
    "Do not broaden scope. Do not restart research from scratch.",
    "Classify the previous run as one of: legitimately large, blocked, or stuck/churning.",
    "Then either finish the smallest verifiable slice or create a narrower follow-up unit.",
  ].join("\n");

  const reasonLine = `Pause reason: ${metadata.reason}`;

  // Paragraphs are separated by exactly one blank line.
  return [header, guidance, reasonLine, `Dirty files:\n${dirtySection}`].join(
    "\n\n",
  );
}
|
||||
|
||||
/**
 * Render a recovery artifact as a markdown handoff document.
 *
 * Sections, in order: title and unit header, Reason, Recommended Action,
 * Metrics, Threshold Reasons, Top Tools (ten highest counts, descending),
 * Dirty Files, and the Resume Prompt inside a fenced `text` block.
 *
 * Every list section falls back to a placeholder bullet when empty so that
 * no heading is left without content. The code fence is lengthened beyond
 * the longest backtick run found in the resume prompt so that backticks in
 * a reason or file name cannot close the block early.
 *
 * @param artifact - The artifact to render.
 * @returns The markdown text, terminated by a newline.
 */
function renderMarkdown(artifact: RunawayRecoveryArtifact): string {
  const changedFiles =
    artifact.changedFiles.length === 0
      ? "- none"
      : artifact.changedFiles.map((file) => `- \`${file}\``).join("\n");

  const topTools =
    Object.entries(artifact.topTools)
      .sort(([, a], [, b]) => b - a)
      .slice(0, 10)
      .map(([tool, count]) => `- \`${tool}\`: ${count}`)
      .join("\n") || "- none recorded";

  const thresholdReasons =
    artifact.thresholdReasons.length === 0
      ? "- none recorded"
      : artifact.thresholdReasons.map((reason) => `- ${reason}`).join("\n");

  // A fenced block is only closed by a fence at least as long as the opening
  // one, so a fence longer than any backtick run in the content is safe.
  const backtickRuns = artifact.resumePrompt.match(/`+/g) ?? [];
  const longestRun = backtickRuns.reduce(
    (longest, run) => Math.max(longest, run.length),
    0,
  );
  const fence = "`".repeat(Math.max(3, longestRun + 1));

  return [
    "# Runaway Recovery Handoff",
    "",
    `Created: ${artifact.createdAt}`,
    `Unit: ${artifact.unitType} ${artifact.unitId}`,
    "",
    "## Reason",
    "",
    artifact.reason,
    "",
    "## Recommended Action",
    "",
    artifact.recommendedAction,
    "",
    "## Metrics",
    "",
    `- Tool calls: ${artifact.metrics.toolCalls}`,
    `- Session tokens: ${artifact.metrics.sessionTokens}`,
    `- Elapsed ms: ${artifact.metrics.elapsedMs}`,
    `- Changed files: ${artifact.metrics.changedFiles ?? "unknown"}`,
    "",
    "## Threshold Reasons",
    "",
    thresholdReasons,
    "",
    "## Top Tools",
    "",
    topTools,
    "",
    "## Dirty Files",
    "",
    changedFiles,
    "",
    "## Resume Prompt",
    "",
    `${fence}text`,
    artifact.resumePrompt,
    fence,
    "",
  ].join("\n");
}
|
||||
|
||||
/**
 * Reduce an arbitrary string to a short, filename-friendly segment.
 *
 * Each run of characters outside `A-Z a-z 0-9 . _ -` collapses to a single
 * `-`, the result is cut to 80 characters, and an empty result becomes
 * `"unit"`.
 *
 * @param value - Raw text such as a unit type or unit id.
 * @returns The sanitized segment; never empty.
 */
function safeSegment(value: string): string {
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "-");
  const truncated = sanitized.slice(0, 80);
  return truncated === "" ? "unit" : truncated;
}
|
||||
|
||||
/**
 * Convert an absolute path under the sf root into a `.sf/…` display path.
 *
 * The prefix check accepts either `/` or `\` after the root, because paths
 * built with `node:path` `join` use backslashes on Windows; the returned
 * value always uses forward slashes. Paths outside the root, or any path
 * when the root directory does not exist, are returned unchanged.
 *
 * @param path - Path to shorten.
 * @param basePath - Project directory used to resolve the sf root.
 * @returns `.sf/<remainder>` when `path` is inside the root, otherwise `path`.
 */
function relativeToSfRoot(path: string, basePath: string): string {
  const root = sfRoot(basePath);
  if (!existsSync(root)) return path;

  for (const separator of ["/", "\\"]) {
    const prefix = `${root}${separator}`;
    if (!path.startsWith(prefix)) continue;
    const remainder = path.slice(prefix.length);
    // Only rewrite separators when the path was in Windows form.
    const portable =
      separator === "/" ? remainder : remainder.split("\\").join("/");
    return `.sf/${portable}`;
  }
  return path;
}
|
||||
78
src/resources/extensions/sf/tests/runaway-recovery.test.ts
Normal file
78
src/resources/extensions/sf/tests/runaway-recovery.test.ts
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import test from "node:test";
|
||||
|
||||
import type { RunawayGuardPauseMetadata } from "../auto-runaway-guard.ts";
|
||||
import { writeRunawayRecoveryArtifact } from "../runaway-recovery.ts";
|
||||
|
||||
/**
 * Build a fresh pause-metadata fixture for an `execute-task` unit that was
 * paused after two diagnostic turns.
 *
 * A new object graph is created on every call so tests cannot leak
 * mutations into each other.
 */
function metadata(): RunawayGuardPauseMetadata {
  const metrics = {
    toolCalls: 36,
    sessionTokens: 1_680_000,
    elapsedMs: 311_000,
    changedFiles: 2,
    topTools: { read: 12, edit: 4 },
  };

  const lastWarningMetrics = {
    toolCalls: 25,
    sessionTokens: 1_000_000,
    elapsedMs: 240_000,
  };

  const thresholds = {
    toolCallWarning: 25,
    tokenWarning: 1_000_000,
    elapsedMs: 1_200_000,
    changedFilesWarning: 75,
    minIntervalMs: 120_000,
  };

  return {
    reason:
      "Runaway guard paused execute-task M004/S01/T04: budget kept growing after 2 diagnostic turn(s).",
    pausedAt: 1714447889000,
    unitType: "execute-task",
    unitId: "M004/S01/T04",
    diagnosticTurns: 2,
    warningsSent: 2,
    thresholdReasons: ["36 tool calls (warning 25)"],
    metrics,
    lastWarningMetrics,
    thresholds,
  };
}
|
||||
|
||||
// End-to-end check: in a throwaway git repository with one untracked file,
// the writer must return an artifact and leave both the JSON and markdown
// files on disk at the paths recorded in the artifact.
test("writeRunawayRecoveryArtifact creates durable json and markdown handoff", () => {
  const dir = mkdtempSync(join(tmpdir(), "sf-runaway-recovery-"));
  try {
    execFileSync("git", ["init"], {
      cwd: dir,
      stdio: ["ignore", "ignore", "ignore"],
    });
    // A single untracked file gives `git status` exactly one entry to report.
    writeFileSync(join(dir, "changed.go"), "package main\n");

    const artifact = writeRunawayRecoveryArtifact(
      dir,
      metadata(),
      new Date("2026-04-30T06:00:00.000Z"),
    );

    assert.ok(artifact);
    assert.equal(artifact.kind, "runaway-recovery");
    assert.equal(artifact.unitId, "M004/S01/T04");
    assert.deepEqual(artifact.changedFiles, ["changed.go"]);
    assert.match(artifact.resumePrompt, /Do not broaden scope/);
    assert.match(artifact.recommendedAction, /verification-first/);
    // The recorded paths are resolved against the base directory here.
    assert.ok(existsSync(join(dir, artifact.jsonPath)));
    assert.ok(existsSync(join(dir, artifact.markdownPath)));

    const raw = JSON.parse(readFileSync(join(dir, artifact.jsonPath), "utf-8"));
    assert.equal(raw.version, 1);
    assert.equal(raw.metrics.toolCalls, 36);

    const markdown = readFileSync(join(dir, artifact.markdownPath), "utf-8");
    assert.match(markdown, /Runaway Recovery Handoff/);
    // Escape the dot so the pattern matches the literal file name only.
    assert.match(markdown, /changed\.go/);
    assert.match(markdown, /Resume Prompt/);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
|
||||
Loading…
Add table
Reference in a new issue