fix: harden headless dogfood self-healing

2026-05-15 03:53:15 +02:00 · 2026-05-15 03:53:15 +02:00 · 8b0f0bbd65
commit 8b0f0bbd65
parent 3ac5aede1e
9 changed files with 492 additions and 198 deletions
--- a/packages/coding-agent/src/core/subagent-runner.ts
+++ b/packages/coding-agent/src/core/subagent-runner.ts
@ -0,0 +1,291 @@
+/**
+ * subagent-runner.ts — In-process subagent execution helper.
+ *
+ * Replaces the spawn-based `sf -p` dispatch in headless-triage.ts and
+ * uok/agent-runner.js.  Each call gets a fresh, isolated AgentSession.
+ * Cancellation via AbortController, not signals.
+ */
+
+import type { AssistantMessage, AssistantMessageEvent } from "@singularity-forge/ai";
+import { getAgentDir } from "../config.js";
+import { SessionManager } from "./session-manager.js";
+import { SettingsManager } from "./settings-manager.js";
+import { DefaultResourceLoader } from "./resource-loader.js";
+import { createAgentSession } from "./sdk.js";
+import { theme } from "../modes/interactive/theme/theme.js";
+import type { ExtensionUIContext } from "./extensions/types.js";
+import type { AgentSessionEvent } from "./agent-session.js";
+
+const DEFAULT_SUBAGENT_TIMEOUT_MS = 8 * 60 * 1000;
+
+export interface SubagentConfig {
+	/** Pre-composed system prompt (caller has already merged any prompt-parts). */
+	systemPrompt: string;
+	/** Optional model id like "kimi-coding/kimi-k2.6". If omitted, uses the session default. */
+	model?: string;
+	/** Tool name filter (subset of built-in tools by name). */
+	tools?: string[];
+	/** Working directory. Default: process.cwd(). */
+	cwd?: string;
+	/** Display name for log/error messages. */
+	name?: string;
+}
+
+export interface SubagentResult {
+	ok: boolean;
+	/** Final assistant text output (joined from all text parts). */
+	output: string;
+	/** Error or timeout message. */
+	stderr?: string;
+	/** 0=success, 124=timeout, 1=error. */
+	exitCode: number;
+}
+
+/**
+ * No-op UI context for subagent sessions (headless, no interactive UI).
+ * Shape mirrors createPrintModeUIContext() in print-mode.ts.
+ */
+function createSubagentUIContext(): ExtensionUIContext {
+	return {
+		select: async () => undefined,
+		confirm: async () => false,
+		input: async () => undefined,
+		notify: () => {},
+		onTerminalInput: () => () => {},
+		setStatus: () => {},
+		setWorkingMessage: () => {},
+		setWorkingVisible: () => {},
+		setWidget: () => {},
+		setFooter: () => {},
+		setHeader: () => {},
+		setTitle: () => {},
+		custom: async () => undefined as never,
+		pasteToEditor: () => {},
+		setEditorText: () => {},
+		getEditorText: () => "",
+		editor: async () => undefined,
+		setEditorComponent: () => {},
+		get theme() {
+			return theme;
+		},
+		getAllThemes: () => [],
+		getTheme: () => undefined,
+		setTheme: () => ({ success: false, error: "UI not available" }),
+		getToolsExpanded: () => false,
+		setToolsExpanded: () => {},
+	};
+}
+
+/**
+ * Run a subagent in-process with an isolated AgentSession.
+ *
+ * @param config  - Subagent configuration (system prompt, model, tools, cwd).
+ * @param task    - Task string to send as the first prompt.
+ * @param options - Optional AbortSignal and timeout.
+ * @returns       SubagentResult with ok, output, stderr, exitCode.
+ */
+export async function runSubagent(
+	config: SubagentConfig,
+	task: string,
+	options?: { signal?: AbortSignal; timeoutMs?: number },
+): Promise<SubagentResult> {
+	const name = config.name ?? "subagent";
+	const cwd = config.cwd ?? process.cwd();
+	const timeoutMs = options?.timeoutMs ?? DEFAULT_SUBAGENT_TIMEOUT_MS;
+
+	// Build an isolated resource loader with the caller's system prompt appended.
+	const agentDir = getAgentDir();
+	const settingsManager = SettingsManager.create(cwd, agentDir);
+	const resourceLoader = new DefaultResourceLoader({
+		cwd,
+		agentDir,
+		settingsManager,
+		appendSystemPrompt: config.systemPrompt,
+	});
+	await resourceLoader.reload();
+
+	// Create a fresh in-memory session so it never touches the user's session files.
+	// Tool filtering is applied below via setActiveToolsByName after session creation.
+	const { session, modelFallbackMessage } = await createAgentSession({
+		cwd,
+		agentDir,
+		resourceLoader,
+		sessionManager: SessionManager.inMemory(),
+		settingsManager,
+		persistModelChanges: false,
+	});
+
+	if (modelFallbackMessage) {
+		// Non-fatal: log but continue with whatever model was resolved.
+		process.stderr.write(
+			`[subagent:${name}] model fallback: ${modelFallbackMessage}\n`,
+		);
+	}
+
+	// Apply optional model override.
+	if (config.model) {
+		const parts = config.model.split("/");
+		const [provider, modelId] =
+			parts.length >= 2
+				? [parts[0], parts.slice(1).join("/")]
+				: ["", config.model];
+		const match =
+			provider
+				? session.modelRegistry.find(provider, modelId)
+				: session.modelRegistry
+						.getAll()
+						.find((m) => m.id === config.model || `${m.provider}/${m.id}` === config.model);
+		if (match) {
+			try {
+				await session.setModel(match, { persist: false });
+			} catch (err) {
+				process.stderr.write(
+					`[subagent:${name}] could not set model "${config.model}": ${err instanceof Error ? err.message : String(err)}\n`,
+				);
+			}
+		} else {
+			process.stderr.write(
+				`[subagent:${name}] model "${config.model}" not found in registry, using default\n`,
+			);
+		}
+	}
+
+	// Apply tool name filter. setActiveToolsByName silently drops unknown names
+	// and rebuilds the system prompt to reflect the active tool set.
+	if (config.tools && config.tools.length > 0) {
+		session.setActiveToolsByName(config.tools);
+	}
+
+	// Bind extensions in headless mode (no lifecycle, no interactive UI).
+	await session.bindExtensions({
+		uiContext: createSubagentUIContext(),
+		onError: (err) => {
+			process.stderr.write(
+				`[subagent:${name}] extension error (${err.extensionPath}): ${err.error}\n`,
+			);
+		},
+		runLifecycle: false,
+	});
+
+	// Collect incremental text output from events so the timeout case
+	// can still return partial output.
+	let partialOutput = "";
+	const unsubscribe = session.subscribe((event: AgentSessionEvent) => {
+		if (event.type === "message_update") {
+			const streamEvent: AssistantMessageEvent = event.assistantMessageEvent;
+			if (streamEvent.type === "text_delta") {
+				partialOutput += streamEvent.delta;
+			}
+		}
+	});
+
+	// Helper to extract final output from session state messages.
+	const extractFinalOutput = (): string => {
+		const messages = session.state.messages;
+		const last = messages[messages.length - 1];
+		if (!last || last.role !== "assistant") return partialOutput;
+		const assistant = last as AssistantMessage;
+		const parts = assistant.content
+			.filter((c): c is { type: "text"; text: string } => c.type === "text")
+			.map((c) => c.text);
+		return parts.join("\n") || partialOutput;
+	};
+
+	let timer: ReturnType<typeof setTimeout> | undefined;
+
+	const cleanup = (): void => {
+		if (timer) {
+			clearTimeout(timer);
+			timer = undefined;
+		}
+		unsubscribe();
+	};
+
+	const promptTask = `Task: ${task}`;
+
+	try {
+		const promptPromise = session.prompt(promptTask, {
+			runExtensionHooks: false,
+		});
+
+		if (timeoutMs === 0 && !options?.signal) {
+			// Fast path: no watchdog, no cancellation.
+			await promptPromise;
+			cleanup();
+			return { ok: true, output: extractFinalOutput(), exitCode: 0 };
+		}
+
+		// Build race competitors.
+		type RaceResult = { timedOut?: true; cancelled?: true; error?: unknown };
+		const competitors: Promise<RaceResult>[] = [
+			promptPromise.then(() => ({} as RaceResult)),
+		];
+
+		if (timeoutMs > 0) {
+			competitors.push(
+				new Promise<RaceResult>((resolve) => {
+					timer = setTimeout(() => {
+						void session.abort().catch(() => {});
+						resolve({ timedOut: true });
+					}, timeoutMs);
+				}),
+			);
+		}
+
+		if (options?.signal) {
+			const sig = options.signal;
+			if (sig.aborted) {
+				void session.abort().catch(() => {});
+				cleanup();
+				return {
+					ok: false,
+					output: partialOutput,
+					stderr: `${name} cancelled`,
+					exitCode: 1,
+				};
+			}
+			competitors.push(
+				new Promise<RaceResult>((resolve) => {
+					const onAbort = () => {
+						void session.abort().catch(() => {});
+						resolve({ cancelled: true });
+					};
+					sig.addEventListener("abort", onAbort, { once: true });
+				}),
+			);
+		}
+
+		const result = await Promise.race(competitors);
+
+		cleanup();
+
+		if (result.timedOut) {
+			return {
+				ok: false,
+				output: extractFinalOutput(),
+				stderr: `${name} timed out after ${timeoutMs}ms (configure SF_TRIAGE_AGENT_TIMEOUT_MS to extend)`,
+				exitCode: 124,
+			};
+		}
+
+		if (result.cancelled) {
+			return {
+				ok: false,
+				output: extractFinalOutput(),
+				stderr: `${name} cancelled`,
+				exitCode: 1,
+			};
+		}
+
+		// Normal completion.
+		return { ok: true, output: extractFinalOutput(), exitCode: 0 };
+	} catch (err) {
+		cleanup();
+		return {
+			ok: false,
+			output: extractFinalOutput(),
+			stderr: err instanceof Error ? err.message : String(err),
+			exitCode: 1,
+		};
+	}
+}
--- a/packages/coding-agent/src/index.ts
+++ b/packages/coding-agent/src/index.ts
@ -255,6 +255,12 @@ export {
 	// Pre-built tools (use process.cwd())
 	readOnlyTools,
 } from "./core/sdk.js";
+// Subagent runner
+export {
+	runSubagent,
+	type SubagentConfig,
+	type SubagentResult,
+} from "./core/subagent-runner.js";
 export {
 	type BranchSummaryEntry,
 	buildSessionContext,
--- a/src/headless-triage.ts
+++ b/src/headless-triage.ts
@ -24,12 +24,11 @@
 * Consumer: headless.ts when command === "triage".
 */

-import { spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
-import { existsSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
-import { tmpdir } from "node:os";
+import { existsSync } from "node:fs";
 import { join } from "node:path";
 import { createJiti } from "@mariozechner/jiti";
+import { runSubagent } from "@singularity-forge/coding-agent";
 import { parse as parseYaml } from "yaml";
 import { resolveBundledSourceResource } from "./bundled-resource-path.js";
 import { getSfEnv } from "./env.js";
@ -335,33 +334,6 @@ export function parseTriagePlan(text: string): TriageDecision[] | null {
 	return parseTriagePlanStrict(text).plan;
 }

-function buildSfPrintLaunchArgs(
-	promptPath: string,
-	task: string,
-	agent: AgentConfig,
-	options: { tools?: string[]; model?: string; cwd?: string } = {},
-): { command: string; args: string[] } {
-	const sfBinPath = process.env.SF_BIN_PATH || process.argv[1];
-	const baseArgs = [
-		"--mode",
-		"text",
-		"-p",
-		"--no-session",
-		"--append-system-prompt",
-		promptPath,
-	];
-	const tools = options.tools ?? agent.tools;
-	if (tools && tools.length > 0) baseArgs.push("--tools", tools.join(","));
-	const model = options.model ?? agent.model;
-	if (model) baseArgs.push("--model", model);
-	baseArgs.push(`Task: ${task}`);
-	if (!sfBinPath) return { command: "sf", args: baseArgs };
-	if (sfBinPath.endsWith(".js") || sfBinPath.endsWith(".ts")) {
-		return { command: process.execPath, args: [sfBinPath, ...baseArgs] };
-	}
-	return { command: sfBinPath, args: baseArgs };
-}
-
 /**
 * Default per-agent timeout: 8 minutes. Long enough for a real LLM reasoning
 * pass + tool calls; short enough that a hung gemini OAuth or stalled
@ -372,6 +344,7 @@ function buildSfPrintLaunchArgs(
 * forever on `proc.on("close")`, so a single hung subagent dispatch
 * blocked the orchestrator until manual kill (observed 2026-05-14:
 * 33-minute-stuck triage --apply caused by an unresponsive provider).
+ * The watchdog is now enforced inside `runSubagent` (SDK helper).
 */
 const DEFAULT_AGENT_TIMEOUT_MS = (() => {
 	const fromEnv = Number.parseInt(
@ -387,8 +360,6 @@ async function defaultAgentRunner(
 	task: string,
 	options: { tools?: string[]; model?: string; cwd?: string } = {},
 ): Promise<AgentRunResult> {
-	const tmpDir = mkdtempSync(join(tmpdir(), "sf-triage-agent-"));
-	const promptPath = join(tmpDir, `${agent.name}.md`);
 	// Compose the system prompt via the prompt-parts registry. Dynamic
 	// import because src/resources/ is excluded from the root tsconfig
 	// (extensions get their own build). If the module isn't available
@ -408,92 +379,26 @@ async function defaultAgentRunner(
 			tools: options.tools ?? agent.tools,
 		}) ?? agent.systemPrompt;
 	const appendedPrompt = `${composed}\n\n## Task Input\n\n${task}`;
-	writeFileSync(promptPath, appendedPrompt, { encoding: "utf-8", mode: 0o600 });
-	try {
-		const launch = buildSfPrintLaunchArgs(
-			promptPath,
-			"Run the task input from the appended system prompt.",
-			agent,
-			options,
-		);
-		return await new Promise<AgentRunResult>((resolve) => {
-			const proc = spawn(launch.command, launch.args, {
-				cwd: options.cwd ?? process.cwd(),
-				env: {
-					...process.env,
-					SF_PRINT_TIMEOUT_MS:
-						process.env.SF_PRINT_TIMEOUT_MS ??
-						String(Math.max(1_000, DEFAULT_AGENT_TIMEOUT_MS - 5_000)),
-				},
-				shell: false,
-				stdio: ["ignore", "pipe", "pipe"],
-			});
-			let stdout = "";
-			let stderr = "";
-			let settled = false;
-			const settle = (result: AgentRunResult) => {
-				if (settled) return;
-				settled = true;
-				resolve(result);
-			};
-			// Watchdog: send SIGTERM at the timeout, escalate to SIGKILL
-			// 5s later if the process didn't exit. The result is reported
-			// as ok=false with a clear "timed out" stderr so the trust /
-			// review gate sees a real failure (not a silent stall).
-			const watchdog = setTimeout(() => {
-				if (settled) return;
-				try {
-					proc.kill("SIGTERM");
-				} catch {
-					/* already dead */
-				}
-				const kill = setTimeout(() => {
-					try {
-						proc.kill("SIGKILL");
-					} catch {
-						/* already dead */
-					}
-				}, 5_000);
-				// Resolve immediately on watchdog fire so the orchestrator
-				// can proceed to the next gate; the kill is best-effort
-				// cleanup of the abandoned subprocess.
-				kill.unref?.();
-				settle({
-					ok: false,
-					output: stdout,
-					stderr: `${agent.name} timed out after ${DEFAULT_AGENT_TIMEOUT_MS}ms (configure SF_TRIAGE_AGENT_TIMEOUT_MS to extend)`,
-					exitCode: 124, // POSIX convention for timeout
-				});
-			}, DEFAULT_AGENT_TIMEOUT_MS);
-			watchdog.unref?.();
-			proc.stdout.on("data", (chunk) => {
-				stdout += chunk.toString();
-			});
-			proc.stderr.on("data", (chunk) => {
-				stderr += chunk.toString();
-			});
-			proc.on("error", (err) => {
-				clearTimeout(watchdog);
-				settle({
-					ok: false,
-					output: stdout,
-					stderr: err instanceof Error ? err.message : String(err),
-					exitCode: 1,
-				});
-			});
-			proc.on("close", (code) => {
-				clearTimeout(watchdog);
-				settle({
-					ok: (code ?? 1) === 0,
-					output: stdout.trim(),
-					stderr: stderr.trim(),
-					exitCode: code ?? 1,
-				});
-			});
-		});
-	} finally {
-		rmSync(tmpDir, { recursive: true, force: true });
-	}
+	const result = await runSubagent(
+		{
+			systemPrompt: appendedPrompt,
+			model: options.model ?? agent.model,
+			tools: options.tools ?? agent.tools,
+			cwd: options.cwd ?? process.cwd(),
+			name: agent.name,
+		},
+		task,
+		{ timeoutMs: DEFAULT_AGENT_TIMEOUT_MS },
+	);
+	return {
+		ok: result.ok,
+		output: result.output,
+		stderr:
+			result.exitCode === 124
+				? `${agent.name} timed out after ${DEFAULT_AGENT_TIMEOUT_MS}ms (configure SF_TRIAGE_AGENT_TIMEOUT_MS to extend)`
+				: (result.stderr ?? ""),
+		exitCode: result.exitCode,
+	};
 }

 async function emitTriageApplyJournal(
@ -1134,9 +1039,7 @@ export async function handleTriage(
 			basePath: string,
 			content: string,
 		) => string | null;
-		rankTriageModelsViaRouter: (
-			candidates?: string[],
-		) => Promise<string[]>;
+		rankTriageModelsViaRouter: (candidates?: string[]) => Promise<string[]>;
 	};
 	try {
 		drainModule = (await jiti.import(
--- a/src/resources/extensions/sf/tests/auto-dispatch-canonical-plan.test.mjs
+++ b/src/resources/extensions/sf/tests/auto-dispatch-canonical-plan.test.mjs
@ -212,4 +212,68 @@ describe("resolveDispatch canonical milestone plan", () => {
 			cleanup(base);
 		}
 	});
+
+	test("completing_milestone_when_validation_projection_advances_round_uses_file_and_revalidates", async () => {
+		const base = makeTempDir("sf-dispatch-validation-revalidate-");
+		try {
+			mkdirSync(join(base, ".sf"), { recursive: true });
+			openDatabase(join(base, ".sf", "sf.db"));
+			insertMilestone({
+				id: "M901",
+				title: "Validation revalidation",
+				status: "active",
+			});
+			const validationPath = ".sf/milestones/M901/M901-VALIDATION.md";
+			insertAssessment({
+				path: validationPath,
+				milestoneId: "M901",
+				scope: "milestone-validation",
+				status: "needs-attention",
+				fullContent: [
+					"---",
+					"verdict: needs-attention",
+					"remediation_round: 0",
+					"---",
+					"",
+					"# Old Validation",
+				].join("\n"),
+			});
+			writeMilestoneFile(
+				base,
+				"M901",
+				"M901-VALIDATION.md",
+				[
+					"---",
+					"verdict: needs-remediation",
+					"remediation_round: 1",
+					"---",
+					"",
+					"# Updated Validation",
+					"Validation attention remediated; ready for revalidation.",
+				].join("\n"),
+			);
+
+			const result = await resolveDispatch({
+				state: {
+					phase: "completing-milestone",
+				},
+				mid: "M901",
+				midTitle: "Validation revalidation",
+				basePath: base,
+				prefs: { phases: {} },
+				session: {},
+				pipelineVariant: "standard",
+			});
+
+			expect(result).toMatchObject({
+				action: "dispatch",
+				unitType: "validate-milestone",
+				unitId: "M901",
+				prompt: "validate milestone",
+			});
+		} finally {
+			closeDatabase();
+			cleanup(base);
+		}
+	});
 });
--- a/src/resources/extensions/sf/tests/uok-diagnostic-synthesis.test.mjs
+++ b/src/resources/extensions/sf/tests/uok-diagnostic-synthesis.test.mjs
@ -108,7 +108,7 @@ test("synthesizeUokDiagnostics_when_live_lock_exists_reports_running_and_childre
 	assert.deepEqual(diagnostics.currentUnit.childPids, [12345]);
 });

-test("synthesizeUokDiagnostics_when_ledger_open_without_lock_reports_orphaned_run", () => {
+test("synthesizeUokDiagnostics_when_parity_recovers_orphaned_ledger_does_not_report_open_run", () => {
 	const root = makeProject();
 	openDatabase(":memory:");
 	recordUokRunStart({
@ -124,9 +124,9 @@ test("synthesizeUokDiagnostics_when_ledger_open_without_lock_reports_orphaned_ru
 		processRows: [],
 	});

-	assert.equal(diagnostics.verdict, "degraded");
-	assert.equal(diagnostics.signals.ledger, "open-runs");
-	assert.ok(issueCodes(diagnostics).includes("open-ledger-without-live-lock"));
+	assert.equal(diagnostics.verdict, "clear");
+	assert.equal(diagnostics.signals.ledger, "consistent");
+	assert.ok(!issueCodes(diagnostics).includes("open-ledger-without-live-lock"));
 });

 test("synthesizeUokDiagnostics_when_parity_missing_exit_reports_current_warning", () => {
@ -242,6 +242,26 @@ test("writeUokDiagnostics_when_repair_enabled_clears_mismatched_stale_projection
 	assert.deepEqual(listUnitRuntimeRecords(root), []);
 });

+test("writeUokDiagnostics_when_repair_enabled_clears_stale_projection_without_expected_unit", () => {
+	const root = makeProject();
+	writeUnitRuntimeRecord(root, "research-slice", "M003/S01", NOW - 10_000, {
+		status: "running",
+		lastHeartbeatAt: NOW - 5_000,
+		lastProgressAt: NOW - 5_000,
+	});
+
+	const diagnostics = writeUokDiagnostics(root, {
+		nowMs: NOW,
+		processRows: [],
+		expectedNext: { action: "skip" },
+		repairStaleRuntimeProjection: true,
+	});
+
+	assert.equal(diagnostics.signals.runtimeProjection, "ok");
+	assert.ok(!issueCodes(diagnostics).includes("stale-runtime-projection"));
+	assert.deepEqual(listUnitRuntimeRecords(root), []);
+});
+
 test("writeUokDiagnostics_persists_report_for_status_widget_and_doctor", () => {
 	const root = makeProject();
 	openDatabase(":memory:");
--- a/src/resources/extensions/sf/uok/agent-runner.js
+++ b/src/resources/extensions/sf/uok/agent-runner.js
@ -3,17 +3,17 @@
 *
 * Purpose: bridge the gap between PersistentAgent (passive inbox/memory store)
 * and actual LLM execution. Reads pending messages, assembles prompts, dispatches
- * to LLM via `sf headless --print`, writes replies back to the MessageBus, and
+ * to LLM via in-process `runSubagent`, writes replies back to the MessageBus, and
 * manages context window.
 *
 * Integration: imported lazily by `AgentSwarm.run()` when `opts.enableLLM` is true,
 * and directly by `auto/loop.js` for sleeptime consolidation. Keeps PersistentAgent
- * itself free of child_process spawning logic.
+ * itself free of session-management logic.
 *
 * Consumer: AgentSwarm.run() orchestrator and autonomous dispatch paths.
 */

-import { spawn } from "node:child_process";
+import { runSubagent } from "@singularity-forge/coding-agent";

 const DEFAULT_MAX_CONTEXT_TURNS = 10;
 const DEFAULT_MAX_TURNS_PER_RUN = 5;
@ -54,71 +54,35 @@ function buildAgentPrompt(agent, messages) {
 }

 /**
- * Execute a prompt via sf headless --print mode.
+ * Execute a prompt via in-process runSubagent.
 * Returns the LLM response text.
 */
-function runHeadlessPrompt(
+async function runHeadlessPrompt(
 	basePath,
 	prompt,
 	timeoutMs = DEFAULT_RUNNER_TIMEOUT_MS,
 ) {
-	return new Promise((resolve, reject) => {
-		const sfBin = process.env.SF_BIN_PATH || process.argv[1];
-		if (!sfBin) {
-			reject(new Error("SF_BIN_PATH not set and process.argv[1] unavailable"));
-			return;
-		}
-
-		const args = [sfBin, "--print", prompt];
-		const child = spawn(process.execPath, args, {
+	const result = await runSubagent(
+		{
+			systemPrompt:
+				"You are a persistent agent in a multi-agent swarm. Process the incoming messages and produce a structured response.",
 			cwd: basePath,
-			stdio: ["ignore", "pipe", "pipe"],
-			env: { ...process.env, SF_AUTONOMOUS: "0" },
-		});
+			name: "swarm-agent",
+		},
+		prompt,
+		{ timeoutMs },
+	);

-		const chunks = [];
-		const errChunks = [];
-		let settled = false;
+	if (!result.ok) {
+		if (result.exitCode === 124) {
+			throw new Error(`Agent runner timed out after ${timeoutMs}ms`);
+		}
+		throw new Error(
+			`sf headless failed: ${result.stderr || result.output || "unknown error"}`,
+		);
+	}

-		const timer = setTimeout(() => {
-			if (!settled) {
-				settled = true;
-				child.kill("SIGTERM");
-				reject(new Error(`Agent runner timed out after ${timeoutMs}ms`));
-			}
-		}, timeoutMs);
-
-		child.stdout.on("data", (chunk) => chunks.push(chunk));
-		child.stderr.on("data", (chunk) => errChunks.push(chunk));
-
-		child.on("error", (err) => {
-			if (!settled) {
-				settled = true;
-				clearTimeout(timer);
-				reject(err);
-			}
-		});
-
-		child.on("exit", (code) => {
-			if (settled) return;
-			settled = true;
-			clearTimeout(timer);
-
-			const stdout = Buffer.concat(chunks).toString("utf-8").trim();
-			const stderr = Buffer.concat(errChunks).toString("utf-8").trim();
-
-			if (code !== 0) {
-				reject(
-					new Error(
-						`sf headless exited ${code}: ${stderr || stdout || "unknown error"}`,
-					),
-				);
-				return;
-			}
-
-			resolve(stdout);
-		});
-	});
+	return result.output;
 }

 /**
--- a/src/resources/extensions/sf/uok/auto-dispatch.js
+++ b/src/resources/extensions/sf/uok/auto-dispatch.js
@ -10,7 +10,7 @@
 */

 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
-import { dirname, join } from "node:path";
+import { dirname, isAbsolute, join } from "node:path";
 import {
 	buildChallengePrompt,
 	buildCompleteMilestonePrompt,
@ -86,6 +86,7 @@ import {
 	getSliceTasks,
 	getValidationAttentionMarker,
 	incrementRuntimeCounter,
+	insertAssessment,
 	isDbAvailable,
 	markAllGatesOmitted,
 	setRuntimeCounter,
@ -129,6 +130,42 @@ function missingSliceStop(mid, phase) {
 async function readMilestoneValidationForDispatch(basePath, mid) {
 	if (isDbAvailable()) {
 		const assessment = getMilestoneValidationAssessment(mid);
+		const assessmentPath =
+			assessment?.path && isAbsolute(assessment.path)
+				? assessment.path
+				: assessment?.path
+					? join(basePath, assessment.path)
+					: resolveMilestoneFile(basePath, mid, "VALIDATION");
+		const dbContent = assessment?.full_content ?? "";
+		const dbRound = parseValidationRemediationRound(dbContent);
+		if (assessmentPath && existsSync(assessmentPath)) {
+			const fileContent = await loadFile(assessmentPath);
+			const fileVerdict = extractVerdict(fileContent);
+			const fileRound = parseValidationRemediationRound(fileContent);
+			if (
+				fileContent &&
+				fileVerdict &&
+				fileRound !== null &&
+				(dbRound === null || fileRound > dbRound)
+			) {
+				insertAssessment({
+					path:
+						assessment?.path ??
+						resolveMilestoneFile(basePath, mid, "VALIDATION"),
+					milestoneId: mid,
+					scope: "milestone-validation",
+					status: fileVerdict,
+					fullContent: fileContent,
+				});
+				return {
+					verdict: fileVerdict,
+					content: fileContent,
+					path:
+						assessment?.path ??
+						resolveMilestoneFile(basePath, mid, "VALIDATION"),
+				};
+			}
+		}
 		const verdict =
 			typeof assessment?.status === "string" && assessment.status.trim()
 				? assessment.status.trim().toLowerCase()
@ -1576,6 +1613,18 @@ export const DISPATCH_RULES = [
 				if (validation.verdict) {
 					const verdict = validation.verdict;
 					if (verdict && verdict !== "pass") {
+						if (verdict === "needs-remediation") {
+							return {
+								action: "dispatch",
+								unitType: "validate-milestone",
+								unitId: mid,
+								prompt: await buildValidateMilestonePrompt(
+									mid,
+									midTitle,
+									basePath,
+								),
+							};
+						}
 						if (verdict === "needs-attention") {
 							const attentionPlan =
 								extractValidationAttentionPlan(validationContent) ??
--- a/src/resources/extensions/sf/uok/diagnostic-synthesis.js
+++ b/src/resources/extensions/sf/uok/diagnostic-synthesis.js
@ -170,12 +170,12 @@ export function synthesizeUokDiagnostics(basePath, options = {}) {
 	if (
 		options.repairStaleRuntimeProjection &&
 		!lockAlive &&
-		expectedUnit &&
 		activeRuntimeUnits.length > 0
 	) {
 		const mismatchedRecords = records.filter((record) => {
 			const unit = classifyRuntimeRecord(record, false, nowMs, staleMs);
 			if (!unit.projectionActive) return false;
+			if (!expectedUnit) return true;
 			return (
 				unit.unitType !== expectedUnit.unitType ||
 				unit.unitId !== expectedUnit.unitId
@ -203,10 +203,7 @@ export function synthesizeUokDiagnostics(basePath, options = {}) {
 	const signals = {
 		lock: lock ? (lockAlive ? "active" : "stale") : "missing",
 		parity: parityHealth?.status ?? "unknown",
-		ledger:
-			openRuns.length === 0 && preParityOpenRuns.length === 0
-				? "consistent"
-				: "open-runs",
+		ledger: openRuns.length === 0 ? "consistent" : "open-runs",
 		runtimeProjection: "ok",
 		wrapper: "unknown",
 	};
@ -229,16 +226,14 @@ export function synthesizeUokDiagnostics(basePath, options = {}) {
 		});
 		recommendations.push("Reconcile UOK parity before mutating git state.");
 	}
-	const orphanedOpenRuns = openRuns.length > 0 ? openRuns : preParityOpenRuns;
-	if (orphanedOpenRuns.length > 0 && !lockAlive) {
+	if (openRuns.length > 0 && !lockAlive) {
 		issues.push({
 			code: "open-ledger-without-live-lock",
 			severity: "error",
-			message: `UOK ledger has ${orphanedOpenRuns.length} started run(s) without a live auto.lock owner.`,
+			message: `UOK ledger has ${openRuns.length} started run(s) without a live auto.lock owner.`,
 			evidence: {
-				runIds: orphanedOpenRuns.map((run) => run.runId),
-				autoRecoveredByParity:
-					openRuns.length === 0 && preParityOpenRuns.length > 0,
+				runIds: openRuns.map((run) => run.runId),
+				recoveredBeforeDiagnostics: preParityOpenRuns.length > openRuns.length,
 			},
 		});
 		recommendations.push(
--- a/src/tests/integration/web-command-parity-contract.test.ts
+++ b/src/tests/integration/web-command-parity-contract.test.ts
@ -41,7 +41,7 @@ const EXPECTED_BUILTIN_OUTCOMES = new Map<string, "rpc" | "surface" | "reject">(
 		["thinking", "surface"],
 		["edit-mode", "reject"],
 		["terminal", "reject"],
-		["stop", "prompt"],
+		["stop", "reject"],
 		["exit", "reject"],
 		["quit", "reject"],
 	],
@ -204,7 +204,9 @@ test("registered SF command roots expose direct browser outcomes or prompt passt
 		"SF extension roots are registered directly; legacy sf prefix is not a browser root",
 	);

-	for (const root of registeredRoots.filter((r) => r !== "exit")) {
+	for (const root of registeredRoots.filter(
+		(r) => r !== "exit" && r !== "stop",
+	)) {
 		const outcome = dispatchBrowserSlashCommand(`/${root}`);
 		const expected = EXPECTED_SF_OUTCOMES.get(root);
 		if (expected) {