fix: harden sf server control loop

2026-05-17 21:13:12 +02:00 · 2026-05-17 21:13:12 +02:00 · acd907fec2
commit acd907fec2
parent 70d89eebec
33 changed files with 1602 additions and 192 deletions
--- a/.sf/preferences.yaml
+++ b/.sf/preferences.yaml
@ -1,8 +1,6 @@
---
 version: 1
 experimental:
  smoke_gate: false
---
 # SF Preferences

-See `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.
+# See `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.
--- a/AGENTS.md
+++ b/AGENTS.md
@ -98,6 +98,27 @@ npm run release:changelog
 npm run release:bump
 ```

+## Running SF Locally
+
+The server surface is the default local dogfooding surface for web/RPC/autonomous
+control. The TUI still exists, but do not use it as the default way to run or
+verify autonomous mode.
+
+```bash
+# Source/dev server
+npm run sf:server -- --port 4000 --host 127.0.0.1
+
+# Built server after npm run build:core or npm run build
+npm run sf:server:dist -- --port 4000 --host 127.0.0.1
+```
+
+Bind only trusted interfaces. For this workstation, localhost plus Tailscale is
+acceptable; public `0.0.0.0` is not the default. If a server is already running,
+use `sf headless ...` as the machine/control surface instead of starting a
+second writer. Server-forwarded feedback writes are queued and drained by the
+server before autonomous dispatch, so CLI control does not block behind a busy
+unit.
+
 ## Coding Style & Naming Conventions

 - **Language**: TypeScript with `"strict": true` enabled in all packages
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -98,3 +98,34 @@ When adding a new `{{variable}}` to a prompt template in `prompts/`, you must:
 `loadPrompt` throws at runtime if any `{{var}}` in the template has no
 corresponding key in the vars object — this is intentional to catch
 template/code drift early.
+
+## Running the SF server in this repo
+
+Use the server surface for dogfooding and browser/RPC control. Do not start the
+TUI as the default way to exercise autonomous mode.
+
+```bash
+# source/dev server, with resource redirect and restart support
+npm run sf:server -- --port 4000 --host 127.0.0.1
+
+# built server, after npm run build:core or npm run build
+npm run sf:server:dist -- --port 4000 --host 127.0.0.1
+```
+
+If the server is already running, prefer `sf headless ...` control commands
+rather than starting a second writer. Feedback add/resolve commands are
+forwarded to the active server and queued there so CLI control does not hang
+behind an autonomous unit.
+
+For access from other machines on a trusted network, bind an additional trusted
+interface such as a Tailscale address. Do not bind `0.0.0.0` for the dev server
+unless an explicit fronting proxy/firewall decision is in place.
+
+Before assuming a source edit is live, rebuild the relevant output:
+
+```bash
+npm run build:core
+```
+
+Then restart the server. Stale `dist/` or stale `~/.sf/agent/extensions/sf/`
+copies can make fixed source look broken.
--- a/packages/coding-agent/src/modes/rpc/rpc-client.ts
+++ b/packages/coding-agent/src/modes/rpc/rpc-client.ts
@ -409,12 +409,19 @@ export class RpcClient {
 		subcommand: "add" | "resolve",
 		args: string[],
 		json = false,
-	): Promise<{ exitCode: number; stdout: string; stderr: string }> {
+		options: { queued?: boolean } = {},
+	): Promise<{
+		exitCode: number | null;
+		stdout: string;
+		stderr: string;
+		queued?: boolean;
+	}> {
 		const response = await this.send({
 			type: "sf_feedback",
 			subcommand,
 			args,
 			json,
+			queued: options.queued,
 		});
 		return this.getData(response);
 	}
--- a/packages/coding-agent/src/modes/rpc/rpc-mode.ts
+++ b/packages/coding-agent/src/modes/rpc/rpc-mode.ts
@ -12,7 +12,16 @@
 */

 import * as crypto from "node:crypto";
-import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
+import {
+	appendFileSync,
+	existsSync,
+	mkdirSync,
+	readdirSync,
+	readFileSync,
+	renameSync,
+	statSync,
+	unlinkSync,
+} from "node:fs";
 import type { WriteStream } from "node:tty";
 import { pathToFileURL } from "node:url";
 import { dirname, join, resolve } from "node:path";
@ -42,6 +51,142 @@ const RUNTIME_HEARTBEAT_INTERVAL_MS = Number(
 	process.env.SF_RUNTIME_HEARTBEAT_INTERVAL_MS ?? 10_000,
 );

+const SF_FEEDBACK_QUEUE_FILE = "sf-feedback-queue.jsonl";
+const SF_FEEDBACK_FAILED_QUEUE_FILE = "sf-feedback-queue-failed.jsonl";
+
+/**
+ * Append one sf_feedback command to the JSONL queue under `<cwd>/.sf/runtime`.
+ *
+ * Purpose: record the command on disk so the RPC caller gets an immediate
+ * answer and the command can be applied later by the queue drain.
+ *
+ * Returns the path of the queue file that was appended to.
+ */
+function queueSfFeedbackCommand(
+	cwd: string,
+	command: Extract<RpcCommand, { type: "sf_feedback" }>,
+): string {
+	const runtimeDir = join(cwd, ".sf", "runtime");
+	mkdirSync(runtimeDir, { recursive: true });
+	const queueFile = join(runtimeDir, SF_FEEDBACK_QUEUE_FILE);
+	// One JSON object per line; `json` is normalized to a strict boolean.
+	const record = {
+		schemaVersion: 1,
+		queuedAt: new Date().toISOString(),
+		id: command.id,
+		subcommand: command.subcommand,
+		args: command.args,
+		json: command.json === true,
+		source: "rpc",
+	};
+	appendFileSync(queueFile, `${JSON.stringify(record)}\n`, "utf-8");
+	return queueFile;
+}
+
+/**
+ * One row of the on-disk sf_feedback queue (one JSON object per line).
+ *
+ * This is the shape parseQueuedSfFeedbackLine returns after validating a line.
+ */
+type QueuedSfFeedbackCommand = {
+	schemaVersion: 1;
+	queuedAt: string;
+	id?: string;
+	subcommand: "add" | "list" | "resolve";
+	args: string[];
+	json: boolean;
+	source: "rpc";
+};
+
+/**
+ * Validate one line of the feedback queue file.
+ *
+ * Returns a normalized command, or null when the line is not valid JSON, is
+ * not schema version 1, names an unknown subcommand, or has non-array args.
+ * A missing `queuedAt` is replaced with the current time and every arg is
+ * coerced to a string.
+ */
+function parseQueuedSfFeedbackLine(
+	line: string,
+): QueuedSfFeedbackCommand | null {
+	try {
+		const raw = JSON.parse(line) as Partial<QueuedSfFeedbackCommand>;
+		// Destructuring a non-object (e.g. the JSON literal `null`) throws and
+		// is treated like any other malformed line.
+		const { schemaVersion, subcommand, args } = raw;
+		if (schemaVersion !== 1) return null;
+		if (
+			subcommand !== "add" &&
+			subcommand !== "list" &&
+			subcommand !== "resolve"
+		) {
+			return null;
+		}
+		if (!Array.isArray(args)) return null;
+
+		const queuedAt =
+			typeof raw.queuedAt === "string"
+				? raw.queuedAt
+				: new Date().toISOString();
+		const id = typeof raw.id === "string" ? raw.id : undefined;
+		return {
+			schemaVersion: 1,
+			queuedAt,
+			id,
+			subcommand,
+			args: args.map((value) => String(value)),
+			json: raw.json === true,
+			source: "rpc",
+		};
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Tail of the in-process drain chain.
+ *
+ * Purpose: every drain in this process renames the queue to the same
+ * `<pid>.draining` path, so two overlapping drains would unlink each other's
+ * file. Chaining the drains makes them run strictly one after another.
+ */
+let sfFeedbackDrainChain: Promise<void> = Promise.resolve();
+
+/**
+ * Apply queued sf_feedback commands before a daemon-owned autonomous run starts.
+ *
+ * Purpose: keep CLI/RPC control commands non-blocking while preserving a single
+ * server-owned writer for self-feedback mutations.
+ *
+ * Drains are serialized within the process: a call waits for any drain that is
+ * already running and then performs its own pass, so a caller that awaits this
+ * function knows everything queued before the call has been attempted.
+ *
+ * Consumer: start_autonomous RPC command and scheduleQueuedSfFeedbackDrain.
+ */
+async function drainQueuedSfFeedbackCommands(cwd: string): Promise<void> {
+	const run = sfFeedbackDrainChain.then(() => drainSfFeedbackQueueOnce(cwd));
+	// A failed drain must not poison later drains; the caller still observes
+	// the rejection through `run`.
+	sfFeedbackDrainChain = run.catch(() => undefined);
+	await run;
+}
+
+/**
+ * Run one drain pass: claim the queue file, apply each command, and record
+ * every command that failed or could not be attempted in the failed queue.
+ */
+async function drainSfFeedbackQueueOnce(cwd: string): Promise<void> {
+	const runtimeDir = join(cwd, ".sf", "runtime");
+	const queuePath = join(runtimeDir, SF_FEEDBACK_QUEUE_FILE);
+	if (!existsSync(queuePath)) return;
+
+	const drainingPath = join(
+		runtimeDir,
+		`${SF_FEEDBACK_QUEUE_FILE}.${process.pid}.draining`,
+	);
+	try {
+		// Claim the queue by renaming it; new commands go to a fresh queue file.
+		renameSync(queuePath, drainingPath);
+	} catch {
+		// Queue vanished or could not be claimed; nothing to drain this pass.
+		return;
+	}
+
+	// If the read itself fails the claimed file is left in place so no command
+	// is discarded.
+	const queued = readFileSync(drainingPath, "utf-8")
+		.split("\n")
+		.map((line) => line.trim())
+		.filter(Boolean)
+		.map(parseQueuedSfFeedbackLine)
+		.filter((row): row is QueuedSfFeedbackCommand => row !== null);
+
+	const failed: QueuedSfFeedbackCommand[] = [];
+	let nextIndex = 0;
+	try {
+		if (queued.length > 0) {
+			const { handleFeedback } = await loadHeadlessFeedbackHandler();
+			for (; nextIndex < queued.length; nextIndex++) {
+				const command = queued[nextIndex];
+				try {
+					const captured = await captureProcessWrites(() =>
+						handleFeedback(cwd, {
+							subcommand: command.subcommand,
+							args: command.args,
+							json: command.json,
+						}),
+					);
+					if (captured.result.exitCode !== 0) failed.push(command);
+				} catch {
+					failed.push(command);
+				}
+			}
+		}
+	} finally {
+		// Commands never attempted (for example because the handler failed to
+		// load) are preserved alongside the ones that failed, instead of being
+		// stranded in the claimed file.
+		const unapplied = failed.concat(queued.slice(nextIndex));
+		if (unapplied.length > 0) {
+			appendFileSync(
+				join(runtimeDir, SF_FEEDBACK_FAILED_QUEUE_FILE),
+				unapplied.map((row) => JSON.stringify(row)).join("\n") + "\n",
+				"utf-8",
+			);
+		}
+		unlinkSync(drainingPath);
+	}
+}
+
+/**
+ * Schedule a background drain of the feedback queue on the next timer tick.
+ *
+ * Purpose: let the RPC handler answer a queued command immediately while the
+ * drain runs afterwards. The timer is unref'd so it never keeps the process
+ * alive on its own.
+ *
+ * A drain failure is reported on stderr instead of being left as an unhandled
+ * promise rejection.
+ */
+function scheduleQueuedSfFeedbackDrain(cwd: string): void {
+	const timer = setTimeout(() => {
+		drainQueuedSfFeedbackCommands(cwd).catch((error: unknown) => {
+			const detail = error instanceof Error ? error.message : String(error);
+			process.stderr.write(
+				`[rpc] queued sf_feedback drain failed: ${detail}\n`,
+			);
+		});
+	}, 0);
+	timer.unref?.();
+}
+
 async function captureProcessWrites<T>(
 	run: () => Promise<T>,
 ): Promise<{ result: T; stdout: string; stderr: string }> {
@ -853,6 +998,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 					const previousHeadless = process.env.SF_HEADLESS;
 					process.env.SF_HEADLESS = "1";
 					try {
+						await drainQueuedSfFeedbackCommands(process.cwd());
 						await session.prompt("/autonomous", {
 							source: "rpc",
 						});
@ -882,6 +1028,16 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 			}

 			case "sf_feedback": {
+				if (command.queued === true) {
+					const queuePath = queueSfFeedbackCommand(process.cwd(), command);
+					scheduleQueuedSfFeedbackDrain(process.cwd());
+					return success(id, "sf_feedback", {
+						exitCode: null,
+						stdout: JSON.stringify({ ok: true, queued: true, queuePath }),
+						stderr: "",
+						queued: true,
+					});
+				}
 				const { handleFeedback } = await loadHeadlessFeedbackHandler();
 				const captured = await captureProcessWrites(() =>
 					handleFeedback(process.cwd(), {
--- a/packages/coding-agent/src/modes/rpc/rpc-types.ts
+++ b/packages/coding-agent/src/modes/rpc/rpc-types.ts
@ -47,6 +47,7 @@ export type RpcCommand =
 			subcommand: "add" | "resolve";
 			args: string[];
 			json?: boolean;
+			queued?: boolean;
 	  }

 	// State
@ -185,7 +186,12 @@ export type RpcResponse =
 			type: "response";
 			command: "sf_feedback";
 			success: true;
-			data: { exitCode: number; stdout: string; stderr: string };
+			data: {
+				exitCode: number | null;
+				stdout: string;
+				stderr: string;
+				queued?: boolean;
+			};
 	  }
 	| {
 			id?: string;
--- a/packages/rpc-client/src/rpc-client.ts
+++ b/packages/rpc-client/src/rpc-client.ts
@ -482,12 +482,19 @@ export class RpcClient {
 		subcommand: "add" | "resolve",
 		args: string[],
 		json = false,
-	): Promise<{ exitCode: number; stdout: string; stderr: string }> {
+		options: { queued?: boolean } = {},
+	): Promise<{
+		exitCode: number | null;
+		stdout: string;
+		stderr: string;
+		queued?: boolean;
+	}> {
 		const response = await this.send({
 			type: "sf_feedback",
 			subcommand,
 			args,
 			json,
+			queued: options.queued,
 		});
 		return this.getData(response);
 	}
--- a/packages/rpc-client/src/rpc-types.ts
+++ b/packages/rpc-client/src/rpc-types.ts
@ -113,6 +113,7 @@ export type RpcCommand =
 			subcommand: "add" | "resolve";
 			args: string[];
 			json?: boolean;
+			queued?: boolean;
 	  }

 	// State
@ -251,7 +252,12 @@ export type RpcResponse =
 			type: "response";
 			command: "sf_feedback";
 			success: true;
-			data: { exitCode: number; stdout: string; stderr: string };
+			data: {
+				exitCode: number | null;
+				stdout: string;
+				stderr: string;
+				queued?: boolean;
+			};
 	  }
 	| {
 			id?: string;
--- a/src/cli-web-branch.ts
+++ b/src/cli-web-branch.ts
@ -227,7 +227,7 @@ export type RunWebCliBranchResult =
 	| {
 			handled: true;
 			exitCode: number;
-			action: "start";
+			action: "start" | "reload";
 			status: WebModeLaunchStatus;
 			launchInputs: {
 				cwd: string;
@ -270,8 +270,8 @@ export async function runWebCliBranch(
 		};
 	}

-	// `sf server [start] [path]` starts the full operator server for one repo.
-	// Matches: `sf server`, `sf server start`, `sf server start <path>`, `sf server <path>`
+	// `sf server [start|reload] [path]` starts or reloads the full operator server for one repo.
+	// Matches: `sf server`, `sf server start [path]`, `sf server reload [path]`, `sf server <path>`
 	const isWebSubcommand =
 		flags.messages[0] === "server" && flags.messages[1] !== "stop";
 	if (!isWebSubcommand) {
@ -286,7 +286,7 @@ export async function runWebCliBranch(
 	//   sf server <path>          → messages[1] (when not "start")
 	let webPath = flags.webPath;
 	if (!webPath && isWebSubcommand) {
-		if (flags.messages[1] === "start") {
+		if (flags.messages[1] === "start" || flags.messages[1] === "reload") {
 			webPath = flags.messages[2];
 		} else if (flags.messages[1]) {
 			webPath = flags.messages[1];
@ -346,6 +346,7 @@ export async function runWebCliBranch(
 		agentDir,
 		host: flags.webHost,
 		port: flags.webPort,
+		...(flags.messages[1] === "reload" ? { reload: true } : {}),
 		allowedOrigins: flags.webAllowedOrigins,
 	});

@ -356,7 +357,7 @@ export async function runWebCliBranch(
 	return {
 		handled: true,
 		exitCode: status.ok ? 0 : 1,
-		action: "start",
+		action: flags.messages[1] === "reload" ? "reload" : "start",
 		status,
 		launchInputs: {
 			cwd: currentCwd,
--- a/src/headless-server-forward.ts
+++ b/src/headless-server-forward.ts
@ -12,9 +12,10 @@ import { resolve } from "node:path";
 import { readInstanceRegistry, type WebInstanceEntry } from "./web-mode.js";

 export interface ForwardedHeadlessResult {
-	exitCode: number;
+	exitCode: number | null;
 	stdout: string;
 	stderr: string;
+	queued?: boolean;
 }

 type SfFeedbackResponse =
@ -109,6 +110,7 @@ export async function forwardFeedbackToActiveServer(
 			subcommand: options.subcommand,
 			args: options.args,
 			json: options.json,
+			queued: true,
 		},
 	);
 	if (response.statusCode === 404) return null;
--- a/src/headless-triage.ts
+++ b/src/headless-triage.ts
@ -67,6 +67,7 @@ export interface HandleTriageOptions {
 	max?: number;
 	run?: boolean;
 	apply?: boolean;
+	urgentOnly?: boolean;
 	model?: string;
 	agentRunner?: AgentRunner;
 }
@ -1166,6 +1167,13 @@ export async function handleTriage(
 		return { exitCode: 1 };
 	}

+	if (options.urgentOnly) {
+		candidates = candidates.filter(
+			(candidate) =>
+				candidate.severity === "high" || candidate.severity === "critical",
+		);
+	}
+
 	if (typeof options.max === "number" && options.max > 0) {
 		candidates = candidates.slice(0, options.max);
 	}
--- a/src/headless.ts
+++ b/src/headless.ts
@ -105,6 +105,13 @@ import {

 const HEADLESS_HEARTBEAT_INTERVAL_MS = 60_000;

+// The named severities document the expected values; the trailing `| string`
+// means any string is accepted by the type checker.
+type SelfFeedbackSeverity = "low" | "medium" | "high" | "critical" | string;
+
+/** Minimal view of a self-feedback row: only the fields the urgent-row count reads. */
+interface SelfFeedbackRowForTriage {
+	resolvedAt?: string | null;
+	severity?: SelfFeedbackSeverity;
+}
+
 interface HeadlessTimeoutSolverEvalRecord {
 	runId: string;
 	reportPath: string;
@ -577,6 +584,31 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
 	}
 }

+/**
+ * Count unresolved self-feedback rows whose severity is "high" or "critical".
+ *
+ * Purpose: let urgent operator/detector findings bypass the normal triage
+ * cadence without making the TypeScript headless surface depend on JS
+ * extension declarations.
+ *
+ * Best-effort: any failure to load or read the self-feedback module yields 0.
+ *
+ * Consumer: runHeadlessOnce before autonomous dispatch.
+ */
+async function countUrgentSelfFeedbackRows(basePath: string): Promise<number> {
+	try {
+		// Held in a variable so the import is resolved at runtime only.
+		const modulePath = "./resources/extensions/sf/self-feedback.js";
+		const mod = (await import(modulePath)) as {
+			readAllSelfFeedback?: (basePath: string) => SelfFeedbackRowForTriage[];
+		};
+		const rows = mod.readAllSelfFeedback?.(basePath) ?? [];
+		return rows.reduce((urgent, row) => {
+			if (row.resolvedAt) return urgent;
+			const isUrgent = row.severity === "high" || row.severity === "critical";
+			return isUrgent ? urgent + 1 : urgent;
+		}, 0);
+	} catch {
+		return 0;
+	}
+}
+
 async function runHeadlessOnce(
 	options: HeadlessOptions,
 	restartCount: number,
@ -660,12 +692,19 @@ async function runHeadlessOnce(
 				"last-triage-at",
 			);
 			let shouldRunTriage = true;
+			const urgentTriageCount = await countUrgentSelfFeedbackRows(
+				process.cwd(),
+			);
 			try {
 				if (existsSync(triageMarkerPath)) {
 					const last = Date.parse(
 						readFileSync(triageMarkerPath, "utf8").trim(),
 					);
-					if (Number.isFinite(last) && Date.now() - last < triageIntervalMs) {
+					if (
+						urgentTriageCount === 0 &&
+						Number.isFinite(last) &&
+						Date.now() - last < triageIntervalMs
+					) {
 						shouldRunTriage = false;
 						if (!options.json) {
 							process.stderr.write(
@ -687,13 +726,16 @@ async function runHeadlessOnce(
 					const { handleTriage } = await import("./headless-triage.js");
 					if (!options.json) {
 						process.stderr.write(
-							`[headless] autonomous: draining self-feedback triage queue first (max=${triageMaxBatch})...\n`,
+							urgentTriageCount > 0
+								? `[headless] autonomous: draining ${urgentTriageCount} high/critical self-feedback entr${urgentTriageCount === 1 ? "y" : "ies"} before dispatch (max=${triageMaxBatch})...\n`
+								: `[headless] autonomous: draining self-feedback triage queue first (max=${triageMaxBatch})...\n`,
 						);
 					}
 					await handleTriage(process.cwd(), {
 						apply: true,
 						json: !!options.json,
 						max: triageMaxBatch,
+						urgentOnly: urgentTriageCount > 0,
 					});
 					try {
 						const runtimeDir = join(process.cwd(), ".sf", "runtime");
@ -971,7 +1013,7 @@ async function runHeadlessOnce(
 				if (forwarded.stdout) process.stdout.write(forwarded.stdout);
 				if (forwarded.stderr) process.stderr.write(forwarded.stderr);
 				return {
-					exitCode: forwarded.exitCode,
+					exitCode: forwarded.exitCode ?? EXIT_SUCCESS,
 					interrupted: false,
 					timedOut: false,
 				};
--- a/src/resources/extensions/sf/auto-timers.js
+++ b/src/resources/extensions/sf/auto-timers.js
@ -6,6 +6,7 @@
 * via startUnitSupervision() and torn down by the caller via clearUnitTimeout().
 */
 import { saveActivityLog } from "./activity-log.js";
+import { resolveAgentEnd } from "./auto/resolve.js";
 import { resolveAgentEndCancelled } from "./auto/resolve.js";
 import { detectWorkingTreeActivity } from "./auto-supervisor.js";
 import { blockModel } from "./blocked-models.js";
@ -40,6 +41,124 @@ import {
 	writeUnitRuntimeRecord,
 } from "./uok/unit-runtime.js";
 import { logError, logWarning } from "./workflow-logger.js";
+
+/**
+ * Cancel and null out every supervision timer stored on the session state.
+ *
+ * Purpose: stop one runaway-guard terminal decision from being emitted repeatedly
+ * while the autonomous loop is being unblocked.
+ *
+ * Consumer: finalizeRunawayGuardFailure() when zero-progress or silent-worker
+ * detection has already converted the current unit attempt into a failed record.
+ */
+function clearSupervisionHandles(s) {
+	// One-shot timers.
+	for (const field of ["unitTimeoutHandle", "wrapupWarningHandle"]) {
+		if (!s[field]) continue;
+		clearTimeout(s[field]);
+		s[field] = null;
+	}
+	// Repeating timers.
+	for (const field of ["idleWatchdogHandle", "continueHereHandle"]) {
+		if (!s[field]) continue;
+		clearInterval(s[field]);
+		s[field] = null;
+	}
+}
+
+/**
+ * Finish a runaway-guard failure as one terminal unit-attempt event.
+ *
+ * Purpose: convert zero-progress and silent-worker supervision failures into a
+ * retryable failed runtime record, close the worker lineage, stop supervision
+ * timers, and unblock the unit promise so the autonomous loop can select the
+ * next eligible model instead of repeating the same warning.
+ *
+ * Ordering: supervision timers are cleared before any awaited work so a
+ * watchdog tick that fires during closeout cannot start a second finalization.
+ * The unit promise is resolved in a `finally`, so an error thrown by the
+ * bookkeeping steps still propagates to the caller but cannot leave the
+ * autonomous loop blocked on this unit.
+ *
+ * Does nothing when the session has no current unit. Each entry of `helpers`
+ * (closeoutUnit, writeUnitRuntimeRecord, blockModel, recordSelfFeedback,
+ * notify, resolveAgentEnd) overrides the corresponding module default.
+ *
+ * Consumer: startUnitSupervision() idle watchdog fail branch.
+ */
+export async function finalizeRunawayGuardFailure(sctx, decision, helpers = {}) {
+	const { s, ctx, unitType, unitId, buildSnapshotOpts } = sctx;
+	const currentUnit = s.currentUnit;
+	if (!currentUnit) return;
+	const closeout = helpers.closeoutUnit ?? closeoutUnit;
+	const writeRuntime = helpers.writeUnitRuntimeRecord ?? writeUnitRuntimeRecord;
+	const block = helpers.blockModel ?? blockModel;
+	const recordFeedback = helpers.recordSelfFeedback ?? recordSelfFeedback;
+	const notify = helpers.notify ?? ((message, level) => ctx.ui.notify(message, level));
+	const resolveUnit =
+		helpers.resolveAgentEnd ??
+		((event) => {
+			resolveAgentEnd(event);
+		});
+
+	// Stop the timers first: the closeout below is awaited, and a still-armed
+	// watchdog could otherwise fire again for the same failed attempt.
+	clearSupervisionHandles(s);
+	try {
+		const failedModel = s.currentUnitModel;
+		if (
+			decision.reason === "zero-progress" &&
+			failedModel?.provider &&
+			failedModel?.id
+		) {
+			// Block the model for one hour so the retry picks a fallback.
+			block(
+				s.basePath,
+				failedModel.provider,
+				failedModel.id,
+				`zero-progress on ${unitType} ${unitId}`,
+				{ expiresAt: Date.now() + 60 * 60 * 1000 },
+			);
+			notify(
+				`Temporarily blocked ${failedModel.provider}/${failedModel.id} after zero-progress on ${unitType} ${unitId}; retry will choose a fallback.`,
+				"warning",
+			);
+		}
+		await closeout(
+			ctx,
+			s.basePath,
+			currentUnit.type,
+			currentUnit.id,
+			currentUnit.startedAt,
+			buildSnapshotOpts(),
+		);
+		writeRuntime(s.basePath, unitType, unitId, currentUnit.startedAt, {
+			phase: "failed-silent-worker",
+			status: "failed",
+			lastProgressAt: Date.now(),
+			lastProgressKind: "runaway-guard-fail",
+			runawayGuardFail: decision.metadata,
+			lineageEvent: {
+				status: "failed",
+				workerSessionId: ctx.sessionManager?.getSessionId?.(),
+				note: `${decision.reason ?? "runaway-guard"} failed current attempt`,
+			},
+		});
+		// unitId is split on "/" into milestone, slice and the remaining task path.
+		const unitParts = unitId.split("/");
+		recordFeedback(
+			{
+				kind: "runaway-loop:silent-worker-failure",
+				severity: "high",
+				summary: decision.reason,
+				evidence: JSON.stringify(decision.metadata, null, 2),
+				suggestedFix:
+					"LLM session never produced an assistant message — check session-manager.ts:1086-1096 (silent _persist skip) and verify the model/provider is responding. The dispatcher will attempt retry within maxRetries; if persistent, transitions to blocked.",
+				occurredIn: {
+					unitType,
+					milestone: unitParts[0],
+					slice: unitParts[1],
+					task: unitParts.slice(2).join("/") || undefined,
+				},
+				source: "detector",
+			},
+			s.basePath,
+		);
+		notify(decision.reason, "error");
+	} finally {
+		// Always unblock the unit promise, even when bookkeeping above threw.
+		resolveUnit({
+			messages: [],
+			_synthetic: "runaway-guard-fail",
+			reason: decision.reason,
+		});
+	}
+}
 /**
 * Set up all four supervision timers for the current unit:
 * 1. Soft timeout warning (wrapup)
@ -271,65 +390,7 @@ export function startUnitSupervision(sctx) {
 				}
 				if (decision.action === "fail") {
 					if (getInFlightToolCount() > 0) return;
-					const failedModel = s.currentUnitModel;
-					if (
-						decision.reason === "zero-progress" &&
-						failedModel?.provider &&
-						failedModel?.id
-					) {
-						blockModel(
-							s.basePath,
-							failedModel.provider,
-							failedModel.id,
-							`zero-progress on ${unitType} ${unitId}`,
-							{ expiresAt: Date.now() + 60 * 60 * 1000 },
-						);
-						ctx.ui.notify(
-							`Temporarily blocked ${failedModel.provider}/${failedModel.id} after zero-progress on ${unitType} ${unitId}; retry will choose a fallback.`,
-							"warning",
-						);
-					}
-					await closeoutUnit(
-						ctx,
-						s.basePath,
-						s.currentUnit.type,
-						s.currentUnit.id,
-						s.currentUnit.startedAt,
-						buildSnapshotOpts(),
-					);
-					writeUnitRuntimeRecord(
-						s.basePath,
-						unitType,
-						unitId,
-						s.currentUnit.startedAt,
-						{
-							phase: "failed-silent-worker",
-							status: "failed",
-							lastProgressAt: Date.now(),
-							lastProgressKind: "runaway-guard-fail",
-							runawayGuardFail: decision.metadata,
-						},
-					);
-					const unitParts = unitId.split("/");
-					recordSelfFeedback(
-						{
-							kind: "runaway-loop:silent-worker-failure",
-							severity: "high",
-							summary: decision.reason,
-							evidence: JSON.stringify(decision.metadata, null, 2),
-							suggestedFix:
-								"LLM session never produced an assistant message — check session-manager.ts:1086-1096 (silent _persist skip) and verify the model/provider is responding. The dispatcher will attempt retry within maxRetries; if persistent, transitions to blocked.",
-							occurredIn: {
-								unitType,
-								milestone: unitParts[0],
-								slice: unitParts[1],
-								task: unitParts.slice(2).join("/") || undefined,
-							},
-							source: "detector",
-						},
-						s.basePath,
-					);
-					ctx.ui.notify(decision.reason, "error");
+					await finalizeRunawayGuardFailure(sctx, decision);
 					return;
 				}
 				if (decision.action === "pause") {
--- a/src/resources/extensions/sf/detectors/index.js
+++ b/src/resources/extensions/sf/detectors/index.js
@ -11,6 +11,7 @@ export { periodicDetectorSweepGate } from "./periodic-runner.js";
 export { productionPlateauGate } from "./production-plateau.js";
 export { repeatedFeedbackKindGate } from "./repeated-feedback-kind.js";
 export { sameUnitLoopGate } from "./same-unit-loop.js";
+export { serverDirectionDriftGate } from "./server-direction-drift.js";
 export { staleLockGate } from "./stale-lock.js";
 export { statusCompletionDriftGate } from "./status-completion-drift.js";
 export { zeroProgressGate } from "./zero-progress.js";
--- a/src/resources/extensions/sf/detectors/periodic-runner.js
+++ b/src/resources/extensions/sf/detectors/periodic-runner.js
@ -11,6 +11,7 @@ import { detectCrashLoop } from "./crash-loop-classifier.js";
 import { detectProductionPlateau } from "./production-plateau.js";
 import { detectRepeatedFeedbackKind } from "./repeated-feedback-kind.js";
 import { detectSameUnitLoop } from "./same-unit-loop.js";
+import { detectServerDirectionDrift } from "./server-direction-drift.js";
 import { detectStaleLock } from "./stale-lock.js";
 import { detectStatusCompletionDrift } from "./status-completion-drift.js";
 import { detectZeroProgress } from "./zero-progress.js";
@ -74,6 +75,10 @@ function defaultDetectors(ctx, options) {
 			name: "production-plateau",
 			run: () => detectProductionPlateau(ctx?.unitMetrics, ctx, options),
 		},
+		{
+			name: "server-direction-drift",
+			run: () => detectServerDirectionDrift(ctx, options),
+		},
 	];
 }

--- a/src/resources/extensions/sf/detectors/server-direction-drift.js
+++ b/src/resources/extensions/sf/detectors/server-direction-drift.js
@ -0,0 +1,132 @@
+/**
+ * server-direction-drift.js — detect obsolete server architecture in live work.
+ *
+ * Purpose: stop SF from planning queued work against superseded server shapes
+ * after the product direction moves to one embedded `sf server` control plane.
+ *
+ * Consumer: Wiggums periodic detector sweep and UOK detector gate registry.
+ */
+
+// Case-insensitive phrases that mark a row as targeting a deprecated server
+// shape. Used when the caller does not pass options.deprecatedServerPatterns.
+const DEFAULT_DEPRECATED_PATTERNS = [
+	/\bsf serve\b/i,
+	/\bA2A\b/i,
+	/\bJSON-RPC API\b/i,
+	/\bper-repo systemd unit\b/i,
+	/\bper-repo web servers?\b/i,
+	/\bseparate standalone daemon brain\b/i,
+];
+
+// Lower-cased statuses that count as live work. isActiveRow also treats a
+// missing or empty status as active.
+const ACTIVE_STATUSES = new Set(["queued", "active", "planned", "pending"]);
+// Lower-cased statuses that are never reported.
+const CLOSED_STATUSES = new Set([
+	"cancelled",
+	"canceled",
+	"complete",
+	"completed",
+	"done",
+	"superseded",
+	"parked",
+]);
+
+/**
+ * Detect queued milestone/slice work that still targets a deprecated server path.
+ *
+ * Purpose: make stale roadmap/server-direction drift visible before autonomous
+ * planning spends turns on obsolete `sf serve`, A2A, or per-repo server work.
+ *
+ * Reads `ctx.milestones`, `ctx.slices` and `ctx.requirements` (non-arrays are
+ * ignored), keeps the rows that isActiveRow accepts, and reports the first
+ * pattern that matches each row's searchable text. Patterns come from
+ * `options.deprecatedServerPatterns` when provided, otherwise from the
+ * built-in default list.
+ *
+ * Returns `{ stuck: false, reason: "", signature: { checked } }` when nothing
+ * matches, otherwise `{ stuck: true, reason: "server-direction-drift",
+ * signature: { matches, expectedDirection } }`.
+ *
+ * Consumer: periodic-runner.js default detector list.
+ */
+export function detectServerDirectionDrift(ctx = {}, options = {}) {
+	const rows = [
+		...normalizeRows(ctx.milestones, "milestone"),
+		...normalizeRows(ctx.slices, "slice"),
+		...normalizeRows(ctx.requirements, "requirement"),
+	];
+	const patterns =
+		options.deprecatedServerPatterns ?? DEFAULT_DEPRECATED_PATTERNS;
+	const matches = [];
+
+	for (const row of rows) {
+		if (!isActiveRow(row)) continue;
+		const text = searchableText(row);
+		const pattern = patterns.find((candidate) => {
+			// A caller-supplied global or sticky regex remembers lastIndex between
+			// test() calls; reset it so every row is searched from the start.
+			candidate.lastIndex = 0;
+			return candidate.test(text);
+		});
+		if (!pattern) continue;
+		matches.push({
+			kind: row.kind,
+			id: row.id,
+			milestoneId: row.milestoneId ?? row.milestone_id ?? null,
+			status: row.status ?? null,
+			pattern: pattern.source,
+			title: row.title ?? "",
+		});
+	}
+
+	if (matches.length === 0) {
+		return { stuck: false, reason: "", signature: { checked: rows.length } };
+	}
+	return {
+		stuck: true,
+		reason: "server-direction-drift",
+		signature: {
+			matches,
+			expectedDirection:
+				"sf server is the single operator server; web/Next.js embeds daemon lifecycle",
+		},
+	};
+}
+
+/**
+ * UOK verification gate wrapper around detectServerDirectionDrift.
+ *
+ * Purpose: make superseded server architecture detectable through the common
+ * gate runner, not only through ad hoc roadmap review.
+ *
+ * A detected drift is reported as "manual-attention" with the detector
+ * signature as findings; otherwise the gate passes.
+ *
+ * Consumer: detector gate registry and periodicDetectorSweepGate.
+ */
+export const serverDirectionDriftGate = {
+	id: "server-direction-drift",
+	type: "verification",
+	async execute(ctx = {}) {
+		const { stuck, reason, signature } = detectServerDirectionDrift(
+			ctx,
+			ctx.options,
+		);
+		return stuck
+			? {
+					outcome: "manual-attention",
+					failureClass: "verification",
+					rationale: reason,
+					findings: signature,
+				}
+			: {
+					outcome: "pass",
+					failureClass: null,
+					rationale: "no server-direction drift",
+				};
+	},
+};
+
+/** Copy each row and tag it with `kind`; anything that is not an array yields []. */
+function normalizeRows(rows, kind) {
+	return Array.isArray(rows)
+		? rows.map((entry) => Object.assign({}, entry, { kind }))
+		: [];
+}
+
+/**
+ * Decide whether a row counts as live work: its lower-cased status must not be
+ * a closed status and must be either empty/missing or a known active status.
+ */
+function isActiveRow(row) {
+	const normalized = String(row.status ?? "").toLowerCase();
+	return (
+		!CLOSED_STATUSES.has(normalized) &&
+		(normalized === "" || ACTIVE_STATUSES.has(normalized))
+	);
+}
+
+/** Join the row's string-valued descriptive fields with newlines for pattern matching. */
+function searchableText(row) {
+	const fields = [
+		"id",
+		"title",
+		"description",
+		"why",
+		"goal",
+		"successCriteria",
+		"success_criteria",
+		"notes",
+		"full_content",
+		"vision",
+	];
+	const parts = [];
+	for (const field of fields) {
+		const value = row[field];
+		// Non-string values (numbers, objects, missing fields) are skipped.
+		if (typeof value === "string") parts.push(value);
+	}
+	return parts.join("\n");
+}
--- a/src/resources/extensions/sf/experimental.js
+++ b/src/resources/extensions/sf/experimental.js
@ -17,12 +17,35 @@ import {
 	loadProjectSFPreferences,
 } from "./preferences.js";

-/** Extract the body section that follows a YAML frontmatter block. */
-function extractBodyAfterFrontmatter(content) {
-	const closingIdx = content.indexOf("\n---", content.indexOf("---"));
-	if (closingIdx === -1) return null;
-	const afterFrontmatter = content.slice(closingIdx + 4);
-	return afterFrontmatter.trim() ? afterFrontmatter : null;
+/**
+ * Find the "# SF Preferences" reference block in a preferences file.
+ *
+ * When the heading appears after a newline, everything from that newline on is
+ * returned with non-comment lines turned into YAML comments. When the content
+ * itself starts with the heading, the content is returned unchanged. Otherwise
+ * the result is null.
+ */
+function extractPreferencesCommentBlock(content) {
+	const heading = "# SF Preferences";
+	const markerAt = content.indexOf(`\n${heading}`);
+	if (markerAt !== -1) return commentPreferencesBody(content.slice(markerAt));
+	return content.startsWith(heading) ? content : null;
+}
+
+/** Build the default reference block: a blank line, the heading, and a commented docs pointer. */
+function defaultPreferencesCommentBlock() {
+	return (
+		"\n# SF Preferences\n" +
+		"#\n" +
+		"# See `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.\n"
+	);
+}
+
+/**
+ * Turn a human reference body into YAML comments so preferences.yaml stays a
+ * single document: blank lines and lines already starting with "#" are kept,
+ * every other line is prefixed with "# ".
+ */
+function commentPreferencesBody(body) {
+	const commented = [];
+	for (const line of body.split("\n")) {
+		const alreadySafe = line === "" || line.startsWith("#");
+		commented.push(alreadySafe ? line : `# ${line}`);
+	}
+	return commented.join("\n");
 }

 /** All recognized experimental feature flags with descriptions. */
@ -81,14 +104,15 @@ export function setExperimentalFlag(name, value) {
 	prefs.experimental = { ...(prefs.experimental ?? {}), [name]: value };

 	const frontmatter = serializePreferencesToFrontmatter(prefs);
-	let body =
-		"\n# SF Preferences\n\nSee `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.\n";
+	let body = defaultPreferencesCommentBlock();
 	if (existsSync(path)) {
-		const preserved = extractBodyAfterFrontmatter(readFileSync(path, "utf-8"));
+		const preserved = extractPreferencesCommentBlock(
+			readFileSync(path, "utf-8"),
+		);
 		if (preserved) body = preserved;
 	}
 	mkdirSync(dirname(path), { recursive: true });
-	writeFileSync(path, `---\n${frontmatter}---${body}`, "utf-8");
+	writeFileSync(path, `${frontmatter}${body}`, "utf-8");
 }

 /**
--- a/src/resources/extensions/sf/preferences-loader.js
+++ b/src/resources/extensions/sf/preferences-loader.js
@ -194,7 +194,7 @@ export function _resetParseWarningFlag() {
 */
 export function parsePreferencesYaml(content) {
 	try {
-		const parsed = parseYaml(content);
+		const parsed = parseYaml(stripPreferencesYamlDocument(content));
 		if (typeof parsed !== "object" || parsed === null) return {};
 		return parsed;
 	} catch (e) {
@ -203,6 +203,22 @@ export function parsePreferencesYaml(content) {
 	}
 }

+/**
+ * Return only the machine-readable YAML document from preferences.yaml.
+ *
+ * Purpose: tolerate older files where a human reference body was appended as
+ * raw Markdown after `# SF Preferences` while keeping canonical writes pure
+ * YAML plus comments.
+ *
+ * Consumer: parsePreferencesYaml before handing content to the YAML parser.
+ *
+ * Returns `content` unchanged when the marker is absent; otherwise returns
+ * everything before the newline that precedes the first `# SF Preferences`.
+ */
+function stripPreferencesYamlDocument(content) {
+	// The leading "\n" anchors the heading to the start of a line; a heading on
+	// the very first line of the file therefore never matches.
+	const marker = "\n# SF Preferences";
+	const idx = content.indexOf(marker);
+	if (idx < 0) return content;
+	return content.slice(0, idx);
+}
+
 /**
 * Parse legacy frontmatter-style preference content.
 *
--- a/src/resources/extensions/sf/sf-db.js
+++ b/src/resources/extensions/sf/sf-db.js
@ -13,6 +13,7 @@ export * from "./sf-db/sf-db-memory.js";
 export * from "./sf-db/sf-db-milestones.js";
 export * from "./sf-db/sf-db-mode-state.js";
 export * from "./sf-db/sf-db-profile.js";
+export * from "./sf-db/roadmap-projection-sync.js";
 export * from "./sf-db/sf-db-self-feedback.js";
 export * from "./sf-db/sf-db-session-store.js";
 export * from "./sf-db/sf-db-slices.js";
--- a/src/resources/extensions/sf/sf-db/roadmap-projection-sync.js
+++ b/src/resources/extensions/sf/sf-db/roadmap-projection-sync.js
@ -0,0 +1,85 @@
+/**
+ * roadmap-projection-sync.js - schedule DB-backed roadmap projection refreshes.
+ *
+ * Purpose: keep M###-ROADMAP.md and M###-ROADMAP.json as generated views of
+ * canonical SQLite planning state after milestone or slice mutations.
+ *
+ * Consumer: sf-db milestone/slice write wrappers and projection-sync tests.
+ */
+import { logWarning } from "../workflow-logger.js";
+
+// Latest requested refresh per `${basePath}\0${milestoneId}` key.
+const pending = new Map();
+// Shared drain promise per key while a render loop for that key is running.
+const inFlight = new Map();
+
+/**
+ * Queue a best-effort ROADMAP.md/json refresh for one milestone.
+ *
+ * Purpose: make roadmap files server-maintained projections instead of stale
+ * manually rendered artifacts while keeping DB writes synchronous and durable.
+ * A request that arrives while the same milestone is rendering stays queued
+ * and is rendered again by the running drain, so a write that lands mid-render
+ * is not silently dropped.
+ *
+ * Consumer: insert/update milestone and slice DB wrappers.
+ */
+export function scheduleRoadmapProjectionRefresh(
+	basePath = process.cwd(),
+	milestoneId,
+) {
+	if (!milestoneId || roadmapProjectionSyncDisabled()) return;
+	const key = `${basePath}\0${milestoneId}`;
+	if (pending.has(key)) return;
+	pending.set(key, { basePath, milestoneId });
+	// The running drain re-checks `pending` after each render; no timer needed.
+	if (inFlight.has(key)) return;
+	const timer = setTimeout(() => {
+		void flushOneRoadmapProjection(key);
+	}, 0);
+	// Do not keep the process alive just for a projection refresh.
+	timer.unref?.();
+}
+
+/**
+ * Refresh one roadmap projection immediately.
+ *
+ * Purpose: provide an explicit, awaitable projection path for tests and repair
+ * tools while sharing the same renderer used by the asynchronous scheduler.
+ *
+ * Consumer: roadmap projection sync tests and future server repair jobs.
+ */
+export async function refreshRoadmapProjectionNow(basePath, milestoneId) {
+	// The renderer module is loaded on first use, not at module import time.
+	const { renderRoadmapFromDb } = await import("../markdown-renderer.js");
+	return renderRoadmapFromDb(basePath, milestoneId);
+}
+
+/**
+ * Drain queued and currently running projection refreshes.
+ *
+ * Purpose: let tests prove DB writes schedule real roadmap projection updates
+ * without waiting on wall-clock timers.
+ *
+ * Consumer: roadmap-projection-sync.test.mjs.
+ */
+export async function flushRoadmapProjectionRefreshesForTests() {
+	while (pending.size > 0 || inFlight.size > 0) {
+		const keys = new Set([...pending.keys(), ...inFlight.keys()]);
+		await Promise.all([...keys].map((key) => flushOneRoadmapProjection(key)));
+	}
+}
+
+/** Env override first ("0" disables, "1" enables); otherwise off under Vitest. */
+function roadmapProjectionSyncDisabled() {
+	if (process.env.SF_ROADMAP_PROJECTION_SYNC === "0") return true;
+	if (process.env.SF_ROADMAP_PROJECTION_SYNC === "1") return false;
+	return process.env.VITEST === "true";
+}
+
+/**
+ * Render one key until no queued request remains; failures are only logged.
+ *
+ * Returns the shared drain promise when that key is already rendering, so
+ * concurrent callers wait for the same work instead of starting a second
+ * render of the same milestone.
+ */
+function flushOneRoadmapProjection(key) {
+	const running = inFlight.get(key);
+	if (running) return running;
+	if (!pending.has(key)) return Promise.resolve();
+	const drain = (async () => {
+		try {
+			let entry = pending.get(key);
+			while (entry) {
+				pending.delete(key);
+				try {
+					await refreshRoadmapProjectionNow(entry.basePath, entry.milestoneId);
+				} catch (err) {
+					logWarning("roadmap-projection-sync", "projection refresh failed", {
+						milestoneId: entry.milestoneId,
+						error: err instanceof Error ? err.message : String(err),
+					});
+				}
+				// A write that landed mid-render re-queued the key; render again.
+				entry = pending.get(key);
+			}
+		} finally {
+			inFlight.delete(key);
+		}
+	})();
+	// The drain always awaits at least once, so it cannot have finished (and
+	// cleared its own entry) before this registration runs.
+	inFlight.set(key, drain);
+	return drain;
+}
--- a/src/resources/extensions/sf/sf-db/sf-db-milestones.js
+++ b/src/resources/extensions/sf/sf-db/sf-db-milestones.js
@ -11,6 +11,7 @@ import {
 	rowToMilestone,
 	transaction,
 } from "./sf-db-core.js";
+import { scheduleRoadmapProjectionRefresh } from "./roadmap-projection-sync.js";

 export function insertMilestone(m) {
 	const currentDb = _getAdapter();
@ -57,6 +58,7 @@ export function insertMilestone(m) {
 	if (hasPlanningPayload(m.planning)) {
 		insertMilestoneSpecIfAbsent(m.id, m.planning ?? {});
 	}
+	scheduleRoadmapProjectionRefresh(process.cwd(), m.id);
 }

 export function upsertMilestonePlanning(milestoneId, planning) {
@ -111,6 +113,7 @@ export function upsertMilestonePlanning(milestoneId, planning) {
 				? JSON.stringify(planning.productResearch)
 				: null,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function getAllMilestones() {
@ -146,6 +149,7 @@ export function updateMilestoneStatus(milestoneId, status, completedAt) {
 			":completed_at": completedAt ?? null,
 			":id": milestoneId,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function updateMilestoneQueueOrder(order) {
@ -159,6 +163,9 @@ export function updateMilestoneQueueOrder(order) {
 			stmt.run({ ":sequence": i + 1, ":id": order[i] });
 		}
 	});
+	for (const milestoneId of order) {
+		scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
+	}
 }

 export function getActiveMilestoneFromDb() {
@ -274,6 +281,9 @@ export function bulkInsertLegacyHierarchy(payload) {
 			);
 		}
 	});
+	for (const milestoneId of clearMilestoneIds) {
+		scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
+	}
 }

 export function clearEngineHierarchy() {
--- a/src/resources/extensions/sf/sf-db/sf-db-slices.js
+++ b/src/resources/extensions/sf/sf-db/sf-db-slices.js
@ -10,6 +10,7 @@ import {
 	safeParseJsonArray,
 	transaction,
 } from "./sf-db-core.js";
+import { scheduleRoadmapProjectionRefresh } from "./roadmap-projection-sync.js";

 export function insertSlice(s) {
 	const currentDb = _getAdapter();
@ -95,6 +96,7 @@ export function insertSlice(s) {
 			":raw_traces_vision_fragment": s.tracesVisionFragment ?? null,
 		});
 	insertSliceSpecIfAbsent(s.milestoneId, s.id, s.planning ?? {});
+	scheduleRoadmapProjectionRefresh(process.cwd(), s.milestoneId);
 }

 export function insertOrIgnoreSlice(args) {
@ -109,6 +111,7 @@ export function insertOrIgnoreSlice(args) {
 			":title": args.title,
 			":ts": args.createdAt,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), args.milestoneId);
 }

 export function clearSliceSketch(milestoneId, sliceId) {
@ -127,6 +130,7 @@ export function setSliceSketchFlag(milestoneId, sliceId, isSketch) {
 			":mid": milestoneId,
 			":sid": sliceId,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function autoHealSketchFlags(milestoneId, hasPlanFile) {
@ -178,6 +182,7 @@ export function upsertSlicePlanning(milestoneId, sliceId, planning) {
 			// ADR-0000 P2 (schema v69): vision trace fragment is part of planning.
 			":traces_vision_fragment": planning.tracesVisionFragment ?? null,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 // ADR-0000 P2 (schema v69): focused setter so callers that already have a
@ -195,6 +200,7 @@ export function updateSliceVisionTrace(milestoneId, sliceId, fragment) {
 			":mid": milestoneId,
 			":sid": sliceId,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function getSlice(milestoneId, sliceId) {
@ -219,6 +225,7 @@ export function updateSliceStatus(milestoneId, sliceId, status, completedAt) {
 			":milestone_id": milestoneId,
 			":id": sliceId,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function setSliceUatVerdict(milestoneId, sliceId, verdict) {
@ -229,6 +236,7 @@ export function setSliceUatVerdict(milestoneId, sliceId, verdict) {
 			`UPDATE slices SET uat_verdict = :verdict WHERE milestone_id = :mid AND id = :sid`,
 		)
 		.run({ ":mid": milestoneId, ":sid": sliceId, ":verdict": verdict });
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function getSliceUatVerdict(milestoneId, sliceId) {
@ -312,6 +320,7 @@ export function setSliceSummaryMd(milestoneId, sliceId, summaryMd, uatMd) {
 			":summary_md": summaryMd,
 			":uat_md": uatMd,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function getMilestoneSlices(milestoneId) {
@ -369,6 +378,7 @@ export function syncSliceDependencies(milestoneId, sliceId, depends) {
 			)
 			.run({ ":mid": milestoneId, ":sid": sliceId, ":dep": dep });
 	}
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function getDependentSlices(milestoneId, sliceId) {
@ -452,6 +462,7 @@ export function updateSliceFields(milestoneId, sliceId, fields) {
 			":depends": fields.depends ? JSON.stringify(fields.depends) : null,
 			":demo": fields.demo ?? null,
 		});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function setSliceReplanTriggeredAt(milestoneId, sliceId, ts) {
@ -462,6 +473,7 @@ export function setSliceReplanTriggeredAt(milestoneId, sliceId, ts) {
 			"UPDATE slices SET replan_triggered_at = :ts WHERE milestone_id = :mid AND id = :sid",
 		)
 		.run({ ":ts": ts, ":mid": milestoneId, ":sid": sliceId });
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }

 export function deleteSlice(milestoneId, sliceId) {
@ -493,4 +505,5 @@ export function deleteSlice(milestoneId, sliceId) {
 			.prepare(`DELETE FROM slices WHERE milestone_id = :mid AND id = :sid`)
 			.run({ ":mid": milestoneId, ":sid": sliceId });
 	});
+	scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
 }
--- a/src/resources/extensions/sf/tests/detector-server-direction-drift.test.mjs
+++ b/src/resources/extensions/sf/tests/detector-server-direction-drift.test.mjs
@ -0,0 +1,82 @@
+/**
+ * detector-server-direction-drift.test.mjs — server direction drift contracts.
+ *
+ * Purpose: prove Wiggums catches queued work that revives superseded server
+ * architecture while ignoring cancelled historical slices.
+ */
+import assert from "node:assert/strict";
+import { test } from "vitest";
+import {
+	detectServerDirectionDrift,
+	serverDirectionDriftGate,
+} from "../detectors/server-direction-drift.js";
+import { runDetectorSweep } from "../detectors/periodic-runner.js";
+
+// Queued work that mentions `sf serve` must be reported as drift, and the
+// first reported match must identify the offending slice.
+test("detectServerDirectionDrift_when_queued_slice_mentions_sf_serve_flags_drift", () => {
+	const result = detectServerDirectionDrift({
+		slices: [
+			{
+				milestone_id: "M053",
+				id: "S01",
+				status: "queued",
+				title: "`sf serve` daemon scaffold + JSON-RPC API",
+				goal: "Create a separate JSON-RPC API.",
+			},
+		],
+	});
+
+	assert.equal(result.stuck, true);
+	assert.equal(result.reason, "server-direction-drift");
+	assert.equal(result.signature.matches[0].id, "S01");
+});
+
+// The same title on a cancelled slice is history and must not count as drift.
+test("detectServerDirectionDrift_when_cancelled_slice_mentions_sf_serve_ignores_history", () => {
+	const result = detectServerDirectionDrift({
+		slices: [
+			{
+				milestone_id: "M053",
+				id: "S01",
+				status: "cancelled",
+				title: "`sf serve` daemon scaffold + JSON-RPC API",
+			},
+		],
+	});
+
+	assert.equal(result.stuck, false);
+});
+
+// The gate wrapper must surface drift found in an active requirement as a
+// "manual-attention" outcome with rationale "server-direction-drift".
+test("serverDirectionDriftGate_when_drift_exists_returns_manual_attention", async () => {
+	const result = await serverDirectionDriftGate.execute({
+		requirements: [
+			{
+				id: "R999",
+				status: "active",
+				description: "Add A2A as the primary server control plane.",
+			},
+		],
+	});
+
+	assert.equal(result.outcome, "manual-attention");
+	assert.equal(result.rationale, "server-direction-drift");
+});
+
+// The periodic sweep must list this detector among the ones that fired.
+// throttleMs: 0 presumably disables sweep throttling — confirm in
+// periodic-runner.js.
+test("runDetectorSweep_includes_server_direction_drift_detector", async () => {
+	const result = await runDetectorSweep(
+		{
+			slices: [
+				{
+					id: "S99",
+					status: "queued",
+					title: "Per-repo systemd unit for another server",
+				},
+			],
+		},
+		{ throttleMs: 0 },
+	);
+
+	assert.ok(
+		result.detectorsFired.some(
+			(detector) => detector.name === "server-direction-drift",
+		),
+	);
+});
--- a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
+++ b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
@ -2,6 +2,7 @@ import { describe, expect, test } from "vitest";
 import {
 	BASE_REQUIREMENTS,
 	MODEL_CAPABILITY_PROFILES,
+	resolveModelForComplexity,
 	scoreEligibleModels,
 	scoreModel,
 } from "../model-router.js";
@ -16,6 +17,11 @@ describe("agentic capability axis (ADR-0079)", () => {
 		);
 	});

+	// Lower bounds only: the challenge profile must weight reasoning at 0.8 or
+	// more and agentic at 0.85 or more; exact values are free to change.
+	test("challenge base requirements weight adversarial agentic reasoning", () => {
+		expect(BASE_REQUIREMENTS.challenge.reasoning).toBeGreaterThanOrEqual(0.8);
+		expect(BASE_REQUIREMENTS.challenge.agentic).toBeGreaterThanOrEqual(0.85);
+	});
+
 	test("known agentic-capable models score higher than coding-completion models on execute-task", () => {
 		const codestralScore = scoreModel(
 			MODEL_CAPABILITY_PROFILES["codestral-latest"],
@ -34,6 +40,45 @@ describe("agentic capability axis (ADR-0079)", () => {
 		expect(sonnetScore).toBeGreaterThan(codestralScore);
 	});

+	// Two calls with identical inputs except `sticky_routing`: without the flag
+	// the sticky hint is ignored and selection is capability-scored; with it
+	// the hinted model is chosen via "slice-sticky".
+	test("challenge routing ignores sticky model unless explicitly enabled", () => {
+		const phaseConfig = {
+			primary: "openai/gpt-5.5",
+			fallbacks: ["minimax/MiniMax-M2.7"],
+		};
+		const routingConfig = {
+			enabled: true,
+			capability_routing: true,
+		};
+		const availableModels = ["kimi-coding/kimi-k2.6", "minimax/MiniMax-M2.7"];
+		// The hint names a model that is also present in availableModels.
+		const stickyHint = { provider: "minimax", id: "MiniMax-M2.7" };
+
+		const withoutSticky = resolveModelForComplexity(
+			{ tier: "standard" },
+			phaseConfig,
+			routingConfig,
+			availableModels,
+			"challenge",
+			{},
+			{},
+			stickyHint,
+		);
+		expect(withoutSticky.selectionMethod).toBe("capability-scored");
+		expect(withoutSticky.modelId).toBe("kimi-coding/kimi-k2.6");
+
+		// Same call with sticky routing switched on explicitly.
+		const withSticky = resolveModelForComplexity(
+			{ tier: "standard" },
+			phaseConfig,
+			{ ...routingConfig, sticky_routing: true },
+			availableModels,
+			"challenge",
+			{},
+			{},
+			stickyHint,
+		);
+		expect(withSticky.selectionMethod).toBe("slice-sticky");
+		expect(withSticky.modelId).toBe("minimax/MiniMax-M2.7");
+	});
+
 	test("devstral variants score below agentic models on execute-task", () => {
 		const devstralScore = scoreModel(
 			MODEL_CAPABILITY_PROFILES["devstral-2512"],
--- a/src/resources/extensions/sf/tests/preferences-models.test.mjs
+++ b/src/resources/extensions/sf/tests/preferences-models.test.mjs
@ -110,6 +110,25 @@ describe("preferences model resolution", () => {
 		});
 	});

+	// With both `planning` and `validation` models configured, the "challenge"
+	// unit must resolve to the validation model and report no fallbacks.
+	test("resolveModelWithFallbacksForUnit_when_challenge_uses_validation_model", () => {
+		makePreferencesProject(
+			[
+				"version: 1",
+				"models:",
+				"  planning: minimax/MiniMax-M2.7",
+				"  validation: kimi-coding/kimi-k2.6",
+				"",
+			].join("\n"),
+		);
+
+		const result = resolveModelWithFallbacksForUnit("challenge");
+
+		assert.deepEqual(result, {
+			primary: "kimi-coding/kimi-k2.6",
+			fallbacks: [],
+		});
+	});
+
 	test("isModelInEnabledList_when_list_empty_allows_any_model", () => {
 		assert.equal(isModelInEnabledList("kimi-coding", "kimi-k2.6", []), true);
 		assert.equal(
--- a/src/resources/extensions/sf/tests/roadmap-projection-sync.test.mjs
+++ b/src/resources/extensions/sf/tests/roadmap-projection-sync.test.mjs
@ -0,0 +1,106 @@
+import assert from "node:assert/strict";
+import {
+	existsSync,
+	mkdirSync,
+	mkdtempSync,
+	readFileSync,
+	rmSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, test } from "vitest";
+import {
+	closeDatabase,
+	flushRoadmapProjectionRefreshesForTests,
+	insertMilestone,
+	insertSlice,
+	openDatabase,
+	updateSliceStatus,
+	upsertMilestonePlanning,
+} from "../sf-db.js";
+
+// Snapshots taken at module load so afterEach can undo what makeProject changes.
+const originalCwd = process.cwd();
+const originalEnv = { ...process.env };
+// Temp project directories created by makeProject, removed in afterEach.
+const tmpDirs = [];
+
+// Close the database, restore cwd and env, then delete every temp project.
+// NOTE(review): assigning a plain object to process.env replaces Node's own
+// env object — confirm this is intended rather than restoring keys in place.
+afterEach(() => {
+	closeDatabase();
+	process.chdir(originalCwd);
+	process.env = { ...originalEnv };
+	while (tmpDirs.length > 0) {
+		rmSync(tmpDirs.pop(), { recursive: true, force: true });
+	}
+});
+
+/**
+ * Create a temp project with an `.sf` directory, force projection sync on,
+ * chdir into it and open `.sf/sf.db`. Returns the project directory.
+ *
+ * SF_ROADMAP_PROJECTION_SYNC is set to "1" because the scheduler is otherwise
+ * disabled when VITEST is "true"; the chdir matters because the DB wrappers
+ * schedule refreshes against process.cwd().
+ */
+function makeProject() {
+	const dir = mkdtempSync(join(tmpdir(), "sf-roadmap-sync-"));
+	tmpDirs.push(dir);
+	mkdirSync(join(dir, ".sf"), { recursive: true });
+	process.env.SF_ROADMAP_PROJECTION_SYNC = "1";
+	process.chdir(dir);
+	openDatabase(join(dir, ".sf", "sf.db"));
+	return dir;
+}
+
+describe("roadmap projection sync", () => {
+	// End to end: DB wrapper writes schedule a refresh, and after each flush the
+	// files under .sf/milestones/M777/ reflect the current DB state.
+	test("db_writes_refresh_roadmap_projection", async () => {
+		const project = makeProject();
+
+		insertMilestone({
+			id: "M777",
+			title: "Initial server plan",
+			status: "queued",
+			planning: {
+				vision: "Keep planning state in SQLite.",
+				successCriteria: ["Projection exists."],
+			},
+		});
+		insertSlice({
+			milestoneId: "M777",
+			id: "S01",
+			title: "Render projection",
+			status: "pending",
+			sequence: 1,
+			planning: {
+				goal: "Write ROADMAP.md and ROADMAP.json from DB state.",
+			},
+		});
+		// Run the queued refresh now instead of waiting for the 0 ms timer.
+		await flushRoadmapProjectionRefreshesForTests();
+
+		const roadmapPath = join(
+			project,
+			".sf",
+			"milestones",
+			"M777",
+			"M777-ROADMAP.md",
+		);
+		const jsonPath = join(
+			project,
+			".sf",
+			"milestones",
+			"M777",
+			"M777-ROADMAP.json",
+		);
+		assert.equal(existsSync(roadmapPath), true);
+		assert.equal(existsSync(jsonPath), true);
+		assert.match(readFileSync(roadmapPath, "utf-8"), /Initial server plan/);
+
+		// Second round: change milestone planning and slice status, then check
+		// that both projection files were regenerated from the new DB state.
+		upsertMilestonePlanning("M777", {
+			title: "Server-owned roadmap projection",
+			vision: "The server refreshes generated roadmap files after DB writes.",
+		});
+		updateSliceStatus("M777", "S01", "complete", "2026-05-17T20:00:00.000Z");
+		await flushRoadmapProjectionRefreshesForTests();
+
+		const roadmap = readFileSync(roadmapPath, "utf-8");
+		const projection = JSON.parse(readFileSync(jsonPath, "utf-8"));
+		assert.match(roadmap, /Server-owned roadmap projection/);
+		assert.match(
+			roadmap,
+			/The server refreshes generated roadmap files after DB writes/,
+		);
+		// A completed slice is rendered as a checked Markdown task item.
+		assert.match(roadmap, /- \[x\] \*\*S01: Render projection\*\*/);
+		assert.equal(projection.origin, "db-projection");
+		assert.equal(projection.slices[0].status, "complete");
+	});
+});
--- a/src/resources/extensions/sf/uok/gate-registry-bootstrap.js
+++ b/src/resources/extensions/sf/uok/gate-registry-bootstrap.js
@ -11,6 +11,7 @@ import { repeatedFeedbackKindGate } from "../detectors/repeated-feedback-kind.js
 import { artifactFlapGate } from "../detectors/artifact-flap.js";
 import { staleLockGate } from "../detectors/stale-lock.js";
 import { periodicDetectorSweepGate } from "../detectors/periodic-runner.js";
+import { serverDirectionDriftGate } from "../detectors/server-direction-drift.js";
 import { inlineRuntimeGate } from "./inline-runtime-gate.js";

 /**
@ -41,6 +42,7 @@ registry.register(zeroProgressGate);
 registry.register(repeatedFeedbackKindGate);
 registry.register(artifactFlapGate);
 registry.register(staleLockGate);
+registry.register(serverDirectionDriftGate);
 registry.register(periodicDetectorSweepGate);
 registry.register(inlineRuntimeGate);

--- a/src/tests/headless-feedback.test.ts
+++ b/src/tests/headless-feedback.test.ts
@ -20,6 +20,24 @@ const handlerSrc = readFileSync(
 	join(__dirname, "..", "headless-feedback.ts"),
 	"utf-8",
 );
+const forwardSrc = readFileSync(
+	join(__dirname, "..", "headless-server-forward.ts"),
+	"utf-8",
+);
+const rpcModeSrc = readFileSync(
+	join(
+		__dirname,
+		"..",
+		"..",
+		"packages",
+		"coding-agent",
+		"src",
+		"modes",
+		"rpc",
+		"rpc-mode.ts",
+	),
+	"utf-8",
+);

 test("headless.ts dispatches feedback command to handleFeedback", () => {
 	assert.match(
@ -72,5 +90,29 @@ test("add path defaults blocking from severity, doesn't require it", () => {
 	// readBoolFlag(--blocking) OR severity === high|critical → blocking=true.
 	// The behaviour is documented in self-feedback.js (deriveBlocking),
 	// mirror it so operator-filed entries have consistent semantics.
-	assert.match(handlerSrc, /severity === "high" \|\| severity === "critical"/);
+	assert.match(handlerSrc, /severity === "high"/);
+	assert.match(handlerSrc, /severity === "critical"/);
+});
+
+// Source-text contract: these assertions match regexes against the file
+// contents read at the top of this file; they do not execute the forwarding
+// or RPC code, so they only prove the expected statements are present.
+test("active-server feedback forwarding queues writes instead of blocking RPC", () => {
+	assert.match(
+		forwardSrc,
+		/queued:\s*true/,
+		"forwarded add/resolve commands must ask the active RPC server to queue writes",
+	);
+	assert.match(
+		rpcModeSrc,
+		/SF_FEEDBACK_QUEUE_FILE = "sf-feedback-queue\.jsonl"/,
+		"RPC server must persist queued feedback commands durably",
+	);
+	assert.match(
+		rpcModeSrc,
+		/await drainQueuedSfFeedbackCommands\(process\.cwd\(\)\)/,
+		"server-owned autonomous startup must drain queued feedback before running",
+	);
+	assert.match(
+		rpcModeSrc,
+		/scheduleQueuedSfFeedbackDrain\(process\.cwd\(\)\)/,
+		"queued feedback commands should also drain from the server control lane",
+	);
+});
--- a/src/tests/integration/web-mode-cli.test.ts
+++ b/src/tests/integration/web-mode-cli.test.ts
@ -954,6 +954,51 @@ test("sf server stop <path> is parsed and dispatched with resolved path", async
 	assert.equal(stopOptions?.all, false);
 });

+// `sf server reload <path>` must reach runWebMode with the given path as cwd
+// and `reload: true`, and the branch must report action "reload".
+test("sf server reload <path> is parsed as reload launch", async (_t) => {
+	const tmp = mkdtempSync(join(tmpdir(), "sf-web-reload-path-"));
+	let receivedOptions: Record<string, unknown> | undefined;
+
+	// NOTE(review): afterEach is registered from inside the test body — confirm
+	// the test runner applies a hook registered at this point as intended.
+	afterEach(() => {
+		rmSync(tmp, { recursive: true, force: true });
+	});
+
+	mkdirSync(tmp, { recursive: true });
+	const flags = cliWeb.parseCliArgs([
+		"node",
+		"dist/loader.js",
+		"server",
+		"reload",
+		tmp,
+	]);
+	assert.deepEqual(flags.messages, ["server", "reload", tmp]);
+
+	// runWebMode is stubbed: it records the options it was called with and
+	// returns a canned successful launch status, so no server is started.
+	const result = await cliWeb.runWebCliBranch(flags, {
+		cwd: () => "/",
+		runWebMode: async (options) => {
+			receivedOptions = options as unknown as Record<string, unknown>;
+			return {
+				mode: "web" as const,
+				ok: true as const,
+				cwd: options.cwd,
+				projectSessionsDir: options.projectSessionsDir,
+				host: "127.0.0.1",
+				port: 4000,
+				url: "http://127.0.0.1:4000",
+				hostKind: "packaged-standalone" as const,
+				hostPath: "/tmp/server.js",
+				hostRoot: "/tmp",
+			};
+		},
+		stderr: { write: () => true },
+	});
+
+	assert.equal(result.handled, true);
+	// Narrows the result type so `action` can be read below.
+	if (!result.handled) throw new Error("expected handled");
+	assert.equal(result.action, "reload");
+	assert.equal(receivedOptions?.cwd, tmp);
+	assert.equal(receivedOptions?.reload, true);
+});
+
 // ─── Context-aware launch detection tests ──────────────────────────────

 test("resolveContextAwareCwd returns project cwd when inside a project under dev root", (_t) => {
@ -1137,12 +1182,94 @@ test("launchWebMode kills stale instance for same cwd before spawning", async (_
 	assert.equal(status.ok, true);
 	assert.equal(spawnCalled, true);
 	// Stale instance for same cwd should have been cleaned up
-	assert.match(stderrOutput, /Cleaning up stale/);
+	assert.match(stderrOutput, /Stale SF server was already stopped/);
 	// New instance should be registered
 	const registry = webMode.readInstanceRegistry(registryPath);
 	assert.equal(registry[resolve(cwd)]?.pid, 88888);
 });

+// Reload against a live instance on a fixed port: a candidate must first boot
+// on the port returned by resolvePort, and only then is a host spawned on the
+// requested port and recorded in the registry.
+test("launchWebMode reload proves candidate before replacing fixed-port server", async (_t) => {
+	const tmp = mkdtempSync(join(tmpdir(), "sf-web-reload-"));
+	const standaloneRoot = join(tmp, "dist", "web", "standalone");
+	const serverPath = join(standaloneRoot, "server.js");
+	mkdirSync(standaloneRoot, { recursive: true });
+	writeFileSync(serverPath, 'console.log("stub")\n');
+
+	const registryPath = join(tmp, "web-instances.json");
+	const pidFilePath = join(tmp, "web-server.pid");
+	const cwd = "/tmp/reload-project";
+	// Pre-register the "old" server that the reload is expected to replace.
+	webMode.registerInstance(
+		cwd,
+		{ pid: 77777, port: 4000, url: "http://127.0.0.1:4000" },
+		registryPath,
+	);
+
+	const spawnPorts: string[] = [];
+	const bootUrls: string[] = [];
+	let nextPid = 90000;
+	let stderrOutput = "";
+
+	afterEach(() => {
+		rmSync(tmp, { recursive: true, force: true });
+	});
+
+	const status = await webMode.launchWebMode(
+		{
+			cwd,
+			projectSessionsDir: "/tmp/.sf/sessions/reload",
+			agentDir: "/tmp/.sf/agent",
+			packageRoot: tmp,
+			port: 4000,
+			reload: true,
+		},
+		{
+			initResources: () => {},
+			resolvePort: async () => 45123,
+			execPath: "/custom/node",
+			env: { TEST_ENV: "1" },
+			// Kill stub: only a signal-0 probe of pid 77777 succeeds, so the
+			// registered instance looks alive; every other call throws ESRCH.
+			kill: ((pid: number, signal?: string | number) => {
+				if (pid === 77777 && signal === 0) return true;
+				const error = new Error("no such process") as NodeJS.ErrnoException;
+				error.code = "ESRCH";
+				throw error;
+			}) as typeof process.kill,
+			// Spawn stub: records the PORT each host was launched with and hands
+			// out increasing fake pids starting at 90000.
+			spawn: (_command, _args, options) => {
+				spawnPorts.push(String(options.env?.PORT));
+				return {
+					pid: nextPid++,
+					once: () => undefined,
+					unref: () => {},
+				} as any;
+			},
+			waitForBootReady: async (url) => {
+				bootUrls.push(url);
+			},
+			openBrowser: () => {},
+			pidFilePath,
+			writePidFile: webMode.writePidFile,
+			registryPath,
+			stderr: {
+				write(chunk: string) {
+					stderrOutput += chunk;
+					return true;
+				},
+			},
+		},
+	);
+
+	assert.equal(status.ok, true);
+	// Order matters: the candidate port comes first, the fixed port second.
+	assert.deepEqual(spawnPorts, ["45123", "4000"]);
+	assert.deepEqual(bootUrls, [
+		"http://127.0.0.1:45123",
+		"http://127.0.0.1:4000",
+	]);
+	assert.match(stderrOutput, /Proving reload candidate/);
+	assert.match(stderrOutput, /Reload candidate passed boot check/);
+	// The registry must end up pointing at the second spawn (pid 90001).
+	const registry = webMode.readInstanceRegistry(registryPath);
+	assert.equal(registry[resolve(cwd)]?.pid, 90001);
+	assert.equal(registry[resolve(cwd)]?.port, 4000);
+});
+
 test("launchWebMode does not log cleanup when no stale instance exists", async (_t) => {
 	const tmp = mkdtempSync(join(tmpdir(), "sf-web-no-stale-"));
 	const standaloneRoot = join(tmp, "dist", "web", "standalone");
--- a/src/web-mode.ts
+++ b/src/web-mode.ts
@ -56,6 +56,16 @@ export interface WebModeLaunchOptions {
 	packageRoot?: string;
 	host?: string;
 	port?: number;
+	/**
+	 * Reload an existing registered server after the replacement passes boot.
+	 *
+	 * Purpose: keep `sf server` upgrades graceful by proving the candidate host
+	 * is healthy before terminating the old process bound to the project.
+	 *
+	 * Consumer: `sf server reload` and default `sf server start` behavior when a
+	 * live same-project instance already exists.
+	 */
+	reload?: boolean;
 	/** Additional allowed origins for CORS (forwarded as SF_WEB_ALLOWED_ORIGINS). */
 	allowedOrigins?: string[];
 }
@ -128,6 +138,7 @@ export interface WebModeDeps {
 	writePidFile?: (path: string, pid: number) => void;
 	readPidFile?: (path: string) => number | null;
 	deletePidFile?: (path: string) => void;
+	kill?: typeof process.kill;
 	/** Path to the multi-instance registry JSON (for testing). */
 	registryPath?: string;
 }
@ -146,6 +157,11 @@ export interface WebModeStopResult {
 	stoppedCount?: number;
 }

+/**
+ * Registry lookup result for one project cwd: no entry ("none"), an entry
+ * whose pid no longer exists ("dead"), or an entry whose pid exists ("live").
+ */
+type ExistingServerInstance =
+	| { state: "none" }
+	| { state: "dead"; entry: WebInstanceEntry }
+	| { state: "live"; entry: WebInstanceEntry };
+
 // ─── Instance Registry ──────────────────────────────────────────────────────

 export interface WebInstanceEntry {
@ -831,6 +847,57 @@ function cleanupStaleInstance(
 	unregisterInstance(cwd, registryPath);
 }

+/**
+ * Look up the registered server for `cwd` and classify it by pid liveness.
+ *
+ * @param cwd Project directory; resolved to an absolute path for the lookup.
+ * @param registryPath Optional registry path passed to readInstanceRegistry.
+ * @param kill Kill function handed to pidExists; defaults to process.kill.
+ * @returns "none" without an entry, "dead" when pidExists reports the pid as
+ *   gone, otherwise "live"; the entry is included for "dead" and "live".
+ */
+function getRegisteredServerInstance(
+	cwd: string,
+	registryPath?: string,
+	kill: typeof process.kill = process.kill,
+): ExistingServerInstance {
+	const registry = readInstanceRegistry(registryPath);
+	const entry = registry[resolve(cwd)];
+	if (!entry) return { state: "none" };
+	if (!pidExists(entry.pid, kill)) return { state: "dead", entry };
+	return { state: "live", entry };
+}
+
+/**
+ * Log that the registered server for `cwd` had already stopped and remove its
+ * registry entry. This function itself sends no signal to any process.
+ */
+function cleanupDeadRegisteredInstance(
+	cwd: string,
+	stderr: WritableLike,
+	entry: WebInstanceEntry,
+	registryPath?: string,
+): void {
+	stderr.write(
+		`[forge] Stale SF server was already stopped (pid=${entry.pid}) — clearing entry.\n`,
+	);
+	unregisterInstance(cwd, registryPath);
+}
+
+/**
+ * Terminate the previous server during a reload and report the outcome.
+ *
+ * Calls terminateWebServerProcessTree on the old pid. On "killed",
+ * "force-killed" or "already-dead" the outcome is logged and the registry
+ * entry for `cwd` is removed. Any other result is logged with its `error`
+ * and the registry entry is left in place.
+ */
+function stopReloadedInstance(
+	cwd: string,
+	stderr: WritableLike,
+	entry: WebInstanceEntry,
+	registryPath?: string,
+): void {
+	const result = terminateWebServerProcessTree(entry.pid);
+	if (result === "killed" || result === "force-killed") {
+		stderr.write(
+			`[forge] Reloaded SF server for ${resolve(cwd)}; stopped previous pid=${entry.pid}.\n`,
+		);
+	} else if (result === "already-dead") {
+		stderr.write(
+			`[forge] Previous SF server already exited during reload (pid=${entry.pid}).\n`,
+		);
+	} else {
+		stderr.write(
+			`[forge] Reload candidate is running, but previous SF server pid=${entry.pid} did not stop: ${result.error}\n`,
+		);
+		// Keep the old registry row: the old process may still be running.
+		return;
+	}
+	// unregisterInstance deletes by cwd, not by pid, so it would also remove a
+	// replacement already registered under the same cwd. Callers must invoke
+	// this before registering the replacement.
+	unregisterInstance(cwd, registryPath);
+}
+
 /**
 * Detect and reap orphaned next-server processes that outlived their parent
 * web host. These orphans have cwd under dist/web/standalone (or a deleted
@ -951,10 +1018,35 @@ export async function launchWebMode(

 	stderr.write(`[forge] Starting server mode…\n`);

-	// Kill any stale server instance for this project before reserving a port.
-	// This prevents EADDRINUSE when the previous `sf server` was terminated
-	// without a clean shutdown (e.g. terminal closed, crash).
-	cleanupStaleInstance(options.cwd, stderr, deps.registryPath);
+	const existing = getRegisteredServerInstance(
+		options.cwd,
+		deps.registryPath,
+		deps.kill,
+	);
+	let reloadPrevious: WebInstanceEntry | null = null;
+	if (
+		existing.state === "live" &&
+		(options.reload === true ||
+			!options.port ||
+			options.port === existing.entry.port)
+	) {
+		reloadPrevious = existing.entry;
+		stderr.write(
+			`[forge] Existing SF server found for ${resolve(options.cwd)} (pid=${existing.entry.pid}, port=${existing.entry.port}); launching replacement before shutdown.\n`,
+		);
+	} else if (existing.state === "dead") {
+		cleanupDeadRegisteredInstance(
+			options.cwd,
+			stderr,
+			existing.entry,
+			deps.registryPath,
+		);
+	} else if (existing.state === "live") {
+		// Explicit fixed-port start cannot bind beside a live same-port process.
+		// Stop it before launch so legacy `sf server start --port 4000` keeps
+		// working, while normal starts use reload-first behavior.
+		cleanupStaleInstance(options.cwd, stderr, deps.registryPath);
+	}

 	// Also reap orphaned next-server processes from prior unclean shutdowns
 	// (sf-mooe4m5k-6fm7z9): orphaned next-server processes with cwd under
@ -969,28 +1061,11 @@ export async function launchWebMode(
 		);
 	}

-	const port =
+	const targetPort =
 		options.port ??
+		reloadPrevious?.port ??
 		(deps.resolvePort ? await deps.resolvePort(host) : DEFAULT_PORT);
-	const authToken = randomBytes(32).toString("hex");
-	const url = `http://${host}:${port}`;
-	const env = {
-		...(deps.env ?? process.env),
-		HOSTNAME: host,
-		PORT: String(port),
-		SF_WEB_HOST: host,
-		SF_WEB_PORT: String(port),
-		SF_WEB_AUTH_TOKEN: authToken,
-		SF_WEB_PROJECT_CWD: options.cwd,
-		SF_WEB_PROJECT_SESSIONS_DIR: options.projectSessionsDir,
-		SF_WEB_PACKAGE_ROOT: resolution.packageRoot,
-		SF_WEB_HOST_KIND: resolution.kind,
-		SF_WEB_AUTO_START_AUTONOMOUS: "1",
-		...(resolution.kind === "source-dev" ? { NEXT_PUBLIC_SF_DEV: "1" } : {}),
-		...(options.allowedOrigins?.length
-			? { SF_WEB_ALLOWED_ORIGINS: options.allowedOrigins.join(",") }
-			: {}),
-	};
+	const targetUrl = `http://${host}:${targetPort}`;

 	try {
 		stderr.write(`[forge] Initialising resources…\n`);
@ -1005,8 +1080,8 @@ export async function launchWebMode(
 			cwd: options.cwd,
 			projectSessionsDir: options.projectSessionsDir,
 			host,
-			port,
-			url,
+			port: targetPort,
+			url: targetUrl,
 			hostKind: resolution.kind,
 			hostPath: resolution.entryPath,
 			hostRoot: resolution.hostRoot,
@ -1016,89 +1091,163 @@ export async function launchWebMode(
 		return failure;
 	}

-	const spawnSpec = buildSpawnSpec(
-		resolution,
-		host,
-		port,
-		deps.platform ?? process.platform,
-		deps.execPath ?? process.execPath,
-	);
-
-	stderr.write(`[forge] Launching web host on port ${port}…\n`);
-
-	const spawnResult = await spawnDetachedProcess(
-		deps.spawn ??
-			((command, args, spawnOptions) => spawn(command, args, spawnOptions)),
-		spawnSpec.command,
-		spawnSpec.args,
-		{
-			cwd: spawnSpec.cwd,
-			detached: true,
-			stdio: "ignore",
-			windowsHide: true,
-			shell: needsWindowsShell(
-				spawnSpec.command,
-				deps.platform ?? process.platform,
-			),
-			env,
-		},
-	);
-
-	if (!spawnResult.ok) {
-		const failure: WebModeLaunchFailure = {
-			mode: "web",
-			ok: false,
-			cwd: options.cwd,
-			projectSessionsDir: options.projectSessionsDir,
+	const spawnVerifiedHost = async (
+		port: number,
+		label: "candidate" | "web host",
+		autoStartAutonomous: boolean,
+	): Promise<
+		| {
+				ok: true;
+				child: SpawnedChildLike;
+				authToken: string;
+				url: string;
+		  }
+		| { ok: false; failure: WebModeLaunchFailure }
+	> => {
+		const authToken = randomBytes(32).toString("hex");
+		const url = `http://${host}:${port}`;
+		const env = {
+			...(deps.env ?? process.env),
+			HOSTNAME: host,
+			PORT: String(port),
+			SF_WEB_HOST: host,
+			SF_WEB_PORT: String(port),
+			SF_WEB_AUTH_TOKEN: authToken,
+			SF_WEB_PROJECT_CWD: options.cwd,
+			SF_WEB_PROJECT_SESSIONS_DIR: options.projectSessionsDir,
+			SF_WEB_PACKAGE_ROOT: resolution.packageRoot,
+			SF_WEB_HOST_KIND: resolution.kind,
+			SF_WEB_AUTO_START_AUTONOMOUS: autoStartAutonomous ? "1" : "0",
+			...(resolution.kind === "source-dev" ? { NEXT_PUBLIC_SF_DEV: "1" } : {}),
+			...(options.allowedOrigins?.length
+				? { SF_WEB_ALLOWED_ORIGINS: options.allowedOrigins.join(",") }
+				: {}),
+		};
+		const spawnSpec = buildSpawnSpec(
+			resolution,
 			host,
 			port,
-			url,
-			hostKind: resolution.kind,
-			hostPath: resolution.entryPath,
-			hostRoot: resolution.hostRoot,
-			failureReason: `launch:${spawnResult.error instanceof Error ? spawnResult.error.message : String(spawnResult.error)}`,
-		};
-		emitLaunchStatus(stderr, failure);
-		return failure;
+			deps.platform ?? process.platform,
+			deps.execPath ?? process.execPath,
+		);
+		stderr.write(`[forge] Launching ${label} on port ${port}…\n`);
+		const spawnResult = await spawnDetachedProcess(
+			deps.spawn ??
+				((command, args, spawnOptions) => spawn(command, args, spawnOptions)),
+			spawnSpec.command,
+			spawnSpec.args,
+			{
+				cwd: spawnSpec.cwd,
+				detached: true,
+				stdio: "ignore",
+				windowsHide: true,
+				shell: needsWindowsShell(
+					spawnSpec.command,
+					deps.platform ?? process.platform,
+				),
+				env,
+			},
+		);
+		if (!spawnResult.ok) {
+			return {
+				ok: false,
+				failure: {
+					mode: "web",
+					ok: false,
+					cwd: options.cwd,
+					projectSessionsDir: options.projectSessionsDir,
+					host,
+					port,
+					url,
+					hostKind: resolution.kind,
+					hostPath: resolution.entryPath,
+					hostRoot: resolution.hostRoot,
+					failureReason: `launch:${spawnResult.error instanceof Error ? spawnResult.error.message : String(spawnResult.error)}`,
+				},
+			};
+		}
+		try {
+			const bootReadyFn =
+				deps.waitForBootReady ??
+				((u: string) => waitForBootReady(u, 180_000, stderr, authToken));
+			await bootReadyFn(url);
+		} catch (error) {
+			if (spawnResult.child.pid !== undefined) {
+				terminateWebServerProcessTree(spawnResult.child.pid);
+			}
+			return {
+				ok: false,
+				failure: {
+					mode: "web",
+					ok: false,
+					cwd: options.cwd,
+					projectSessionsDir: options.projectSessionsDir,
+					host,
+					port,
+					url,
+					hostKind: resolution.kind,
+					hostPath: resolution.entryPath,
+					hostRoot: resolution.hostRoot,
+					failureReason: `boot-ready:${error instanceof Error ? error.message : String(error)}`,
+				},
+			};
+		}
+		return { ok: true, child: spawnResult.child, authToken, url };
+	};
+
+	if (reloadPrevious) {
+		const candidatePort = deps.resolvePort
+			? await deps.resolvePort(host)
+			: await reserveWebPort(host);
+		stderr.write(
+			`[forge] Proving reload candidate on temporary port ${candidatePort} before touching fixed port ${targetPort}…\n`,
+		);
+		const candidate = await spawnVerifiedHost(
+			candidatePort,
+			"candidate",
+			false,
+		);
+		if (!candidate.ok) {
+			emitLaunchStatus(stderr, candidate.failure);
+			return candidate.failure;
+		}
+		if (candidate.child.pid !== undefined) {
+			terminateWebServerProcessTree(candidate.child.pid);
+		}
+		stderr.write(`[forge] Reload candidate passed boot check.\n`);
+		stopReloadedInstance(
+			options.cwd,
+			stderr,
+			reloadPrevious,
+			deps.registryPath,
+		);
+	}
+
+	const finalHost = await spawnVerifiedHost(targetPort, "web host", true);
+	if (!finalHost.ok) {
+		emitLaunchStatus(stderr, finalHost.failure);
+		return finalHost.failure;
 	}

 	try {
-		const bootReadyFn =
-			deps.waitForBootReady ??
-			((u: string) => waitForBootReady(u, 180_000, stderr, authToken));
-		await bootReadyFn(url);
-	} catch (error) {
-		const failure: WebModeLaunchFailure = {
-			mode: "web",
-			ok: false,
-			cwd: options.cwd,
-			projectSessionsDir: options.projectSessionsDir,
-			host,
-			port,
-			url,
-			hostKind: resolution.kind,
-			hostPath: resolution.entryPath,
-			hostRoot: resolution.hostRoot,
-			failureReason: `boot-ready:${error instanceof Error ? error.message : String(error)}`,
-		};
-		emitLaunchStatus(stderr, failure);
-		return failure;
-	}
-
-	try {
-		spawnResult.child.unref?.();
-		const pid = spawnResult.child.pid;
+		finalHost.child.unref?.();
+		const pid = finalHost.child.pid;
 		if (pid !== undefined) {
 			const pidFilePath = deps.pidFilePath ?? defaultWebPidFilePath;
 			(deps.writePidFile ?? writePidFile)(pidFilePath, pid);
 			// Register in multi-instance registry
 			registerInstance(
 				options.cwd,
-				{ pid, port, url, authToken },
+				{
+					pid,
+					port: targetPort,
+					url: targetUrl,
+					authToken: finalHost.authToken,
+				},
 				deps.registryPath,
 			);
 		}
-		const authenticatedUrl = `${url}/#token=${authToken}`;
+		const authenticatedUrl = `${targetUrl}/#token=${finalHost.authToken}`;
 		try {
 			(deps.openBrowser ?? openBrowser)(authenticatedUrl);
 		} catch (browserError) {
@ -1113,8 +1262,8 @@ export async function launchWebMode(
 			cwd: options.cwd,
 			projectSessionsDir: options.projectSessionsDir,
 			host,
-			port,
-			url,
+			port: targetPort,
+			url: targetUrl,
 			hostKind: resolution.kind,
 			hostPath: resolution.entryPath,
 			hostRoot: resolution.hostRoot,
@ -1124,15 +1273,15 @@ export async function launchWebMode(
 		return failure;
 	}

-	const authenticatedUrl = `${url}/#token=${authToken}`;
+	const authenticatedUrl = `${targetUrl}/#token=${finalHost.authToken}`;
 	const success: WebModeLaunchSuccess = {
 		mode: "web",
 		ok: true,
 		cwd: options.cwd,
 		projectSessionsDir: options.projectSessionsDir,
 		host,
-		port,
-		url,
+		port: targetPort,
+		url: targetUrl,
 		hostKind: resolution.kind,
 		hostPath: resolution.entryPath,
 		hostRoot: resolution.hostRoot,
--- a/src/web/settings-service.ts
+++ b/src/web/settings-service.ts
@ -1,6 +1,6 @@
 import { execFile } from "node:child_process";
 import { existsSync } from "node:fs";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import { pathToFileURL } from "node:url";
 import type { SettingsData } from "../../web/lib/settings-types.ts";
 import { resolveBridgeRuntimeConfig } from "./bridge-service.ts";
@ -65,6 +65,13 @@ export async function collectSettingsData(
 	const budgetPath = budgetResolution.modulePath;
 	const historyPath = historyResolution.modulePath;
 	const metricsPath = metricsResolution.modulePath;
+	const benchmarksPath = join(
+		dirname(routerPath),
+		"learning",
+		"data",
+		"model-benchmarks.json",
+	);
+	const performancePath = join(projectCwd, ".sf", "model-performance.json");

 	// All modules share the same compiled-vs-source mode (they're all from the same package)
 	const useCompiledJs = prefsResolution.useCompiledJs;
@ -102,6 +109,7 @@ export async function collectSettingsData(
 	// and writes a combined JSON payload to stdout.
 	const script = [
 		'const { pathToFileURL } = await import("node:url");',
+		'const { existsSync, readFileSync } = await import("node:fs");',
 		"const prefsMod = await import(pathToFileURL(process.env.SF_SETTINGS_PREFS_MODULE).href);",
 		"const routerMod = await import(pathToFileURL(process.env.SF_SETTINGS_ROUTER_MODULE).href);",
 		"const budgetMod = await import(pathToFileURL(process.env.SF_SETTINGS_BUDGET_MODULE).href);",
@ -172,8 +180,45 @@ export async function collectSettingsData(
 		"const ledger = metricsMod.loadLedgerFromDisk(process.env.SF_SETTINGS_BASE);",
 		"const projectTotals = ledger ? metricsMod.getProjectTotals(ledger.units) : null;",

+		// 6. Published benchmark table and local learned model outcomes
+		"function readJson(path) {",
+		"  if (!path || !existsSync(path)) return null;",
+		"  try { return JSON.parse(readFileSync(path, 'utf-8')); } catch { return null; }",
+		"}",
+		"function benchmarkRows(raw) {",
+		"  if (!raw || typeof raw !== 'object') return [];",
+		"  return Object.entries(raw)",
+		"    .filter(([modelId]) => !modelId.startsWith('_'))",
+		"    .map(([modelId, row]) => ({ modelId, ...(row && typeof row === 'object' ? row : {}) }))",
+		"    .sort((a, b) => String(a.modelId).localeCompare(String(b.modelId)));",
+		"}",
+		"function performanceRows(raw) {",
+		"  if (!raw || typeof raw !== 'object') return [];",
+		"  const rows = [];",
+		"  for (const [unitType, models] of Object.entries(raw)) {",
+		"    if (!models || typeof models !== 'object') continue;",
+		"    for (const [modelId, value] of Object.entries(models)) {",
+		"      if (!value || typeof value !== 'object') continue;",
+		"      const aggregate = value.aggregate && typeof value.aggregate === 'object' ? value.aggregate : {};",
+		"      rows.push({",
+		"        unitType,",
+		"        modelId,",
+		"        successes: Number(aggregate.successes ?? 0),",
+		"        failures: Number(aggregate.failures ?? 0),",
+		"        timeouts: Number(aggregate.timeouts ?? 0),",
+		"        totalTokens: Number(aggregate.totalTokens ?? 0),",
+		"        totalCost: Number(aggregate.totalCost ?? 0),",
+		"        lastUsed: aggregate.lastUsed ?? null,",
+		"      });",
+		"    }",
+		"  }",
+		"  return rows.sort((a, b) => String(b.lastUsed ?? '').localeCompare(String(a.lastUsed ?? '')));",
+		"}",
+		"const modelBenchmarks = benchmarkRows(readJson(process.env.SF_SETTINGS_BENCHMARKS_PATH));",
+		"const modelPerformance = performanceRows(readJson(process.env.SF_SETTINGS_MODEL_PERFORMANCE_PATH));",
+
 		// Write combined payload
-		"process.stdout.write(JSON.stringify({ preferences, routingConfig, budgetAllocation, routingHistory, projectTotals }));",
+		"process.stdout.write(JSON.stringify({ preferences, routingConfig, budgetAllocation, routingHistory, projectTotals, modelBenchmarks, modelPerformance }));",
 	].join(" ");

 	const prefixArgs = buildSubprocessPrefixArgs(
@ -196,6 +241,8 @@ export async function collectSettingsData(
 					SF_SETTINGS_HISTORY_MODULE: historyPath,
 					SF_SETTINGS_METRICS_MODULE: metricsPath,
 					SF_SETTINGS_BASE: projectCwd,
+					SF_SETTINGS_BENCHMARKS_PATH: benchmarksPath,
+					SF_SETTINGS_MODEL_PERFORMANCE_PATH: performancePath,
 				},
 				maxBuffer: SETTINGS_MAX_BUFFER,
 				windowsHide: true,
--- a/web/components/sf/settings-panels.tsx
+++ b/web/components/sf/settings-panels.tsx
@ -22,6 +22,8 @@ import { Button } from "@/components/ui/button";
 import { authFetch } from "@/lib/auth";
 import type {
 	SettingsData,
+	SettingsModelBenchmark,
+	SettingsModelPerformance,
 	SettingsPatternHistory,
 	SettingsRoutingHistory,
 } from "@/lib/settings-types";
@ -438,10 +440,68 @@ function TierOutcomeBadge({
 	);
 }

+/**
+ * Reduces a model id to the text after its last "/".
+ *
+ * Ids that contain no "/" are returned unchanged. An id that ends in "/"
+ * yields the empty string, because the last split segment is "" (which is
+ * not nullish, so the `?? id` fallback does not apply).
+ *
+ * @param id - Model id, with or without "/"-separated prefixes.
+ * @returns The final "/"-separated segment of `id`.
+ */
+function normalizeModelId(id: string): string {
+	return id.includes("/") ? (id.split("/").pop() ?? id) : id;
+}
+
+/**
+ * Formats a benchmark score for display in a table cell.
+ *
+ * @param value - Score to render; may be `null` or `undefined`.
+ * @returns The score with exactly one decimal place, or "–" when `value`
+ *   is not a finite number (missing, `NaN`, or infinite).
+ */
+function formatBenchmarkScore(value: number | null | undefined): string {
+	return typeof value === "number" && Number.isFinite(value)
+		? value.toFixed(1)
+		: "–";
+}
+
+/**
+ * Summarizes the local performance rows that refer to one model.
+ *
+ * A row matches when its `modelId` equals `modelId` exactly, ends with
+ * "/" followed by the bare (last-segment) form of `modelId`, or has the same
+ * bare form itself. Matching rows are summed across all unit types.
+ *
+ * @param rows - Local performance rows to search.
+ * @param modelId - Model id to aggregate for, with or without "/" prefixes.
+ * @returns `runs` is successes + failures + timeouts over the matched rows.
+ *   `successRate` is the success share as a whole-number percentage string,
+ *   and `cost` is the summed `totalCost` passed through `formatCost`; both
+ *   are "–" when `runs` is 0.
+ */
+function aggregateModelPerformance(
+	rows: SettingsModelPerformance[],
+	modelId: string,
+): { runs: number; successRate: string; cost: string } {
+	const bare = normalizeModelId(modelId);
+	// The last test compares bare names on both sides, so rows count toward
+	// this model regardless of which "/"-separated prefix (if any) they carry.
+	const matched = rows.filter(
+		(row) =>
+			row.modelId === modelId ||
+			row.modelId.endsWith(`/${bare}`) ||
+			normalizeModelId(row.modelId) === bare,
+	);
+	const totals = matched.reduce(
+		(acc, row) => {
+			acc.successes += row.successes;
+			acc.failures += row.failures;
+			acc.timeouts += row.timeouts;
+			acc.cost += row.totalCost;
+			return acc;
+		},
+		{ successes: 0, failures: 0, timeouts: 0, cost: 0 },
+	);
+	// Timeouts count as runs, so they lower the success rate like failures do.
+	const runs = totals.successes + totals.failures + totals.timeouts;
+	return {
+		runs,
+		successRate:
+			runs > 0 ? `${Math.round((totals.successes / runs) * 100)}%` : "–",
+		cost: runs > 0 ? formatCost(totals.cost) : "–",
+	};
+}
+
+/**
+ * Returns at most 12 benchmark rows, ordered by a weighted composite score
+ * from highest to lowest.
+ *
+ * The composite weights are: SWE-bench 0.35 (`swe_bench_verified` when
+ * present, otherwise `swe_bench`), `live_code_bench` 0.25, `hle` 0.15,
+ * `gpqa` 0.15 and `instruction_following` 0.1. A missing (`null` or
+ * `undefined`) score contributes 0.
+ *
+ * @param benchmarks - Benchmark rows to rank; the array is not mutated.
+ * @returns A new array holding the top rows in descending composite order.
+ */
+function rankedBenchmarks(
+	benchmarks: SettingsModelBenchmark[],
+): SettingsModelBenchmark[] {
+	// Copy first: Array.prototype.sort sorts in place.
+	return [...benchmarks]
+		.sort((a, b) => {
+			const score = (row: SettingsModelBenchmark) =>
+				(row.swe_bench_verified ?? row.swe_bench ?? 0) * 0.35 +
+				(row.live_code_bench ?? 0) * 0.25 +
+				(row.hle ?? 0) * 0.15 +
+				(row.gpqa ?? 0) * 0.15 +
+				(row.instruction_following ?? 0) * 0.1;
+			// b before a: higher composite scores sort first.
+			return score(b) - score(a);
+		})
+		.slice(0, 12);
+}
+
 export function ModelRoutingPanel() {
 	const { state, data, busy, refresh } = useSettingsData();
 	const routingConfig = data?.routingConfig ?? null;
 	const routingHistory = data?.routingHistory ?? null;
+	const modelBenchmarks = rankedBenchmarks(data?.modelBenchmarks ?? []);
+	const modelPerformance = data?.modelPerformance ?? [];

 	return (
 		<div className="space-y-4" data-testid="settings-model-routing">
@ -569,6 +629,73 @@ export function ModelRoutingPanel() {
 					) : (
 						<SettingsEmpty message="No routing history yet" />
 					)}
+
+					{/* Model benchmarks */}
+					{modelBenchmarks.length > 0 ? (
+						<div className="space-y-2">
+							<h4 className="text-[11px] font-medium text-muted-foreground">
+								Model Benchmarks
+							</h4>
+							<div className="overflow-x-auto rounded-lg border border-border/50 bg-card/50">
+								<table className="w-full text-left text-xs">
+									<thead className="border-b border-border/50 text-[10px] uppercase text-muted-foreground">
+										<tr>
+											<th className="px-3 py-2 font-medium">Model</th>
+											<th className="px-2 py-2 font-medium">SWE</th>
+											<th className="px-2 py-2 font-medium">LCB</th>
+											<th className="px-2 py-2 font-medium">HLE</th>
+											<th className="px-2 py-2 font-medium">GPQA</th>
+											<th className="px-2 py-2 font-medium">Local</th>
+											<th className="px-3 py-2 font-medium">Cost</th>
+										</tr>
+									</thead>
+									<tbody>
+										{modelBenchmarks.map((row) => {
+											const local = aggregateModelPerformance(
+												modelPerformance,
+												row.modelId,
+											);
+											return (
+												<tr
+													key={row.modelId}
+													className="border-b border-border/30 last:border-0"
+													title={row.source ?? undefined}
+												>
+													<td className="max-w-[180px] truncate px-3 py-2 font-mono text-[11px] text-foreground/85">
+														{row.modelId}
+													</td>
+													<td className="px-2 py-2 tabular-nums">
+														{formatBenchmarkScore(
+															row.swe_bench_verified ?? row.swe_bench,
+														)}
+													</td>
+													<td className="px-2 py-2 tabular-nums">
+														{formatBenchmarkScore(row.live_code_bench)}
+													</td>
+													<td className="px-2 py-2 tabular-nums">
+														{formatBenchmarkScore(row.hle)}
+													</td>
+													<td className="px-2 py-2 tabular-nums">
+														{formatBenchmarkScore(row.gpqa)}
+													</td>
+													<td className="px-2 py-2 tabular-nums">
+														{local.runs > 0
+															? `${local.successRate} / ${local.runs}`
+															: "–"}
+													</td>
+													<td className="px-3 py-2 tabular-nums">
+														{local.cost}
+													</td>
+												</tr>
+											);
+										})}
+									</tbody>
+								</table>
+							</div>
+						</div>
+					) : (
+						<SettingsEmpty message="No benchmark data available" />
+					)}
 				</>
 			)}
 		</div>
@ -775,7 +902,7 @@ export function RemoteQuestionsPanel() {
 	const { data, busy, refresh } = useSettingsData();
 	const existingConfig = data?.preferences?.remoteQuestions ?? null;

-	const [_envVarSet, setEnvVarSet] = useState(false);
+	const [, setEnvVarSet] = useState(false);
 	const [envVarName, setEnvVarName] = useState<string | null>(null);
 	const [apiLoading, setApiLoading] = useState(true);
 	const [tokenSet, setTokenSet] = useState(false);
--- a/web/lib/settings-types.ts
+++ b/web/lib/settings-types.ts
@ -83,6 +83,35 @@ export interface SettingsProjectTotals {
 	userMessages: number;
 }

+// ─── Model Benchmark And Local Outcome Data ─────────────────────────────────
+
+/**
+ * One row of the published model benchmark table shown in the settings UI.
+ *
+ * Every score field is optional and may be `null`; consumers must treat a
+ * missing value as "no data".
+ * NOTE(review): score units/ranges are not fixed by this type — confirm
+ * against the benchmark data file before relying on a particular scale.
+ */
+export interface SettingsModelBenchmark {
+	/** Identifier of the model this row describes. */
+	modelId: string;
+	swe_bench?: number | null;
+	swe_bench_verified?: number | null;
+	live_code_bench?: number | null;
+	human_eval?: number | null;
+	hle?: number | null;
+	aime_2026?: number | null;
+	gpqa?: number | null;
+	mmlu_pro?: number | null;
+	instruction_following?: number | null;
+	// presumably token counts — TODO confirm against the benchmark data file
+	context_window?: number | null;
+	max_output_tokens?: number | null;
+	/** Free-form provenance note for the row, when one is recorded. */
+	source?: string | null;
+}
+
+/**
+ * Locally recorded outcome counters for one model within one unit type.
+ *
+ * The same `modelId` can appear in several rows, one per `unitType`.
+ */
+export interface SettingsModelPerformance {
+	/** Unit type the counters were recorded under. */
+	unitType: string;
+	/** Identifier of the model the counters belong to. */
+	modelId: string;
+	/** Number of runs recorded as successful. */
+	successes: number;
+	/** Number of runs recorded as failed. */
+	failures: number;
+	/** Number of runs recorded as timed out. */
+	timeouts: number;
+	/** Total tokens recorded across the runs. */
+	totalTokens: number;
+	// currency/unit of totalCost is not fixed by this type — TODO confirm
+	totalCost: number;
+	// presumably a timestamp string; `null` when never recorded — confirm format
+	lastUsed: string | null;
+}
+
 // ─── Effective Preferences ────────────────────────────────────────────────────

 export interface SettingsPreferencesData {
@ -124,4 +153,6 @@ export interface SettingsData {
 	budgetAllocation: SettingsBudgetAllocation;
 	routingHistory: SettingsRoutingHistory | null;
 	projectTotals: SettingsProjectTotals | null;
+	modelBenchmarks: SettingsModelBenchmark[];
+	modelPerformance: SettingsModelPerformance[];
 }