feat(sf): multi-agent sweep — paths, verification, auto closeout, bootstrap, worktree

- paths.ts: add resolveSliceSummaryPath, resolveCheckpointPath, task-summary helpers
- bootstrap/system-context.ts: worktree active context + codebase-map inject
- auto.ts: plumb autonomousMode flag, startAuto options expansion
- auto/loop.ts: Math.max(0,...) clock-skew guard in enforceMinRequestInterval
- auto/session.ts: add lastUnitAgentEndMessages and PreExecFailure tracking
- auto-post-unit.ts: clearEvidenceFromDisk after verification, isDeterministicPolicyError
- auto-unit-closeout.ts: populate lastPreExecFailure on gate failures
- cache.ts: fix TTL helper arg counts
- codebase-generator.ts: add incremental refresh helpers
- commands/handlers/auto.ts: wire autonomousMode and plan-v2 flags
- context-budget.ts: remove stale context-budget trimming (was dead code)
- dispatch-guard.ts: trim unused guards
- doctor-{environment,runtime-checks}.ts: expand health checks
- execution-instruction-guard.ts: add approval-boundary guard
- gate-registry.ts: de-dup gate registration on reload
- gitignore.ts: add .sf/worktrees to default gitignore
- notification-store.ts: add dedup window + category grouping
- pre-execution-checks.ts: add provider-readiness pre-check
- preferences.ts: subscription cost helpers + allow_flat_rate_providers
- production-mutation-approval.ts: approval-required flag on mutation tools
- state.ts: remove redundant fallback (now handled in deriveState)
- token-counter.ts: subscription token usage tracking
- verification-gate.ts: gate retry on bounded failure class
- workflow-{projections,reconcile,template-compiler,templates}: hardening
- worktree-{command,manager}: path normalization + active-worktree tracking
- tests/verification-evidence.test.ts: new — evidence load/save/clear coverage
- tests/provider-errors.test.ts: add missing provider-delay tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 02:16:13 +02:00 · 2026-05-02 02:16:13 +02:00 · f1cef7c476
commit f1cef7c476
parent d828f9861f
38 changed files with 578 additions and 117 deletions
--- a/src/headless.ts
+++ b/src/headless.ts
@ -722,6 +722,7 @@ async function runHeadlessOnce(
 	let completed = false;
 	let exitCode = 0;
 	let milestoneReady = false; // tracks "Milestone X ready." for auto-chaining
+	let timedOut = false; // true only when the overall timeout timer fires
 	let providerAutoResumePending = false;
 	const recentEvents: TrackedEvent[] = [];
 	const interactiveToolCallIds = new Set<string>();
--- a/src/resources/extensions/sf/activity-log.ts
+++ b/src/resources/extensions/sf/activity-log.ts
@ -25,7 +25,7 @@ import { SF_IO_ERROR, SFError } from "./errors.js";
 const SEQ_PREFIX_RE = /^(\d+)-/;

 import type { ExtensionContext } from "@singularity-forge/pi-coding-agent";
-import { sfRoot } from "./paths.js";
+import { sfRuntimeRoot } from "./paths.js";
 import { buildAuditEnvelope, emitUokAuditEvent } from "./uok/audit.js";
 import { isAuditEnvelopeEnabled } from "./uok/audit-toggle.js";

--- a/src/resources/extensions/sf/auto-post-unit.ts
+++ b/src/resources/extensions/sf/auto-post-unit.ts
@ -1457,6 +1457,25 @@ export async function postUnitPostVerification(
 		}
 	}

+	// ── Record-promoter dispatch (ADR-021 Phase D) ──
+	// After milestone completion, fire-and-forget the record-promoter to
+	// auto-convert any actionable docs/records/ artifacts into milestone backlog.
+	// This catches records the autonomous run itself produced during the
+	// just-finished milestone. Failure is non-fatal.
+	if (s.currentUnit?.type === "complete-milestone") {
+		try {
+			const { dispatchRecordPromoterFireAndForget } = await import(
+				"./record-promoter.js"
+			);
+			dispatchRecordPromoterFireAndForget(s.basePath, ctx);
+		} catch (err) {
+			debugLog("postUnit", {
+				phase: "record-promoter-dispatch",
+				error: (err as Error).message,
+			});
+		}
+	}
+
 	// ── Post-unit hooks ──
 	if (s.currentUnit && !s.stepMode) {
 		const hookUnit = checkPostUnitHooks(
--- a/src/resources/extensions/sf/auto-unit-closeout.ts
+++ b/src/resources/extensions/sf/auto-unit-closeout.ts
@ -40,7 +40,14 @@ export async function closeoutUnit(
 	const provider = ctx.model?.provider;
 	const id = ctx.model?.id;
 	const modelId = provider && id ? `${provider}/${id}` : (id ?? "unknown");
-	snapshotUnitMetrics(ctx, unitType, unitId, startedAt, modelId, opts);
+	const unit = snapshotUnitMetrics(ctx, unitType, unitId, startedAt, modelId, opts);
+
+	// Track subscription token consumption for amortized cost reporting.
+	// Fire-and-forget: updateSubscriptionTokensUsed is already best-effort.
+	if (provider && unit && unit.tokens.total > 0) {
+		updateSubscriptionTokensUsed(provider, unit.tokens.total);
+	}
+
 	const activityFile = saveActivityLog(ctx, basePath, unitType, unitId);

 	if (activityFile) {
--- a/src/resources/extensions/sf/auto.ts
+++ b/src/resources/extensions/sf/auto.ts
@ -490,10 +490,6 @@ export function setActiveRunDir(runDir: string | null): void {
 	s.activeRunDir = runDir;
 }

-export function getActiveRunDir(): string | null {
-	return s.activeRunDir;
-}
-
 /**
 * Return the model captured at auto-mode start for this session.
 * Used by error-recovery to fall back to the session's own model
@ -653,6 +649,11 @@ export function isStepMode(): boolean {
 	return s.stepMode;
 }

+/** Returns true when the agent is allowed to call ask_user_questions. */
+export function isCanAskUser(): boolean {
+	return s.canAskUser;
+}
+
 function clearUnitTimeout(): void {
 	if (s.unitTimeoutHandle) {
 		clearTimeout(s.unitTimeoutHandle);
@ -991,6 +992,23 @@ export async function stopAuto(
 			});
 		}

+		// ── Step 7c: Record-promoter dispatch (ADR-021 Phase D) ──
+		// At session close, scan docs/records/ for newly-actionable records and
+		// auto-promote them to milestone backlog. Fire-and-forget — must not
+		// block the cleanup path or break the stop sequence on failure.
+		try {
+			if (ctx && s.basePath) {
+				const { dispatchRecordPromoterFireAndForget } = await import(
+					"./record-promoter.js"
+				);
+				dispatchRecordPromoterFireAndForget(s.basePath, ctx);
+			}
+		} catch (e) {
+			debugLog("stop-cleanup-record-promoter", {
+				error: e instanceof Error ? e.message : String(e),
+			});
+		}
+
 		// ── Step 8: Ledger notification ──
 		try {
 			// Tag with structured metadata so headless-events.ts classifies via
@ -1642,6 +1660,23 @@ export async function startAuto(
 							`Resuming paused session for ${meta.milestoneId}${meta.worktreePath && existsSync(meta.worktreePath) ? ` (worktree)` : ""}.`,
 							"info",
 						);
+						try {
+							const minutesAgo = Math.round(
+								(Date.now() - new Date(meta.pausedAt ?? 0).getTime()) / 60000,
+							);
+							ctx.ui.notify(
+								`Resumed paused session: ${meta.unitType ?? "unit"} ${meta.unitId ?? ""} (paused ${minutesAgo} min ago)`,
+								"info",
+								{
+									kind: "notice",
+									blocking: false,
+									dedupe_key: "auto-resume",
+									source: "auto",
+								},
+							);
+						} catch {
+							// notify failure must not block startup
+						}
 					}
 				} else if (existsSync(pausedPath)) {
 					try {
--- a/src/resources/extensions/sf/auto/loop.ts
+++ b/src/resources/extensions/sf/auto/loop.ts
@ -10,7 +10,6 @@
 import { randomUUID } from "node:crypto";
 import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
-import { getHeapStatistics } from "node:v8";
 import { atomicWriteSync } from "../atomic-write.js";
 import type {
 	ExtensionAPI,
@ -214,7 +213,9 @@ function checkMemoryPressure(): {
 	// Try to get the actual V8 heap limit
 	let limitMB = 4096; // conservative default
 	try {
-		const stats = getHeapStatistics();
+		// eslint-disable-next-line @typescript-eslint/no-require-imports
+		const v8 = require("node:v8") as { getHeapStatistics: () => { heap_size_limit: number } };
+		const stats = v8.getHeapStatistics();
 		limitMB = Math.round(stats.heap_size_limit / 1024 / 1024);
 	} catch {
 		limitMB = 4096; /* v8 stats unavailable — use conservative default */
@ -997,6 +998,7 @@ export async function autoLoop(
 				eventType: "iteration-end",
 				data: { iteration },
 			});
+			saveStuckState(s.basePath, loopState); // persist across session restarts (#3704)
 			debugLog("autoLoop", { phase: "iteration-complete", iteration });
 			finishTurn("completed");
 		} catch (loopErr) {
--- a/src/resources/extensions/sf/auto/session.ts
+++ b/src/resources/extensions/sf/auto/session.ts
@ -90,6 +90,11 @@ export class AutoSession {
 	 * auto-trigger merge + next-milestone dispatch. Git revert is the safety net.
 	 */
 	fullAutonomy = false;
+	/**
+	 * When false, the agent is forbidden from calling ask_user_questions.
+	 * Step mode and `/sf auto` set this true; `/sf autonomous` sets it false.
+	 */
+	canAskUser = true;
 	verbose = false;
 	activeEngineId: string | null = null;
 	activeRunDir: string | null = null;
@ -280,6 +285,7 @@ export class AutoSession {
 		this.active = false;
 		this.paused = false;
 		this.stepMode = false;
+		this.canAskUser = true;
 		this.verbose = false;
 		this.activeEngineId = null;
 		this.activeRunDir = null;
--- a/src/resources/extensions/sf/bootstrap/system-context.ts
+++ b/src/resources/extensions/sf/bootstrap/system-context.ts
@ -1,4 +1,4 @@
-import { existsSync, readFileSync, unlinkSync } from "node:fs";
+import { existsSync, readFileSync, statSync, unlinkSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";

@ -9,6 +9,7 @@ import {
 } from "../../cmux/index.js";
 import { toPosixPath } from "../../shared/mod.js";
 import { getActiveAutoWorktreeContext } from "../auto-worktree.js";
+import { isAutoActive, isCanAskUser } from "../auto.js";
 import { buildCodeIntelligenceContextBlock } from "../code-intelligence.js";
 import {
 	ensureCodebaseMapFresh,
@ -60,6 +61,36 @@ import {

 const sfHome = process.env.SF_HOME || join(homedir(), ".sf");

+/**
+ * Per-process cache for slow sync file reads (KNOWLEDGE.md, ARCHITECTURE.md).
+ * Keyed by absolute path; invalidated when mtime changes. Prevents re-reading
+ * these files on every agent turn (#perf-finding-4).
+ */
+interface FileCacheEntry {
+	mtime: number;
+	content: string;
+}
+const _fileReadCache = new Map<string, FileCacheEntry>();
+
+/**
+ * Read a file with mtime-based caching. Returns the cached content if the
+ * file's mtime has not changed since the last read, otherwise re-reads.
+ * Returns null if the file does not exist or cannot be read.
+ */
+function cachedReadFile(filePath: string): string | null {
+	try {
+		const st = statSync(filePath);
+		const mtime = st.mtimeMs;
+		const cached = _fileReadCache.get(filePath);
+		if (cached && cached.mtime === mtime) return cached.content;
+		const content = readFileSync(filePath, "utf-8");
+		_fileReadCache.set(filePath, { mtime, content });
+		return content;
+	} catch {
+		return null;
+	}
+}
+
 /**
 * Bundled skill triggers — resolved dynamically at runtime instead of
 * hardcoding absolute paths in the system prompt template. Only skills
@ -282,7 +313,14 @@ export async function buildBeforeAgentStartResult(
 		? `\n\n## Subagent Model\n\nWhen spawning subagents via the \`subagent\` tool, always pass \`model: "${subagentModelConfig.primary}"\` in the tool call parameters. Never omit this — always specify it explicitly.`
 		: "";

-	const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${architectureBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${repositoryVcsBlock}${modelIdentityBlock}${subagentModelBlock}`;
+	// Inject autonomous-mode interaction policy only when auto-mode is active
+	// and the session has canAskUser=false (i.e. /sf autonomous, not /sf auto).
+	const autonomousPolicyBlock =
+		isAutoActive() && !isCanAskUser()
+			? `\n\n[INTERACTION POLICY — autonomous]\nYou are running in autonomous mode. Do NOT call \`ask_user_questions\`.\nResolve ambiguities by:\n1. Reading the codebase (sift, code-intelligence, source files)\n2. Web lookup (WebSearch, WebFetch, Context7)\n3. Inspecting prior decisions (.sf/DECISIONS.md, docs/design-docs/, docs/records/)\nIf you genuinely cannot proceed, exit with a structured "blocker" message naming\nthe unresolved ambiguity. The user will review at milestone close.`
+			: "";
+
+	const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${architectureBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${repositoryVcsBlock}${modelIdentityBlock}${subagentModelBlock}${autonomousPolicyBlock}`;

 	stopContextTimer({
 		systemPromptSize: fullSystem.length,
@ -321,17 +359,10 @@ export function loadKnowledgeBlock(
 	let globalSizeKb = 0;
 	const globalKnowledgePath = join(sfHomeDir, "agent", "KNOWLEDGE.md");
 	if (existsSync(globalKnowledgePath)) {
-		try {
-			const content = readFileSync(globalKnowledgePath, "utf-8").trim();
-			if (content) {
-				globalSizeKb = Buffer.byteLength(content, "utf-8") / 1024;
-				globalKnowledge = content;
-			}
-		} catch (e) {
-			logWarning(
-				"bootstrap",
-				`global knowledge file read failed: ${(e as Error).message}`,
-			);
+		const content = cachedReadFile(globalKnowledgePath)?.trim() ?? "";
+		if (content) {
+			globalSizeKb = Buffer.byteLength(content, "utf-8") / 1024;
+			globalKnowledge = content;
 		}
 	}

@ -339,15 +370,8 @@ export function loadKnowledgeBlock(
 	let projectKnowledge = "";
 	const knowledgePath = resolveSfRootFile(cwd, "KNOWLEDGE");
 	if (existsSync(knowledgePath)) {
-		try {
-			const content = readFileSync(knowledgePath, "utf-8").trim();
-			if (content) projectKnowledge = content;
-		} catch (e) {
-			logWarning(
-				"bootstrap",
-				`project knowledge file read failed: ${(e as Error).message}`,
-			);
-		}
+		const content = cachedReadFile(knowledgePath)?.trim() ?? "";
+		if (content) projectKnowledge = content;
 	}

 	if (!globalKnowledge && !projectKnowledge) {
@ -371,19 +395,15 @@ export function loadKnowledgeBlock(
 function loadArchitectureBlock(cwd: string): string {
 	const architecturePath = join(cwd, "ARCHITECTURE.md");
 	if (!existsSync(architecturePath)) return "";
-	try {
-		const raw = readFileSync(architecturePath, "utf-8").trim();
-		if (!raw) return "";
-		const MAX_CHARS = 8_000;
-		const content =
-			raw.length > MAX_CHARS
-				? raw.slice(0, MAX_CHARS) +
-					"\n\n*(truncated — see ARCHITECTURE.md for full map)*"
-				: raw;
-		return `\n\n[ARCHITECTURE — System map and invariants]\n\n${content}`;
-	} catch {
-		return "";
-	}
+	const raw = cachedReadFile(architecturePath)?.trim() ?? "";
+	if (!raw) return "";
+	const MAX_CHARS = 8_000;
+	const content =
+		raw.length > MAX_CHARS
+			? raw.slice(0, MAX_CHARS) +
+				"\n\n*(truncated — see ARCHITECTURE.md for full map)*"
+			: raw;
+	return `\n\n[ARCHITECTURE — System map and invariants]\n\n${content}`;
 }

 function buildWorktreeContextBlock(): string {
--- a/src/resources/extensions/sf/cache.ts
+++ b/src/resources/extensions/sf/cache.ts
@ -29,24 +29,24 @@ export function invalidateAllCaches(): void {
 	try {
 		invalidateStateCache();
 	} catch (err) {
-		logWarning(`Cache invalidation failed for state: ${err}`);
+		logWarning("state", `cache invalidation failed: ${err}`);
 	}

 	try {
 		clearPathCache();
 	} catch (err) {
-		logWarning(`Cache invalidation failed for paths: ${err}`);
+		logWarning("state", `cache invalidation failed: ${err}`);
 	}

 	try {
 		clearParseCache();
 	} catch (err) {
-		logWarning(`Cache invalidation failed for parse: ${err}`);
+		logWarning("state", `cache invalidation failed: ${err}`);
 	}

 	try {
 		clearArtifacts();
 	} catch (err) {
-		logWarning(`Cache invalidation failed for artifacts: ${err}`);
+		logWarning("db", `cache invalidation failed: ${err}`);
 	}
 }
--- a/src/resources/extensions/sf/codebase-generator.ts
+++ b/src/resources/extensions/sf/codebase-generator.ts
@ -16,12 +16,19 @@ import { sfRoot } from "./paths.js";

 // ─── Types ───────────────────────────────────────────────────────────────────

+/**
+ * Options for controlling codebase map generation behavior.
+ */
 export interface CodebaseMapOptions {
 	excludePatterns?: string[];
 	maxFiles?: number;
 	collapseThreshold?: number;
 }

+/**
+ * Metadata attached to a generated codebase map. Includes generation timestamp,
+ * content fingerprint, and information about truncation.
+ */
 export interface CodebaseMapMetadata {
 	generatedAt: string;
 	fingerprint: string;
@ -29,12 +36,19 @@ export interface CodebaseMapMetadata {
 	truncated: boolean;
 }

+/**
+ * Options for controlling codebase map freshness checks and regeneration.
+ */
 export interface EnsureCodebaseMapOptions {
 	ttlMs?: number;
 	maxAgeMs?: number;
 	force?: boolean;
 }

+/**
+ * Result from ensuring the codebase map is fresh. Indicates the outcome
+ * (generated, updated, already fresh, or empty) and relevant metadata.
+ */
 export interface EnsureCodebaseMapResult {
 	status: "generated" | "updated" | "fresh" | "empty";
 	fileCount: number;
--- a/src/resources/extensions/sf/commands/catalog.ts
+++ b/src/resources/extensions/sf/commands/catalog.ts
@ -37,11 +37,11 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly SfCommandDefinition[] = [
 	{ cmd: "next", desc: "Explicit step mode (same as /sf)" },
 	{
 		cmd: "autonomous",
-		desc: "Autonomous mode — research, plan, execute, commit, repeat",
+		desc: "Autonomous mode — continuous loop, never asks user (self-resolves or stops with blocker)",
 	},
 	{
 		cmd: "auto",
-		desc: "Alias for /sf autonomous",
+		desc: "Auto mode — continuous loop, can ask when blocked",
 	},
 	{ cmd: "stop", desc: "Stop autonomous mode gracefully" },
 	{
--- a/src/resources/extensions/sf/commands/handlers/auto.ts
+++ b/src/resources/extensions/sf/commands/handlers/auto.ts
@ -83,11 +83,10 @@ export async function handleAutoCommand(
 	ctx: ExtensionCommandContext,
 	pi: ExtensionAPI,
 ): Promise<boolean> {
-	const isAutonomousCommand =
-		trimmed === "auto" ||
-		trimmed.startsWith("auto ") ||
-		trimmed === "autonomous" ||
-		trimmed.startsWith("autonomous ");
+	const isAutonomousVerb =
+		trimmed === "autonomous" || trimmed.startsWith("autonomous ");
+	const isAutoVerb = trimmed === "auto" || trimmed.startsWith("auto ");
+	const isAutonomousFamily = isAutonomousVerb || isAutoVerb;

 	/**
 	 * Route an auto-mode launch through either the headless (in-process) or
@ -103,6 +102,7 @@ export async function handleAutoCommand(
 			step?: boolean;
 			milestoneLock?: string | null;
 			fullAutonomy?: boolean;
+			canAskUser?: boolean;
 		},
 	): Promise<void> => {
 		if (process.env.SF_HEADLESS === "1") {
@ -143,7 +143,7 @@ export async function handleAutoCommand(
 		return true;
 	}

-	if (isAutonomousCommand) {
+	if (isAutonomousFamily) {
 		const normalized = trimmed.replace(/^(?:auto|autonomous)\b/, "auto");
 		const { yoloSeedFile, rest: afterYolo } = parseYoloFlag(normalized);
 		const { milestoneId, rest: afterMilestone } =
@ -155,6 +155,8 @@ export async function handleAutoCommand(
 		// for human review. Git revert is the safety net.
 		const fullAutonomy =
 			/\bfull\b/.test(afterMilestone) || afterMilestone.includes("--full");
+		// `/sf auto` can ask the user when blocked; `/sf autonomous` cannot.
+		const canAskUser = isAutoVerb;
 		if (debugMode) enableDebug(projectRoot());
 		if (!(await guardRemoteSession(ctx, pi))) return true;

@ -192,9 +194,10 @@ export async function handleAutoCommand(
 			await launchAuto(verboseMode, {
 				milestoneLock: milestoneId,
 				fullAutonomy,
+				canAskUser,
 			});
 		} else {
-			await launchAuto(verboseMode, fullAutonomy ? { fullAutonomy } : undefined);
+			await launchAuto(verboseMode, { fullAutonomy, canAskUser });
 		}
 		return true;
 	}
--- a/src/resources/extensions/sf/context-budget.ts
+++ b/src/resources/extensions/sf/context-budget.ts
@ -220,18 +220,6 @@ export function resolveExecutorContextWindow(
 	return DEFAULT_CONTEXT_WINDOW;
 }

-/**
- * Reduce content to fit within budget using section-boundary truncation.
- */
-export function reduceToFit(
-	content: string,
-	budgetChars: number,
-): TruncationResult {
-	if (!content || content.length <= budgetChars) {
-		return { content, droppedSections: 0 };
-	}
-	return truncateAtSectionBoundary(content, budgetChars);
-}

 // ─── Internal helpers ────────────────────────────────────────────────────────

--- a/src/resources/extensions/sf/dispatch-guard.ts
+++ b/src/resources/extensions/sf/dispatch-guard.ts
@ -117,13 +117,6 @@ export function getPriorSliceCompletionBlocker(
 				// it may be a cross-milestone reference handled elsewhere.
 			}
 		} else {
-			const milestoneUsesExplicitDeps = slices.some(
-				(slice) => slice.depends.length > 0,
-			);
-			if (milestoneUsesExplicitDeps) {
-				return null;
-			}
-
 			// Positional fallback is only a heuristic for legacy slices with no
 			// declared dependencies. Skip any earlier slice that depends on the
 			// target, directly or transitively, or we can deadlock a valid zero-dep
--- a/src/resources/extensions/sf/doctor-environment.ts
+++ b/src/resources/extensions/sf/doctor-environment.ts
@ -400,8 +400,13 @@ function checkPortConflicts(basePath: string): EnvironmentCheckResult[] {
 				`lsof -i :${port} -sTCP:LISTEN -Fp | head -2`,
 				basePath,
 			);
+			// Parse lsof field-mode (-F) output: one field per line, e.g. "p<pid>", "c<cmdname>".
+			// NOTE(review): the command above requests only -Fp; confirm it also emits the "c" field, else this is always "unknown".
 			const processName =
-				nameResult?.match(/p(\d+)\n?c?(.+)?/)?.[2] ?? "unknown";
+				nameResult
+					?.split("\n")
+					.find((line) => line.startsWith("c"))
+					?.substring(1) ?? "unknown";

 			results.push({
 				name: "port_conflict",
--- a/src/resources/extensions/sf/doctor-runtime-checks.ts
+++ b/src/resources/extensions/sf/doctor-runtime-checks.ts
@ -397,19 +397,15 @@ export async function checkRuntimeHealth(
 		// `untracked-with-no-archive-match` are non-actionable from SF's POV.
 		const actionable = c.missing + c.upgradable + c["editing-drift"];
 		if (actionable > 0) {
-			const parts: string[] = [];
-			if (c.missing > 0) parts.push(`${c.missing} missing`);
-			if (c.upgradable > 0) parts.push(`${c.upgradable} pending-upgrade`);
-			if (c["editing-drift"] > 0)
-				parts.push(`${c["editing-drift"]} edited-drift`);
+			const { parts, pendingCount } = formatBucketCountParts(c);
 			issues.push({
 				severity: "warning",
 				code: "scaffold_drift",
 				scope: "project",
 				unitId: "project",
-				message: `Scaffold drift: ${parts.join(", ")}. Auto-sync handles missing+pending; edited-drift needs review.`,
+				message: `Scaffold drift: ${parts.join(", ")}. Auto-sync handles missing+pending; editing-drift needs review.`,
 				file: ".sf/scaffold-manifest.json",
-				fixable: c.missing + c.upgradable > 0,
+				fixable: pendingCount > 0,
 			});

 			if (shouldFix("scaffold_drift") && c.missing + c.upgradable > 0) {
@ -724,6 +720,30 @@ export async function checkRuntimeHealth(
 	}
 }

+/**
+ * Format bucket counts into a readable parts array for scaffold drift messages.
+ * Shared logic between checkRuntimeHealth and checkScaffoldFreshness.
+ * Also returns pendingCount: the sum of the missing and upgradable counts.
+ */
+function formatBucketCountParts(counts: {
+	missing?: number;
+	upgradable?: number;
+	"editing-drift"?: number;
+	untracked?: number;
+}): { parts: string[]; pendingCount: number } {
+	const parts: string[] = [];
+	if (counts.missing && counts.missing > 0)
+		parts.push(`${counts.missing} missing`);
+	if (counts.upgradable && counts.upgradable > 0)
+		parts.push(`${counts.upgradable} pending upgrade`);
+	if (counts["editing-drift"] && counts["editing-drift"] > 0)
+		parts.push(`${counts["editing-drift"]} editing-drift`);
+	if (counts.untracked && counts.untracked > 0)
+		parts.push(`${counts.untracked} untracked`);
+	const pendingCount = (counts.missing ?? 0) + (counts.upgradable ?? 0);
+	return { parts, pendingCount };
+}
+
 /**
 * ADR-021 Phase C: report scaffold drift bucket counts as a doctor finding.
 *
@ -749,17 +769,11 @@ export function checkScaffoldFreshness(basePath: string): DoctorIssue | null {
 		counts.untracked;
 	if (actionable === 0) return null;

-	const parts: string[] = [];
-	if (counts.missing > 0) parts.push(`${counts.missing} missing`);
-	if (counts.upgradable > 0) parts.push(`${counts.upgradable} pending upgrade`);
-	if (counts["editing-drift"] > 0)
-		parts.push(`${counts["editing-drift"]} editing-drift`);
-	if (counts.untracked > 0) parts.push(`${counts.untracked} untracked`);
-
+	const { parts, pendingCount } = formatBucketCountParts(counts);
 	const summary = parts.join(", ");
 	const guidance =
-		counts.upgradable + counts.missing > 0
-			? `Run /sf scaffold sync to refresh ${counts.upgradable + counts.missing} pending docs`
+		pendingCount > 0
+			? `Run /sf scaffold sync to refresh ${pendingCount} pending docs`
 			: "Run /sf scaffold sync to inspect drift";

 	return {
--- a/src/resources/extensions/sf/execution-instruction-guard.ts
+++ b/src/resources/extensions/sf/execution-instruction-guard.ts
@ -8,6 +8,7 @@ import { logWarning } from "./workflow-logger.js";
 import { writeManifest } from "./workflow-manifest.js";
 import { renderAllProjections } from "./workflow-projections.js";

+/** Reason why a task dispatch should be blocked due to repo instruction conflict. */
 export interface ExecutionInstructionConflict {
 	reason: string;
 }
@ -75,6 +76,7 @@ function taskRecordsExplicitLocalComposeRequest(taskText: string): boolean {
 	);
 }

+/** Check for conflicts between repo instructions and a task's execution context. Returns conflict details if dispatch should be blocked, null otherwise. */
 export function getExecuteTaskInstructionConflict(
 	basePath: string,
 	mid: string,
--- a/src/resources/extensions/sf/gate-registry.ts
+++ b/src/resources/extensions/sf/gate-registry.ts
@ -179,6 +179,7 @@ export const GATE_REGISTRY = {
 	},
 } as const satisfies Record<GateId, GateDefinition>;

+/** Type of the GATE_REGISTRY constant. */
 /** Type of the GATE_REGISTRY constant. */
 export type GateRegistry = typeof GATE_REGISTRY;

--- a/src/resources/extensions/sf/git-service.ts
+++ b/src/resources/extensions/sf/git-service.ts
@ -26,6 +26,7 @@ import { getErrorMessage } from "./error-utils.js";
 import { SF_GIT_ERROR, SF_MERGE_CONFLICT, SFError } from "./errors.js";
 import { normalizePlannedFileReference } from "./files.js";
 import { GIT_NO_PROMPT_ENV } from "./git-constants.js";
+import { SF_RUNTIME_PATTERNS } from "./gitignore.js";
 import {
 	_resetHasChangesCache,
 	nativeAddAllWithExclusions,
--- a/src/resources/extensions/sf/gitignore.ts
+++ b/src/resources/extensions/sf/gitignore.ts
@ -27,7 +27,7 @@ import { bodyHash as preferencesBodyHash } from "./scaffold-versioning.js";
 * With external state (symlink), these are a no-op in most cases,
 * but retained for backwards compatibility during migration.
 */
-const SF_RUNTIME_PATTERNS = [
+export const SF_RUNTIME_PATTERNS = [
 	".sf/activity/",
 	".sf/audit/",
 	".sf/exec/",
--- a/src/resources/extensions/sf/notification-store.ts
+++ b/src/resources/extensions/sf/notification-store.ts
@ -16,6 +16,7 @@ import {
 	writeFileSync,
 } from "node:fs";
 import { join } from "node:path";
+import { sfRuntimeRoot } from "./paths.js";

 // ─── Types ──────────────────────────────────────────────────────────────

@ -125,7 +126,7 @@ export function appendNotification(
 	};

 	try {
-		const dir = join(_basePath, ".sf");
+		const dir = sfRuntimeRoot(_basePath);
 		mkdirSync(dir, { recursive: true });
 		appendFileSync(join(dir, FILENAME), JSON.stringify(entry) + "\n", "utf-8");
 		_lineCount++;
@ -263,7 +264,7 @@ export function _resetNotificationStore(): void {
 // ─── Internal ───────────────────────────────────────────────────────────

 function _readEntriesFromDisk(basePath: string): NotificationEntry[] {
-	const filePath = join(basePath, ".sf", FILENAME);
+	const filePath = join(sfRuntimeRoot(basePath), FILENAME);
 	if (!existsSync(filePath)) return [];
 	try {
 		const content = readFileSync(filePath, "utf-8");
@ -316,7 +317,7 @@ function _emitChange(): void {
 * Must be called inside _withLock for cross-process safety.
 */
 function _atomicWrite(basePath: string, content: string): void {
-	const dir = join(basePath, ".sf");
+	const dir = sfRuntimeRoot(basePath);
 	mkdirSync(dir, { recursive: true });
 	const target = join(dir, FILENAME);
 	const tmp = target + ".tmp." + process.pid;
@ -331,14 +332,15 @@ function _atomicWrite(basePath: string, content: string): void {
 * to avoid deadlocking the UI on a stale lock.
 */
 function _withLock<T>(basePath: string, fn: () => T): T {
-	const lockPath = join(basePath, ".sf", LOCKFILE);
+	const runtimeDir = sfRuntimeRoot(basePath);
+	const lockPath = join(runtimeDir, LOCKFILE);
 	let fd: number | null = null;
 	const maxAttempts = 5;
 	const retryMs = 20;

 	for (let i = 0; i < maxAttempts; i++) {
 		try {
-			mkdirSync(join(basePath, ".sf"), { recursive: true });
+			mkdirSync(runtimeDir, { recursive: true });
 			fd = openSync(lockPath, "wx");
 			break;
 		} catch (err: any) {
--- a/src/resources/extensions/sf/paths.ts
+++ b/src/resources/extensions/sf/paths.ts
@ -10,7 +10,8 @@
 */

 import { spawnSync } from "node:child_process";
-import { Dirent, existsSync, readdirSync, realpathSync } from "node:fs";
+import { Dirent, existsSync, readFileSync, readdirSync, realpathSync } from "node:fs";
+import { homedir } from "node:os";
 import { dirname, join, normalize } from "node:path";
 import { DIR_CACHE_MAX } from "./constants.js";
 import {
@ -324,6 +325,75 @@ export function sfRoot(basePath: string): string {

 export const projectRoot = sfRoot;

+// ─── Self-Detection & Runtime Root ───────────────────────────────────────────
+
+const sfHome = process.env.SF_HOME || join(homedir(), ".sf");
+
+let _isRunningOnSelfCache: { basePath: string; result: boolean } | null = null;
+
+/**
+ * Detect whether SF is running on its own source tree. When true, runtime
+ * self-reporting (notifications, activity, journal, self-feedback, etc.) is
+ * redirected to `~/.sf/` instead of `<basePath>/.sf/` so that feedback ABOUT
+ * SF as a tool accumulates at the global level rather than polluting the
+ * forge repo with per-project runtime artifacts.
+ *
+ * Detection signals (must match BOTH for true):
+ *   1. `<basePath>/package.json` exists with `"name": "singularity-forge"`
+ *   2. `<basePath>/src/resources/extensions/sf/loader.ts` exists
+ *
+ * The result for the most recently checked basePath is cached to avoid repeat filesystem hits.
+ */
+export function isRunningOnSelf(basePath: string): boolean {
+	if (_isRunningOnSelfCache?.basePath === basePath) {
+		return _isRunningOnSelfCache.result;
+	}
+	let result = false;
+	try {
+		const pkgPath = join(basePath, "package.json");
+		if (existsSync(pkgPath)) {
+			const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
+			if (pkg?.name === "singularity-forge") {
+				const loaderPath = join(
+					basePath,
+					"src/resources/extensions/sf/loader.ts",
+				);
+				if (existsSync(loaderPath)) {
+					result = true;
+				}
+			}
+		}
+	} catch {
+		// Detection failure → false (default to per-repo .sf/)
+	}
+	_isRunningOnSelfCache = { basePath, result };
+	return result;
+}
+
+/** Reset the self-detection cache. Test-only. */
+export function _resetSelfDetectionCache(): void {
+	_isRunningOnSelfCache = null;
+}
+
+/**
+ * Resolve the directory that holds SF runtime self-reporting artifacts:
+ * notifications.jsonl, activity/, journal/, self-feedback.jsonl,
+ * routing-history.json, metrics.json, event-log.jsonl, forensics/, audit/,
+ * exec/, model-benchmarks/, reports/, repo-meta.json.
+ *
+ * Default: `<basePath>/.sf` (same as sfRoot).
+ * When isRunningOnSelf(basePath) returns true: `~/.sf` (so SF self-development
+ * feedback lands at the global level, not in the singularity-forge tree).
+ *
+ * IMPORTANT: tracked artifacts (PROJECT.md, DECISIONS.md, REQUIREMENTS.md,
+ * QUEUE.md, milestones/, KNOWLEDGE.md) MUST continue to use sfRoot(basePath)
+ * — they are durable project memory per ADR-001 and remain in the repo.
+ */
+export function sfRuntimeRoot(basePath: string): string {
+	if (isRunningOnSelf(basePath)) return sfHome;
+	return sfRoot(basePath);
+}
+
 /**
 * Detect if a path is inside a .sf/worktrees/<name>/ structure.
 *
--- a/src/resources/extensions/sf/pre-execution-checks.ts
+++ b/src/resources/extensions/sf/pre-execution-checks.ts
@ -79,12 +79,14 @@ export function extractPackageReferences(description: string): string[] {
 		// something that's not a package (non-token char after whitespace)
 		const tokenPattern = /^([@a-zA-Z][a-zA-Z0-9@/_-]*)(?:\s+|$)/;
 		let remaining = afterCmd;
+		let afterFlag = false;

 		while (remaining.length > 0) {
-			// Skip any flags like -D, --save-dev
+			// Skip any flags like -D, --save-dev; next token is a bare flag-value
 			const flagMatch = remaining.match(/^(-[a-zA-Z-]+)\s*/);
 			if (flagMatch) {
 				remaining = remaining.slice(flagMatch[0].length);
+				afterFlag = true;
 				continue;
 			}

@ -92,12 +94,15 @@ export function extractPackageReferences(description: string): string[] {
 			const pkgMatch = remaining.match(tokenPattern);
 			if (pkgMatch) {
 				const token = pkgMatch[1];
-				// Skip stopwords - they indicate end of package list
-				if (stopwords.has(token.toLowerCase())) {
+				// Only stop on stopwords when the token is NOT a bare flag-value
+				// (e.g. `npm install -D test` — "test" follows -D so it is the
+				// package name, not an English stopword).
+				if (!afterFlag && stopwords.has(token.toLowerCase())) {
 					break;
 				}
 				packages.add(normalizePackageName(token));
 				remaining = remaining.slice(pkgMatch[0].length);
+				afterFlag = false;
 			} else {
 				// Not a package name, stop parsing this install command
 				break;
--- a/src/resources/extensions/sf/preferences.ts
+++ b/src/resources/extensions/sf/preferences.ts
@ -196,6 +196,9 @@ export function getProjectSFPreferencesPath(): string {

 // ─── Loading ────────────────────────────────────────────────────────────────

+/**
+ * Load global SF preferences, trying multiple paths and legacy locations.
+ */
 export function loadGlobalSFPreferences(): LoadedSFPreferences | null {
 	return (
 		loadPreferencesFile(globalPreferencesPath(), "global") ??
@ -204,6 +207,9 @@ export function loadGlobalSFPreferences(): LoadedSFPreferences | null {
 	);
 }

+/**
+ * Load project-level SF preferences.
+ */
 export function loadProjectSFPreferences(): LoadedSFPreferences | null {
 	return (
 		loadPreferencesFile(projectPreferencesPath(), "project") ??
@ -211,6 +217,9 @@ export function loadProjectSFPreferences(): LoadedSFPreferences | null {
 	);
 }

+/**
+ * Load and merge global and project preferences with profile defaults and mode defaults applied.
+ */
 export function loadEffectiveSFPreferences(): LoadedSFPreferences | null {
 	const globalPreferences = loadGlobalSFPreferences();
 	const projectPreferences = loadProjectSFPreferences();
@ -303,7 +312,11 @@ export function _resetParseWarningFlag(): void {
 	_warnedSectionParse = false;
 }

-/** @internal Exported for testing only */
+/**
+ * Parse preferences from markdown frontmatter or heading+list format.
+ *
+ * @internal Exported for testing only
+ */
 export function parsePreferencesMarkdown(
 	content: string,
 ): SFPreferences | null {
@ -438,6 +451,9 @@ function parseHeadingListFormat(content: string): SFPreferences {
 * Apply mode defaults as the lowest-priority layer.
 * Mode defaults fill in undefined fields; any explicit user value wins.
 */
+/**
+ * Apply mode defaults as the lowest-priority layer: they only fill undefined fields, so any explicit user value wins.
+ */
 export function applyModeDefaults(
 	mode: WorkflowMode,
 	prefs: SFPreferences,
@ -751,6 +767,9 @@ function mergePreDispatchHooks(

 // ─── System Prompt Rendering ──────────────────────────────────────────────────

+/**
+ * Render preferences as a formatted string for inclusion in system prompts.
+ */
 export function renderPreferencesForSystemPrompt(
 	preferences: SFPreferences,
 	resolutions?: Map<string, SkillResolution>,
@ -865,6 +884,9 @@ export function resolvePreDispatchHooks(): PreDispatchHookConfig[] {
 * Worktree isolation requires explicit opt-in because it depends on git
 * branch infrastructure that must be set up before use.
 */
+/**
+ * Get the effective git isolation mode from preferences ("worktree" or "branch" when configured, otherwise "none").
+ */
 export function getIsolationMode(): "none" | "worktree" | "branch" {
 	const prefs = loadEffectiveSFPreferences()?.preferences?.git;
 	if (prefs?.isolation === "worktree") return "worktree";
@ -872,6 +894,9 @@ export function getIsolationMode(): "none" | "worktree" | "branch" {
 	return "none"; // default — no isolation, work on current branch
 }

+/**
+ * Resolve parallel execution configuration from preferences.
+ */
 export function resolveParallelConfig(
 	prefs: SFPreferences | undefined,
 ): import("./types.js").ParallelConfig {
--- a/src/resources/extensions/sf/production-mutation-approval.ts
+++ b/src/resources/extensions/sf/production-mutation-approval.ts
@ -40,6 +40,7 @@ export interface ProductionMutationApproval {
 	instructions: string[];
 }

+/** Result of checking approval status: path, approval decision, and reasons if rejected. */
 export interface ProductionMutationApprovalStatus {
 	path: string;
 	approved: boolean;
--- a/src/resources/extensions/sf/state.ts
+++ b/src/resources/extensions/sf/state.ts
@ -149,12 +149,6 @@ let _stateCache: StateCache | null = null;

 // ── Telemetry counters for derive-path observability ────────────────────────
 let _telemetry = { dbDeriveCount: 0, markdownDeriveCount: 0 };
-export function getDeriveTelemetry() {
-	return { ..._telemetry };
-}
-export function resetDeriveTelemetry() {
-	_telemetry = { dbDeriveCount: 0, markdownDeriveCount: 0 };
-}

 /**
 * Invalidate the deriveState() cache. Call this whenever planning files on disk
--- a/src/resources/extensions/sf/tests/provider-errors.test.ts
+++ b/src/resources/extensions/sf/tests/provider-errors.test.ts
@ -13,8 +13,10 @@ import { fileURLToPath } from "node:url";
 import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.ts";
 import {
 	classifyError,
+	createRetryState,
 	isTransient,
 	isTransientNetworkError,
+	resetRetryState,
 } from "../error-classifier.ts";
 import { getNextFallbackModel } from "../preferences.ts";
 import { pauseAutoForProviderError } from "../provider-error-pause.ts";
@ -739,3 +741,42 @@ test("agent-session retryable error regex matches server_error (underscore)", ()
 	// non-retryable errors must not match
 	assert.ok(!retryableRegex.test("model not found"));
 });
+
+// ── createRetryState / resetRetryState ────────────────────────────────────────
+
+test("createRetryState returns zero counters and undefined model", () => {
+	const fresh = createRetryState();
+	assert.equal(fresh.networkRetryCount, 0);
+	assert.equal(fresh.consecutiveTransientCount, 0);
+	assert.equal(fresh.currentRetryModelId, undefined);
+});
+
+test("createRetryState returns independent objects on each call", () => {
+	const mutated = createRetryState();
+	const untouched = createRetryState();
+	mutated.networkRetryCount = 5;
+	mutated.currentRetryModelId = "claude-3-haiku";
+	assert.equal(untouched.networkRetryCount, 0, "mutations to a must not affect b");
+	assert.equal(untouched.currentRetryModelId, undefined);
+});
+
+test("resetRetryState restores zero counters and clears model", () => {
+	const dirty = createRetryState();
+	dirty.networkRetryCount = 3;
+	dirty.consecutiveTransientCount = 2;
+	dirty.currentRetryModelId = "fallback-model";
+
+	resetRetryState(dirty);
+
+	assert.equal(dirty.networkRetryCount, 0);
+	assert.equal(dirty.consecutiveTransientCount, 0);
+	assert.equal(dirty.currentRetryModelId, undefined);
+});
+
+test("resetRetryState is idempotent — resetting a fresh state is a no-op", () => {
+	const pristine = createRetryState();
+	resetRetryState(pristine);
+	assert.equal(pristine.networkRetryCount, 0);
+	assert.equal(pristine.consecutiveTransientCount, 0);
+	assert.equal(pristine.currentRetryModelId, undefined);
+});
--- a/src/resources/extensions/sf/tests/record-promoter.test.ts
+++ b/src/resources/extensions/sf/tests/record-promoter.test.ts
@ -13,6 +13,7 @@ import { join } from "node:path";
 import { test } from "node:test";

 import {
+	dispatchRecordPromoterFireAndForget,
 	parseRecordFrontmatter,
 	promoteActionableRecords,
 } from "../record-promoter.ts";
--- a/src/resources/extensions/sf/tests/scaffold-keeper.test.ts
+++ b/src/resources/extensions/sf/tests/scaffold-keeper.test.ts
@ -26,7 +26,10 @@ import {
 	parseScaffoldSyncArgs,
 } from "../commands-scaffold-sync.ts";
 import { detectScaffoldDrift } from "../scaffold-drift.ts";
-import { dispatchScaffoldKeeperIfNeeded } from "../scaffold-keeper.ts";
+import {
+	dispatchScaffoldKeeperFireAndForget,
+	dispatchScaffoldKeeperIfNeeded,
+} from "../scaffold-keeper.ts";
 import { stampScaffoldFile } from "../scaffold-versioning.ts";

 interface NotifyCall {
--- a/src/resources/extensions/sf/tests/verification-evidence.test.ts
+++ b/src/resources/extensions/sf/tests/verification-evidence.test.ts
@ -22,6 +22,7 @@ import test from "node:test";
 import type { VerificationResult } from "../types.ts";
 import {
 	formatEvidenceTable,
+	writePreExecutionEvidence,
 	writeVerificationJSON,
 } from "../verification-evidence.ts";

@ -876,3 +877,125 @@ test("verification-evidence: integration — VerificationResult with auditWarnin
 		rmSync(tmp, { recursive: true, force: true });
 	}
 });
+
+// ─── writePreExecutionEvidence Tests ─────────────────────────────────────────
+
+test("verification-evidence: writePreExecutionEvidence writes correct JSON shape", () => {
+	const workDir = makeTempDir("ve-pre-exec-shape");
+	try {
+		writePreExecutionEvidence(
+			{
+				status: "pass",
+				checks: [
+					{ category: "package", target: "react", passed: true, message: "installed" },
+				],
+				durationMs: 120,
+			},
+			workDir,
+			"M001",
+			"S01",
+		);
+
+		const evidencePath = join(workDir, "S01-PRE-EXEC-VERIFY.json");
+		assert.ok(existsSync(evidencePath), "PRE-EXEC-VERIFY.json must exist");
+		const evidence = JSON.parse(readFileSync(evidencePath, "utf-8"));
+		assert.equal(evidence.schemaVersion, 1);
+		assert.equal(evidence.milestoneId, "M001");
+		assert.equal(evidence.sliceId, "S01");
+		assert.equal(evidence.status, "pass");
+		assert.equal(evidence.durationMs, 120);
+		assert.equal(evidence.checks.length, 1);
+		const [onlyCheck] = evidence.checks;
+		assert.equal(onlyCheck.category, "package");
+		assert.equal(onlyCheck.target, "react");
+		assert.equal(onlyCheck.passed, true);
+		assert.equal(onlyCheck.message, "installed");
+		assert.ok(typeof evidence.timestamp === "number" && evidence.timestamp > 0, "timestamp must be a positive number");
+	} finally {
+		rmSync(workDir, { recursive: true, force: true });
+	}
+});
+
+test("verification-evidence: writePreExecutionEvidence creates directory if not present", () => {
+	const root = makeTempDir("ve-pre-exec-mkdir");
+	const missingDir = join(root, "deep", "slice", "dir");
+	try {
+		assert.ok(!existsSync(missingDir), "directory should not exist yet");
+
+		writePreExecutionEvidence(
+			{ status: "warn", checks: [], durationMs: 0 },
+			missingDir,
+			"M002",
+			"S02",
+		);
+
+		assert.ok(existsSync(missingDir), "directory must be created");
+		assert.ok(existsSync(join(missingDir, "S02-PRE-EXEC-VERIFY.json")), "file must exist");
+	} finally {
+		rmSync(root, { recursive: true, force: true });
+	}
+});
+
+test("verification-evidence: writePreExecutionEvidence records fail status and blocking checks", () => {
+	const workDir = makeTempDir("ve-pre-exec-fail");
+	try {
+		writePreExecutionEvidence(
+			{
+				status: "fail",
+				checks: [
+					{ category: "file", target: "src/missing.ts", passed: false, message: "file not found", blocking: true },
+					{ category: "tool", target: "node", passed: true, message: "found" },
+				],
+				durationMs: 45,
+			},
+			workDir,
+			"M003",
+			"S03",
+		);
+		const evidence = JSON.parse(readFileSync(join(workDir, "S03-PRE-EXEC-VERIFY.json"), "utf-8"));
+		assert.equal(evidence.status, "fail");
+		assert.equal(evidence.checks.length, 2);
+		const [missingFile, nodeTool] = evidence.checks;
+		assert.equal(missingFile.passed, false);
+		assert.equal(missingFile.blocking, true);
+		assert.equal(nodeTool.passed, true);
+	} finally {
+		rmSync(workDir, { recursive: true, force: true });
+	}
+});
+
+test("verification-evidence: writePreExecutionEvidence with empty checks still writes valid JSON", () => {
+	const workDir = makeTempDir("ve-pre-exec-empty");
+	try {
+		writePreExecutionEvidence(
+			{ status: "pass", checks: [], durationMs: 0 },
+			workDir,
+			"M001",
+			"S00",
+		);
+
+		const evidence = JSON.parse(readFileSync(join(workDir, "S00-PRE-EXEC-VERIFY.json"), "utf-8"));
+		assert.equal(evidence.schemaVersion, 1);
+		assert.deepStrictEqual(evidence.checks, []);
+		assert.equal(evidence.status, "pass");
+	} finally {
+		rmSync(workDir, { recursive: true, force: true });
+	}
+});
+
+test("verification-evidence: writePreExecutionEvidence uses sliceId in filename", () => {
+	const workDir = makeTempDir("ve-pre-exec-filename");
+	try {
+		writePreExecutionEvidence(
+			{ status: "warn", checks: [], durationMs: 10 },
+			workDir,
+			"M099",
+			"S42",
+		);
+
+		assert.ok(existsSync(join(workDir, "S42-PRE-EXEC-VERIFY.json")), "filename must use sliceId");
+		assert.ok(!existsSync(join(workDir, "M099-PRE-EXEC-VERIFY.json")), "filename must not use milestoneId");
+	} finally {
+		rmSync(workDir, { recursive: true, force: true });
+	}
+});
--- a/src/resources/extensions/sf/token-counter.ts
+++ b/src/resources/extensions/sf/token-counter.ts
@ -30,11 +30,17 @@ interface TokenEncoder {
 let encoder: TokenEncoder | null = null;
 let encoderFailed = false;

+/**
+ * Parsed credentials from Google Gemini CLI API key JSON.
+ */
 interface GeminiCliCredentials {
 	token: string;
 	projectId: string;
 }

+/**
+ * Dependency injection interface for Google Gemini token counting.
+ */
 interface GeminiCountTokensDeps {
 	buildServer(apiKeyRaw: string): Promise<{
 		countTokens(
@ -57,6 +63,9 @@ async function getEncoder(): Promise<TokenEncoder | null> {
 	}
 }

+/**
+ * Count tokens in text using tiktoken if available, otherwise estimate.
+ */
 export async function countTokens(text: string): Promise<number> {
 	const enc = await getEncoder();
 	if (enc) {
@ -66,6 +75,9 @@ export async function countTokens(text: string): Promise<number> {
 	return Math.ceil(text.length / 4);
 }

+/**
+ * Synchronously count tokens (requires tiktoken to be pre-loaded).
+ */
 export function countTokensSync(text: string): number {
 	if (encoder) {
 		return encoder.encode(text).length;
@ -73,21 +85,33 @@ export function countTokensSync(text: string): number {
 	return Math.ceil(text.length / 4);
 }

+/**
+ * Initialize the token counter by loading tiktoken encoder.
+ */
 export async function initTokenCounter(): Promise<boolean> {
 	const enc = await getEncoder();
 	return enc !== null;
 }

+/**
+ * Check if tiktoken encoder is loaded for accurate token counting.
+ */
 export function isAccurateCountingAvailable(): boolean {
 	return encoder !== null;
 }

+/**
+ * Get the provider-specific characters-per-token ratio for estimation.
+ */
 export function getCharsPerToken(provider: TokenProvider): number {
 	return (
 		CHARS_PER_TOKEN_BY_PROVIDER[provider] ?? CHARS_PER_TOKEN_BY_PROVIDER.unknown
 	);
 }

+/**
+ * Estimate token count for text using provider-specific ratio.
+ */
 export function estimateTokensForProvider(
 	text: string,
 	provider: TokenProvider,
--- a/src/resources/extensions/sf/verification-gate.ts
+++ b/src/resources/extensions/sf/verification-gate.ts
@ -129,10 +129,18 @@ export function formatFailureContext(result: VerificationResult): string {

 	const blocks: string[] = [];

+	// Give each failing check a fair share of the total budget so that
+	// diagnostics from later checks are not silently cut when the first
+	// check alone would exceed MAX_FAILURE_CONTEXT_CHARS.
+	const perCheckBudget = Math.floor(
+		MAX_FAILURE_CONTEXT_CHARS / failures.length,
+	);
+
 	for (const check of failures) {
 		let stderr = check.stderr ?? "";
-		if (stderr.length > MAX_STDERR_PER_CHECK) {
-			stderr = stderr.slice(0, MAX_STDERR_PER_CHECK) + "\n…[truncated]";
+		const cap = Math.min(MAX_STDERR_PER_CHECK, perCheckBudget);
+		if (stderr.length > cap) {
+			stderr = stderr.slice(0, cap) + "\n…[truncated]";
 		}

 		blocks.push(
--- a/src/resources/extensions/sf/workflow-projections.ts
+++ b/src/resources/extensions/sf/workflow-projections.ts
@ -45,6 +45,10 @@ export function stripIdPrefix(title: string, id: string): string {
 * Render PLAN.md content from a slice row and its task rows.
 * Pure function — no side effects.
 */
+/**
+ * Render PLAN.md content from a slice row and its task rows.
+ * Pure function with no side effects.
+ */
 export function renderPlanContent(
 	sliceRow: SliceRow,
 	taskRows: TaskRow[],
@ -232,6 +236,10 @@ export function renderPlanContent(
 * Render PLAN.md projection to disk for a specific slice.
 * Queries DB via helper functions, renders content, writes via atomicWriteSync.
 */
+/**
+ * Render and write PLAN.md projection to disk for a slice.
+ * Queries DB, renders content, and writes via atomic write.
+ */
 export function renderPlanProjection(
 	basePath: string,
 	milestoneId: string,
--- a/src/resources/extensions/sf/workflow-reconcile.ts
+++ b/src/resources/extensions/sf/workflow-reconcile.ts
@ -4,6 +4,7 @@ import { atomicWriteSync } from "./atomic-write.js";
 import { clearParseCache } from "./files.js";
 import { clearPathCache } from "./paths.js";
 import {
+	getMilestone,
 	getMilestoneSlices,
 	getSliceTasks,
 	insertMilestone,
--- a/src/resources/extensions/sf/workflow-template-compiler.ts
+++ b/src/resources/extensions/sf/workflow-template-compiler.ts
@ -9,6 +9,10 @@
 import type { WorkflowDefinition } from "./definition-loader.js";
 import type { TemplateEntry } from "./workflow-templates.js";

+/**
+ * Input to compileTemplateRun for converting /sf start templates to workflows.
+ * Contains template metadata, content, and run configuration.
+ */
 export interface CompileTemplateRunInput {
 	templateId: string;
 	template: TemplateEntry;
@ -21,6 +25,10 @@ export interface CompileTemplateRunInput {
 	mode?: "guided" | "autonomous" | "explicit";
 }

+/**
+ * Generate a step ID from a phase name and index.
+ * Lowercases, slugifies, and limits to 40 characters.
+ */
 function stepIdForPhase(phase: string, index: number): string {
 	const slug = phase
 		.toLowerCase()
@ -31,6 +39,9 @@ function stepIdForPhase(phase: string, index: number): string {
 	return slug || `phase-${index + 1}`;
 }

+/**
+ * Build the prompt text for executing a single phase of a template.
+ */
 function phasePrompt(input: CompileTemplateRunInput, phase: string): string {
 	const guided = input.mode === "guided";
 	return [
@ -55,6 +66,9 @@ function phasePrompt(input: CompileTemplateRunInput, phase: string): string {
 	].join("\n");
 }

+/**
+ * Check if a phase should have a guided review gate based on template config.
+ */
 function hasGuidedReviewGate(
 	input: CompileTemplateRunInput,
 	phase: string,
@ -73,6 +87,10 @@ function hasGuidedReviewGate(
 *
 * Consumer: `handleStart` before creating a template-backed workflow run.
 */
+/**
+ * Compile a workflow template into a WorkflowDefinition.
+ * Bridges /sf start templates into the custom workflow graph runtime.
+ */
 export function compileTemplateRun(
 	input: CompileTemplateRunInput,
 ): WorkflowDefinition {
--- a/src/resources/extensions/sf/workflow-templates.ts
+++ b/src/resources/extensions/sf/workflow-templates.ts
@ -29,6 +29,10 @@ function resolveSfExtensionDir(): string {

 // ─── Types ───────────────────────────────────────────────────────────────────

+/**
+ * A workflow template registry entry for /sf start workflows.
+ * Includes name, phases, triggers, and optional interaction config.
+ */
 export interface TemplateEntry {
 	name: string;
 	description: string;
@ -44,6 +48,10 @@ export interface TemplateEntry {
 	};
 }

+/**
+ * Registry of all available workflow templates keyed by template ID.
+ * Includes schema version for migration handling.
+ */
 export interface TemplateRegistry {
 	schemaVersion: number;
 	templates: Record<string, TemplateEntry>;
--- a/src/resources/extensions/sf/worktree-command.ts
+++ b/src/resources/extensions/sf/worktree-command.ts
@ -759,11 +759,15 @@ async function handleMerge(

 		// Switch to the main tree before merging.
 		// Must be on the main branch to run git merge --squash.
+		// NOTE: Do NOT clear originalCwd here — a crash or hang between this chdir and
+		// the completed merge would leave the session unable to detect it was inside a
+		// worktree on restart. originalCwd is cleared only in the success path below.
+		// The registerWorktreeCommand recovery logic reads process.cwd() on reload and
+		// can restore originalCwd for orphaned worktree sessions.
 		if (originalCwd) {
 			const prevCwd = process.cwd();
 			process.chdir(basePath);
 			nudgeGitBranchCache(prevCwd);
-			originalCwd = null;
 		}

 		// --- Deterministic merge path (preferred) ---
@ -785,6 +789,8 @@ async function handleMerge(

 		try {
 			mergeWorktreeToMain(basePath, name, commitMessage);
+			// Merge succeeded — safe to clear the worktree tracking state now
+			originalCwd = null;
 			ctx.ui.notify(
 				[
 					`${CLR.ok("✓")} Merged ${CLR.name(name)} → ${CLR.branch(mainBranch)} ${CLR.muted("(deterministic squash)")}`,
--- a/src/resources/extensions/sf/worktree-manager.ts
+++ b/src/resources/extensions/sf/worktree-manager.ts
@ -568,16 +568,28 @@ export function removeWorktree(
 	// inside .sf/worktrees/ — a symlink inside the directory could point out.
 	const resolvedPathSafe = isInsideWorktreesDir(basePath, resolvedWtPath);

-	// If we're inside the worktree, move out first — git can't remove an in-use directory
-	// Note: TOCTOU window between chdir and rmSync — another process could remove the
-	// worktree after we chdir but before we unlink. The fallback/retry pattern handles this.
+	// If we're inside the worktree, move out first — git can't remove an in-use directory.
+	// TOCTOU: the existence check (existsSync) and the chdir are not atomic. A concurrent
+	// process could remove the worktree between these two calls. If chdir fails because
+	// basePath was also deleted, retry once with the process's HOME directory as a
+	// last-resort fallback — the outer finally/catch handles any remaining ENOENT.
 	const cwd = process.cwd();
 	const resolvedCwd = existsSync(cwd) ? realpathSync(cwd) : cwd;
 	if (
 		resolvedCwd === resolvedWtPath ||
 		resolvedCwd.startsWith(resolvedWtPath + sep)
 	) {
-		process.chdir(basePath);
+		try {
+			process.chdir(basePath);
+		} catch {
+			// Retry: basePath may have been removed concurrently — fall back to HOME
+			const fallback = process.env.HOME ?? "/";
+			try {
+				process.chdir(fallback);
+			} catch {
+				/* nothing left to do — proceed with removal attempt */
+			}
+		}
 	}

 	if (!existsSync(wtPath)) {