feat(sf): worktree hardening, skip-slice handler, cwd anchoring + docstrings

- new worktree-root.ts / worktree-session-state.ts: track and restore
  original project root after /worktree merge or /worktree return
- new tools/skip-slice.ts: cascade skip to tasks in the slice so milestone
  completion isn't blocked by pending tasks (#4375)
- auto/run-unit.ts: anchor cwd to basePath before newSession() captures it
  (GAP-10) — prevents tool runtime / system prompt from rooting on drifted
  cwd from async_bash, background jobs, or prior unit cleanup
- safety/git-checkpoint.ts: harden HEAD-rev-parse against execFileSync
  errors, surface stderr properly
- broad JSDoc / docstring pass across the rest of the SF extension surface

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 01:48:37 +02:00
parent ed47951960
commit 4f4b584e53
33 changed files with 913 additions and 197 deletions

View file

@ -11,6 +11,12 @@ import {
import { migrateLegacyScaffold } from "./scaffold-drift.js";
import { logWarning } from "./workflow-logger.js";
/**
* Single scaffold file template with path and template content.
*
* @property path canonical file path relative to project root (e.g., "AGENTS.md", "docs/PLANS.md")
* @property content template body to write when file is missing or pending-upgrade
*/
export interface ScaffoldFile {
path: string;
content: string;
@ -23,6 +29,14 @@ export interface ScaffoldFile {
*/
const NO_MARKER_PATHS = new Set<string>([".siftignore"]);
/**
* Canonical scaffold file templates SF manages for agent legibility.
*
* Includes AGENTS.md (routing map), ARCHITECTURE.md (system overview), and docs
* tree structure (product specs, design docs, execution plans, records, generated).
* Phase C syncs these to disk, stamps them with version markers, and records manifest
* entries (ADR-021).
*/
export const SCAFFOLD_FILES: ScaffoldFile[] = [
{
path: ".siftignore",

View file

@ -10,6 +10,7 @@
import { randomUUID } from "node:crypto";
import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { atomicWriteSync } from "../atomic-write.js";
import type {
ExtensionAPI,
ExtensionContext,
@ -636,9 +637,8 @@ export async function autoLoop(
const unitPhaseResult = await runUnitPhaseViaContract(dispatchContract, ic, iterData, loopState);
if (unitPhaseResult.action === "next") {
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
if (d?.requestDispatchedAt) {
s.lastRequestTimestamp = d.requestDispatchedAt;
}
const requestTimestamp = d?.requestDispatchedAt ?? d?.unitStartedAt;
if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
}
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
unitType: iterData.unitType,
@ -945,9 +945,8 @@ export async function autoLoop(
);
if (unitPhaseResult.action === "next") {
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
if (d?.requestDispatchedAt) {
s.lastRequestTimestamp = d.requestDispatchedAt;
}
const requestTimestamp = d?.requestDispatchedAt ?? d?.unitStartedAt;
if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
}
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
unitType: iterData.unitType,

View file

@ -96,6 +96,7 @@ import {
getRequiredWorkflowToolsForAutoUnit,
getWorkflowTransportSupportError,
} from "../workflow-mcp.js";
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
import { detectStuck } from "./detect-stuck.js";
import {
FINALIZE_POST_TIMEOUT_MS,
@ -233,7 +234,7 @@ export function requiresHumanProductionMutationApproval(text: string): boolean {
export function _resolveDispatchGuardBasePath(
s: Pick<AutoSession, "originalBasePath" | "basePath">,
): string {
return s.originalBasePath || s.basePath;
return resolveWorktreeProjectRoot(s.basePath, s.originalBasePath);
}
const PLANNING_FLOW_GATE_PHASES: ReadonlySet<Phase> = new Set([
@ -2784,6 +2785,20 @@ export async function runFinalize(
});
} else {
// s.pendingVerificationRetry was set by postUnitPreVerification.
// Emit a dedicated journal event so forensics can distinguish bounded
// verification retries from genuine stuck-loop dispatch repetitions (#4540).
const retryInfo = s.pendingVerificationRetry;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "artifact-verification-retry",
data: {
unitType: _preUnitSnapshot?.type,
unitId: retryInfo?.unitId,
attempt: retryInfo?.attempt,
},
});
// Continue the loop — next iteration will inject the retry context into the prompt.
debugLog("autoLoop", {
phase: "artifact-verification-retry",
@ -2995,3 +3010,6 @@ export async function runFinalize(
return { action: "next", data: undefined as undefined };
}
// ─── GAP-12: exported alias ───────────────────────────────────────────────────
export const resetSessionTimeoutState = resetConsecutiveSessionTimeouts;

View file

@ -14,7 +14,7 @@ import {
resolvePersistModelChanges,
} from "../preferences.js";
import { logWarning } from "../workflow-logger.js";
import { _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
import { _clearCurrentResolve, _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
import type { AutoSession } from "./session.js";
import { NEW_SESSION_TIMEOUT_MS } from "./session.js";
import {
@ -45,12 +45,42 @@ export async function runUnit(
): Promise<UnitResult> {
debugLog("runUnit", { phase: "start", unitType, unitId });
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
// new session reads process.cwd() during construction to anchor its tool
// runtime and system prompt; if cwd has drifted (async_bash, background
// jobs, prior unit cleanup), the session would otherwise be rooted to the
// wrong directory. Must be synchronous — no awaits between chdir and
// newSession (#1389, #4762 follow-up).
try {
if (process.cwd() !== s.basePath) {
process.chdir(s.basePath);
}
} catch (e) {
const msg = `Failed to chdir to basePath before newSession (basePath: ${s.basePath}): ${String(e)}`;
logWarning("engine", msg, { basePath: s.basePath, error: String(e) });
return {
status: "cancelled",
errorContext: {
message: msg,
category: "session-failed",
isTransient: false,
},
};
}
// ── Session creation with timeout ──
debugLog("runUnit", { phase: "session-create", unitType, unitId });
let sessionResult: { cancelled: boolean };
let sessionTimeoutHandle: ReturnType<typeof setTimeout> | undefined;
const mySessionSwitchGeneration = ++sessionSwitchGeneration;
// GAP-07: Cancellation controller for newSession(). When the session-creation
// timeout fires, we abort this controller so that any still-in-flight
// newSession() work (which may clobber process.cwd()) is signalled to stop.
// Note: SF's newSession() does not currently accept abortSignal in its
// options type, so we cannot pass it directly — but aborting the controller
// documents the intent clearly and is a no-op call site when the API adds it.
const sessionAbortController = new AbortController();
_setSessionSwitchInFlight(true);
try {
const sessionPromise = s.cmdCtx!.newSession().finally(() => {
@ -60,7 +90,10 @@ export async function runUnit(
});
const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => {
sessionTimeoutHandle = setTimeout(
() => resolve({ cancelled: true }),
() => {
sessionAbortController.abort();
resolve({ cancelled: true });
},
NEW_SESSION_TIMEOUT_MS,
);
});
@ -102,15 +135,23 @@ export async function runUnit(
return { status: "cancelled" };
}
// GAP-09: Hard-cancel if setModel fails rather than continuing with the
// wrong model. Running with an unexpected model wastes the unit and can
// cause quota / pricing surprises.
if (s.currentUnitModel && typeof pi.setModel === "function") {
const restored = await pi.setModel(s.currentUnitModel, {
const modelId = s.currentUnitModel;
const restored = await pi.setModel(modelId, {
persist: resolvePersistModelChanges(),
});
if (!restored) {
ctx.ui.notify(
`Failed to restore ${s.currentUnitModel.provider}/${s.currentUnitModel.id} after session creation. Using session default.`,
"warning",
);
return {
status: "cancelled",
errorContext: {
message: `setModel failed for ${modelId.provider}/${modelId.id}`,
category: "session-failed",
isTransient: false,
},
};
}
}
@ -122,18 +163,33 @@ export async function runUnit(
_setCurrentResolve(resolve);
});
// Ensure cwd matches basePath before dispatch (#1389).
// async_bash and background jobs can drift cwd away from the worktree.
// Realigning here prevents commits from landing on the wrong branch.
try {
if (process.cwd() !== s.basePath) {
process.chdir(s.basePath);
// GAP-08: Provider request-readiness pre-check (#4555).
// Verify the provider can accept requests before dispatching. If the token
// has expired since bootstrap, return cancelled immediately so the unit is
// not wasted on a guaranteed 401.
{
const provider = s.currentUnitModel?.provider ?? ctx.model?.provider;
if (provider != null && typeof ctx.modelRegistry.isProviderRequestReady === "function") {
let ready = false;
try {
ready = ctx.modelRegistry.isProviderRequestReady(provider);
} catch {
ready = false;
}
if (!ready) {
_clearCurrentResolve();
return {
status: "cancelled",
errorContext: {
message: `Provider ${provider} is not request-ready (login/token expired)`,
category: "provider",
isTransient: false,
},
};
}
}
} catch (e) {
logWarning("engine", "Failed to chdir to basePath before dispatch", {
basePath: s.basePath,
error: String(e),
});
}
// ── Send the prompt ──

View file

@ -502,6 +502,7 @@ export function registerHooks(
pi.on("tool_call", async (event, ctx) => {
if (!isAutoActive()) return;
safetyRecordToolCall(
event.toolCallId,
event.toolName,
event.input as Record<string, unknown>,
);

View file

@ -644,6 +644,10 @@ async function buildCarryForwardLines(
);
}
/**
* Build resume state section from CONTINUE.md or legacy continue.md.
* Returns progress, completed work, and next action if available.
*/
async function buildResumeSection(
basePath: string,
milestoneId: string,
@ -694,6 +698,10 @@ async function buildResumeSection(
return lines.join("\n");
}
/**
* Extract slice plan excerpt with goal, demo, verification, and observability.
* Returns formatted section for task execution context.
*/
function extractSliceExecutionExcerpt(
content: string | null,
relPath: string,
@ -726,6 +734,10 @@ function extractSliceExecutionExcerpt(
return parts.join("\n");
}
/**
* Extract a markdown section by heading name from content.
* Returns section content until next heading or null if not found.
*/
function extractMarkdownSection(
content: string,
heading: string,
@ -741,10 +753,16 @@ function extractMarkdownSection(
return rest.slice(0, end).trim();
}
/**
 * Escape every regex metacharacter in `value` so the result can be embedded
 * verbatim inside a RegExp pattern without changing its meaning.
 */
function escapeRegExp(value: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (match) => `\\${match}`);
}
/**
 * Collapse all runs of whitespace (spaces, tabs, newlines) into single
 * spaces and strip leading/trailing whitespace, yielding a one-line string.
 */
function oneLine(text: string): string {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}

View file

@ -112,6 +112,10 @@ const GATE_SAFE_TOOLS = new Set([
"search_and_read",
]);
/**
* Snapshot of write gate state: depth verification, queue phase, and pending gates.
* Persisted to .sf/runtime/write-gate-state.json for cross-process coordination.
*/
export interface WriteGateSnapshot {
verifiedDepthMilestones: string[];
activeQueuePhase: boolean;

View file

@ -22,6 +22,9 @@ const sfHome = process.env.SF_HOME || join(homedir(), ".sf");
// ─── Types (mirrored from extension-registry.ts) ────────────────────────────
/**
* Extension manifest metadata including capabilities and dependencies.
*/
interface ExtensionManifest {
id: string;
name: string;
@ -41,6 +44,9 @@ interface ExtensionManifest {
};
}
/**
* Extension registry entry tracking enabled/disabled status and source.
*/
interface ExtensionRegistryEntry {
id: string;
enabled: boolean;
@ -49,6 +55,9 @@ interface ExtensionRegistryEntry {
disabledReason?: string;
}
/**
* Extension registry mapping extension IDs to their status entries.
*/
interface ExtensionRegistry {
version: 1;
entries: Record<string, ExtensionRegistryEntry>;
@ -56,14 +65,23 @@ interface ExtensionRegistry {
// ─── Registry I/O ───────────────────────────────────────────────────────────
/**
 * Absolute path of the extension registry file:
 * <sfHome>/extensions/registry.json.
 */
function getRegistryPath(): string {
  const extensionsDir = join(sfHome, "extensions");
  return join(extensionsDir, "registry.json");
}
/**
 * Absolute path of the directory that holds agent extensions:
 * <sfHome>/agent/extensions.
 */
function getAgentExtensionsDir(): string {
  const agentDir = join(sfHome, "agent");
  return join(agentDir, "extensions");
}
/**
* Load the extension registry, defaulting to an empty registry on error.
*/
function loadRegistry(): ExtensionRegistry {
const filePath = getRegistryPath();
try {
@ -84,6 +102,9 @@ function loadRegistry(): ExtensionRegistry {
}
}
/**
* Save the extension registry to disk (atomic via temp file).
*/
function saveRegistry(registry: ExtensionRegistry): void {
const filePath = getRegistryPath();
try {
@ -96,12 +117,18 @@ function saveRegistry(registry: ExtensionRegistry): void {
}
}
/**
 * Whether the given extension is enabled. Extensions with no registry
 * entry default to enabled (opt-out model).
 */
function isEnabled(registry: ExtensionRegistry, id: string): boolean {
  return registry.entries[id]?.enabled ?? true;
}
/**
* Load extension manifest from a directory, or null if not found/invalid.
*/
function readManifest(dir: string): ExtensionManifest | null {
const mPath = join(dir, "extension-manifest.json");
if (!existsSync(mPath)) return null;
@ -115,6 +142,9 @@ function readManifest(dir: string): ExtensionManifest | null {
}
}
/**
* Discover all extension manifests from the agent extensions directory.
*/
function discoverManifests(): Map<string, ExtensionManifest> {
const extDir = getAgentExtensionsDir();
const manifests = new Map<string, ExtensionManifest>();
@ -129,6 +159,9 @@ function discoverManifests(): Map<string, ExtensionManifest> {
// ─── Command Handler ────────────────────────────────────────────────────────
/**
* Handler for /sf extensions subcommands (list, enable, disable, info).
*/
export async function handleExtensions(
args: string,
ctx: ExtensionCommandContext,
@ -162,6 +195,9 @@ export async function handleExtensions(
);
}
/**
* List all discovered extensions with their status and capabilities.
*/
function handleList(ctx: ExtensionCommandContext): void {
const manifests = discoverManifests();
const registry = loadRegistry();
@ -211,6 +247,9 @@ function handleList(ctx: ExtensionCommandContext): void {
ctx.ui.notify(lines.join("\n"), "info");
}
/**
* Enable a disabled extension in the registry.
*/
function handleEnable(
id: string | undefined,
ctx: ExtensionCommandContext,

View file

@ -48,6 +48,9 @@ const TASK_COUNT_TIERS: [number, number][] = [
// ─── Types ───────────────────────────────────────────────────────────────────
/**
* Result of truncation at section boundaries (content + dropped section count).
*/
export interface TruncationResult {
/** The (possibly truncated) content string */
content: string;
@ -55,6 +58,9 @@ export interface TruncationResult {
droppedSections: number;
}
/**
* Proportional character budget allocation for a context window.
*/
export interface BudgetAllocation {
/** Character budget for dependency/prior-task summaries */
summaryBudgetChars: number;
@ -71,6 +77,9 @@ export interface BudgetAllocation {
// ─── Minimal interface slices for dependency injection ───────────────────────
// These avoid coupling to full ModelRegistry/SFPreferences types in tests.
/**
* Minimal model interface for context window resolution.
*/
export interface MinimalModel {
id: string;
provider: string;

View file

@ -9,11 +9,17 @@ import type { Decision, Requirement } from "./types.js";
// ─── Query Functions ───────────────────────────────────────────────────────
/**
* Options for filtering decisions by milestone and scope.
*/
export interface DecisionQueryOpts {
milestoneId?: string;
scope?: string;
}
/**
* Options for filtering requirements by milestone, slice, and status.
*/
export interface RequirementQueryOpts {
milestoneId?: string;
sliceId?: string;

View file

@ -8,6 +8,13 @@ import { deriveState } from "./state.js";
import { readEvents } from "./workflow-events.js";
import { renderAllProjections } from "./workflow-projections.js";
/**
* Check SF engine health: database constraints, projection drift, and corruption.
*
* Verifies orphaned tasks/slices, duplicate IDs, and missing task summaries.
* Re-renders stale markdown projections when event log is newer than cached files.
* Non-fatal: issues are reported but never auto-fixed.
*/
export async function checkEngineHealth(
basePath: string,
issues: DoctorIssue[],

View file

@ -50,6 +50,7 @@ export type JournalEventType =
| "worktree-skip"
| "worktree-merge-start"
| "worktree-merge-failed"
| "artifact-verification-retry"
// #4764 — worktree lifespan / divergence telemetry
| "worktree-created"
| "worktree-merged"
@ -59,7 +60,10 @@ export type JournalEventType =
| "canonical-root-redirect"
// #4765 — slice-cadence collapse
| "slice-merged"
| "milestone-resquash";
| "milestone-resquash"
// dispatch telemetry — measure agent/subagent invocation frequency and shape
| "subagent-invoked"
| "subagent-completed";
/** A single structured event in the journal. */
export interface JournalEntry {

View file

@ -20,9 +20,19 @@ import {
} from "./notification-store.js";
import { formattedShortcutPair } from "./shortcut-defs.js";
/**
* Filter mode for notification display. Controls which severity levels are shown.
*/
type FilterMode = "all" | "error" | "warning" | "info";
/**
* Cycle of filter modes used when cycling through filter states with 'f' key.
*/
const FILTER_CYCLE: FilterMode[] = ["all", "error", "warning", "info"];
/**
* Returns a single-character icon representing the notification severity.
*/
function severityIcon(severity: NotifySeverity): string {
switch (severity) {
case "error":
@ -36,7 +46,10 @@ function severityIcon(severity: NotifySeverity): string {
}
}
/**
* Word-wrap plain text to fit within maxWidth columns. Splits on whitespace,
* handles single words longer than maxWidth by truncating with ellipsis.
*/
function wrapText(text: string, maxWidth: number): string[] {
if (text.length <= maxWidth) return [text];
const words = text.split(/\s+/);
@ -59,6 +72,10 @@ function wrapText(text: string, maxWidth: number): string[] {
);
}
/**
* Format an ISO timestamp for display as relative time ("just now", "5m ago", etc.).
* Falls back to HH:MM:SS if parsing fails.
*/
function formatTimestamp(ts: string): string {
try {
const d = new Date(ts);

View file

@ -19,9 +19,21 @@ import { join } from "node:path";
// ─── Types ──────────────────────────────────────────────────────────────
/**
* Severity level for notifications. Indicates the urgency and importance of the notification.
*/
export type NotifySeverity = "info" | "success" | "warning" | "error";
/**
* Source origin for notifications. Indicates whether the notification came from explicit
* notify() calls or workflow-logger warnings.
*/
export type NotificationSource = "notify" | "workflow-logger";
/**
* Optional metadata attached to a notification. Provides context about
* notification classification, blocking behavior, and deduplication strategy.
*/
export interface NotificationMetadata {
kind?: "notice" | "approval_request" | "progress" | "terminal";
blocking?: boolean;
@ -29,6 +41,10 @@ export interface NotificationMetadata {
source?: string;
}
/**
* A persisted notification entry in the store. Contains all metadata and
* content necessary to reconstruct the notification UI state across sessions.
*/
export interface NotificationEntry {
id: string;
ts: string;

View file

@ -93,6 +93,9 @@ function detectFileOverlaps(
* 3. It does not have file overlap with other eligible milestones
* (overlaps are flagged as warnings but do not disqualify)
*
* Analyzes which milestones are eligible for parallel execution based on
* deps and file overlaps.
*/
export async function analyzeParallelEligibility(
basePath: string,
): Promise<ParallelCandidates> {
@ -214,6 +217,9 @@ export async function analyzeParallelEligibility(
/**
* Format parallel-eligibility analysis results into a human-readable markdown report.
*/
export function formatEligibilityReport(
candidates: ParallelCandidates,
): string {

View file

@ -37,6 +37,9 @@ export type MergeOrder = "sequential" | "by-completion";
* Uses a subprocess to avoid disrupting the global DB singleton.
* Returns true when milestones.status = 'complete' in the worktree's sf.db.
*/
export function isMilestoneCompleteInWorktreeDb(
basePath: string,
mid: string,
@ -95,6 +98,9 @@ function discoverDbCompletedMilestones(basePath: string): Set<string> {
* are included if their worktree DB shows status='complete'.
* See: https://github.com/singularity-forge/sf-run/issues/2812
*
* Determines the merge order for completed milestones (sequential or by-completion).
*/
export function determineMergeOrder(
workers: WorkerInfo[],
order: MergeOrder = "sequential",
@ -151,6 +157,9 @@ export function determineMergeOrder(
* Attempt to merge a single milestone's worktree back to main.
* Wraps mergeMilestoneToMain with error handling for parallel context.
*
* Returns success on merge, or conflict details on failure.
*/
export async function mergeCompletedMilestone(
basePath: string,
milestoneId: string,
@ -208,6 +217,9 @@ export async function mergeCompletedMilestone(
* Merge all completed milestones in sequence.
* Stops on first conflict and returns results so far.
*/
export async function mergeAllCompleted(
basePath: string,
workers: WorkerInfo[],
@ -232,6 +244,9 @@ export async function mergeAllCompleted(
/**
* Format merge operation results into a markdown report with conflicts and errors.
*/
export function formatMergeResults(results: MergeResult[]): string {
if (results.length === 0) return "No completed milestones to merge.";

View file

@ -49,7 +49,9 @@ export type {
SFPhaseModelConfig,
} from "./preferences-types.js";
/** Map of provider ID to allowed model ID patterns. */
export type ProviderModelAllowList = Record<string, readonly string[]>;
/** Map of provider ID to blocked model ID patterns. */
export type ProviderModelBlockList = Record<string, readonly string[]>;
type ProviderPolicyModel = {
@ -160,7 +162,10 @@ function isModelAllowedByBuiltInProviderPolicy(
return true;
}
export function isProviderModelAllowed(
export /**
* Check if a provider/model pair is allowed by built-in and user-configured policies.
*/
function isProviderModelAllowed(
provider: string,
modelId: string,
providerModelAllow: ProviderModelAllowList | undefined,
@ -189,6 +194,9 @@ export function isProviderModelAllowed(
);
}
/**
* Filter models by provider/model allow and block lists from user preferences.
*/
export function filterModelsByProviderModelAllow<
T extends { provider: string; id: string },
>(
@ -216,6 +224,9 @@ export function filterModelsByProviderModelAllow<
);
}
/**
* Check if a provider is in the allowed list and not in the blocked list.
*/
export function isProviderAllowedByLists(
provider: string,
allowedProviders: readonly string[] | undefined,
@ -247,17 +258,9 @@ export function resolveModelForUnit(unitType: string): string | undefined {
* - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
*/
/**
* Fallback resolver used when the user hasn't pinned `models.<unit>`:
* iterate every model the pi-ai catalog knows about whose provider is in
* `allowed_providers` (or every provider, if the allow-list is unset),
* score them with the unit-type-specific benchmark profile, and return
* the top pick plus diversified fallbacks.
*
* Pulls the candidate pool from `models.generated.js` rather than a live
* registry lookup so it works during preference resolution (before the
* registry is populated). The dispatch-time availability check happens
* downstream in auto-model-selection.ts and filters unavailable
* candidates naturally (expired keys, providers without auth, etc.).
* Auto-benchmark model picker when user hasn't pinned `models.<unit>`.
* Scores candidates using unit-type-specific benchmark profiles and returns top pick plus fallbacks.
* Works during preference resolution by pulling from pi-ai catalog rather than live registry.
*/
function resolveAutoBenchmarkPickForUnit(
unitType: string,

View file

@ -106,6 +106,10 @@ function normalizedTask(unit: ProductionMutationUnit): string {
return [unit.taskTitle, unit.taskText].join("\n").toLowerCase();
}
/**
* Assess whether LLM auto-approval criteria are met for a production mutation.
* Returns approved=true only if all safety constraints are present in task text.
*/
export function assessLlmProductionMutationApproval(
unit: ProductionMutationUnit,
): { approved: boolean; reasons: string[] } {
@ -149,6 +153,10 @@ export function assessLlmProductionMutationApproval(
return { approved: reasons.length === 0, reasons };
}
/**
* Build a fully approved production mutation approval using LLM policy.
* Caller must verify assessLlmProductionMutationApproval() returned approved=true first.
*/
export function buildLlmProductionMutationApproval(
unit: ProductionMutationUnit,
approvedAt: Date = new Date(),
@ -181,6 +189,10 @@ export function buildLlmProductionMutationApproval(
};
}
/**
* Assess and atomically write an LLM-approved production mutation approval if criteria match.
* Returns approved=false with reasons if any safety constraint is missing.
*/
export function approveProductionMutationWithLlmPolicy(
basePath: string,
unit: ProductionMutationUnit,
@ -207,6 +219,9 @@ export function approveProductionMutationWithLlmPolicy(
return { path, approved: true, reasons: [], wrote: true };
}
/**
* Create an empty approval template if it doesn't exist. No-op if already present.
*/
export function ensureProductionMutationApprovalTemplate(
basePath: string,
unit: ProductionMutationUnit,
@ -237,6 +252,10 @@ function nonEmptyStringArray(value: unknown): value is string[] {
);
}
/**
* Validate a parsed approval JSON against schema and safety constraints.
* Returns approved=false with reasons if any field is missing or invalid.
*/
export function validateProductionMutationApproval(
data: unknown,
unit: ProductionMutationUnit,
@ -300,6 +319,9 @@ export function validateProductionMutationApproval(
return { approved: reasons.length === 0, reasons };
}
/**
* Read and validate approval status from disk. Returns rejected reasons if validation fails.
*/
export function readProductionMutationApprovalStatus(
basePath: string,
unit: ProductionMutationUnit,

View file

@ -3,12 +3,25 @@
* Tracks every bash command, file write, and file edit during a unit execution.
* Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
*
* Evidence is persisted to .sf/safety/evidence-<mid>-<sid>-<tid>.json so it
* survives session restarts (pause/resume, crash recovery). On unit start,
* call resetEvidence() then loadEvidenceFromDisk(). On every new tool call,
* saveEvidenceToDisk() is called automatically by recordToolCall/recordToolResult.
*
* Follows the same module-level Map pattern as auto-tool-tracking.ts.
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
renameSync,
unlinkSync,
} from "node:fs";
import { join, dirname } from "node:path";
import { randomBytes } from "node:crypto";
// ─── Types ──────────────────────────────────────────────────────────────────
@ -41,22 +54,11 @@ export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
let unitEvidence: EvidenceEntry[] = [];
// Disk persistence: unit context set by resetEvidence() at unit start.
// Guarded by presence of currentUnitId — if absent, disk write is skipped.
let currentUnitId: string | undefined = undefined;
let currentBasePath: string | undefined = undefined;
// ─── Public API ─────────────────────────────────────────────────────────────
/**
* Reset all evidence for a new unit. Call at unit start.
* @param unitId - The active unit ID (e.g. M006/S02/T03)
* @param basePath - Project root path for computing the evidence file path
*/
export function resetEvidence(unitId?: string, basePath?: string): void {
/** Reset all evidence for a new unit. Call at unit start. */
export function resetEvidence(): void {
unitEvidence = [];
currentUnitId = unitId;
currentBasePath = basePath;
}
/** Get a read-only view of all evidence collected for the current unit. */
@ -79,123 +81,78 @@ export function getFilePaths(): string[] {
.map((e) => e.path);
}
// ─── Disk Persistence ──────────────────────────────────────────────────────
// ─── Persistence (Bug #4385 — evidence must survive session restarts) ────────
/**
* Append an evidence entry to .sf/active/{unitId}/evidence.jsonl.
*
* Purpose: Evidence collected at tool_call time must survive a mid-unit re-dispatch
* race where runUnitPhase re-fires between tool_call and tool_execution_end.
* Without disk persistence, in-memory evidence is lost on re-dispatch.
*
* Consumer: phases.ts runUnitPhase calls resetEvidence(unitId, basePath) at unit start.
* Best-effort: disk write failures are silently swallowed so they never crash the agent.
* Build the path for the evidence JSON file for a given unit.
* Lives under .sf/safety/ which is gitignored and session-scoped.
*/
function saveEvidenceToDisk(entry: EvidenceEntry): void {
if (!currentUnitId || !currentBasePath) return;
try {
const dir = join(currentBasePath, ".sf", "active", currentUnitId);
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true });
}
const line = JSON.stringify(entry);
appendFileSync(join(dir, "evidence.jsonl"), line + "\n");
} catch {
// Best-effort: disk write failures must not crash the agent.
}
}
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
/**
* Record a tool call at dispatch time (before execution).
* Exit codes and output are filled in by recordToolResult after execution.
*/
export function recordToolCall(
toolName: string,
input: Record<string, unknown>,
): void {
let entry: EvidenceEntry | undefined;
if (toolName === "bash" || toolName === "Bash") {
entry = {
kind: "bash",
toolCallId: "",
command: String(input.command ?? ""),
exitCode: -1,
outputSnippet: "",
timestamp: Date.now(),
};
} else if (toolName === "write" || toolName === "Write") {
entry = {
kind: "write",
toolCallId: "",
path: String(input.file_path ?? input.path ?? ""),
timestamp: Date.now(),
};
} else if (toolName === "edit" || toolName === "Edit") {
entry = {
kind: "edit",
toolCallId: "",
path: String(input.file_path ?? input.path ?? ""),
timestamp: Date.now(),
};
}
if (entry) {
unitEvidence.push(entry);
saveEvidenceToDisk(entry);
}
/**
 * Build the on-disk path for a unit's persisted evidence JSON file,
 * stored under .sf/safety/ and keyed by milestone/slice/task IDs.
 */
function evidencePath(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): string {
  const fileName = `evidence-${milestoneId}-${sliceId}-${taskId}.json`;
  const safetyDir = join(basePath, ".sf", "safety");
  return join(safetyDir, fileName);
}
/**
 * Record a tool execution result. Matches the most recent unresolved entry
 * of the same kind (toolCallId still empty) and fills in the toolCallId,
 * exit code, and output snippet, then persists the updated entry to disk.
 */
export function recordToolResult(
  toolCallId: string,
  toolName: string,
  result: unknown,
  isError: boolean,
): void {
  const normalizedName = toolName.toLowerCase();
  if (normalizedName === "bash") {
    const entry = findLastUnresolved("bash") as BashEvidence | undefined;
    if (entry) {
      entry.toolCallId = toolCallId;
      const text = extractResultText(result);
      // First 500 chars are enough to audit what the command produced.
      entry.outputSnippet = text.slice(0, 500);
      // Prefer the explicit exit-code marker in the output; fall back to the
      // tool-level error flag when the marker is absent.
      const exitMatch = text.match(/Command exited with code (\d+)/);
      entry.exitCode = exitMatch ? Number(exitMatch[1]) : isError ? 1 : 0;
      saveEvidenceToDisk(entry);
    }
  } else if (normalizedName === "write" || normalizedName === "edit") {
    const entry = findLastUnresolved(normalizedName as "write" | "edit");
    if (entry) {
      entry.toolCallId = toolCallId;
      saveEvidenceToDisk(entry);
    }
  }
}
// ─── Disk Load (session resume) ────────────────────────────────────────────
/** Build the evidence.jsonl path for a unit under <basePath>/.sf/active. */
function evidencePath(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): string {
  const unitSegment = `${milestoneId}/${sliceId}/${taskId}`;
  return join(basePath, ".sf", "active", unitSegment, "evidence.jsonl");
}
/**
 * Validate that a parsed value is an array of EvidenceEntry objects.
 * Rejects corrupt / schema-mismatch data rather than letting it poison state.
 *
 * Bug fix: a stray unconditional `return` on the common-field check made all
 * of the per-kind field validation below it unreachable, so entries missing
 * timestamp/command/path were accepted.
 */
function isEvidenceArray(data: unknown): data is EvidenceEntry[] {
  if (!Array.isArray(data)) return false;
  return data.every((e) => {
    if (e === null || typeof e !== "object") return false;
    const rec = e as Record<string, unknown>;
    // Fields required on every entry kind.
    if (typeof rec.toolCallId !== "string") return false;
    if (typeof rec.timestamp !== "number") return false;
    // Kind-specific required fields.
    if (rec.kind === "bash") {
      return (
        typeof rec.command === "string" &&
        typeof rec.exitCode === "number" &&
        typeof rec.outputSnippet === "string"
      );
    }
    if (rec.kind === "write" || rec.kind === "edit") {
      return typeof rec.path === "string";
    }
    return false;
  });
}
/**
 * Persist the current in-memory evidence to disk so it survives a session
 * restart. Called after recordToolCall/recordToolResult mutate the array.
 * Non-fatal: persistence failures must never break unit execution.
 *
 * Writes atomically (random-suffixed temp file + rename) so a crash
 * mid-write cannot leave a truncated evidence file behind.
 */
export function saveEvidenceToDisk(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): void {
  try {
    const path = evidencePath(basePath, milestoneId, sliceId, taskId);
    mkdirSync(dirname(path), { recursive: true });
    const tmp = `${path}.tmp.${randomBytes(4).toString("hex")}`;
    writeFileSync(tmp, JSON.stringify(unitEvidence, null, 2) + "\n", "utf-8");
    renameSync(tmp, path);
  } catch {
    // Non-fatal — don't let persistence failures break unit execution
  }
}
/**
* Load persisted evidence from disk into the in-memory array.
* Call after resetEvidence() on session resume to restore context for a
* partially-executed unit. If the file does not exist (fresh unit), this
* is a no-op getEvidence() will return [] which is correct.
*/
export function loadEvidenceFromDisk(
basePath: string,
@ -206,37 +163,98 @@ export function loadEvidenceFromDisk(
try {
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
if (!existsSync(path)) return;
const lines = readFileSync(path, "utf-8")
.split("\n")
.filter((l) => l.trim().length > 0);
const entries: EvidenceEntry[] = [];
for (const line of lines) {
try {
const parsed = JSON.parse(line);
entries.push(parsed as EvidenceEntry);
} catch {
// Skip malformed lines
}
}
if (isEvidenceArray(entries)) {
unitEvidence = entries;
const raw = readFileSync(path, "utf-8");
const parsed = JSON.parse(raw);
if (isEvidenceArray(parsed)) {
unitEvidence = parsed;
}
} catch {
// Non-fatal — corrupt / missing file is treated as empty evidence
}
}
// ─── Internals ──────────────────────────────────────────────────────────────
function findLastUnresolved(kind: string): EvidenceEntry | undefined {
for (let i = unitEvidence.length - 1; i >= 0; i--) {
if (unitEvidence[i].kind === kind && unitEvidence[i].toolCallId === "") {
return unitEvidence[i];
/**
* Delete the persisted evidence file for a unit after it has been fully
* processed. Prevents stale evidence from affecting future retries of
* the same unit ID.
*/
export function clearEvidenceFromDisk(
basePath: string,
milestoneId: string,
sliceId: string,
taskId: string,
): void {
try {
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
if (existsSync(path)) {
unlinkSync(path);
}
} catch {
// Non-fatal
}
return undefined;
}
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
/**
 * Record a tool call at dispatch time (before execution).
 * Exit codes and output are filled in by recordToolResult after execution,
 * matched via the toolCallId assigned here.
 */
export function recordToolCall(
  toolCallId: string,
  toolName: string,
  input: Record<string, unknown>,
): void {
  const timestamp = Date.now();
  switch (toolName) {
    case "bash":
    case "Bash":
      unitEvidence.push({
        kind: "bash",
        toolCallId,
        command: String(input.command ?? ""),
        exitCode: -1,
        outputSnippet: "",
        timestamp,
      });
      break;
    case "write":
    case "Write":
      unitEvidence.push({
        kind: "write",
        toolCallId,
        path: String(input.file_path ?? input.path ?? ""),
        timestamp,
      });
      break;
    case "edit":
    case "Edit":
      unitEvidence.push({
        kind: "edit",
        toolCallId,
        path: String(input.file_path ?? input.path ?? ""),
        timestamp,
      });
      break;
  }
}
/**
 * Record a tool execution result. Matches the entry by toolCallId (assigned
 * at dispatch time) and fills in exit code + output. Prior versions matched
 * by `kind + empty-string` which corrupted parallel tool calls.
 */
export function recordToolResult(
  toolCallId: string,
  toolName: string,
  result: unknown,
  isError: boolean,
): void {
  const entry = unitEvidence.find((e) => e.toolCallId === toolCallId);
  // Only bash entries carry result data; write/edit entries are complete at
  // dispatch time.
  if (entry === undefined || entry.kind !== "bash") return;
  const text = extractResultText(result);
  entry.outputSnippet = text.slice(0, 500);
  const exitMatch = text.match(/Command exited with code (\d+)/);
  if (exitMatch) {
    entry.exitCode = Number(exitMatch[1]);
  } else {
    entry.exitCode = isError ? 1 : 0;
  }
}
// ─── Internals ──────────────────────────────────────────────────────────────
function extractResultText(result: unknown): string {
if (typeof result === "string") return result;
if (result && typeof result === "object") {

View file

@ -4,7 +4,9 @@
*
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
* Defaults to git diff HEAD~1 --name-only after auto-commit. Deferred-commit
* Compares against `git diff-tree --root --no-commit-id -r --name-only HEAD` after auto-commit.
* Using diff-tree --root handles initial commits, shallow clones, and merge commits correctly
* (Bug git diff HEAD~1 failed on initial commits). Deferred-commit
* flows can instead validate the staged diff before the commit is created.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
@ -159,15 +161,12 @@ function getChangedFilesFromLastCommit(basePath: string): string[] | null {
try {
const result = execFileSync(
"git",
["diff", "--name-only", "HEAD~1", "HEAD"],
["diff-tree", "--root", "--no-commit-id", "-r", "--name-only", "HEAD"],
{ cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
).trim();
return result ? result.split("\n").filter(Boolean) : [];
} catch (e) {
logWarning(
"safety",
`git diff failed in file-change-validator: ${(e as Error).message}`,
);
logWarning("safety", `git diff failed in file-change-validator: ${(e as Error).message}`);
return null;
}
}

View file

@ -26,7 +26,7 @@ export function createCheckpoint(
unitId: string,
): string | null {
try {
const sha = execFileSync("git", ["rev-parse", "HEAD"], {
const sha = execFileSync("git", ["rev-parse", "--verify", "HEAD"], {
cwd: basePath,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
@ -48,6 +48,17 @@ export function createCheckpoint(
return sha;
} catch (e) {
const stderr = (e as { stderr?: Buffer | string }).stderr;
const stderrText = Buffer.isBuffer(stderr)
? stderr.toString("utf-8")
: String(stderr ?? "");
if (
stderrText.includes("Needed a single revision") ||
stderrText.includes("unknown revision") ||
stderrText.includes("ambiguous argument 'HEAD'")
) {
return null;
}
logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
return null;
}

View file

@ -114,12 +114,15 @@ export type {
FileWriteEvidence,
} from "./evidence-collector.js";
export {
clearEvidenceFromDisk,
getBashEvidence,
getEvidence,
getFilePaths,
loadEvidenceFromDisk,
recordToolCall,
recordToolResult,
resetEvidence,
saveEvidenceToDisk,
} from "./evidence-collector.js";
export type {
ClaimedEvidence,

View file

@ -53,6 +53,9 @@ const BACKLOG_HEADER =
export type SelfFeedbackSeverity = "critical" | "high" | "medium" | "low";
/**
* Context where a feedback entry occurred (milestone/slice/task/unit).
*/
export interface SelfFeedbackOccurredIn {
milestone?: string;
slice?: string;
@ -60,6 +63,9 @@ export interface SelfFeedbackOccurredIn {
unitType?: string;
}
/**
* A self-feedback entry reporting an anomaly or issue detected during auto-mode execution.
*/
export interface SelfFeedbackEntry {
kind: string;
severity: SelfFeedbackSeverity;
@ -105,6 +111,9 @@ export type ResolutionEvidence =
| { kind: "human-clear" }
| { kind: "promoted-to-requirement"; requirementId: string };
/**
* Persisted feedback entry with metadata: ID, timestamp, version, and resolution state.
*/
export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
id: string;
ts: string;
@ -120,6 +129,9 @@ export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
resolvedCriteriaMet?: string[];
}
/**
* Result of recording a self-feedback entry. Contains the persisted entry and blocking status.
*/
export interface RecordResult {
entry: PersistedSelfFeedbackEntry;
/** True when callers should treat the originating unit as blocked. */
@ -308,6 +320,9 @@ export function getBlockedEntries(
);
}
/**
* Input for marking a feedback entry as resolved with evidence and reasoning.
*/
export interface ResolutionInput {
reason: string;
evidence: ResolutionEvidence;
@ -407,6 +422,9 @@ function compareSemver(a: string, b: string): number {
*
* Returns the entries split by retry-eligibility for the dispatcher to act on.
*/
/**
* Result of triaging blocked entries by their version eligibility for retry.
*/
export interface BlockedTriage {
retry: PersistedSelfFeedbackEntry[];
stillBlocked: PersistedSelfFeedbackEntry[];

View file

@ -27,7 +27,8 @@ const activelyLoadedSkills = new Set<string>();
/**
* Capture the list of available skill names at dispatch time.
* Called before each unit starts.
*
* Called before each unit starts to establish the baseline for telemetry.
*/
export function captureAvailableSkills(): void {
const skillsDir = join(homedir(), ".agents", "skills");
@ -44,18 +45,19 @@ export function captureAvailableSkills(): void {
}
/**
 * Record that a skill was actively loaded during execution (its SKILL.md
 * file was read). Call this when the agent reads a SKILL.md file.
 *
 * @param skillName - Name of the skill file that was read.
 */
export function recordSkillRead(skillName: string): void {
  activelyLoadedSkills.add(skillName);
}
/**
* Get the skill names for the current unit and clear state.
* Returns actively loaded skills if any, otherwise available skills.
* This gives the most useful signal: if the agent read specific skills,
* report those; otherwise report what was available.
* Retrieve captured skills for the current unit and reset state.
*
* Returns actively loaded skills if any were read, otherwise returns available skills.
* Clears all telemetry state after retrieval.
*/
export function getAndClearSkills(): string[] {
const result =
@ -68,7 +70,7 @@ export function getAndClearSkills(): string[] {
}
/**
* Reset all telemetry state. Called when auto-mode stops.
* Clear all telemetry state. Called when auto-mode stops.
*/
export function resetSkillTelemetry(): void {
availableSkills = [];
@ -76,8 +78,10 @@ export function resetSkillTelemetry(): void {
}
/**
* Get last-used timestamps for all skills from metrics data.
* Returns a Map from skill name to most recent ms timestamp.
* Extract last-used timestamp for each skill from unit metrics.
*
* @param units - Array of unit metrics with skill lists and timestamps.
* @returns Map from skill name to most recent finishedAt timestamp.
*/
export function getSkillLastUsed(
units: Array<{ finishedAt: number; skills?: string[] }>,
@ -96,8 +100,11 @@ export function getSkillLastUsed(
}
/**
* Detect stale skills those not used within the given threshold (in days).
* Returns skill names that should be deprioritized.
* Identify skills unused since a given threshold, including untracked installs.
*
* @param units - Unit metrics containing skill usage.
* @param thresholdDays - Days of inactivity to consider stale.
* @returns All installed skills exceeding the staleness threshold.
*/
export function detectStaleSkills(
units: Array<{ finishedAt: number; skills?: string[] }>,

View file

@ -37,6 +37,9 @@ export interface EligibleSlice {
* @param completedSliceIds Set of slice IDs that are already complete.
* @returns Array of eligible slice descriptors.
*/
/**
* Determine which slices are eligible to run based on explicit dependencies or positional order.
*/
export function getEligibleSlices(
slices: SliceInput[],
completedSliceIds: Set<string>,

View file

@ -72,6 +72,9 @@ let sliceState: SliceOrchestratorState | null = null;
/**
 * Check whether slice-level parallel execution is currently active.
 */
export function isSliceParallelActive(): boolean {
  return sliceState?.active === true;
}
@ -79,6 +82,9 @@ export function isSliceParallelActive(): boolean {
/**
 * Get the current slice orchestrator state (read-only snapshot), including
 * active workers and cost tracking, or null when none has been created.
 */
export function getSliceOrchestratorState(): SliceOrchestratorState | null {
  return sliceState;
}
@ -89,6 +95,9 @@ export function getSliceOrchestratorState(): SliceOrchestratorState | null {
* For each eligible slice: create a worktree, spawn `sf --mode json --print "/sf autonomous"`
* with env SF_SLICE_LOCK=<SID> + SF_MILESTONE_LOCK=<MID> + SF_PARALLEL_WORKER=1.
*/
/**
* Start parallel execution of eligible slices with worker processes and budget tracking.
*/
export async function startSliceParallel(
basePath: string,
milestoneId: string,
@ -201,6 +210,9 @@ export async function startSliceParallel(
/**
* Stop all slice-parallel workers and deactivate.
*/
/**
* Terminate all active slice workers and clean up their worktrees.
*/
export function stopSliceParallel(): void {
if (!sliceState) return;
@ -235,6 +247,9 @@ export function stopSliceParallel(): void {
/**
* Get aggregate cost across all slice workers.
*/
/**
* Calculate total cost across all active slice workers.
*/
export function getSliceAggregateCost(): number {
if (!sliceState) return 0;
let total = 0;
@ -247,6 +262,9 @@ export function getSliceAggregateCost(): number {
/**
 * Check whether the aggregate slice cost has met or exceeded the budget
 * ceiling. Always false when no ceiling is configured.
 */
export function isSliceBudgetExceeded(): boolean {
  if (!sliceState?.budgetCeiling) return false;
  return getSliceAggregateCost() >= sliceState.budgetCeiling;
}
@ -255,6 +273,9 @@ export function isSliceBudgetExceeded(): boolean {
/**
* Reset module state (for testing).
*/
/**
* Reset orchestrator state and clear all worker cleanup handlers.
*/
export function resetSliceOrchestrator(): void {
if (sliceState) {
for (const w of sliceState.workers.values()) {

View file

@ -0,0 +1,133 @@
/**
* skip-slice handler the core operation behind sf_skip_slice.
*
* Marks a slice as skipped and cascades the skip to every non-closed task in
* that slice. Without the task cascade the deep-check in
* executeCompleteMilestone reports pending tasks inside the skipped slice and
* blocks milestone completion (see #4375).
*
* This function performs DB writes only. The MCP wrapper in
* bootstrap/db-tools.ts handles state-cache invalidation and STATE.md rebuild.
*/
import {
getSlice,
getSliceTasks,
isDbAvailable,
transaction,
updateSliceStatus,
updateTaskStatus,
} from "../sf-db.js";
import { isClosedStatus } from "../status-guards.js";
/**
 * Input parameters for {@link handleSkipSlice}.
 *
 * - `milestoneId` / `sliceId` identify the target slice.
 * - `reason` is a free-form note surfaced in the MCP response; optional
 *   because the caller (e.g. rethink flow) may not have a structured reason.
 */
export interface SkipSliceParams {
  /** Milestone that owns the target slice. */
  milestoneId: string;
  /** Slice to mark as skipped. */
  sliceId: string;
  /** Optional free-form skip reason, echoed back in the result. */
  reason?: string;
}
/**
 * Stable machine-readable error codes for {@link SkipSliceResult.errorCode}.
 * Keep in sync with the wrapper in bootstrap/db-tools.ts.
 */
export type SkipSliceErrorCode = "slice_not_found" | "already_complete";
/**
 * Result of a {@link handleSkipSlice} call.
 *
 * - `tasksSkipped` — count of tasks whose status was cascaded to "skipped".
 *   Zero is a valid success (slice had no non-closed tasks).
 * - `wasAlreadySkipped` — true when the slice was in "skipped" status on
 *   entry; callers can use this to distinguish first-skip from re-skip.
 * - `error` / `errorCode` — set together for recoverable validation failures
 *   (unknown slice, slice already complete). Both absent on success. DB
 *   errors propagate as thrown exceptions and should be caught by the caller.
 */
export interface SkipSliceResult {
  /** Echo of the requested milestone ID. */
  milestoneId: string;
  /** Echo of the requested slice ID. */
  sliceId: string;
  /** Number of tasks cascaded to "skipped" (zero is a valid success). */
  tasksSkipped: number;
  /** True when the slice was already "skipped" on entry. */
  wasAlreadySkipped: boolean;
  /** Echo of the caller-supplied reason, if any. */
  reason?: string;
  /** Human-readable failure message; absent on success. */
  error?: string;
  /** Machine-readable failure code; set together with `error`. */
  errorCode?: SkipSliceErrorCode;
}
/**
 * Mark a slice as "skipped" and cascade the skip to every non-closed task in
 * that slice. Runs as a single transaction so slice status and task statuses
 * are always consistent.
 *
 * Behaviour summary:
 * - Unknown slice → returns {@link SkipSliceResult} with `error`.
 * - Slice already complete/done → returns `error` (cannot un-complete).
 * - Slice already skipped → still cascades leftover non-closed tasks
 *   (heals inconsistent historical state from projects that ran older
 *   versions before the #4375 cascade fix).
 * - Tasks in closed status (complete/done/skipped) are never downgraded.
 */
export function handleSkipSlice(params: SkipSliceParams): SkipSliceResult {
  const { milestoneId, sliceId, reason } = params;
  const base: SkipSliceResult = {
    milestoneId,
    sliceId,
    tasksSkipped: 0,
    wasAlreadySkipped: false,
    reason,
  };
  // Fail loudly on a closed DB so a `null` from getSlice() inside the
  // transaction unambiguously means "slice not found", never "DB unavailable".
  // The MCP wrapper in bootstrap/db-tools.ts runs ensureDbOpen() before calling
  // this helper; this guard protects direct callers (tests, future code).
  if (!isDbAvailable()) {
    throw new Error("handleSkipSlice: SF database is not available");
  }
  // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ────
  let failure: { message: string; code: SkipSliceErrorCode } | null = null;
  let alreadySkipped = false;
  let cascaded = 0;
  transaction(() => {
    const slice = getSlice(milestoneId, sliceId);
    if (!slice) {
      failure = {
        message: `Slice ${sliceId} not found in milestone ${milestoneId}`,
        code: "slice_not_found",
      };
      return;
    }
    if (slice.status === "complete" || slice.status === "done") {
      failure = {
        message: `Slice ${sliceId} is already complete — cannot skip.`,
        code: "already_complete",
      };
      return;
    }
    alreadySkipped = slice.status === "skipped";
    if (!alreadySkipped) {
      updateSliceStatus(milestoneId, sliceId, "skipped");
    }
    // Cascade: mark every non-closed task as skipped so milestone completion
    // doesn't trip the deep-task guard (#4375). Closed tasks (complete/done/
    // skipped) are left untouched — we never downgrade.
    for (const task of getSliceTasks(milestoneId, sliceId)) {
      if (isClosedStatus(task.status)) continue;
      updateTaskStatus(milestoneId, sliceId, task.id, "skipped");
      cascaded++;
    }
  });
  if (failure) {
    return { ...base, error: failure.message, errorCode: failure.code };
  }
  return { ...base, tasksSkipped: cascaded, wasAlreadySkipped: alreadySkipped };
}

View file

@ -345,6 +345,9 @@ function renderRiskHeatmap(
// ─── Dependencies View ───────────────────────────────────────────────────────
/**
* Render milestone/slice dependencies and critical path with data flow.
*/
export function renderDepsView(
data: VisualizerData,
th: Theme,
@ -506,6 +509,9 @@ function renderCriticalPath(
// ─── Metrics View ────────────────────────────────────────────────────────────
/**
* Render cost and usage metrics by phase, model, tier, and projections.
*/
export function renderMetricsView(
data: VisualizerData,
th: Theme,
@ -687,6 +693,9 @@ function renderCostProjections(
// ─── Timeline View (Gantt) ──────────────────────────────────────────────────
/**
* Render execution timeline as Gantt view (wide) or list view (narrow).
*/
export function renderTimelineView(
data: VisualizerData,
th: Theme,
@ -850,6 +859,9 @@ function formatTimeLabel(ts: number): string {
// ─── Agent View ──────────────────────────────────────────────────────────────
/**
* Render current agent status, completion progress, budget pressure, and recent units.
*/
export function renderAgentView(
data: VisualizerData,
th: Theme,
@ -976,6 +988,9 @@ export function renderAgentView(
// ─── Changelog View ──────────────────────────────────────────────────────────
/**
* Render completed slices with one-liners, files modified, decisions, and patterns.
*/
export function renderChangelogView(
data: VisualizerData,
th: Theme,
@ -1041,6 +1056,9 @@ export function renderChangelogView(
// ─── Export View ─────────────────────────────────────────────────────────────
/**
* Render export options (markdown, JSON, snapshot) with last export path.
*/
export function renderExportView(
_data: VisualizerData,
th: Theme,

View file

@ -43,6 +43,7 @@ function getWorktreeCompletions(prefix: string) {
return null;
}
/** Register a lazy-loaded worktree command alias via dynamic import. */
function registerLazyWorktreeAlias(
pi: ExtensionAPI,
name: "worktree" | "wt",
@ -60,6 +61,7 @@ function registerLazyWorktreeAlias(
});
}
/** Register /worktree and /wt commands with lazy loading via dynamic import. */
export function registerLazyWorktreeCommands(pi: ExtensionAPI): void {
registerLazyWorktreeAlias(
pi,

View file

@ -54,12 +54,18 @@ import {
*/
let originalCwd: string | null = null;
/**
 * Get the original project root if currently in a worktree, or null.
 * Used to restore context after `/worktree merge` or `/worktree return`.
 */
export function getWorktreeOriginalCwd(): string | null {
  return originalCwd;
}
/** Get the name of the active worktree, or null if not in one. */
/**
* Get the name of the active worktree, or null if not in one.
* Extracts from .sf/worktrees/ path segment.
*/
export function getActiveWorktreeName(): string | null {
if (!originalCwd) return null;
const cwd = process.cwd();
@ -263,6 +269,10 @@ async function worktreeHandler(
}
}
/**
* Main handler for /worktree and /wt commands routes to subcommand handlers.
* Manages worktree state tracking (originalCwd) across chdir calls.
*/
export async function handleWorktreeCommand(
args: string,
ctx: ExtensionCommandContext,
@ -272,6 +282,7 @@ export async function handleWorktreeCommand(
await worktreeHandler(args, ctx, pi, alias);
}
/** Register /worktree and /wt commands with completion support. */
export function registerWorktreeCommand(pi: ExtensionAPI): void {
// Restore worktree state after /reload.
// The module-level originalCwd resets to null when extensions are re-loaded,

View file

@ -20,6 +20,10 @@ import { listWorktrees, type WorktreeInfo } from "./worktree-manager.js";
// ─── Types ─────────────────────────────────────────────────────────────────
/**
* Health and lifecycle status of a single worktree.
* Used for audits, health checks, and the `/worktree list` status display.
*/
export interface WorktreeHealthStatus {
/** The worktree info from worktree-manager */
worktree: WorktreeInfo;

View file

@ -0,0 +1,179 @@
import { existsSync, readFileSync, realpathSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { join, resolve } from "node:path";
/** Resolve the SF home directory: SF_HOME env override, else ~/.sf. */
function sfHome(): string {
  const override = process.env.SF_HOME;
  return override ? override : join(homedir(), ".sf");
}
/**
 * Index positions of the SF worktree marker within a normalized
 * (forward-slash) path, as returned by findWorktreeSegment().
 */
export interface WorktreeSegment {
  /** Index where the `/.sf/.../worktrees/` segment begins. */
  sfIdx: number;
  /** Index just past the `worktrees/` marker (start of the worktree name). */
  afterWorktrees: number;
}
/**
 * Normalize a path for cross-platform comparison: resolve symlinks when the
 * path exists (falling back to resolve() when it does not), convert to
 * forward slashes, strip trailing slashes, and lowercase on Windows.
 */
export function normalizeWorktreePathForCompare(path: string): string {
  let canonical: string;
  try {
    canonical = realpathSync(path);
  } catch {
    canonical = resolve(path);
  }
  const forwardSlashed =
    canonical.replaceAll("\\", "/").replace(/\/+$/, "") || "/";
  return process.platform === "win32"
    ? forwardSlashed.toLowerCase()
    : forwardSlashed;
}
/**
 * Find the SF worktree segment in both direct project layout and the
 * symlink-resolved external-state layout used by ~/.sf/projects/<hash>.
 * Expects a forward-slash-normalized path; returns null when the path is
 * not inside an SF worktree.
 */
export function findWorktreeSegment(
  normalizedPath: string,
): WorktreeSegment | null {
  const DIRECT_MARKER = "/.sf/worktrees/";
  const directIdx = normalizedPath.indexOf(DIRECT_MARKER);
  if (directIdx >= 0) {
    return {
      sfIdx: directIdx,
      afterWorktrees: directIdx + DIRECT_MARKER.length,
    };
  }
  const externalMatch = /\/\.sf\/projects\/[^/]+\/worktrees\//.exec(
    normalizedPath,
  );
  if (externalMatch) {
    const start = externalMatch.index;
    return {
      sfIdx: start,
      afterWorktrees: start + externalMatch[0].length,
    };
  }
  return null;
}

/** True when the given path (either separator style) is inside an SF worktree. */
export function isSfWorktreePath(path: string): boolean {
  return findWorktreeSegment(path.replaceAll("\\", "/")) !== null;
}
/**
 * Resolve the canonical project root for worktree operations.
 *
 * Priority order: `originalBasePath` wins when available because session
 * state already knows the root; `SF_PROJECT_ROOT` is the next strongest
 * signal for worker processes; otherwise `basePath` itself. The winning
 * path is then canonicalized by resolveProjectRootFromPath().
 */
export function resolveWorktreeProjectRoot(
  basePath: string,
  originalBasePath?: string | null,
): string {
  const sessionRoot = originalBasePath?.trim();
  const envRoot = process.env.SF_PROJECT_ROOT?.trim();
  return resolveProjectRootFromPath(sessionRoot || envRoot || basePath);
}
/**
 * Canonicalize a path to its project root.
 *
 * Non-worktree paths fall back to the nearest bootstrapped .sf root, then
 * the nearest git working-tree root, then the path itself. Worktree paths
 * are cut at the `.sf` marker; when the cut lands inside SF_HOME (the
 * external-state layout under ~/.sf), the real root is recovered from the
 * worktree's `.git` file instead.
 */
function resolveProjectRootFromPath(path: string): string {
  const normalizedPath = path.replaceAll("\\", "/");
  const segment = findWorktreeSegment(normalizedPath);
  if (!segment) {
    return resolveNearestBootstrappedSfRoot(path) ?? resolveGitWorkingTreeRoot(path) ?? path;
  }
  // Cut at the `.sf` marker using the path's native separator; fall back to
  // the index found on the normalized form when the native marker is absent.
  const sepChar = path.includes("\\") ? "\\" : "/";
  const sfMarker = `${sepChar}.sf${sepChar}`;
  const markerIdx = path.indexOf(sfMarker);
  const candidate = markerIdx !== -1
    ? path.slice(0, markerIdx)
    : path.slice(0, segment.sfIdx);
  // If the candidate's .sf directory resolves inside SF_HOME, this is the
  // external-state layout (~/.sf/projects/<hash>/worktrees/...): recover the
  // real project root from the worktree's .git file instead.
  const sfHomeNorm = normalizeWorktreePathForCompare(sfHome());
  const candidateSfPath = normalizeWorktreePathForCompare(join(candidate, ".sf"));
  if (candidateSfPath === sfHomeNorm || candidateSfPath.startsWith(`${sfHomeNorm}/`)) {
    const realRoot = resolveProjectRootFromGitFile(path);
    return realRoot ?? path;
  }
  return candidate;
}
/**
 * Walk upward from `path` (or its parent when `path` is a file) looking for a
 * directory whose `.sf` folder carries bootstrap artifacts. Stops at the
 * first `.git` boundary — a git root without SF artifacts returns null so the
 * caller can fall back to plain git-root resolution. Walks at most 30 levels.
 */
function resolveNearestBootstrappedSfRoot(path: string): string | null {
  try {
    let dir = path;
    if (existsSync(path) && !statSync(path).isDirectory()) {
      dir = resolve(path, "..");
    }
    for (let depth = 0; depth < 30; depth++) {
      if (hasSfBootstrapArtifacts(join(dir, ".sf"))) return dir;
      if (existsSync(join(dir, ".git"))) return null;
      const parent = resolve(dir, "..");
      if (parent === dir) break;
      dir = parent;
    }
  } catch {
    // Non-fatal: callers fall back to git root resolution.
  }
  return null;
}

/** True when an .sf directory contains recognizable bootstrap artifacts. */
function hasSfBootstrapArtifacts(sfPath: string): boolean {
  if (!existsSync(sfPath)) return false;
  return (
    existsSync(join(sfPath, "PREFERENCES.md")) ||
    existsSync(join(sfPath, "preferences.md")) ||
    existsSync(join(sfPath, "milestones"))
  );
}
/**
 * Walk upward from `path` (or its parent when `path` is a file) to the
 * nearest directory containing a `.git` entry. Walks at most 30 levels;
 * returns null when no git root is found.
 */
function resolveGitWorkingTreeRoot(path: string): string | null {
  try {
    let dir = path;
    if (existsSync(path) && !statSync(path).isDirectory()) {
      dir = resolve(path, "..");
    }
    for (let depth = 0; depth < 30; depth++) {
      if (existsSync(join(dir, ".git"))) return dir;
      const parent = resolve(dir, "..");
      if (parent === dir) break;
      dir = parent;
    }
  } catch {
    // Non-fatal: callers either keep the original path or fail closed.
  }
  return null;
}
/**
 * Recover the main project root from a linked worktree's `.git` file.
 *
 * Walks up from `worktreePath` to the first `.git` entry. A linked worktree
 * has a `.git` *file* containing `gitdir: <path>`; that gitdir normally lives
 * at `<root>/.git/worktrees/<name>`, so two levels up yields `<root>/.git`
 * and its parent is the project root. When that shape doesn't hold, the
 * `commondir` file inside the gitdir is used instead. Returns null when no
 * root can be derived.
 */
function resolveProjectRootFromGitFile(worktreePath: string): string | null {
  try {
    let dir = worktreePath;
    for (let i = 0; i < 30; i++) {
      const gitPath = join(dir, ".git");
      if (existsSync(gitPath)) {
        const content = readFileSync(gitPath, "utf8").trim();
        if (content.startsWith("gitdir: ")) {
          // Strip the "gitdir: " prefix; the path may be relative to `dir`.
          const gitDir = resolve(dir, content.slice(8));
          // <root>/.git/worktrees/<name> → two levels up is <root>/.git.
          const dotGitDir = resolve(gitDir, "..", "..");
          if (dotGitDir.endsWith(".git") || dotGitDir.endsWith(".git/") || dotGitDir.endsWith(".git\\")) {
            return resolve(dotGitDir, "..");
          }
          // Fallback: the gitdir's `commondir` file points at the shared
          // .git directory; its parent is the project root.
          const commonDirPath = join(gitDir, "commondir");
          if (existsSync(commonDirPath)) {
            const commonDir = readFileSync(commonDirPath, "utf8").trim();
            const resolvedCommonDir = resolve(gitDir, commonDir);
            return resolve(resolvedCommonDir, "..");
          }
        }
        // Found a .git directory (not a worktree file) — nothing to derive.
        break;
      }
      const parent = resolve(dir, "..");
      if (parent === dir) break;
      dir = parent;
    }
  } catch {
    // Non-fatal: callers either keep the original path or fail closed.
  }
  return null;
}

View file

@ -0,0 +1,35 @@
// SF worktree session state — tracks the original project root while the
// session's cwd is inside an SF worktree so /worktree merge|return can
// restore it. Module-level state: it resets whenever extensions reload.

// Original project root captured before chdir into a worktree, or null.
let originalCwd: string | null = null;

/** Get the stored original project root, or null when not in a worktree. */
export function getWorktreeOriginalCwd(): string | null {
  return originalCwd;
}

/** Remember the project root before the session enters a worktree. */
export function setWorktreeOriginalCwd(cwd: string): void {
  originalCwd = cwd;
}

/** Forget the stored project root (after leaving the worktree). */
export function clearWorktreeOriginalCwd(): void {
  originalCwd = null;
}

/**
 * Recover the original project root from a worktree-shaped path when no root
 * is stored (e.g. after /reload wiped module state). Splits `cwd` on the
 * `.sf/worktrees/` marker using the path's native separator and caches the
 * prefix. Returns the stored or recovered root, or null when `cwd` is not
 * inside a worktree.
 */
export function ensureWorktreeOriginalCwdFromPath(cwd: string = process.cwd()): string | null {
  if (originalCwd) return originalCwd;
  // Probe the separator once — the previous version rebuilt the same
  // regex test three times inline while assembling the marker.
  const sep = cwd.includes("\\") ? "\\" : "/";
  const marker = `${sep}.sf${sep}worktrees${sep}`;
  const markerIdx = cwd.indexOf(marker);
  if (markerIdx !== -1) {
    originalCwd = cwd.slice(0, markerIdx);
  }
  return originalCwd;
}

/**
 * Get the name of the active worktree — the first path segment of the
 * current cwd under `<root>/.sf/worktrees/` — or null when no root is
 * stored or the cwd is outside the worktrees directory.
 */
export function getActiveWorktreeName(): string | null {
  if (!originalCwd) return null;
  const cwd = process.cwd();
  const wtDir = `${originalCwd.replace(/[\\/]+$/, "")}/.sf/worktrees`.replaceAll("\\", "/");
  const normalizedCwd = cwd.replaceAll("\\", "/");
  if (!normalizedCwd.startsWith(`${wtDir}/`)) return null;
  const rel = normalizedCwd.slice(wtDir.length + 1);
  const name = rel.split("/")[0];
  return name || null;
}