feat(sf): worktree hardening, skip-slice handler, cwd anchoring + docstrings
- new worktree-root.ts / worktree-session-state.ts: track and restore original project root after /worktree merge or /worktree return - new tools/skip-slice.ts: cascade skip to tasks in the slice so milestone completion isn't blocked by pending tasks (#4375) - auto/run-unit.ts: anchor cwd to basePath before newSession() captures it (GAP-10) — prevents tool runtime / system prompt from rooting on drifted cwd from async_bash, background jobs, or prior unit cleanup - safety/git-checkpoint.ts: harden HEAD-rev-parse against execFileSync errors, surface stderr properly - broad JSDoc / docstring pass across the rest of the SF extension surface Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ed47951960
commit
4f4b584e53
33 changed files with 913 additions and 197 deletions
|
|
@ -11,6 +11,12 @@ import {
|
||||||
import { migrateLegacyScaffold } from "./scaffold-drift.js";
|
import { migrateLegacyScaffold } from "./scaffold-drift.js";
|
||||||
import { logWarning } from "./workflow-logger.js";
|
import { logWarning } from "./workflow-logger.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Single scaffold file template with path and template content.
|
||||||
|
*
|
||||||
|
* @property path — canonical file path relative to project root (e.g., "AGENTS.md", "docs/PLANS.md")
|
||||||
|
* @property content — template body to write when file is missing or pending-upgrade
|
||||||
|
*/
|
||||||
export interface ScaffoldFile {
|
export interface ScaffoldFile {
|
||||||
path: string;
|
path: string;
|
||||||
content: string;
|
content: string;
|
||||||
|
|
@ -23,6 +29,14 @@ export interface ScaffoldFile {
|
||||||
*/
|
*/
|
||||||
const NO_MARKER_PATHS = new Set<string>([".siftignore"]);
|
const NO_MARKER_PATHS = new Set<string>([".siftignore"]);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Canonical scaffold file templates SF manages for agent legibility.
|
||||||
|
*
|
||||||
|
* Includes AGENTS.md (routing map), ARCHITECTURE.md (system overview), and docs
|
||||||
|
* tree structure (product specs, design docs, execution plans, records, generated).
|
||||||
|
* Phase C syncs these to disk, stamps them with version markers, and records manifest
|
||||||
|
* entries (ADR-021).
|
||||||
|
*/
|
||||||
export const SCAFFOLD_FILES: ScaffoldFile[] = [
|
export const SCAFFOLD_FILES: ScaffoldFile[] = [
|
||||||
{
|
{
|
||||||
path: ".siftignore",
|
path: ".siftignore",
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@
|
||||||
import { randomUUID } from "node:crypto";
|
import { randomUUID } from "node:crypto";
|
||||||
import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
||||||
import { join } from "node:path";
|
import { join } from "node:path";
|
||||||
|
import { atomicWriteSync } from "../atomic-write.js";
|
||||||
import type {
|
import type {
|
||||||
ExtensionAPI,
|
ExtensionAPI,
|
||||||
ExtensionContext,
|
ExtensionContext,
|
||||||
|
|
@ -636,9 +637,8 @@ export async function autoLoop(
|
||||||
const unitPhaseResult = await runUnitPhaseViaContract(dispatchContract, ic, iterData, loopState);
|
const unitPhaseResult = await runUnitPhaseViaContract(dispatchContract, ic, iterData, loopState);
|
||||||
if (unitPhaseResult.action === "next") {
|
if (unitPhaseResult.action === "next") {
|
||||||
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
|
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
|
||||||
if (d?.requestDispatchedAt) {
|
const requestTimestamp = d?.requestDispatchedAt ?? d?.unitStartedAt;
|
||||||
s.lastRequestTimestamp = d.requestDispatchedAt;
|
if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
|
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
|
||||||
unitType: iterData.unitType,
|
unitType: iterData.unitType,
|
||||||
|
|
@ -945,9 +945,8 @@ export async function autoLoop(
|
||||||
);
|
);
|
||||||
if (unitPhaseResult.action === "next") {
|
if (unitPhaseResult.action === "next") {
|
||||||
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
|
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
|
||||||
if (d?.requestDispatchedAt) {
|
const requestTimestamp = d?.requestDispatchedAt ?? d?.unitStartedAt;
|
||||||
s.lastRequestTimestamp = d.requestDispatchedAt;
|
if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
|
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
|
||||||
unitType: iterData.unitType,
|
unitType: iterData.unitType,
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,7 @@ import {
|
||||||
getRequiredWorkflowToolsForAutoUnit,
|
getRequiredWorkflowToolsForAutoUnit,
|
||||||
getWorkflowTransportSupportError,
|
getWorkflowTransportSupportError,
|
||||||
} from "../workflow-mcp.js";
|
} from "../workflow-mcp.js";
|
||||||
|
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
|
||||||
import { detectStuck } from "./detect-stuck.js";
|
import { detectStuck } from "./detect-stuck.js";
|
||||||
import {
|
import {
|
||||||
FINALIZE_POST_TIMEOUT_MS,
|
FINALIZE_POST_TIMEOUT_MS,
|
||||||
|
|
@ -233,7 +234,7 @@ export function requiresHumanProductionMutationApproval(text: string): boolean {
|
||||||
export function _resolveDispatchGuardBasePath(
|
export function _resolveDispatchGuardBasePath(
|
||||||
s: Pick<AutoSession, "originalBasePath" | "basePath">,
|
s: Pick<AutoSession, "originalBasePath" | "basePath">,
|
||||||
): string {
|
): string {
|
||||||
return s.originalBasePath || s.basePath;
|
return resolveWorktreeProjectRoot(s.basePath, s.originalBasePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
const PLANNING_FLOW_GATE_PHASES: ReadonlySet<Phase> = new Set([
|
const PLANNING_FLOW_GATE_PHASES: ReadonlySet<Phase> = new Set([
|
||||||
|
|
@ -2784,6 +2785,20 @@ export async function runFinalize(
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// s.pendingVerificationRetry was set by postUnitPreVerification.
|
// s.pendingVerificationRetry was set by postUnitPreVerification.
|
||||||
|
// Emit a dedicated journal event so forensics can distinguish bounded
|
||||||
|
// verification retries from genuine stuck-loop dispatch repetitions (#4540).
|
||||||
|
const retryInfo = s.pendingVerificationRetry;
|
||||||
|
deps.emitJournalEvent({
|
||||||
|
ts: new Date().toISOString(),
|
||||||
|
flowId: ic.flowId,
|
||||||
|
seq: ic.nextSeq(),
|
||||||
|
eventType: "artifact-verification-retry",
|
||||||
|
data: {
|
||||||
|
unitType: _preUnitSnapshot?.type,
|
||||||
|
unitId: retryInfo?.unitId,
|
||||||
|
attempt: retryInfo?.attempt,
|
||||||
|
},
|
||||||
|
});
|
||||||
// Continue the loop — next iteration will inject the retry context into the prompt.
|
// Continue the loop — next iteration will inject the retry context into the prompt.
|
||||||
debugLog("autoLoop", {
|
debugLog("autoLoop", {
|
||||||
phase: "artifact-verification-retry",
|
phase: "artifact-verification-retry",
|
||||||
|
|
@ -2995,3 +3010,6 @@ export async function runFinalize(
|
||||||
|
|
||||||
return { action: "next", data: undefined as undefined };
|
return { action: "next", data: undefined as undefined };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── GAP-12: exported alias ───────────────────────────────────────────────────
|
||||||
|
export const resetSessionTimeoutState = resetConsecutiveSessionTimeouts;
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ import {
|
||||||
resolvePersistModelChanges,
|
resolvePersistModelChanges,
|
||||||
} from "../preferences.js";
|
} from "../preferences.js";
|
||||||
import { logWarning } from "../workflow-logger.js";
|
import { logWarning } from "../workflow-logger.js";
|
||||||
import { _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
|
import { _clearCurrentResolve, _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
|
||||||
import type { AutoSession } from "./session.js";
|
import type { AutoSession } from "./session.js";
|
||||||
import { NEW_SESSION_TIMEOUT_MS } from "./session.js";
|
import { NEW_SESSION_TIMEOUT_MS } from "./session.js";
|
||||||
import {
|
import {
|
||||||
|
|
@ -45,12 +45,42 @@ export async function runUnit(
|
||||||
): Promise<UnitResult> {
|
): Promise<UnitResult> {
|
||||||
debugLog("runUnit", { phase: "start", unitType, unitId });
|
debugLog("runUnit", { phase: "start", unitType, unitId });
|
||||||
|
|
||||||
|
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
|
||||||
|
// new session reads process.cwd() during construction to anchor its tool
|
||||||
|
// runtime and system prompt; if cwd has drifted (async_bash, background
|
||||||
|
// jobs, prior unit cleanup), the session would otherwise be rooted to the
|
||||||
|
// wrong directory. Must be synchronous — no awaits between chdir and
|
||||||
|
// newSession (#1389, #4762 follow-up).
|
||||||
|
try {
|
||||||
|
if (process.cwd() !== s.basePath) {
|
||||||
|
process.chdir(s.basePath);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
const msg = `Failed to chdir to basePath before newSession (basePath: ${s.basePath}): ${String(e)}`;
|
||||||
|
logWarning("engine", msg, { basePath: s.basePath, error: String(e) });
|
||||||
|
return {
|
||||||
|
status: "cancelled",
|
||||||
|
errorContext: {
|
||||||
|
message: msg,
|
||||||
|
category: "session-failed",
|
||||||
|
isTransient: false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// ── Session creation with timeout ──
|
// ── Session creation with timeout ──
|
||||||
debugLog("runUnit", { phase: "session-create", unitType, unitId });
|
debugLog("runUnit", { phase: "session-create", unitType, unitId });
|
||||||
|
|
||||||
let sessionResult: { cancelled: boolean };
|
let sessionResult: { cancelled: boolean };
|
||||||
let sessionTimeoutHandle: ReturnType<typeof setTimeout> | undefined;
|
let sessionTimeoutHandle: ReturnType<typeof setTimeout> | undefined;
|
||||||
const mySessionSwitchGeneration = ++sessionSwitchGeneration;
|
const mySessionSwitchGeneration = ++sessionSwitchGeneration;
|
||||||
|
// GAP-07: Cancellation controller for newSession(). When the session-creation
|
||||||
|
// timeout fires, we abort this controller so that any still-in-flight
|
||||||
|
// newSession() work (which may clobber process.cwd()) is signalled to stop.
|
||||||
|
// Note: SF's newSession() does not currently accept abortSignal in its
|
||||||
|
// options type, so we cannot pass it directly — but aborting the controller
|
||||||
|
// documents the intent clearly and is a no-op call site when the API adds it.
|
||||||
|
const sessionAbortController = new AbortController();
|
||||||
_setSessionSwitchInFlight(true);
|
_setSessionSwitchInFlight(true);
|
||||||
try {
|
try {
|
||||||
const sessionPromise = s.cmdCtx!.newSession().finally(() => {
|
const sessionPromise = s.cmdCtx!.newSession().finally(() => {
|
||||||
|
|
@ -60,7 +90,10 @@ export async function runUnit(
|
||||||
});
|
});
|
||||||
const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => {
|
const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => {
|
||||||
sessionTimeoutHandle = setTimeout(
|
sessionTimeoutHandle = setTimeout(
|
||||||
() => resolve({ cancelled: true }),
|
() => {
|
||||||
|
sessionAbortController.abort();
|
||||||
|
resolve({ cancelled: true });
|
||||||
|
},
|
||||||
NEW_SESSION_TIMEOUT_MS,
|
NEW_SESSION_TIMEOUT_MS,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
@ -102,15 +135,23 @@ export async function runUnit(
|
||||||
return { status: "cancelled" };
|
return { status: "cancelled" };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GAP-09: Hard-cancel if setModel fails rather than continuing with the
|
||||||
|
// wrong model. Running with an unexpected model wastes the unit and can
|
||||||
|
// cause quota / pricing surprises.
|
||||||
if (s.currentUnitModel && typeof pi.setModel === "function") {
|
if (s.currentUnitModel && typeof pi.setModel === "function") {
|
||||||
const restored = await pi.setModel(s.currentUnitModel, {
|
const modelId = s.currentUnitModel;
|
||||||
|
const restored = await pi.setModel(modelId, {
|
||||||
persist: resolvePersistModelChanges(),
|
persist: resolvePersistModelChanges(),
|
||||||
});
|
});
|
||||||
if (!restored) {
|
if (!restored) {
|
||||||
ctx.ui.notify(
|
return {
|
||||||
`Failed to restore ${s.currentUnitModel.provider}/${s.currentUnitModel.id} after session creation. Using session default.`,
|
status: "cancelled",
|
||||||
"warning",
|
errorContext: {
|
||||||
);
|
message: `setModel failed for ${modelId.provider}/${modelId.id}`,
|
||||||
|
category: "session-failed",
|
||||||
|
isTransient: false,
|
||||||
|
},
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -122,18 +163,33 @@ export async function runUnit(
|
||||||
_setCurrentResolve(resolve);
|
_setCurrentResolve(resolve);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Ensure cwd matches basePath before dispatch (#1389).
|
// GAP-08: Provider request-readiness pre-check (#4555).
|
||||||
// async_bash and background jobs can drift cwd away from the worktree.
|
// Verify the provider can accept requests before dispatching. If the token
|
||||||
// Realigning here prevents commits from landing on the wrong branch.
|
// has expired since bootstrap, return cancelled immediately so the unit is
|
||||||
try {
|
// not wasted on a guaranteed 401.
|
||||||
if (process.cwd() !== s.basePath) {
|
{
|
||||||
process.chdir(s.basePath);
|
const provider = s.currentUnitModel?.provider ?? ctx.model?.provider;
|
||||||
|
|
||||||
|
if (provider != null && typeof ctx.modelRegistry.isProviderRequestReady === "function") {
|
||||||
|
let ready = false;
|
||||||
|
try {
|
||||||
|
ready = ctx.modelRegistry.isProviderRequestReady(provider);
|
||||||
|
} catch {
|
||||||
|
ready = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ready) {
|
||||||
|
_clearCurrentResolve();
|
||||||
|
return {
|
||||||
|
status: "cancelled",
|
||||||
|
errorContext: {
|
||||||
|
message: `Provider ${provider} is not request-ready (login/token expired)`,
|
||||||
|
category: "provider",
|
||||||
|
isTransient: false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
logWarning("engine", "Failed to chdir to basePath before dispatch", {
|
|
||||||
basePath: s.basePath,
|
|
||||||
error: String(e),
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Send the prompt ──
|
// ── Send the prompt ──
|
||||||
|
|
|
||||||
|
|
@ -502,6 +502,7 @@ export function registerHooks(
|
||||||
pi.on("tool_call", async (event, ctx) => {
|
pi.on("tool_call", async (event, ctx) => {
|
||||||
if (!isAutoActive()) return;
|
if (!isAutoActive()) return;
|
||||||
safetyRecordToolCall(
|
safetyRecordToolCall(
|
||||||
|
event.toolCallId,
|
||||||
event.toolName,
|
event.toolName,
|
||||||
event.input as Record<string, unknown>,
|
event.input as Record<string, unknown>,
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -644,6 +644,10 @@ async function buildCarryForwardLines(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build resume state section from CONTINUE.md or legacy continue.md.
|
||||||
|
* Returns progress, completed work, and next action if available.
|
||||||
|
*/
|
||||||
async function buildResumeSection(
|
async function buildResumeSection(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
milestoneId: string,
|
milestoneId: string,
|
||||||
|
|
@ -694,6 +698,10 @@ async function buildResumeSection(
|
||||||
return lines.join("\n");
|
return lines.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract slice plan excerpt with goal, demo, verification, and observability.
|
||||||
|
* Returns formatted section for task execution context.
|
||||||
|
*/
|
||||||
function extractSliceExecutionExcerpt(
|
function extractSliceExecutionExcerpt(
|
||||||
content: string | null,
|
content: string | null,
|
||||||
relPath: string,
|
relPath: string,
|
||||||
|
|
@ -726,6 +734,10 @@ function extractSliceExecutionExcerpt(
|
||||||
return parts.join("\n");
|
return parts.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract a markdown section by heading name from content.
|
||||||
|
* Returns section content until next heading or null if not found.
|
||||||
|
*/
|
||||||
function extractMarkdownSection(
|
function extractMarkdownSection(
|
||||||
content: string,
|
content: string,
|
||||||
heading: string,
|
heading: string,
|
||||||
|
|
@ -741,10 +753,16 @@ function extractMarkdownSection(
|
||||||
return rest.slice(0, end).trim();
|
return rest.slice(0, end).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Escape special regex characters in a string.
|
||||||
|
*/
|
||||||
function escapeRegExp(value: string): string {
|
function escapeRegExp(value: string): string {
|
||||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Collapse multiple whitespace in text to single spaces.
|
||||||
|
*/
|
||||||
function oneLine(text: string): string {
|
function oneLine(text: string): string {
|
||||||
return text.replace(/\s+/g, " ").trim();
|
return text.replace(/\s+/g, " ").trim();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -112,6 +112,10 @@ const GATE_SAFE_TOOLS = new Set([
|
||||||
"search_and_read",
|
"search_and_read",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Snapshot of write gate state: depth verification, queue phase, and pending gates.
|
||||||
|
* Persisted to .sf/runtime/write-gate-state.json for cross-process coordination.
|
||||||
|
*/
|
||||||
export interface WriteGateSnapshot {
|
export interface WriteGateSnapshot {
|
||||||
verifiedDepthMilestones: string[];
|
verifiedDepthMilestones: string[];
|
||||||
activeQueuePhase: boolean;
|
activeQueuePhase: boolean;
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,9 @@ const sfHome = process.env.SF_HOME || join(homedir(), ".sf");
|
||||||
|
|
||||||
// ─── Types (mirrored from extension-registry.ts) ────────────────────────────
|
// ─── Types (mirrored from extension-registry.ts) ────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension manifest metadata including capabilities and dependencies.
|
||||||
|
*/
|
||||||
interface ExtensionManifest {
|
interface ExtensionManifest {
|
||||||
id: string;
|
id: string;
|
||||||
name: string;
|
name: string;
|
||||||
|
|
@ -41,6 +44,9 @@ interface ExtensionManifest {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension registry entry tracking enabled/disabled status and source.
|
||||||
|
*/
|
||||||
interface ExtensionRegistryEntry {
|
interface ExtensionRegistryEntry {
|
||||||
id: string;
|
id: string;
|
||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
|
|
@ -49,6 +55,9 @@ interface ExtensionRegistryEntry {
|
||||||
disabledReason?: string;
|
disabledReason?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension registry mapping extension IDs to their status entries.
|
||||||
|
*/
|
||||||
interface ExtensionRegistry {
|
interface ExtensionRegistry {
|
||||||
version: 1;
|
version: 1;
|
||||||
entries: Record<string, ExtensionRegistryEntry>;
|
entries: Record<string, ExtensionRegistryEntry>;
|
||||||
|
|
@ -56,14 +65,23 @@ interface ExtensionRegistry {
|
||||||
|
|
||||||
// ─── Registry I/O ───────────────────────────────────────────────────────────
|
// ─── Registry I/O ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the path to the extension registry file.
|
||||||
|
*/
|
||||||
function getRegistryPath(): string {
|
function getRegistryPath(): string {
|
||||||
return join(sfHome, "extensions", "registry.json");
|
return join(sfHome, "extensions", "registry.json");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the path to the agent extensions directory.
|
||||||
|
*/
|
||||||
function getAgentExtensionsDir(): string {
|
function getAgentExtensionsDir(): string {
|
||||||
return join(sfHome, "agent", "extensions");
|
return join(sfHome, "agent", "extensions");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the extension registry, defaulting to an empty registry on error.
|
||||||
|
*/
|
||||||
function loadRegistry(): ExtensionRegistry {
|
function loadRegistry(): ExtensionRegistry {
|
||||||
const filePath = getRegistryPath();
|
const filePath = getRegistryPath();
|
||||||
try {
|
try {
|
||||||
|
|
@ -84,6 +102,9 @@ function loadRegistry(): ExtensionRegistry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save the extension registry to disk (atomic via temp file).
|
||||||
|
*/
|
||||||
function saveRegistry(registry: ExtensionRegistry): void {
|
function saveRegistry(registry: ExtensionRegistry): void {
|
||||||
const filePath = getRegistryPath();
|
const filePath = getRegistryPath();
|
||||||
try {
|
try {
|
||||||
|
|
@ -96,12 +117,18 @@ function saveRegistry(registry: ExtensionRegistry): void {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if an extension is enabled in the registry (defaults to true if not registered).
|
||||||
|
*/
|
||||||
function isEnabled(registry: ExtensionRegistry, id: string): boolean {
|
function isEnabled(registry: ExtensionRegistry, id: string): boolean {
|
||||||
const entry = registry.entries[id];
|
const entry = registry.entries[id];
|
||||||
if (!entry) return true;
|
if (!entry) return true;
|
||||||
return entry.enabled;
|
return entry.enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load extension manifest from a directory, or null if not found/invalid.
|
||||||
|
*/
|
||||||
function readManifest(dir: string): ExtensionManifest | null {
|
function readManifest(dir: string): ExtensionManifest | null {
|
||||||
const mPath = join(dir, "extension-manifest.json");
|
const mPath = join(dir, "extension-manifest.json");
|
||||||
if (!existsSync(mPath)) return null;
|
if (!existsSync(mPath)) return null;
|
||||||
|
|
@ -115,6 +142,9 @@ function readManifest(dir: string): ExtensionManifest | null {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Discover all extension manifests from the agent extensions directory.
|
||||||
|
*/
|
||||||
function discoverManifests(): Map<string, ExtensionManifest> {
|
function discoverManifests(): Map<string, ExtensionManifest> {
|
||||||
const extDir = getAgentExtensionsDir();
|
const extDir = getAgentExtensionsDir();
|
||||||
const manifests = new Map<string, ExtensionManifest>();
|
const manifests = new Map<string, ExtensionManifest>();
|
||||||
|
|
@ -129,6 +159,9 @@ function discoverManifests(): Map<string, ExtensionManifest> {
|
||||||
|
|
||||||
// ─── Command Handler ────────────────────────────────────────────────────────
|
// ─── Command Handler ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handler for /sf extensions subcommands (list, enable, disable, info).
|
||||||
|
*/
|
||||||
export async function handleExtensions(
|
export async function handleExtensions(
|
||||||
args: string,
|
args: string,
|
||||||
ctx: ExtensionCommandContext,
|
ctx: ExtensionCommandContext,
|
||||||
|
|
@ -162,6 +195,9 @@ export async function handleExtensions(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all discovered extensions with their status and capabilities.
|
||||||
|
*/
|
||||||
function handleList(ctx: ExtensionCommandContext): void {
|
function handleList(ctx: ExtensionCommandContext): void {
|
||||||
const manifests = discoverManifests();
|
const manifests = discoverManifests();
|
||||||
const registry = loadRegistry();
|
const registry = loadRegistry();
|
||||||
|
|
@ -211,6 +247,9 @@ function handleList(ctx: ExtensionCommandContext): void {
|
||||||
ctx.ui.notify(lines.join("\n"), "info");
|
ctx.ui.notify(lines.join("\n"), "info");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable a disabled extension in the registry.
|
||||||
|
*/
|
||||||
function handleEnable(
|
function handleEnable(
|
||||||
id: string | undefined,
|
id: string | undefined,
|
||||||
ctx: ExtensionCommandContext,
|
ctx: ExtensionCommandContext,
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,9 @@ const TASK_COUNT_TIERS: [number, number][] = [
|
||||||
|
|
||||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of truncation at section boundaries (content + dropped section count).
|
||||||
|
*/
|
||||||
export interface TruncationResult {
|
export interface TruncationResult {
|
||||||
/** The (possibly truncated) content string */
|
/** The (possibly truncated) content string */
|
||||||
content: string;
|
content: string;
|
||||||
|
|
@ -55,6 +58,9 @@ export interface TruncationResult {
|
||||||
droppedSections: number;
|
droppedSections: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Proportional character budget allocation for a context window.
|
||||||
|
*/
|
||||||
export interface BudgetAllocation {
|
export interface BudgetAllocation {
|
||||||
/** Character budget for dependency/prior-task summaries */
|
/** Character budget for dependency/prior-task summaries */
|
||||||
summaryBudgetChars: number;
|
summaryBudgetChars: number;
|
||||||
|
|
@ -71,6 +77,9 @@ export interface BudgetAllocation {
|
||||||
// ─── Minimal interface slices for dependency injection ───────────────────────
|
// ─── Minimal interface slices for dependency injection ───────────────────────
|
||||||
// These avoid coupling to full ModelRegistry/SFPreferences types in tests.
|
// These avoid coupling to full ModelRegistry/SFPreferences types in tests.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimal model interface for context window resolution.
|
||||||
|
*/
|
||||||
export interface MinimalModel {
|
export interface MinimalModel {
|
||||||
id: string;
|
id: string;
|
||||||
provider: string;
|
provider: string;
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,17 @@ import type { Decision, Requirement } from "./types.js";
|
||||||
|
|
||||||
// ─── Query Functions ───────────────────────────────────────────────────────
|
// ─── Query Functions ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Options for filtering decisions by milestone and scope.
|
||||||
|
*/
|
||||||
export interface DecisionQueryOpts {
|
export interface DecisionQueryOpts {
|
||||||
milestoneId?: string;
|
milestoneId?: string;
|
||||||
scope?: string;
|
scope?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Options for filtering requirements by milestone, slice, and status.
|
||||||
|
*/
|
||||||
export interface RequirementQueryOpts {
|
export interface RequirementQueryOpts {
|
||||||
milestoneId?: string;
|
milestoneId?: string;
|
||||||
sliceId?: string;
|
sliceId?: string;
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,13 @@ import { deriveState } from "./state.js";
|
||||||
import { readEvents } from "./workflow-events.js";
|
import { readEvents } from "./workflow-events.js";
|
||||||
import { renderAllProjections } from "./workflow-projections.js";
|
import { renderAllProjections } from "./workflow-projections.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check SF engine health: database constraints, projection drift, and corruption.
|
||||||
|
*
|
||||||
|
* Verifies orphaned tasks/slices, duplicate IDs, and missing task summaries.
|
||||||
|
* Re-renders stale markdown projections when event log is newer than cached files.
|
||||||
|
* Non-fatal: issues are reported but never auto-fixed.
|
||||||
|
*/
|
||||||
export async function checkEngineHealth(
|
export async function checkEngineHealth(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
issues: DoctorIssue[],
|
issues: DoctorIssue[],
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ export type JournalEventType =
|
||||||
| "worktree-skip"
|
| "worktree-skip"
|
||||||
| "worktree-merge-start"
|
| "worktree-merge-start"
|
||||||
| "worktree-merge-failed"
|
| "worktree-merge-failed"
|
||||||
|
| "artifact-verification-retry"
|
||||||
// #4764 — worktree lifespan / divergence telemetry
|
// #4764 — worktree lifespan / divergence telemetry
|
||||||
| "worktree-created"
|
| "worktree-created"
|
||||||
| "worktree-merged"
|
| "worktree-merged"
|
||||||
|
|
@ -59,7 +60,10 @@ export type JournalEventType =
|
||||||
| "canonical-root-redirect"
|
| "canonical-root-redirect"
|
||||||
// #4765 — slice-cadence collapse
|
// #4765 — slice-cadence collapse
|
||||||
| "slice-merged"
|
| "slice-merged"
|
||||||
| "milestone-resquash";
|
| "milestone-resquash"
|
||||||
|
// dispatch telemetry — measure agent/subagent invocation frequency and shape
|
||||||
|
| "subagent-invoked"
|
||||||
|
| "subagent-completed";
|
||||||
|
|
||||||
/** A single structured event in the journal. */
|
/** A single structured event in the journal. */
|
||||||
export interface JournalEntry {
|
export interface JournalEntry {
|
||||||
|
|
|
||||||
|
|
@ -20,9 +20,19 @@ import {
|
||||||
} from "./notification-store.js";
|
} from "./notification-store.js";
|
||||||
import { formattedShortcutPair } from "./shortcut-defs.js";
|
import { formattedShortcutPair } from "./shortcut-defs.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter mode for notification display. Controls which severity levels are shown.
|
||||||
|
*/
|
||||||
type FilterMode = "all" | "error" | "warning" | "info";
|
type FilterMode = "all" | "error" | "warning" | "info";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cycle of filter modes used when cycling through filter states with 'f' key.
|
||||||
|
*/
|
||||||
const FILTER_CYCLE: FilterMode[] = ["all", "error", "warning", "info"];
|
const FILTER_CYCLE: FilterMode[] = ["all", "error", "warning", "info"];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a single-character icon representing the notification severity.
|
||||||
|
*/
|
||||||
function severityIcon(severity: NotifySeverity): string {
|
function severityIcon(severity: NotifySeverity): string {
|
||||||
switch (severity) {
|
switch (severity) {
|
||||||
case "error":
|
case "error":
|
||||||
|
|
@ -36,7 +46,10 @@ function severityIcon(severity: NotifySeverity): string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Word-wrap plain text to fit within maxWidth columns. */
|
/**
|
||||||
|
* Word-wrap plain text to fit within maxWidth columns. Splits on whitespace,
|
||||||
|
* handles single words longer than maxWidth by truncating with ellipsis.
|
||||||
|
*/
|
||||||
function wrapText(text: string, maxWidth: number): string[] {
|
function wrapText(text: string, maxWidth: number): string[] {
|
||||||
if (text.length <= maxWidth) return [text];
|
if (text.length <= maxWidth) return [text];
|
||||||
const words = text.split(/\s+/);
|
const words = text.split(/\s+/);
|
||||||
|
|
@ -59,6 +72,10 @@ function wrapText(text: string, maxWidth: number): string[] {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format an ISO timestamp for display as relative time ("just now", "5m ago", etc.).
|
||||||
|
* Falls back to HH:MM:SS if parsing fails.
|
||||||
|
*/
|
||||||
function formatTimestamp(ts: string): string {
|
function formatTimestamp(ts: string): string {
|
||||||
try {
|
try {
|
||||||
const d = new Date(ts);
|
const d = new Date(ts);
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,21 @@ import { join } from "node:path";
|
||||||
|
|
||||||
// ─── Types ──────────────────────────────────────────────────────────────
|
// ─── Types ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Severity level for notifications. Indicates the urgency and importance of the notification.
|
||||||
|
*/
|
||||||
export type NotifySeverity = "info" | "success" | "warning" | "error";
|
export type NotifySeverity = "info" | "success" | "warning" | "error";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Source origin for notifications. Indicates whether the notification came from explicit
|
||||||
|
* notify() calls or workflow-logger warnings.
|
||||||
|
*/
|
||||||
export type NotificationSource = "notify" | "workflow-logger";
|
export type NotificationSource = "notify" | "workflow-logger";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optional metadata attached to a notification. Provides context about
|
||||||
|
* notification classification, blocking behavior, and deduplication strategy.
|
||||||
|
*/
|
||||||
export interface NotificationMetadata {
|
export interface NotificationMetadata {
|
||||||
kind?: "notice" | "approval_request" | "progress" | "terminal";
|
kind?: "notice" | "approval_request" | "progress" | "terminal";
|
||||||
blocking?: boolean;
|
blocking?: boolean;
|
||||||
|
|
@ -29,6 +41,10 @@ export interface NotificationMetadata {
|
||||||
source?: string;
|
source?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A persisted notification entry in the store. Contains all metadata and
|
||||||
|
* content necessary to reconstruct the notification UI state across sessions.
|
||||||
|
*/
|
||||||
export interface NotificationEntry {
|
export interface NotificationEntry {
|
||||||
id: string;
|
id: string;
|
||||||
ts: string;
|
ts: string;
|
||||||
|
|
|
||||||
|
|
@ -93,6 +93,9 @@ function detectFileOverlaps(
|
||||||
* 3. It does not have file overlap with other eligible milestones
|
* 3. It does not have file overlap with other eligible milestones
|
||||||
* (overlaps are flagged as warnings but do not disqualify)
|
* (overlaps are flagged as warnings but do not disqualify)
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Analyze which milestones are eligible for parallel execution based on deps and file overlaps.
|
||||||
|
*/
|
||||||
export async function analyzeParallelEligibility(
|
export async function analyzeParallelEligibility(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
): Promise<ParallelCandidates> {
|
): Promise<ParallelCandidates> {
|
||||||
|
|
@ -214,6 +217,9 @@ export async function analyzeParallelEligibility(
|
||||||
/**
|
/**
|
||||||
* Produce a human-readable report of parallel eligibility analysis.
|
* Produce a human-readable report of parallel eligibility analysis.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Format eligibility analysis results into a markdown report.
|
||||||
|
*/
|
||||||
export function formatEligibilityReport(
|
export function formatEligibilityReport(
|
||||||
candidates: ParallelCandidates,
|
candidates: ParallelCandidates,
|
||||||
): string {
|
): string {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,9 @@ export type MergeOrder = "sequential" | "by-completion";
|
||||||
* Uses a subprocess to avoid disrupting the global DB singleton.
|
* Uses a subprocess to avoid disrupting the global DB singleton.
|
||||||
* Returns true when milestones.status = 'complete' in the worktree's sf.db.
|
* Returns true when milestones.status = 'complete' in the worktree's sf.db.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Check if a milestone is marked as complete in its worktree database.
|
||||||
|
*/
|
||||||
export function isMilestoneCompleteInWorktreeDb(
|
export function isMilestoneCompleteInWorktreeDb(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
mid: string,
|
mid: string,
|
||||||
|
|
@ -95,6 +98,9 @@ function discoverDbCompletedMilestones(basePath: string): Set<string> {
|
||||||
* are included if their worktree DB shows status='complete'.
|
* are included if their worktree DB shows status='complete'.
|
||||||
* See: https://github.com/singularity-forge/sf-run/issues/2812
|
* See: https://github.com/singularity-forge/sf-run/issues/2812
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Determine merge order for completed milestones (sequential or by-completion).
|
||||||
|
*/
|
||||||
export function determineMergeOrder(
|
export function determineMergeOrder(
|
||||||
workers: WorkerInfo[],
|
workers: WorkerInfo[],
|
||||||
order: MergeOrder = "sequential",
|
order: MergeOrder = "sequential",
|
||||||
|
|
@ -151,6 +157,9 @@ export function determineMergeOrder(
|
||||||
* Attempt to merge a single milestone's worktree back to main.
|
* Attempt to merge a single milestone's worktree back to main.
|
||||||
* Wraps mergeMilestoneToMain with error handling for parallel context.
|
* Wraps mergeMilestoneToMain with error handling for parallel context.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Merge a completed milestone to main branch, returning success or conflict details.
|
||||||
|
*/
|
||||||
export async function mergeCompletedMilestone(
|
export async function mergeCompletedMilestone(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
milestoneId: string,
|
milestoneId: string,
|
||||||
|
|
@ -208,6 +217,9 @@ export async function mergeCompletedMilestone(
|
||||||
* Merge all completed milestones in sequence.
|
* Merge all completed milestones in sequence.
|
||||||
* Stops on first conflict and returns results so far.
|
* Stops on first conflict and returns results so far.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Merge all completed milestones in order, stopping on first conflict.
|
||||||
|
*/
|
||||||
export async function mergeAllCompleted(
|
export async function mergeAllCompleted(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
workers: WorkerInfo[],
|
workers: WorkerInfo[],
|
||||||
|
|
@ -232,6 +244,9 @@ export async function mergeAllCompleted(
|
||||||
/**
|
/**
|
||||||
* Format merge results for display.
|
* Format merge results for display.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Format merge operation results into a markdown report with conflicts and errors.
|
||||||
|
*/
|
||||||
export function formatMergeResults(results: MergeResult[]): string {
|
export function formatMergeResults(results: MergeResult[]): string {
|
||||||
if (results.length === 0) return "No completed milestones to merge.";
|
if (results.length === 0) return "No completed milestones to merge.";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,9 @@ export type {
|
||||||
SFPhaseModelConfig,
|
SFPhaseModelConfig,
|
||||||
} from "./preferences-types.js";
|
} from "./preferences-types.js";
|
||||||
|
|
||||||
|
/** Map of provider ID to allowed model ID patterns. */
|
||||||
export type ProviderModelAllowList = Record<string, readonly string[]>;
|
export type ProviderModelAllowList = Record<string, readonly string[]>;
|
||||||
|
/** Map of provider ID to blocked model ID patterns. */
|
||||||
export type ProviderModelBlockList = Record<string, readonly string[]>;
|
export type ProviderModelBlockList = Record<string, readonly string[]>;
|
||||||
|
|
||||||
type ProviderPolicyModel = {
|
type ProviderPolicyModel = {
|
||||||
|
|
@ -160,7 +162,10 @@ function isModelAllowedByBuiltInProviderPolicy(
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isProviderModelAllowed(
|
export /**
|
||||||
|
* Check if a provider/model pair is allowed by built-in and user-configured policies.
|
||||||
|
*/
|
||||||
|
function isProviderModelAllowed(
|
||||||
provider: string,
|
provider: string,
|
||||||
modelId: string,
|
modelId: string,
|
||||||
providerModelAllow: ProviderModelAllowList | undefined,
|
providerModelAllow: ProviderModelAllowList | undefined,
|
||||||
|
|
@ -189,6 +194,9 @@ export function isProviderModelAllowed(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter models by provider/model allow and block lists from user preferences.
|
||||||
|
*/
|
||||||
export function filterModelsByProviderModelAllow<
|
export function filterModelsByProviderModelAllow<
|
||||||
T extends { provider: string; id: string },
|
T extends { provider: string; id: string },
|
||||||
>(
|
>(
|
||||||
|
|
@ -216,6 +224,9 @@ export function filterModelsByProviderModelAllow<
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a provider is in the allowed list and not in the blocked list.
|
||||||
|
*/
|
||||||
export function isProviderAllowedByLists(
|
export function isProviderAllowedByLists(
|
||||||
provider: string,
|
provider: string,
|
||||||
allowedProviders: readonly string[] | undefined,
|
allowedProviders: readonly string[] | undefined,
|
||||||
|
|
@ -247,17 +258,9 @@ export function resolveModelForUnit(unitType: string): string | undefined {
|
||||||
* - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
|
* - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
|
||||||
*/
|
*/
|
||||||
/**
|
/**
|
||||||
* Fallback resolver used when the user hasn't pinned `models.<unit>`:
|
* Auto-benchmark model picker when user hasn't pinned `models.<unit>`.
|
||||||
* iterate every model the pi-ai catalog knows about whose provider is in
|
* Scores candidates using unit-type-specific benchmark profiles and returns top pick plus fallbacks.
|
||||||
* `allowed_providers` (or every provider, if the allow-list is unset),
|
* Works during preference resolution by pulling from pi-ai catalog rather than live registry.
|
||||||
* score them with the unit-type-specific benchmark profile, and return
|
|
||||||
* the top pick plus diversified fallbacks.
|
|
||||||
*
|
|
||||||
* Pulls the candidate pool from `models.generated.js` rather than a live
|
|
||||||
* registry lookup so it works during preference resolution (before the
|
|
||||||
* registry is populated). The dispatch-time availability check happens
|
|
||||||
* downstream in auto-model-selection.ts and filters unavailable
|
|
||||||
* candidates naturally (expired keys, providers without auth, etc.).
|
|
||||||
*/
|
*/
|
||||||
function resolveAutoBenchmarkPickForUnit(
|
function resolveAutoBenchmarkPickForUnit(
|
||||||
unitType: string,
|
unitType: string,
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,10 @@ function normalizedTask(unit: ProductionMutationUnit): string {
|
||||||
return [unit.taskTitle, unit.taskText].join("\n").toLowerCase();
|
return [unit.taskTitle, unit.taskText].join("\n").toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assess whether LLM auto-approval criteria are met for a production mutation.
|
||||||
|
* Returns approved=true only if all safety constraints are present in task text.
|
||||||
|
*/
|
||||||
export function assessLlmProductionMutationApproval(
|
export function assessLlmProductionMutationApproval(
|
||||||
unit: ProductionMutationUnit,
|
unit: ProductionMutationUnit,
|
||||||
): { approved: boolean; reasons: string[] } {
|
): { approved: boolean; reasons: string[] } {
|
||||||
|
|
@ -149,6 +153,10 @@ export function assessLlmProductionMutationApproval(
|
||||||
return { approved: reasons.length === 0, reasons };
|
return { approved: reasons.length === 0, reasons };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a fully approved production mutation approval using LLM policy.
|
||||||
|
* Caller must verify assessLlmProductionMutationApproval() returned approved=true first.
|
||||||
|
*/
|
||||||
export function buildLlmProductionMutationApproval(
|
export function buildLlmProductionMutationApproval(
|
||||||
unit: ProductionMutationUnit,
|
unit: ProductionMutationUnit,
|
||||||
approvedAt: Date = new Date(),
|
approvedAt: Date = new Date(),
|
||||||
|
|
@ -181,6 +189,10 @@ export function buildLlmProductionMutationApproval(
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assess and atomically write an LLM-approved production mutation approval if criteria match.
|
||||||
|
* Returns approved=false with reasons if any safety constraint is missing.
|
||||||
|
*/
|
||||||
export function approveProductionMutationWithLlmPolicy(
|
export function approveProductionMutationWithLlmPolicy(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
unit: ProductionMutationUnit,
|
unit: ProductionMutationUnit,
|
||||||
|
|
@ -207,6 +219,9 @@ export function approveProductionMutationWithLlmPolicy(
|
||||||
return { path, approved: true, reasons: [], wrote: true };
|
return { path, approved: true, reasons: [], wrote: true };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an empty approval template if it doesn't exist. No-op if already present.
|
||||||
|
*/
|
||||||
export function ensureProductionMutationApprovalTemplate(
|
export function ensureProductionMutationApprovalTemplate(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
unit: ProductionMutationUnit,
|
unit: ProductionMutationUnit,
|
||||||
|
|
@ -237,6 +252,10 @@ function nonEmptyStringArray(value: unknown): value is string[] {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate a parsed approval JSON against schema and safety constraints.
|
||||||
|
* Returns approved=false with reasons if any field is missing or invalid.
|
||||||
|
*/
|
||||||
export function validateProductionMutationApproval(
|
export function validateProductionMutationApproval(
|
||||||
data: unknown,
|
data: unknown,
|
||||||
unit: ProductionMutationUnit,
|
unit: ProductionMutationUnit,
|
||||||
|
|
@ -300,6 +319,9 @@ export function validateProductionMutationApproval(
|
||||||
return { approved: reasons.length === 0, reasons };
|
return { approved: reasons.length === 0, reasons };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read and validate approval status from disk. Returns rejected reasons if validation fails.
|
||||||
|
*/
|
||||||
export function readProductionMutationApprovalStatus(
|
export function readProductionMutationApprovalStatus(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
unit: ProductionMutationUnit,
|
unit: ProductionMutationUnit,
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,25 @@
|
||||||
* Tracks every bash command, file write, and file edit during a unit execution.
|
* Tracks every bash command, file write, and file edit during a unit execution.
|
||||||
* Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
|
* Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
|
||||||
*
|
*
|
||||||
|
* Evidence is persisted to .sf/safety/evidence-<mid>-<sid>-<tid>.json so it
|
||||||
|
* survives session restarts (pause/resume, crash recovery). On unit start,
|
||||||
|
* call resetEvidence() then loadEvidenceFromDisk(). On every new tool call,
|
||||||
|
* saveEvidenceToDisk() is called automatically by recordToolCall/recordToolResult.
|
||||||
|
*
|
||||||
* Follows the same module-level Map pattern as auto-tool-tracking.ts.
|
* Follows the same module-level Map pattern as auto-tool-tracking.ts.
|
||||||
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
import {
|
||||||
import { join } from "node:path";
|
existsSync,
|
||||||
|
mkdirSync,
|
||||||
|
readFileSync,
|
||||||
|
writeFileSync,
|
||||||
|
renameSync,
|
||||||
|
unlinkSync,
|
||||||
|
} from "node:fs";
|
||||||
|
import { join, dirname } from "node:path";
|
||||||
|
import { randomBytes } from "node:crypto";
|
||||||
|
|
||||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
@ -41,22 +54,11 @@ export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
|
||||||
|
|
||||||
let unitEvidence: EvidenceEntry[] = [];
|
let unitEvidence: EvidenceEntry[] = [];
|
||||||
|
|
||||||
// Disk persistence: unit context set by resetEvidence() at unit start.
|
|
||||||
// Guarded by presence of currentUnitId — if absent, disk write is skipped.
|
|
||||||
let currentUnitId: string | undefined = undefined;
|
|
||||||
let currentBasePath: string | undefined = undefined;
|
|
||||||
|
|
||||||
// ─── Public API ─────────────────────────────────────────────────────────────
|
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/**
|
/** Reset all evidence for a new unit. Call at unit start. */
|
||||||
* Reset all evidence for a new unit. Call at unit start.
|
export function resetEvidence(): void {
|
||||||
* @param unitId - The active unit ID (e.g. M006/S02/T03)
|
|
||||||
* @param basePath - Project root path for computing the evidence file path
|
|
||||||
*/
|
|
||||||
export function resetEvidence(unitId?: string, basePath?: string): void {
|
|
||||||
unitEvidence = [];
|
unitEvidence = [];
|
||||||
currentUnitId = unitId;
|
|
||||||
currentBasePath = basePath;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Get a read-only view of all evidence collected for the current unit. */
|
/** Get a read-only view of all evidence collected for the current unit. */
|
||||||
|
|
@ -79,123 +81,78 @@ export function getFilePaths(): string[] {
|
||||||
.map((e) => e.path);
|
.map((e) => e.path);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── Disk Persistence ──────────────────────────────────────────────────────
|
// ─── Persistence (Bug #4385 — evidence must survive session restarts) ────────
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append an evidence entry to .sf/active/{unitId}/evidence.jsonl.
|
* Build the path for the evidence JSON file for a given unit.
|
||||||
*
|
* Lives under .sf/safety/ which is gitignored and session-scoped.
|
||||||
* Purpose: Evidence collected at tool_call time must survive a mid-unit re-dispatch
|
|
||||||
* race where runUnitPhase re-fires between tool_call and tool_execution_end.
|
|
||||||
* Without disk persistence, in-memory evidence is lost on re-dispatch.
|
|
||||||
*
|
|
||||||
* Consumer: phases.ts runUnitPhase calls resetEvidence(unitId, basePath) at unit start.
|
|
||||||
* Best-effort: disk write failures are silently swallowed so they never crash the agent.
|
|
||||||
*/
|
*/
|
||||||
function saveEvidenceToDisk(entry: EvidenceEntry): void {
|
function evidencePath(
|
||||||
if (!currentUnitId || !currentBasePath) return;
|
basePath: string,
|
||||||
try {
|
milestoneId: string,
|
||||||
const dir = join(currentBasePath, ".sf", "active", currentUnitId);
|
sliceId: string,
|
||||||
if (!existsSync(dir)) {
|
taskId: string,
|
||||||
mkdirSync(dir, { recursive: true });
|
): string {
|
||||||
}
|
return join(
|
||||||
const line = JSON.stringify(entry);
|
basePath,
|
||||||
appendFileSync(join(dir, "evidence.jsonl"), line + "\n");
|
".sf",
|
||||||
} catch {
|
"safety",
|
||||||
// Best-effort: disk write failures must not crash the agent.
|
`evidence-${milestoneId}-${sliceId}-${taskId}.json`,
|
||||||
}
|
);
|
||||||
}
|
|
||||||
|
|
||||||
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Record a tool call at dispatch time (before execution).
|
|
||||||
* Exit codes and output are filled in by recordToolResult after execution.
|
|
||||||
*/
|
|
||||||
export function recordToolCall(
|
|
||||||
toolName: string,
|
|
||||||
input: Record<string, unknown>,
|
|
||||||
): void {
|
|
||||||
let entry: EvidenceEntry | undefined;
|
|
||||||
if (toolName === "bash" || toolName === "Bash") {
|
|
||||||
entry = {
|
|
||||||
kind: "bash",
|
|
||||||
toolCallId: "",
|
|
||||||
command: String(input.command ?? ""),
|
|
||||||
exitCode: -1,
|
|
||||||
outputSnippet: "",
|
|
||||||
timestamp: Date.now(),
|
|
||||||
};
|
|
||||||
} else if (toolName === "write" || toolName === "Write") {
|
|
||||||
entry = {
|
|
||||||
kind: "write",
|
|
||||||
toolCallId: "",
|
|
||||||
path: String(input.file_path ?? input.path ?? ""),
|
|
||||||
timestamp: Date.now(),
|
|
||||||
};
|
|
||||||
} else if (toolName === "edit" || toolName === "Edit") {
|
|
||||||
entry = {
|
|
||||||
kind: "edit",
|
|
||||||
toolCallId: "",
|
|
||||||
path: String(input.file_path ?? input.path ?? ""),
|
|
||||||
timestamp: Date.now(),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if (entry) {
|
|
||||||
unitEvidence.push(entry);
|
|
||||||
saveEvidenceToDisk(entry);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record a tool execution result. Matches the most recent unresolved entry
|
* Validate that a parsed value is an array of EvidenceEntry objects.
|
||||||
* of the same kind and fills in the toolCallId, exit code, and output.
|
* Rejects corrupt / schema-mismatch data rather than letting it poison state.
|
||||||
*/
|
*/
|
||||||
export function recordToolResult(
|
|
||||||
toolCallId: string,
|
|
||||||
toolName: string,
|
|
||||||
result: unknown,
|
|
||||||
isError: boolean,
|
|
||||||
): void {
|
|
||||||
const normalizedName = toolName.toLowerCase();
|
|
||||||
|
|
||||||
if (normalizedName === "bash") {
|
|
||||||
const entry = findLastUnresolved("bash") as BashEvidence | undefined;
|
|
||||||
if (entry) {
|
|
||||||
entry.toolCallId = toolCallId;
|
|
||||||
const text = extractResultText(result);
|
|
||||||
entry.outputSnippet = text.slice(0, 500);
|
|
||||||
const exitMatch = text.match(/Command exited with code (\d+)/);
|
|
||||||
entry.exitCode = exitMatch ? Number(exitMatch[1]) : isError ? 1 : 0;
|
|
||||||
saveEvidenceToDisk(entry);
|
|
||||||
}
|
|
||||||
} else if (normalizedName === "write" || normalizedName === "edit") {
|
|
||||||
const entry = findLastUnresolved(normalizedName as "write" | "edit");
|
|
||||||
if (entry) {
|
|
||||||
entry.toolCallId = toolCallId;
|
|
||||||
saveEvidenceToDisk(entry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── Disk Load (session resume) ────────────────────────────────────────────
|
|
||||||
|
|
||||||
function evidencePath(basePath: string, milestoneId: string, sliceId: string, taskId: string): string {
|
|
||||||
return join(basePath, ".sf", "active", `${milestoneId}/${sliceId}/${taskId}`, "evidence.jsonl");
|
|
||||||
}
|
|
||||||
|
|
||||||
function isEvidenceArray(data: unknown): data is EvidenceEntry[] {
|
function isEvidenceArray(data: unknown): data is EvidenceEntry[] {
|
||||||
if (!Array.isArray(data)) return false;
|
if (!Array.isArray(data)) return false;
|
||||||
return data.every((e) => {
|
return data.every((e) => {
|
||||||
if (e === null || typeof e !== "object") return false;
|
if (e === null || typeof e !== "object") return false;
|
||||||
const rec = e as Record<string, unknown>;
|
const rec = e as Record<string, unknown>;
|
||||||
return typeof rec.toolCallId === "string" && typeof rec.kind === "string";
|
if (typeof rec.toolCallId !== "string") return false;
|
||||||
|
if (typeof rec.timestamp !== "number") return false;
|
||||||
|
if (rec.kind === "bash") {
|
||||||
|
return (
|
||||||
|
typeof rec.command === "string" &&
|
||||||
|
typeof rec.exitCode === "number" &&
|
||||||
|
typeof rec.outputSnippet === "string"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (rec.kind === "write" || rec.kind === "edit") {
|
||||||
|
return typeof rec.path === "string";
|
||||||
|
}
|
||||||
|
return false;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load evidence from disk into module state after resetEvidence().
|
* Persist the current in-memory evidence to disk so it survives a session
|
||||||
* Call on session resume so evidence collected before a pause is restored.
|
* restart. Called from saveEvidenceToDisk after recordToolCall/recordToolResult.
|
||||||
* No-op if the file does not exist (fresh unit).
|
* Non-fatal — persistence failures must never break unit execution.
|
||||||
|
*/
|
||||||
|
export function saveEvidenceToDisk(
|
||||||
|
basePath: string,
|
||||||
|
milestoneId: string,
|
||||||
|
sliceId: string,
|
||||||
|
taskId: string,
|
||||||
|
): void {
|
||||||
|
try {
|
||||||
|
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
|
||||||
|
mkdirSync(dirname(path), { recursive: true });
|
||||||
|
const tmp = `${path}.tmp.${randomBytes(4).toString("hex")}`;
|
||||||
|
writeFileSync(tmp, JSON.stringify(unitEvidence, null, 2) + "\n", "utf-8");
|
||||||
|
renameSync(tmp, path);
|
||||||
|
} catch {
|
||||||
|
// Non-fatal — don't let persistence failures break unit execution
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load persisted evidence from disk into the in-memory array.
|
||||||
|
* Call after resetEvidence() on session resume to restore context for a
|
||||||
|
* partially-executed unit. If the file does not exist (fresh unit), this
|
||||||
|
* is a no-op — getEvidence() will return [] which is correct.
|
||||||
*/
|
*/
|
||||||
export function loadEvidenceFromDisk(
|
export function loadEvidenceFromDisk(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
|
|
@ -206,37 +163,98 @@ export function loadEvidenceFromDisk(
|
||||||
try {
|
try {
|
||||||
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
|
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
|
||||||
if (!existsSync(path)) return;
|
if (!existsSync(path)) return;
|
||||||
const lines = readFileSync(path, "utf-8")
|
const raw = readFileSync(path, "utf-8");
|
||||||
.split("\n")
|
const parsed = JSON.parse(raw);
|
||||||
.filter((l) => l.trim().length > 0);
|
if (isEvidenceArray(parsed)) {
|
||||||
const entries: EvidenceEntry[] = [];
|
unitEvidence = parsed;
|
||||||
for (const line of lines) {
|
|
||||||
try {
|
|
||||||
const parsed = JSON.parse(line);
|
|
||||||
entries.push(parsed as EvidenceEntry);
|
|
||||||
} catch {
|
|
||||||
// Skip malformed lines
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (isEvidenceArray(entries)) {
|
|
||||||
unitEvidence = entries;
|
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Non-fatal — corrupt / missing file is treated as empty evidence
|
// Non-fatal — corrupt / missing file is treated as empty evidence
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── Internals ──────────────────────────────────────────────────────────────
|
/**
|
||||||
|
* Delete the persisted evidence file for a unit after it has been fully
|
||||||
function findLastUnresolved(kind: string): EvidenceEntry | undefined {
|
* processed. Prevents stale evidence from affecting future retries of
|
||||||
for (let i = unitEvidence.length - 1; i >= 0; i--) {
|
* the same unit ID.
|
||||||
if (unitEvidence[i].kind === kind && unitEvidence[i].toolCallId === "") {
|
*/
|
||||||
return unitEvidence[i];
|
export function clearEvidenceFromDisk(
|
||||||
|
basePath: string,
|
||||||
|
milestoneId: string,
|
||||||
|
sliceId: string,
|
||||||
|
taskId: string,
|
||||||
|
): void {
|
||||||
|
try {
|
||||||
|
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
|
||||||
|
if (existsSync(path)) {
|
||||||
|
unlinkSync(path);
|
||||||
}
|
}
|
||||||
|
} catch {
|
||||||
|
// Non-fatal
|
||||||
}
|
}
|
||||||
return undefined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a tool call at dispatch time (before execution).
|
||||||
|
* Exit codes and output are filled in by recordToolResult after execution.
|
||||||
|
*/
|
||||||
|
export function recordToolCall(
|
||||||
|
toolCallId: string,
|
||||||
|
toolName: string,
|
||||||
|
input: Record<string, unknown>,
|
||||||
|
): void {
|
||||||
|
if (toolName === "bash" || toolName === "Bash") {
|
||||||
|
unitEvidence.push({
|
||||||
|
kind: "bash",
|
||||||
|
toolCallId,
|
||||||
|
command: String(input.command ?? ""),
|
||||||
|
exitCode: -1,
|
||||||
|
outputSnippet: "",
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
|
} else if (toolName === "write" || toolName === "Write") {
|
||||||
|
unitEvidence.push({
|
||||||
|
kind: "write",
|
||||||
|
toolCallId,
|
||||||
|
path: String(input.file_path ?? input.path ?? ""),
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
|
} else if (toolName === "edit" || toolName === "Edit") {
|
||||||
|
unitEvidence.push({
|
||||||
|
kind: "edit",
|
||||||
|
toolCallId,
|
||||||
|
path: String(input.file_path ?? input.path ?? ""),
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record a tool execution result. Matches the entry by toolCallId (assigned
|
||||||
|
* at dispatch time) and fills in exit code + output. Prior versions matched
|
||||||
|
* by `kind + empty-string` which corrupted parallel tool calls.
|
||||||
|
*/
|
||||||
|
export function recordToolResult(
|
||||||
|
toolCallId: string,
|
||||||
|
toolName: string,
|
||||||
|
result: unknown,
|
||||||
|
isError: boolean,
|
||||||
|
): void {
|
||||||
|
const entry = unitEvidence.find((e) => e.toolCallId === toolCallId);
|
||||||
|
if (!entry) return;
|
||||||
|
|
||||||
|
if (entry.kind === "bash") {
|
||||||
|
const text = extractResultText(result);
|
||||||
|
entry.outputSnippet = text.slice(0, 500);
|
||||||
|
const exitMatch = text.match(/Command exited with code (\d+)/);
|
||||||
|
entry.exitCode = exitMatch ? Number(exitMatch[1]) : isError ? 1 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Internals ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
function extractResultText(result: unknown): string {
|
function extractResultText(result: unknown): string {
|
||||||
if (typeof result === "string") return result;
|
if (typeof result === "string") return result;
|
||||||
if (result && typeof result === "object") {
|
if (result && typeof result === "object") {
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,9 @@
|
||||||
*
|
*
|
||||||
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
|
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
|
||||||
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
|
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
|
||||||
* Defaults to git diff HEAD~1 --name-only after auto-commit. Deferred-commit
|
* Compares against `git diff-tree --root --no-commit-id -r --name-only HEAD` after auto-commit.
|
||||||
|
* Using diff-tree --root handles initial commits, shallow clones, and merge commits correctly
|
||||||
|
* (Bug — git diff HEAD~1 failed on initial commits). Deferred-commit
|
||||||
* flows can instead validate the staged diff before the commit is created.
|
* flows can instead validate the staged diff before the commit is created.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||||
|
|
@ -159,15 +161,12 @@ function getChangedFilesFromLastCommit(basePath: string): string[] | null {
|
||||||
try {
|
try {
|
||||||
const result = execFileSync(
|
const result = execFileSync(
|
||||||
"git",
|
"git",
|
||||||
["diff", "--name-only", "HEAD~1", "HEAD"],
|
["diff-tree", "--root", "--no-commit-id", "-r", "--name-only", "HEAD"],
|
||||||
{ cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
|
{ cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
|
||||||
).trim();
|
).trim();
|
||||||
return result ? result.split("\n").filter(Boolean) : [];
|
return result ? result.split("\n").filter(Boolean) : [];
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logWarning(
|
logWarning("safety", `git diff failed in file-change-validator: ${(e as Error).message}`);
|
||||||
"safety",
|
|
||||||
`git diff failed in file-change-validator: ${(e as Error).message}`,
|
|
||||||
);
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ export function createCheckpoint(
|
||||||
unitId: string,
|
unitId: string,
|
||||||
): string | null {
|
): string | null {
|
||||||
try {
|
try {
|
||||||
const sha = execFileSync("git", ["rev-parse", "HEAD"], {
|
const sha = execFileSync("git", ["rev-parse", "--verify", "HEAD"], {
|
||||||
cwd: basePath,
|
cwd: basePath,
|
||||||
stdio: ["ignore", "pipe", "pipe"],
|
stdio: ["ignore", "pipe", "pipe"],
|
||||||
encoding: "utf-8",
|
encoding: "utf-8",
|
||||||
|
|
@ -48,6 +48,17 @@ export function createCheckpoint(
|
||||||
|
|
||||||
return sha;
|
return sha;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
const stderr = (e as { stderr?: Buffer | string }).stderr;
|
||||||
|
const stderrText = Buffer.isBuffer(stderr)
|
||||||
|
? stderr.toString("utf-8")
|
||||||
|
: String(stderr ?? "");
|
||||||
|
if (
|
||||||
|
stderrText.includes("Needed a single revision") ||
|
||||||
|
stderrText.includes("unknown revision") ||
|
||||||
|
stderrText.includes("ambiguous argument 'HEAD'")
|
||||||
|
) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
|
logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -114,12 +114,15 @@ export type {
|
||||||
FileWriteEvidence,
|
FileWriteEvidence,
|
||||||
} from "./evidence-collector.js";
|
} from "./evidence-collector.js";
|
||||||
export {
|
export {
|
||||||
|
clearEvidenceFromDisk,
|
||||||
getBashEvidence,
|
getBashEvidence,
|
||||||
getEvidence,
|
getEvidence,
|
||||||
getFilePaths,
|
getFilePaths,
|
||||||
|
loadEvidenceFromDisk,
|
||||||
recordToolCall,
|
recordToolCall,
|
||||||
recordToolResult,
|
recordToolResult,
|
||||||
resetEvidence,
|
resetEvidence,
|
||||||
|
saveEvidenceToDisk,
|
||||||
} from "./evidence-collector.js";
|
} from "./evidence-collector.js";
|
||||||
export type {
|
export type {
|
||||||
ClaimedEvidence,
|
ClaimedEvidence,
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,9 @@ const BACKLOG_HEADER =
|
||||||
|
|
||||||
export type SelfFeedbackSeverity = "critical" | "high" | "medium" | "low";
|
export type SelfFeedbackSeverity = "critical" | "high" | "medium" | "low";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Context where a feedback entry occurred (milestone/slice/task/unit).
|
||||||
|
*/
|
||||||
export interface SelfFeedbackOccurredIn {
|
export interface SelfFeedbackOccurredIn {
|
||||||
milestone?: string;
|
milestone?: string;
|
||||||
slice?: string;
|
slice?: string;
|
||||||
|
|
@ -60,6 +63,9 @@ export interface SelfFeedbackOccurredIn {
|
||||||
unitType?: string;
|
unitType?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A self-feedback entry reporting an anomaly or issue detected during auto-mode execution.
|
||||||
|
*/
|
||||||
export interface SelfFeedbackEntry {
|
export interface SelfFeedbackEntry {
|
||||||
kind: string;
|
kind: string;
|
||||||
severity: SelfFeedbackSeverity;
|
severity: SelfFeedbackSeverity;
|
||||||
|
|
@ -105,6 +111,9 @@ export type ResolutionEvidence =
|
||||||
| { kind: "human-clear" }
|
| { kind: "human-clear" }
|
||||||
| { kind: "promoted-to-requirement"; requirementId: string };
|
| { kind: "promoted-to-requirement"; requirementId: string };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Persisted feedback entry with metadata: ID, timestamp, version, and resolution state.
|
||||||
|
*/
|
||||||
export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
|
export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
|
||||||
id: string;
|
id: string;
|
||||||
ts: string;
|
ts: string;
|
||||||
|
|
@ -120,6 +129,9 @@ export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
|
||||||
resolvedCriteriaMet?: string[];
|
resolvedCriteriaMet?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of recording a self-feedback entry. Contains the persisted entry and blocking status.
|
||||||
|
*/
|
||||||
export interface RecordResult {
|
export interface RecordResult {
|
||||||
entry: PersistedSelfFeedbackEntry;
|
entry: PersistedSelfFeedbackEntry;
|
||||||
/** True when callers should treat the originating unit as blocked. */
|
/** True when callers should treat the originating unit as blocked. */
|
||||||
|
|
@ -308,6 +320,9 @@ export function getBlockedEntries(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Input for marking a feedback entry as resolved with evidence and reasoning.
|
||||||
|
*/
|
||||||
export interface ResolutionInput {
|
export interface ResolutionInput {
|
||||||
reason: string;
|
reason: string;
|
||||||
evidence: ResolutionEvidence;
|
evidence: ResolutionEvidence;
|
||||||
|
|
@ -407,6 +422,9 @@ function compareSemver(a: string, b: string): number {
|
||||||
*
|
*
|
||||||
* Returns the entries split by retry-eligibility for the dispatcher to act on.
|
* Returns the entries split by retry-eligibility for the dispatcher to act on.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Result of triaging blocked entries by their version eligibility for retry.
|
||||||
|
*/
|
||||||
export interface BlockedTriage {
|
export interface BlockedTriage {
|
||||||
retry: PersistedSelfFeedbackEntry[];
|
retry: PersistedSelfFeedbackEntry[];
|
||||||
stillBlocked: PersistedSelfFeedbackEntry[];
|
stillBlocked: PersistedSelfFeedbackEntry[];
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,8 @@ const activelyLoadedSkills = new Set<string>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Capture the list of available skill names at dispatch time.
|
* Capture the list of available skill names at dispatch time.
|
||||||
* Called before each unit starts.
|
*
|
||||||
|
* Called before each unit starts to establish the baseline for telemetry.
|
||||||
*/
|
*/
|
||||||
export function captureAvailableSkills(): void {
|
export function captureAvailableSkills(): void {
|
||||||
const skillsDir = join(homedir(), ".agents", "skills");
|
const skillsDir = join(homedir(), ".agents", "skills");
|
||||||
|
|
@ -44,18 +45,19 @@ export function captureAvailableSkills(): void {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record that a skill was actively loaded (its SKILL.md was read).
|
* Record that a skill was actively loaded during execution.
|
||||||
* Call this when the agent reads a SKILL.md file.
|
*
|
||||||
|
* @param skillName - Name of the skill file that was read.
|
||||||
*/
|
*/
|
||||||
export function recordSkillRead(skillName: string): void {
|
export function recordSkillRead(skillName: string): void {
|
||||||
activelyLoadedSkills.add(skillName);
|
activelyLoadedSkills.add(skillName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the skill names for the current unit and clear state.
|
* Retrieve captured skills for the current unit and reset state.
|
||||||
* Returns actively loaded skills if any, otherwise available skills.
|
*
|
||||||
* This gives the most useful signal: if the agent read specific skills,
|
* Returns actively loaded skills if any were read, otherwise returns available skills.
|
||||||
* report those; otherwise report what was available.
|
* Clears all telemetry state after retrieval.
|
||||||
*/
|
*/
|
||||||
export function getAndClearSkills(): string[] {
|
export function getAndClearSkills(): string[] {
|
||||||
const result =
|
const result =
|
||||||
|
|
@ -68,7 +70,7 @@ export function getAndClearSkills(): string[] {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset all telemetry state. Called when auto-mode stops.
|
* Clear all telemetry state. Called when auto-mode stops.
|
||||||
*/
|
*/
|
||||||
export function resetSkillTelemetry(): void {
|
export function resetSkillTelemetry(): void {
|
||||||
availableSkills = [];
|
availableSkills = [];
|
||||||
|
|
@ -76,8 +78,10 @@ export function resetSkillTelemetry(): void {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get last-used timestamps for all skills from metrics data.
|
* Extract last-used timestamp for each skill from unit metrics.
|
||||||
* Returns a Map from skill name to most recent ms timestamp.
|
*
|
||||||
|
* @param units - Array of unit metrics with skill lists and timestamps.
|
||||||
|
* @returns Map from skill name to most recent finishedAt timestamp.
|
||||||
*/
|
*/
|
||||||
export function getSkillLastUsed(
|
export function getSkillLastUsed(
|
||||||
units: Array<{ finishedAt: number; skills?: string[] }>,
|
units: Array<{ finishedAt: number; skills?: string[] }>,
|
||||||
|
|
@ -96,8 +100,11 @@ export function getSkillLastUsed(
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Detect stale skills — those not used within the given threshold (in days).
|
* Identify skills unused since a given threshold, including untracked installs.
|
||||||
* Returns skill names that should be deprioritized.
|
*
|
||||||
|
* @param units - Unit metrics containing skill usage.
|
||||||
|
* @param thresholdDays - Days of inactivity to consider stale.
|
||||||
|
* @returns All installed skills exceeding the staleness threshold.
|
||||||
*/
|
*/
|
||||||
export function detectStaleSkills(
|
export function detectStaleSkills(
|
||||||
units: Array<{ finishedAt: number; skills?: string[] }>,
|
units: Array<{ finishedAt: number; skills?: string[] }>,
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,9 @@ export interface EligibleSlice {
|
||||||
* @param completedSliceIds Set of slice IDs that are already complete.
|
* @param completedSliceIds Set of slice IDs that are already complete.
|
||||||
* @returns Array of eligible slice descriptors.
|
* @returns Array of eligible slice descriptors.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Determine which slices are eligible to run based on explicit dependencies or positional order.
|
||||||
|
*/
|
||||||
export function getEligibleSlices(
|
export function getEligibleSlices(
|
||||||
slices: SliceInput[],
|
slices: SliceInput[],
|
||||||
completedSliceIds: Set<string>,
|
completedSliceIds: Set<string>,
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,9 @@ let sliceState: SliceOrchestratorState | null = null;
|
||||||
/**
|
/**
|
||||||
* Check whether slice-level parallel is currently active.
|
* Check whether slice-level parallel is currently active.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Check if slice parallel execution is currently active.
|
||||||
|
*/
|
||||||
export function isSliceParallelActive(): boolean {
|
export function isSliceParallelActive(): boolean {
|
||||||
return sliceState?.active === true;
|
return sliceState?.active === true;
|
||||||
}
|
}
|
||||||
|
|
@ -79,6 +82,9 @@ export function isSliceParallelActive(): boolean {
|
||||||
/**
|
/**
|
||||||
* Get current slice orchestrator state (read-only snapshot).
|
* Get current slice orchestrator state (read-only snapshot).
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Get the current slice orchestrator state, including active workers and cost tracking.
|
||||||
|
*/
|
||||||
export function getSliceOrchestratorState(): SliceOrchestratorState | null {
|
export function getSliceOrchestratorState(): SliceOrchestratorState | null {
|
||||||
return sliceState;
|
return sliceState;
|
||||||
}
|
}
|
||||||
|
|
@ -89,6 +95,9 @@ export function getSliceOrchestratorState(): SliceOrchestratorState | null {
|
||||||
* For each eligible slice: create a worktree, spawn `sf --mode json --print "/sf autonomous"`
|
* For each eligible slice: create a worktree, spawn `sf --mode json --print "/sf autonomous"`
|
||||||
* with env SF_SLICE_LOCK=<SID> + SF_MILESTONE_LOCK=<MID> + SF_PARALLEL_WORKER=1.
|
* with env SF_SLICE_LOCK=<SID> + SF_MILESTONE_LOCK=<MID> + SF_PARALLEL_WORKER=1.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Start parallel execution of eligible slices with worker processes and budget tracking.
|
||||||
|
*/
|
||||||
export async function startSliceParallel(
|
export async function startSliceParallel(
|
||||||
basePath: string,
|
basePath: string,
|
||||||
milestoneId: string,
|
milestoneId: string,
|
||||||
|
|
@ -201,6 +210,9 @@ export async function startSliceParallel(
|
||||||
/**
|
/**
|
||||||
* Stop all slice-parallel workers and deactivate.
|
* Stop all slice-parallel workers and deactivate.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Terminate all active slice workers and clean up their worktrees.
|
||||||
|
*/
|
||||||
export function stopSliceParallel(): void {
|
export function stopSliceParallel(): void {
|
||||||
if (!sliceState) return;
|
if (!sliceState) return;
|
||||||
|
|
||||||
|
|
@ -235,6 +247,9 @@ export function stopSliceParallel(): void {
|
||||||
/**
|
/**
|
||||||
* Get aggregate cost across all slice workers.
|
* Get aggregate cost across all slice workers.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Calculate total cost across all active slice workers.
|
||||||
|
*/
|
||||||
export function getSliceAggregateCost(): number {
|
export function getSliceAggregateCost(): number {
|
||||||
if (!sliceState) return 0;
|
if (!sliceState) return 0;
|
||||||
let total = 0;
|
let total = 0;
|
||||||
|
|
@ -247,6 +262,9 @@ export function getSliceAggregateCost(): number {
|
||||||
/**
|
/**
|
||||||
* Check if budget ceiling has been exceeded.
|
* Check if budget ceiling has been exceeded.
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Check if total slice cost has met or exceeded the budget ceiling.
|
||||||
|
*/
|
||||||
export function isSliceBudgetExceeded(): boolean {
|
export function isSliceBudgetExceeded(): boolean {
|
||||||
if (!sliceState?.budgetCeiling) return false;
|
if (!sliceState?.budgetCeiling) return false;
|
||||||
return getSliceAggregateCost() >= sliceState.budgetCeiling;
|
return getSliceAggregateCost() >= sliceState.budgetCeiling;
|
||||||
|
|
@ -255,6 +273,9 @@ export function isSliceBudgetExceeded(): boolean {
|
||||||
/**
|
/**
|
||||||
* Reset module state (for testing).
|
* Reset module state (for testing).
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Reset orchestrator state and clear all worker cleanup handlers.
|
||||||
|
*/
|
||||||
export function resetSliceOrchestrator(): void {
|
export function resetSliceOrchestrator(): void {
|
||||||
if (sliceState) {
|
if (sliceState) {
|
||||||
for (const w of sliceState.workers.values()) {
|
for (const w of sliceState.workers.values()) {
|
||||||
|
|
|
||||||
133
src/resources/extensions/sf/tools/skip-slice.ts
Normal file
133
src/resources/extensions/sf/tools/skip-slice.ts
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
/**
|
||||||
|
* skip-slice handler — the core operation behind sf_skip_slice.
|
||||||
|
*
|
||||||
|
* Marks a slice as skipped and cascades the skip to every non-closed task in
|
||||||
|
* that slice. Without the task cascade the deep-check in
|
||||||
|
* executeCompleteMilestone reports pending tasks inside the skipped slice and
|
||||||
|
* blocks milestone completion (see #4375).
|
||||||
|
*
|
||||||
|
* This function performs DB writes only. The MCP wrapper in
|
||||||
|
* bootstrap/db-tools.ts handles state-cache invalidation and STATE.md rebuild.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
getSlice,
|
||||||
|
getSliceTasks,
|
||||||
|
isDbAvailable,
|
||||||
|
transaction,
|
||||||
|
updateSliceStatus,
|
||||||
|
updateTaskStatus,
|
||||||
|
} from "../sf-db.js";
|
||||||
|
import { isClosedStatus } from "../status-guards.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Input parameters for {@link handleSkipSlice}.
|
||||||
|
*
|
||||||
|
* - `milestoneId` / `sliceId` identify the target slice.
|
||||||
|
* - `reason` is a free-form note surfaced in the MCP response; optional
|
||||||
|
* because the caller (e.g. rethink flow) may not have a structured reason.
|
||||||
|
*/
|
||||||
|
export interface SkipSliceParams {
|
||||||
|
milestoneId: string;
|
||||||
|
sliceId: string;
|
||||||
|
reason?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stable machine-readable error codes for {@link SkipSliceResult.error}.
|
||||||
|
* Keep in sync with the wrapper in bootstrap/db-tools.ts.
|
||||||
|
*/
|
||||||
|
export type SkipSliceErrorCode = "slice_not_found" | "already_complete";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of a {@link handleSkipSlice} call.
|
||||||
|
*
|
||||||
|
* - `tasksSkipped` — count of tasks whose status was cascaded to "skipped".
|
||||||
|
* Zero is a valid success (slice had no non-closed tasks).
|
||||||
|
* - `wasAlreadySkipped` — true when the slice was in "skipped" status on
|
||||||
|
* entry; callers can use this to distinguish first-skip from re-skip.
|
||||||
|
* - `error` / `errorCode` — set together for recoverable validation failures
|
||||||
|
* (unknown slice, slice already complete). Both absent on success. DB
|
||||||
|
* errors propagate as thrown exceptions and should be caught by the caller.
|
||||||
|
*/
|
||||||
|
export interface SkipSliceResult {
|
||||||
|
milestoneId: string;
|
||||||
|
sliceId: string;
|
||||||
|
tasksSkipped: number;
|
||||||
|
wasAlreadySkipped: boolean;
|
||||||
|
reason?: string;
|
||||||
|
error?: string;
|
||||||
|
errorCode?: SkipSliceErrorCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark a slice as "skipped" and cascade the skip to every non-closed task in
|
||||||
|
* that slice. Runs as a single transaction so slice status and task statuses
|
||||||
|
* are always consistent.
|
||||||
|
*
|
||||||
|
* Behaviour summary:
|
||||||
|
* - Unknown slice → returns {@link SkipSliceResult} with `error`.
|
||||||
|
* - Slice already complete/done → returns `error` (cannot un-complete).
|
||||||
|
* - Slice already skipped → still cascades leftover non-closed tasks
|
||||||
|
* (heals inconsistent historical state from projects that ran older
|
||||||
|
* versions before the #4375 cascade fix).
|
||||||
|
* - Tasks in closed status (complete/done/skipped) are never downgraded.
|
||||||
|
*/
|
||||||
|
export function handleSkipSlice(params: SkipSliceParams): SkipSliceResult {
|
||||||
|
const base: SkipSliceResult = {
|
||||||
|
milestoneId: params.milestoneId,
|
||||||
|
sliceId: params.sliceId,
|
||||||
|
tasksSkipped: 0,
|
||||||
|
wasAlreadySkipped: false,
|
||||||
|
reason: params.reason,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fail loudly on a closed DB so a `null` from getSlice() inside the
|
||||||
|
// transaction unambiguously means "slice not found", never "DB unavailable".
|
||||||
|
// The MCP wrapper in bootstrap/db-tools.ts runs ensureDbOpen() before calling
|
||||||
|
// this helper; this guard protects direct callers (tests, future code).
|
||||||
|
if (!isDbAvailable()) {
|
||||||
|
throw new Error("handleSkipSlice: SF database is not available");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Guards + DB writes inside a single transaction (prevents TOCTOU) ────
|
||||||
|
let guardError: string | null = null;
|
||||||
|
let guardCode: SkipSliceErrorCode | null = null;
|
||||||
|
let wasAlreadySkipped = false;
|
||||||
|
let tasksSkipped = 0;
|
||||||
|
|
||||||
|
transaction(() => {
|
||||||
|
const slice = getSlice(params.milestoneId, params.sliceId);
|
||||||
|
if (!slice) {
|
||||||
|
guardError = `Slice ${params.sliceId} not found in milestone ${params.milestoneId}`;
|
||||||
|
guardCode = "slice_not_found";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (slice.status === "complete" || slice.status === "done") {
|
||||||
|
guardError = `Slice ${params.sliceId} is already complete — cannot skip.`;
|
||||||
|
guardCode = "already_complete";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
wasAlreadySkipped = slice.status === "skipped";
|
||||||
|
if (!wasAlreadySkipped) {
|
||||||
|
updateSliceStatus(params.milestoneId, params.sliceId, "skipped");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cascade: mark every non-closed task as skipped so milestone completion
|
||||||
|
// doesn't trip the deep-task guard (#4375). Closed tasks (complete/done/
|
||||||
|
// skipped) are left untouched — we never downgrade.
|
||||||
|
const tasks = getSliceTasks(params.milestoneId, params.sliceId);
|
||||||
|
for (const task of tasks) {
|
||||||
|
if (!isClosedStatus(task.status)) {
|
||||||
|
updateTaskStatus(params.milestoneId, params.sliceId, task.id, "skipped");
|
||||||
|
tasksSkipped++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (guardError) {
|
||||||
|
return { ...base, error: guardError, errorCode: guardCode ?? undefined };
|
||||||
|
}
|
||||||
|
return { ...base, tasksSkipped, wasAlreadySkipped };
|
||||||
|
}
|
||||||
|
|
@ -345,6 +345,9 @@ function renderRiskHeatmap(
|
||||||
|
|
||||||
// ─── Dependencies View ───────────────────────────────────────────────────────
|
// ─── Dependencies View ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render milestone/slice dependencies and critical path with data flow.
|
||||||
|
*/
|
||||||
export function renderDepsView(
|
export function renderDepsView(
|
||||||
data: VisualizerData,
|
data: VisualizerData,
|
||||||
th: Theme,
|
th: Theme,
|
||||||
|
|
@ -506,6 +509,9 @@ function renderCriticalPath(
|
||||||
|
|
||||||
// ─── Metrics View ────────────────────────────────────────────────────────────
|
// ─── Metrics View ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render cost and usage metrics by phase, model, tier, and projections.
|
||||||
|
*/
|
||||||
export function renderMetricsView(
|
export function renderMetricsView(
|
||||||
data: VisualizerData,
|
data: VisualizerData,
|
||||||
th: Theme,
|
th: Theme,
|
||||||
|
|
@ -687,6 +693,9 @@ function renderCostProjections(
|
||||||
|
|
||||||
// ─── Timeline View (Gantt) ──────────────────────────────────────────────────
|
// ─── Timeline View (Gantt) ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render execution timeline as Gantt view (wide) or list view (narrow).
|
||||||
|
*/
|
||||||
export function renderTimelineView(
|
export function renderTimelineView(
|
||||||
data: VisualizerData,
|
data: VisualizerData,
|
||||||
th: Theme,
|
th: Theme,
|
||||||
|
|
@ -850,6 +859,9 @@ function formatTimeLabel(ts: number): string {
|
||||||
|
|
||||||
// ─── Agent View ──────────────────────────────────────────────────────────────
|
// ─── Agent View ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render current agent status, completion progress, budget pressure, and recent units.
|
||||||
|
*/
|
||||||
export function renderAgentView(
|
export function renderAgentView(
|
||||||
data: VisualizerData,
|
data: VisualizerData,
|
||||||
th: Theme,
|
th: Theme,
|
||||||
|
|
@ -976,6 +988,9 @@ export function renderAgentView(
|
||||||
|
|
||||||
// ─── Changelog View ──────────────────────────────────────────────────────────
|
// ─── Changelog View ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render completed slices with one-liners, files modified, decisions, and patterns.
|
||||||
|
*/
|
||||||
export function renderChangelogView(
|
export function renderChangelogView(
|
||||||
data: VisualizerData,
|
data: VisualizerData,
|
||||||
th: Theme,
|
th: Theme,
|
||||||
|
|
@ -1041,6 +1056,9 @@ export function renderChangelogView(
|
||||||
|
|
||||||
// ─── Export View ─────────────────────────────────────────────────────────────
|
// ─── Export View ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render export options (markdown, JSON, snapshot) with last export path.
|
||||||
|
*/
|
||||||
export function renderExportView(
|
export function renderExportView(
|
||||||
_data: VisualizerData,
|
_data: VisualizerData,
|
||||||
th: Theme,
|
th: Theme,
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ function getWorktreeCompletions(prefix: string) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Register a lazy-loaded worktree command alias via dynamic import. */
|
||||||
function registerLazyWorktreeAlias(
|
function registerLazyWorktreeAlias(
|
||||||
pi: ExtensionAPI,
|
pi: ExtensionAPI,
|
||||||
name: "worktree" | "wt",
|
name: "worktree" | "wt",
|
||||||
|
|
@ -60,6 +61,7 @@ function registerLazyWorktreeAlias(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Register /worktree and /wt commands with lazy loading via dynamic import. */
|
||||||
export function registerLazyWorktreeCommands(pi: ExtensionAPI): void {
|
export function registerLazyWorktreeCommands(pi: ExtensionAPI): void {
|
||||||
registerLazyWorktreeAlias(
|
registerLazyWorktreeAlias(
|
||||||
pi,
|
pi,
|
||||||
|
|
|
||||||
|
|
@ -54,12 +54,18 @@ import {
|
||||||
*/
|
*/
|
||||||
let originalCwd: string | null = null;
|
let originalCwd: string | null = null;
|
||||||
|
|
||||||
/** Get the original project root if currently in a worktree, or null. */
|
/**
|
||||||
|
* Get the original project root if currently in a worktree, or null.
|
||||||
|
* Used to restore context after `/worktree merge` or `/worktree return`.
|
||||||
|
*/
|
||||||
export function getWorktreeOriginalCwd(): string | null {
|
export function getWorktreeOriginalCwd(): string | null {
|
||||||
return originalCwd;
|
return originalCwd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Get the name of the active worktree, or null if not in one. */
|
/**
|
||||||
|
* Get the name of the active worktree, or null if not in one.
|
||||||
|
* Extracts from .sf/worktrees/ path segment.
|
||||||
|
*/
|
||||||
export function getActiveWorktreeName(): string | null {
|
export function getActiveWorktreeName(): string | null {
|
||||||
if (!originalCwd) return null;
|
if (!originalCwd) return null;
|
||||||
const cwd = process.cwd();
|
const cwd = process.cwd();
|
||||||
|
|
@ -263,6 +269,10 @@ async function worktreeHandler(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main handler for /worktree and /wt commands — routes to subcommand handlers.
|
||||||
|
* Manages worktree state tracking (originalCwd) across chdir calls.
|
||||||
|
*/
|
||||||
export async function handleWorktreeCommand(
|
export async function handleWorktreeCommand(
|
||||||
args: string,
|
args: string,
|
||||||
ctx: ExtensionCommandContext,
|
ctx: ExtensionCommandContext,
|
||||||
|
|
@ -272,6 +282,7 @@ export async function handleWorktreeCommand(
|
||||||
await worktreeHandler(args, ctx, pi, alias);
|
await worktreeHandler(args, ctx, pi, alias);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Register /worktree and /wt commands with completion support. */
|
||||||
export function registerWorktreeCommand(pi: ExtensionAPI): void {
|
export function registerWorktreeCommand(pi: ExtensionAPI): void {
|
||||||
// Restore worktree state after /reload.
|
// Restore worktree state after /reload.
|
||||||
// The module-level originalCwd resets to null when extensions are re-loaded,
|
// The module-level originalCwd resets to null when extensions are re-loaded,
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,10 @@ import { listWorktrees, type WorktreeInfo } from "./worktree-manager.js";
|
||||||
|
|
||||||
// ─── Types ─────────────────────────────────────────────────────────────────
|
// ─── Types ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Health and lifecycle status of a single worktree.
|
||||||
|
* Used for audits, health checks, and the `/worktree list` status display.
|
||||||
|
*/
|
||||||
export interface WorktreeHealthStatus {
|
export interface WorktreeHealthStatus {
|
||||||
/** The worktree info from worktree-manager */
|
/** The worktree info from worktree-manager */
|
||||||
worktree: WorktreeInfo;
|
worktree: WorktreeInfo;
|
||||||
|
|
|
||||||
179
src/resources/extensions/sf/worktree-root.ts
Normal file
179
src/resources/extensions/sf/worktree-root.ts
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
import { existsSync, readFileSync, realpathSync, statSync } from "node:fs";
|
||||||
|
import { homedir } from "node:os";
|
||||||
|
import { join, resolve } from "node:path";
|
||||||
|
|
||||||
|
function sfHome(): string {
|
||||||
|
return process.env.SF_HOME || join(homedir(), ".sf");
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface WorktreeSegment {
|
||||||
|
sfIdx: number;
|
||||||
|
afterWorktrees: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function normalizeWorktreePathForCompare(path: string): string {
|
||||||
|
let normalized: string;
|
||||||
|
try {
|
||||||
|
normalized = realpathSync(path);
|
||||||
|
} catch {
|
||||||
|
normalized = resolve(path);
|
||||||
|
}
|
||||||
|
const slashed = normalized.replaceAll("\\", "/");
|
||||||
|
const trimmed = slashed.replace(/\/+$/, "");
|
||||||
|
return process.platform === "win32" ? (trimmed || "/").toLowerCase() : (trimmed || "/");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the SF worktree segment in both direct project layout and the
|
||||||
|
* symlink-resolved external-state layout used by ~/.sf/projects/<hash>.
|
||||||
|
*/
|
||||||
|
export function findWorktreeSegment(normalizedPath: string): WorktreeSegment | null {
|
||||||
|
const directMarker = "/.sf/worktrees/";
|
||||||
|
const directIdx = normalizedPath.indexOf(directMarker);
|
||||||
|
if (directIdx !== -1) {
|
||||||
|
return { sfIdx: directIdx, afterWorktrees: directIdx + directMarker.length };
|
||||||
|
}
|
||||||
|
|
||||||
|
const externalRe = /\/\.sf\/projects\/[^/]+\/worktrees\//;
|
||||||
|
const externalMatch = normalizedPath.match(externalRe);
|
||||||
|
if (externalMatch && externalMatch.index !== undefined) {
|
||||||
|
return {
|
||||||
|
sfIdx: externalMatch.index,
|
||||||
|
afterWorktrees: externalMatch.index + externalMatch[0].length,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isSfWorktreePath(path: string): boolean {
|
||||||
|
return findWorktreeSegment(path.replaceAll("\\", "/")) !== null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve the canonical project root for worktree operations.
|
||||||
|
*
|
||||||
|
* `originalBasePath` wins when available because session state already knows the
|
||||||
|
* root. `SF_PROJECT_ROOT` is the next strongest signal for worker processes.
|
||||||
|
* Otherwise, derive the root from direct `.sf/worktrees` paths, or recover it
|
||||||
|
* from the worktree `.git` file for symlink-resolved ~/.sf/project paths.
|
||||||
|
*/
|
||||||
|
export function resolveWorktreeProjectRoot(
|
||||||
|
basePath: string,
|
||||||
|
originalBasePath?: string | null,
|
||||||
|
): string {
|
||||||
|
const preferred =
|
||||||
|
originalBasePath?.trim() ||
|
||||||
|
process.env.SF_PROJECT_ROOT?.trim() ||
|
||||||
|
basePath;
|
||||||
|
|
||||||
|
return resolveProjectRootFromPath(preferred);
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveProjectRootFromPath(path: string): string {
|
||||||
|
const normalizedPath = path.replaceAll("\\", "/");
|
||||||
|
const segment = findWorktreeSegment(normalizedPath);
|
||||||
|
if (!segment) {
|
||||||
|
return resolveNearestBootstrappedSfRoot(path) ?? resolveGitWorkingTreeRoot(path) ?? path;
|
||||||
|
}
|
||||||
|
|
||||||
|
const sepChar = path.includes("\\") ? "\\" : "/";
|
||||||
|
const sfMarker = `${sepChar}.sf${sepChar}`;
|
||||||
|
const markerIdx = path.indexOf(sfMarker);
|
||||||
|
const candidate = markerIdx !== -1
|
||||||
|
? path.slice(0, markerIdx)
|
||||||
|
: path.slice(0, segment.sfIdx);
|
||||||
|
|
||||||
|
const sfHomeNorm = normalizeWorktreePathForCompare(sfHome());
|
||||||
|
const candidateSfPath = normalizeWorktreePathForCompare(join(candidate, ".sf"));
|
||||||
|
|
||||||
|
if (candidateSfPath === sfHomeNorm || candidateSfPath.startsWith(`${sfHomeNorm}/`)) {
|
||||||
|
const realRoot = resolveProjectRootFromGitFile(path);
|
||||||
|
return realRoot ?? path;
|
||||||
|
}
|
||||||
|
|
||||||
|
return candidate;
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveNearestBootstrappedSfRoot(path: string): string | null {
|
||||||
|
try {
|
||||||
|
let dir = existsSync(path) && !statSync(path).isDirectory()
|
||||||
|
? resolve(path, "..")
|
||||||
|
: path;
|
||||||
|
|
||||||
|
for (let i = 0; i < 30; i++) {
|
||||||
|
if (hasSfBootstrapArtifacts(join(dir, ".sf"))) return dir;
|
||||||
|
|
||||||
|
const gitPath = join(dir, ".git");
|
||||||
|
if (existsSync(gitPath)) return null;
|
||||||
|
|
||||||
|
const parent = resolve(dir, "..");
|
||||||
|
if (parent === dir) break;
|
||||||
|
dir = parent;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Non-fatal: callers fall back to git root resolution.
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function hasSfBootstrapArtifacts(sfPath: string): boolean {
|
||||||
|
return existsSync(sfPath) &&
|
||||||
|
(existsSync(join(sfPath, "PREFERENCES.md")) ||
|
||||||
|
existsSync(join(sfPath, "preferences.md")) ||
|
||||||
|
existsSync(join(sfPath, "milestones")));
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveGitWorkingTreeRoot(path: string): string | null {
|
||||||
|
try {
|
||||||
|
let dir = existsSync(path) && !statSync(path).isDirectory()
|
||||||
|
? resolve(path, "..")
|
||||||
|
: path;
|
||||||
|
|
||||||
|
for (let i = 0; i < 30; i++) {
|
||||||
|
const gitPath = join(dir, ".git");
|
||||||
|
if (existsSync(gitPath)) return dir;
|
||||||
|
|
||||||
|
const parent = resolve(dir, "..");
|
||||||
|
if (parent === dir) break;
|
||||||
|
dir = parent;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Non-fatal: callers either keep the original path or fail closed.
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Recover the main project root from a linked worktree's `.git` *file*.
 *
 * Walks upward from `worktreePath` (max 30 levels) until a `.git` entry is
 * found. For a linked worktree, `.git` is a one-line file of the form
 * `gitdir: <main>/.git/worktrees/<name>`; the main root is derived from that
 * pointer, falling back to the per-worktree `commondir` file for
 * non-standard layouts. Returns null when no usable pointer is found.
 *
 * @param worktreePath — a path inside (or at) the worktree to inspect
 * @returns the main project root, or null when it cannot be recovered
 */
function resolveProjectRootFromGitFile(worktreePath: string): string | null {
  try {
    let dir = worktreePath;
    for (let i = 0; i < 30; i++) {
      const gitPath = join(dir, ".git");
      if (existsSync(gitPath)) {
        // NOTE(review): when `.git` is a directory (a regular checkout, not
        // a linked worktree), readFileSync is expected to throw and land in
        // the catch below, yielding null — confirm this is intentional.
        const content = readFileSync(gitPath, "utf8").trim();
        if (content.startsWith("gitdir: ")) {
          // For the standard layout, gitDir = <main>/.git/worktrees/<name>.
          const gitDir = resolve(dir, content.slice(8));
          // Two levels up from the per-worktree dir is the main `.git` dir.
          const dotGitDir = resolve(gitDir, "..", "..");
          if (dotGitDir.endsWith(".git") || dotGitDir.endsWith(".git/") || dotGitDir.endsWith(".git\\")) {
            return resolve(dotGitDir, "..");
          }

          // Non-standard layout: the worktree's `commondir` file points at
          // the shared `.git` directory; the root is its parent.
          const commonDirPath = join(gitDir, "commondir");
          if (existsSync(commonDirPath)) {
            const commonDir = readFileSync(commonDirPath, "utf8").trim();
            const resolvedCommonDir = resolve(gitDir, commonDir);
            return resolve(resolvedCommonDir, "..");
          }
        }
        // A `.git` entry was found but not resolvable: stop walking.
        break;
      }

      const parent = resolve(dir, "..");
      if (parent === dir) break; // filesystem root
      dir = parent;
    }
  } catch {
    // Non-fatal: callers either keep the original path or fail closed.
  }
  return null;
}
|
||||||
35
src/resources/extensions/sf/worktree-session-state.ts
Normal file
35
src/resources/extensions/sf/worktree-session-state.ts
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
// SF worktree session state
|
||||||
|
let originalCwd: string | null = null;
|
||||||
|
|
||||||
|
export function getWorktreeOriginalCwd(): string | null {
|
||||||
|
return originalCwd;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function setWorktreeOriginalCwd(cwd: string): void {
|
||||||
|
originalCwd = cwd;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function clearWorktreeOriginalCwd(): void {
|
||||||
|
originalCwd = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function ensureWorktreeOriginalCwdFromPath(cwd: string = process.cwd()): string | null {
|
||||||
|
if (originalCwd) return originalCwd;
|
||||||
|
const marker = `${/\\/.test(cwd) ? "\\" : "/"}.sf${/\\/.test(cwd) ? "\\" : "/"}worktrees${/\\/.test(cwd) ? "\\" : "/"}`;
|
||||||
|
const markerIdx = cwd.indexOf(marker);
|
||||||
|
if (markerIdx !== -1) {
|
||||||
|
originalCwd = cwd.slice(0, markerIdx);
|
||||||
|
}
|
||||||
|
return originalCwd;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getActiveWorktreeName(): string | null {
|
||||||
|
if (!originalCwd) return null;
|
||||||
|
const cwd = process.cwd();
|
||||||
|
const wtDir = `${originalCwd.replace(/[\\/]+$/, "")}/.sf/worktrees`.replaceAll("\\", "/");
|
||||||
|
const normalizedCwd = cwd.replaceAll("\\", "/");
|
||||||
|
if (!normalizedCwd.startsWith(`${wtDir}/`)) return null;
|
||||||
|
const rel = normalizedCwd.slice(wtDir.length + 1);
|
||||||
|
const name = rel.split("/")[0];
|
||||||
|
return name || null;
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue