From 51d0a06bbc2c4b24e546a1213d171b36e4476123 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 2 May 2026 02:20:33 +0200 Subject: [PATCH] docs(sf): model + routing + provider cluster docstrings 49 JSDoc blocks across 10 files (model-router, model-cost-table, auto-model-selection, benchmark-selector, blocked-models, preferences-models, session-model-override, provider-error-pause, error-classifier, token-counter). ADR references preserved (ADR-004 capability-aware routing, ADR-005 multi-model provider tools, ADR-007 model catalog split). Typecheck clean. Co-Authored-By: Claude Sonnet 4.6 --- src/resources/extensions/sf/atomic-write.ts | 6 +- src/resources/extensions/sf/auto-post-unit.ts | 19 ++ .../extensions/sf/eval-review-schema.ts | 243 ++++++++++++++++++ .../extensions/sf/memory-source-store.ts | 138 ++++++++++ .../extensions/sf/preferences-migrations.ts | 3 + .../extensions/sf/workflow-manifest.ts | 24 ++ .../extensions/sf/worktree-resolver.ts | 2 +- 7 files changed, 431 insertions(+), 4 deletions(-) create mode 100644 src/resources/extensions/sf/eval-review-schema.ts create mode 100644 src/resources/extensions/sf/memory-source-store.ts diff --git a/src/resources/extensions/sf/atomic-write.ts b/src/resources/extensions/sf/atomic-write.ts index c8ddc8f8a..84563c4ad 100644 --- a/src/resources/extensions/sf/atomic-write.ts +++ b/src/resources/extensions/sf/atomic-write.ts @@ -130,7 +130,7 @@ export async function atomicWriteAsyncWithOps( const tmpPath = ops.createTempPath?.(filePath) ?? 
defaultTempPath(filePath); await ops.writeFile(tmpPath, content, encoding); - let lastError: unknown = null; + const errors: unknown[] = []; let attempts = 0; for (attempts = 1; attempts <= MAX_RENAME_ATTEMPTS; attempts++) { @@ -138,7 +138,7 @@ export async function atomicWriteAsyncWithOps( await ops.rename(tmpPath, filePath); return; } catch (error) { - lastError = error; + errors.push(error); if (!isTransientLockError(error) || attempts === MAX_RENAME_ATTEMPTS) { break; } @@ -147,7 +147,7 @@ export async function atomicWriteAsyncWithOps( } await cleanupTempFileAsync(tmpPath, ops); - throw buildAtomicWriteError(filePath, attempts, lastError); + throw buildAtomicWriteError(filePath, attempts, errors); } /** @internal Exported for retry/cleanup tests. */ diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts index a6378d1af..ffb61282f 100644 --- a/src/resources/extensions/sf/auto-post-unit.ts +++ b/src/resources/extensions/sf/auto-post-unit.ts @@ -1146,6 +1146,25 @@ export async function postUnitPreVerification( }); } } + + // Clear persisted evidence file now that post-unit processing is complete + // (Bug #4385 — prevents stale evidence from affecting retries of same unit ID). + if ( + safetyConfig.evidence_collection && + s.currentUnit.type === "execute-task" && + sMid && + sSid && + sTid + ) { + try { + clearEvidenceFromDisk(s.basePath, sMid, sSid, sTid); + } catch (e) { + debugLog("postUnit", { + phase: "safety-evidence-clear", + error: String(e), + }); + } + } } } catch (e) { debugLog("postUnit", { phase: "safety-harness", error: String(e) }); diff --git a/src/resources/extensions/sf/eval-review-schema.ts b/src/resources/extensions/sf/eval-review-schema.ts new file mode 100644 index 000000000..866585b7d --- /dev/null +++ b/src/resources/extensions/sf/eval-review-schema.ts @@ -0,0 +1,243 @@ +/** + * EVAL-REVIEW frontmatter schema and parser. 
+ * + * The auditor agent for `/sf eval-review` writes a markdown file whose + * machine-readable contract lives entirely in YAML frontmatter. The body + * after the closing `---` is human-only prose and is never parsed by any + * consumer (the design response to a prior parser that used regex over LLM-generated + * prose and produced silent failures). + * + * This module owns: + * - The TypeBox schema for the frontmatter (single source of truth). + * - A small frontmatter extractor (locates the YAML block). + * - The validated parser (`parseEvalReviewFrontmatter`). + * - Pure helpers for derived fields the handler must recompute server-side + * (overall score, severity counts) — we never trust LLM arithmetic. + * + * Consumers: `commands-eval-review.ts` (writer), `commands-ship.ts` (reader + * for the soft pre-ship warning), and a future `commands-eval-fix.ts`. + */ + +import { Type, type Static, type TSchema } from "@sinclair/typebox"; +import { Value } from "@sinclair/typebox/value"; +import { parse as parseYaml } from "yaml"; + +// ─── Constants ──────────────────────────────────────────────────────────────── + +/** Schema version literal embedded in every EVAL-REVIEW.md frontmatter. */ +export const EVAL_REVIEW_SCHEMA_VERSION = "eval-review/v1" as const; + +/** Verdict values, ordered from worst to best for UI display purposes. */ +export const VERDICT_VALUES = [ + "NOT_IMPLEMENTED", + "SIGNIFICANT_GAPS", + "NEEDS_WORK", + "PRODUCTION_READY", +] as const; + +/** Severity classifications used in `gaps[*].severity`. */ +export const SEVERITY_VALUES = ["blocker", "major", "minor"] as const; + +/** Eval dimensions an auditor scores. `other` is the catch-all. */ +export const DIMENSION_VALUES = [ + "observability", + "guardrails", + "tests", + "metrics", + "datasets", + "other", +] as const; + +/** Lower bound for any score in the schema. */ +export const MIN_SCORE = 0; +/** Upper bound for any score in the schema. 
*/ +export const MAX_SCORE = 100; +/** Coverage's contribution to overall_score. See `docs/user-docs/eval-review.md` for rationale. */ +export const COVERAGE_WEIGHT = 0.6; +/** Infrastructure's contribution to overall_score. See `docs/user-docs/eval-review.md` for rationale. */ +export const INFRASTRUCTURE_WEIGHT = 0.4; + +// ─── Schema ─────────────────────────────────────────────────────────────────── + +const verdictSchema = Type.Union(VERDICT_VALUES.map((v) => Type.Literal(v))); +const severitySchema = Type.Union(SEVERITY_VALUES.map((v) => Type.Literal(v))); +const dimensionSchema = Type.Union(DIMENSION_VALUES.map((v) => Type.Literal(v))); + +/** + * One gap finding inside `gaps[]`. Every field is required — the prompt + * cannot emit a partial gap. `evidence` is mandatory; the anti-Goodhart + * guard depends on it. + */ +export const EvalReviewGap = Type.Object({ + id: Type.String({ pattern: "^G\\d+$" }), + dimension: dimensionSchema, + severity: severitySchema, + description: Type.String({ minLength: 1 }), + evidence: Type.String({ minLength: 1 }), + suggested_fix: Type.String({ minLength: 1 }), +}); + +/** Severity histogram. The handler recomputes this from `gaps[]`. */ +export const EvalReviewCounts = Type.Object({ + blocker: Type.Integer({ minimum: 0 }), + major: Type.Integer({ minimum: 0 }), + minor: Type.Integer({ minimum: 0 }), +}); + +/** + * The full frontmatter schema. Field order in the schema definition mirrors + * the order that the auditor prompt asks the LLM to emit, so a literal-eyeball + * comparison between this file and `prompts/eval-review.md` stays meaningful. 
*/ +export const EvalReviewFrontmatter = Type.Object({ + schema: Type.Literal(EVAL_REVIEW_SCHEMA_VERSION), + verdict: verdictSchema, + coverage_score: Type.Integer({ minimum: MIN_SCORE, maximum: MAX_SCORE }), + infrastructure_score: Type.Integer({ minimum: MIN_SCORE, maximum: MAX_SCORE }), + overall_score: Type.Integer({ minimum: MIN_SCORE, maximum: MAX_SCORE }), + generated: Type.String({ pattern: "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?Z$" }), + slice: Type.String({ pattern: "^S\\d+$" }), + milestone: Type.String({ minLength: 1 }), + gaps: Type.Array(EvalReviewGap), + counts: EvalReviewCounts, +}); + +/** Inferred TypeScript type for a validated frontmatter object. */ +export type EvalReviewFrontmatterT = Static<typeof EvalReviewFrontmatter>; +/** Inferred TypeScript type for a single gap finding. */ +export type EvalReviewGapT = Static<typeof EvalReviewGap>; +/** Inferred TypeScript type for the counts histogram. */ +export type EvalReviewCountsT = Static<typeof EvalReviewCounts>; +/** One of the four allowed verdict literals. */ +export type Verdict = (typeof VERDICT_VALUES)[number]; + +// ─── Frontmatter extraction ─────────────────────────────────────────────────── + +/** + * Locate the YAML block between two `---` lines and return its raw text. + * + * Tolerant to CRLF line endings. Does not interpret the YAML — that's the + * caller's job. The extractor only enforces the markdown frontmatter shape. + * + * @param raw - Full contents of an EVAL-REVIEW.md file. + * @returns `{ yaml }` with the inner YAML text on success, or `{ error }` + * describing why the frontmatter could not be located. 
*/ +export function extractFrontmatterRaw( + raw: string, +): { yaml: string } | { error: string } { + const lines = raw.split(/\r?\n/); + if (lines[0] !== "---") { + return { error: "Missing opening `---` frontmatter delimiter on line 1" }; + } + for (let i = 1; i < lines.length; i++) { + if (lines[i] === "---") { + return { yaml: lines.slice(1, i).join("\n") }; + } + } + return { error: "Missing closing `---` frontmatter delimiter" }; +} + +// ─── Parser ─────────────────────────────────────────────────────────────────── + +/** Discriminated result type returned by the parser. */ +export type ParseResult = + | { ok: true; data: EvalReviewFrontmatterT } + | { ok: false; error: string; pointer: string }; + +/** + * Parse and validate the frontmatter of an EVAL-REVIEW.md file. + * + * Failure cases are exhaustive and deterministic: + * - missing/unclosed frontmatter → `pointer: "/"`, message names the cause + * - YAML syntax error → `pointer: "/"`, message contains "YAML" + * - schema violation → `pointer` is the JSON-Pointer path of the bad field + * + * Body content after the closing `---` is never inspected. This is a + * response to a prior parser that used regex over the body and silently + * failed on prose / tables / numbered lists. + * + * @param raw - Full contents of an EVAL-REVIEW.md file. + * @returns A discriminated `ParseResult`. + */ +export function parseEvalReviewFrontmatter(raw: string): ParseResult { + const fm = extractFrontmatterRaw(raw); + if ("error" in fm) { + return { ok: false, error: fm.error, pointer: "/" }; + } + + let parsed: unknown; + try { + parsed = parseYaml(fm.yaml, { schema: "core" }); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + return { ok: false, error: `YAML parse error: ${msg}`, pointer: "/" }; + } + + const schema: TSchema = EvalReviewFrontmatter; + if (!Value.Check(schema, parsed)) { + const errs = [...Value.Errors(schema, parsed)]; + const first = errs[0]; + return { + ok: false, + error: `Schema validation failed: ${first?.message ?? "unknown error"}`, + pointer: first?.path ?? "/", + }; + } + + return { ok: true, data: parsed as EvalReviewFrontmatterT }; +} + +// ─── Derived fields ─────────────────────────────────────────────────────────── + +/** + * Compute `overall_score` from the two component scores using the rubric + * weights documented in `docs/user-docs/eval-review.md`. + * + * The handler always recomputes this value rather than trusting whatever the + * LLM emitted in `overall_score`. If the LLM-emitted value disagrees with the + * recomputed one, the disagreement is logged and the recomputed value wins. + * + * Clamps the result into `[MIN_SCORE, MAX_SCORE]` defensively. Schema-validated + * inputs are already in range, but the helper is exported and may be called + * from a code path that bypasses the schema (tests, future tools); the clamp + * keeps the contract honest in those cases. + * + * @param coverage - integer 0..100 from the auditor's coverage assessment. + * @param infrastructure - integer 0..100 from the auditor's infra assessment. + * @returns rounded integer 0..100. + */ +export function computeOverallScore(coverage: number, infrastructure: number): number { + const raw = Math.round(coverage * COVERAGE_WEIGHT + infrastructure * INFRASTRUCTURE_WEIGHT); + return Math.max(MIN_SCORE, Math.min(MAX_SCORE, raw)); +} + +/** + * Build the severity histogram for a list of gaps. + * + * Used by the handler to overwrite whatever the LLM put in `counts` — + * we recompute server-side rather than trust LLM arithmetic. + * + * @param gaps - validated gap list. + * @returns counts keyed by severity literal. 
*/ +export function deriveCounts(gaps: readonly EvalReviewGapT[]): EvalReviewCountsT { + const counts: EvalReviewCountsT = { blocker: 0, major: 0, minor: 0 }; + for (const g of gaps) counts[g.severity]++; + return counts; +} + +/** + * Map a numeric overall_score to its verdict literal using the bands + * documented in `docs/user-docs/eval-review.md`: ≥80 PRODUCTION_READY, 60..79 NEEDS_WORK, 40..59 SIGNIFICANT_GAPS, + * <40 NOT_IMPLEMENTED. + * + * @param overall - integer 0..100. + * @returns a verdict literal. + */ +export function verdictForScore(overall: number): Verdict { + if (overall >= 80) return "PRODUCTION_READY"; + if (overall >= 60) return "NEEDS_WORK"; + if (overall >= 40) return "SIGNIFICANT_GAPS"; + return "NOT_IMPLEMENTED"; +} diff --git a/src/resources/extensions/sf/memory-source-store.ts b/src/resources/extensions/sf/memory-source-store.ts new file mode 100644 index 000000000..960741941 --- /dev/null +++ b/src/resources/extensions/sf/memory-source-store.ts @@ -0,0 +1,138 @@ +// SF Memory Sources — CRUD for raw ingested content (notes, files, URLs, artifacts) +// +// Distinct from `memories`: a `memory_source` row is the preserved raw input +// that an extractor may (or may not) distill into one or more memories. +// Storing the source makes ingestion idempotent (content_hash) and gives the +// user a way to trace a memory back to its origin. + +import { createHash, randomUUID } from "node:crypto"; +import { _getAdapter, isDbAvailable, insertMemorySourceRow, deleteMemorySourceRow } from "./sf-db.js"; + +export type MemorySourceKind = "note" | "url" | "file" | "artifact" | "capture" | "learning"; + +export interface MemorySource { + id: string; + kind: MemorySourceKind; + uri: string | null; + title: string | null; + content: string; + content_hash: string; + imported_at: string; + scope: string; + tags: string[]; +} + +function rowToSource(row: Record<string, unknown>): MemorySource { + const tagsRaw = typeof row["tags"] === "string" ? 
(row["tags"] as string) : "[]"; + let tags: string[] = []; + try { + const parsed = JSON.parse(tagsRaw); + if (Array.isArray(parsed)) tags = parsed.filter((t): t is string => typeof t === "string"); + } catch { + // leave tags empty + } + return { + id: row["id"] as string, + kind: row["kind"] as MemorySourceKind, + uri: (row["uri"] as string) ?? null, + title: (row["title"] as string) ?? null, + content: row["content"] as string, + content_hash: row["content_hash"] as string, + imported_at: row["imported_at"] as string, + scope: (row["scope"] as string) ?? "project", + tags, + }; +} + +export function hashContent(content: string): string { + return createHash("sha256").update(content).digest("hex"); +} + +export function newSourceId(): string { + return `SRC-${randomUUID().slice(0, 8)}`; +} + +export interface CreateSourceOptions { + kind: MemorySourceKind; + uri?: string | null; + title?: string | null; + content: string; + scope?: string; + tags?: string[]; +} + +export interface CreateSourceResult { + id: string; + duplicate: boolean; +} + +/** + * Insert a memory_source. Idempotent — if the content_hash already exists, + * returns the existing source's id and duplicate=true instead of inserting. + */ +export function createMemorySource(opts: CreateSourceOptions): CreateSourceResult | null { + if (!isDbAvailable()) return null; + const adapter = _getAdapter(); + if (!adapter) return null; + + try { + const contentHash = hashContent(opts.content); + const existing = adapter + .prepare("SELECT id FROM memory_sources WHERE content_hash = :h") + .get({ ":h": contentHash }); + if (existing && typeof existing["id"] === "string") { + return { id: existing["id"] as string, duplicate: true }; + } + + const id = newSourceId(); + insertMemorySourceRow({ + id, + kind: opts.kind, + uri: opts.uri ?? null, + title: opts.title ?? null, + content: opts.content, + contentHash, + importedAt: new Date().toISOString(), + scope: opts.scope ?? "project", + tags: opts.tags ?? 
[], + }); + return { id, duplicate: false }; + } catch { + return null; + } +} + +export function listMemorySources(limit = 50): MemorySource[] { + if (!isDbAvailable()) return []; + const adapter = _getAdapter(); + if (!adapter) return []; + try { + const rows = adapter + .prepare("SELECT * FROM memory_sources ORDER BY imported_at DESC LIMIT :limit") + .all({ ":limit": limit }); + return rows.map(rowToSource); + } catch { + return []; + } +} + +export function getMemorySource(id: string): MemorySource | null { + if (!isDbAvailable()) return null; + const adapter = _getAdapter(); + if (!adapter) return null; + try { + const row = adapter.prepare("SELECT * FROM memory_sources WHERE id = :id").get({ ":id": id }); + return row ? rowToSource(row) : null; + } catch { + return null; + } +} + +export function deleteMemorySource(id: string): boolean { + if (!isDbAvailable()) return false; + try { + return deleteMemorySourceRow(id); + } catch { + return false; + } +} diff --git a/src/resources/extensions/sf/preferences-migrations.ts b/src/resources/extensions/sf/preferences-migrations.ts index 569c7c556..7caca4be2 100644 --- a/src/resources/extensions/sf/preferences-migrations.ts +++ b/src/resources/extensions/sf/preferences-migrations.ts @@ -112,6 +112,9 @@ export function migrateForward(input: SFPreferences): MigrationOutcome { * * Future: deprecated-key detection, missing-required-field detection. */ +/** + * Check preferences for schema version drift and return any warnings. + */ export function checkPreferencesDrift(prefs: SFPreferences): { warnings: string[]; } { diff --git a/src/resources/extensions/sf/workflow-manifest.ts b/src/resources/extensions/sf/workflow-manifest.ts index ba948d9d3..1f0d4d026 100644 --- a/src/resources/extensions/sf/workflow-manifest.ts +++ b/src/resources/extensions/sf/workflow-manifest.ts @@ -78,6 +78,30 @@ export function toNumeric( return fallback; } +/** + * Parse a JSON TEXT column that is expected to be an array of strings. 
+ * Falls back to `[]` if the column is absent, empty, or if the parsed value + * is not an array whose every element is a string. This prevents malformed + * but technically-parseable JSON (e.g. escaped-quote corruption) from + * silently producing a wrong-typed value downstream. + */ +function parseStringArray(raw: unknown): string[] { + if (typeof raw !== "string" || raw.trim() === "") return []; + try { + const parsed = JSON.parse(raw); + if ( + Array.isArray(parsed) && + parsed.every((item) => typeof item === "string") + ) { + return parsed as string[]; + } + // Parseable but wrong shape — fall back to empty + return []; + } catch { + return []; + } +} + // ─── snapshotState ─────────────────────────────────────────────────────── /** diff --git a/src/resources/extensions/sf/worktree-resolver.ts b/src/resources/extensions/sf/worktree-resolver.ts index dd77c1203..43f60cde6 100644 --- a/src/resources/extensions/sf/worktree-resolver.ts +++ b/src/resources/extensions/sf/worktree-resolver.ts @@ -15,7 +15,7 @@ import { randomUUID } from "node:crypto"; import { existsSync, unlinkSync } from "node:fs"; -import { join } from "node:path"; +import { join, resolve } from "node:path"; import type { AutoSession } from "./auto/session.js"; import { debugLog } from "./debug-logger.js"; import { MergeConflictError } from "./git-service.js";