feat(reflection): add Phase 1A reflection layer (corpus + prompt + sf headless reflect)

Addresses self-feedback entry sf-mp4uzvcd-pazg6v
(architecture-defect:no-reflection-layer-over-self-feedback-corpus): SF
detected symptoms and triaged individual entries but had no layer that
reasoned about the corpus to recognize recurring structural patterns.
The same architectural pressure expressed itself across multiple entries
with different exact-kind strings; nothing escalated the pattern to a
class. The cognitive work fell on the operator.

This commit ships Phase 1A — the data-assembly + prompt half of the
reflection layer + an operator-driven entry point. Phase 1B (LLM dispatch
via the autonomous loop as a real unit type) lands once
sf-mp4rxkwb-l4baga (triage-not-a-first-class-unit-type) is in.

Files:
- src/resources/extensions/sf/reflection.js (new)
  - assembleReflectionCorpus(basePath): bundles open + recent-resolved
    self-feedback (full json), last 50 commits via git log, milestone +
    slice + task state, all milestone validation verdicts, and prior
    reflection report into one struct. Returns null on prerequisite
    failure (DB closed) so callers downgrade gracefully.
  - renderReflectionCorpusBrief(corpus): renders the corpus into a
    markdown brief the LLM consumes in one turn.
  - writeReflectionReport(basePath, content): persists to
    .sf/reflection/<timestamp>-report.md so next pass detects "what
    changed since last reflection."
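  The round-trip between writeReflectionReport and the next pass's
  previousReport read comes down to the filename scheme. A standalone
  sketch that mimics the module's naming (it does not import the module;
  helper names here are illustrative only):

  ```javascript
  // Filenames are ISO timestamps with ':' and '.' replaced by '-', so a
  // plain lexicographic sort orders reports chronologically and the last
  // one (after reverse) is the newest.
  function reportFileName(date) {
    return `${date.toISOString().replace(/[:.]/g, "-")}-report.md`;
  }

  function latestReport(names) {
    const reports = names
      .filter((n) => n.endsWith("-report.md"))
      .sort()
      .reverse();
    return reports.length > 0 ? reports[0] : null;
  }

  const older = reportFileName(new Date("2026-05-13T10:00:00Z"));
  const newer = reportFileName(new Date("2026-05-14T04:27:29Z"));
  console.log(latestReport([older, newer, "notes.txt"]));
  // → 2026-05-14T04-27-29-000Z-report.md
  ```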

- src/resources/extensions/sf/prompts/reflection-pass.md (new)
  - {{include:working-directory}} prefix.
  - Reasoning order: cluster by structural shape (not exact kind),
    identify recurring patterns, identify commit/ledger gaps, identify
    stale validation drift, identify the deepest architectural concern,
    compare against prior report.
  - Output contract: structured markdown report with named sections,
    terminator REFLECTION_COMPLETE for clean-finish detection.
  - Constraints: don't fix anything (reflection layer not executor),
    don't resolve entries without commit-SHA evidence, don't invent IDs.
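  The REFLECTION_COMPLETE terminator only pays off when the consumer checks
  for it. A minimal clean-finish check (hypothetical helper, not part of
  this commit) could look like:

  ```javascript
  // The output contract says the model ends its report with
  // REFLECTION_COMPLETE on its own line, so a truncated stream is
  // detectable by inspecting the trailing non-empty line.
  function finishedCleanly(output) {
    const lines = output.trimEnd().split("\n");
    return lines[lines.length - 1].trim() === "REFLECTION_COMPLETE";
  }

  console.log(finishedCleanly("# SF Reflection\n...\nREFLECTION_COMPLETE\n")); // → true
  console.log(finishedCleanly("# SF Reflection\n... cut off mid-sent"));       // → false
  ```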

- src/headless-reflect.ts (new) — sf headless reflect [--json]
  - Pre-opens the project DB via auto-start.openProjectDbIfPresent
    (the one-shot bypass path doesn't run the full SF agent bootstrap).
  - Default: emits the rendered prompt brief (template + corpus) for
    operators to pipe into any model. Lets the corpus-assembly layer
    ship and validate before the LLM-dispatch layer is wired.
  - --json: emits raw corpus snapshot for tooling.

- src/headless.ts: registers the new "reflect" command after the
  existing usage block.
- src/help-text.ts: documents it in the headless command list.

- src/resources/extensions/sf/tests/reflection.test.mjs (new, 9 tests):
  null-when-DB-closed; collects open + recent-resolved; excludes >30d
  resolutions; captures milestone/slice/task tree; captures validation
  verdicts; commits returned as an array (best-effort; empty in a tmpdir is
  ok); brief
  renders all major sections; entry IDs/severity/kind appear in brief;
  writeReflectionReport round-trips through assembleReflectionCorpus's
  previousReport read.
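The lookback split the tests exercise reduces to a plain timestamp filter;
a self-contained sketch of the same logic, with inline sample entries:

```javascript
// Open entries have no resolvedAt; resolved entries only count as
// "recent" when resolvedAt falls inside the 30-day window.
const LOOKBACK_DAYS = 30;
const now = Date.now();
const cutoffMs = now - LOOKBACK_DAYS * 24 * 60 * 60 * 1000;

const entries = [
  { id: "sf-a", resolvedAt: null },                                        // open
  { id: "sf-b", resolvedAt: new Date(now - 5 * 86400000).toISOString() },  // recent
  { id: "sf-c", resolvedAt: new Date(now - 60 * 86400000).toISOString() }, // too old
];

const open = entries.filter((e) => !e.resolvedAt);
const recentResolved = entries.filter(
  (e) => e.resolvedAt && new Date(e.resolvedAt).getTime() >= cutoffMs,
);
console.log(open.map((e) => e.id), recentResolved.map((e) => e.id));
// → [ 'sf-a' ] [ 'sf-b' ]
```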

Live smoke verified: sf headless reflect against the real .sf/sf.db
returns 15 open + 23 recent-resolved entries, 50 commits, 2 milestones,
1 validation file (correctly surfacing M001's stale needs-attention
verdict against actual 5/5 slices done — exactly the case that
motivated this layer).
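The stale-verdict read that surfaced M001's needs-attention uses a
frontmatter regex (the same pattern reflection.js applies); a standalone
sketch of that extraction:

```javascript
// First `verdict:` line at the start of a line wins; null when absent.
const VERDICT_RE = /^verdict:\s*(\S+)/m;

function readVerdict(content) {
  const m = content.match(VERDICT_RE);
  return m ? m[1] : null;
}

const staleValidation =
  "---\nverdict: needs-attention\n---\n\nS02-S05 still pending per this stale file\n";
console.log(readVerdict(staleValidation));       // → needs-attention
console.log(readVerdict("no frontmatter here")); // → null
```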

Total: +848 LOC, full SF extension suite (1534 tests) passes,
typecheck clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Mikael Hugo, 2026-05-14 04:27:29 +02:00
parent 7570aac4b7
commit e161a59e2f
6 changed files with 848 additions and 0 deletions

src/headless-reflect.ts (new file, +154)

@@ -0,0 +1,154 @@
/**
 * headless-reflect.ts: `sf headless reflect`
 *
 * Purpose: operator-driven entry point for the SF reflection layer. Assembles
 * the full corpus snapshot (open + recent-resolved self-feedback, recent
 * commits, milestone state, validation files, prior report) and outputs
 * either:
 * - A rendered prompt (default): the markdown brief that would be sent to
 *   an LLM for meta-analysis. Lets the operator pipe it into any model
 *   they choose (gemini, codex, claude) without SF dispatching.
 * - A JSON dump of the raw corpus (--json): structured snapshot for
 *   tooling.
 *
 * The actual LLM-driven analysis pass and the autonomous-loop dispatch
 * (reflection-pass unit type) land in follow-up commits; both depend on
 * the corpus this command produces, and on the triage-not-a-first-class-
 * unit-type fix being landed first.
 *
 * Consumer: headless.ts when command === "reflect".
 */
import { existsSync } from "node:fs";
import { join } from "node:path";
import { createJiti } from "@mariozechner/jiti";
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
import { getSfEnv } from "./env.js";

const jiti = createJiti(import.meta.filename, {
  interopDefault: true,
  debug: false,
});

const agentExtensionsDir = join(getSfEnv().agentDir, "extensions", "sf");
const useAgentDir = existsSync(join(agentExtensionsDir, "state.js"));

function sfExtensionPath(moduleName: string): string {
  if (useAgentDir) return join(agentExtensionsDir, `${moduleName}.js`);
  const tsPath = resolveBundledSourceResource(
    import.meta.url,
    "extensions",
    "sf",
    `${moduleName}.ts`,
  );
  if (existsSync(tsPath)) return tsPath;
  return resolveBundledSourceResource(
    import.meta.url,
    "extensions",
    "sf",
    `${moduleName}.js`,
  );
}

export interface HandleReflectOptions {
  json?: boolean;
}

export interface HandleReflectResult {
  exitCode: number;
}

/**
 * Render the corpus to stdout. Default: markdown prompt brief that includes
 * the reflection prompt template + the corpus rendered as markdown. With
 * --json: raw corpus object as JSON.
 *
 * Never throws (best-effort); returns non-zero exit on assembly failure.
 */
export async function handleReflect(
  cwd: string,
  options: HandleReflectOptions = {},
): Promise<HandleReflectResult> {
  // Open the project DB before assembling. The one-shot bypass path doesn't
  // run the full SF agent bootstrap, so DB-open isn't done for us.
  try {
    const autoStartModule = (await jiti.import(
      sfExtensionPath("auto-start"),
      {},
    )) as { openProjectDbIfPresent?: (cwd: string) => Promise<unknown> };
    if (typeof autoStartModule.openProjectDbIfPresent === "function") {
      await autoStartModule.openProjectDbIfPresent(cwd);
    }
  } catch (err) {
    // Non-fatal — the corpus assembler will return null and we will emit
    // the friendly "DB must be open" error below.
    const msg = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[reflect] DB pre-open warning: ${msg}\n`);
  }

  let mod: {
    assembleReflectionCorpus: (basePath: string) => unknown;
    renderReflectionCorpusBrief: (corpus: unknown) => string;
  };
  try {
    mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[reflect] failed to load reflection module: ${msg}\n`);
    return { exitCode: 1 };
  }

  let corpus: unknown;
  try {
    corpus = mod.assembleReflectionCorpus(cwd);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[reflect] corpus assembly failed: ${msg}\n`);
    return { exitCode: 1 };
  }
  if (!corpus) {
    const payload = {
      ok: false,
      error:
        "Could not assemble reflection corpus — sf.db must be open and at least one self-feedback entry must exist.",
    };
    process.stdout.write(
      options.json ? `${JSON.stringify(payload)}\n` : `${payload.error}\n`,
    );
    return { exitCode: 1 };
  }
  if (options.json) {
    process.stdout.write(`${JSON.stringify({ ok: true, corpus })}\n`);
    return { exitCode: 0 };
  }

  let promptTemplate: string;
  try {
    const fs = await import("node:fs/promises");
    const templatePath = useAgentDir
      ? join(agentExtensionsDir, "prompts", "reflection-pass.md")
      : resolveBundledSourceResource(
          import.meta.url,
          "extensions",
          "sf",
          "prompts",
          "reflection-pass.md",
        );
    promptTemplate = await fs.readFile(templatePath, "utf-8");
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[reflect] prompt template load failed: ${msg}\n`);
    return { exitCode: 1 };
  }

  const brief = mod.renderReflectionCorpusBrief(corpus);
  // Inline-replace {{corpus}} in the template. We do NOT run the full
  // loadPrompt fragment-resolver here — the operator just needs a clean
  // rendered prompt to pipe into a model. The full template path runs
  // inside SF when reflection-pass becomes a real unit type.
  const rendered = promptTemplate.replace("{{corpus}}", brief);
  process.stdout.write(`${rendered}\n`);
  return { exitCode: 0 };
}

src/headless.ts

@@ -824,6 +824,18 @@ async function runHeadlessOnce(
    return { exitCode: result.exitCode, interrupted: false, timedOut: false };
  }

  // Reflect: assemble the SF reflection corpus snapshot (open + recent
  // self-feedback, recent commits, milestone state, validation files,
  // prior report) and emit either the rendered prompt brief (default) or
  // the raw corpus JSON (--json). Operator-driven — the autonomous-loop
  // reflection unit is a separate follow-up.
  if (options.command === "reflect") {
    const wantsJson = options.json || options.commandArgs.includes("--json");
    const { handleReflect } = await import("./headless-reflect.js");
    const result = await handleReflect(process.cwd(), { json: wantsJson });
    return { exitCode: result.exitCode, interrupted: false, timedOut: false };
  }

  // Usage: gemini-cli account snapshot (tier, project, per-model quota), no
  // RPC child needed. Uses snapshotGeminiCliAccount from the
  // @singularity-forge/google-gemini-cli-provider package directly.

src/help-text.ts

@@ -224,6 +224,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
" new-milestone Create a milestone from a specification document", " new-milestone Create a milestone from a specification document",
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)", " query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)", " usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw)",
"", "",
"new-milestone flags:", "new-milestone flags:",
" --context <path> Path to spec/PRD file (use '-' for stdin)", " --context <path> Path to spec/PRD file (use '-' for stdin)",

src/resources/extensions/sf/prompts/reflection-pass.md (new file)

@@ -0,0 +1,73 @@
{{include:working-directory}}
## SF Reflection Pass
You are the reflection layer for SF's self-feedback system. SF detects symptoms and triages individual entries; YOUR job is to reason across the entire corpus and produce meta-output that no per-entry triage could find.
The architectural rule that motivates this pass: SF kept producing the same class of bug (artifact-drift between data planes; fragile delivery in the self-improvement loop; trust gaps in the resolution pipeline) without anything noticing the recurring shape. You exist to notice the shape.
## Your reasoning, in order
1. **Cluster the open + recently-resolved entries by structural shape, not exact `kind` string.** Two entries with kinds `gap:upstream-bridge-no-closure-memory` and `architecture-defect:self-feedback-not-wired-to-memory-subsystem` may both express the same pressure (no cross-channel memory). Look for the pressure, not the words.
2. **Identify recurring patterns.** A pattern is structural if 2+ entries share it. State the pattern in one sentence, list the contributing entry IDs, and cite the structural commonality (not just keyword overlap).
3. **Identify gaps between filed entries and recent commits.** If 5 commits touched routing in the last week but 0 routing entries closed, something is being silently fixed without ledger update — or the work isn't addressing what was filed. Either is a finding.
4. **Identify stale validation drift.** Compare each milestone's validation file verdict against its actual slice/task state. A `needs-attention` verdict whose stated reasons no longer apply is a pattern to surface — and the absence of any detector for it is a deeper finding.
5. **Identify the deepest architectural concern.** Of the patterns you identified, which one — if addressed — would close multiple surface entries? That is the highest-leverage finding.
6. **Compare against the previous reflection report.** If a pattern you identified was already named in the prior report, note whether progress has been made (commits/resolutions) or whether it is still open. Do NOT re-state unchanged findings; surface them as "still open since {prior date}."
## Output contract
Produce a markdown report with these sections in order:
```
# SF Reflection — <ISO date>
## Pattern clusters
For each pattern: a one-sentence name, the contributing entry IDs, the
structural commonality, and the suggested rollup (file a single
architecture-defect, promote to requirement, or no action because already
covered by an existing entry).
## Commit / ledger gap
Areas where commit activity does not match filed entries (silent fixes,
unfiled symptoms, stale ledger).
## Stale validation drift
Per-milestone: validation verdict vs actual state. Name any drift.
## Deepest architectural concern
The single highest-leverage pattern, framed as a concrete refactor or
design change.
## Recommended new self-feedback entries
For each: kind (start with `reflection:` prefix), severity, one-line summary,
and the entry IDs that would be transitively addressed.
## Compared to last report
Patterns that persist; patterns that closed; patterns that are new.
```
After the markdown report, on a separate line, emit:
```
REFLECTION_COMPLETE
```
This terminator lets the orchestrator detect a clean finish vs a truncated stream.
## Constraints
- Do NOT propose to fix anything in this turn. You are the reflection layer; execution is the dispatcher's job.
- Do NOT recommend resolving entries unless you have direct evidence (a commit SHA in the corpus that addresses the entry's acceptance criteria). Pattern observation is not the same as resolution.
- Do NOT invent entry IDs. Cite only IDs that appear in the corpus.
- Be specific about structural commonalities. "Both entries are about memory" is too vague. "Both entries describe one-direction state writes that don't echo back to the originating data plane" is the kind of structural shape worth surfacing.
- Keep the markdown under ~3000 words. Density over length.
## The corpus
{{corpus}}

src/resources/extensions/sf/reflection.js (new file)

@@ -0,0 +1,346 @@
/**
 * reflection.js: corpus assembler + prompt builder for the SF reflection layer.
 *
 * Purpose: address the architecture-defect filed as
 * "no-reflection-layer-over-self-feedback-corpus". SF detects symptoms and
 * triages individual entries but has no layer that reasons about the whole
 * self-feedback corpus to recognize recurring structural patterns. This
 * module is the data-assembly + prompt half of that layer; the LLM-call
 * half lives in the operator surface (headless-reflect.ts) and the future
 * autonomous unit handler (auto/phases-reflection.js).
 *
 * Output: a single reflection-corpus object that bundles every signal an
 * LLM would need to do meta-analysis (open + recent-resolved self-feedback,
 * recent commits, milestone state, validation files, prior reflection
 * report). The prompt builder renders that into a markdown brief the LLM
 * processes in one turn.
 */
import { execFileSync } from "node:child_process";
import {
  existsSync,
  mkdirSync,
  readFileSync,
  readdirSync,
  statSync,
  writeFileSync,
} from "node:fs";
import { join } from "node:path";
import { sfRoot } from "./paths.js";
import {
  listSelfFeedbackEntries,
  getAllMilestones,
  getMilestoneSlices,
  getSliceTasks,
  isDbAvailable,
} from "./sf-db.js";

const RECENT_RESOLVED_LOOKBACK_DAYS = 30;
const COMMIT_LOOKBACK = 50;

function sfReflectionDir(basePath) {
  return join(sfRoot(basePath), "reflection");
}
/**
 * Read recent commits via git log. Best-effort: returns [] on any failure.
 *
 * Why git log directly and not a wrapper: this module is part of SF's
 * runtime; it cannot depend on dev tooling. git is universally present in
 * any SF working tree (SF asserts this in safe-git checks).
 */
function readRecentCommits(basePath, limit) {
  try {
    const out = execFileSync(
      "git",
      [
        "log",
        `-${limit}`,
        "--pretty=format:%H%x09%an%x09%aI%x09%s%x00%b%x1e",
        "--no-merges",
      ],
      { cwd: basePath, encoding: "utf-8", stdio: ["ignore", "pipe", "ignore"] },
    );
    return out
      .split("\x1e")
      .filter((s) => s.trim())
      .map((entry) => {
        const [headLine, body] = entry.split("\x00");
        const [sha, author, isoDate, subject] = (headLine ?? "").split("\x09");
        return {
          sha: (sha ?? "").trim(),
          author: (author ?? "").trim(),
          ts: (isoDate ?? "").trim(),
          subject: (subject ?? "").trim(),
          body: (body ?? "").trim(),
        };
      })
      .filter((c) => c.sha);
  } catch {
    return [];
  }
}
/**
 * Snapshot active milestone + slice + task state from the DB.
 */
function readMilestoneSnapshot() {
  if (!isDbAvailable()) return null;
  try {
    const milestones = getAllMilestones();
    return milestones.map((m) => {
      const slices = getMilestoneSlices(m.id) ?? [];
      return {
        id: m.id,
        title: m.title,
        status: m.status,
        slices: slices.map((s) => {
          const tasks = getSliceTasks(m.id, s.id) ?? [];
          return {
            id: s.id,
            status: s.status,
            tasks: tasks.map((t) => ({
              id: t.id,
              status: t.status,
            })),
          };
        }),
      };
    });
  } catch {
    return null;
  }
}
/**
 * Read every milestone's VALIDATION.md verdict + first ~1.5 KiB so the
 * reflection prompt can spot stale verdicts (the M001 case that motivated
 * this module: validation file said S02-S05 pending while the DB had them
 * complete; nothing was looking for that drift).
 */
function readMilestoneValidationFiles(basePath) {
  const milestonesDir = join(sfRoot(basePath), "milestones");
  if (!existsSync(milestonesDir)) return [];
  const out = [];
  try {
    for (const entry of readdirSync(milestonesDir)) {
      const dir = join(milestonesDir, entry);
      try {
        if (!statSync(dir).isDirectory()) continue;
      } catch {
        continue;
      }
      for (const fname of readdirSync(dir)) {
        if (!/-VALIDATION\.md$/.test(fname)) continue;
        try {
          const content = readFileSync(join(dir, fname), "utf-8");
          const verdictMatch = content.match(/^verdict:\s*(\S+)/m);
          out.push({
            milestoneId: entry,
            file: fname,
            verdict: verdictMatch ? verdictMatch[1] : null,
            excerpt: content.slice(0, 1500),
          });
        } catch {
          // best-effort
        }
      }
    }
  } catch {
    // best-effort
  }
  return out;
}
/**
 * Read the most recent reflection report so the next pass can avoid
 * re-stating the same patterns and can detect "what changed since last
 * reflection."
 */
function readPreviousReflectionReport(basePath) {
  const dir = sfReflectionDir(basePath);
  if (!existsSync(dir)) return null;
  try {
    const reports = readdirSync(dir)
      .filter((n) => n.endsWith("-report.md"))
      .sort()
      .reverse();
    if (reports.length === 0) return null;
    const latest = reports[0];
    return {
      file: latest,
      content: readFileSync(join(dir, latest), "utf-8"),
    };
  } catch {
    return null;
  }
}
/**
 * Assemble the full reflection corpus snapshot. All inputs the LLM would
 * need to do meta-analysis, in one struct. Returns null when prerequisites
 * are missing (DB not open) so callers downgrade gracefully.
 *
 * Consumer: buildReflectionPrompt + headless-reflect operator surface +
 * future auto/phases-reflection.js dispatcher handler.
 */
export function assembleReflectionCorpus(basePath = process.cwd()) {
  if (!isDbAvailable()) return null;
  const allEntries = listSelfFeedbackEntries();
  const forgeEntries = allEntries.filter((e) => e.repoIdentity === "forge");
  const cutoffMs =
    Date.now() - RECENT_RESOLVED_LOOKBACK_DAYS * 24 * 60 * 60 * 1000;
  const open = forgeEntries.filter((e) => !e.resolvedAt);
  const recentResolved = forgeEntries.filter(
    (e) =>
      e.resolvedAt &&
      new Date(e.resolvedAt).getTime() >= cutoffMs,
  );
  return {
    generatedAt: new Date().toISOString(),
    basePath,
    lookbackDays: RECENT_RESOLVED_LOOKBACK_DAYS,
    openEntries: open,
    recentResolvedEntries: recentResolved,
    commits: readRecentCommits(basePath, COMMIT_LOOKBACK),
    milestones: readMilestoneSnapshot(),
    validations: readMilestoneValidationFiles(basePath),
    previousReport: readPreviousReflectionReport(basePath),
  };
}
function renderEntryBrief(entry) {
  let evidenceKind = null;
  if (entry.resolvedEvidence && typeof entry.resolvedEvidence === "object") {
    evidenceKind = entry.resolvedEvidence.kind ?? null;
  }
  const lines = [
    `### ${entry.id} [${entry.severity}] ${entry.kind}`,
    `- filed: ${entry.ts}`,
    entry.resolvedAt
      ? `- resolved: ${entry.resolvedAt} (evidence.kind=${evidenceKind ?? "(none)"})`
      : `- status: OPEN`,
    `- summary: ${(entry.summary ?? "").slice(0, 600)}`,
  ];
  if (entry.suggestedFix) {
    lines.push(`- suggestedFix: ${entry.suggestedFix.slice(0, 400)}`);
  }
  return lines.filter(Boolean).join("\n");
}

function renderCommitBrief(commit) {
  const summary = commit.subject;
  // Leading space keeps the body's first line from fusing with the subject.
  const bodyLine = commit.body
    ? ` ${commit.body.split("\n")[0].slice(0, 200)}`
    : "";
  return `- ${commit.sha.slice(0, 9)} ${commit.ts.slice(0, 10)} ${summary}${bodyLine}`;
}
function renderMilestoneSnapshot(snapshot) {
  if (!snapshot || snapshot.length === 0) return " (no milestone state)";
  return snapshot
    .map((m) => {
      const sliceLines = m.slices
        .map((s) => {
          const taskCounts = s.tasks.length;
          const doneTasks = s.tasks.filter(
            (t) => t.status === "complete" || t.status === "done",
          ).length;
          return ` - ${s.id} [${s.status}] tasks=${doneTasks}/${taskCounts}`;
        })
        .join("\n");
      return ` - ${m.id} [${m.status}]\n${sliceLines}`;
    })
    .join("\n");
}

function renderValidations(validations) {
  if (!validations || validations.length === 0)
    return " (no validation files)";
  return validations
    .map((v) => ` - ${v.milestoneId}/${v.file} verdict=${v.verdict ?? "?"}`)
    .join("\n");
}
/**
 * Render the corpus into a markdown brief the reflection LLM consumes in
 * one turn. The prompt template (prompts/reflection-pass.md) ends with the
 * placeholder {{corpus}} which gets replaced with this output.
 */
export function renderReflectionCorpusBrief(corpus) {
  const sections = [];
  sections.push(`## Corpus snapshot — generated ${corpus.generatedAt}`);
  sections.push(`Project: ${corpus.basePath}`);
  sections.push(
    `Lookback for resolved entries: ${corpus.lookbackDays} days`,
  );
  sections.push("");
  sections.push(`## Open self-feedback entries (${corpus.openEntries.length})`);
  if (corpus.openEntries.length === 0) {
    sections.push("(none)");
  } else {
    for (const e of corpus.openEntries) sections.push(renderEntryBrief(e));
  }
  sections.push("");
  sections.push(
    `## Recently resolved self-feedback (${corpus.recentResolvedEntries.length}, last ${corpus.lookbackDays}d)`,
  );
  if (corpus.recentResolvedEntries.length === 0) {
    sections.push("(none)");
  } else {
    for (const e of corpus.recentResolvedEntries)
      sections.push(renderEntryBrief(e));
  }
  sections.push("");
  sections.push(`## Recent commits (${corpus.commits.length})`);
  if (corpus.commits.length === 0) {
    sections.push("(none — git log unavailable)");
  } else {
    for (const c of corpus.commits) sections.push(renderCommitBrief(c));
  }
  sections.push("");
  sections.push("## Milestone state");
  sections.push(renderMilestoneSnapshot(corpus.milestones));
  sections.push("");
  sections.push("## Milestone validation verdicts");
  sections.push(renderValidations(corpus.validations));
  sections.push("");
  sections.push("## Previous reflection report");
  if (!corpus.previousReport) {
    sections.push("(no prior report)");
  } else {
    sections.push(`File: ${corpus.previousReport.file}`);
    sections.push("```");
    sections.push(corpus.previousReport.content.slice(0, 4000));
    if (corpus.previousReport.content.length > 4000) {
      sections.push("[... truncated for brief; full file in .sf/reflection/]");
    }
    sections.push("```");
  }
  return sections.join("\n");
}
/**
 * Persist a reflection report to .sf/reflection/<timestamp>-report.md.
 * Returns the absolute path on success, null on failure (best-effort).
 *
 * Consumer: headless-reflect operator surface + future auto handler.
 */
export function writeReflectionReport(basePath, content) {
  try {
    const dir = sfReflectionDir(basePath);
    mkdirSync(dir, { recursive: true });
    const ts = new Date().toISOString().replace(/[:.]/g, "-");
    const path = join(dir, `${ts}-report.md`);
    writeFileSync(path, content, "utf-8");
    return path;
  } catch {
    return null;
  }
}

src/resources/extensions/sf/tests/reflection.test.mjs (new file)

@@ -0,0 +1,262 @@
/**
 * reflection.test.mjs: corpus assembler + brief renderer.
 *
 * Phase 1A scope: validate the data-assembly half of the reflection layer.
 * The LLM-call half lives in headless-reflect.ts and the future
 * autonomous-loop unit handler; those get their own coverage when they land.
 */
import {
  mkdirSync,
  mkdtempSync,
  rmSync,
  writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import {
  assembleReflectionCorpus,
  renderReflectionCorpusBrief,
  writeReflectionReport,
} from "../reflection.js";
import {
  closeDatabase,
  insertMilestone,
  insertSlice,
  insertTask,
  openDatabase,
} from "../sf-db.js";
import { recordSelfFeedback, markResolved } from "../self-feedback.js";

const tmpDirs = [];

afterEach(() => {
  closeDatabase();
  while (tmpDirs.length > 0) {
    const dir = tmpDirs.pop();
    if (dir) rmSync(dir, { recursive: true, force: true });
  }
});

function makeForgeProject() {
  const dir = mkdtempSync(join(tmpdir(), "sf-reflection-"));
  tmpDirs.push(dir);
  mkdirSync(join(dir, ".sf"), { recursive: true });
  writeFileSync(
    join(dir, "package.json"),
    JSON.stringify({ name: "singularity-forge" }),
  );
  openDatabase(join(dir, ".sf", "sf.db"));
  return dir;
}

function seedMilestone(dir, milestoneId, slices) {
  insertMilestone({ id: milestoneId, title: milestoneId, status: "active" });
  for (const slice of slices) {
    insertSlice({
      milestoneId,
      id: slice.id,
      title: slice.id,
      status: slice.status,
      risk: "medium",
      sequence: 1,
    });
    for (const task of slice.tasks ?? []) {
      insertTask({
        milestoneId,
        sliceId: slice.id,
        id: task.id,
        title: task.id,
        status: task.status,
        description: "",
        estimate: "",
        files: [],
        sequence: 1,
      });
    }
  }
}

function writeValidation(dir, milestoneId, verdict, body) {
  const mdir = join(dir, ".sf", "milestones", milestoneId);
  mkdirSync(mdir, { recursive: true });
  writeFileSync(
    join(mdir, `${milestoneId}-VALIDATION.md`),
    `---\nverdict: ${verdict}\n---\n\n${body}\n`,
  );
}
describe("assembleReflectionCorpus", () => {
test("returns null when DB is not open", () => {
// no makeForgeProject → no openDatabase → DB not available
const corpus = assembleReflectionCorpus("/tmp/does-not-exist");
expect(corpus).toBe(null);
});
test("collects open and recent-resolved forge entries", () => {
const dir = makeForgeProject();
// open entry
const openRes = recordSelfFeedback(
{ kind: "gap:foo", severity: "medium", summary: "open one" },
dir,
);
// resolved entry (today, well within lookback)
const resolvedRes = recordSelfFeedback(
{
kind: "architecture-defect:bar",
severity: "high",
summary: "resolved one",
},
dir,
);
markResolved(
resolvedRes.entry.id,
{
reason: "fixed",
evidence: { kind: "agent-fix", commitSha: "abc1234" },
},
dir,
);
const corpus = assembleReflectionCorpus(dir);
expect(corpus).not.toBe(null);
expect(corpus.openEntries).toHaveLength(1);
expect(corpus.openEntries[0].id).toBe(openRes.entry.id);
expect(corpus.recentResolvedEntries).toHaveLength(1);
expect(corpus.recentResolvedEntries[0].id).toBe(resolvedRes.entry.id);
expect(corpus.recentResolvedEntries[0].resolvedEvidence?.kind).toBe(
"agent-fix",
);
});
test("excludes resolutions older than the lookback window", async () => {
const dir = makeForgeProject();
// Seed an entry, mark it resolved, then back-date its resolved_at
// directly via DB so it falls outside the 30-day lookback.
const filed = recordSelfFeedback(
{ kind: "gap:old", severity: "low", summary: "old one" },
dir,
);
markResolved(
filed.entry.id,
{ reason: "old fix", evidence: { kind: "human-clear" } },
dir,
);
// Back-date 60 days. Use the same DB adapter via dynamic ESM import.
const oldTs = new Date(
Date.now() - 60 * 24 * 60 * 60 * 1000,
).toISOString();
const { _getAdapter } = await import("../sf-db/sf-db-core.js");
_getAdapter()
.prepare("UPDATE self_feedback SET resolved_at = :ts WHERE id = :id")
.run({ ":ts": oldTs, ":id": filed.entry.id });
const corpus = assembleReflectionCorpus(dir);
expect(corpus.openEntries).toHaveLength(0);
expect(corpus.recentResolvedEntries).toHaveLength(0);
});
test("captures milestone state and per-slice task counts", () => {
const dir = makeForgeProject();
seedMilestone(dir, "M001", [
{
id: "S01",
status: "complete",
tasks: [
{ id: "T01", status: "complete" },
{ id: "T02", status: "complete" },
],
},
{
id: "S02",
status: "pending",
tasks: [{ id: "T01", status: "pending" }],
},
]);
const corpus = assembleReflectionCorpus(dir);
expect(corpus.milestones).toHaveLength(1);
const m = corpus.milestones[0];
expect(m.id).toBe("M001");
expect(m.slices).toHaveLength(2);
expect(m.slices[0].id).toBe("S01");
expect(m.slices[0].tasks).toHaveLength(2);
expect(m.slices[1].tasks).toHaveLength(1);
});
test("captures milestone validation verdicts", () => {
const dir = makeForgeProject();
writeValidation(
dir,
"M001",
"needs-attention",
"S02-S05 still pending per this stale file",
);
writeValidation(dir, "M002", "pass", "All criteria met");
const corpus = assembleReflectionCorpus(dir);
expect(corpus.validations).toHaveLength(2);
const verdictByMilestone = Object.fromEntries(
corpus.validations.map((v) => [v.milestoneId, v.verdict]),
);
expect(verdictByMilestone.M001).toBe("needs-attention");
expect(verdictByMilestone.M002).toBe("pass");
});
test("returns commits as an array (best-effort, may be empty in tmpdir)", () => {
const dir = makeForgeProject();
const corpus = assembleReflectionCorpus(dir);
// tmpdir is not a git repo so git log fails → []. Any value other than
// an array would mean the helper threw out, which it must never.
expect(Array.isArray(corpus.commits)).toBe(true);
});
});
describe("renderReflectionCorpusBrief", () => {
test("renders all major sections even with empty inputs", () => {
const dir = makeForgeProject();
const corpus = assembleReflectionCorpus(dir);
const brief = renderReflectionCorpusBrief(corpus);
expect(brief).toContain("Open self-feedback entries");
expect(brief).toContain("Recently resolved self-feedback");
expect(brief).toContain("Recent commits");
expect(brief).toContain("Milestone state");
expect(brief).toContain("Milestone validation verdicts");
expect(brief).toContain("Previous reflection report");
});
test("includes entry id, severity, and kind in brief", () => {
const dir = makeForgeProject();
recordSelfFeedback(
{
kind: "gap:visible-in-brief",
severity: "high",
summary: "must appear",
},
dir,
);
const corpus = assembleReflectionCorpus(dir);
const brief = renderReflectionCorpusBrief(corpus);
expect(brief).toMatch(/sf-[\w-]+/);
expect(brief).toContain("gap:visible-in-brief");
expect(brief).toContain("[high]");
expect(brief).toContain("must appear");
});
});
describe("writeReflectionReport", () => {
test("writes a timestamped report to .sf/reflection/", () => {
const dir = makeForgeProject();
const path = writeReflectionReport(
dir,
"# Test reflection\n\nSome content.\n",
);
expect(path).toBeTruthy();
expect(path).toMatch(/-report\.md$/);
// And next-pass should be able to read it as the previous report
const corpus = assembleReflectionCorpus(dir);
expect(corpus.previousReport).toBeTruthy();
expect(corpus.previousReport.content).toContain("Test reflection");
});
});