feat(reflection): wire LLM dispatch (sf headless reflect --run)

Phase 1B of the reflection layer: complete the operator-driven loop by adding actual LLM dispatch. Phase 1A (commit e161a59e2) shipped the corpus assembler + prompt template + the prompt-emit operator surface. This commit wires the dispatch end so `sf headless reflect --run` produces a real report on disk without manual model piping. Why shell-out to the gemini CLI and not SF's provider abstraction: reflection is a single-prompt one-shot inference. Going through SF's full agent dispatch would require a session, model registry, tool registration, recovery shell — overkill for "render this prompt, capture text." The gemini CLI handles auth (~/.gemini/oauth_creds.json), Code Assist project discovery, and protocol drift on our behalf. Subprocess cost is paid once per reflection (rare). Implementation: - reflection.js: runGeminiReflection(prompt, options) spawns `gemini --yolo --model <model> -p "<directive>"` and pipes the giant rendered template via stdin (gemini appends the -p prompt to whatever it reads on stdin). Returns { ok, content, cleanFinish, exitCode, error, stderr }; never throws. Defaults to gemini-3-pro-preview (0% used on AI Ultra, strongest agentic model with quota). 8-minute timeout. cleanFinish detected by REFLECTION_COMPLETE terminator (emitted by the prompt template's output contract) — operator gets a warning when the terminator is missing, since the report may be truncated. - headless-reflect.ts: --run flag triggers dispatch + report write via writeReflectionReport. --model overrides the default. Errors surface as JSON or text per --json. Successful runs emit the report path on stdout; failures emit the error (plus the exit code and a truncated stderr tail in --json mode). - headless.ts: parses --run and --model <id> from the command args and forwards them to handleReflect. - help-text.ts: documents --run and --model flags. - Tests (4 new, 13 total): use a fake `gemini` binary on PATH to exercise the spawn path without real OAuth/network — covers ok+cleanFinish, non-zero exit, hang/timeout, missing-terminator. All 1538 SF extension tests pass; typecheck clean.
Phase 2 follow-up (still gated on sf-mp4rxkwb-l4baga triage-not-a-first-class-unit-type landing): reflection-pass becomes a real autonomous-loop unit type, milestone-close auto-triggers it, the report's `Recommended new self-feedback entries` section gets parsed and the entries auto-filed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 04:33:16 +02:00 · 2026-05-14 04:33:16 +02:00 · 62b19d7ba4
commit 62b19d7ba4
parent e161a59e2f
5 changed files with 296 additions and 3 deletions
--- a/src/headless-reflect.ts
+++ b/src/headless-reflect.ts
@ -52,6 +52,8 @@ function sfExtensionPath(moduleName: string): string {

 export interface HandleReflectOptions { // Options accepted by handleReflect.
 	json?: boolean; // Emit the --run result or error as one JSON line on stdout instead of text.
+	run?: boolean; // When set, dispatch the rendered prompt via runGeminiReflection and write the report; otherwise the prompt is only printed.
+	model?: string; // Model id forwarded to runGeminiReflection; its own default applies when undefined.
 }

 export interface HandleReflectResult {
@ -89,6 +91,18 @@ export async function handleReflect(
 	let mod: {
 		assembleReflectionCorpus: (basePath: string) => unknown;
 		renderReflectionCorpusBrief: (corpus: unknown) => string;
+		writeReflectionReport: (basePath: string, content: string) => string | null;
+		runGeminiReflection: (
+			prompt: string,
+			options?: { model?: string; timeoutMs?: number },
+		) => Promise<{
+			ok: boolean;
+			content?: string;
+			error?: string;
+			cleanFinish?: boolean;
+			stderr?: string;
+			exitCode?: number;
+		}>;
 	};
 	try {
 		mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
@ -149,6 +163,47 @@ export async function handleReflect(
 	// rendered prompt to pipe into a model. The full template path runs
 	// inside SF when reflection-pass becomes a real unit type.
 	const rendered = promptTemplate.replace("{{corpus}}", brief);
-	process.stdout.write(`${rendered}\n`);
+
+	if (!options.run) {
+		process.stdout.write(`${rendered}\n`);
+		return { exitCode: 0 };
+	}
+
+	// --run: dispatch the rendered prompt to gemini-cli, capture the report,
+	// persist to .sf/reflection/<ts>-report.md, emit the report path on stdout.
+	process.stderr.write("[reflect] dispatching to gemini-cli (this can take a few minutes)…\n");
+	const result = await mod.runGeminiReflection(rendered, {
+		model: options.model,
+	});
+	if (!result.ok) {
+		const payload = {
+			ok: false,
+			error: result.error ?? "unknown gemini error",
+			exitCode: result.exitCode,
+			stderrTail: (result.stderr ?? "").slice(-500),
+		};
+		process.stdout.write(
+			options.json
+				? `${JSON.stringify(payload)}\n`
+				: `[reflect] failed: ${payload.error}\n`,
+		);
+		return { exitCode: 1 };
+	}
+	const reportPath = mod.writeReflectionReport(cwd, result.content ?? "");
+	const payload = {
+		ok: true,
+		reportPath,
+		cleanFinish: result.cleanFinish === true,
+	};
+	if (options.json) {
+		process.stdout.write(`${JSON.stringify(payload)}\n`);
+	} else {
+		process.stdout.write(`Reflection report written to: ${reportPath}\n`);
+		if (!result.cleanFinish) {
+			process.stderr.write(
+				"[reflect] WARNING: report did not include REFLECTION_COMPLETE terminator — output may be truncated\n",
+			);
+		}
+	}
 	return { exitCode: 0 };
 }
--- a/src/headless.ts
+++ b/src/headless.ts
@ -831,8 +831,18 @@ async function runHeadlessOnce(
 	// reflection unit is a separate follow-up.
 	if (options.command === "reflect") {
 		const wantsJson = options.json || options.commandArgs.includes("--json");
+		const wantsRun = options.commandArgs.includes("--run");
+		const modelIdx = options.commandArgs.indexOf("--model");
+		const model =
+			modelIdx >= 0 && modelIdx + 1 < options.commandArgs.length
+				? options.commandArgs[modelIdx + 1]
+				: undefined;
 		const { handleReflect } = await import("./headless-reflect.js");
-		const result = await handleReflect(process.cwd(), { json: wantsJson });
+		const result = await handleReflect(process.cwd(), {
+			json: wantsJson,
+			run: wantsRun,
+			model,
+		});
 		return { exitCode: result.exitCode, interrupted: false, timedOut: false };
 	}

--- a/src/help-text.ts
+++ b/src/help-text.ts
@ -224,7 +224,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 		"  new-milestone        Create a milestone from a specification document",
 		"  query                Machine snapshot: JSON state + next dispatch + costs (no LLM)",
 		"  usage                Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
-		"  reflect              Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw)",
+		"  reflect              Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
 		"",
 		"new-milestone flags:",
 		"  --context <path>     Path to spec/PRD file (use '-' for stdin)",
--- a/src/resources/extensions/sf/reflection.js
+++ b/src/resources/extensions/sf/reflection.js
@ -344,3 +344,109 @@ export function writeReflectionReport(basePath, content) {
 		return null;
 	}
 }
+
+const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";
+
+/**
+ * Spawn the gemini CLI to run a reflection pass against the given prompt.
+ *
+ * Why shell-out and not SF's provider abstraction: the reflection pass is a
+ * single-prompt one-shot inference. Going through SF's full agent dispatch
+ * would require a session, model registry, tool registration, recovery
+ * shell — all overkill for "render this prompt, capture text". The gemini
+ * CLI handles auth via OAuth (~/.gemini/oauth_creds.json), Code Assist
+ * project discovery, and protocol drift on its behalf. We pay the cost of
+ * spawning a subprocess once per reflection, which is rare.
+ *
+ * The gemini CLI's `-p` flag takes a prompt and `Appended to input on
+ * stdin (if any)` — so the giant rendered template goes via stdin and -p
+ * carries a tiny directive that orients the model to the contract.
+ *
+ * Returns { ok, content?, exitCode?, error? }. Best-effort; never throws.
+ *
+ * Consumer: headless-reflect operator surface (--run flag).
+ */
+export async function runGeminiReflection(prompt, options = {}) {
+	const { spawn } = await import("node:child_process");
+	const model = options.model ?? "gemini-3-pro-preview";
+	const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
+	return await new Promise((resolve) => {
+		let stdoutBuf = "";
+		let stderrBuf = "";
+		let settled = false;
+		const finish = (result) => {
+			if (settled) return;
+			settled = true;
+			resolve(result);
+		};
+		let proc;
+		try {
+			proc = spawn(
+				"gemini",
+				["--yolo", "--model", model, "-p", "Run the reflection pass per the contract in the prompt below."],
+				{ stdio: ["pipe", "pipe", "pipe"] },
+			);
+		} catch (err) {
+			finish({
+				ok: false,
+				error: `gemini spawn failed: ${err instanceof Error ? err.message : String(err)}`,
+			});
+			return;
+		}
+		const timer = setTimeout(() => {
+			try {
+				proc.kill("SIGTERM");
+			} catch {
+				/* ignore */
+			}
+			finish({
+				ok: false,
+				error: `gemini timed out after ${timeoutMs}ms`,
+				stderr: stderrBuf,
+			});
+		}, timeoutMs);
+		proc.stdout.on("data", (chunk) => {
+			stdoutBuf += chunk.toString("utf-8");
+		});
+		proc.stderr.on("data", (chunk) => {
+			stderrBuf += chunk.toString("utf-8");
+		});
+		proc.on("error", (err) => {
+			clearTimeout(timer);
+			finish({
+				ok: false,
+				error: `gemini process error: ${err.message}`,
+				stderr: stderrBuf,
+			});
+		});
+		proc.on("close", (code) => {
+			clearTimeout(timer);
+			if (code !== 0) {
+				finish({
+					ok: false,
+					exitCode: code,
+					error: `gemini exited with code ${code}`,
+					stderr: stderrBuf,
+					content: stdoutBuf,
+				});
+				return;
+			}
+			finish({
+				ok: true,
+				exitCode: 0,
+				content: stdoutBuf,
+				cleanFinish: stdoutBuf.includes(REFLECTION_TERMINATOR),
+			});
+		});
+		try {
+			proc.stdin.write(prompt);
+			proc.stdin.end();
+		} catch (err) {
+			clearTimeout(timer);
+			finish({
+				ok: false,
+				error: `gemini stdin write failed: ${err instanceof Error ? err.message : String(err)}`,
+			});
+		}
+	});
+}
--- a/src/resources/extensions/sf/tests/reflection.test.mjs
+++ b/src/resources/extensions/sf/tests/reflection.test.mjs
@ -260,3 +260,125 @@ describe("writeReflectionReport", () => {
 		expect(corpus.previousReport.content).toContain("Test reflection");
 	});
 });
+
+describe("runGeminiReflection", () => { // spawn-path tests driven by fake `gemini` executables on PATH
+	test("returns ok with content + cleanFinish when terminator present", async () => {
+		const { runGeminiReflection } = await import("../reflection.js");
+		// Stub the gemini binary: write an executable node script named
+		// `gemini` into a fresh temp dir and prepend that dir to
+		// process.env.PATH, so the spawn inside runGeminiReflection picks
+		// up the fake. It echoes the stdin length plus the terminator.
+		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-"));
+		tmpDirs.push(fakeBinDir);
+		const fakeGeminiPath = join(fakeBinDir, "gemini");
+		writeFileSync(
+			fakeGeminiPath,
+			`#!/usr/bin/env node
+let stdin = "";
+process.stdin.on("data", (c) => { stdin += c; });
+process.stdin.on("end", () => {
+  process.stdout.write("# Reflection\\nReceived " + stdin.length + " chars\\nREFLECTION_COMPLETE\\n");
+  process.exit(0);
+});
+`,
+		);
+		// Mark the script executable so it can be spawned directly via its shebang.
+		const { chmodSync } = await import("node:fs");
+		chmodSync(fakeGeminiPath, 0o755);
+
+		const originalPath = process.env.PATH;
+		process.env.PATH = `${fakeBinDir}:${originalPath}`; // fake dir first so it shadows any real gemini
+		try {
+			const result = await runGeminiReflection("a".repeat(100), {
+				timeoutMs: 5000,
+			});
+			expect(result.ok).toBe(true);
+			expect(result.content).toContain("Received 100 chars"); // proves the prompt arrived via stdin
+			expect(result.cleanFinish).toBe(true);
+		} finally {
+			process.env.PATH = originalPath; // restore PATH for the tests that follow
+		}
+	});
+
+	test("returns ok=false on non-zero exit", async () => {
+		const { runGeminiReflection } = await import("../reflection.js");
+		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-fail-"));
+		tmpDirs.push(fakeBinDir);
+		const fakeGeminiPath = join(fakeBinDir, "gemini");
+		writeFileSync(
+			fakeGeminiPath,
+			`#!/usr/bin/env node
+process.stderr.write("synthetic fail\\n");
+process.exit(2);
+`,
+		);
+		const { chmodSync } = await import("node:fs");
+		chmodSync(fakeGeminiPath, 0o755);
+
+		const originalPath = process.env.PATH;
+		process.env.PATH = `${fakeBinDir}:${originalPath}`;
+		try {
+			const result = await runGeminiReflection("ignored", { // this fake never reads stdin
+				timeoutMs: 5000,
+			});
+			expect(result.ok).toBe(false);
+			expect(result.exitCode).toBe(2);
+			expect(result.error).toMatch(/exited with code 2/);
+		} finally {
+			process.env.PATH = originalPath;
+		}
+	});
+
+	test("returns ok=false with timeout error when subprocess hangs", async () => {
+		const { runGeminiReflection } = await import("../reflection.js");
+		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-hang-"));
+		tmpDirs.push(fakeBinDir);
+		const fakeGeminiPath = join(fakeBinDir, "gemini");
+		writeFileSync(
+			fakeGeminiPath,
+			`#!/usr/bin/env node
+setInterval(() => {}, 1000); // hang
+`,
+		);
+		const { chmodSync } = await import("node:fs");
+		chmodSync(fakeGeminiPath, 0o755);
+
+		const originalPath = process.env.PATH;
+		process.env.PATH = `${fakeBinDir}:${originalPath}`;
+		try {
+			const result = await runGeminiReflection("ignored", {
+				timeoutMs: 200, // short timeout: the fake never exits, so the timer has to fire
+			});
+			expect(result.ok).toBe(false);
+			expect(result.error).toMatch(/timed out/);
+		} finally {
+			process.env.PATH = originalPath;
+		}
+	});
+
+	test("flags cleanFinish=false when terminator absent", async () => {
+		const { runGeminiReflection } = await import("../reflection.js");
+		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-trunc-"));
+		tmpDirs.push(fakeBinDir);
+		const fakeGeminiPath = join(fakeBinDir, "gemini");
+		writeFileSync(
+			fakeGeminiPath,
+			`#!/usr/bin/env node
+process.stdout.write("# Truncated\\nNo terminator here.\\n");
+process.exit(0);
+`,
+		);
+		const { chmodSync } = await import("node:fs");
+		chmodSync(fakeGeminiPath, 0o755);
+
+		const originalPath = process.env.PATH;
+		process.env.PATH = `${fakeBinDir}:${originalPath}`;
+		try {
+			const result = await runGeminiReflection("ignored", { timeoutMs: 5000 });
+			expect(result.ok).toBe(true); // exit 0 still counts as ok; only cleanFinish flags the missing terminator
+			expect(result.cleanFinish).toBe(false);
+		} finally {
+			process.env.PATH = originalPath;
+		}
+	});
+});