feat(reflection): wire LLM dispatch (sf headless reflect --run)

Phase 1B of the reflection layer: complete the operator-driven loop by adding actual LLM dispatch. Phase 1A (commit e161a59e2) shipped the corpus assembler + prompt template + the prompt-emit operator surface. This commit wires the dispatch end so `sf headless reflect --run` produces a real report on disk without manual model piping. Why shell-out to the gemini CLI and not SF's provider abstraction: reflection is a single-prompt one-shot inference. Going through SF's full agent dispatch would require a session, model registry, tool registration, recovery shell — overkill for "render this prompt, capture text." The gemini CLI handles auth (~/.gemini/oauth_creds.json), Code Assist project discovery, and protocol drift on its behalf. Subprocess cost is paid once per reflection (rare). Implementation: - reflection.js: runGeminiReflection(prompt, options) spawns `gemini --yolo --model <model> -p "<directive>"` and pipes the giant rendered template via stdin (gemini -p reads stdin and appends). Returns { ok, content, cleanFinish, exitCode, error, stderr }; never throws. Defaults to gemini-3-pro-preview (0% used on AI Ultra, strongest agentic model with quota). 8-minute timeout. cleanFinish detected by REFLECTION_COMPLETE terminator (emitted by the prompt template's output contract) — operator gets a warning when the report is truncated. - headless-reflect.ts: --run flag triggers dispatch + report write via writeReflectionReport. --model overrides the default. Errors surface as JSON or text per --json. Successful runs emit the report path on stdout; failures emit error + truncated stderr. - help-text.ts: documents --run and --model flags. - Tests (4 new, 13 total): use a fake `gemini` binary on PATH to exercise the spawn path without real OAuth/network — covers ok+cleanFinish, non-zero exit, hang/timeout, missing-terminator. All 1538 SF extension tests pass; typecheck clean. Phase 2 follow-up (still gated on sf-mp4rxkwb-l4baga triage-not-a-first-class-unit-type landing): reflection-pass becomes a real autonomous-loop unit type, milestone-close auto-triggers it, the report's `Recommended new self-feedback entries` section gets parsed and the entries auto-filed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 04:33:16 +02:00 · 2026-05-14 04:33:16 +02:00 · 62b19d7ba4
commit 62b19d7ba4
parent e161a59e2f
5 changed files with 296 additions and 3 deletions
--- a/src/headless-reflect.ts
+++ b/src/headless-reflect.ts
@ -52,6 +52,8 @@ function sfExtensionPath(moduleName: string): string {
 export interface HandleReflectOptions {
 	json?: boolean;
 	run?: boolean;
 	model?: string;
 }
 export interface HandleReflectResult {
@ -89,6 +91,18 @@ export async function handleReflect(
 	let mod: {
 		assembleReflectionCorpus: (basePath: string) => unknown;
 		renderReflectionCorpusBrief: (corpus: unknown) => string;
 		writeReflectionReport: (basePath: string, content: string) => string | null;
 		runGeminiReflection: (
 			prompt: string,
 			options?: { model?: string; timeoutMs?: number },
 		) => Promise<{
 			ok: boolean;
 			content?: string;
 			error?: string;
 			cleanFinish?: boolean;
 			stderr?: string;
 			exitCode?: number;
 		}>;
 	};
 	try {
 		mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
@ -149,6 +163,47 @@ export async function handleReflect(
 	// rendered prompt to pipe into a model. The full template path runs
 	// inside SF when reflection-pass becomes a real unit type.
 	const rendered = promptTemplate.replace("{{corpus}}", brief);
-	process.stdout.write(`${rendered}\n`);
+
 	if (!options.run) {
 		process.stdout.write(`${rendered}\n`);
 		return { exitCode: 0 };
 	}
 	// --run: dispatch the rendered prompt to gemini-cli, capture the report,
 	// persist to .sf/reflection/<ts>-report.md, emit the report path on stdout.
 	process.stderr.write("[reflect] dispatching to gemini-cli (this can take a few minutes)…\n");
 	const result = await mod.runGeminiReflection(rendered, {
 		model: options.model,
 	});
 	if (!result.ok) {
 		const payload = {
 			ok: false,
 			error: result.error ?? "unknown gemini error",
 			exitCode: result.exitCode,
 			stderrTail: (result.stderr ?? "").slice(-500),
 		};
 		process.stdout.write(
 			options.json
 				? `${JSON.stringify(payload)}\n`
 				: `[reflect] failed: ${payload.error}\n`,
 		);
 		return { exitCode: 1 };
 	}
 	const reportPath = mod.writeReflectionReport(cwd, result.content ?? "");
 	const payload = {
 		ok: true,
 		reportPath,
 		cleanFinish: result.cleanFinish === true,
 	};
 	if (options.json) {
 		process.stdout.write(`${JSON.stringify(payload)}\n`);
 	} else {
 		process.stdout.write(`Reflection report written to: ${reportPath}\n`);
 		if (!result.cleanFinish) {
 			process.stderr.write(
 				"[reflect] WARNING: report did not include REFLECTION_COMPLETE terminator — output may be truncated\n",
 			);
 		}
 	}
 	return { exitCode: 0 };
 }
--- a/src/headless.ts
+++ b/src/headless.ts
@ -831,8 +831,18 @@ async function runHeadlessOnce(
 	// reflection unit is a separate follow-up.
 	if (options.command === "reflect") {
 		const wantsJson = options.json || options.commandArgs.includes("--json");
 		const wantsRun = options.commandArgs.includes("--run");
 		const modelIdx = options.commandArgs.indexOf("--model");
 		const model =
 			modelIdx >= 0 && modelIdx + 1 < options.commandArgs.length
 				? options.commandArgs[modelIdx + 1]
 				: undefined;
 		const { handleReflect } = await import("./headless-reflect.js");
-		const result = await handleReflect(process.cwd(), { json: wantsJson });
+		const result = await handleReflect(process.cwd(), {
 			json: wantsJson,
 			run: wantsRun,
 			model,
 		});
 		return { exitCode: result.exitCode, interrupted: false, timedOut: false };
 	}
--- a/src/help-text.ts
+++ b/src/help-text.ts
@ -224,7 +224,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
 		"  new-milestone        Create a milestone from a specification document",
 		"  query                Machine snapshot: JSON state + next dispatch + costs (no LLM)",
 		"  usage                Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
-		"  reflect              Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw)",
+		"  reflect              Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
 		"",
 		"new-milestone flags:",
 		"  --context <path>     Path to spec/PRD file (use '-' for stdin)",
--- a/src/resources/extensions/sf/reflection.js
+++ b/src/resources/extensions/sf/reflection.js
@ -344,3 +344,109 @@ export function writeReflectionReport(basePath, content) {
 		return null;
 	}
 }
 const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";
 /**
 * Spawn the gemini CLI to run a reflection pass against the given prompt.
 *
 * Why shell-out and not SF's provider abstraction: the reflection pass is a
 * single-prompt one-shot inference. Going through SF's full agent dispatch
 * would require a session, model registry, tool registration, recovery
 * shell — all overkill for "render this prompt, capture text". The gemini
 * CLI handles auth via OAuth (~/.gemini/oauth_creds.json), Code Assist
 * project discovery, and protocol drift on its behalf. We pay the cost of
 * spawning a subprocess once per reflection, which is rare.
 *
 * The gemini CLI's `-p` flag takes a prompt and `Appended to input on
 * stdin (if any)` — so the giant rendered template goes via stdin and -p
 * carries a tiny directive that orients the model to the contract.
 *
 * Returns { ok, content?, exitCode?, error? }. Best-effort; never throws.
 *
 * Consumer: headless-reflect operator surface (--run flag).
 */
 export async function runGeminiReflection(prompt, options = {}) {
 	const { spawn } = await import("node:child_process");
 	const model = options.model ?? "gemini-3-pro-preview";
 	const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
 	return await new Promise((resolve) => {
 		let stdoutBuf = "";
 		let stderrBuf = "";
 		let settled = false;
 		const finish = (result) => {
 			if (settled) return;
 			settled = true;
 			resolve(result);
 		};
 		let proc;
 		try {
 			proc = spawn(
 				"gemini",
 				["--yolo", "--model", model, "-p", "Run the reflection pass per the contract in the prompt below."],
 				{ stdio: ["pipe", "pipe", "pipe"] },
 			);
 		} catch (err) {
 			finish({
 				ok: false,
 				error: `gemini spawn failed: ${err instanceof Error ? err.message : String(err)}`,
 			});
 			return;
 		}
 		const timer = setTimeout(() => {
 			try {
 				proc.kill("SIGTERM");
 			} catch {
 				/* ignore */
 			}
 			finish({
 				ok: false,
 				error: `gemini timed out after ${timeoutMs}ms`,
 				stderr: stderrBuf,
 			});
 		}, timeoutMs);
 		proc.stdout.on("data", (chunk) => {
 			stdoutBuf += chunk.toString("utf-8");
 		});
 		proc.stderr.on("data", (chunk) => {
 			stderrBuf += chunk.toString("utf-8");
 		});
 		proc.on("error", (err) => {
 			clearTimeout(timer);
 			finish({
 				ok: false,
 				error: `gemini process error: ${err.message}`,
 				stderr: stderrBuf,
 			});
 		});
 		proc.on("close", (code) => {
 			clearTimeout(timer);
 			if (code !== 0) {
 				finish({
 					ok: false,
 					exitCode: code,
 					error: `gemini exited with code ${code}`,
 					stderr: stderrBuf,
 					content: stdoutBuf,
 				});
 				return;
 			}
 			finish({
 				ok: true,
 				exitCode: 0,
 				content: stdoutBuf,
 				cleanFinish: stdoutBuf.includes(REFLECTION_TERMINATOR),
 			});
 		});
 		try {
 			proc.stdin.write(prompt);
 			proc.stdin.end();
 		} catch (err) {
 			clearTimeout(timer);
 			finish({
 				ok: false,
 				error: `gemini stdin write failed: ${err instanceof Error ? err.message : String(err)}`,
 			});
 		}
 	});
 }
--- a/src/resources/extensions/sf/tests/reflection.test.mjs
+++ b/src/resources/extensions/sf/tests/reflection.test.mjs
@ -260,3 +260,125 @@ describe("writeReflectionReport", () => {
 		expect(corpus.previousReport.content).toContain("Test reflection");
 	});
 });
 describe("runGeminiReflection", () => {
 	test("returns ok with content + cleanFinish when terminator present", async () => {
 		const { runGeminiReflection } = await import("../reflection.js");
 		// Stub gemini binary by spawning a node script that prints a fake
 		// response and exits 0. We invoke it via a wrapper PATH override —
 		// but the simplest approach: directly call spawn-replacement via
 		// a process.env.PATH prepend to a tmp dir containing a fake gemini.
 		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-"));
 		tmpDirs.push(fakeBinDir);
 		const fakeGeminiPath = join(fakeBinDir, "gemini");
 		writeFileSync(
 			fakeGeminiPath,
 			`#!/usr/bin/env node
 let stdin = "";
 process.stdin.on("data", (c) => { stdin += c; });
 process.stdin.on("end", () => {
  process.stdout.write("# Reflection\\nReceived " + stdin.length + " chars\\nREFLECTION_COMPLETE\\n");
  process.exit(0);
 });
 `,
 		);
 		// Make it executable
 		const { chmodSync } = await import("node:fs");
 		chmodSync(fakeGeminiPath, 0o755);
 		const originalPath = process.env.PATH;
 		process.env.PATH = `${fakeBinDir}:${originalPath}`;
 		try {
 			const result = await runGeminiReflection("a".repeat(100), {
 				timeoutMs: 5000,
 			});
 			expect(result.ok).toBe(true);
 			expect(result.content).toContain("Received 100 chars");
 			expect(result.cleanFinish).toBe(true);
 		} finally {
 			process.env.PATH = originalPath;
 		}
 	});
 	test("returns ok=false on non-zero exit", async () => {
 		const { runGeminiReflection } = await import("../reflection.js");
 		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-fail-"));
 		tmpDirs.push(fakeBinDir);
 		const fakeGeminiPath = join(fakeBinDir, "gemini");
 		writeFileSync(
 			fakeGeminiPath,
 			`#!/usr/bin/env node
 process.stderr.write("synthetic fail\\n");
 process.exit(2);
 `,
 		);
 		const { chmodSync } = await import("node:fs");
 		chmodSync(fakeGeminiPath, 0o755);
 		const originalPath = process.env.PATH;
 		process.env.PATH = `${fakeBinDir}:${originalPath}`;
 		try {
 			const result = await runGeminiReflection("ignored", {
 				timeoutMs: 5000,
 			});
 			expect(result.ok).toBe(false);
 			expect(result.exitCode).toBe(2);
 			expect(result.error).toMatch(/exited with code 2/);
 		} finally {
 			process.env.PATH = originalPath;
 		}
 	});
 	test("returns ok=false with timeout error when subprocess hangs", async () => {
 		const { runGeminiReflection } = await import("../reflection.js");
 		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-hang-"));
 		tmpDirs.push(fakeBinDir);
 		const fakeGeminiPath = join(fakeBinDir, "gemini");
 		writeFileSync(
 			fakeGeminiPath,
 			`#!/usr/bin/env node
 setInterval(() => {}, 1000); // hang
 `,
 		);
 		const { chmodSync } = await import("node:fs");
 		chmodSync(fakeGeminiPath, 0o755);
 		const originalPath = process.env.PATH;
 		process.env.PATH = `${fakeBinDir}:${originalPath}`;
 		try {
 			const result = await runGeminiReflection("ignored", {
 				timeoutMs: 200,
 			});
 			expect(result.ok).toBe(false);
 			expect(result.error).toMatch(/timed out/);
 		} finally {
 			process.env.PATH = originalPath;
 		}
 	});
 	test("flags cleanFinish=false when terminator absent", async () => {
 		const { runGeminiReflection } = await import("../reflection.js");
 		const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-trunc-"));
 		tmpDirs.push(fakeBinDir);
 		const fakeGeminiPath = join(fakeBinDir, "gemini");
 		writeFileSync(
 			fakeGeminiPath,
 			`#!/usr/bin/env node
 process.stdout.write("# Truncated\\nNo terminator here.\\n");
 process.exit(0);
 `,
 		);
 		const { chmodSync } = await import("node:fs");
 		chmodSync(fakeGeminiPath, 0o755);
 		const originalPath = process.env.PATH;
 		process.env.PATH = `${fakeBinDir}:${originalPath}`;
 		try {
 			const result = await runGeminiReflection("ignored", { timeoutMs: 5000 });
 			expect(result.ok).toBe(true);
 			expect(result.cleanFinish).toBe(false);
 		} finally {
 			process.env.PATH = originalPath;
 		}
 	});
 });