refactor(reflection): route through @singularity-forge/ai, drop subprocess + gemini hardcoding

User-correctable architecture defect: runGeminiReflection shelled out to
the `gemini` CLI binary and hardcoded the gemini provider, duplicating
auth discovery and disconnecting the call from SF's metrics, cost
accounting, and provider abstraction. Should have routed through the
existing @singularity-forge/ai layer from the start.

Replace runGeminiReflection with runReflection that:

- Resolves an operator-supplied "provider/modelId" string via
  @singularity-forge/ai's getModel (the canonical accessor for the
  runtime model registry — MODELS itself isn't re-exported).
- Calls completeSimple from @singularity-forge/ai. Same provider routing
  every other SF LLM call uses (anthropic, openai, google-gemini-cli,
  openai-codex-responses, mistral, etc.). No subprocess.
- Default model is google-gemini-cli/gemini-3-pro-preview because that
  matches the operator's primary AI Ultra tier — but the default lives
  in a single named constant (DEFAULT_REFLECTION_MODEL), no provider
  hardcoding in the call path. Operators override per-call via --model.
- Returns { ok, content?, cleanFinish?, error?, provider, modelId } for
  observability into which provider actually answered.

runGeminiReflection kept as an alias for back-compat so the existing
headless-reflect.ts caller works unchanged. New code should use
runReflection directly.

Tests: switched from a fake-gemini-binary-on-PATH approach (5 tests)
to a clean dependency-injection approach via options.complete (5 tests
+ 1 new "rejects bare model strings"). Mock returns AssistantMessage
shape directly, no subprocess machinery.

Two pre-existing migration test failures in sf-db-migration.test.mjs
(openDatabase_migrates_v27, openDatabase_v52_db_heals_routing_history)
are unaffected by this commit — they fail in isolation too, likely
related to commit 7570aac4b's routing-metrics track. Logged here so the
1589/1591 pass rate is auditable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 05:11:07 +02:00
parent 21d9054611
commit 6a88ad2f00
2 changed files with 248 additions and 195 deletions

View file

@ -348,105 +348,177 @@ export function writeReflectionReport(basePath, content) {
// Sentinel the reflection prompt contract requires the model to emit as its
// final line; its presence distinguishes a clean finish from truncation.
const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";

/**
 * Default provider/model used when --model is not supplied. This is the
 * single point of model defaulting — the rest of runReflection is
 * provider-agnostic and routes through @singularity-forge/ai's completeSimple.
 *
 * Today's default is google-gemini-cli/gemini-3-pro-preview because that's
 * what the operator's persistent AI Ultra session is wired to and reflection
 * is a reasoning-heavy task that benefits from the pro tier. Operators
 * override per-call via --model. A future change can let SF's
 * persisted-default-model resolution feed this in, eliminating the constant.
 */
const DEFAULT_REFLECTION_MODEL = "google-gemini-cli/gemini-3-pro-preview";
/**
 * Extract assistant text from an AssistantMessage. The response content is
 * an array of TextContent | ThinkingContent | ToolCall | ... parts; every
 * text part is concatenated so multi-part responses survive intact, while
 * thinking and tool-use parts are dropped — reflection produces text only
 * by contract. Returns "" for anything that isn't a content array.
 */
function extractAssistantText(message) {
  const parts = message?.content;
  if (!Array.isArray(parts)) return "";
  return parts
    .filter(
      (part) =>
        part &&
        typeof part === "object" &&
        part.type === "text" &&
        typeof part.text === "string",
    )
    .map((part) => part.text)
    .join("");
}
/**
 * Parse a "provider/modelId" string into [provider, modelId]. Returns null
 * when the input isn't in that shape. We require the explicit
 * provider/model form so reflection never accidentally picks a model from
 * the wrong provider (e.g. anthropic claude-sonnet-4-6 when the operator
 * meant google-gemini-cli/gemini-3-pro-preview). Only the first slash
 * splits, so model ids containing "/" survive.
 */
function parseModelString(input) {
  if (typeof input !== "string") return null;
  const slash = input.indexOf("/");
  // Reject missing slash, empty provider ("/x"), and empty modelId ("x/").
  if (slash <= 0 || slash === input.length - 1) return null;
  return [input.slice(0, slash), input.slice(slash + 1)];
}

/**
 * Resolve a "provider/modelId" string to a Model object via
 * @singularity-forge/ai's getModel. Returns null when the string is
 * malformed or the model isn't registered. The dynamic import is
 * deliberately NOT wrapped in try/catch: an import failure propagates to
 * the caller, which maps it to a distinct "failed to load model catalog"
 * error rather than the "unknown model" path.
 *
 * Why getModel and not MODELS directly: the static MODELS catalog is
 * imported into models.ts but not re-exported from the package's public
 * API. getModel is the canonical accessor and pulls from the runtime
 * model-registry which already merges generated MODELS + custom MODELS.
 */
async function resolveModel(providerModelString) {
  const parsed = parseModelString(providerModelString);
  if (!parsed) return null;
  const [provider, modelId] = parsed;
  const ai = await import("@singularity-forge/ai");
  if (typeof ai.getModel !== "function") return null;
  try {
    const model = ai.getModel(provider, modelId);
    return model ?? null;
  } catch {
    // getModel throwing for an unregistered model is treated the same as
    // returning nothing: the model simply isn't available.
    return null;
  }
}
/**
 * Run a reflection pass against the given prompt.
 *
 * Provider-agnostic: routes through @singularity-forge/ai's completeSimple,
 * which dispatches via the same provider registry SF uses for every other
 * LLM call (anthropic, openai, google-gemini-cli, openai-codex-responses,
 * mistral, etc.). The model is chosen by the operator's --model flag
 * (which arrives via options.model) and resolved against the @singularity-
 * forge/ai MODELS catalog. No subprocess, no hardcoded provider in the
 * call path.
 *
 * options.model — "provider/modelId" string (e.g. "google-gemini-cli/
 * gemini-3-pro-preview", "anthropic/claude-sonnet-4-6"). Defaults to
 * DEFAULT_REFLECTION_MODEL if unset.
 *
 * options.timeoutMs — hard cap on the provider call; defaults to 8 minutes.
 *
 * options.complete — dependency injection for tests. When supplied, it is
 * called instead of the imported completeSimple, with the same (model,
 * context, options) signature.
 *
 * Returns { ok, content?, cleanFinish?, error?, provider?, modelId? }.
 * Best-effort; never throws.
 *
 * Consumer: headless-reflect operator surface (--run flag).
 */
export async function runReflection(prompt, options = {}) {
  const modelString = options.model ?? DEFAULT_REFLECTION_MODEL;
  const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
  let model;
  try {
    model = await resolveModel(modelString);
  } catch (err) {
    return {
      ok: false,
      error: `failed to load model catalog: ${err instanceof Error ? err.message : String(err)}`,
    };
  }
  if (!model) {
    return {
      ok: false,
      error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
    };
  }
  // completeSimple takes a Context (systemPrompt + messages). The rendered
  // reflection template already contains both the system-style contract
  // (working-directory + reasoning protocol + output contract) and the
  // corpus brief, so we send it as a single user message rather than
  // splitting it — the prompt template is designed as one self-contained
  // brief for the LLM.
  const context = {
    systemPrompt: undefined,
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: prompt }],
        timestamp: Date.now(),
      },
    ],
  };
  const completeFn =
    options.complete ??
    (await (async () => {
      const ai = await import("@singularity-forge/ai");
      return ai.completeSimple;
    })());
  const callPromise = (async () => {
    try {
      const message = await completeFn(model, context, {});
      const content = extractAssistantText(message);
      return {
        ok: true,
        content,
        cleanFinish: content.includes(REFLECTION_TERMINATOR),
        provider: model.provider,
        modelId: model.id,
      };
    } catch (err) {
      return {
        ok: false,
        error: `provider call failed: ${err instanceof Error ? err.message : String(err)}`,
        provider: model.provider,
        modelId: model.id,
      };
    }
  })();
  // Race the provider call against a timeout. The timer is cleared once the
  // race settles so a fast completion doesn't leave a live timer pinning the
  // event loop for up to timeoutMs.
  let timer;
  const timeoutPromise = new Promise((resolve) => {
    timer = setTimeout(() => {
      resolve({
        ok: false,
        error: `reflection call timed out after ${timeoutMs}ms`,
        provider: model.provider,
        modelId: model.id,
      });
    }, timeoutMs);
  });
  try {
    return await Promise.race([callPromise, timeoutPromise]);
  } finally {
    clearTimeout(timer);
  }
}

// Back-compat alias: callers wired against the old spawn-based name still
// work, but they pick up the new provider-agnostic implementation. New code
// should use runReflection directly.
export const runGeminiReflection = runReflection;

View file

@ -261,124 +261,105 @@ describe("writeReflectionReport", () => {
});
});
describe("runReflection", () => {
  // Minimal AssistantMessage-shaped stub: runReflection only reads
  // `content`, but the mock carries the full shape (usage, stopReason,
  // provider metadata) so it stays honest about what completeSimple
  // actually resolves with.
  function fakeAssistantMessage(text) {
    return {
      role: "assistant",
      content: [{ type: "text", text }],
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      model: "gemini-3-pro-preview",
      usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
      stopReason: "stop",
      timestamp: Date.now(),
    };
  }

  test("returns ok with content + cleanFinish when terminator present", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("a".repeat(100), {
      complete: async (_model, context) => {
        // Echo the prompt length back so the assertion proves the prompt
        // actually reached the injected completion function.
        const userText = context.messages[0].content[0].text;
        return fakeAssistantMessage(
          `# Reflection\nReceived ${userText.length} chars\nREFLECTION_COMPLETE\n`,
        );
      },
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.content).toContain("Received 100 chars");
    expect(result.cleanFinish).toBe(true);
    expect(result.provider).toBeTruthy();
  });

  test("returns ok=false when the provider throws", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () => {
        throw new Error("synthetic upstream failure");
      },
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/synthetic upstream failure/);
  });

  test("returns ok=false with timeout error when the call hangs", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: () =>
        new Promise(() => {
          /* never resolves */
        }),
      timeoutMs: 200,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/timed out/);
  });

  test("flags cleanFinish=false when terminator absent", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () =>
        fakeAssistantMessage("# Truncated\nNo terminator here.\n"),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.cleanFinish).toBe(false);
  });

  test("rejects model strings that aren't provider/modelId shaped", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      model: "bare-model-name-no-slash",
      // Must never be called: model resolution fails before dispatch.
      complete: async () => fakeAssistantMessage("never called"),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/unknown model/);
  });

  test("concatenates multi-text-content responses", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () => ({
        role: "assistant",
        content: [
          { type: "text", text: "part1\n" },
          { type: "thinking", text: "internal — should be ignored" },
          { type: "text", text: "part2\nREFLECTION_COMPLETE\n" },
        ],
        api: "google-gemini-cli",
        provider: "google-gemini-cli",
        model: "gemini-3-pro-preview",
        usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
        stopReason: "stop",
        timestamp: Date.now(),
      }),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.content).toBe("part1\npart2\nREFLECTION_COMPLETE\n");
    expect(result.cleanFinish).toBe(true);
  });
});