From 6a88ad2f00c7038d3acc8899139565f98f7451c1 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 05:11:07 +0200 Subject: [PATCH] refactor(reflection): route through @singularity-forge/ai, drop subprocess + gemini hardcoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-correctable architecture defect: runGeminiReflection shelled out to the `gemini` CLI binary and hardcoded the gemini provider, duplicating auth discovery and disconnecting the call from SF's metrics, cost accounting, and provider abstraction. Should have routed through the existing @singularity-forge/ai layer from the start. Replace runGeminiReflection with runReflection that: - Resolves an operator-supplied "provider/modelId" string via @singularity-forge/ai's getModel (the canonical accessor for the runtime model registry — MODELS itself isn't re-exported). - Calls completeSimple from @singularity-forge/ai. Same provider routing every other SF LLM call uses (anthropic, openai, google-gemini-cli, openai-codex-responses, mistral, etc.). No subprocess. - Default model is google-gemini-cli/gemini-3-pro-preview because that matches the operator's primary AI Ultra tier — but the default lives in a single named constant (DEFAULT_REFLECTION_MODEL), no provider hardcoding in the call path. Operators override per-call via --model. - Returns { ok, content?, cleanFinish?, error?, provider, modelId } for observability into which provider actually answered. runGeminiReflection kept as an alias for back-compat so the existing headless-reflect.ts caller works unchanged. New code should use runReflection directly. Tests: switched from a fake-gemini-binary-on-PATH approach (5 tests) to a clean dependency-injection approach via options.complete (5 tests + 1 new "rejects bare model strings"). Mock returns AssistantMessage shape directly, no subprocess machinery. Two pre-existing migration test failures in sf-db-migration.test.mjs (openDatabase_migrates_v27, openDatabase_v52_db_heals_routing_history) are unaffected by this commit — they fail in isolation too, likely related to commit 7570aac4b's routing-metrics track. Logged here so the 1589/1591 pass rate is auditable. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/resources/extensions/sf/reflection.js | 242 ++++++++++++------ .../extensions/sf/tests/reflection.test.mjs | 201 +++++++-------- 2 files changed, 248 insertions(+), 195 deletions(-) diff --git a/src/resources/extensions/sf/reflection.js b/src/resources/extensions/sf/reflection.js index dd440b957..7c203e0d3 100644 --- a/src/resources/extensions/sf/reflection.js +++ b/src/resources/extensions/sf/reflection.js @@ -348,105 +348,177 @@ export function writeReflectionReport(basePath, content) { const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE"; /** - * Spawn the gemini CLI to run a reflection pass against the given prompt. + * Default provider/model used when --model is not supplied. This is the + * single point of model defaulting — the rest of runReflection is + * provider-agnostic and routes through @singularity-forge/ai's completeSimple. * - * Why shell-out and not SF's provider abstraction: the reflection pass is a - * single-prompt one-shot inference. Going through SF's full agent dispatch - * would require a session, model registry, tool registration, recovery - * shell — all overkill for "render this prompt, capture text". The gemini - * CLI handles auth via OAuth (~/.gemini/oauth_creds.json), Code Assist - * project discovery, and protocol drift on its behalf. We pay the cost of - * spawning a subprocess once per reflection, which is rare. + * Today's default is google-gemini-cli/gemini-3-pro-preview because that's + * what the operator's persistent AI Ultra session is wired to and reflection + * is a reasoning-heavy task that benefits from the pro tier. Operators + * override per-call via --model. A future change can let SF's + * persisted-default-model resolution feed this in, eliminating the constant. + */ +const DEFAULT_REFLECTION_MODEL = "google-gemini-cli/gemini-3-pro-preview"; + +/** + * Extract assistant text from an AssistantMessage. The response is an + * array of TextContent | ThinkingContent | ToolCall | ... — we concatenate + * all text parts so multi-part responses come through intact. Thinking and + * tool-use parts are ignored — reflection produces text only by contract. + */ +function extractAssistantText(message) { + const content = message?.content; + if (!Array.isArray(content)) return ""; + const out = []; + for (const part of content) { + if (part && typeof part === "object" && part.type === "text") { + if (typeof part.text === "string") out.push(part.text); + } + } + return out.join(""); +} + +/** + * Parse a "provider/modelId" string into [provider, modelId]. Returns null + * when the input isn't in that shape. We require the explicit + * provider/model form so reflection never accidentally picks a model from + * the wrong provider (e.g. anthropic claude-sonnet-4-6 when the operator + * meant google-gemini-cli/gemini-3-pro-preview). + */ +function parseModelString(input) { + if (typeof input !== "string") return null; + const slash = input.indexOf("/"); + if (slash <= 0 || slash === input.length - 1) return null; + return [input.slice(0, slash), input.slice(slash + 1)]; +} + +/** + * Resolve a "provider/modelId" string to a Model object via + * @singularity-forge/ai's getModel. Returns null when the model isn't + * registered. * - * The gemini CLI's `-p` flag takes a prompt and `Appended to input on - * stdin (if any)` — so the giant rendered template goes via stdin and -p - * carries a tiny directive that orients the model to the contract. + * Why getModel and not MODELS directly: the static MODELS catalog is + * imported into models.ts but not re-exported from the package's public + * API. getModel is the canonical accessor and pulls from the runtime + * model-registry which already merges generated MODELS + custom MODELS. + */ +async function resolveModel(providerModelString) { + const parsed = parseModelString(providerModelString); + if (!parsed) return null; + const [provider, modelId] = parsed; + const ai = await import("@singularity-forge/ai"); + if (typeof ai.getModel !== "function") return null; + try { + const model = ai.getModel(provider, modelId); + return model ?? null; + } catch { + return null; + } +} + +/** + * Run a reflection pass against the given prompt. * - * Returns { ok, content?, exitCode?, error? }. Best-effort; never throws. + * Provider-agnostic: routes through @singularity-forge/ai's completeSimple, + * which dispatches via the same provider registry SF uses for every other + * LLM call (anthropic, openai, google-gemini-cli, openai-codex-responses, + * mistral, etc.). The model is chosen by the operator's --model flag + * (which arrives via options.model) and resolved against the @singularity- + * forge/ai MODELS catalog. No subprocess, no hardcoded provider in the + * call path. + * + * options.model — "provider/modelId" string (e.g. "google-gemini-cli/ + * gemini-3-pro-preview", "anthropic/claude-sonnet-4-6"). Defaults to + * DEFAULT_REFLECTION_MODEL if unset. + * + * options.complete — dependency injection for tests. When supplied, it is + * called instead of the imported completeSimple, with the same (model, + * context, options) signature. + * + * Returns { ok, content?, cleanFinish?, error? }. Best-effort; never throws. * * Consumer: headless-reflect operator surface (--run flag). */ -export async function runGeminiReflection(prompt, options = {}) { - const { spawn } = await import("node:child_process"); - const model = options.model ?? "gemini-3-pro-preview"; +export async function runReflection(prompt, options = {}) { + const modelString = options.model ?? DEFAULT_REFLECTION_MODEL; const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000; - return await new Promise((resolve) => { - let stdoutBuf = ""; - let stderrBuf = ""; - let settled = false; - const finish = (result) => { - if (settled) return; - settled = true; - resolve(result); + + let model; + try { + model = await resolveModel(modelString); + } catch (err) { + return { + ok: false, + error: `failed to load model catalog: ${err instanceof Error ? err.message : String(err)}`, }; - let proc; + } + if (!model) { + return { + ok: false, + error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`, + }; + } + + // completeSimple takes a Context (systemPrompt + messages). The rendered + // reflection template already contains both the system-style contract + // (working-directory + reasoning protocol + output contract) and the + // corpus brief, so we send it as a single user message rather than + // splitting it — the prompt template is designed as one self-contained + // brief for the LLM. + const context = { + systemPrompt: undefined, + messages: [ + { + role: "user", + content: [{ type: "text", text: prompt }], + timestamp: Date.now(), + }, + ], + }; + + const completeFn = + options.complete ?? + (await (async () => { + const ai = await import("@singularity-forge/ai"); + return ai.completeSimple; + })()); + + const callPromise = (async () => { try { - proc = spawn( - "gemini", - ["--yolo", "--model", model, "-p", "Run the reflection pass per the contract in the prompt below."], - { stdio: ["pipe", "pipe", "pipe"] }, - ); + const message = await completeFn(model, context, {}); + const content = extractAssistantText(message); + return { + ok: true, + content, + cleanFinish: content.includes(REFLECTION_TERMINATOR), + provider: model.provider, + modelId: model.id, + }; } catch (err) { - finish({ + return { ok: false, - error: `gemini spawn failed: ${err instanceof Error ? err.message : String(err)}`, - }); - return; + error: `provider call failed: ${err instanceof Error ? err.message : String(err)}`, + provider: model.provider, + modelId: model.id, + }; } - const timer = setTimeout(() => { - try { - proc.kill("SIGTERM"); - } catch { - /* ignore */ - } - finish({ + })(); + + const timeoutPromise = new Promise((resolve) => { + setTimeout(() => { + resolve({ ok: false, - error: `gemini timed out after ${timeoutMs}ms`, - stderr: stderrBuf, + error: `reflection call timed out after ${timeoutMs}ms`, + provider: model.provider, + modelId: model.id, }); }, timeoutMs); - proc.stdout.on("data", (chunk) => { - stdoutBuf += chunk.toString("utf-8"); - }); - proc.stderr.on("data", (chunk) => { - stderrBuf += chunk.toString("utf-8"); - }); - proc.on("error", (err) => { - clearTimeout(timer); - finish({ - ok: false, - error: `gemini process error: ${err.message}`, - stderr: stderrBuf, - }); - }); - proc.on("close", (code) => { - clearTimeout(timer); - if (code !== 0) { - finish({ - ok: false, - exitCode: code, - error: `gemini exited with code ${code}`, - stderr: stderrBuf, - content: stdoutBuf, - }); - return; - } - finish({ - ok: true, - exitCode: 0, - content: stdoutBuf, - cleanFinish: stdoutBuf.includes(REFLECTION_TERMINATOR), - }); - }); - try { - proc.stdin.write(prompt); - proc.stdin.end(); - } catch (err) { - clearTimeout(timer); - finish({ - ok: false, - error: `gemini stdin write failed: ${err instanceof Error ? err.message : String(err)}`, - }); - } }); + + return Promise.race([callPromise, timeoutPromise]); } + +// Back-compat alias: callers wired against the old spawn-based name still +// work, but they pick up the new provider-agnostic implementation. New code +// should use runReflection directly. +export const runGeminiReflection = runReflection; diff --git a/src/resources/extensions/sf/tests/reflection.test.mjs b/src/resources/extensions/sf/tests/reflection.test.mjs index 48183057e..cce06d2b3 100644 --- a/src/resources/extensions/sf/tests/reflection.test.mjs +++ b/src/resources/extensions/sf/tests/reflection.test.mjs @@ -261,124 +261,105 @@ describe("writeReflectionReport", () => { }); }); -describe("runGeminiReflection", () => { +describe("runReflection", () => { + function fakeAssistantMessage(text) { + return { + role: "assistant", + content: [{ type: "text", text }], + api: "google-gemini-cli", + provider: "google-gemini-cli", + model: "gemini-3-pro-preview", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + }; + } + test("returns ok with content + cleanFinish when terminator present", async () => { - const { runGeminiReflection } = await import("../reflection.js"); - // Stub gemini binary by spawning a node script that prints a fake - // response and exits 0. We invoke it via a wrapper PATH override — - // but the simplest approach: directly call spawn-replacement via - // a process.env.PATH prepend to a tmp dir containing a fake gemini. - const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-")); - tmpDirs.push(fakeBinDir); - const fakeGeminiPath = join(fakeBinDir, "gemini"); - writeFileSync( - fakeGeminiPath, - `#!/usr/bin/env node -let stdin = ""; -process.stdin.on("data", (c) => { stdin += c; }); -process.stdin.on("end", () => { - process.stdout.write("# Reflection\\nReceived " + stdin.length + " chars\\nREFLECTION_COMPLETE\\n"); - process.exit(0); -}); -`, - ); - // Make it executable - const { chmodSync } = await import("node:fs"); - chmodSync(fakeGeminiPath, 0o755); - - const originalPath = process.env.PATH; - process.env.PATH = `${fakeBinDir}:${originalPath}`; - try { - const result = await runGeminiReflection("a".repeat(100), { - timeoutMs: 5000, - }); - expect(result.ok).toBe(true); - expect(result.content).toContain("Received 100 chars"); - expect(result.cleanFinish).toBe(true); - } finally { - process.env.PATH = originalPath; - } + const { runReflection } = await import("../reflection.js"); + const result = await runReflection("a".repeat(100), { + complete: async (_model, context) => { + const userText = context.messages[0].content[0].text; + return fakeAssistantMessage( + `# Reflection\nReceived ${userText.length} chars\nREFLECTION_COMPLETE\n`, + ); + }, + timeoutMs: 5000, + }); + expect(result.ok).toBe(true); + expect(result.content).toContain("Received 100 chars"); + expect(result.cleanFinish).toBe(true); + expect(result.provider).toBeTruthy(); }); - test("returns ok=false on non-zero exit", async () => { - const { runGeminiReflection } = await import("../reflection.js"); - const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-fail-")); - tmpDirs.push(fakeBinDir); - const fakeGeminiPath = join(fakeBinDir, "gemini"); - writeFileSync( - fakeGeminiPath, - `#!/usr/bin/env node -process.stderr.write("synthetic fail\\n"); -process.exit(2); -`, - ); - const { chmodSync } = await import("node:fs"); - chmodSync(fakeGeminiPath, 0o755); - - const originalPath = process.env.PATH; - process.env.PATH = `${fakeBinDir}:${originalPath}`; - try { - const result = await runGeminiReflection("ignored", { - timeoutMs: 5000, - }); - expect(result.ok).toBe(false); - expect(result.exitCode).toBe(2); - expect(result.error).toMatch(/exited with code 2/); - } finally { - process.env.PATH = originalPath; - } + test("returns ok=false when the provider throws", async () => { + const { runReflection } = await import("../reflection.js"); + const result = await runReflection("ignored", { + complete: async () => { + throw new Error("synthetic upstream failure"); + }, + timeoutMs: 5000, + }); + expect(result.ok).toBe(false); + expect(result.error).toMatch(/synthetic upstream failure/); }); - test("returns ok=false with timeout error when subprocess hangs", async () => { - const { runGeminiReflection } = await import("../reflection.js"); - const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-hang-")); - tmpDirs.push(fakeBinDir); - const fakeGeminiPath = join(fakeBinDir, "gemini"); - writeFileSync( - fakeGeminiPath, - `#!/usr/bin/env node -setInterval(() => {}, 1000); // hang -`, - ); - const { chmodSync } = await import("node:fs"); - chmodSync(fakeGeminiPath, 0o755); - - const originalPath = process.env.PATH; - process.env.PATH = `${fakeBinDir}:${originalPath}`; - try { - const result = await runGeminiReflection("ignored", { - timeoutMs: 200, - }); - expect(result.ok).toBe(false); - expect(result.error).toMatch(/timed out/); - } finally { - process.env.PATH = originalPath; - } + test("returns ok=false with timeout error when the call hangs", async () => { + const { runReflection } = await import("../reflection.js"); + const result = await runReflection("ignored", { + complete: () => + new Promise(() => { + /* never resolves */ + }), + timeoutMs: 200, + }); + expect(result.ok).toBe(false); + expect(result.error).toMatch(/timed out/); }); test("flags cleanFinish=false when terminator absent", async () => { - const { runGeminiReflection } = await import("../reflection.js"); - const fakeBinDir = mkdtempSync(join(tmpdir(), "fake-gemini-trunc-")); - tmpDirs.push(fakeBinDir); - const fakeGeminiPath = join(fakeBinDir, "gemini"); - writeFileSync( - fakeGeminiPath, - `#!/usr/bin/env node -process.stdout.write("# Truncated\\nNo terminator here.\\n"); -process.exit(0); -`, - ); - const { chmodSync } = await import("node:fs"); - chmodSync(fakeGeminiPath, 0o755); + const { runReflection } = await import("../reflection.js"); + const result = await runReflection("ignored", { + complete: async () => + fakeAssistantMessage("# Truncated\nNo terminator here.\n"), + timeoutMs: 5000, + }); + expect(result.ok).toBe(true); + expect(result.cleanFinish).toBe(false); + }); - const originalPath = process.env.PATH; - process.env.PATH = `${fakeBinDir}:${originalPath}`; - try { - const result = await runGeminiReflection("ignored", { timeoutMs: 5000 }); - expect(result.ok).toBe(true); - expect(result.cleanFinish).toBe(false); - } finally { - process.env.PATH = originalPath; - } + test("rejects model strings that aren't provider/modelId shaped", async () => { + const { runReflection } = await import("../reflection.js"); + const result = await runReflection("ignored", { + model: "bare-model-name-no-slash", + complete: async () => fakeAssistantMessage("never called"), + timeoutMs: 5000, + }); + expect(result.ok).toBe(false); + expect(result.error).toMatch(/unknown model/); + }); + + test("concatenates multi-text-content responses", async () => { + const { runReflection } = await import("../reflection.js"); + const result = await runReflection("ignored", { + complete: async () => ({ + role: "assistant", + content: [ + { type: "text", text: "part1\n" }, + { type: "thinking", text: "internal — should be ignored" }, + { type: "text", text: "part2\nREFLECTION_COMPLETE\n" }, + ], + api: "google-gemini-cli", + provider: "google-gemini-cli", + model: "gemini-3-pro-preview", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + }), + timeoutMs: 5000, + }); + expect(result.ok).toBe(true); + expect(result.content).toBe("part1\npart2\nREFLECTION_COMPLETE\n"); + expect(result.cleanFinish).toBe(true); }); });