refactor(reflection): route through @singularity-forge/ai, drop subprocess + gemini hardcoding

User-correctable architecture defect: runGeminiReflection shelled out to
the `gemini` CLI binary and hardcoded the gemini provider, duplicating
auth discovery and disconnecting the call from SF's metrics, cost
accounting, and provider abstraction. Should have routed through the
existing @singularity-forge/ai layer from the start.

Replace runGeminiReflection with runReflection that:

- Resolves an operator-supplied "provider/modelId" string via
  @singularity-forge/ai's getModel (the canonical accessor for the
  runtime model registry — MODELS itself isn't re-exported).
- Calls completeSimple from @singularity-forge/ai. Same provider routing
  every other SF LLM call uses (anthropic, openai, google-gemini-cli,
  openai-codex-responses, mistral, etc.). No subprocess.
- Default model is google-gemini-cli/gemini-3-pro-preview because that
  matches the operator's primary AI Ultra tier — but the default lives
  in a single named constant (DEFAULT_REFLECTION_MODEL), no provider
  hardcoding in the call path. Operators override per-call via --model.
- Returns { ok, content?, cleanFinish?, error?, provider, modelId } for
  observability into which provider actually answered.

runGeminiReflection kept as an alias for back-compat so the existing
headless-reflect.ts caller works unchanged. New code should use
runReflection directly.

Tests: switched from a fake-gemini-binary-on-PATH approach (5 tests)
to a clean dependency-injection approach via options.complete (5 tests
+ 1 new "rejects bare model strings"). Mock returns AssistantMessage
shape directly, no subprocess machinery.

Two pre-existing migration test failures in sf-db-migration.test.mjs
(openDatabase_migrates_v27, openDatabase_v52_db_heals_routing_history)
are unaffected by this commit — they fail in isolation too, likely
related to commit 7570aac4b's routing-metrics track. Logged here so the
1589/1591 pass rate is auditable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 05:11:07 +02:00
parent 21d9054611
commit 6a88ad2f00
2 changed files with 248 additions and 195 deletions

View file

@ -348,105 +348,177 @@ export function writeReflectionReport(basePath, content) {
// Sentinel the reflection prompt contract requires the model to emit as its
// final line; its presence distinguishes a clean finish from truncation.
const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";

/**
 * Default provider/model used when --model is not supplied. This is the
 * single point of model defaulting — the rest of runReflection is
 * provider-agnostic and routes through @singularity-forge/ai's completeSimple.
 *
 * Today's default is google-gemini-cli/gemini-3-pro-preview because that's
 * what the operator's persistent AI Ultra session is wired to and reflection
 * is a reasoning-heavy task that benefits from the pro tier. Operators
 * override per-call via --model. A future change can let SF's
 * persisted-default-model resolution feed this in, eliminating the constant.
 */
const DEFAULT_REFLECTION_MODEL = "google-gemini-cli/gemini-3-pro-preview";
/**
 * Extract assistant text from an AssistantMessage. The response content is
 * an array of TextContent | ThinkingContent | ToolCall | ... parts; every
 * text part is concatenated so multi-part responses survive intact, while
 * thinking and tool-use parts are dropped — reflection produces text only
 * by contract. Returns "" for anything that isn't a content array.
 */
function extractAssistantText(message) {
  const parts = message?.content;
  if (!Array.isArray(parts)) return "";
  return parts
    .filter(
      (part) =>
        part &&
        typeof part === "object" &&
        part.type === "text" &&
        typeof part.text === "string",
    )
    .map((part) => part.text)
    .join("");
}
/**
 * Parse a "provider/modelId" string into [provider, modelId]. Returns null
 * when the input isn't in that shape. We require the explicit
 * provider/model form so reflection never accidentally picks a model from
 * the wrong provider (e.g. anthropic claude-sonnet-4-6 when the operator
 * meant google-gemini-cli/gemini-3-pro-preview). Only the first slash
 * splits, so model ids containing "/" survive.
 */
function parseModelString(input) {
  if (typeof input !== "string") return null;
  const slash = input.indexOf("/");
  // Reject missing slash, empty provider ("/x"), and empty modelId ("x/").
  if (slash <= 0 || slash === input.length - 1) return null;
  return [input.slice(0, slash), input.slice(slash + 1)];
}

/**
 * Resolve a "provider/modelId" string to a Model object via
 * @singularity-forge/ai's getModel. Returns null when the string is
 * malformed or the model isn't registered. The dynamic import is
 * deliberately NOT wrapped in try/catch: an import failure propagates to
 * the caller, which maps it to a distinct "failed to load model catalog"
 * error rather than the "unknown model" path.
 *
 * Why getModel and not MODELS directly: the static MODELS catalog is
 * imported into models.ts but not re-exported from the package's public
 * API. getModel is the canonical accessor and pulls from the runtime
 * model-registry which already merges generated MODELS + custom MODELS.
 */
async function resolveModel(providerModelString) {
  const parsed = parseModelString(providerModelString);
  if (!parsed) return null;
  const [provider, modelId] = parsed;
  const ai = await import("@singularity-forge/ai");
  if (typeof ai.getModel !== "function") return null;
  try {
    const model = ai.getModel(provider, modelId);
    return model ?? null;
  } catch {
    // getModel throwing for an unregistered model is treated the same as
    // returning nothing: the model simply isn't available.
    return null;
  }
}
/**
 * Run a reflection pass against the given prompt.
 *
 * Provider-agnostic: routes through @singularity-forge/ai's completeSimple,
 * which dispatches via the same provider registry SF uses for every other
 * LLM call (anthropic, openai, google-gemini-cli, openai-codex-responses,
 * mistral, etc.). The model is chosen by the operator's --model flag
 * (which arrives via options.model) and resolved against the @singularity-
 * forge/ai MODELS catalog. No subprocess, no hardcoded provider in the
 * call path.
 *
 * options.model — "provider/modelId" string (e.g. "google-gemini-cli/
 * gemini-3-pro-preview", "anthropic/claude-sonnet-4-6"). Defaults to
 * DEFAULT_REFLECTION_MODEL if unset.
 *
 * options.timeoutMs — hard cap on the provider call; defaults to 8 minutes.
 *
 * options.complete — dependency injection for tests. When supplied, it is
 * called instead of the imported completeSimple, with the same (model,
 * context, options) signature.
 *
 * Returns { ok, content?, cleanFinish?, error?, provider?, modelId? }.
 * Best-effort; never throws.
 *
 * Consumer: headless-reflect operator surface (--run flag).
 */
export async function runReflection(prompt, options = {}) {
  const modelString = options.model ?? DEFAULT_REFLECTION_MODEL;
  const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
  let model;
  try {
    model = await resolveModel(modelString);
  } catch (err) {
    return {
      ok: false,
      error: `failed to load model catalog: ${err instanceof Error ? err.message : String(err)}`,
    };
  }
  if (!model) {
    return {
      ok: false,
      error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
    };
  }
  // completeSimple takes a Context (systemPrompt + messages). The rendered
  // reflection template already contains both the system-style contract
  // (working-directory + reasoning protocol + output contract) and the
  // corpus brief, so we send it as a single user message rather than
  // splitting it — the prompt template is designed as one self-contained
  // brief for the LLM.
  const context = {
    systemPrompt: undefined,
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: prompt }],
        timestamp: Date.now(),
      },
    ],
  };
  const completeFn =
    options.complete ??
    (await (async () => {
      const ai = await import("@singularity-forge/ai");
      return ai.completeSimple;
    })());
  const callPromise = (async () => {
    try {
      const message = await completeFn(model, context, {});
      const content = extractAssistantText(message);
      return {
        ok: true,
        content,
        cleanFinish: content.includes(REFLECTION_TERMINATOR),
        provider: model.provider,
        modelId: model.id,
      };
    } catch (err) {
      return {
        ok: false,
        error: `provider call failed: ${err instanceof Error ? err.message : String(err)}`,
        provider: model.provider,
        modelId: model.id,
      };
    }
  })();
  // Race the provider call against a timeout. The timer is cleared once the
  // race settles so a fast completion doesn't leave a live timer pinning the
  // event loop for up to timeoutMs.
  let timer;
  const timeoutPromise = new Promise((resolve) => {
    timer = setTimeout(() => {
      resolve({
        ok: false,
        error: `reflection call timed out after ${timeoutMs}ms`,
        provider: model.provider,
        modelId: model.id,
      });
    }, timeoutMs);
  });
  try {
    return await Promise.race([callPromise, timeoutPromise]);
  } finally {
    clearTimeout(timer);
  }
}

// Back-compat alias: callers wired against the old spawn-based name still
// work, but they pick up the new provider-agnostic implementation. New code
// should use runReflection directly.
export const runGeminiReflection = runReflection;

View file

@ -261,124 +261,105 @@ describe("writeReflectionReport", () => {
});
});
describe("runReflection", () => {
  // Minimal AssistantMessage-shaped stub: runReflection only reads
  // `content`, but the mock carries the full shape (usage, stopReason,
  // provider metadata) so it stays honest about what completeSimple
  // actually resolves with.
  function fakeAssistantMessage(text) {
    return {
      role: "assistant",
      content: [{ type: "text", text }],
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      model: "gemini-3-pro-preview",
      usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
      stopReason: "stop",
      timestamp: Date.now(),
    };
  }

  test("returns ok with content + cleanFinish when terminator present", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("a".repeat(100), {
      complete: async (_model, context) => {
        // Echo the prompt length back so the assertion proves the prompt
        // actually reached the injected completion function.
        const userText = context.messages[0].content[0].text;
        return fakeAssistantMessage(
          `# Reflection\nReceived ${userText.length} chars\nREFLECTION_COMPLETE\n`,
        );
      },
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.content).toContain("Received 100 chars");
    expect(result.cleanFinish).toBe(true);
    expect(result.provider).toBeTruthy();
  });

  test("returns ok=false when the provider throws", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () => {
        throw new Error("synthetic upstream failure");
      },
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/synthetic upstream failure/);
  });

  test("returns ok=false with timeout error when the call hangs", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: () =>
        new Promise(() => {
          /* never resolves */
        }),
      timeoutMs: 200,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/timed out/);
  });

  test("flags cleanFinish=false when terminator absent", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () =>
        fakeAssistantMessage("# Truncated\nNo terminator here.\n"),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.cleanFinish).toBe(false);
  });

  test("rejects model strings that aren't provider/modelId shaped", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      model: "bare-model-name-no-slash",
      // Must never be called: model resolution fails before dispatch.
      complete: async () => fakeAssistantMessage("never called"),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/unknown model/);
  });

  test("concatenates multi-text-content responses", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () => ({
        role: "assistant",
        content: [
          { type: "text", text: "part1\n" },
          { type: "thinking", text: "internal — should be ignored" },
          { type: "text", text: "part2\nREFLECTION_COMPLETE\n" },
        ],
        api: "google-gemini-cli",
        provider: "google-gemini-cli",
        model: "gemini-3-pro-preview",
        usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
        stopReason: "stop",
        timestamp: Date.now(),
      }),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.content).toBe("part1\npart2\nREFLECTION_COMPLETE\n");
    expect(result.cleanFinish).toBe(true);
  });
});