refactor(reflection): route through @singularity-forge/ai, drop subprocess + gemini hardcoding
Fixes an architecture defect: runGeminiReflection shelled out to
the `gemini` CLI binary and hardcoded the gemini provider, duplicating
auth discovery and disconnecting the call from SF's metrics, cost
accounting, and provider abstraction. It should have routed through the
existing @singularity-forge/ai layer from the start.
Replace runGeminiReflection with runReflection that:
- Resolves an operator-supplied "provider/modelId" string via
@singularity-forge/ai's getModel (the canonical accessor for the
runtime model registry — MODELS itself isn't re-exported).
- Calls completeSimple from @singularity-forge/ai. Same provider routing
every other SF LLM call uses (anthropic, openai, google-gemini-cli,
openai-codex-responses, mistral, etc.). No subprocess.
- Default model is google-gemini-cli/gemini-3-pro-preview because that
matches the operator's primary AI Ultra tier — but the default lives
in a single named constant (DEFAULT_REFLECTION_MODEL), no provider
hardcoding in the call path. Operators override per-call via --model.
- Returns { ok, content?, cleanFinish?, error?, provider, modelId } for
observability into which provider actually answered.
runGeminiReflection kept as an alias for back-compat so the existing
headless-reflect.ts caller works unchanged. New code should use
runReflection directly.
Tests: switched from a fake-gemini-binary-on-PATH approach (5 tests)
to a clean dependency-injection approach via options.complete (5 tests
+ 1 new "rejects bare model strings"). Mock returns AssistantMessage
shape directly, no subprocess machinery.
Two pre-existing migration test failures in sf-db-migration.test.mjs
(openDatabase_migrates_v27, openDatabase_v52_db_heals_routing_history)
are unaffected by this commit — they fail in isolation too, likely
related to commit 7570aac4b's routing-metrics track. Logged here so the
1589/1591 pass rate is auditable.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
21d9054611
commit
6a88ad2f00
2 changed files with 248 additions and 195 deletions
|
|
@ -348,105 +348,177 @@ export function writeReflectionReport(basePath, content) {
|
|||
// Sentinel the reflection prompt contract asks the model to emit as its last
// line; its presence distinguishes a complete response from a truncated one.
const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";

/**
 * Default provider/model used when --model is not supplied. This is the
 * single point of model defaulting — the rest of runReflection is
 * provider-agnostic and routes through @singularity-forge/ai's completeSimple.
 *
 * Today's default is google-gemini-cli/gemini-3-pro-preview because that's
 * what the operator's persistent AI Ultra session is wired to and reflection
 * is a reasoning-heavy task that benefits from the pro tier. Operators
 * override per-call via --model. A future change can let SF's
 * persisted-default-model resolution feed this in, eliminating the constant.
 */
const DEFAULT_REFLECTION_MODEL = "google-gemini-cli/gemini-3-pro-preview";
|
||||
|
||||
/**
 * Extract assistant text from an AssistantMessage. The response is an
 * array of TextContent | ThinkingContent | ToolCall | ... — we concatenate
 * all text parts so multi-part responses come through intact. Thinking and
 * tool-use parts are ignored — reflection produces text only by contract.
 */
function extractAssistantText(message) {
  const parts = message?.content;
  if (!Array.isArray(parts)) return "";
  return parts
    .filter(
      (part) =>
        part &&
        typeof part === "object" &&
        part.type === "text" &&
        typeof part.text === "string",
    )
    .map((part) => part.text)
    .join("");
}
|
||||
|
||||
/**
 * Parse a "provider/modelId" string into [provider, modelId]. Returns null
 * when the input isn't in that shape. We require the explicit
 * provider/model form so reflection never accidentally picks a model from
 * the wrong provider (e.g. anthropic claude-sonnet-4-6 when the operator
 * meant google-gemini-cli/gemini-3-pro-preview).
 */
function parseModelString(input) {
  if (typeof input !== "string") return null;
  const sep = input.indexOf("/");
  // Provider must be non-empty (separator not first char, and present at
  // all), and a model id must follow the separator.
  const hasProvider = sep > 0;
  const hasModelId = sep !== -1 && sep < input.length - 1;
  if (!hasProvider || !hasModelId) return null;
  const provider = input.slice(0, sep);
  const modelId = input.slice(sep + 1);
  return [provider, modelId];
}
|
||||
|
||||
/**
 * Resolve a "provider/modelId" string to a Model object via
 * @singularity-forge/ai's getModel. Returns null when the input isn't
 * "provider/modelId" shaped or the model isn't registered.
 *
 * Why getModel and not MODELS directly: the static MODELS catalog is
 * imported into models.ts but not re-exported from the package's public
 * API. getModel is the canonical accessor and pulls from the runtime
 * model-registry which already merges generated MODELS + custom MODELS.
 */
async function resolveModel(providerModelString) {
  const parsed = parseModelString(providerModelString);
  if (!parsed) return null;
  const [provider, modelId] = parsed;
  // Lazy import so merely loading this module doesn't pull in the AI layer.
  const ai = await import("@singularity-forge/ai");
  if (typeof ai.getModel !== "function") return null;
  try {
    const model = ai.getModel(provider, modelId);
    return model ?? null;
  } catch {
    // Treat a throwing getModel the same as an unregistered model: callers
    // get null and surface a uniform "unknown model" error.
    return null;
  }
}
|
||||
|
||||
/**
 * Run a reflection pass against the given prompt.
 *
 * Provider-agnostic: routes through @singularity-forge/ai's completeSimple,
 * which dispatches via the same provider registry SF uses for every other
 * LLM call (anthropic, openai, google-gemini-cli, openai-codex-responses,
 * mistral, etc.). The model is chosen by the operator's --model flag
 * (which arrives via options.model) and resolved against the
 * @singularity-forge/ai model registry. No subprocess, no hardcoded
 * provider in the call path.
 *
 * options.model — "provider/modelId" string (e.g.
 * "google-gemini-cli/gemini-3-pro-preview", "anthropic/claude-sonnet-4-6").
 * Defaults to DEFAULT_REFLECTION_MODEL if unset.
 *
 * options.timeoutMs — wall-clock budget for the provider call. Defaults to
 * 8 minutes; reflection is a long single-shot inference.
 *
 * options.complete — dependency injection for tests. When supplied, it is
 * called instead of the imported completeSimple, with the same (model,
 * context, options) signature.
 *
 * Returns { ok, content?, cleanFinish?, error?, provider?, modelId? }.
 * Best-effort; never throws. provider/modelId are included whenever the
 * model string resolved, for observability into which provider answered.
 *
 * Consumer: headless-reflect operator surface (--run flag).
 */
export async function runReflection(prompt, options = {}) {
  const modelString = options.model ?? DEFAULT_REFLECTION_MODEL;
  const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;

  let model;
  try {
    model = await resolveModel(modelString);
  } catch (err) {
    return {
      ok: false,
      error: `failed to load model catalog: ${err instanceof Error ? err.message : String(err)}`,
    };
  }
  if (!model) {
    return {
      ok: false,
      error: `unknown model "${modelString}" — expected "provider/modelId" with a model registered in @singularity-forge/ai MODELS`,
    };
  }

  // completeSimple takes a Context (systemPrompt + messages). The rendered
  // reflection template already contains both the system-style contract
  // (working-directory + reasoning protocol + output contract) and the
  // corpus brief, so we send it as a single user message rather than
  // splitting it — the prompt template is designed as one self-contained
  // brief for the LLM.
  const context = {
    systemPrompt: undefined,
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: prompt }],
        timestamp: Date.now(),
      },
    ],
  };

  // Test seam: options.complete replaces the real provider call entirely.
  const completeFn =
    options.complete ??
    (await (async () => {
      const ai = await import("@singularity-forge/ai");
      return ai.completeSimple;
    })());

  const callPromise = (async () => {
    try {
      const message = await completeFn(model, context, {});
      const content = extractAssistantText(message);
      return {
        ok: true,
        content,
        cleanFinish: content.includes(REFLECTION_TERMINATOR),
        provider: model.provider,
        modelId: model.id,
      };
    } catch (err) {
      return {
        ok: false,
        error: `provider call failed: ${err instanceof Error ? err.message : String(err)}`,
        provider: model.provider,
        modelId: model.id,
      };
    }
  })();

  // Fix: track the timer and clear it once the race settles. Without this a
  // fast provider response leaves an up-to-8-minute pending timeout keeping
  // the event loop (and any CLI process) alive.
  let timer;
  const timeoutPromise = new Promise((resolve) => {
    timer = setTimeout(() => {
      resolve({
        ok: false,
        error: `reflection call timed out after ${timeoutMs}ms`,
        provider: model.provider,
        modelId: model.id,
      });
    }, timeoutMs);
  });

  try {
    // callPromise never rejects (errors are caught into a result object),
    // so the race settles with whichever result object arrives first.
    return await Promise.race([callPromise, timeoutPromise]);
  } finally {
    clearTimeout(timer);
  }
}

// Back-compat alias: callers wired against the old spawn-based name still
// work, but they pick up the new provider-agnostic implementation. New code
// should use runReflection directly.
export const runGeminiReflection = runReflection;
|
||||
|
|
|
|||
|
|
@ -261,124 +261,105 @@ describe("writeReflectionReport", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("runReflection", () => {
  // Minimal AssistantMessage-shaped stub matching completeSimple's return
  // shape; runReflection only reads .content, the rest keeps the shape honest.
  function fakeAssistantMessage(text) {
    return {
      role: "assistant",
      content: [{ type: "text", text }],
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      model: "gemini-3-pro-preview",
      usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
      stopReason: "stop",
      timestamp: Date.now(),
    };
  }

  test("returns ok with content + cleanFinish when terminator present", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("a".repeat(100), {
      complete: async (_model, context) => {
        // The prompt must arrive as the single user message's text part.
        const userText = context.messages[0].content[0].text;
        return fakeAssistantMessage(
          `# Reflection\nReceived ${userText.length} chars\nREFLECTION_COMPLETE\n`,
        );
      },
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.content).toContain("Received 100 chars");
    expect(result.cleanFinish).toBe(true);
    expect(result.provider).toBeTruthy();
  });

  test("returns ok=false when the provider throws", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () => {
        throw new Error("synthetic upstream failure");
      },
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/synthetic upstream failure/);
  });

  test("returns ok=false with timeout error when the call hangs", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: () =>
        new Promise(() => {
          /* never resolves */
        }),
      timeoutMs: 200,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/timed out/);
  });

  test("flags cleanFinish=false when terminator absent", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () =>
        fakeAssistantMessage("# Truncated\nNo terminator here.\n"),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.cleanFinish).toBe(false);
  });

  test("rejects model strings that aren't provider/modelId shaped", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      model: "bare-model-name-no-slash",
      complete: async () => fakeAssistantMessage("never called"),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/unknown model/);
  });

  test("concatenates multi-text-content responses", async () => {
    const { runReflection } = await import("../reflection.js");
    const result = await runReflection("ignored", {
      complete: async () => ({
        role: "assistant",
        content: [
          { type: "text", text: "part1\n" },
          { type: "thinking", text: "internal — should be ignored" },
          { type: "text", text: "part2\nREFLECTION_COMPLETE\n" },
        ],
        api: "google-gemini-cli",
        provider: "google-gemini-cli",
        model: "gemini-3-pro-preview",
        usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
        stopReason: "stop",
        timestamp: Date.now(),
      }),
      timeoutMs: 5000,
    });
    expect(result.ok).toBe(true);
    expect(result.content).toBe("part1\npart2\nREFLECTION_COMPLETE\n");
    expect(result.cleanFinish).toBe(true);
  });
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue