feat(reflection): wire LLM dispatch (sf headless reflect --run)

Phase 1B of the reflection layer: complete the operator-driven loop by
adding actual LLM dispatch. Phase 1A (commit e161a59e2) shipped the
corpus assembler + prompt template + the prompt-emit operator surface.
This commit wires the dispatch end so `sf headless reflect --run`
produces a real report on disk without manual model piping.

Why shell-out to the gemini CLI and not SF's provider abstraction:
reflection is a single-prompt one-shot inference. Going through SF's
full agent dispatch would require a session, model registry, tool
registration, recovery shell — overkill for "render this prompt,
capture text." The gemini CLI handles auth (~/.gemini/oauth_creds.json),
Code Assist project discovery, and protocol drift on our behalf.
Subprocess cost is paid once per reflection (rare).

Implementation:

- reflection.js: runGeminiReflection(prompt, options) spawns
  `gemini --yolo --model <model> -p "<directive>"` and pipes the giant
  rendered template via stdin (gemini -p reads stdin and appends).
  Returns { ok, content, cleanFinish, exitCode, error, stderr }; never
  throws. Defaults to gemini-3-pro-preview (0% used on AI Ultra,
  strongest agentic model with quota). 8-minute timeout.

  cleanFinish detected by REFLECTION_COMPLETE terminator (emitted by
  the prompt template's output contract) — operator gets a warning when
  the report is truncated.

- headless-reflect.ts: --run flag triggers dispatch + report write
  via writeReflectionReport. --model overrides the default. Errors
  surface as JSON or text per --json. Successful runs emit the report
  path on stdout; failures emit error + truncated stderr.

- help-text.ts: documents --run and --model flags.

- Tests (4 new, 13 total): use a fake `gemini` binary on PATH to
  exercise the spawn path without real OAuth/network — covers
  ok+cleanFinish, non-zero exit, hang/timeout, missing-terminator.

All 1538 SF extension tests pass; typecheck clean.

Phase 2 follow-up (still gated on sf-mp4rxkwb-l4baga
triage-not-a-first-class-unit-type landing): reflection-pass becomes a
real autonomous-loop unit type, milestone-close auto-triggers it, the
report's `Recommended new self-feedback entries` section gets parsed
and the entries auto-filed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 04:33:16 +02:00
parent e161a59e2f
commit 62b19d7ba4
5 changed files with 296 additions and 3 deletions

View file

@ -52,6 +52,8 @@ function sfExtensionPath(moduleName: string): string {
/** Flags accepted by `handleReflect`. */
export interface HandleReflectOptions {
/** When true, the outcome of a `--run` dispatch is written to stdout as a single JSON line instead of plain text. */
json?: boolean;
/** When true, the rendered prompt is dispatched via `runGeminiReflection` and the report is persisted; when falsy, the rendered prompt is only printed to stdout. */
run?: boolean;
/** Model id forwarded to `runGeminiReflection`; when omitted, that function falls back to its own default. */
model?: string;
}
export interface HandleReflectResult {
@ -89,6 +91,18 @@ export async function handleReflect(
let mod: {
assembleReflectionCorpus: (basePath: string) => unknown;
renderReflectionCorpusBrief: (corpus: unknown) => string;
writeReflectionReport: (basePath: string, content: string) => string | null;
runGeminiReflection: (
prompt: string,
options?: { model?: string; timeoutMs?: number },
) => Promise<{
ok: boolean;
content?: string;
error?: string;
cleanFinish?: boolean;
stderr?: string;
exitCode?: number;
}>;
};
try {
mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
@ -149,6 +163,47 @@ export async function handleReflect(
// rendered prompt to pipe into a model. The full template path runs
// inside SF when reflection-pass becomes a real unit type.
const rendered = promptTemplate.replace("{{corpus}}", brief);
process.stdout.write(`${rendered}\n`);
if (!options.run) {
process.stdout.write(`${rendered}\n`);
return { exitCode: 0 };
}
// --run: dispatch the rendered prompt to gemini-cli, capture the report,
// persist to .sf/reflection/<ts>-report.md, emit the report path on stdout.
process.stderr.write("[reflect] dispatching to gemini-cli (this can take a few minutes)…\n");
const result = await mod.runGeminiReflection(rendered, {
model: options.model,
});
if (!result.ok) {
const payload = {
ok: false,
error: result.error ?? "unknown gemini error",
exitCode: result.exitCode,
stderrTail: (result.stderr ?? "").slice(-500),
};
process.stdout.write(
options.json
? `${JSON.stringify(payload)}\n`
: `[reflect] failed: ${payload.error}\n`,
);
return { exitCode: 1 };
}
const reportPath = mod.writeReflectionReport(cwd, result.content ?? "");
const payload = {
ok: true,
reportPath,
cleanFinish: result.cleanFinish === true,
};
if (options.json) {
process.stdout.write(`${JSON.stringify(payload)}\n`);
} else {
process.stdout.write(`Reflection report written to: ${reportPath}\n`);
if (!result.cleanFinish) {
process.stderr.write(
"[reflect] WARNING: report did not include REFLECTION_COMPLETE terminator — output may be truncated\n",
);
}
}
return { exitCode: 0 };
}

View file

@ -831,8 +831,18 @@ async function runHeadlessOnce(
// reflection unit is a separate follow-up.
if (options.command === "reflect") {
const wantsJson = options.json || options.commandArgs.includes("--json");
const wantsRun = options.commandArgs.includes("--run");
const modelIdx = options.commandArgs.indexOf("--model");
const model =
modelIdx >= 0 && modelIdx + 1 < options.commandArgs.length
? options.commandArgs[modelIdx + 1]
: undefined;
const { handleReflect } = await import("./headless-reflect.js");
const result = await handleReflect(process.cwd(), { json: wantsJson });
const result = await handleReflect(process.cwd(), {
json: wantsJson,
run: wantsRun,
model,
});
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
}

View file

@ -224,7 +224,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
" new-milestone Create a milestone from a specification document",
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw)",
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
"",
"new-milestone flags:",
" --context <path> Path to spec/PRD file (use '-' for stdin)",

View file

@ -344,3 +344,109 @@ export function writeReflectionReport(basePath, content) {
return null;
}
}
const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";
/**
* Spawn the gemini CLI to run a reflection pass against the given prompt.
*
* Why shell-out and not SF's provider abstraction: the reflection pass is a
* single-prompt one-shot inference. Going through SF's full agent dispatch
* would require a session, model registry, tool registration, recovery
* shell all overkill for "render this prompt, capture text". The gemini
* CLI handles auth via OAuth (~/.gemini/oauth_creds.json), Code Assist
* project discovery, and protocol drift on its behalf. We pay the cost of
* spawning a subprocess once per reflection, which is rare.
*
* The gemini CLI's `-p` flag takes a prompt and `Appended to input on
* stdin (if any)` — so the giant rendered template goes via stdin and -p
* carries a tiny directive that orients the model to the contract.
*
* Returns { ok, content?, exitCode?, error? }. Best-effort; never throws.
*
* Consumer: headless-reflect operator surface (--run flag).
*/
export async function runGeminiReflection(prompt, options = {}) {
const { spawn } = await import("node:child_process");
const model = options.model ?? "gemini-3-pro-preview";
const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
return await new Promise((resolve) => {
let stdoutBuf = "";
let stderrBuf = "";
let settled = false;
const finish = (result) => {
if (settled) return;
settled = true;
resolve(result);
};
let proc;
try {
proc = spawn(
"gemini",
["--yolo", "--model", model, "-p", "Run the reflection pass per the contract in the prompt below."],
{ stdio: ["pipe", "pipe", "pipe"] },
);
} catch (err) {
finish({
ok: false,
error: `gemini spawn failed: ${err instanceof Error ? err.message : String(err)}`,
});
return;
}
const timer = setTimeout(() => {
try {
proc.kill("SIGTERM");
} catch {
/* ignore */
}
finish({
ok: false,
error: `gemini timed out after ${timeoutMs}ms`,
stderr: stderrBuf,
});
}, timeoutMs);
proc.stdout.on("data", (chunk) => {
stdoutBuf += chunk.toString("utf-8");
});
proc.stderr.on("data", (chunk) => {
stderrBuf += chunk.toString("utf-8");
});
proc.on("error", (err) => {
clearTimeout(timer);
finish({
ok: false,
error: `gemini process error: ${err.message}`,
stderr: stderrBuf,
});
});
proc.on("close", (code) => {
clearTimeout(timer);
if (code !== 0) {
finish({
ok: false,
exitCode: code,
error: `gemini exited with code ${code}`,
stderr: stderrBuf,
content: stdoutBuf,
});
return;
}
finish({
ok: true,
exitCode: 0,
content: stdoutBuf,
cleanFinish: stdoutBuf.includes(REFLECTION_TERMINATOR),
});
});
try {
proc.stdin.write(prompt);
proc.stdin.end();
} catch (err) {
clearTimeout(timer);
finish({
ok: false,
error: `gemini stdin write failed: ${err instanceof Error ? err.message : String(err)}`,
});
}
});
}

View file

@ -260,3 +260,125 @@ describe("writeReflectionReport", () => {
expect(corpus.previousReport.content).toContain("Test reflection");
});
});
describe("runGeminiReflection", () => {
  /**
   * Run `body` while a fake `gemini` executable is first on PATH.
   *
   * Writes `script` to `<tmp>/gemini`, marks it executable, prepends its
   * directory to PATH using the platform's path delimiter, and restores the
   * previous PATH afterwards (deleting the variable if it was unset, since
   * assigning `undefined` to an env var stores the string "undefined").
   * The temp dir is registered in `tmpDirs` like the other temp dirs in this file.
   *
   * @param {string} dirPrefix  mkdtemp prefix for the temp directory.
   * @param {string} script     Source of the fake binary (must start with a shebang).
   * @param {() => Promise<void>} body  Assertions to run against the fake binary.
   */
  async function withFakeGemini(dirPrefix, script, body) {
    const { chmodSync } = await import("node:fs");
    const { delimiter } = await import("node:path");
    const fakeBinDir = mkdtempSync(join(tmpdir(), dirPrefix));
    tmpDirs.push(fakeBinDir);
    const fakeGeminiPath = join(fakeBinDir, "gemini");
    writeFileSync(fakeGeminiPath, script);
    chmodSync(fakeGeminiPath, 0o755);
    const originalPath = process.env.PATH;
    process.env.PATH =
      originalPath === undefined
        ? fakeBinDir
        : `${fakeBinDir}${delimiter}${originalPath}`;
    try {
      await body();
    } finally {
      if (originalPath === undefined) {
        delete process.env.PATH;
      } else {
        process.env.PATH = originalPath;
      }
    }
  }

  test("returns ok with content + cleanFinish when terminator present", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    // Fake CLI: echo back how many characters arrived on stdin, then emit
    // the terminator so the run counts as a clean finish.
    const script = [
      "#!/usr/bin/env node",
      'let stdin = "";',
      'process.stdin.on("data", (c) => { stdin += c; });',
      'process.stdin.on("end", () => {',
      '  process.stdout.write("# Reflection\\nReceived " + stdin.length + " chars\\nREFLECTION_COMPLETE\\n");',
      "  process.exit(0);",
      "});",
      "",
    ].join("\n");
    await withFakeGemini("fake-gemini-", script, async () => {
      const result = await runGeminiReflection("a".repeat(100), {
        timeoutMs: 5000,
      });
      expect(result.ok).toBe(true);
      expect(result.content).toContain("Received 100 chars");
      expect(result.cleanFinish).toBe(true);
    });
  });

  test("returns ok=false on non-zero exit", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    const script = [
      "#!/usr/bin/env node",
      'process.stderr.write("synthetic fail\\n");',
      "process.exit(2);",
      "",
    ].join("\n");
    await withFakeGemini("fake-gemini-fail-", script, async () => {
      const result = await runGeminiReflection("ignored", {
        timeoutMs: 5000,
      });
      expect(result.ok).toBe(false);
      expect(result.exitCode).toBe(2);
      expect(result.error).toMatch(/exited with code 2/);
    });
  });

  test("returns ok=false with timeout error when subprocess hangs", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    const script = [
      "#!/usr/bin/env node",
      "setInterval(() => {}, 1000); // hang",
      "",
    ].join("\n");
    await withFakeGemini("fake-gemini-hang-", script, async () => {
      const result = await runGeminiReflection("ignored", {
        timeoutMs: 200,
      });
      expect(result.ok).toBe(false);
      expect(result.error).toMatch(/timed out/);
    });
  });

  test("flags cleanFinish=false when terminator absent", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    const script = [
      "#!/usr/bin/env node",
      'process.stdout.write("# Truncated\\nNo terminator here.\\n");',
      "process.exit(0);",
      "",
    ].join("\n");
    await withFakeGemini("fake-gemini-trunc-", script, async () => {
      const result = await runGeminiReflection("ignored", { timeoutMs: 5000 });
      expect(result.ok).toBe(true);
      expect(result.cleanFinish).toBe(false);
    });
  });
});