feat(reflection): wire LLM dispatch (sf headless reflect --run)
Phase 1B of the reflection layer: complete the operator-driven loop by
adding actual LLM dispatch. Phase 1A (commit e161a59e2) shipped the
corpus assembler + prompt template + the prompt-emit operator surface.
This commit wires the dispatch end so `sf headless reflect --run`
produces a real report on disk without manual model piping.
Why shell out to the gemini CLI instead of using SF's provider abstraction:
reflection is a single-prompt one-shot inference. Going through SF's
full agent dispatch would require a session, model registry, tool
registration, recovery shell — overkill for "render this prompt,
capture text." The gemini CLI handles auth (~/.gemini/oauth_creds.json),
Code Assist project discovery, and protocol drift on our behalf.
Subprocess cost is paid once per reflection (rare).
Implementation:
- reflection.js: runGeminiReflection(prompt, options) spawns
`gemini --yolo --model <model> -p "<directive>"` and pipes the giant
rendered template via stdin (gemini -p reads stdin and appends).
Returns { ok, content, cleanFinish, exitCode, error, stderr }; never
throws. Defaults to gemini-3-pro-preview (0% used on AI Ultra,
strongest agentic model with quota). 8-minute timeout.
cleanFinish detected by REFLECTION_COMPLETE terminator (emitted by
the prompt template's output contract) — operator gets a warning when
the report is truncated.
- headless-reflect.ts: --run flag triggers dispatch + report write
via writeReflectionReport. --model overrides the default. Errors
surface as JSON or text per --json. Successful runs emit the report
path on stdout; failures emit error + truncated stderr.
- help-text.ts: documents --run and --model flags.
- Tests (4 new, 13 total): use a fake `gemini` binary on PATH to
exercise the spawn path without real OAuth/network — covers
ok+cleanFinish, non-zero exit, hang/timeout, missing-terminator.
All 1538 SF extension tests pass; typecheck clean.
Phase 2 follow-up (still gated on sf-mp4rxkwb-l4baga
triage-not-a-first-class-unit-type landing): reflection-pass becomes a
real autonomous-loop unit type, milestone-close auto-triggers it, the
report's `Recommended new self-feedback entries` section gets parsed
and the entries auto-filed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e161a59e2f
commit
62b19d7ba4
5 changed files with 296 additions and 3 deletions
|
|
@ -52,6 +52,8 @@ function sfExtensionPath(moduleName: string): string {
|
|||
|
||||
/**
 * Options accepted by `handleReflect`, the `sf headless reflect` operator
 * surface. Every field is optional.
 */
export interface HandleReflectOptions {
|
||||
/** When true, result and error payloads are written as a single JSON line instead of human-readable text. */
json?: boolean;
|
||||
/**
 * When true, the rendered prompt is dispatched through `runGeminiReflection`
 * and the resulting report is persisted via `writeReflectionReport`.
 * When falsy, the rendered prompt is only written to stdout.
 */
run?: boolean;
|
||||
/** Model id forwarded to `runGeminiReflection`; that function applies its own default when this is undefined. */
model?: string;
|
||||
}
|
||||
|
||||
export interface HandleReflectResult {
|
||||
|
|
@ -89,6 +91,18 @@ export async function handleReflect(
|
|||
let mod: {
|
||||
assembleReflectionCorpus: (basePath: string) => unknown;
|
||||
renderReflectionCorpusBrief: (corpus: unknown) => string;
|
||||
writeReflectionReport: (basePath: string, content: string) => string | null;
|
||||
runGeminiReflection: (
|
||||
prompt: string,
|
||||
options?: { model?: string; timeoutMs?: number },
|
||||
) => Promise<{
|
||||
ok: boolean;
|
||||
content?: string;
|
||||
error?: string;
|
||||
cleanFinish?: boolean;
|
||||
stderr?: string;
|
||||
exitCode?: number;
|
||||
}>;
|
||||
};
|
||||
try {
|
||||
mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
|
||||
|
|
@ -149,6 +163,47 @@ export async function handleReflect(
|
|||
// rendered prompt to pipe into a model. The full template path runs
|
||||
// inside SF when reflection-pass becomes a real unit type.
|
||||
const rendered = promptTemplate.replace("{{corpus}}", brief);
|
||||
process.stdout.write(`${rendered}\n`);
|
||||
|
||||
if (!options.run) {
|
||||
process.stdout.write(`${rendered}\n`);
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
||||
// --run: dispatch the rendered prompt to gemini-cli, capture the report,
|
||||
// persist to .sf/reflection/<ts>-report.md, emit the report path on stdout.
|
||||
process.stderr.write("[reflect] dispatching to gemini-cli (this can take a few minutes)…\n");
|
||||
const result = await mod.runGeminiReflection(rendered, {
|
||||
model: options.model,
|
||||
});
|
||||
if (!result.ok) {
|
||||
const payload = {
|
||||
ok: false,
|
||||
error: result.error ?? "unknown gemini error",
|
||||
exitCode: result.exitCode,
|
||||
stderrTail: (result.stderr ?? "").slice(-500),
|
||||
};
|
||||
process.stdout.write(
|
||||
options.json
|
||||
? `${JSON.stringify(payload)}\n`
|
||||
: `[reflect] failed: ${payload.error}\n`,
|
||||
);
|
||||
return { exitCode: 1 };
|
||||
}
|
||||
const reportPath = mod.writeReflectionReport(cwd, result.content ?? "");
|
||||
const payload = {
|
||||
ok: true,
|
||||
reportPath,
|
||||
cleanFinish: result.cleanFinish === true,
|
||||
};
|
||||
if (options.json) {
|
||||
process.stdout.write(`${JSON.stringify(payload)}\n`);
|
||||
} else {
|
||||
process.stdout.write(`Reflection report written to: ${reportPath}\n`);
|
||||
if (!result.cleanFinish) {
|
||||
process.stderr.write(
|
||||
"[reflect] WARNING: report did not include REFLECTION_COMPLETE terminator — output may be truncated\n",
|
||||
);
|
||||
}
|
||||
}
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -831,8 +831,18 @@ async function runHeadlessOnce(
|
|||
// reflection unit is a separate follow-up.
|
||||
if (options.command === "reflect") {
|
||||
const wantsJson = options.json || options.commandArgs.includes("--json");
|
||||
const wantsRun = options.commandArgs.includes("--run");
|
||||
const modelIdx = options.commandArgs.indexOf("--model");
|
||||
const model =
|
||||
modelIdx >= 0 && modelIdx + 1 < options.commandArgs.length
|
||||
? options.commandArgs[modelIdx + 1]
|
||||
: undefined;
|
||||
const { handleReflect } = await import("./headless-reflect.js");
|
||||
const result = await handleReflect(process.cwd(), { json: wantsJson });
|
||||
const result = await handleReflect(process.cwd(), {
|
||||
json: wantsJson,
|
||||
run: wantsRun,
|
||||
model,
|
||||
});
|
||||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
|||
" new-milestone Create a milestone from a specification document",
|
||||
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
|
||||
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
|
||||
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw)",
|
||||
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
|
||||
"",
|
||||
"new-milestone flags:",
|
||||
" --context <path> Path to spec/PRD file (use '-' for stdin)",
|
||||
|
|
|
|||
|
|
@ -344,3 +344,109 @@ export function writeReflectionReport(basePath, content) {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";
|
||||
|
||||
/**
|
||||
* Spawn the gemini CLI to run a reflection pass against the given prompt.
|
||||
*
|
||||
* Why shell-out and not SF's provider abstraction: the reflection pass is a
|
||||
* single-prompt one-shot inference. Going through SF's full agent dispatch
|
||||
* would require a session, model registry, tool registration, recovery
|
||||
* shell — all overkill for "render this prompt, capture text". The gemini
|
||||
* CLI handles auth via OAuth (~/.gemini/oauth_creds.json), Code Assist
|
||||
* project discovery, and protocol drift on its behalf. We pay the cost of
|
||||
* spawning a subprocess once per reflection, which is rare.
|
||||
*
|
||||
* The gemini CLI's `-p` flag takes a prompt and `Appended to input on
|
||||
* stdin (if any)` — so the giant rendered template goes via stdin and -p
|
||||
* carries a tiny directive that orients the model to the contract.
|
||||
*
|
||||
* Returns { ok, content?, exitCode?, error? }. Best-effort; never throws.
|
||||
*
|
||||
* Consumer: headless-reflect operator surface (--run flag).
|
||||
*/
|
||||
export async function runGeminiReflection(prompt, options = {}) {
|
||||
const { spawn } = await import("node:child_process");
|
||||
const model = options.model ?? "gemini-3-pro-preview";
|
||||
const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
|
||||
return await new Promise((resolve) => {
|
||||
let stdoutBuf = "";
|
||||
let stderrBuf = "";
|
||||
let settled = false;
|
||||
const finish = (result) => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
resolve(result);
|
||||
};
|
||||
let proc;
|
||||
try {
|
||||
proc = spawn(
|
||||
"gemini",
|
||||
["--yolo", "--model", model, "-p", "Run the reflection pass per the contract in the prompt below."],
|
||||
{ stdio: ["pipe", "pipe", "pipe"] },
|
||||
);
|
||||
} catch (err) {
|
||||
finish({
|
||||
ok: false,
|
||||
error: `gemini spawn failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
});
|
||||
return;
|
||||
}
|
||||
const timer = setTimeout(() => {
|
||||
try {
|
||||
proc.kill("SIGTERM");
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
finish({
|
||||
ok: false,
|
||||
error: `gemini timed out after ${timeoutMs}ms`,
|
||||
stderr: stderrBuf,
|
||||
});
|
||||
}, timeoutMs);
|
||||
proc.stdout.on("data", (chunk) => {
|
||||
stdoutBuf += chunk.toString("utf-8");
|
||||
});
|
||||
proc.stderr.on("data", (chunk) => {
|
||||
stderrBuf += chunk.toString("utf-8");
|
||||
});
|
||||
proc.on("error", (err) => {
|
||||
clearTimeout(timer);
|
||||
finish({
|
||||
ok: false,
|
||||
error: `gemini process error: ${err.message}`,
|
||||
stderr: stderrBuf,
|
||||
});
|
||||
});
|
||||
proc.on("close", (code) => {
|
||||
clearTimeout(timer);
|
||||
if (code !== 0) {
|
||||
finish({
|
||||
ok: false,
|
||||
exitCode: code,
|
||||
error: `gemini exited with code ${code}`,
|
||||
stderr: stderrBuf,
|
||||
content: stdoutBuf,
|
||||
});
|
||||
return;
|
||||
}
|
||||
finish({
|
||||
ok: true,
|
||||
exitCode: 0,
|
||||
content: stdoutBuf,
|
||||
cleanFinish: stdoutBuf.includes(REFLECTION_TERMINATOR),
|
||||
});
|
||||
});
|
||||
try {
|
||||
proc.stdin.write(prompt);
|
||||
proc.stdin.end();
|
||||
} catch (err) {
|
||||
clearTimeout(timer);
|
||||
finish({
|
||||
ok: false,
|
||||
error: `gemini stdin write failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -260,3 +260,125 @@ describe("writeReflectionReport", () => {
|
|||
expect(corpus.previousReport.content).toContain("Test reflection");
|
||||
});
|
||||
});
|
||||
|
||||
describe("runGeminiReflection", () => {
  /**
   * Install a fake `gemini` executable and put it first on PATH, so the
   * spawn path is exercised without real OAuth or network access.
   *
   * The temp directory is registered in `tmpDirs`. The returned function
   * restores PATH exactly: it deletes the variable when it was originally
   * unset, because assigning `undefined` to a `process.env` key stores the
   * string "undefined".
   */
  const installFakeGemini = async (
    script = "#!/usr/bin/env node\n",
    dirPrefix = "fake-gemini-",
  ) => {
    const { chmodSync } = await import("node:fs");
    const { delimiter } = await import("node:path");
    const fakeBinDir = mkdtempSync(join(tmpdir(), dirPrefix));
    tmpDirs.push(fakeBinDir);
    const fakeGeminiPath = join(fakeBinDir, "gemini");
    writeFileSync(fakeGeminiPath, script);
    // Make it executable
    chmodSync(fakeGeminiPath, 0o755);

    const originalPath = process.env.PATH;
    process.env.PATH =
      originalPath === undefined ? fakeBinDir : `${fakeBinDir}${delimiter}${originalPath}`;
    return () => {
      if (originalPath === undefined) {
        delete process.env.PATH;
      } else {
        process.env.PATH = originalPath;
      }
    };
  };

  test("returns ok with content + cleanFinish when terminator present", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    // The fake echoes how many characters it received on stdin and then
    // emits the terminator, proving the prompt was actually piped through.
    const restorePath = await installFakeGemini(
      `#!/usr/bin/env node
let stdin = "";
process.stdin.on("data", (c) => { stdin += c; });
process.stdin.on("end", () => {
process.stdout.write("# Reflection\\nReceived " + stdin.length + " chars\\nREFLECTION_COMPLETE\\n");
process.exit(0);
});
`,
      "fake-gemini-",
    );
    try {
      const result = await runGeminiReflection("a".repeat(100), {
        timeoutMs: 5000,
      });
      expect(result.ok).toBe(true);
      expect(result.content).toContain("Received 100 chars");
      expect(result.cleanFinish).toBe(true);
    } finally {
      restorePath();
    }
  });

  test("returns ok=false on non-zero exit", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    const restorePath = await installFakeGemini(
      `#!/usr/bin/env node
process.stderr.write("synthetic fail\\n");
process.exit(2);
`,
      "fake-gemini-fail-",
    );
    try {
      const result = await runGeminiReflection("ignored", {
        timeoutMs: 5000,
      });
      expect(result.ok).toBe(false);
      expect(result.exitCode).toBe(2);
      expect(result.error).toMatch(/exited with code 2/);
    } finally {
      restorePath();
    }
  });

  test("returns ok=false with timeout error when subprocess hangs", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    const restorePath = await installFakeGemini(
      `#!/usr/bin/env node
setInterval(() => {}, 1000); // hang
`,
      "fake-gemini-hang-",
    );
    try {
      const result = await runGeminiReflection("ignored", {
        timeoutMs: 200,
      });
      expect(result.ok).toBe(false);
      expect(result.error).toMatch(/timed out/);
    } finally {
      restorePath();
    }
  });

  test("flags cleanFinish=false when terminator absent", async () => {
    const { runGeminiReflection } = await import("../reflection.js");
    const restorePath = await installFakeGemini(
      `#!/usr/bin/env node
process.stdout.write("# Truncated\\nNo terminator here.\\n");
process.exit(0);
`,
      "fake-gemini-trunc-",
    );
    try {
      const result = await runGeminiReflection("ignored", { timeoutMs: 5000 });
      expect(result.ok).toBe(true);
      expect(result.cleanFinish).toBe(false);
    } finally {
      restorePath();
    }
  });
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue