feat(reflection): wire LLM dispatch (sf headless reflect --run)
Phase 1B of the reflection layer: complete the operator-driven loop by
adding actual LLM dispatch. Phase 1A (commit e161a59e2) shipped the
corpus assembler + prompt template + the prompt-emit operator surface.
This commit wires the dispatch end so `sf headless reflect --run`
produces a real report on disk without manual model piping.
Why shell-out to the gemini CLI and not SF's provider abstraction:
reflection is a single-prompt one-shot inference. Going through SF's
full agent dispatch would require a session, model registry, tool
registration, recovery shell — overkill for "render this prompt,
capture text." The gemini CLI handles auth (~/.gemini/oauth_creds.json),
Code Assist project discovery, and protocol drift on our behalf.
Subprocess cost is paid once per reflection (rare).
Implementation:
- reflection.js: runGeminiReflection(prompt, options) spawns
`gemini --yolo --model <model> -p "<directive>"` and pipes the giant
rendered template via stdin (gemini appends the -p directive to whatever it reads from stdin).
Returns { ok, content, cleanFinish, exitCode, error, stderr }; never
throws. Defaults to gemini-3-pro-preview (0% used on AI Ultra,
strongest agentic model with quota). 8-minute timeout.
cleanFinish detected by REFLECTION_COMPLETE terminator (emitted by
the prompt template's output contract) — operator gets a warning when
the report is truncated.
- headless-reflect.ts: --run flag triggers dispatch + report write
via writeReflectionReport. --model overrides the default. Errors
surface as JSON or text per --json. Successful runs emit the report
path on stdout; failures emit error + truncated stderr.
- help-text.ts: documents --run and --model flags.
- Tests (4 new, 13 total): use a fake `gemini` binary on PATH to
exercise the spawn path without real OAuth/network — covers
ok+cleanFinish, non-zero exit, hang/timeout, missing-terminator.
All 1538 SF extension tests pass; typecheck clean.
Phase 2 follow-up (still gated on sf-mp4rxkwb-l4baga
triage-not-a-first-class-unit-type landing): reflection-pass becomes a
real autonomous-loop unit type, milestone-close auto-triggers it, the
report's `Recommended new self-feedback entries` section gets parsed
and the entries auto-filed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e161a59e2f
commit
62b19d7ba4
5 changed files with 296 additions and 3 deletions
|
|
@ -52,6 +52,8 @@ function sfExtensionPath(moduleName: string): string {
|
||||||
|
|
||||||
export interface HandleReflectOptions {
|
export interface HandleReflectOptions {
|
||||||
json?: boolean;
|
json?: boolean;
|
||||||
|
run?: boolean;
|
||||||
|
model?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface HandleReflectResult {
|
export interface HandleReflectResult {
|
||||||
|
|
@ -89,6 +91,18 @@ export async function handleReflect(
|
||||||
let mod: {
|
let mod: {
|
||||||
assembleReflectionCorpus: (basePath: string) => unknown;
|
assembleReflectionCorpus: (basePath: string) => unknown;
|
||||||
renderReflectionCorpusBrief: (corpus: unknown) => string;
|
renderReflectionCorpusBrief: (corpus: unknown) => string;
|
||||||
|
writeReflectionReport: (basePath: string, content: string) => string | null;
|
||||||
|
runGeminiReflection: (
|
||||||
|
prompt: string,
|
||||||
|
options?: { model?: string; timeoutMs?: number },
|
||||||
|
) => Promise<{
|
||||||
|
ok: boolean;
|
||||||
|
content?: string;
|
||||||
|
error?: string;
|
||||||
|
cleanFinish?: boolean;
|
||||||
|
stderr?: string;
|
||||||
|
exitCode?: number;
|
||||||
|
}>;
|
||||||
};
|
};
|
||||||
try {
|
try {
|
||||||
mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
|
mod = (await jiti.import(sfExtensionPath("reflection"))) as typeof mod;
|
||||||
|
|
@ -149,6 +163,47 @@ export async function handleReflect(
|
||||||
// rendered prompt to pipe into a model. The full template path runs
|
// rendered prompt to pipe into a model. The full template path runs
|
||||||
// inside SF when reflection-pass becomes a real unit type.
|
// inside SF when reflection-pass becomes a real unit type.
|
||||||
const rendered = promptTemplate.replace("{{corpus}}", brief);
|
const rendered = promptTemplate.replace("{{corpus}}", brief);
|
||||||
process.stdout.write(`${rendered}\n`);
|
|
||||||
|
if (!options.run) {
|
||||||
|
process.stdout.write(`${rendered}\n`);
|
||||||
|
return { exitCode: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// --run: dispatch the rendered prompt to gemini-cli, capture the report,
|
||||||
|
// persist to .sf/reflection/<ts>-report.md, emit the report path on stdout.
|
||||||
|
process.stderr.write("[reflect] dispatching to gemini-cli (this can take a few minutes)…\n");
|
||||||
|
const result = await mod.runGeminiReflection(rendered, {
|
||||||
|
model: options.model,
|
||||||
|
});
|
||||||
|
if (!result.ok) {
|
||||||
|
const payload = {
|
||||||
|
ok: false,
|
||||||
|
error: result.error ?? "unknown gemini error",
|
||||||
|
exitCode: result.exitCode,
|
||||||
|
stderrTail: (result.stderr ?? "").slice(-500),
|
||||||
|
};
|
||||||
|
process.stdout.write(
|
||||||
|
options.json
|
||||||
|
? `${JSON.stringify(payload)}\n`
|
||||||
|
: `[reflect] failed: ${payload.error}\n`,
|
||||||
|
);
|
||||||
|
return { exitCode: 1 };
|
||||||
|
}
|
||||||
|
const reportPath = mod.writeReflectionReport(cwd, result.content ?? "");
|
||||||
|
const payload = {
|
||||||
|
ok: true,
|
||||||
|
reportPath,
|
||||||
|
cleanFinish: result.cleanFinish === true,
|
||||||
|
};
|
||||||
|
if (options.json) {
|
||||||
|
process.stdout.write(`${JSON.stringify(payload)}\n`);
|
||||||
|
} else {
|
||||||
|
process.stdout.write(`Reflection report written to: ${reportPath}\n`);
|
||||||
|
if (!result.cleanFinish) {
|
||||||
|
process.stderr.write(
|
||||||
|
"[reflect] WARNING: report did not include REFLECTION_COMPLETE terminator — output may be truncated\n",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
return { exitCode: 0 };
|
return { exitCode: 0 };
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -831,8 +831,18 @@ async function runHeadlessOnce(
|
||||||
// reflection unit is a separate follow-up.
|
// reflection unit is a separate follow-up.
|
||||||
if (options.command === "reflect") {
|
if (options.command === "reflect") {
|
||||||
const wantsJson = options.json || options.commandArgs.includes("--json");
|
const wantsJson = options.json || options.commandArgs.includes("--json");
|
||||||
|
const wantsRun = options.commandArgs.includes("--run");
|
||||||
|
const modelIdx = options.commandArgs.indexOf("--model");
|
||||||
|
const model =
|
||||||
|
modelIdx >= 0 && modelIdx + 1 < options.commandArgs.length
|
||||||
|
? options.commandArgs[modelIdx + 1]
|
||||||
|
: undefined;
|
||||||
const { handleReflect } = await import("./headless-reflect.js");
|
const { handleReflect } = await import("./headless-reflect.js");
|
||||||
const result = await handleReflect(process.cwd(), { json: wantsJson });
|
const result = await handleReflect(process.cwd(), {
|
||||||
|
json: wantsJson,
|
||||||
|
run: wantsRun,
|
||||||
|
model,
|
||||||
|
});
|
||||||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -224,7 +224,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
||||||
" new-milestone Create a milestone from a specification document",
|
" new-milestone Create a milestone from a specification document",
|
||||||
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
|
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
|
||||||
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
|
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
|
||||||
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw)",
|
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
|
||||||
"",
|
"",
|
||||||
"new-milestone flags:",
|
"new-milestone flags:",
|
||||||
" --context <path> Path to spec/PRD file (use '-' for stdin)",
|
" --context <path> Path to spec/PRD file (use '-' for stdin)",
|
||||||
|
|
|
||||||
|
|
@ -344,3 +344,109 @@ export function writeReflectionReport(basePath, content) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const REFLECTION_TERMINATOR = "REFLECTION_COMPLETE";
|
||||||
|
|
||||||
|
/**
 * Spawn the gemini CLI to run a reflection pass against the given prompt.
 *
 * Why shell-out and not SF's provider abstraction: the reflection pass is a
 * single-prompt one-shot inference. Going through SF's full agent dispatch
 * would require a session, model registry, tool registration, recovery
 * shell — all overkill for "render this prompt, capture text". The gemini
 * CLI handles auth via OAuth (~/.gemini/oauth_creds.json), Code Assist
 * project discovery, and protocol drift on our behalf. We pay the cost of
 * spawning a subprocess once per reflection, which is rare.
 *
 * The gemini CLI's `-p` flag takes a prompt that is `Appended to input on
 * stdin (if any)` — so the giant rendered template goes via stdin and -p
 * carries a tiny directive that orients the model to the contract.
 *
 * Best-effort; never throws and never rejects. Every failure mode (spawn
 * failure, process error, non-zero exit, signal, timeout, stdin failure)
 * resolves with `ok: false`.
 *
 * Consumer: headless-reflect operator surface (--run flag).
 *
 * @param {string} prompt Fully rendered reflection prompt, written to the
 *   child's stdin.
 * @param {{ model?: string, timeoutMs?: number }} [options]
 *   `model` defaults to "gemini-3-pro-preview"; `timeoutMs` defaults to
 *   8 minutes.
 * @returns {Promise<{
 *   ok: boolean,
 *   content?: string,
 *   cleanFinish?: boolean,
 *   exitCode?: number,
 *   error?: string,
 *   stderr?: string,
 * }>} `content` is the child's stdout; `cleanFinish` is true when stdout
 *   contains the REFLECTION_COMPLETE terminator (only set when `ok`).
 */
export async function runGeminiReflection(prompt, options = {}) {
	const { spawn } = await import("node:child_process");
	const model = options.model ?? "gemini-3-pro-preview";
	const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;

	return await new Promise((resolve) => {
		let stdoutBuf = "";
		let stderrBuf = "";
		let settled = false;
		let timer;

		// Single exit point: first caller wins, later calls are no-ops, and the
		// timeout timer is always released so it cannot keep the loop alive.
		const finish = (result) => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			resolve(result);
		};

		let proc;
		try {
			// NOTE(review): --yolo presumably auto-approves tool calls in the
			// gemini CLI; confirm it is required for a text-only reflection pass,
			// since the corpus piped in below may contain untrusted text.
			proc = spawn(
				"gemini",
				["--yolo", "--model", model, "-p", "Run the reflection pass per the contract in the prompt below."],
				{ stdio: ["pipe", "pipe", "pipe"] },
			);
		} catch (err) {
			finish({
				ok: false,
				error: `gemini spawn failed: ${err instanceof Error ? err.message : String(err)}`,
			});
			return;
		}

		timer = setTimeout(() => {
			try {
				proc.kill("SIGTERM");
			} catch {
				/* ignore — the process may already be gone */
			}
			finish({
				ok: false,
				error: `gemini timed out after ${timeoutMs}ms`,
				stderr: stderrBuf,
			});
		}, timeoutMs);

		// Decode at the stream level so a multibyte UTF-8 character that is
		// split across two chunks is reassembled instead of being corrupted.
		proc.stdout.setEncoding("utf8");
		proc.stderr.setEncoding("utf8");
		proc.stdout.on("data", (chunk) => {
			stdoutBuf += chunk;
		});
		proc.stderr.on("data", (chunk) => {
			stderrBuf += chunk;
		});

		// If the child exits before draining stdin (e.g. an early auth failure
		// while we are still writing a large prompt), the pipe emits an async
		// EPIPE 'error'. Without a listener that would surface as an uncaught
		// exception and break the never-throws contract. The 'close' / 'error'
		// handlers below report the real outcome, so nothing more is needed here.
		proc.stdin.on("error", () => {});

		proc.on("error", (err) => {
			finish({
				ok: false,
				error: `gemini process error: ${err.message}`,
				stderr: stderrBuf,
			});
		});

		proc.on("close", (code, signal) => {
			if (code !== 0) {
				// code is null when the child was terminated by a signal.
				const reason =
					code === null
						? `gemini terminated by signal ${signal}`
						: `gemini exited with code ${code}`;
				finish({
					ok: false,
					exitCode: code ?? undefined,
					error: reason,
					stderr: stderrBuf,
					content: stdoutBuf,
				});
				return;
			}
			finish({
				ok: true,
				exitCode: 0,
				content: stdoutBuf,
				cleanFinish: stdoutBuf.includes(REFLECTION_TERMINATOR),
			});
		});

		try {
			proc.stdin.write(prompt);
			proc.stdin.end();
		} catch (err) {
			finish({
				ok: false,
				error: `gemini stdin write failed: ${err instanceof Error ? err.message : String(err)}`,
			});
		}
	});
}
|
||||||
|
|
|
||||||
|
|
@ -260,3 +260,125 @@ describe("writeReflectionReport", () => {
|
||||||
expect(corpus.previousReport.content).toContain("Test reflection");
|
expect(corpus.previousReport.content).toContain("Test reflection");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Fake `gemini` executables used by the runGeminiReflection tests. Each is a
// tiny node script installed on PATH so the real spawn path is exercised
// without OAuth or network access.

/** Echoes the stdin length and emits the completion terminator. */
const FAKE_GEMINI_OK = `#!/usr/bin/env node
let stdin = "";
process.stdin.on("data", (c) => { stdin += c; });
process.stdin.on("end", () => {
process.stdout.write("# Reflection\\nReceived " + stdin.length + " chars\\nREFLECTION_COMPLETE\\n");
process.exit(0);
});
`;

/** Writes to stderr and exits with code 2. */
const FAKE_GEMINI_FAIL = `#!/usr/bin/env node
process.stderr.write("synthetic fail\\n");
process.exit(2);
`;

/** Never exits on its own. */
const FAKE_GEMINI_HANG = `#!/usr/bin/env node
setInterval(() => {}, 1000); // hang
`;

/** Exits 0 without emitting the completion terminator. */
const FAKE_GEMINI_TRUNCATED = `#!/usr/bin/env node
process.stdout.write("# Truncated\\nNo terminator here.\\n");
process.exit(0);
`;

/**
 * Install `script` as an executable named `gemini` in a fresh temp directory,
 * prepend that directory to PATH, run `body`, and restore PATH afterwards
 * (even when `body` throws). The temp directory is registered in `tmpDirs`.
 *
 * @param {string} dirPrefix Prefix for the temp directory name.
 * @param {string} script Source of the fake gemini executable.
 * @param {() => Promise<void>} body Test body to run while the fake is on PATH.
 */
async function withFakeGemini(dirPrefix, script, body) {
	const { chmodSync } = await import("node:fs");
	const fakeBinDir = mkdtempSync(join(tmpdir(), dirPrefix));
	tmpDirs.push(fakeBinDir);
	const fakeGeminiPath = join(fakeBinDir, "gemini");
	writeFileSync(fakeGeminiPath, script);
	// Make it executable
	chmodSync(fakeGeminiPath, 0o755);

	const originalPath = process.env.PATH;
	process.env.PATH = `${fakeBinDir}:${originalPath}`;
	try {
		await body();
	} finally {
		// Assigning undefined to process.env stores the string "undefined",
		// so an originally-unset PATH has to be deleted instead.
		if (originalPath === undefined) {
			delete process.env.PATH;
		} else {
			process.env.PATH = originalPath;
		}
	}
}

describe("runGeminiReflection", () => {
	test("returns ok with content + cleanFinish when terminator present", async () => {
		const { runGeminiReflection } = await import("../reflection.js");
		await withFakeGemini("fake-gemini-", FAKE_GEMINI_OK, async () => {
			const result = await runGeminiReflection("a".repeat(100), {
				timeoutMs: 5000,
			});
			expect(result.ok).toBe(true);
			expect(result.content).toContain("Received 100 chars");
			expect(result.cleanFinish).toBe(true);
		});
	});

	test("returns ok=false on non-zero exit", async () => {
		const { runGeminiReflection } = await import("../reflection.js");
		await withFakeGemini("fake-gemini-fail-", FAKE_GEMINI_FAIL, async () => {
			const result = await runGeminiReflection("ignored", {
				timeoutMs: 5000,
			});
			expect(result.ok).toBe(false);
			expect(result.exitCode).toBe(2);
			expect(result.error).toMatch(/exited with code 2/);
		});
	});

	test("returns ok=false with timeout error when subprocess hangs", async () => {
		const { runGeminiReflection } = await import("../reflection.js");
		await withFakeGemini("fake-gemini-hang-", FAKE_GEMINI_HANG, async () => {
			const result = await runGeminiReflection("ignored", {
				timeoutMs: 200,
			});
			expect(result.ok).toBe(false);
			expect(result.error).toMatch(/timed out/);
		});
	});

	test("flags cleanFinish=false when terminator absent", async () => {
		const { runGeminiReflection } = await import("../reflection.js");
		await withFakeGemini("fake-gemini-trunc-", FAKE_GEMINI_TRUNCATED, async () => {
			const result = await runGeminiReflection("ignored", { timeoutMs: 5000 });
			expect(result.ok).toBe(true);
			expect(result.cleanFinish).toBe(false);
		});
	});
});
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue