From e2147c0694101756fe5a842043ad778cea5f9329 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 25 Apr 2026 06:34:49 +0200 Subject: [PATCH] sf snapshot: pre-dispatch, uncommitted changes after 43m inactivity --- flake.nix | 2 + .../pi-coding-agent/src/core/system-prompt.ts | 36 +- .../interactive/components/tool-execution.ts | 94 +- .../tests/system-prompt-skill-filter.test.ts | 157 +++ scripts/postinstall.js | 12 +- sf-orchestrator/SKILL.md | 5 +- sf-orchestrator/workflows/build-from-spec.md | 16 +- sf-orchestrator/workflows/monitor-and-poll.md | 67 ++ sf-orchestrator/workflows/step-by-step.md | 17 +- src/headless.ts | 23 + src/resource-loader.ts | 38 +- .../claude-code-cli/stream-adapter.ts | 462 ++++++++- .../tests/stream-adapter.test.ts | 812 ++++++++++++++++ src/resources/extensions/sf/auto-dispatch.ts | 44 +- src/resources/extensions/sf/auto-prompts.ts | 917 +++++++++++++----- src/resources/extensions/sf/auto/phases.ts | 38 +- .../extensions/sf/bootstrap/write-gate.ts | 45 +- .../extensions/sf/commands-bootstrap.ts | 1 + .../extensions/sf/commands-codebase.ts | 37 +- .../extensions/sf/commands/catalog.ts | 1 + src/resources/extensions/sf/context-store.ts | 33 +- .../sf/docs/preferences-reference.md | 3 + src/resources/extensions/sf/guided-flow.ts | 9 +- .../sf/milestone-scope-classifier.ts | 302 ++++++ .../extensions/sf/prompt-cache-optimizer.ts | 4 + .../extensions/sf/prompts/discuss-headless.md | 2 + .../extensions/sf/prompts/discuss.md | 9 + .../extensions/sf/prompts/doctor-heal.md | 7 +- .../sf/prompts/guided-discuss-milestone.md | 9 +- .../sf/prompts/guided-discuss-slice.md | 9 +- src/resources/extensions/sf/prompts/system.md | 13 + .../extensions/sf/tests/context-store.test.ts | 79 ++ .../state-machine-edge-cases.test.ts | 21 + .../extensions/sf/tests/knowledge.test.ts | 94 +- .../tests/milestone-scope-classifier.test.ts | 188 ++++ .../sf/tests/prompt-cache-optimizer.test.ts | 12 + .../sf/tests/prompt-contracts.test.ts | 40 + 
.../extensions/sf/tests/write-gate.test.ts | 24 + src/resources/extensions/sf/uok/plan-v2.ts | 18 +- .../resource-loader-content-hash.test.ts | 83 ++ 40 files changed, 3485 insertions(+), 298 deletions(-) create mode 100644 packages/pi-coding-agent/src/tests/system-prompt-skill-filter.test.ts create mode 100644 src/resources/extensions/sf/milestone-scope-classifier.ts create mode 100644 src/resources/extensions/sf/tests/milestone-scope-classifier.test.ts create mode 100644 src/tests/resource-loader-content-hash.test.ts diff --git a/flake.nix b/flake.nix index aeb3d9329..1e09f5242 100644 --- a/flake.nix +++ b/flake.nix @@ -24,6 +24,7 @@ clippy git nodejs_24 + protobuf rust-analyzer rustc rustfmt @@ -39,6 +40,7 @@ echo " bun : $(command -v bun)" echo " cargo: $(command -v cargo)" echo " node : $(command -v node)" + echo " protoc: $(command -v protoc)" echo " rustc: $(command -v rustc)" echo "" echo "Build native addon:" diff --git a/packages/pi-coding-agent/src/core/system-prompt.ts b/packages/pi-coding-agent/src/core/system-prompt.ts index f837ae349..be602844a 100644 --- a/packages/pi-coding-agent/src/core/system-prompt.ts +++ b/packages/pi-coding-agent/src/core/system-prompt.ts @@ -35,6 +35,26 @@ export interface BuildSystemPromptOptions { contextFiles?: Array<{ path: string; content: string }>; /** Pre-loaded skills. */ skills?: Skill[]; + /** + * Optional predicate applied to the `skills` list before rendering the + * catalog. Returning `false` omits a skill from the + * prompt (the skill remains loaded and invocable by name — only the + * catalog listing is suppressed). + * + * Intended for consumers that can narrow the relevant skill surface + * (e.g. per-unit-type manifests) to reduce cached system-prompt bloat. + * When omitted, all non-`disableModelInvocation` skills render — i.e. + * behavior is unchanged from before this option existed. + * + * Contract: the predicate must be **pure and synchronous**. 
It may be + * invoked on every system-prompt rebuild (tool-set changes and + * runtime resource-loader extensions both trigger one), so any state + * the closure captures should be stable across the rebuild window. + * If the predicate throws, `buildSystemPrompt` logs a warning and + * falls back to the unfiltered skill list — callers never see the + * exception and the session stays consistent. + */ + skillFilter?: (skill: Skill) => boolean; } /** Build the system prompt with tools, guidelines, and context */ @@ -48,6 +68,7 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions = {}): strin cwd, contextFiles: providedContextFiles, skills: providedSkills, + skillFilter, } = options; const resolvedCwd = toPosixPath(cwd ?? process.cwd()); @@ -66,7 +87,20 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions = {}): strin const appendSection = appendSystemPrompt ? `\n\n${appendSystemPrompt}` : ""; const contextFiles = providedContextFiles ?? []; - const skills = providedSkills ?? []; + const skillsBase = providedSkills ?? []; + let skills = skillsBase; + if (skillFilter) { + try { + skills = skillsBase.filter(skillFilter); + } catch (error) { + // A consumer's predicate threw. Fall back to the unfiltered list so + // the session stays consistent — callers (e.g. AgentSession.setTools) + // must not be left with updated tools but a stale system prompt. + const message = error instanceof Error ? error.message : String(error); + console.warn(`buildSystemPrompt: skillFilter threw; falling back to unfiltered skills. 
Error: ${message}`); + skills = skillsBase; + } + } if (customPrompt) { let prompt = customPrompt; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts index 058d49335..e45330c70 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts @@ -65,6 +65,67 @@ function parseMcpToolName(name: string): { server: string; tool: string } | null return { server: rest.slice(0, delim), tool: rest.slice(delim + 2) }; } +/** + * Prettify a raw tool name for display. Prefers the registered `label` + * ("Complete Slice") when available; otherwise strips a leading `sf_` + * prefix and converts snake_case to Title Case. + */ +function prettifyToolName(name: string, label?: string): string { + if (label && label.trim().length > 0 && label !== name) return label; + const stripped = name.replace(/^sf_/, ""); + if (stripped.length === 0) return name; + return stripped + .split("_") + .map((word) => (word.length === 0 ? word : word[0].toUpperCase() + word.slice(1))) + .join(" "); +} + +type ToolFrameTone = "pending" | "success" | "error"; + +function trimOuterBlankLines(lines: string[]): string[] { + let start = 0; + let end = lines.length; + while (start < end && lines[start].trim().length === 0) start++; + while (end > start && lines[end - 1].trim().length === 0) end--; + return lines.slice(start, end); +} + +function renderToolFrame( + contentLines: string[], + width: number, + opts: { + label: string; + status: string; + tone: ToolFrameTone; + }, +): string[] { + const outerWidth = Math.max(20, width); + const contentWidth = Math.max(1, outerWidth - 2); // "│ " + content + + const borderColor = opts.tone === "error" ? "error" : "toolTitle"; + const topColor = opts.tone === "error" ? "error" : "toolTitle"; + const labelColor = opts.tone === "error" ? 
"error" : "toolTitle"; + const statusColor = opts.tone === "error" ? "error" : opts.tone === "pending" ? "warning" : "success"; + const border = (s: string) => theme.fg(borderColor, s); + + const leftStyled = theme.fg(labelColor, theme.bold(`• ${opts.label}`)); + const rightStyled = theme.fg(statusColor, opts.status); + const gap = Math.max(1, outerWidth - visibleWidth(leftStyled) - visibleWidth(rightStyled)); + const headerRow = `${leftStyled}${" ".repeat(gap)}${rightStyled}`; + const headerPad = Math.max(0, outerWidth - visibleWidth(headerRow)); + + const sourceLines = trimOuterBlankLines(contentLines); + const bodyLines = (sourceLines.length > 0 ? sourceLines : [""]).map((line) => { + const clipped = truncateToWidth(line, contentWidth, ""); + return border("│ ") + clipped; + }); + + return [ + theme.fg(topColor, "─".repeat(outerWidth)), + headerRow + " ".repeat(headerPad), + ...bodyLines, + ]; +} const COMPACT_ARG_VALUE_LIMIT = 60; const GENERIC_OUTPUT_PREVIEW_LINES = 10; const GENERIC_ARGS_JSON_PREVIEW_LINES = 10; @@ -83,15 +144,19 @@ function formatCompactArgs(args: unknown, expanded: boolean): string { const allPrimitive = entries.every(([, value]) => { const t = typeof value; - if (t === "number" || t === "boolean") return true; - if (t === "string") return (value as string).length <= COMPACT_ARG_VALUE_LIMIT; - return value == null; + return t === "number" || t === "boolean" || t === "string" || value == null; }); if (allPrimitive) { return entries .map(([key, value]) => { - if (typeof value === "string") return `${key}=${JSON.stringify(value)}`; + if (typeof value === "string") { + const truncated = + !expanded && value.length > COMPACT_ARG_VALUE_LIMIT + ? 
`${value.slice(0, COMPACT_ARG_VALUE_LIMIT - 1)}…` + : value; + return `${key}=${JSON.stringify(truncated)}`; + } if (value == null) return `${key}=null`; return `${key}=${String(value)}`; }) @@ -452,7 +517,22 @@ export class ToolExecutionComponent extends Container { if (this.hideComponent) { return []; } - return super.render(width); + const frameWidth = Math.max(20, width); + const contentWidth = Math.max(1, frameWidth - 4); + const lines = super.render(contentWidth); + const frameTone: ToolFrameTone = + this.result?.isError ? "error" : this.isPartial || !this.result ? "pending" : "success"; + const frameStatus = this.isPartial || !this.result ? "Running" : this.result.isError ? "Error" : "Done"; + const parsed = parseMcpToolName(this.toolName); + const frameLabel = parsed + ? `Tool ${parsed.server}·${parsed.tool}` + : `Tool ${prettifyToolName(this.toolName, this.toolDefinition?.label) || "unknown"}`; + const framed = renderToolFrame(lines, frameWidth, { + label: frameLabel, + status: frameStatus, + tone: frameTone, + }); + return framed.length > 0 ? ["", ...framed] : framed; } private updateDisplay(): void { @@ -1050,7 +1130,9 @@ export class ToolExecutionComponent extends Container { // cleanly. SF-registered MCP tools have already had their prefix // stripped upstream in partial-builder.ts and won't reach this branch. const parsed = parseMcpToolName(this.toolName); - const displayName = parsed ? parsed.tool : this.toolName; + const displayName = parsed + ? parsed.tool + : prettifyToolName(this.toolName, this.toolDefinition?.label); const serverPrefix = parsed ? 
theme.fg("muted", `${parsed.server}\u00b7`) : ""; text = serverPrefix + theme.fg("toolTitle", theme.bold(displayName)); diff --git a/packages/pi-coding-agent/src/tests/system-prompt-skill-filter.test.ts b/packages/pi-coding-agent/src/tests/system-prompt-skill-filter.test.ts new file mode 100644 index 000000000..afa652db4 --- /dev/null +++ b/packages/pi-coding-agent/src/tests/system-prompt-skill-filter.test.ts @@ -0,0 +1,157 @@ +// @gsd/pi-coding-agent + system-prompt-skill-filter.test — coverage for the +// optional `skillFilter` option added to buildSystemPrompt (RFC #4779). The +// filter lets consumers narrow the catalog rendered into +// the cached system prompt without touching skill loading or invocation. + +import test from "node:test"; +import assert from "node:assert/strict"; + +import { buildSystemPrompt } from "../core/system-prompt.js"; +import type { Skill } from "../core/skills.js"; + +function makeSkill(name: string, description = `description for ${name}`): Skill { + return { + name, + description, + filePath: `/tmp/${name}/SKILL.md`, + baseDir: `/tmp/${name}`, + source: "project", + disableModelInvocation: false, + }; +} + +function extractAvailableSkills(prompt: string): string { + const start = prompt.indexOf(""); + const end = prompt.indexOf(""); + if (start === -1 || end === -1) return ""; + return prompt.slice(start, end + "".length); +} + +// ─── Default branch (no customPrompt) ────────────────────────────────────── + +test("buildSystemPrompt: skillFilter omits filtered-out skills from ", () => { + const skills = [makeSkill("alpha"), makeSkill("beta"), makeSkill("gamma")]; + const prompt = buildSystemPrompt({ + skills, + selectedTools: ["read", "Skill"], + skillFilter: skill => skill.name !== "beta", + }); + + const section = extractAvailableSkills(prompt); + assert.ok(section.length > 0, "catalog section should render"); + assert.match(section, /alpha<\/name>/); + assert.match(section, /gamma<\/name>/); + assert.doesNotMatch(section, 
/beta<\/name>/); +}); + +test("buildSystemPrompt: skillFilter omitted preserves pre-filter behavior (all skills render)", () => { + const skills = [makeSkill("alpha"), makeSkill("beta")]; + const prompt = buildSystemPrompt({ + skills, + selectedTools: ["read", "Skill"], + }); + + const section = extractAvailableSkills(prompt); + assert.match(section, /alpha<\/name>/); + assert.match(section, /beta<\/name>/); +}); + +test("buildSystemPrompt: skillFilter that rejects every skill suppresses the block", () => { + const skills = [makeSkill("alpha"), makeSkill("beta")]; + const prompt = buildSystemPrompt({ + skills, + selectedTools: ["read", "Skill"], + skillFilter: () => false, + }); + + // With zero visible skills, formatSkillsForPrompt returns an empty string, + // so the opening tag should not appear anywhere. + assert.ok(!prompt.includes("")); +}); + +// ─── Custom-prompt branch ────────────────────────────────────────────────── + +test("buildSystemPrompt (customPrompt): skillFilter applies to the catalog appended onto a custom prompt", () => { + const skills = [makeSkill("alpha"), makeSkill("beta"), makeSkill("gamma")]; + const prompt = buildSystemPrompt({ + customPrompt: "CUSTOM BASE", + skills, + selectedTools: ["read", "Skill"], + skillFilter: skill => skill.name === "alpha", + }); + + const section = extractAvailableSkills(prompt); + assert.match(section, /alpha<\/name>/); + assert.doesNotMatch(section, /beta<\/name>/); + assert.doesNotMatch(section, /gamma<\/name>/); +}); + +// ─── Interaction with disableModelInvocation ────────────────────────────── + +test("buildSystemPrompt: skillFilter composes with disableModelInvocation (both must pass)", () => { + // A skill already hidden from the catalog by disableModelInvocation must + // remain hidden even if skillFilter would otherwise admit it. The filter + // narrows, it does not override the existing invisibility contract. 
+ const skills: Skill[] = [ + { ...makeSkill("visible"), disableModelInvocation: false }, + { ...makeSkill("hidden"), disableModelInvocation: true }, + ]; + const prompt = buildSystemPrompt({ + skills, + selectedTools: ["read", "Skill"], + skillFilter: () => true, + }); + + const section = extractAvailableSkills(prompt); + assert.match(section, /visible<\/name>/); + assert.doesNotMatch(section, /hidden<\/name>/); +}); + +// ─── Pass-through of non-filtered fields ────────────────────────────────── + +test("buildSystemPrompt: skillFilter does not affect context files or cwd rendering", () => { + const skills = [makeSkill("alpha")]; + const prompt = buildSystemPrompt({ + skills, + cwd: "/tmp/example", + contextFiles: [{ path: "CLAUDE.md", content: "project instructions" }], + selectedTools: ["read", "Skill"], + skillFilter: () => false, + }); + + assert.ok(prompt.includes("/tmp/example"), "cwd should still render"); + assert.ok(prompt.includes("project instructions"), "context files should still render"); + assert.ok(!prompt.includes(""), "no skill catalog when filter rejects all"); +}); + +// ─── Exception safety ───────────────────────────────────────────────────── + +test("buildSystemPrompt: skillFilter that throws falls back to unfiltered list and does not propagate", (t) => { + // A buggy consumer predicate must not bubble out of buildSystemPrompt. + // If it did, _rebuildSystemPrompt could unwind mid-setTools() and leave + // the session with updated tools but a stale system prompt. + const skills = [makeSkill("alpha"), makeSkill("beta")]; + + // Suppress the console.warn the fallback emits so test output stays clean. 
+ const originalWarn = console.warn; + const warnings: string[] = []; + console.warn = (...args: unknown[]) => { warnings.push(args.join(" ")); }; + t.after(() => { console.warn = originalWarn; }); + + let prompt = ""; + assert.doesNotThrow(() => { + prompt = buildSystemPrompt({ + skills, + selectedTools: ["read", "Skill"], + skillFilter: () => { throw new Error("consumer bug"); }, + }); + }); + + const section = extractAvailableSkills(prompt); + assert.match(section, /alpha<\/name>/, "alpha should render (fallback to unfiltered)"); + assert.match(section, /beta<\/name>/, "beta should render (fallback to unfiltered)"); + assert.ok( + warnings.some(w => w.includes("skillFilter threw") && w.includes("consumer bug")), + "fallback should emit an identifying warning", + ); +}); diff --git a/scripts/postinstall.js b/scripts/postinstall.js index 30817c889..021ff244b 100644 --- a/scripts/postinstall.js +++ b/scripts/postinstall.js @@ -167,12 +167,22 @@ async function ensureRtkInstalled() { throw new Error('downloaded RTK binary failed validation') } } catch (error) { - logWarn(`RTK install skipped: ${error instanceof Error ? error.message : String(error)}`) + logWarn(`RTK install skipped: ${describeFetchError(error)}`) } finally { rmSync(tempRoot, { recursive: true, force: true }) } } +function describeFetchError(err) { + const base = err?.message || String(err) + const cause = err?.cause + if (!cause) return base + const code = cause.code || cause.errno + const causeMsg = cause.message || '' + const detail = code ? `${code}${causeMsg && causeMsg !== code ? ` — ${causeMsg}` : ''}` : causeMsg + return detail ? 
`${base} (${detail})` : base +} + if (!PLAYWRIGHT_SKIP) { await run('npx playwright install chromium') } diff --git a/sf-orchestrator/SKILL.md b/sf-orchestrator/SKILL.md index 1475301ab..77ae157f9 100644 --- a/sf-orchestrator/SKILL.md +++ b/sf-orchestrator/SKILL.md @@ -52,7 +52,10 @@ Route based on what you need to do: Read `workflows/build-from-spec.md` — write spec, init directory, launch, monitor, verify. **Check on a running or completed build:** -Read `workflows/monitor-and-poll.md` — query state, interpret phases, handle blockers. +Read `workflows/monitor-and-poll.md` — query state, interpret phases, handle blockers, recover from crashes. + +**Intercept SF questions interactively (supervised mode):** +Read `workflows/monitor-and-poll.md#supervised-mode` — use when you want to handle SF's UI requests yourself instead of pre-supplying answers. **Execute with fine-grained control:** Read `workflows/step-by-step.md` — run one unit at a time with decision points. diff --git a/sf-orchestrator/workflows/build-from-spec.md b/sf-orchestrator/workflows/build-from-spec.md index 9552fa7b0..50241888a 100644 --- a/sf-orchestrator/workflows/build-from-spec.md +++ b/sf-orchestrator/workflows/build-from-spec.md @@ -63,8 +63,20 @@ EXIT=$? **With budget limit:** ```bash -# Use step-by-step mode with budget checks instead of auto -# See workflows/step-by-step.md +# Create the milestone, then run step-by-step with a budget cap +MAX_BUDGET=15.00 +sf headless --output-format json --context spec.md new-milestone 2>/dev/null +while true; do + RESULT=$(sf headless --output-format json next 2>/dev/null) + EXIT=$? 
+ [ $EXIT -ne 0 ] && break + STATE=$(sf headless query) + PHASE=$(echo "$STATE" | jq -r '.state.phase') + COST=$(echo "$STATE" | jq -r '.cost.total') + [ "$PHASE" = "complete" ] && { echo "Done (\$$COST)"; break; } + OVER=$(echo "$COST > $MAX_BUDGET" | bc -l) + [ "$OVER" = "1" ] && { echo "Budget cap hit at \$$COST"; sf headless stop; break; } +done ``` **For CI or ecosystem runs (no user config):** diff --git a/sf-orchestrator/workflows/monitor-and-poll.md b/sf-orchestrator/workflows/monitor-and-poll.md index ffff137e4..5662f70c1 100644 --- a/sf-orchestrator/workflows/monitor-and-poll.md +++ b/sf-orchestrator/workflows/monitor-and-poll.md @@ -163,6 +163,73 @@ sf headless --output-format json auto 2>/dev/null sf headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null ``` +## Supervised Mode + +Use `--supervised` when you want SF to ask you questions interactively rather than auto-answering or blocking. SF writes UI requests to stdout as JSONL; you respond via stdin. + +**When to use it:** You're the orchestrator running in a loop and want to intercept SF's questions yourself instead of pre-supplying an answers file. + +```bash +# Launch in supervised mode — SF will write extension_ui_request events to stdout +# and wait for your response on stdin before continuing +sf headless --supervised --json auto 2>/dev/null | while IFS= read -r line; do + TYPE=$(echo "$line" | jq -r '.type') + + if [ "$TYPE" = "extension_ui_request" ]; then + # SF is asking a question — inspect it and respond + TITLE=$(echo "$line" | jq -r '.title // .message // "?"') + OPTIONS=$(echo "$line" | jq -r '.options[]?.label // empty' | head -5) + echo "SF asks: $TITLE" >&2 + echo "Options: $OPTIONS" >&2 + + # Send your answer back on stdin (the option label or value) + echo "first_option" # replace with your selection logic + fi +done +``` + +`--response-timeout N` (default 30000ms) controls how long SF waits for your response before treating it as a timeout. 
If you don't respond in time, SF blocks with exit code 10. + +**Simpler alternative:** If you just want to pre-answer known questions without interactive handling, use `--answers ` instead — see `references/answer-injection.md`. + +## Crash Recovery + +When SF exits unexpectedly (crash, OOM, signal) or `.sf/` state looks corrupted: + +```bash +cd /path/to/project + +# 1. Check if the project directory is intact +ls .sf/ 2>/dev/null || { echo "No .sf/ — project state lost, start fresh"; exit 1; } + +# 2. Run doctor — detects and auto-fixes common state corruption +sf headless doctor + +# 3. Check what state SF thinks it's in +sf headless query | jq '{phase: .state.phase, next: .next}' + +# 4. If query fails (state unreadable), inspect STATE.md directly +cat .sf/STATE.md 2>/dev/null + +# 5. Resume from current state +sf headless --output-format json auto 2>/dev/null + +# 6. If a specific session was interrupted, resume by session ID +sf headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null +``` + +**Common crash scenarios:** + +| Symptom | Cause | Fix | +|---------|-------|-----| +| `query` returns empty / parse error | `.sf/STATE.md` corrupted | Run `sf headless doctor` | +| Phase stuck at `advancing` | Slice summary write interrupted | Run `sf headless next` to retry | +| Phase stuck at `completing-milestone` | Milestone archive write interrupted | Run `sf headless dispatch complete` | +| Zombie `.sf/` lock file | Previous process killed mid-write | Run `sf headless doctor` | +| `exit 1` with no JSON output | SF itself crashed (OOM, signal) | Check system logs; resume with `--resume` | + +If `doctor` can't recover the state, the safest path is to read `.sf/milestones/*/ROADMAP.md` to see what completed, then start a new milestone for remaining work. 
+ ## Reading Build Artifacts After completion, inspect what SF produced: diff --git a/sf-orchestrator/workflows/step-by-step.md b/sf-orchestrator/workflows/step-by-step.md index b9f9eb1e6..84579216c 100644 --- a/sf-orchestrator/workflows/step-by-step.md +++ b/sf-orchestrator/workflows/step-by-step.md @@ -47,16 +47,18 @@ while true; do ;; esac - # Check if milestone complete - CURRENT_PHASE=$(sf headless query | jq -r '.state.phase') + # One query — extract phase, cost, and progress together + STATE=$(sf headless query) + CURRENT_PHASE=$(echo "$STATE" | jq -r '.state.phase') + TOTAL_COST=$(echo "$STATE" | jq -r '.cost.total') + PROGRESS=$(echo "$STATE" | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"') + if [ "$CURRENT_PHASE" = "complete" ]; then - TOTAL_COST=$(sf headless query | jq -r '.cost.total') echo "Milestone complete. Total cost: \$$TOTAL_COST" break fi # Budget check - TOTAL_COST=$(sf headless query | jq -r '.cost.total') OVER=$(echo "$TOTAL_COST > $MAX_BUDGET" | bc -l) if [ "$OVER" = "1" ]; then echo "Budget limit (\$$MAX_BUDGET) exceeded at \$$TOTAL_COST" @@ -64,8 +66,6 @@ while true; do break fi - # Progress report - PROGRESS=$(sf headless query | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"') echo "Step done ($STATUS). Phase: $CURRENT_PHASE, Progress: $PROGRESS, Cost: \$$TOTAL_COST" done ``` @@ -105,8 +105,9 @@ while true; do [ $EXIT -ne 0 ] && break - PHASE=$(sf headless query | jq -r '.state.phase') - COST=$(sf headless query | jq -r '.cost.total') + STATE=$(sf headless query) + PHASE=$(echo "$STATE" | jq -r '.state.phase') + COST=$(echo "$STATE" | jq -r '.cost.total') echo "Step $STEP complete. 
Phase: $PHASE, Cost: \$$COST" diff --git a/src/headless.ts b/src/headless.ts index 33d8bfd78..f05f4c112 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -358,6 +358,29 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): return { exitCode: result.exitCode, interrupted: false } } + // Doctor: read-only health check, no RPC child needed (#4904 live-regression). + // The interactive `/sf doctor` command lives in the SF extension; this CLI + // path lets non-interactive callers (CI, recovery scripts, the live-regression + // suite) get the same diagnostic without a TTY. + if (options.command === 'doctor') { + const wantsJson = options.json || options.commandArgs.includes('--json') + const { runSFDoctor, formatDoctorReport, formatDoctorReportJson } = await import('./resources/extensions/sf/doctor.js') + let exitCode = 1 + try { + const report = await runSFDoctor(process.cwd()) + const out = wantsJson ? formatDoctorReportJson(report) : formatDoctorReport(report) + process.stdout.write(`${out}\n`) + exitCode = report.ok ? 0 : 1 + } catch (err) { + const msg = err instanceof Error ? err.message : String(err) + process.stderr.write(`[headless] doctor failed: ${msg}\n`) + exitCode = 1 + } + // Bypass the auto-restart loop in runHeadless — doctor is a one-shot + // diagnostic; exit 1 means "issues detected", not "crashed". + process.exit(exitCode) + } + // Resolve CLI path for the child process const cliPath = process.env.SF_BIN_PATH || process.argv[1] if (!cliPath) { diff --git a/src/resource-loader.ts b/src/resource-loader.ts index 4eff22b49..4c645cb33 100644 --- a/src/resource-loader.ts +++ b/src/resource-loader.ts @@ -128,17 +128,27 @@ function readManagedResourceManifest(agentDir: string): ManagedResourceManifest } /** - * Computes a lightweight content fingerprint of the bundled resources directory. + * Computes a content fingerprint of a resources directory (defaults to the + * bundled resourcesDir). 
* - * Walks all files under resourcesDir and hashes their relative paths + sizes. - * This catches same-version content changes (npm link dev workflow, hotfixes - * within a release) without the cost of reading every file's contents. + * Walks all files under `rootDir` and hashes `${relativePath}:${sha256(contents)}` + * for each one. Using the file *contents* — not size — is what distinguishes + * this from the earlier implementation and closes #4787: a same-size edit + * (e.g. swapping one word for another word of the same byte length) produces + * a different file hash, bumps the aggregate fingerprint, and therefore + * triggers a full resync in `initResources`. The old path+size approach + * silently cached stale prompts across upgrades. * - * ~1ms for a typical resources tree (~100 files) — just stat calls, no reads. + * Cost is ~1-2ms for a typical resources tree (~100 small .md files) — + * still negligible at startup. Files are streamed via `readFileSync` but + * bundled prompts are tiny so this is fine. + * + * Exported for unit tests and for callers that want to check a different + * directory (e.g. pre-install verification). */ -function computeResourceFingerprint(): string { +export function computeResourceFingerprint(rootDir: string = resourcesDir): string { const entries: string[] = [] - collectFileEntries(resourcesDir, resourcesDir, entries) + collectFileEntries(rootDir, rootDir, entries) entries.sort() return createHash('sha256').update(entries.join('\n')).digest('hex').slice(0, 16) } @@ -151,8 +161,16 @@ function collectFileEntries(dir: string, root: string, out: string[]): void { collectFileEntries(fullPath, root, out) } else { const rel = relative(root, fullPath) - const size = statSync(fullPath).size - out.push(`${rel}:${size}`) + // Hash the file contents — see function doc for #4787 rationale. 
+ let contentHash: string + try { + contentHash = createHash('sha256').update(readFileSync(fullPath)).digest('hex') + } catch { + // Unreadable file — fall back to a stable marker so the entry still + // contributes to the aggregate hash and future reads will re-hash. + contentHash = 'unreadable' + } + out.push(`${rel}:${contentHash}`) } } } @@ -220,7 +238,7 @@ function makeTreeWritable(dirPath: string): void { * 3. Copies source into destination. * 4. Makes the result writable for the next upgrade cycle. */ -function syncResourceDir(srcDir: string, destDir: string): void { +export function syncResourceDir(srcDir: string, destDir: string): void { makeTreeWritable(destDir) if (existsSync(srcDir)) { pruneStaleSiblingFiles(srcDir, destDir) diff --git a/src/resources/extensions/claude-code-cli/stream-adapter.ts b/src/resources/extensions/claude-code-cli/stream-adapter.ts index fd02b1989..31c8d0823 100644 --- a/src/resources/extensions/claude-code-cli/stream-adapter.ts +++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts @@ -21,6 +21,10 @@ import type { import type { ExtensionUIContext } from "@singularity-forge/pi-coding-agent"; import { EventStream } from "@singularity-forge/pi-ai"; import { execSync } from "node:child_process"; +import { existsSync, readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { createRequire } from "node:module"; +import { dirname, join } from "node:path"; import { PartialMessageBuilder, ZERO_USAGE, mapUsage } from "./partial-builder.js"; import { buildWorkflowMcpServers } from "../sf/workflow-mcp.js"; import { showInterviewRound, type Question, type RoundResult } from "../shared/tui.js"; @@ -597,6 +601,440 @@ async function promptTextInputElicitation( return { action: "accept", content }; } +// --------------------------------------------------------------------------- +// canUseTool handler +// --------------------------------------------------------------------------- + +/** Options passed by the SDK to 
the canUseTool callback. */ +interface CanUseToolOptions { + signal: AbortSignal; + suggestions?: Array>; + blockedPath?: string; + decisionReason?: string; + title?: string; + displayName?: string; + description?: string; + toolUseID: string; + agentID?: string; +} + +/** Result returned by the canUseTool callback to the SDK. */ +type CanUseToolPermissionResult = + | { behavior: "allow"; updatedInput?: Record; updatedPermissions?: Array>; toolUseID?: string } + | { behavior: "deny"; message: string; interrupt?: boolean; toolUseID?: string }; + +/** + * Known CLI tools where the subcommand verb changes the risk profile. + * Value = number of subcommand tokens (beyond the executable) to capture + * in the "Always Allow" permission pattern. + * + * `git push` and `git log` are very different → depth 1 → `Bash(git push:*)` + * `gh pr create` and `gh pr list` differ at depth 2 → `Bash(gh pr create:*)` + * `ping` is always safe → not listed → `Bash(ping:*)` + */ +const SUBCOMMAND_DEPTH: Record = { + git: 1, + gh: 2, + npm: 1, + npx: 1, + yarn: 1, + pnpm: 1, + docker: 1, + kubectl: 1, + aws: 2, + az: 2, + gcloud: 2, + cargo: 1, + pip: 1, + pip3: 1, + brew: 1, + terraform: 1, + helm: 1, + dotnet: 1, +}; + +/** Command wrappers to skip when extracting the base executable. */ +const CMD_PASSTHROUGH = new Set(["sudo", "env", "command"]); + +/** + * Build a smart permission pattern for Bash "Always Allow". + * + * Simple commands → `Bash(ping:*)` (any args are fine) + * Subcommand-sensitive CLIs → `Bash(git push:*)` (verb is captured, args wildcarded) + */ +export function buildBashPermissionPattern(command: string): string { + // When the command is a chain like "cd /foo && gh pr list", extract the + // last segment — `cd` is just setup, the meaningful operation is what follows. + const segments = command.split(/\s*(?:&&|\|\||;)\s*/); + // Skip leading `cd` (directory setup) and trailing error suppressors + // like `|| true`, `|| :`, `|| echo ...`. 
The meaningful command is + // the first segment that is *neither* of those. + const SETUP_RE = /^\s*cd\s/; + const SUPPRESSOR_RE = /^\s*(?:true|:|echo\b)/; + let meaningful: string | undefined; + if (segments.length > 1) { + // Strip suppressors, then strip cd prefixes; take the *last* remaining + // segment — that's the meaningful command. + const trimmed = segments.filter(s => !SUPPRESSOR_RE.test(s)); + const core = trimmed.filter(s => !SETUP_RE.test(s)); + meaningful = core.length > 0 ? core[core.length - 1] : trimmed[trimmed.length - 1]; + } + meaningful = meaningful || segments[0] || command; + const rawTokens = meaningful.trim().split(/\s+/); + + // Skip sudo/env wrappers and leading VAR=val assignments + let idx = 0; + while (idx < rawTokens.length) { + if (CMD_PASSTHROUGH.has(rawTokens[idx])) { idx++; continue; } + if (/^[A-Za-z_]\w*=/.test(rawTokens[idx])) { idx++; continue; } + break; + } + const tokens = rawTokens.slice(idx).filter(Boolean); + if (tokens.length === 0) return "Bash(*)"; + + // Strip path and .exe from executable name + const base = tokens[0].replace(/^.*[\\/]/, "").replace(/\.exe$/i, ""); + const depth = SUBCOMMAND_DEPTH[base]; + + if (depth !== undefined) { + // Capture base + N subcommand tokens: "gh pr list" → Bash(gh pr list:*) + const significant = [base, ...tokens.slice(1, 1 + depth)].join(" "); + return `Bash(${significant}:*)`; + } + + // Simple command — any args are fine: "ping" → Bash(ping:*) + return `Bash(${base}:*)`; +} + +/** + * Build the list of granularity options presented after a user chooses + * "Always Allow" for a Bash command. 
+ * + * Rather than assuming the user wants the default smart pattern, the UI + * shows every meaningful prefix so the user explicitly picks the scope: + * + * "gh pr list --limit 5" → [ + * "Bash(gh:*)", // allow any gh command + * "Bash(gh pr:*)", // allow any gh pr subcommand + * "Bash(gh pr list:*)", // allow just this verb + * ] + * + * Flags (tokens starting with `-`) terminate the subcommand chain — they + * are call-site arguments, not stable verbs. Subcommand depth is capped + * at 3 to keep the menu short (max 4 options). + * + * Returns a single-entry list when there is no meaningful subcommand to + * choose from (e.g. `ls -la`). Callers can skip the second dialog in + * that case. + */ +export function buildBashPermissionPatternOptions(command: string): string[] { + const segments = command.split(/\s*(?:&&|\|\||;)\s*/); + const SETUP_RE = /^\s*cd\s/; + const SUPPRESSOR_RE = /^\s*(?:true|:|echo\b)/; + let meaningful: string | undefined; + if (segments.length > 1) { + const trimmed = segments.filter(s => !SUPPRESSOR_RE.test(s)); + const core = trimmed.filter(s => !SETUP_RE.test(s)); + meaningful = core.length > 0 ? core[core.length - 1] : trimmed[trimmed.length - 1]; + } + meaningful = meaningful || segments[0] || command; + const rawTokens = meaningful.trim().split(/\s+/); + + let idx = 0; + while (idx < rawTokens.length) { + if (CMD_PASSTHROUGH.has(rawTokens[idx])) { idx++; continue; } + if (/^[A-Za-z_]\w*=/.test(rawTokens[idx])) { idx++; continue; } + break; + } + const tokens = rawTokens.slice(idx).filter(Boolean); + if (tokens.length === 0) return ["Bash(*)"]; + + const base = tokens[0].replace(/^.*[\\/]/, "").replace(/\.exe$/i, ""); + + // Collect up to 3 subcommand tokens, stopping at the first flag. 
+ const subTokens: string[] = []; + for (let i = 1; i < tokens.length; i++) { + const t = tokens[i]; + if (t.startsWith("-")) break; + subTokens.push(t); + if (subTokens.length >= 3) break; + } + + const patterns: string[] = [`Bash(${base}:*)`]; + for (let i = 1; i <= subTokens.length; i++) { + patterns.push(`Bash(${[base, ...subTokens.slice(0, i)].join(" ")}:*)`); + } + return patterns; +} + +/** + * Read Bash allow-rule patterns from project and user settings files. + * + * Returns the ruleContent portion (e.g. `"gh pr list:*"`) for each + * `Bash(...)` entry found in `permissions.allow`. + */ +function readBashAllowRulesFromSettings(): string[] { + const rules: string[] = []; + const paths = [ + join(process.cwd(), ".claude", "settings.local.json"), + join(process.cwd(), ".claude", "settings.json"), + ]; + try { + paths.push(join(homedir(), ".claude", "settings.json")); + } catch { + // homedir() can throw on some platforms + } + for (const settingsPath of paths) { + try { + if (!existsSync(settingsPath)) continue; + const raw = JSON.parse(readFileSync(settingsPath, "utf8")); + const allow = raw?.permissions?.allow; + if (!Array.isArray(allow)) continue; + for (const entry of allow) { + if (typeof entry !== "string") continue; + const m = /^Bash\((.+)\)$/.exec(entry); + if (m) rules.push(m[1]); + } + } catch { + // Ignore malformed settings files + } + } + return rules; +} + +/** + * Check if a Bash compound command matches saved allow rules after + * extracting the meaningful segment. + * + * The SDK's built-in matcher refuses to match prefix rules against + * compound commands (e.g. `cd /path && gh pr list`). Claude Code + * routinely prepends `cd &&` to commands, causing saved rules + * to never match on re-invocation. This function strips safe leading + * segments (only `cd` commands) and checks the remaining operation + * against saved rules. 
+ * + * For compound commands, returns true only when all leading segments + * are `cd` commands and the final segment matches a saved rule. + * For simple (single-segment) commands, checks directly against saved + * rules — this covers the case where a rule was added mid-session and + * the SDK's in-memory cache is stale. + */ +export function bashCommandMatchesSavedRules(command: string): boolean { + const segments = command.split(/\s*(?:&&|\|\||;)\s*/).filter(Boolean); + if (segments.length === 0) return false; + + let meaningful: string; + if (segments.length === 1) { + meaningful = segments[0].trim(); + } else { + // Strip trailing error suppressors (|| true, || :, || echo ...) + // and leading cd segments. The first remaining segment is the + // meaningful command. All other non-cd, non-suppressor segments + // must be absent — otherwise we can't safely auto-approve. + const SETUP_RE = /^cd\s/; + const SUPPRESSOR_RE = /^\s*(?:true|:|echo\b)/; + const trimmed = segments.filter(s => !SUPPRESSOR_RE.test(s.trim())); + const core = trimmed.filter(s => !SETUP_RE.test(s.trim())); + if (core.length !== 1) return false; // ambiguous — multiple real commands + meaningful = core[0].trim(); + } + if (!meaningful) return false; + + const rules = readBashAllowRulesFromSettings(); + if (rules.length === 0) return false; + + for (const rule of rules) { + const prefixMatch = /^(.+):\*$/.exec(rule); + if (prefixMatch) { + const prefix = prefixMatch[1]; + if (meaningful === prefix || meaningful.startsWith(prefix + " ")) { + return true; + } + continue; + } + // Exact match + if (meaningful === rule) return true; + } + + return false; +} + +/** Format the tool input into a human-readable summary for the permission prompt. */ +function formatToolInput(toolName: string, input: Record): string { + // Bash — show the command + if (input.command && typeof input.command === "string") { + const cmd = input.command.length > 300 ? 
input.command.slice(0, 300) + "…" : input.command; + return cmd; + } + // File-oriented tools — show path + if (input.file_path && typeof input.file_path === "string") { + return `${toolName}: ${input.file_path}`; + } + // Generic fallback — compact JSON, truncated + const json = JSON.stringify(input); + if (json.length <= 200) return json; + return json.slice(0, 200) + "…"; +} + +/** + * Create a canUseTool handler that routes SDK permission requests through the + * extension UI's select dialog, or auto-approves when no UI is available. + * + * Presents three options: + * - **Allow** — approve this one invocation + * - **Always Allow** — approve and pass `suggestions` back as `updatedPermissions` + * so the SDK remembers the choice for the rest of the session + * - **Deny** — reject the invocation + * + * Follows the same pattern as {@link createClaudeCodeElicitationHandler}: + * takes an optional UI context and returns the callback or undefined. + * + * When UI is unavailable (headless / auto-mode sub-agents), returns a handler + * that always approves — replacing the old GSD_AUTO_MODE → bypassPermissions + * workaround. + */ +export function createClaudeCodeCanUseToolHandler( + ui: ExtensionUIContext | undefined, +): ((toolName: string, input: Record, options: CanUseToolOptions) => Promise) | undefined { + if (!ui) return undefined; + + return async (toolName, _input, options) => { + // Abort early if the signal is already fired + if (options.signal.aborted) { + return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID }; + } + + // For Bash compound commands (e.g. "cd /path && gh pr list"), + // check if the meaningful operation matches a saved allow rule. + // The SDK's built-in matcher rejects prefix rules for compound + // commands, but cd-prefixed commands are routine and the actual + // operation is already approved. 
+ if (toolName === "Bash" && typeof _input.command === "string") { + if (bashCommandMatchesSavedRules(_input.command)) { + return { behavior: "allow", updatedInput: _input, toolUseID: options.toolUseID }; + } + } + + const inputSummary = formatToolInput(toolName, _input); + const title = options.title || `Allow Claude Code to use: ${toolName}?`; + const body = [ + options.description, + inputSummary, + ].filter(Boolean).join("\n"); + + // The 2nd menu (level picker) lets the user choose the exact pattern, + // so the 1st menu just shows "Always Allow" without a command suffix. + const alwaysAllowLabel = "Always Allow"; + + try { + const choice = await ui.select( + `${title}\n${body}`, + ["Allow", alwaysAllowLabel, "Deny"], + { signal: options.signal }, + ); + + if (options.signal.aborted) { + return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID }; + } + + if (choice === alwaysAllowLabel) { + // Pass the SDK's own suggestions back as updatedPermissions so + // it knows how to persist them (PermissionUpdate[] shape). + // For Bash, patch the ruleContent with the user-chosen + // granularity pattern (e.g. "gh", "gh pr", "gh pr list") so + // the saved rule matches the scope the user actually wants. + let perms = options.suggestions; + let notifyLabel: string | undefined; + if (toolName === "Bash" && typeof _input.command === "string") { + // Present every meaningful prefix so the user picks the + // scope explicitly rather than getting a blanket match. + const patternOptions = buildBashPermissionPatternOptions(_input.command); + let chosenPattern: string; + if (patternOptions.length <= 1) { + // No subcommand choice to make (e.g. "ls -la") — use + // the single available pattern directly. + chosenPattern = patternOptions[0] ?? 
buildBashPermissionPattern(_input.command); + } else { + const levelChoiceRaw = await ui.select( + "Save permission at which level?", + patternOptions, + { signal: options.signal }, + ); + if (options.signal.aborted) { + return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID }; + } + const levelChoice = Array.isArray(levelChoiceRaw) ? levelChoiceRaw[0] : levelChoiceRaw; + if (!levelChoice || !patternOptions.includes(levelChoice)) { + // User dismissed the level picker — cancel the + // tool use. Falling back to a one-time allow + // here would leave the spawned agent running + // with no clear signal that the user bailed. + return { + behavior: "deny", + message: "User cancelled permission selection", + toolUseID: options.toolUseID, + }; + } + chosenPattern = levelChoice; + } + notifyLabel = chosenPattern; + // Extract the ruleContent portion from "Bash(gh pr list:*)" → "gh pr list:*" + const ruleContent = chosenPattern.replace(/^Bash\(/, "").replace(/\)$/, ""); + if (perms && Array.isArray(perms) && perms.length > 0) { + // Clone suggestions and patch ruleContent on any Bash addRules entry + perms = perms.map((s: any) => { + if (s.type === "addRules" && Array.isArray(s.rules)) { + return { + ...s, + rules: s.rules.map((r: any) => + r.toolName === "Bash" ? { ...r, ruleContent } : r, + ), + }; + } + return s; + }); + } else { + // No suggestions from SDK — build a proper PermissionUpdate + perms = [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent }], + behavior: "allow", + destination: "localSettings", + }]; + } + } + // Notify with the resolved pattern (label already previewed it) + if (notifyLabel) { + ui.notify(`Saved: ${notifyLabel}`, "info"); + } + return { + behavior: "allow", + updatedInput: _input, + toolUseID: options.toolUseID, + ...(perms ? 
{ updatedPermissions: perms } : {}), + }; + } + + if (choice === "Allow") { + return { + behavior: "allow", + updatedInput: _input, + toolUseID: options.toolUseID, + }; + } + + return { behavior: "deny", message: "User denied", toolUseID: options.toolUseID }; + } catch { + return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID }; + } + }; +} + +// --------------------------------------------------------------------------- +// Elicitation handler +// --------------------------------------------------------------------------- + +/** Create an SDK elicitation handler that routes requests through the extension UI dialogs, or undefined if no UI is available. */ export function createClaudeCodeElicitationHandler( ui: ExtensionUIContext | undefined, ): ((request: SdkElicitationRequest, options: { signal: AbortSignal }) => Promise) | undefined { @@ -976,18 +1414,26 @@ async function pumpSdkMessages( const prompt = buildPromptFromContext(context); const queryPrompt = buildSdkQueryPrompt(context, prompt); const permissionMode = await resolveClaudePermissionMode(); + const uiContext = (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext; + const canUseToolHandler = createClaudeCodeCanUseToolHandler(uiContext); + // When no UI is available (headless / auto-mode), auto-approve all + // tool requests. This replaces the old bypassPermissions workaround. + const canUseToolFallback = canUseToolHandler + ?? (async (_toolName: string, _input: Record, opts: CanUseToolOptions): Promise => + ({ behavior: "allow", toolUseID: opts.toolUseID })); const sdkOpts = buildSdkOptions( modelId, prompt, { permissionMode }, - typeof (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext === "object" - ? 
{ - reasoning: options?.reasoning, - onElicitation: createClaudeCodeElicitationHandler( - (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext, - ), - } - : { reasoning: options?.reasoning }, + { + reasoning: options?.reasoning, + canUseTool: canUseToolFallback, + ...(uiContext + ? { + onElicitation: createClaudeCodeElicitationHandler(uiContext), + } + : {}), + }, ); const queryResult = sdk.query({ diff --git a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts index a702b559d..0a23ef58e 100644 --- a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts +++ b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts @@ -12,6 +12,10 @@ import { buildPromptFromContext, buildSdkQueryPrompt, buildSdkOptions, + createClaudeCodeCanUseToolHandler, + buildBashPermissionPattern, + buildBashPermissionPatternOptions, + bashCommandMatchesSavedRules, createClaudeCodeElicitationHandler, extractImageBlocksFromContext, extractToolResultsFromSdkUserMessage, @@ -1101,3 +1105,811 @@ describe("stream-adapter — Windows Claude path lookup (#3770)", () => { assert.equal(parseClaudeLookupOutput(output), "C:\\Users\\Binoy\\.local\\bin\\claude.exe"); }); }); + +// --------------------------------------------------------------------------- +// canUseTool handler (#4383) +// --------------------------------------------------------------------------- + +describe("stream-adapter — canUseTool handler", () => { + function makeOptions(overrides: Partial<{ signal: AbortSignal; suggestions: Array>; title: string; description: string; toolUseID: string }> = {}) { + return { + signal: overrides.signal ?? new AbortController().signal, + toolUseID: overrides.toolUseID ?? "toolu_test123", + ...(overrides.title !== undefined ? { title: overrides.title } : {}), + ...(overrides.description !== undefined ? 
{ description: overrides.description } : {}), + ...(overrides.suggestions !== undefined ? { suggestions: overrides.suggestions } : {}), + }; + } + + // Point process.cwd() at an empty temp dir so the real repo's + // .claude/settings.local.json (which may already contain rules like + // "Bash(gh pr list:*)") does not short-circuit the permission flow. + // Returns a cleanup function that restores cwd and removes the temp dir. + // biome-ignore lint/suspicious/noExplicitAny: test-only monkey-patch + function withIsolatedCwd(): () => void { + const dir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-canusetool-"))); + const orig = process.cwd; + process.cwd = () => dir; + return () => { + process.cwd = orig; + rmSync(dir, { recursive: true, force: true }); + }; + } + + test("returns undefined when no UI context is provided", () => { + const handler = createClaudeCodeCanUseToolHandler(undefined); + assert.equal(handler, undefined); + }); + + test("shows select dialog with Allow/Always Allow/Deny and returns allow", async () => { + let selectPrompt = ""; + let selectOptions: string[] = []; + const ui = { + select: async (prompt: string, options: string[]) => { + selectPrompt = prompt; + selectOptions = options; + return "Allow"; + }, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + assert.ok(handler); + + const input = { command: "ls -la" }; + const result = await handler!("Bash", input, makeOptions({ + title: "Claude wants to run: ls -la", + description: "List directory contents", + })); + + assert.equal(result.behavior, "allow"); + assert.deepEqual((result as any).updatedInput, input); + assert.equal((result as any).toolUseID, "toolu_test123"); + // Allow (one-time) should NOT include updatedPermissions + assert.equal((result as any).updatedPermissions, undefined); + assert.deepEqual(selectOptions, ["Allow", "Always Allow", "Deny"]); + // Prompt includes title and input summary + assert.ok(selectPrompt.includes("Claude wants to run: ls -la")); 
+ assert.ok(selectPrompt.includes("ls -la")); + }); + + test("returns deny when user selects Deny", async () => { + const ui = { + select: async () => "Deny", + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "rm -rf /" }, makeOptions()); + + assert.equal(result.behavior, "deny"); + assert.equal((result as any).message, "User denied"); + assert.equal((result as any).toolUseID, "toolu_test123"); + }); + + test("returns deny when user dismisses dialog (undefined)", async () => { + const ui = { + select: async () => undefined, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "echo hi" }, makeOptions()); + + assert.equal(result.behavior, "deny"); + assert.equal((result as any).message, "User denied"); + }); + + test("Always Allow for Bash patches SDK suggestions with smart ruleContent", async () => { + const notified: string[] = []; + const ui = { select: async (_p: string, opts: string[]) => opts.find((o) => o.startsWith("Always Allow"))!, notify: (msg: string) => notified.push(msg) }; + const suggestions = [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "ls -la /tmp" }], + behavior: "allow", + destination: "localSettings", + }]; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "ls -la /tmp" }, makeOptions({ suggestions })); + + assert.equal(result.behavior, "allow"); + // Should patch ruleContent with our smart pattern, preserving SDK structure + assert.deepEqual((result as any).updatedPermissions, [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "ls:*" }], + behavior: "allow", + destination: "localSettings", + }]); + assert.equal(notified.length, 1); + assert.ok(notified[0].includes("Saved:") && notified[0].includes("Bash(ls:*)")); + }); + + test("Always Allow for Bash with subcommand-sensitive CLI captures verb", async () 
=> { + const cleanup = withIsolatedCwd(); + try { + const notified: string[] = []; + // First select call: pick "Always Allow ..."; second call (level + // picker): pick the "git push" granularity explicitly. + let selectCall = 0; + const ui = { + select: async (_p: string, opts: string[]) => { + selectCall++; + if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!; + return "Bash(git push:*)"; + }, + notify: (msg: string) => notified.push(msg), + }; + const suggestions = [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "git push origin main" }], + behavior: "allow", + destination: "localSettings", + }]; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "git push origin main" }, makeOptions({ suggestions })); + + assert.equal(result.behavior, "allow"); + assert.deepEqual((result as any).updatedPermissions, [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "git push:*" }], + behavior: "allow", + destination: "localSettings", + }]); + assert.ok(notified[0].includes("Saved:") && notified[0].includes("Bash(git push:*)")); + } finally { + cleanup(); + } + }); + + test("Always Allow for Bash without suggestions builds proper PermissionUpdate", async () => { + const cleanup = withIsolatedCwd(); + try { + const notified: string[] = []; + let selectCall = 0; + const ui = { + select: async (_p: string, opts: string[]) => { + selectCall++; + if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!; + return "Bash(gh pr list:*)"; + }, + notify: (msg: string) => notified.push(msg), + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "gh pr list" }, makeOptions()); + + assert.equal(result.behavior, "allow"); + // No SDK suggestions → builds PermissionUpdate from scratch + assert.deepEqual((result as any).updatedPermissions, [{ + type: "addRules", + rules: [{ toolName: 
"Bash", ruleContent: "gh pr list:*" }], + behavior: "allow", + destination: "localSettings", + }]); + assert.ok(notified[0].includes("Saved:") && notified[0].includes("Bash(gh pr list:*)")); + } finally { + cleanup(); + } + }); + + test("Always Allow for non-Bash tools passes SDK suggestions through", async () => { + const notified: string[] = []; + const ui = { select: async (_p: string, opts: string[]) => opts.find((o) => o.startsWith("Always Allow"))!, notify: (msg: string) => notified.push(msg) }; + const suggestions = [{ + type: "addRules", + rules: [{ toolName: "Write" }], + behavior: "allow", + destination: "localSettings", + }]; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Write", { file_path: "/tmp/test.txt" }, makeOptions({ suggestions })); + + assert.equal(result.behavior, "allow"); + assert.deepEqual((result as any).updatedPermissions, suggestions); + // Non-Bash tools don't emit a post-selection notification (only Bash runs the level picker) + assert.equal(notified.length, 0); + }); + + test("Always Allow for non-Bash without suggestions omits updatedPermissions", async () => { + const notified: string[] = []; + const ui = { select: async (_p: string, opts: string[]) => opts.find((o) => o.startsWith("Always Allow"))!, notify: (msg: string) => notified.push(msg) }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Write", { file_path: "/tmp/test.txt" }, makeOptions()); + + assert.equal(result.behavior, "allow"); + assert.equal((result as any).updatedPermissions, undefined); + // No suggestions → no notification + assert.equal(notified.length, 0); + }); + + test("prompt includes command text for Bash tools", async () => { + let selectPrompt = ""; + const ui = { + select: async (prompt: string) => { + selectPrompt = prompt; + return "Allow"; + }, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + await handler!("Bash", { command: 
"git status" }, makeOptions()); + assert.ok(selectPrompt.includes("git status"), `prompt should include command: ${selectPrompt}`); + }); + + test("prompt includes file_path for file tools", async () => { + let selectPrompt = ""; + const ui = { + select: async (prompt: string) => { + selectPrompt = prompt; + return "Allow"; + }, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + await handler!("Write", { file_path: "/tmp/test.txt", content: "hello" }, makeOptions()); + assert.ok(selectPrompt.includes("/tmp/test.txt"), `prompt should include file path: ${selectPrompt}`); + }); + + test("uses title from options when available", async () => { + let selectPrompt = ""; + const ui = { + select: async (prompt: string) => { + selectPrompt = prompt; + return "Allow"; + }, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + await handler!("WebFetch", {}, makeOptions({ title: "Claude wants to fetch: https://example.com" })); + assert.ok(selectPrompt.includes("Claude wants to fetch: https://example.com")); + }); + + test("falls back to default title when options.title is missing", async () => { + let selectPrompt = ""; + const ui = { + select: async (prompt: string) => { + selectPrompt = prompt; + return "Allow"; + }, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + await handler!("WebFetch", { url: "https://example.com" }, makeOptions()); + assert.ok(selectPrompt.includes("Allow Claude Code to use: WebFetch?")); + }); + + test("returns deny when signal is already aborted", async () => { + const ui = { + select: async () => { throw new Error("should not be called"); }, + }; + + const controller = new AbortController(); + controller.abort(); + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", {}, makeOptions({ signal: controller.signal })); + + assert.equal(result.behavior, "deny"); + assert.equal((result as any).message, "Aborted"); + }); + + test("returns 
deny when ui.select throws", async () => { + const ui = { + select: async () => { throw new Error("dialog crashed"); }, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", {}, makeOptions()); + + assert.equal(result.behavior, "deny"); + assert.equal((result as any).message, "Aborted"); + }); + + test("buildSdkOptions passes canUseTool through extraOptions", () => { + const canUseTool = async () => ({ behavior: "allow" as const, updatedInput: {}, toolUseID: "test" }); + const opts = buildSdkOptions("claude-sonnet-4-6", "test", undefined, { canUseTool }); + assert.equal(opts.canUseTool, canUseTool); + }); + + test("Always Allow shows level picker and user broadens to base command", async () => { + const cleanup = withIsolatedCwd(); + try { + const prompts: string[] = []; + const levelOpts: string[][] = []; + let selectCall = 0; + const ui = { + select: async (prompt: string, opts: string[]) => { + prompts.push(prompt); + selectCall++; + if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!; + levelOpts.push(opts); + return "Bash(gh:*)"; + }, + notify: () => {}, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "gh pr list" }, makeOptions()); + + assert.equal(result.behavior, "allow"); + assert.deepEqual((result as any).updatedPermissions, [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "gh:*" }], + behavior: "allow", + destination: "localSettings", + }]); + // Second dialog offered every granularity level + assert.deepEqual(levelOpts[0], [ + "Bash(gh:*)", + "Bash(gh pr:*)", + "Bash(gh pr list:*)", + ]); + assert.ok(prompts[1].includes("Save permission at which level?")); + } finally { + cleanup(); + } + }); + + test("Always Allow narrows to mid-level pattern when user picks Bash(gh pr:*)", async () => { + const cleanup = withIsolatedCwd(); + try { + let selectCall = 0; + const ui = { + select: 
async (_p: string, opts: string[]) => { + selectCall++; + if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!; + return "Bash(gh pr:*)"; + }, + notify: () => {}, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "gh pr list --limit 5" }, makeOptions()); + + assert.equal(result.behavior, "allow"); + assert.deepEqual((result as any).updatedPermissions, [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "gh pr:*" }], + behavior: "allow", + destination: "localSettings", + }]); + } finally { + cleanup(); + } + }); + + test("Always Allow skips level picker when only one pattern is available", async () => { + const cleanup = withIsolatedCwd(); + try { + const prompts: string[] = []; + const ui = { + select: async (prompt: string, opts: string[]) => { + prompts.push(prompt); + return opts.find((o) => o.startsWith("Always Allow"))!; + }, + notify: () => {}, + }; + + const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "ls -la /tmp" }, makeOptions()); + + assert.equal(result.behavior, "allow"); + // "ls" has no subcommand tokens before the flag → single-option path + assert.equal(prompts.length, 1, "should not show a second dialog"); + assert.deepEqual((result as any).updatedPermissions, [{ + type: "addRules", + rules: [{ toolName: "Bash", ruleContent: "ls:*" }], + behavior: "allow", + destination: "localSettings", + }]); + } finally { + cleanup(); + } + }); + + test("Always Allow denies the tool when level picker is dismissed", async () => { + const cleanup = withIsolatedCwd(); + try { + const notified: string[] = []; + let selectCall = 0; + const ui = { + select: async (_p: string, opts: string[]) => { + selectCall++; + if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!; + return undefined; // user dismissed level picker + }, + notify: (msg: string) => notified.push(msg), + }; + 
+ const handler = createClaudeCodeCanUseToolHandler(ui as any); + const result = await handler!("Bash", { command: "gh pr list" }, makeOptions()); + + // Dismissing the level picker cancels the tool use — a one-time allow + // would leave the spawned agent running even though the user bailed. + assert.equal(result.behavior, "deny"); + assert.equal((result as any).updatedPermissions, undefined); + assert.equal(notified.length, 0, "no 'Saved:' notification when nothing was saved"); + } finally { + cleanup(); + } + }); +}); + +// --------------------------------------------------------------------------- +// buildBashPermissionPattern — smart permission granularity +// --------------------------------------------------------------------------- + +describe("buildBashPermissionPattern", () => { + test("simple command wildcards all args", () => { + assert.equal(buildBashPermissionPattern("ping -n 4 localhost"), "Bash(ping:*)"); + assert.equal(buildBashPermissionPattern("echo hello world"), "Bash(echo:*)"); + assert.equal(buildBashPermissionPattern("ls -la /tmp"), "Bash(ls:*)"); + assert.equal(buildBashPermissionPattern("node server.js"), "Bash(node:*)"); + }); + + test("git captures one subcommand", () => { + assert.equal(buildBashPermissionPattern("git push origin main"), "Bash(git push:*)"); + assert.equal(buildBashPermissionPattern("git log --oneline"), "Bash(git log:*)"); + assert.equal(buildBashPermissionPattern("git status"), "Bash(git status:*)"); + }); + + test("gh captures two subcommands", () => { + assert.equal(buildBashPermissionPattern("gh pr list"), "Bash(gh pr list:*)"); + assert.equal(buildBashPermissionPattern("gh pr create --title foo"), "Bash(gh pr create:*)"); + assert.equal(buildBashPermissionPattern("gh issue view 123"), "Bash(gh issue view:*)"); + }); + + test("npm captures one subcommand", () => { + assert.equal(buildBashPermissionPattern("npm install lodash"), "Bash(npm install:*)"); + assert.equal(buildBashPermissionPattern("npm publish"), 
"Bash(npm publish:*)"); + assert.equal(buildBashPermissionPattern("npm run test"), "Bash(npm run:*)"); + }); + + test("npx captures package name", () => { + assert.equal(buildBashPermissionPattern("npx vitest run"), "Bash(npx vitest:*)"); + assert.equal(buildBashPermissionPattern("npx --version"), "Bash(npx --version:*)"); + }); + + test("docker captures one subcommand", () => { + assert.equal(buildBashPermissionPattern("docker ps -a"), "Bash(docker ps:*)"); + assert.equal(buildBashPermissionPattern("docker rm container1"), "Bash(docker rm:*)"); + }); + + test("aws captures two subcommands", () => { + assert.equal(buildBashPermissionPattern("aws s3 cp file.txt s3://bucket/"), "Bash(aws s3 cp:*)"); + assert.equal(buildBashPermissionPattern("aws ec2 describe-instances"), "Bash(aws ec2 describe-instances:*)"); + }); + + test("skips sudo wrapper", () => { + assert.equal(buildBashPermissionPattern("sudo ping localhost"), "Bash(ping:*)"); + assert.equal(buildBashPermissionPattern("sudo git push"), "Bash(git push:*)"); + }); + + test("skips env wrapper and VAR=val assignments", () => { + assert.equal(buildBashPermissionPattern("env NODE_ENV=prod node server.js"), "Bash(node:*)"); + assert.equal(buildBashPermissionPattern("NODE_ENV=prod node server.js"), "Bash(node:*)"); + assert.equal(buildBashPermissionPattern("FOO=bar BAZ=qux git push"), "Bash(git push:*)"); + }); + + test("strips path from executable", () => { + assert.equal(buildBashPermissionPattern("/usr/bin/git push"), "Bash(git push:*)"); + assert.equal(buildBashPermissionPattern("C:\\Windows\\ping.exe localhost"), "Bash(ping:*)"); + }); + + test("empty or whitespace-only command", () => { + assert.equal(buildBashPermissionPattern(""), "Bash(*)"); + assert.equal(buildBashPermissionPattern(" "), "Bash(*)"); + }); + + test("chained commands — extracts pattern from the meaningful segment", () => { + assert.equal(buildBashPermissionPattern("cd /foo && gh pr list --limit 5"), "Bash(gh pr list:*)"); + 
assert.equal(buildBashPermissionPattern("cd C:/Users/djeff/repos/gsd-2 && gh pr list --limit 5"), "Bash(gh pr list:*)"); + assert.equal(buildBashPermissionPattern("cd /tmp && git push origin main"), "Bash(git push:*)"); + assert.equal(buildBashPermissionPattern("export FOO=1 && npm install lodash"), "Bash(npm install:*)"); + assert.equal(buildBashPermissionPattern("mkdir -p out; docker ps -a"), "Bash(docker ps:*)"); + assert.equal(buildBashPermissionPattern("echo start || ping localhost"), "Bash(ping:*)"); + }); + + test("skips trailing || true / || : error suppressors", () => { + assert.equal( + buildBashPermissionPattern("cd C:/Users/djeff/repos/gsd-2 && gh pr create --dry-run --title \"test\" --body \"test\" 2>&1 || true"), + "Bash(gh pr create:*)", + ); + assert.equal(buildBashPermissionPattern("gh pr list || true"), "Bash(gh pr list:*)"); + assert.equal(buildBashPermissionPattern("git push || :"), "Bash(git push:*)"); + assert.equal(buildBashPermissionPattern("cd /tmp && npm install || echo failed"), "Bash(npm install:*)"); + }); + + test("single command is unaffected by chain extraction", () => { + assert.equal(buildBashPermissionPattern("gh pr list"), "Bash(gh pr list:*)"); + assert.equal(buildBashPermissionPattern("git push origin main"), "Bash(git push:*)"); + }); +}); + +// --------------------------------------------------------------------------- +// buildBashPermissionPatternOptions — granularity level menu +// --------------------------------------------------------------------------- + +describe("buildBashPermissionPatternOptions", () => { + test("offers every prefix from base to full subcommand chain", () => { + assert.deepEqual(buildBashPermissionPatternOptions("gh pr list"), [ + "Bash(gh:*)", + "Bash(gh pr:*)", + "Bash(gh pr list:*)", + ]); + assert.deepEqual(buildBashPermissionPatternOptions("git push origin main"), [ + "Bash(git:*)", + "Bash(git push:*)", + "Bash(git push origin:*)", + "Bash(git push origin main:*)", + ]); + }); + + test("stops 
at first flag — flags are args, not verbs", () => { + assert.deepEqual(buildBashPermissionPatternOptions("gh pr create --title foo"), [ + "Bash(gh:*)", + "Bash(gh pr:*)", + "Bash(gh pr create:*)", + ]); + assert.deepEqual(buildBashPermissionPatternOptions("git log --oneline"), [ + "Bash(git:*)", + "Bash(git log:*)", + ]); + }); + + test("single-option when there is no subcommand to choose from", () => { + assert.deepEqual(buildBashPermissionPatternOptions("ls -la /tmp"), ["Bash(ls:*)"]); + assert.deepEqual(buildBashPermissionPatternOptions("ping -n 4 localhost"), ["Bash(ping:*)"]); + assert.deepEqual(buildBashPermissionPatternOptions("node"), ["Bash(node:*)"]); + }); + + test("extracts meaningful segment from compound commands", () => { + assert.deepEqual(buildBashPermissionPatternOptions("cd /foo && gh pr list"), [ + "Bash(gh:*)", + "Bash(gh pr:*)", + "Bash(gh pr list:*)", + ]); + assert.deepEqual(buildBashPermissionPatternOptions("gh pr create --dry-run || true"), [ + "Bash(gh:*)", + "Bash(gh pr:*)", + "Bash(gh pr create:*)", + ]); + }); + + test("caps at three subcommand tokens to keep the menu short", () => { + const result = buildBashPermissionPatternOptions("foo bar baz qux quux corge"); + // base + 3 sub tokens = 4 patterns max + assert.equal(result.length, 4); + assert.deepEqual(result, [ + "Bash(foo:*)", + "Bash(foo bar:*)", + "Bash(foo bar baz:*)", + "Bash(foo bar baz qux:*)", + ]); + }); + + test("skips sudo/env wrappers like the single-pattern variant", () => { + assert.deepEqual(buildBashPermissionPatternOptions("sudo git push origin"), [ + "Bash(git:*)", + "Bash(git push:*)", + "Bash(git push origin:*)", + ]); + assert.deepEqual(buildBashPermissionPatternOptions("NODE_ENV=prod node server.js"), [ + "Bash(node:*)", + "Bash(node server.js:*)", + ]); + }); + + test("empty command returns the catch-all pattern", () => { + assert.deepEqual(buildBashPermissionPatternOptions(""), ["Bash(*)"]); + assert.deepEqual(buildBashPermissionPatternOptions(" "), 
["Bash(*)"]); + }); +}); + +// --------------------------------------------------------------------------- +// bashCommandMatchesSavedRules — compound command bypass for saved rules +// --------------------------------------------------------------------------- + +describe("bashCommandMatchesSavedRules — compound command bypass", () => { + let tempDir: string; + let originalCwd: string; + + // Create a temp project directory with .claude/settings.local.json + function setupSettings(allow: string[]): void { + const claudeDir = join(tempDir, ".claude"); + mkdirSync(claudeDir, { recursive: true }); + writeFileSync( + join(claudeDir, "settings.local.json"), + JSON.stringify({ permissions: { allow } }), + ); + } + + // biome-ignore lint/suspicious/noExplicitAny: test-only monkey-patch + let origCwd: any; + + // Monkey-patch process.cwd() to point at our temp dir + function setCwd(dir: string): void { + origCwd = process.cwd; + process.cwd = () => dir; + } + function restoreCwd(): void { + if (origCwd) process.cwd = origCwd; + } + + test("matches cd-prefixed compound command against saved prefix rule", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(gh pr list:*)"]); + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules("cd /some/path && gh pr list --limit 5"), + true, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("matches cd-prefixed compound command with exact subcommand", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(gh pr list:*)"]); + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules("cd C:/Users/foo/repos/bar && gh pr list"), + true, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("rejects when leading segment is not cd", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + 
try { + setupSettings(["Bash(gh pr list:*)"]); + setCwd(tempDir); + // "rm -rf /tmp" is not a cd command — should not auto-approve + assert.equal( + bashCommandMatchesSavedRules("rm -rf /tmp && gh pr list"), + false, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("rejects when meaningful segment does not match any rule", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(gh pr list:*)"]); + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules("cd /path && gh issue create --title foo"), + false, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("matches simple (non-compound) commands against on-disk rules", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(gh pr list:*)"]); + setCwd(tempDir); + // Simple commands must also be checked — the SDK's in-memory cache + // may be stale if the rule was added mid-session via "Always Allow" + assert.equal(bashCommandMatchesSavedRules("gh pr list --limit 5"), true); + assert.equal(bashCommandMatchesSavedRules("gh pr list"), true); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("returns false for simple commands with no matching rule", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(gh pr list:*)"]); + setCwd(tempDir); + assert.equal(bashCommandMatchesSavedRules("gh issue list --limit 5"), false); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("returns false when no settings file exists", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + // No .claude/settings.local.json created + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules("cd /path && gh pr list"), + false, + ); + 
} finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("matches exact rule (non-prefix)", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(ping -n 4 localhost)"]); + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules("cd /path && ping -n 4 localhost"), + true, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("handles multiple cd segments before the meaningful command", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(npm install:*)"]); + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules("cd /home && cd project && npm install lodash"), + true, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("matches compound command with trailing || true suppressor", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + setupSettings(["Bash(gh pr create:*)"]); + setCwd(tempDir); + assert.equal( + bashCommandMatchesSavedRules('cd C:/Users/djeff/repos/gsd-2 && gh pr create --dry-run --title "test" --body "test" 2>&1 || true'), + true, + ); + assert.equal( + bashCommandMatchesSavedRules("gh pr create --dry-run || true"), + true, + ); + assert.equal( + bashCommandMatchesSavedRules("cd /tmp && git push || :"), + false, // rule is for gh pr create, not git push + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); + + test("reads rules from settings.json as well as settings.local.json", () => { + tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-"))); + try { + const claudeDir = join(tempDir, ".claude"); + mkdirSync(claudeDir, { recursive: true }); + writeFileSync( + join(claudeDir, "settings.json"), + JSON.stringify({ permissions: { allow: ["Bash(git push:*)"] } }), + ); + setCwd(tempDir); + 
assert.equal( + bashCommandMatchesSavedRules("cd /repo && git push origin main"), + true, + ); + } finally { + restoreCwd(); + rmSync(tempDir, { recursive: true, force: true }); + } + }); +}); diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index c97eb1dd3..0733d983c 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -55,6 +55,8 @@ import { import { resolveModelWithFallbacksForUnit } from "./preferences-models.js"; import { resolveUokFlags } from "./uok/flags.js"; import { selectReactiveDispatchBatch } from "./uok/execution-graph.js"; +import { EXECUTION_ENTRY_PHASES } from "./uok/plan-v2.js"; +import { getMilestonePipelineVariant } from "./milestone-scope-classifier.js"; // ─── Types ──────────────────────────────────────────────────────────────── @@ -95,6 +97,11 @@ function missingSliceStop(mid: string, phase: string): DispatchAction { }; } +function isMilestonePlanRepairState(state: SFState): boolean { + if (state.phase !== "planning" || state.activeSlice) return false; + return /roadmap is incomplete|weighted vision alignment meeting/i.test(state.nextAction ?? ""); +} + /** * Check for milestone slices missing SUMMARY files. * Returns array of missing slice IDs, or empty array if all present or DB unavailable. @@ -341,7 +348,7 @@ export const DISPATCH_RULES: DispatchRule[] = [ // is essential for roadmap integrity. Opt-out via explicit `false`. const reassessEnabled = prefs?.phases?.reassess_after_slice ?? true; if (!reassessEnabled) return null; - const needsReassess = await checkNeedsReassessment(basePath, mid, state); + const needsReassess = await checkNeedsReassessment(basePath, mid, state, prefs); if (!needsReassess) return null; return { action: "dispatch", @@ -368,6 +375,27 @@ export const DISPATCH_RULES: DispatchRule[] = [ }; }, }, + { + // #4671 — Recovery for execution-entry phases with missing CONTEXT.md. 
+ // Once deriveStateFromDb returns an execution-entry phase the pre-planning + // guard no longer fires. The plan-v2 gate detects missing context but can + // only block — it cannot redispatch. Without this rule the milestone is + // stuck until `sf doctor heal`. Fire BEFORE execution-entry phase rules. + name: "execution-entry phase (no context) → discuss-milestone", + match: async ({ state, mid, midTitle, basePath }) => { + if (!EXECUTION_ENTRY_PHASES.has(state.phase)) return null; + const contextFile = resolveMilestoneFile(basePath, mid, "CONTEXT"); + const contextContent = contextFile ? await loadFile(contextFile) : null; + const hasContext = !!(contextContent && contextContent.trim().length > 0); + if (hasContext) return null; + return { + action: "dispatch", + unitType: "discuss-milestone", + unitId: mid, + prompt: await buildDiscussMilestonePrompt(mid, midTitle, basePath), + }; + }, + }, { name: "pre-planning (no context) → discuss-milestone", match: async ({ state, mid, midTitle, basePath }) => { @@ -411,6 +439,18 @@ export const DISPATCH_RULES: DispatchRule[] = [ }; }, }, + { + name: "planning (roadmap incomplete) → plan-milestone", + match: async ({ state, mid, midTitle, basePath }) => { + if (!isMilestonePlanRepairState(state)) return null; + return { + action: "dispatch", + unitType: "plan-milestone", + unitId: mid, + prompt: await buildPlanMilestonePrompt(mid, midTitle, basePath), + }; + }, + }, { // Keep this rule before the single-slice research rule so the multi-slice // path wins whenever 2+ slices are ready. 
@@ -474,6 +514,8 @@ export const DISPATCH_RULES: DispatchRule[] = [ // Phase skip: skip research when preference or profile says so if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research) return null; + // #4781 phase 2: trivial-scope milestones skip dedicated slice research + if (await getMilestonePipelineVariant(mid) === "trivial") return null; if (!state.activeSlice) return missingSliceStop(mid, state.phase); const sid = state.activeSlice!.id; const sTitle = state.activeSlice!.title; diff --git a/src/resources/extensions/sf/auto-prompts.ts b/src/resources/extensions/sf/auto-prompts.ts index 1fdd9997f..bb2376480 100644 --- a/src/resources/extensions/sf/auto-prompts.ts +++ b/src/resources/extensions/sf/auto-prompts.ts @@ -14,17 +14,17 @@ import { resolveMilestoneFile, resolveSliceFile, resolveSlicePath, resolveTasksDir, resolveTaskFiles, resolveTaskFile, relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath, - resolveSfRootFile, relSfRootFile, resolveRuntimeFile, + resolveGsdRootFile, relGsdRootFile, resolveRuntimeFile, } from "./paths.js"; -import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveSFPreferences, resolveAllSkillReferences } from "./preferences.js"; +import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveGSDPreferences, resolveAllSkillReferences } from "./preferences.js"; import { parseRoadmap } from "./parsers-legacy.js"; -import type { SFState, InlineLevel } from "./types.js"; -import type { SFPreferences } from "./preferences.js"; -import { getLoadedSkills, type Skill } from "@singularity-forge/pi-coding-agent"; +import type { GSDState, InlineLevel } from "./types.js"; +import type { GSDPreferences } from "./preferences.js"; +import { getLoadedSkills, type Skill } from "@gsd/pi-coding-agent"; import { join, basename } from "node:path"; import { existsSync } from "node:fs"; -import { computeBudgets, resolveExecutorContextWindow, truncateAtSectionBoundary } from "./context-budget.js"; -import { 
getPendingGates, getPendingGatesForTurn } from "./sf-db.js"; +import { computeBudgets, resolveExecutorContextWindow, truncateAtSectionBoundary, type MinimalModelRegistry } from "./context-budget.js"; +import { getPendingGates, getPendingGatesForTurn } from "./gsd-db.js"; import { GATE_REGISTRY, assertGateCoverage, @@ -33,28 +33,60 @@ import { } from "./gate-registry.js"; import { formatDecisionsCompact, formatRequirementsCompact } from "./structured-data-formatter.js"; import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js"; +import { composeInlinedContext, type ArtifactResolver } from "./unit-context-composer.js"; import { logWarning } from "./workflow-logger.js"; import { inlineGraphSubgraph } from "./graph-context.js"; - -// ─── Memory Injection ───────────────────────────────────────────────────────── - -async function buildMemoriesBlock(limit = 5): Promise { - try { - const { getActiveMemoriesRanked, formatMemoriesForPrompt } = await import("./memory-store.js"); - const memories = getActiveMemoriesRanked(limit); - return formatMemoriesForPrompt(memories); - } catch { - return ""; - } -} +import { buildExtractionStepsBlock } from "./commands-extract-learnings.js"; +import { warnIfManifestHasMissingSkills } from "./skill-manifest.js"; // ─── Preamble Cap ───────────────────────────────────────────────────────────── +/** + * Historical static ceiling for the preamble cap. Kept as an upper bound even + * after context-window-aware sizing so large-window users don't suddenly see + * 10× looser caps than before. Small-window users get a tighter cap derived + * from their configured executor window. + */ const MAX_PREAMBLE_CHARS = 30_000; +/** + * Resolve prompt budgets from the configured executor context window. 
+ * + * The prompt builders here don't have access to the runtime model registry + * (they're called from many non-ctx sites), so `resolveExecutorContextWindow` + * is fed the user-configurable `context_window_override` preference as the + * `sessionContextWindow` fallback. That preference exists specifically to + * cover small-window local models (e.g. 32K lemonade/llama.cpp servers) whose + * n_ctx is not discoverable through the model registry. Issue #4435. + */ +function resolvePromptBudgets(): ReturnType { + try { + const prefs = loadEffectiveGSDPreferences(); + const sessionWindow = prefs?.preferences.context_window_override; + const windowTokens = resolveExecutorContextWindow(undefined, prefs?.preferences, sessionWindow); + return computeBudgets(windowTokens); + } catch (e) { + logWarning("prompt", `resolvePromptBudgets failed: ${(e as Error).message}`); + return computeBudgets(200_000); + } +} + +/** + * Character budget for dependency/prior slice summaries injected into dispatch + * prompts. Scales with the executor's configured context window (issue #4435). + */ +function resolveSummaryBudgetChars(): number { + return resolvePromptBudgets().summaryBudgetChars; +} + function capPreamble(preamble: string): string { - if (preamble.length <= MAX_PREAMBLE_CHARS) return preamble; - return truncateAtSectionBoundary(preamble, MAX_PREAMBLE_CHARS).content; + // Cap inlined context at min(historical 30K ceiling, scaled inline budget). + // The ceiling preserves pre-fix behavior for large-window users; the scaled + // budget tightens the cap for small-window users whose true safe limit is + // below 30K. `computeBudgets` allocates 40% of total chars to inline context. 
+ const budget = Math.min(MAX_PREAMBLE_CHARS, resolvePromptBudgets().inlineContextBudgetChars); + if (preamble.length <= budget) return preamble; + return truncateAtSectionBoundary(preamble, budget).content; } // ─── Executor Constraints ───────────────────────────────────────────────────── @@ -64,14 +96,19 @@ function capPreamble(preamble: string): string { * Uses the budget engine to compute task count ranges and inline context budgets * based on the configured executor model's context window. */ -function formatExecutorConstraints(): string { +function formatExecutorConstraints( + sessionContextWindow?: number, + modelRegistry?: MinimalModelRegistry, +): string { let windowTokens: number; try { - const prefs = loadEffectiveSFPreferences(); - windowTokens = resolveExecutorContextWindow(undefined, prefs?.preferences); + const prefs = loadEffectiveGSDPreferences(); + windowTokens = resolveExecutorContextWindow(modelRegistry, prefs?.preferences, sessionContextWindow); } catch (e) { logWarning("prompt", `resolveExecutorContextWindow failed: ${(e as Error).message}`); - windowTokens = 200_000; // safe default + // Delegate to the budget engine without prefs (the path that just threw) + // so DEFAULT_CONTEXT_WINDOW stays the single source of truth. + windowTokens = resolveExecutorContextWindow(undefined, undefined, sessionContextWindow); } const budgets = computeBudgets(windowTokens); const { min, max } = budgets.taskCountRange; @@ -87,31 +124,42 @@ function formatExecutorConstraints(): string { ].join("\n"); } -function buildSourceFilePaths( +/** + * Returns a markdown bullet list of known context file paths for the given + * milestone (and optionally slice). Falls back to a generic tool-agnostic + * instruction when no GSD artifacts are found. + * + * @param base - Absolute path to the project root. + * @param mid - Milestone ID (e.g. `"M001"`). + * @param sid - Optional slice ID (e.g. `"S01"`). 
When provided, the slice + * RESEARCH file is preferred over the milestone-level one. + * @returns Markdown string of file path bullets, or a fallback instruction. + */ +export function buildSourceFilePaths( base: string, mid: string, sid?: string, ): string { const paths: string[] = []; - const projectPath = resolveSfRootFile(base, "PROJECT"); + const projectPath = resolveGsdRootFile(base, "PROJECT"); if (existsSync(projectPath)) { - paths.push(`- **Project**: \`${relSfRootFile("PROJECT")}\``); + paths.push(`- **Project**: \`${relGsdRootFile("PROJECT")}\``); } - const requirementsPath = resolveSfRootFile(base, "REQUIREMENTS"); + const requirementsPath = resolveGsdRootFile(base, "REQUIREMENTS"); if (existsSync(requirementsPath)) { - paths.push(`- **Requirements**: \`${relSfRootFile("REQUIREMENTS")}\``); + paths.push(`- **Requirements**: \`${relGsdRootFile("REQUIREMENTS")}\``); } - const decisionsPath = resolveSfRootFile(base, "DECISIONS"); + const decisionsPath = resolveGsdRootFile(base, "DECISIONS"); if (existsSync(decisionsPath)) { - paths.push(`- **Decisions**: \`${relSfRootFile("DECISIONS")}\``); + paths.push(`- **Decisions**: \`${relGsdRootFile("DECISIONS")}\``); } - const queuePath = resolveSfRootFile(base, "QUEUE"); + const queuePath = resolveGsdRootFile(base, "QUEUE"); if (existsSync(queuePath)) { - paths.push(`- **Queue**: \`${relSfRootFile("QUEUE")}\``); + paths.push(`- **Queue**: \`${relGsdRootFile("QUEUE")}\``); } const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); @@ -138,7 +186,7 @@ function buildSourceFilePaths( return paths.length > 0 ? 
paths.join("\n") - : "- Use `rg --files` and targeted reads to identify the relevant source files before planning."; + : "- Use the Grep/Glob/Read tools to identify the relevant source files before planning."; } // ─── Inline Helpers ─────────────────────────────────────────────────────── @@ -201,6 +249,87 @@ export async function inlineFileSmart( return `### ${label}\nSource: \`${relPath}\`\n\n${truncated}`; } +/** + * Compact slice-summary excerpt for milestone-level closers (#4780). + * + * Emits the frontmatter fields + short body section heads rather than the + * full SUMMARY.md body, and keeps the source path in the header so the + * closer agent can Read the full file on demand when drafting LEARNINGS. + * + * Scope: designed for `buildCompleteMilestonePrompt`, which previously + * inlined the full SUMMARY per slice and routinely paid ~300–500K tokens + * per close when the narrative was never synthesized. Not used by + * `buildValidateMilestonePrompt` yet — validate needs fuller verification + * evidence; follow-up PR can extend or parameterize. + * + * If parsing fails (unrecognizable frontmatter, missing id, etc.) the + * function falls back to `inlineFile` so the closer loses no information. + */ +export async function buildSliceSummaryExcerpt( + absPath: string | null, relPath: string, sid: string, +): Promise { + const header = `### ${sid} Summary (excerpt)\nSource: \`${relPath}\``; + const content = absPath ? await loadFile(absPath) : null; + if (!content) { + return `${header}\n\n_(not found — file does not exist yet)_`; + } + try { + const s = parseSummary(content); + if (!s.frontmatter.id) { + // Unrecognizable — fall back to full file so no context is lost. 
+ return `### ${sid} Summary\nSource: \`${relPath}\`\n\n${content.trim()}`; + } + const lines: string[] = [header, ""]; + if (s.title) lines.push(`**Title:** ${s.title}`); + if (s.oneLiner) lines.push(`**One-liner:** ${s.oneLiner}`); + if (s.frontmatter.verification_result) { + lines.push(`**Verification:** \`${s.frontmatter.verification_result}\``); + } + lines.push(`**Blockers:** ${s.frontmatter.blocker_discovered ? "⚠️ blocker recorded — Read full summary" : "none"}`); + if (s.frontmatter.duration) lines.push(`**Duration:** ${s.frontmatter.duration}`); + if (s.frontmatter.provides.length > 0) lines.push(`**Provides:** ${s.frontmatter.provides.join("; ")}`); + if (s.frontmatter.affects.length > 0) lines.push(`**Affects:** ${s.frontmatter.affects.join("; ")}`); + if (s.frontmatter.key_decisions.length > 0) lines.push(`**Key decisions:** ${s.frontmatter.key_decisions.join("; ")}`); + if (s.frontmatter.patterns_established.length > 0) lines.push(`**Patterns established:** ${s.frontmatter.patterns_established.join("; ")}`); + if (s.frontmatter.key_files.length > 0) { + const files = s.frontmatter.key_files.slice(0, 8); + const more = s.frontmatter.key_files.length > files.length ? ` (+${s.frontmatter.key_files.length - files.length} more)` : ""; + lines.push(`**Key files:** ${files.join(", ")}${more}`); + } + + // Cap section bodies (coderabbit review on #4908): if any of these + // narrative sections balloon, excerpt mode still inflates and + // undermines the token-reduction goal. 800 chars (~200 tokens) is + // enough to carry intent; the closer agent Reads the full file when + // it needs richer context for LEARNINGS synthesis. 
+ const SECTION_CAP_CHARS = 800; + const capSection = (body: string): string => { + const trimmed = body.trim(); + if (trimmed.length <= SECTION_CAP_CHARS) return trimmed; + return `${trimmed.slice(0, SECTION_CAP_CHARS)}\n… (truncated — see full \`${relPath}\`)`; + }; + + if (s.deviations && s.deviations.trim()) { + lines.push("", "#### Deviations", capSection(s.deviations)); + } + if (s.knownLimitations && s.knownLimitations.trim()) { + lines.push("", "#### Known limitations", capSection(s.knownLimitations)); + } + if (s.followUps && s.followUps.trim()) { + lines.push("", "#### Follow-ups", capSection(s.followUps)); + } + + lines.push( + "", + `> **On-demand:** read \`${relPath}\` for the full "What Happened" narrative, integration notes, and detailed file-change list when drafting LEARNINGS, the Decision Re-evaluation table, or cross-slice synthesis.`, + ); + return lines.join("\n"); + } catch { + // Defensive — any parse failure falls back to full inline. + return `### ${sid} Summary\nSource: \`${relPath}\`\n\n${content.trim()}`; + } +} + /** * Load and inline dependency slice summaries (full content, not just paths). */ @@ -210,7 +339,7 @@ export async function inlineDependencySummaries( // DB primary path — get slice depends directly let depends: string[] | null = null; try { - const { isDbAvailable, getSlice } = await import("./sf-db.js"); + const { isDbAvailable, getSlice } = await import("./gsd-db.js"); if (isDbAvailable()) { const slice = getSlice(mid, sid); if (slice) { @@ -264,16 +393,16 @@ export async function inlineDependencySummaries( } /** - * Load a well-known .sf/ root file for optional inlining. + * Load a well-known .gsd/ root file for optional inlining. * Handles the existsSync check internally. 
*/ export async function inlineGsdRootFile( base: string, filename: string, label: string, ): Promise { const key = filename.replace(/\.md$/i, "").toUpperCase() as "PROJECT" | "DECISIONS" | "QUEUE" | "STATE" | "REQUIREMENTS" | "KNOWLEDGE"; - const absPath = resolveSfRootFile(base, key); + const absPath = resolveGsdRootFile(base, key); if (!existsSync(absPath)) return null; - return inlineFileOptional(absPath, relSfRootFile(key), label); + return inlineFileOptional(absPath, relGsdRootFile(key), label); } // ─── DB-Aware Inline Helpers ────────────────────────────────────────────── @@ -292,7 +421,7 @@ export async function inlineDecisionsFromDb( ): Promise { const inlineLevel = level ?? resolveInlineLevel(); try { - const { isDbAvailable } = await import("./sf-db.js"); + const { isDbAvailable } = await import("./gsd-db.js"); if (isDbAvailable()) { const { queryDecisions, formatDecisionsForPrompt } = await import("./context-store.js"); @@ -309,7 +438,7 @@ export async function inlineDecisionsFromDb( const formatted = inlineLevel !== "full" ? formatDecisionsCompact(decisions) : formatDecisionsForPrompt(decisions); - return `### Decisions\nSource: \`.sf/DECISIONS.md\`\n\n${formatted}`; + return `### Decisions\nSource: \`.gsd/DECISIONS.md\`\n\n${formatted}`; } // DB available but cascade returned empty — intentional per D020, don't fall back to file return null; @@ -330,7 +459,7 @@ export async function inlineRequirementsFromDb( ): Promise { const inlineLevel = level ?? resolveInlineLevel(); try { - const { isDbAvailable } = await import("./sf-db.js"); + const { isDbAvailable } = await import("./gsd-db.js"); if (isDbAvailable()) { const { queryRequirements, formatRequirementsForPrompt } = await import("./context-store.js"); const requirements = queryRequirements({ milestoneId, sliceId }); @@ -339,7 +468,7 @@ export async function inlineRequirementsFromDb( const formatted = inlineLevel !== "full" ? 
formatRequirementsCompact(requirements) : formatRequirementsForPrompt(requirements); - return `### Requirements\nSource: \`.sf/REQUIREMENTS.md\`\n\n${formatted}`; + return `### Requirements\nSource: \`.gsd/REQUIREMENTS.md\`\n\n${formatted}`; } } } catch (err) { @@ -356,12 +485,12 @@ export async function inlineProjectFromDb( base: string, ): Promise { try { - const { isDbAvailable } = await import("./sf-db.js"); + const { isDbAvailable } = await import("./gsd-db.js"); if (isDbAvailable()) { const { queryProject } = await import("./context-store.js"); const content = queryProject(); if (content) { - return `### Project\nSource: \`.sf/PROJECT.md\`\n\n${content}`; + return `### Project\nSource: \`.gsd/PROJECT.md\`\n\n${content}`; } } } catch (err) { @@ -452,7 +581,7 @@ export async function inlineKnowledgeScoped( base: string, keywords: string[], ): Promise { - const knowledgePath = resolveSfRootFile(base, "KNOWLEDGE"); + const knowledgePath = resolveGsdRootFile(base, "KNOWLEDGE"); if (!existsSync(knowledgePath)) return null; const content = await loadFile(knowledgePath); @@ -465,7 +594,49 @@ export async function inlineKnowledgeScoped( // Return null if no sections matched (empty string from queryKnowledge) if (!scoped) return null; - return `### Project Knowledge (scoped)\nSource: \`${relSfRootFile("KNOWLEDGE")}\`\n\n${scoped.trim()}`; + return `### Project Knowledge (scoped)\nSource: \`${relGsdRootFile("KNOWLEDGE")}\`\n\n${scoped.trim()}`; +} + +/** + * Budget-capped knowledge inline for milestone-level prompt assembly. + * + * Addresses issue #4719: the six milestone-phase prompts (research-milestone, + * plan-milestone, complete-slice, complete-milestone, validate-milestone, + * reassess-roadmap) previously injected the full KNOWLEDGE.md (~226KB for a + * real project) on every invocation. This helper scopes by caller-supplied + * keywords and caps the payload at `maxChars` (default 30,000 chars). 
+ * + * Returns null when no KNOWLEDGE.md exists or no entries match any keyword. + */ +export async function inlineKnowledgeBudgeted( + base: string, + keywords: string[], + options?: { maxChars?: number }, +): Promise { + const DEFAULT_MAX_CHARS = 30_000; + const HARD_MAX_CHARS = 100_000; + const raw = Number(options?.maxChars ?? DEFAULT_MAX_CHARS); + const maxChars = Number.isFinite(raw) + ? Math.max(0, Math.min(Math.floor(raw), HARD_MAX_CHARS)) + : DEFAULT_MAX_CHARS; + + const knowledgePath = resolveGsdRootFile(base, "KNOWLEDGE"); + if (!existsSync(knowledgePath)) return null; + + const content = await loadFile(knowledgePath); + if (!content) return null; + + const { queryKnowledge } = await import("./context-store.js"); + const scoped = await queryKnowledge(content, keywords); + if (!scoped) return null; + + const trimmed = scoped.trim(); + const truncated = + trimmed.length > maxChars + ? `${trimmed.slice(0, maxChars)}\n\n[...truncated ${trimmed.length - maxChars} chars; rerun with narrower scope if needed]` + : trimmed; + + return `### Project Knowledge (scoped)\nSource: \`${relGsdRootFile("KNOWLEDGE")}\`\n\n${truncated}`; } /** @@ -546,7 +717,7 @@ function skillMatchesContext(skill: Skill, contextTokens: Set): boolean function resolvePreferenceSkillNames(refs: string[], base: string): string[] { if (refs.length === 0) return []; - const prefs: SFPreferences = { always_use_skills: refs }; + const prefs: GSDPreferences = { always_use_skills: refs }; const report = resolveAllSkillReferences(prefs, base); return refs.map(ref => { const resolution = report.resolutions.get(ref); @@ -562,7 +733,7 @@ function ruleMatchesContext(when: string, contextTokens: Set): boolean { } function resolveSkillRuleMatches( - prefs: SFPreferences | undefined, + prefs: GSDPreferences | undefined, contextTokens: Set, base: string, ): { include: string[]; avoid: string[] } { @@ -579,7 +750,7 @@ function resolveSkillRuleMatches( } function resolvePreferredSkillNames( - prefs: 
SFPreferences | undefined, + prefs: GSDPreferences | undefined, visibleSkills: Skill[], contextTokens: Set, base: string, @@ -595,8 +766,8 @@ function resolvePreferredSkillNames( * to prevent prompt injection via crafted directory names. */ const SAFE_SKILL_NAME = /^[a-z0-9][a-z0-9-]*$/; -function formatSkillActivationBlock(names: string[]): string { - const safe = names.filter(name => SAFE_SKILL_NAME.test(name)); +function formatSkillActivationBlock(skillNames: string[]): string { + const safe = skillNames.filter(name => SAFE_SKILL_NAME.test(name)); if (safe.length === 0) return ""; // Use explicit parameter syntax so LLMs pass { skill: "..." } instead of { name: "..." }. // The function-call-like syntax `Skill('name')` led LLMs to infer a positional @@ -615,9 +786,15 @@ export function buildSkillActivationBlock(params: { taskTitle?: string; extraContext?: string[]; taskPlanContent?: string | null; - preferences?: SFPreferences; + preferences?: GSDPreferences; + /** + * Unit type dispatching this prompt. When provided, skills are filtered + * through the per-unit-type manifest (see `skill-manifest.ts`). Unknown + * or omitted values retain the pre-manifest behavior (all skills eligible). + */ + unitType?: string; }): string { - const prefs = params.preferences ?? loadEffectiveSFPreferences()?.preferences; + const prefs = params.preferences ?? loadEffectiveGSDPreferences(params.base)?.preferences; const contextTokens = tokenizeSkillContext( params.milestoneId, params.milestoneTitle, @@ -627,8 +804,22 @@ export function buildSkillActivationBlock(params: { params.taskTitle, ); - const visibleSkills = (typeof getLoadedSkills === 'function' ? getLoadedSkills() : []).filter(skill => !skill.disableModelInvocation); + const loaded = (typeof getLoadedSkills === 'function' ? 
getLoadedSkills() : []).filter(skill => !skill.disableModelInvocation); + + // Skill activation here is driven entirely by explicit sources + // (always_use_skills, prefer_skills, skill_rules, task-plan skills_used). + // Every match is an explicit user/project intent and must not be dropped + // by the unit-type manifest — user intent is stronger signal than + // defaults. The manifest's real home is the skill catalog rendering + // layer (pi-coding-agent `formatSkillsForPrompt`); that wiring is tracked + // as the "load-time short-circuit" follow-up to RFC #4779. + // + // `unitType` stays plumbed so the strict-mode warning can surface + // manifest entries that reference uninstalled skills, and so the + // activation-block site is ready to opt in once PR B lands. + const visibleSkills = loaded; const installedNames = new Set(visibleSkills.map(skill => normalizeSkillReference(skill.name))); + warnIfManifestHasMissingSkills(params.unitType, installedNames); const avoided = new Set(resolvePreferenceSkillNames(prefs?.avoid_skills ?? 
[], params.base)); const matched = new Set(); @@ -658,7 +849,6 @@ export function buildSkillActivationBlock(params: { const ordered = [...matched] .filter(name => installedNames.has(name) && !avoided.has(name)) .sort(); - return formatSkillActivationBlock(ordered); } @@ -880,11 +1070,11 @@ export async function getDependencyTaskSummaryPaths( * - All slices are complete (milestone done — no point reassessing) */ export async function checkNeedsReassessment( - base: string, mid: string, state: SFState, + base: string, mid: string, state: GSDState, ): Promise<{ sliceId: string } | null> { // DB primary path — fall through to file-based when DB has no data for this milestone try { - const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); if (isDbAvailable()) { const slices = getMilestoneSlices(mid); if (slices.length > 0) { @@ -936,11 +1126,11 @@ export async function checkNeedsReassessment( * - UAT result file already exists (idempotent — already ran) */ export async function checkNeedsRunUat( - base: string, mid: string, state: SFState, prefs: SFPreferences | undefined, + base: string, mid: string, state: GSDState, prefs: GSDPreferences | undefined, ): Promise<{ sliceId: string; uatType: UatType } | null> { // DB primary path — fall through to file-based when DB has no data for this milestone try { - const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); if (isDbAvailable()) { const slices = getMilestoneSlices(mid); if (slices.length > 0) { @@ -1010,15 +1200,20 @@ export async function checkNeedsRunUat( * as a seed when present. The discussion agent interviews the user, writes * a full CONTEXT.md, and the phase transitions to pre-planning automatically. 
*/ -export async function buildDiscussMilestonePrompt(mid: string, midTitle: string, base: string): Promise { +export async function buildDiscussMilestonePrompt( + mid: string, + midTitle: string, + base: string, + structuredQuestionsAvailable = "false", +): Promise { const discussTemplates = inlineTemplate("context", "Context"); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle: midTitle, inlinedTemplates: discussTemplates, - structuredQuestionsAvailable: "false", - commitInstruction: "Do not commit planning artifacts — .sf/ is managed externally.", + structuredQuestionsAvailable, + commitInstruction: "Do not commit planning artifacts — .gsd/ is managed externally.", fastPathInstruction: "", }); @@ -1034,29 +1229,64 @@ export async function buildDiscussMilestonePrompt(mid: string, midTitle: string, } export async function buildResearchMilestonePrompt(mid: string, midTitle: string, base: string): Promise { - const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); - const contextRel = relMilestoneFile(base, mid, "CONTEXT"); + // #4782 phase 3: research-milestone migrated through the composer. + // Declared inline order: milestone-context, project, requirements, + // decisions, templates. Knowledge stays outside the composer + // (budget-driven, scoped by keyword extraction — future phase folds + // policy-driven blocks in). 
+ const resolveArtifact: ArtifactResolver = async (key) => { + switch (key) { + case "milestone-context": { + const p = resolveMilestoneFile(base, mid, "CONTEXT"); + const r = relMilestoneFile(base, mid, "CONTEXT"); + return await inlineFile(p, r, "Milestone Context"); + } + case "project": + return await inlineProjectFromDb(base); + case "requirements": + return await inlineRequirementsFromDb(base, mid); + case "decisions": + return await inlineDecisionsFromDb(base, mid); + case "templates": + return inlineTemplate("research", "Research"); + default: + return null; + } + }; - const inlined: string[] = []; - inlined.push(await inlineFile(contextPath, contextRel, "Milestone Context")); - const projectInline = await inlineProjectFromDb(base); - if (projectInline) inlined.push(projectInline); - const requirementsInline = await inlineRequirementsFromDb(base, mid); - if (requirementsInline) inlined.push(requirementsInline); - const decisionsInline = await inlineDecisionsFromDb(base, mid); - if (decisionsInline) inlined.push(decisionsInline); - const knowledgeInlineRM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); - if (knowledgeInlineRM) inlined.push(knowledgeInlineRM); - inlined.push(inlineTemplate("research", "Research")); + const composed = await composeInlinedContext("research-milestone", resolveArtifact); - const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`); + // Knowledge block stays outside the composer — budgeted, scoped via + // keyword extraction (#4719). Inserted between decisions and the + // templates block to match the pre-migration output order. We split + // the composer output around the templates section to preserve that + // ordering. 
+ const knowledgeInlineRM = await inlineKnowledgeBudgeted(base, extractKeywords(midTitle)); + const parts: string[] = []; + if (knowledgeInlineRM && composed) { + // Insert knowledge before the template block so the overall order is: + // milestone-context → project → requirements → decisions → KNOWLEDGE → research template + const idx = composed.lastIndexOf("### Output Template:"); + if (idx > 0) { + const before = composed.slice(0, idx).replace(/\n\n---\n\n$/, ""); + const after = composed.slice(idx); + parts.push(before, knowledgeInlineRM, after); + } else { + parts.push(composed, knowledgeInlineRM); + } + } else if (composed) { + parts.push(composed); + if (knowledgeInlineRM) parts.push(knowledgeInlineRM); + } + + const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${parts.join("\n\n---\n\n")}`); const outputRelPath = relMilestoneFile(base, mid, "RESEARCH"); return loadPrompt("research-milestone", { workingDirectory: base, milestoneId: mid, milestoneTitle: midTitle, milestonePath: relMilestonePath(base, mid), - contextPath: contextRel, + contextPath: relMilestoneFile(base, mid, "CONTEXT"), outputPath: join(base, outputRelPath), inlinedContext, skillActivation: buildSkillActivationBlock({ @@ -1064,6 +1294,7 @@ export async function buildResearchMilestonePrompt(mid: string, midTitle: string milestoneId: mid, milestoneTitle: midTitle, extraContext: [inlinedContext], + unitType: "research-milestone", }), ...buildSkillDiscoveryVars(), }); @@ -1096,20 +1327,19 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); } - const queuePath = resolveSfRootFile(base, "QUEUE"); + const queuePath = resolveGsdRootFile(base, "QUEUE"); if (existsSync(queuePath)) { const queueInline = await inlineFileSmart( queuePath, - relSfRootFile("QUEUE"), + 
relGsdRootFile("QUEUE"), "Project Queue", `${mid} ${midTitle}`, ); inlined.push(queueInline); } - const knowledgeInlinePM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); + // Scoped + budgeted — see issue #4719 + const knowledgeInlinePM = await inlineKnowledgeBudgeted(base, extractKeywords(midTitle)); if (knowledgeInlinePM) inlined.push(knowledgeInlinePM); - const memoriesBlockPM = await buildMemoriesBlock(5); - if (memoriesBlockPM) inlined.push(memoriesBlockPM); inlined.push(inlineTemplate("roadmap", "Roadmap")); if (inlineLevel === "full") { inlined.push(inlineTemplate("decisions", "Decisions")); @@ -1143,6 +1373,7 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba milestoneId: mid, milestoneTitle: midTitle, extraContext: [inlinedContext], + unitType: "plan-milestone", }), ...buildSkillDiscoveryVars(), }); @@ -1197,7 +1428,7 @@ export async function buildResearchSlicePrompt( inlined.push(inlineTemplate("research", "Research")); - const depContent = await inlineDependencySummaries(mid, sid, base); + const depContent = await inlineDependencySummaries(mid, sid, base, resolveSummaryBudgetChars()); const activeOverrides = await loadActiveOverrides(base); const overridesInline = formatOverridesSection(activeOverrides); if (overridesInline) inlined.unshift(overridesInline); @@ -1221,15 +1452,39 @@ export async function buildResearchSlicePrompt( sliceId: sid, sliceTitle: sTitle, extraContext: [inlinedContext, depContent], + unitType: "research-slice", }), ...buildSkillDiscoveryVars(), }); } -export async function buildPlanSlicePrompt( - mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel, -): Promise { - const inlineLevel = level ?? resolveInlineLevel(); +/** + * Shared assembly for plan-slice and refine-slice prompts. 
Both builders need + * the same inlined context (roadmap excerpt, slice context, research, decisions, + * requirements, knowledge, graph subgraph, templates, dependency summaries, + * overrides). Extracted to prevent drift between the two sites. + * + * `prependBlocks` are pushed onto the start of the inlined array BEFORE any + * shared content, so callers can add unit-specific headers (e.g., the refine + * sketch-scope constraint). + */ +async function renderSlicePrompt(options: { + mid: string; + sid: string; + sTitle: string; + base: string; + level: InlineLevel; + promptTemplate: "plan-slice" | "refine-slice"; + prependBlocks?: string[]; + extraVars?: Record; + sessionContextWindow?: number; + modelRegistry?: MinimalModelRegistry; +}): Promise { + const { + mid, sid, sTitle, base, level, promptTemplate, prependBlocks = [], extraVars = {}, + sessionContextWindow, modelRegistry, + } = options; + const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); const researchPath = resolveSliceFile(base, mid, sid, "RESEARCH"); @@ -1237,18 +1492,17 @@ export async function buildPlanSlicePrompt( const sliceContextPath = resolveSliceFile(base, mid, sid, "CONTEXT"); const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT"); - const inlined: string[] = []; + const inlined: string[] = [...prependBlocks]; - // Inject phase handoff anchor from research phase (if available) + // Phase handoff anchor from research phase (if available) const researchSliceAnchor = readPhaseAnchor(base, mid, "research-slice"); if (researchSliceAnchor) inlined.push(formatAnchorForPrompt(researchSliceAnchor)); - // Use roadmap excerpt instead of full roadmap for context reduction - const roadmapExcerptPS = await inlineRoadmapExcerpt(base, mid, sid); - if (roadmapExcerptPS) { - inlined.push(roadmapExcerptPS); + // Roadmap excerpt with full-roadmap fallback + const roadmapExcerpt = await inlineRoadmapExcerpt(base, mid, sid); + if 
(roadmapExcerpt) { + inlined.push(roadmapExcerpt); } else { - // Fall back to full roadmap if excerpt fails inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); } @@ -1256,42 +1510,36 @@ export async function buildPlanSlicePrompt( if (sliceCtxInline) inlined.push(sliceCtxInline); const researchInline = await inlineFileOptional(researchPath, researchRel, "Slice Research"); if (researchInline) inlined.push(researchInline); - if (inlineLevel !== "minimal") { - // Derive scope from slice title for decision filtering (R005) - const derivedScopePS = deriveSliceScope(sTitle); - const decisionsInline = await inlineDecisionsFromDb(base, mid, derivedScopePS, inlineLevel); + + if (level !== "minimal") { + const derivedScope = deriveSliceScope(sTitle); + const decisionsInline = await inlineDecisionsFromDb(base, mid, derivedScope, level); if (decisionsInline) inlined.push(decisionsInline); - const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, level); if (requirementsInline) inlined.push(requirementsInline); } - // Use scoped knowledge based on slice title keywords - const keywordsPS = extractKeywords(sTitle); - const knowledgeInlinePS = await inlineKnowledgeScoped(base, keywordsPS); - if (knowledgeInlinePS) inlined.push(knowledgeInlinePS); + const knowledgeInline = await inlineKnowledgeScoped(base, extractKeywords(sTitle)); + if (knowledgeInline) inlined.push(knowledgeInline); - // Knowledge graph: subgraph for this slice (graceful — skipped if no graph.json) - const graphBlockPS = await inlineGraphSubgraph(base, `${sid} ${sTitle}`, { budget: 3000 }); - if (graphBlockPS) inlined.push(graphBlockPS); + const graphBlock = await inlineGraphSubgraph(base, `${sid} ${sTitle}`, { budget: 3000 }); + if (graphBlock) inlined.push(graphBlock); inlined.push(inlineTemplate("plan", "Slice Plan")); - if (inlineLevel === "full") { + if (level === "full") { 
inlined.push(inlineTemplate("task-plan", "Task Plan")); } - const depContent = await inlineDependencySummaries(mid, sid, base); - const planActiveOverrides = await loadActiveOverrides(base); - const planOverridesInline = formatOverridesSection(planActiveOverrides); - if (planOverridesInline) inlined.unshift(planOverridesInline); + const depContent = await inlineDependencySummaries(mid, sid, base, resolveSummaryBudgetChars()); + const overridesInline = formatOverridesSection(await loadActiveOverrides(base)); + if (overridesInline) inlined.unshift(overridesInline); const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`); - - // Build executor context constraints from the budget engine - const executorContextConstraints = formatExecutorConstraints(); - + const executorContextConstraints = formatExecutorConstraints(sessionContextWindow, modelRegistry); const outputRelPath = relSliceFile(base, mid, sid, "PLAN"); - const commitInstruction = "Do not commit — .sf/ planning docs are managed externally and not tracked in git."; - return loadPrompt("plan-slice", { + const commitInstruction = "Do not commit — .gsd/ planning docs are managed externally and not tracked in git."; + + return loadPrompt(promptTemplate, { workingDirectory: base, milestoneId: mid, sliceId: sid, sliceTitle: sTitle, slicePath: relSlicePath(base, mid, sid), @@ -1309,7 +1557,102 @@ export async function buildPlanSlicePrompt( sliceId: sid, sliceTitle: sTitle, extraContext: [inlinedContext, depContent], + unitType: promptTemplate, }), + ...extraVars, + }); +} + +export async function buildPlanSlicePrompt( + mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel, + options?: { + softScopeHint?: string; + sessionContextWindow?: number; + modelRegistry?: MinimalModelRegistry; + /** Failure context from a prior pre-exec gate run (#4551). 
When present, a + * "Fix these specific issues" section is appended so the LLM addresses the + * exact problems instead of producing an identical plan that fails again. */ + priorPreExecFailure?: { + blockingFindings: string[]; + verdictExcerpt: string; + }; + }, +): Promise { + const prependBlocks: string[] = []; + // ADR-011: when the refining-phase dispatch rule gracefully downgrades to + // plan-slice (progressive_planning was toggled off mid-milestone), it + // forwards the stored sketch_scope as a SOFT hint — context, not a hard + // constraint. The planner is free to expand beyond it. + if (options?.softScopeHint && options.softScopeHint.trim().length > 0) { + prependBlocks.push( + `## Prior Sketch Scope (soft hint — non-binding)\n\n${options.softScopeHint.trim()}\n\n` + + `This scope was captured during an earlier progressive-planning pass that was later disabled. Treat it as context only — you may plan beyond it if the work genuinely requires more scope. Do NOT treat this as a hard boundary.`, + ); + } + // #4551: inject pre-exec failure context so the re-dispatched plan-slice + // addresses the exact blocked references rather than reproducing the same plan. + if (options?.priorPreExecFailure) { + const { blockingFindings, verdictExcerpt } = options.priorPreExecFailure; + const findingsList = blockingFindings.length > 0 + ? blockingFindings.map(f => `- ${f}`).join("\n") + : "- (no specific findings recorded)"; + prependBlocks.push( + `## Fix these specific issues from the prior pre-exec check\n\n` + + `The previous plan-slice attempt was blocked by pre-execution validation.\n` + + `Gate verdict: ${verdictExcerpt}\n\n` + + `Blocked references that must be resolved in this plan:\n${findingsList}\n\n` + + `Revise the plan so that every reference listed above is satisfied before execution begins. 
` + + `Do not reproduce the same file paths, package names, or task ordering that caused these failures.`, + ); + } + return renderSlicePrompt({ + mid, sid, sTitle, base, + level: level ?? resolveInlineLevel(), + promptTemplate: "plan-slice", + prependBlocks, + sessionContextWindow: options?.sessionContextWindow, + modelRegistry: options?.modelRegistry, + }); +} + +/** + * ADR-011 refine-slice: expand a sketch into a full plan using the current + * codebase state and prior slice summary. Mechanically similar to plan-slice + * but framed as a *transformation* (sketch → full plan) rather than a + * blank-sheet planning pass. Reuses inlineDependencySummaries for prior + * slice SUMMARY and inlines the stored sketch_scope as a hard constraint. + */ +export async function buildRefineSlicePrompt( + mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel, + options?: { sessionContextWindow?: number; modelRegistry?: MinimalModelRegistry }, +): Promise { + // Pull the stored sketch scope from the DB — the hard constraint we plan within. + let sketchScope = ""; + try { + const { isDbAvailable, getSlice } = await import("./gsd-db.js"); + if (isDbAvailable()) { + sketchScope = getSlice(mid, sid)?.sketch_scope ?? ""; + } + } catch { + sketchScope = ""; + } + + const prependBlocks: string[] = []; + if (sketchScope.trim().length > 0) { + prependBlocks.push( + `## Sketch Scope (hard constraint)\n\n${sketchScope.trim()}\n\n` + + `Treat this as the authoritative boundary for the slice. Do not plan work outside this scope; if the scope is too narrow, surface it as a deviation rather than expanding silently.`, + ); + } + + return renderSlicePrompt({ + mid, sid, sTitle, base, + level: level ?? 
resolveInlineLevel(), + promptTemplate: "refine-slice", + prependBlocks, + extraVars: { sketchScope }, + sessionContextWindow: options?.sessionContextWindow, + modelRegistry: options?.modelRegistry, }); } @@ -1318,6 +1661,10 @@ export interface ExecuteTaskPromptOptions { level?: InlineLevel; /** Override carry-forward paths (dependency-based instead of order-based). */ carryForwardPaths?: string[]; + /** Session model context window in tokens, forwarded to the budget engine. */ + sessionContextWindow?: number; + /** Model registry forwarded to the budget engine for executor-model lookup. */ + modelRegistry?: MinimalModelRegistry; } export async function buildExecuteTaskPrompt( @@ -1378,11 +1725,11 @@ export async function buildExecuteTaskPrompt( const carryForwardSection = await buildCarryForwardSection(effectivePriorSummaries, base); // Inline project knowledge if available (smart-chunked for relevance) - const knowledgeAbsPath = resolveSfRootFile(base, "KNOWLEDGE"); + const knowledgeAbsPath = resolveGsdRootFile(base, "KNOWLEDGE"); const knowledgeInlineET = existsSync(knowledgeAbsPath) ? 
await inlineFileSmart( knowledgeAbsPath, - relSfRootFile("KNOWLEDGE"), + relGsdRootFile("KNOWLEDGE"), "Project Knowledge", `${tTitle} ${sTitle}`, // use task + slice title as relevance query ) @@ -1408,8 +1755,8 @@ export async function buildExecuteTaskPrompt( const overridesSection = formatOverridesSection(activeOverrides); // Compute verification budget for the executor's context window (issue #707) - const prefs = loadEffectiveSFPreferences(); - const contextWindow = resolveExecutorContextWindow(undefined, prefs?.preferences); + const prefs = loadEffectiveGSDPreferences(); + const contextWindow = resolveExecutorContextWindow(opts.modelRegistry, prefs?.preferences, opts.sessionContextWindow); const budgets = computeBudgets(contextWindow); const verificationBudget = `~${Math.round(budgets.verificationBudgetChars / 1000)}K chars`; @@ -1424,11 +1771,31 @@ export async function buildExecuteTaskPrompt( const runtimePath = resolveRuntimeFile(base); const runtimeContent = existsSync(runtimePath) ? await loadFile(runtimePath) : null; const runtimeContext = runtimeContent - ? `### Runtime Context\nSource: \`.sf/RUNTIME.md\`\n\n${runtimeContent.trim()}` + ? `### Runtime Context\nSource: \`.gsd/RUNTIME.md\`\n\n${runtimeContent.trim()}` : ""; - const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : ""; - const memoriesSection = await buildMemoriesBlock(3); + let phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : ""; + + // ADR-011 Phase 2: inject any resolved-but-unapplied escalation override + // into this task's prompt. Claim is atomic via DB UPDATE WHERE IS NULL, so + // if a parallel build already injected it, we skip. Feature-gated by + // phases.mid_execution_escalation. Prepended to phaseAnchorSection so it + // appears near the top of the prompt above planning anchors. 
+ if (prefs?.preferences?.phases?.mid_execution_escalation === true) { + try { + const { claimOverrideForInjection } = await import("./escalation.js"); + const claimed = claimOverrideForInjection(base, mid, sid); + if (claimed) { + const block = claimed.injectionBlock + "\n\n---\n\n"; + phaseAnchorSection = phaseAnchorSection + ? `${block}${phaseAnchorSection}` + : block; + } + } catch (escalationErr) { + // Escalation module unavailable or threw — log and proceed. + logWarning("prompt", `escalation override injection failed: ${(escalationErr as Error).message}`); + } + } // Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the // gates that plan-slice actually seeded for this task — tasks with no @@ -1444,7 +1811,6 @@ export async function buildExecuteTaskPrompt( return loadPrompt("execute-task", { overridesSection, runtimeContext, - memoriesSection, phaseAnchorSection, workingDirectory: base, milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle, @@ -1474,52 +1840,100 @@ export async function buildExecuteTaskPrompt( } export async function buildCompleteSlicePrompt( - mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel, + mid: string, midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel, ): Promise { const inlineLevel = level ?? 
resolveInlineLevel(); - const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); - const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); - const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN"); - const slicePlanRel = relSliceFile(base, mid, sid, "PLAN"); - const sliceContextPath = resolveSliceFile(base, mid, sid, "CONTEXT"); - const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT"); - - const inlined: string[] = []; - inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); - const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)"); - if (sliceCtxInline) inlined.push(sliceCtxInline); - inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Slice Plan")); - if (inlineLevel !== "minimal") { - const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, inlineLevel); - if (requirementsInline) inlined.push(requirementsInline); - } - const knowledgeInlineCS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); - if (knowledgeInlineCS) inlined.push(knowledgeInlineCS); - - // Inline all task summaries for this slice - const tDir = resolveTasksDir(base, mid, sid); - if (tDir) { - const summaryFiles = resolveTaskFiles(tDir, "SUMMARY").sort(); - for (const file of summaryFiles) { - const absPath = join(tDir, file); - const content = await loadFile(absPath); - const sRel = relSlicePath(base, mid, sid); - const relPath = `${sRel}/tasks/${file}`; - if (content) { - inlined.push(`### Task Summary: ${file.replace(/-SUMMARY\.md$/i, "")}\nSource: \`${relPath}\`\n\n${content.trim()}`); + // #4782 phase 3: complete-slice migrated through composer. Manifest + // declares [roadmap, slice-context, slice-plan, requirements, + // prior-task-summaries, templates]. Overrides prepend and knowledge + // splice stay imperative — they need the composer v2 contract + // (computed + prepend blocks; see RFC #4924). 
+ const resolveArtifact: ArtifactResolver = async (key) => { + switch (key) { + case "roadmap": { + const p = resolveMilestoneFile(base, mid, "ROADMAP"); + const r = relMilestoneFile(base, mid, "ROADMAP"); + return await inlineFile(p, r, "Milestone Roadmap"); } + case "slice-context": { + const p = resolveSliceFile(base, mid, sid, "CONTEXT"); + const r = relSliceFile(base, mid, sid, "CONTEXT"); + return await inlineFileOptional(p, r, "Slice Context (from discussion)"); + } + case "slice-plan": { + const p = resolveSliceFile(base, mid, sid, "PLAN"); + const r = relSliceFile(base, mid, sid, "PLAN"); + return await inlineFile(p, r, "Slice Plan"); + } + case "requirements": + if (inlineLevel === "minimal") return null; + return await inlineRequirementsFromDb(base, mid, sid, inlineLevel); + case "prior-task-summaries": { + const tDir = resolveTasksDir(base, mid, sid); + if (!tDir) return null; + const summaryFiles = resolveTaskFiles(tDir, "SUMMARY").sort(); + const sRel = relSlicePath(base, mid, sid); + const blocks: string[] = []; + for (const file of summaryFiles) { + const absPath = join(tDir, file); + const content = await loadFile(absPath); + if (!content) continue; + const relPath = `${sRel}/tasks/${file}`; + blocks.push(`### Task Summary: ${file.replace(/-SUMMARY\.md$/i, "")}\nSource: \`${relPath}\`\n\n${content.trim()}`); + } + return blocks.length > 0 ? blocks.join("\n\n---\n\n") : null; + } + case "templates": { + const parts = [inlineTemplate("slice-summary", "Slice Summary")]; + if (inlineLevel !== "minimal") { + parts.push(inlineTemplate("uat", "UAT")); + } + return parts.join("\n\n---\n\n"); + } + default: + return null; + } + }; + + const composed = await composeInlinedContext("complete-slice", resolveArtifact); + + // Knowledge splices in between requirements and prior-task-summaries + // so overall order matches pre-migration: roadmap → slice-context → + // slice-plan → requirements → KNOWLEDGE → task summaries → templates. 
+  const knowledgeInlineCS = await inlineKnowledgeBudgeted(
+    base,
+    [...extractKeywords(midTitle), ...extractKeywords(sTitle)],
+  );
+
+  let body = composed;
+  if (knowledgeInlineCS && body) {
+    // Splice knowledge right before the first "### Task Summary:" block
+    // to preserve pre-migration ordering. With no task summaries, splice
+    // before the templates block; if neither anchor exists, append last.
+    const taskIdx = body.indexOf("### Task Summary:");
+    const templatesIdx = body.lastIndexOf("### Slice Summary");
+    const spliceIdx = taskIdx > -1 ? taskIdx : templatesIdx;
+    if (spliceIdx > 0) {
+      const before = body.slice(0, spliceIdx).replace(/\n\n---\n\n$/, "");
+      const after = body.slice(spliceIdx);
+      body = [before, knowledgeInlineCS, after].join("\n\n---\n\n");
+    } else {
+      body = `${body}\n\n---\n\n${knowledgeInlineCS}`;
     }
   }
-  inlined.push(inlineTemplate("slice-summary", "Slice Summary"));
-  if (inlineLevel !== "minimal") {
-    inlined.push(inlineTemplate("uat", "UAT"));
-  }
+
+  // Overrides section prepends to the top of the inlined context —
+  // standard pattern for slice-level builders (until composer v2 lands
+  // the prepend contract).
   const completeActiveOverrides = await loadActiveOverrides(base);
   const completeOverridesInline = formatOverridesSection(completeActiveOverrides);
-  if (completeOverridesInline) inlined.unshift(completeOverridesInline);
+  const finalBody = completeOverridesInline
+    ? 
`${completeOverridesInline}\n\n---\n\n${body}` + : body; - const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`); + const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${finalBody}`); + const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); const sliceRel = relSlicePath(base, mid, sid); const sliceSummaryPath = join(base, `${sliceRel}/${sid}-SUMMARY.md`); @@ -1563,7 +1977,7 @@ export async function buildCompleteMilestonePrompt( // Inline all slice summaries (deduplicated by slice ID) let sliceIds: string[] = []; try { - const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); if (isDbAvailable()) { sliceIds = getMilestoneSlices(mid) .filter(s => s.status !== "skipped") @@ -1580,15 +1994,25 @@ export async function buildCompleteMilestonePrompt( } } const seenSlices = new Set(); + const summaryRelPaths: string[] = []; for (const sid of sliceIds) { if (seenSlices.has(sid)) continue; seenSlices.add(sid); const summaryPath = resolveSliceFile(base, mid, sid, "SUMMARY"); const summaryRel = relSliceFile(base, mid, sid, "SUMMARY"); - inlined.push(await inlineFile(summaryPath, summaryRel, `${sid} Summary`)); + summaryRelPaths.push(summaryRel); + // Compact excerpt instead of full inline (#4780). Closer Reads the + // full file on-demand when synthesizing LEARNINGS narrative. + inlined.push(await buildSliceSummaryExcerpt(summaryPath, summaryRel, sid)); + } + if (summaryRelPaths.length > 0) { + const pathList = summaryRelPaths.map(p => `- \`${p}\``).join("\n"); + inlined.push( + `### On-demand Slice Summaries\n\nExcerpted above. 
Read the full file for any slice when the excerpt's section heads don't carry enough narrative for the milestone summary you're drafting:\n\n${pathList}`, + ); } - // Inline root SF files (skip for minimal — completion can read these if needed) + // Inline root GSD files (skip for minimal — completion can read these if needed) if (inlineLevel !== "minimal") { const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); @@ -1597,9 +2021,10 @@ export async function buildCompleteMilestonePrompt( const projectInline = await inlineProjectFromDb(base); if (projectInline) inlined.push(projectInline); } - const knowledgeInlineCM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); + // Scoped + budgeted — see issue #4719 + const knowledgeInlineCM = await inlineKnowledgeBudgeted(base, extractKeywords(midTitle)); if (knowledgeInlineCM) inlined.push(knowledgeInlineCM); - // Inline milestone context file (milestone-level, not SF root) + // Inline milestone context file (milestone-level, not GSD root) const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); const contextRel = relMilestoneFile(base, mid, "CONTEXT"); const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context"); @@ -1610,6 +2035,14 @@ export async function buildCompleteMilestonePrompt( const milestoneSummaryPath = join(base, `${relMilestonePath(base, mid)}/${mid}-SUMMARY.md`); + const learningsRelPath = join(relMilestonePath(base, mid), `${mid}-LEARNINGS.md`); + const learningsAbsPath = join(base, learningsRelPath); + const extractLearningsSteps = buildExtractionStepsBlock({ + milestoneId: mid, + outputPath: learningsAbsPath, + relativeOutputPath: learningsRelPath, + }); + return loadPrompt("complete-milestone", { workingDirectory: base, milestoneId: mid, @@ -1617,11 +2050,13 @@ export async function buildCompleteMilestonePrompt( roadmapPath: roadmapRel, inlinedContext, 
milestoneSummaryPath, + extractLearningsSteps, skillActivation: buildSkillActivationBlock({ base, milestoneId: mid, milestoneTitle: midTitle, extraContext: [inlinedContext], + unitType: "complete-milestone", }), }); } @@ -1638,7 +2073,7 @@ export async function buildValidateMilestonePrompt( // Inline verification classes from planning (if available in DB) try { - const { isDbAvailable, getMilestone } = await import("./sf-db.js"); + const { isDbAvailable, getMilestone } = await import("./gsd-db.js"); if (isDbAvailable()) { const milestone = getMilestone(mid); if (milestone) { @@ -1659,7 +2094,7 @@ export async function buildValidateMilestonePrompt( // Inline all slice summaries and assessment results let valSliceIds: string[] = []; try { - const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); if (isDbAvailable()) { valSliceIds = getMilestoneSlices(mid) .filter(s => s.status !== "skipped") @@ -1715,7 +2150,7 @@ export async function buildValidateMilestonePrompt( inlined.push(`### Previous Validation (re-validation round ${remediationRound})\nSource: \`${validationRel}\`\n\n${validationContent.trim()}`); } - // Inline root SF files + // Inline root GSD files if (inlineLevel !== "minimal") { const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); @@ -1724,7 +2159,8 @@ export async function buildValidateMilestonePrompt( const projectInline = await inlineProjectFromDb(base); if (projectInline) inlined.push(projectInline); } - const knowledgeInline = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); + // Scoped + budgeted — see issue #4719 + const knowledgeInline = await inlineKnowledgeBudgeted(base, extractKeywords(midTitle)); if (knowledgeInline) inlined.push(knowledgeInline); // Inline milestone context file const contextPath = resolveMilestoneFile(base, mid, 
"CONTEXT"); @@ -1761,6 +2197,7 @@ export async function buildValidateMilestonePrompt( milestoneId: mid, milestoneTitle: midTitle, extraContext: [inlinedContext], + unitType: "validate-milestone", }), }); } @@ -1843,6 +2280,7 @@ export async function buildReplanSlicePrompt( sliceId: sid, sliceTitle: sTitle, extraContext: [inlinedContext, captureContext], + unitType: "replan-slice", }), }); } @@ -1850,20 +2288,30 @@ export async function buildReplanSlicePrompt( export async function buildRunUatPrompt( mid: string, sliceId: string, uatPath: string, uatContent: string, base: string, ): Promise { - const inlined: string[] = []; - inlined.push(await inlineFile(resolveSliceFile(base, mid, sliceId, "UAT"), uatPath, `${sliceId} UAT`)); + // #4782 phase 3: run-uat migrated to compose its inlined context via + // the manifest. Behavior-equivalent — resolver dispatches to the same + // inline* helpers as the pre-migration builder. + const resolveArtifact: ArtifactResolver = async (key) => { + switch (key) { + case "slice-uat": { + const p = resolveSliceFile(base, mid, sliceId, "UAT"); + return await inlineFile(p, uatPath, `${sliceId} UAT`); + } + case "slice-summary": { + const p = resolveSliceFile(base, mid, sliceId, "SUMMARY"); + if (!p) return null; + const r = relSliceFile(base, mid, sliceId, "SUMMARY"); + return await inlineFileOptional(p, r, `${sliceId} Summary`); + } + case "project": + return await inlineProjectFromDb(base); + default: + return null; + } + }; - const summaryPath = resolveSliceFile(base, mid, sliceId, "SUMMARY"); - const summaryRel = relSliceFile(base, mid, sliceId, "SUMMARY"); - if (summaryPath) { - const summaryInline = await inlineFileOptional(summaryPath, summaryRel, `${sliceId} Summary`); - if (summaryInline) inlined.push(summaryInline); - } - - const projectInline = await inlineProjectFromDb(base); - if (projectInline) inlined.push(projectInline); - - const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these 
files)\n\n${inlined.join("\n\n---\n\n")}`); + const composed = await composeInlinedContext("run-uat", resolveArtifact); + const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`); const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "ASSESSMENT")); const uatType = getUatType(uatContent); @@ -1881,6 +2329,7 @@ export async function buildRunUatPrompt( milestoneId: mid, sliceId, extraContext: [inlinedContext], + unitType: "run-uat", }), }); } @@ -1889,30 +2338,54 @@ export async function buildReassessRoadmapPrompt( mid: string, midTitle: string, completedSliceId: string, base: string, level?: InlineLevel, ): Promise { const inlineLevel = level ?? resolveInlineLevel(); - const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); - const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); - const summaryPath = resolveSliceFile(base, mid, completedSliceId, "SUMMARY"); - const summaryRel = relSliceFile(base, mid, completedSliceId, "SUMMARY"); - const sliceContextPath = resolveSliceFile(base, mid, completedSliceId, "CONTEXT"); - const sliceContextRel = relSliceFile(base, mid, completedSliceId, "CONTEXT"); - const inlined: string[] = []; - inlined.push(await inlineFile(roadmapPath, roadmapRel, "Current Roadmap")); - const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)"); - if (sliceCtxInline) inlined.push(sliceCtxInline); - inlined.push(await inlineFile(summaryPath, summaryRel, `${completedSliceId} Summary`)); - if (inlineLevel !== "minimal") { - const projectInline = await inlineProjectFromDb(base); - if (projectInline) inlined.push(projectInline); - const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); - if (requirementsInline) inlined.push(requirementsInline); - const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); - if (decisionsInline) 
inlined.push(decisionsInline); - } - const knowledgeInlineRA = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); - if (knowledgeInlineRA) inlined.push(knowledgeInlineRA); + // #4782 phase 2 pilot: reassess-roadmap is the first unit type to + // compose its inlined context through the manifest-driven composer. + // The resolver below dispatches artifact keys to the existing inline* + // helpers, preserving identical output so the migration is + // observable-equivalent. Knowledge stays outside the composer (it's + // budget-driven, not manifest-driven) until a later phase formalizes + // knowledge/memory policies as composer inputs. + const resolveArtifact: ArtifactResolver = async (key) => { + switch (key) { + case "roadmap": { + const p = resolveMilestoneFile(base, mid, "ROADMAP"); + const r = relMilestoneFile(base, mid, "ROADMAP"); + return await inlineFile(p, r, "Current Roadmap"); + } + case "slice-context": { + const p = resolveSliceFile(base, mid, completedSliceId, "CONTEXT"); + const r = relSliceFile(base, mid, completedSliceId, "CONTEXT"); + return await inlineFileOptional(p, r, "Slice Context (from discussion)"); + } + case "slice-summary": { + const p = resolveSliceFile(base, mid, completedSliceId, "SUMMARY"); + const r = relSliceFile(base, mid, completedSliceId, "SUMMARY"); + return await inlineFile(p, r, `${completedSliceId} Summary`); + } + case "project": + if (inlineLevel === "minimal") return null; + return await inlineProjectFromDb(base); + case "requirements": + if (inlineLevel === "minimal") return null; + return await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); + case "decisions": + if (inlineLevel === "minimal") return null; + return await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); + default: + return null; + } + }; - const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`); + const composed = await 
composeInlinedContext("reassess-roadmap", resolveArtifact); + const parts: string[] = []; + if (composed) parts.push(composed); + // Knowledge block stays outside the composer — budgeted, scoped via + // keyword extraction (#4719). Future phase folds it in. + const knowledgeInlineRA = await inlineKnowledgeBudgeted(base, extractKeywords(midTitle)); + if (knowledgeInlineRA) parts.push(knowledgeInlineRA); + + const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${parts.join("\n\n---\n\n")}`); const assessmentPath = join(base, relSliceFile(base, mid, completedSliceId, "ASSESSMENT")); @@ -1930,14 +2403,14 @@ export async function buildReassessRoadmapPrompt( logWarning("prompt", `loadDeferredCaptures failed: ${err instanceof Error ? err.message : String(err)}`); } - const reassessCommitInstruction = "Do not commit — .sf/ planning docs are managed externally and not tracked in git."; + const reassessCommitInstruction = "Do not commit — .gsd/ planning docs are managed externally and not tracked in git."; return loadPrompt("reassess-roadmap", { workingDirectory: base, milestoneId: mid, milestoneTitle: midTitle, completedSliceId, - roadmapPath: roadmapRel, + roadmapPath: relMilestoneFile(base, mid, "ROADMAP"), assessmentPath, inlinedContext, deferredCaptures, @@ -1947,6 +2420,7 @@ export async function buildReassessRoadmapPrompt( milestoneId: mid, milestoneTitle: midTitle, extraContext: [inlinedContext, deferredCaptures], + unitType: "reassess-roadmap", }), }); } @@ -1957,6 +2431,7 @@ export async function buildReactiveExecutePrompt( mid: string, midTitle: string, sid: string, sTitle: string, readyTaskIds: string[], base: string, subagentModel?: string, + opts?: { sessionContextWindow?: number; modelRegistry?: MinimalModelRegistry }, ): Promise { const { loadSliceTaskIO, deriveTaskGraph, graphMetrics } = await import("./reactive-graph.js"); @@ -1998,7 +2473,11 @@ export async function buildReactiveExecutePrompt( // Build a full 
execute-task prompt with dependency-based carry-forward const taskPrompt = await buildExecuteTaskPrompt( mid, sid, sTitle, tid, tTitle, base, - { carryForwardPaths: depPaths }, + { + carryForwardPaths: depPaths, + sessionContextWindow: opts?.sessionContextWindow, + modelRegistry: opts?.modelRegistry, + }, ); const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : ""; @@ -2091,7 +2570,7 @@ export async function buildParallelResearchSlicesPrompt( subagentSections.push([ `### ${slice.id}: ${slice.title}`, "", - `Use this as the prompt for a \`subagent\` call${modelSuffix} (agent: \`sf-executor\` or the default agent):`, + `Use this as the prompt for a \`subagent\` call${modelSuffix} (agent: \`gsd-executor\` or the default agent):`, "", "```", slicePrompt, @@ -2208,7 +2687,7 @@ export async function buildRewriteDocsPrompt( // DB primary path — get incomplete tasks let incompleteTasks: { id: string }[] | null = null; try { - const { isDbAvailable, getSliceTasks } = await import("./sf-db.js"); + const { isDbAvailable, getSliceTasks } = await import("./gsd-db.js"); if (isDbAvailable()) { incompleteTasks = getSliceTasks(mid, sid) .filter(t => t.status !== "complete" && t.status !== "done") @@ -2236,12 +2715,12 @@ export async function buildRewriteDocsPrompt( } } - const decisionsPath = resolveSfRootFile(base, "DECISIONS"); - if (existsSync(decisionsPath)) docList.push(`- Decisions: \`${relSfRootFile("DECISIONS")}\``); - const requirementsPath = resolveSfRootFile(base, "REQUIREMENTS"); - if (existsSync(requirementsPath)) docList.push(`- Requirements: \`${relSfRootFile("REQUIREMENTS")}\``); - const projectPath = resolveSfRootFile(base, "PROJECT"); - if (existsSync(projectPath)) docList.push(`- Project: \`${relSfRootFile("PROJECT")}\``); + const decisionsPath = resolveGsdRootFile(base, "DECISIONS"); + if (existsSync(decisionsPath)) docList.push(`- Decisions: \`${relGsdRootFile("DECISIONS")}\``); + const requirementsPath = resolveGsdRootFile(base, 
"REQUIREMENTS"); + if (existsSync(requirementsPath)) docList.push(`- Requirements: \`${relGsdRootFile("REQUIREMENTS")}\``); + const projectPath = resolveGsdRootFile(base, "PROJECT"); + if (existsSync(projectPath)) docList.push(`- Project: \`${relGsdRootFile("PROJECT")}\``); const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); const contextRel = relMilestoneFile(base, mid, "CONTEXT"); if (contextPath) docList.push(`- Milestone context (reference only): \`${contextRel}\``); @@ -2265,6 +2744,6 @@ export async function buildRewriteDocsPrompt( sliceTitle: sTitle, overrideContent, documentList, - overridesPath: relSfRootFile("OVERRIDES"), + overridesPath: relGsdRootFile("OVERRIDES"), }); } diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index 4d9a5628d..e186a4a2e 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -48,7 +48,7 @@ import { withTimeout, FINALIZE_PRE_TIMEOUT_MS, FINALIZE_POST_TIMEOUT_MS } from " import { getEligibleSlices } from "../slice-parallel-eligibility.js"; import { startSliceParallel } from "../slice-parallel-orchestrator.js"; import { isDbAvailable, getMilestoneSlices } from "../sf-db.js"; -import { ensurePlanV2Graph as ensurePlanningFlowGraph } from "../uok/plan-v2.js"; +import { ensurePlanV2Graph as ensurePlanningFlowGraph, isMissingFinalizedContextResult } from "../uok/plan-v2.js"; import { resolveUokFlags } from "../uok/flags.js"; import { UokGateRunner } from "../uok/gate-runner.js"; import { resetEvidence } from "../safety/evidence-collector.js"; @@ -409,18 +409,30 @@ export async function runPreDispatch( const compiled = ensurePlanningFlowGraph(s.basePath, state); if (!compiled.ok) { const reason = compiled.reason ?? 
"Planning flow compilation failed"; - await runPreDispatchGate({ - gateId: "planning-flow-gate", - gateType: "policy", - outcome: "manual-attention", - failureClass: "manual-attention", - rationale: "planning flow compile gate failed", - findings: reason, - milestoneId: state.activeMilestone?.id ?? undefined, - }); - ctx.ui.notify(`Plan gate failed-closed: ${reason}`, "error"); - await deps.pauseAuto(ctx, pi); - return { action: "break", reason: "planning-flow-gate-failed" }; + if (isMissingFinalizedContextResult(compiled)) { + await runPreDispatchGate({ + gateId: "planning-flow-gate", + gateType: "policy", + outcome: "pass", + failureClass: "none", + rationale: "plan v2 missing context recovery deferred to dispatch", + findings: reason, + milestoneId: state.activeMilestone?.id ?? undefined, + }); + } else { + await runPreDispatchGate({ + gateId: "planning-flow-gate", + gateType: "policy", + outcome: "manual-attention", + failureClass: "manual-attention", + rationale: "planning flow compile gate failed", + findings: reason, + milestoneId: state.activeMilestone?.id ?? undefined, + }); + ctx.ui.notify(`Plan gate failed-closed: ${reason}\n\nIf this keeps happening, try: /sf doctor heal`, "error"); + await deps.pauseAuto(ctx, pi); + return { action: "break", reason: "planning-flow-gate-failed" }; + } } await runPreDispatchGate({ gateId: "planning-flow-gate", diff --git a/src/resources/extensions/sf/bootstrap/write-gate.ts b/src/resources/extensions/sf/bootstrap/write-gate.ts index c3eea2aa3..c73e9a37c 100644 --- a/src/resources/extensions/sf/bootstrap/write-gate.ts +++ b/src/resources/extensions/sf/bootstrap/write-gate.ts @@ -1,7 +1,13 @@ import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs"; import { join } from "node:path"; -const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/; +/** + * Regex matching milestone CONTEXT.md file names in both legacy M001 + * and unique M001-abc123 formats. 
Exported so regex-hardening tests
+ * can exercise the real pattern rather than a drift-prone inline
+ * re-implementation.
+ */
+export const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/;
 const CONTEXT_MILESTONE_RE = /(?:^|[/\\])(M\d+(?:-[a-z0-9]{6})?)-CONTEXT\.md$/i;
 const DEPTH_VERIFICATION_MILESTONE_RE = /depth_verification[_-](M\d+(?:-[a-z0-9]{6})?)/i;
@@ -28,8 +34,29 @@ const QUEUE_SAFE_TOOLS = new Set([
 /**
  * Bash commands that are read-only / investigative — safe during queue mode.
  * Matches the leading command in a bash invocation.
+ *
+ * Extension policy: add commands here when they are read-only / diagnostic.
+ * Never add commands that mutate project state (write files, run builds that
+ * emit artifacts, install packages, etc.).
+ *
+ * Current read-only additions:
+ *   npm run           — read-only diagnostic scripts: test, lint, typecheck, etc.
+ *                        NOT: build, install, compile, generate, deploy (artifact-producing)
+ *   npm ls/list/info  — inspect installed packages (read-only)
+ *   npm outdated/audit — security/update checks (read-only)
+ *   npx               — run a package binary without installing globally (executes arbitrary code — trusted as diagnostic)
+ *   tsx               — TypeScript runner for dry-run / inspection scripts (executes arbitrary code — trusted as diagnostic)
+ *   node --print      — evaluate and print an expression (expression is trusted; not enforced side-effect-free)
+ *   python / python3  — -c one-liners, version flags, and -m pip show/list/site only
+ *   pip / pip2 / pip3 — show, list, freeze, check, index versions (read-only)
+ *   jq                — read-only JSON query
+ *   yq                — read-only YAML query
+ *   curl -s / curl --silent — fetch for inspection (no -o / no output redirect)
+ *   openssl           — version / x509 / s_client certificate and TLS inspection
+ *   env / printenv    — print environment variables (note: bare env\b also matches `env CMD…`)
+ *   true / false      — shell no-ops / test exit codes
 */
-const BASH_READ_ONLY_RE = 
/^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.sf|rtk\s)/; +const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.sf|rtk\s|npm\s+run\s+(test|test:\w+|lint|lint:\w+|typecheck|type-check|type-check:\w+|check|verify|audit|outdated|format:check|ci|validate)\b|npm\s+(ls|list|info|view|show|outdated|audit|explain|doctor|ping|--version|-v)\b|npx\s|tsx\s|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b|-m\s+(pip\s+show|pip\s+list|site))|pip[23]?\s+(show|list|freeze|check|index\s+versions)\b|jq\s|yq\s|curl\s+(-s\b|--silent\b)(?!\s+[^|>]*\s-[oO]\b)(?!\s+[^|>]*\s--output\b)[^|>]*$|openssl\s+(version|x509|s_client)|env\b|printenv\b|true\b|false\b)/; const verifiedDepthMilestones = new Set(); let activeQueuePhase = false; @@ -117,9 +144,21 @@ function normalizeWriteGateSnapshot(value: unknown): WriteGateSnapshot { }; } +const EMPTY_SNAPSHOT: WriteGateSnapshot = { + verifiedDepthMilestones: [], + activeQueuePhase: false, + pendingGateId: null, +}; + export function loadWriteGateSnapshot(basePath: string = process.cwd()): WriteGateSnapshot { const path = writeGateSnapshotPath(basePath); - if (!existsSync(path)) return currentWriteGateSnapshot(); + if (!existsSync(path)) { + // When persist mode is active and the file has been deleted, treat it as a + // full state reset so deleting the file clears the HARD BLOCK 
gate. + // In non-persist mode the file is never written, so fall back to in-memory. + if (shouldPersistWriteGateSnapshot()) return EMPTY_SNAPSHOT; + return currentWriteGateSnapshot(); + } try { return normalizeWriteGateSnapshot(JSON.parse(readFileSync(path, "utf-8"))); } catch { diff --git a/src/resources/extensions/sf/commands-bootstrap.ts b/src/resources/extensions/sf/commands-bootstrap.ts index 0f5e92a78..b2d9437f4 100644 --- a/src/resources/extensions/sf/commands-bootstrap.ts +++ b/src/resources/extensions/sf/commands-bootstrap.ts @@ -225,6 +225,7 @@ function getGsdArgumentCompletions(prefix: string) { { cmd: "update", desc: "Refresh the CODEBASE.md cache immediately" }, { cmd: "stats", desc: "Show codebase-map coverage and generation time" }, { cmd: "rag", desc: "Inspect optional project-rag code search backend" }, + { cmd: "rag build", desc: "Build vendored Rust project-rag and configure MCP" }, { cmd: "help", desc: "Show usage and subcommands" }, ], "codebase"); } diff --git a/src/resources/extensions/sf/commands-codebase.ts b/src/resources/extensions/sf/commands-codebase.ts index 9b66f8d74..5b743a0aa 100644 --- a/src/resources/extensions/sf/commands-codebase.ts +++ b/src/resources/extensions/sf/commands-codebase.ts @@ -15,6 +15,7 @@ import { readCodebaseMap, } from "./codebase-generator.js"; import { + buildProjectRagBinary, ensureProjectRagMcpConfig, formatProjectRagStatus, } from "./code-intelligence.js"; @@ -26,7 +27,7 @@ const USAGE = " generate [--max-files N] [--collapse-threshold N] — Generate or regenerate CODEBASE.md\n" + " update [--max-files N] [--collapse-threshold N] — Refresh the CODEBASE.md cache immediately\n" + " stats — Show file count, coverage, and generation time\n" + - " rag [status|init] — Inspect or configure optional project-rag MCP backend\n" + + " rag [status|init|build] — Inspect, build, or configure optional project-rag MCP backend\n" + " help — Show this help\n\n" + "With no subcommand, shows stats if a map exists or help if 
not.\n" + "SF also refreshes CODEBASE.md automatically before prompt injection and after completed units when tracked files change.\n\n" + @@ -35,8 +36,8 @@ const USAGE = " exclude_patterns: [\"docs/\", \"fixtures/\"]\n" + " max_files: 1000\n" + " collapse_threshold: 15\n" + - " project_rag: auto # auto | off | required\n" + - " project_rag_auto_index: true"; + " project_rag: auto # auto | off | required\n" + + " project_rag_auto_index: true"; export async function handleCodebase( args: string, @@ -141,7 +142,35 @@ export async function handleCodebase( } return; } - ctx.ui.notify(`Unknown /sf codebase rag action "${action}". Use status or init.`, "warning"); + if (action === "build") { + try { + const build = buildProjectRagBinary(basePath); + const result = ensureProjectRagMcpConfig(basePath, { + ...process.env, + SF_PROJECT_RAG_BIN: build.binaryPath, + }); + ctx.ui.notify( + [ + "Built project-rag release binary.", + "", + `Source: ${build.sourceDir}`, + `Binary: ${build.binaryPath}`, + `Cargo jobs: ${build.buildJobs} (override with SF_PROJECT_RAG_BUILD_JOBS)`, + `MCP config: ${result.configPath} (${result.status})`, + "", + "Restart the MCP client session so the new server and tools are loaded.", + ].join("\n"), + "success", + ); + } catch (err) { + ctx.ui.notify( + `Could not build project-rag: ${err instanceof Error ? err.message : String(err)}`, + "warning", + ); + } + return; + } + ctx.ui.notify(`Unknown /sf codebase rag action "${action}". 
Use status, init, or build.`, "warning"); return; } diff --git a/src/resources/extensions/sf/commands/catalog.ts b/src/resources/extensions/sf/commands/catalog.ts index 1e52e1669..1811896ff 100644 --- a/src/resources/extensions/sf/commands/catalog.ts +++ b/src/resources/extensions/sf/commands/catalog.ts @@ -250,6 +250,7 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "stats", desc: "Show file count, description coverage, and generation time" }, { cmd: "rag status", desc: "Show optional project-rag MCP backend status" }, { cmd: "rag init", desc: "Write .mcp.json entry for project-rag when a binary is available" }, + { cmd: "rag build", desc: "Build vendored Rust project-rag and write MCP config" }, { cmd: "help", desc: "Show usage and available subcommands" }, ], ship: [ diff --git a/src/resources/extensions/sf/context-store.ts b/src/resources/extensions/sf/context-store.ts index 45b0fb4dd..eb110b620 100644 --- a/src/resources/extensions/sf/context-store.ts +++ b/src/resources/extensions/sf/context-store.ts @@ -211,7 +211,13 @@ export function queryProject(): string | null { /** * Filter KNOWLEDGE.md sections by keyword matching. - * Uses H2 sections, matches keywords case-insensitively against: + * + * Structure-adaptive (issue #4719): files that organise entries as H3 items + * under one or more H2 topics are filtered at H3 granularity. Files with only + * H2 topic headers (no H3) fall back to H2-level filtering for backwards + * compatibility. + * + * Matches keywords case-insensitively against: * 1. Section header text * 2. 
First paragraph of section content (up to first blank line or next heading) * @@ -220,7 +226,7 @@ export function queryProject(): string | null { * * @param content - Full KNOWLEDGE.md content * @param keywords - Keywords to match (case-insensitive) - * @returns Concatenated matching sections with H2 headers, or empty string + * @returns Concatenated matching sections with their original heading prefix, or empty string */ export async function queryKnowledge(content: string, keywords: string[]): Promise { if (!content || keywords.length === 0) return ''; @@ -228,11 +234,23 @@ export async function queryKnowledge(content: string, keywords: string[]): Promi // Lazy import to avoid circular dependency const { extractAllSections } = await import('./files.js'); - const sections = extractAllSections(content, 2); + // Prefer H3 granularity when available; fall back to H2 for H2-only files. + // This prevents single-H2-with-many-H3 layouts from returning the entire + // file on a keyword match against the H2 header or its first paragraph. + const h3Sections = extractAllSections(content, 3); + const useH3 = h3Sections.size > 0; + const sections = useH3 ? h3Sections : extractAllSections(content, 2); if (sections.size === 0) return ''; + const prefix = useH3 ? '###' : '##'; - // Normalize keywords for case-insensitive matching - const normalizedKeywords = keywords.map(k => k.toLowerCase()); + // Trim, lowercase, drop empties, and de-dupe so callers can pass raw + // user-provided strings without risking empty-string / whitespace matches. 
+ const normalizedKeywords = [...new Set( + keywords + .map(k => k.trim().toLowerCase()) + .filter(k => k.length > 0), + )]; + if (normalizedKeywords.length === 0) return ''; const matchingSections: string[] = []; @@ -240,16 +258,15 @@ export async function queryKnowledge(content: string, keywords: string[]): Promi // Extract first paragraph: everything up to first blank line or next heading const firstParagraph = body.split(/\n\s*\n|\n#/)[0] || ''; - // Check if any keyword matches header or first paragraph const headerLower = header.toLowerCase(); const paragraphLower = firstParagraph.toLowerCase(); const matches = normalizedKeywords.some(kw => - headerLower.includes(kw) || paragraphLower.includes(kw) + headerLower.includes(kw) || paragraphLower.includes(kw), ); if (matches) { - matchingSections.push(`## ${header}\n\n${body}`); + matchingSections.push(`${prefix} ${header}\n\n${body}`); } } diff --git a/src/resources/extensions/sf/docs/preferences-reference.md b/src/resources/extensions/sf/docs/preferences-reference.md index ae2d14d7c..d9b251487 100644 --- a/src/resources/extensions/sf/docs/preferences-reference.md +++ b/src/resources/extensions/sf/docs/preferences-reference.md @@ -170,6 +170,9 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `project_rag`: `"auto"`, `"off"`, or `"required"` — use Brainwires/project-rag MCP search when configured. Default: `"auto"`. - `project_rag_server`: string — explicit MCP server name when the server cannot be detected from command or args. - `project_rag_auto_index`: boolean — whether agents should prefer indexing before querying a configured Project RAG backend. Default: `true`. + - `/sf codebase rag status` reports whether the Rust backend is actually operational. + - `/sf codebase rag init` writes a `.mcp.json` entry when a `project-rag` binary is available. 
+ - `/sf codebase rag build` builds vendored Brainwires/project-rag from `vendor/project-rag` (or `SF_PROJECT_RAG_SOURCE`) with `cargo build --release`, then writes the MCP config. The build defaults to `CARGO_BUILD_JOBS=2` so it does not saturate the workstation; override with `SF_PROJECT_RAG_BUILD_JOBS`. - `remote_questions`: route interactive questions to Slack/Discord for headless auto-mode. Keys: - `channel`: `"slack"` or `"discord"` — channel type. diff --git a/src/resources/extensions/sf/guided-flow.ts b/src/resources/extensions/sf/guided-flow.ts index 0a1e3a467..c05c990a2 100644 --- a/src/resources/extensions/sf/guided-flow.ts +++ b/src/resources/extensions/sf/guided-flow.ts @@ -628,8 +628,13 @@ export async function showHeadlessMilestoneCreation( // Set pending auto start (auto-mode triggers on "Milestone X ready." via checkAutoStartAfterDiscuss) pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, createdAt: Date.now() }); - // Dispatch — headless milestone creation is a planning activity - await dispatchWorkflow(pi, prompt, "sf-run", ctx, "plan-milestone"); + // Dispatch as discuss-milestone. The LLM writes PROJECT.md, REQUIREMENTS.md, + // and CONTEXT.md, then calls sf_plan_milestone — this is semantically the + // discuss path, just non-interactive. Using "plan-milestone" here caused + // model/tool routing to skip discuss-flow tool scoping and + // `checkAutoStartAfterDiscuss` guardrails that rely on the + // "discuss-"-prefixed unitType. + await dispatchWorkflow(pi, prompt, "sf-run", ctx, "discuss-milestone"); } diff --git a/src/resources/extensions/sf/milestone-scope-classifier.ts b/src/resources/extensions/sf/milestone-scope-classifier.ts new file mode 100644 index 000000000..bfe558474 --- /dev/null +++ b/src/resources/extensions/sf/milestone-scope-classifier.ts @@ -0,0 +1,302 @@ +// GSD-2 — Milestone scope classifier (#4781 / ADR-003 companion). +// +// Pure heuristics over milestone planning fields. 
Produces a PipelineVariant +// that downstream dispatch logic can use to shape the auto-mode sequence. +// No LLM calls, no file I/O, sub-millisecond. +// +// Distinct from `complexity-classifier.ts`, which decides *model tier* +// (light/standard/heavy) for an individual unit. This module decides +// *pipeline topology* for an entire milestone at plan-milestone time. +// +// This file ships the classifier in isolation. Dispatch-side wiring +// lands in follow-up PRs so the classification contract can be reviewed +// and tested before any behavior change reaches users. + +export type PipelineVariant = "trivial" | "standard" | "complex"; + +export interface MilestoneScopeInput { + /** Milestone vision / elevator pitch. Free-form prose. */ + vision?: string; + /** Success criteria, one per array entry. */ + successCriteria?: string[]; + /** Milestone title. */ + title?: string; + /** Slice risks declared at plan-milestone time. */ + keyRisks?: Array<{ risk?: string; whyItMatters?: string }>; + /** Definition-of-done lines. */ + definitionOfDone?: string[]; + /** Freeform "requirement coverage" marker. */ + requirementCoverage?: string; + /** Verification hints (contract/integration/operational/uat). */ + verificationContract?: string; + verificationIntegration?: string; + verificationOperational?: string; + verificationUat?: string; +} + +export interface ScopeClassificationResult { + variant: PipelineVariant; + /** Short human-readable reasons, one per triggered signal. */ + reasons: string[]; + /** Sub-signals for telemetry / debugging. Stable across releases. */ + signals: { + triggeredOverride: boolean; + complexCount: number; + trivialCount: number; + fileCountHint: number | null; + }; +} + +// ─── Keyword sets ───────────────────────────────────────────────────────── + +/** + * Override keywords that force `standard` (at minimum) regardless of + * apparent triviality. 
Presence of any of these signals work that is + * either security-sensitive, irreversible, or requires runtime verification + * a "trivial" pipeline would skip. + * + * Matched as case-insensitive substrings (not word-boundary). Conservative — err + * on the side of including a keyword; over-classifying to `standard` costs + * units, under-classifying could ship broken auth/security/migration work. + */ +const OVERRIDE_KEYWORDS: ReadonlyArray<string> = [ + // Security-sensitive + "security", "auth", "authn", "authz", "authentication", "authorization", + "credential", "secret", "password", "token", "oauth", "encrypt", "decrypt", + "vulnerability", "exploit", "permission", "rbac", "acl", + // Data-migration / irreversible + "migration", "migrate", "schema change", "data migration", + "backfill", "drop column", "drop table", + // Compliance / regulatory + "compliance", "gdpr", "hipaa", "soc2", "pci", + // Infra / deploy — runtime verification needed + "deploy", "rollout", "canary", "production database", +]; + +/** + * Keywords that contribute to `complex` classification on their own. + * Different from OVERRIDE_KEYWORDS in that a single match bumps to + * complex, not just to standard. + */ +const COMPLEX_KEYWORDS: ReadonlyArray<string> = [ + "multi-service", "distributed", "consensus", "saga", "eventual consistency", + "breaking change", "api contract change", "schema redesign", + "architect", "architecture", "refactor core", +]; + +/** + * Trivial-signal keywords: presence strongly suggests a simple, contained + * deliverable. Only effective when combined with low file count / no tests + * / no override keywords. + */ +const TRIVIAL_KEYWORDS: ReadonlyArray<string> = [ + "single file", "one file", "static html", "static page", + "one-page", "landing page", "readme", "docs only", "typo", "rename", + "spelling", "comment", "changelog", + // Browser-only / no-build deliverable shapes (b23 forensic case).
+ "pure html", "browser-based", "no build step", "no build tooling", + "localstorage", "client-only", "no backend", "no server", "no backend.", +]; + +// ─── Heuristics ─────────────────────────────────────────────────────────── + +/** + * Estimate how many distinct files the milestone will touch, based on + * explicit mentions in the input text. Returns `null` when no hint is + * discoverable — callers should treat that as "unknown, no signal." + */ +function extractFileCountHint(text: string): number | null { + // Explicit phrasing: "a single file", "two files", "3 files" + const singleFileMatch = /\b(a|one|single)\s+(file|page)\b/i.test(text); + if (singleFileMatch) return 1; + + const digitMatch = text.match(/\b(\d+)\s+files?\b/i); + if (digitMatch) { + const n = parseInt(digitMatch[1], 10); + if (!Number.isNaN(n)) return n; + } + + const wordMatch = text.match(/\b(two|three|four|five|six|seven|eight|nine|ten)\s+files?\b/i); + if (wordMatch) { + const wordMap: Record<string, number> = { + two: 2, three: 3, four: 4, five: 5, + six: 6, seven: 7, eight: 8, nine: 9, ten: 10, + }; + return wordMap[wordMatch[1].toLowerCase()] ?? null; + } + + return null; +} + +function containsAnyKeyword(haystack: string, keywords: ReadonlyArray<string>): string[] { + const lower = haystack.toLowerCase(); + const hits: string[] = []; + for (const kw of keywords) { + // Substring match, not word-boundary — keyword list is curated so that + // substring hits rarely overmatch. Phrases like "no authentication" still + // match "authentication" and force standard — that's the safe direction. + if (lower.includes(kw)) hits.push(kw); + } + return hits; +} + +/** + * True when `term` appears in the text without an immediately preceding + * negator (no / without / not / zero / skip) in the same clause. Used to + * keep phrases like "no backend" or "no tests" from flipping a trivial- + * class milestone to standard.
Best-effort; imperfect English parsing, + * biased toward false negatives (if unsure, treats term as present — + * which routes to standard, the safe pipeline). + */ +function mentionsWithoutNegation(text: string, term: string): boolean { + const lower = text.toLowerCase(); + const termPattern = new RegExp(String.raw`\b${term}\b`, "gi"); + const matches = Array.from(lower.matchAll(termPattern)); + for (const m of matches) { + const start = m.index ?? 0; + const windowStart = Math.max(0, start - 30); + const window = lower.slice(windowStart, start); + // Negator anywhere in the 30-char lookback window counts as negation — + // covers "no backend", "without a server", "not using api", "zero + // dependencies on an api". If a sentence break intervenes between the + // negator and the term, treat as a different clause (positive mention). + const hasNegator = /(^|[^a-z0-9])(no|without|not|zero|skip(s|ping)?|drops?)\b/i.test(window); + const hasSentenceBreak = /[.;!?]/.test(window); + if (hasNegator && !hasSentenceBreak) continue; + return true; + } + return false; +} + +function mentionsTests(haystack: string): boolean { + return mentionsWithoutNegation(haystack, "test") + || mentionsWithoutNegation(haystack, "tests") + || mentionsWithoutNegation(haystack, "testing") + || mentionsWithoutNegation(haystack, "spec") + || mentionsWithoutNegation(haystack, "unit test") + || mentionsWithoutNegation(haystack, "integration test"); +} + +function mentionsBackend(haystack: string): boolean { + return mentionsWithoutNegation(haystack, "api") + || mentionsWithoutNegation(haystack, "backend") + || mentionsWithoutNegation(haystack, "server") + || mentionsWithoutNegation(haystack, "database") + || mentionsWithoutNegation(haystack, "endpoint"); +} + +// ─── Public API ─────────────────────────────────────────────────────────── + +/** + * Classify a milestone's pipeline variant based on its planning inputs. + * + * Precedence: + * 1. Override keyword → `standard` (at minimum). 
Prevents trivial + * misclassification of security / auth / migration work. + * 2. Complex-signal keyword OR ≥ 8 file hint OR architecture/refactor-core + * language → `complex`. + * 3. Trivial-signal keyword AND ≤ 2 file hint AND no tests mentioned AND + * no backend mentioned → `trivial`. + * 4. Otherwise → `standard`. + * + * Ambiguity → `standard` (today's default). Safe to run the full pipeline. + */ +export function classifyMilestoneScope(input: MilestoneScopeInput): ScopeClassificationResult { + const haystack = [ + input.title ?? "", + input.vision ?? "", + (input.successCriteria ?? []).join("\n"), + (input.keyRisks ?? []).map(r => `${r.risk ?? ""} ${r.whyItMatters ?? ""}`).join("\n"), + (input.definitionOfDone ?? []).join("\n"), + input.requirementCoverage ?? "", + input.verificationContract ?? "", + input.verificationIntegration ?? "", + input.verificationOperational ?? "", + input.verificationUat ?? "", + ].join("\n"); + + const overrideHits = containsAnyKeyword(haystack, OVERRIDE_KEYWORDS); + const complexHits = containsAnyKeyword(haystack, COMPLEX_KEYWORDS); + const trivialHits = containsAnyKeyword(haystack, TRIVIAL_KEYWORDS); + const fileCountHint = extractFileCountHint(haystack); + const hasTests = mentionsTests(haystack); + const hasBackend = mentionsBackend(haystack); + + const reasons: string[] = []; + + // Rule 2: complex-class signals. Evaluated before override because a + // complex + override input should land in complex, not standard. 
+ if (complexHits.length > 0) { + reasons.push(`complex keywords: ${complexHits.slice(0, 3).join(", ")}`); + } + if (fileCountHint !== null && fileCountHint >= 8) { + reasons.push(`file count hint: ${fileCountHint}`); + } + + const isComplex = complexHits.length > 0 || (fileCountHint !== null && fileCountHint >= 8); + + if (isComplex) { + return { + variant: "complex", + reasons, + signals: { + triggeredOverride: overrideHits.length > 0, + complexCount: complexHits.length, + trivialCount: trivialHits.length, + fileCountHint, + }, + }; + } + + // Rule 1: override keywords force standard. + if (overrideHits.length > 0) { + return { + variant: "standard", + reasons: [`override keywords: ${overrideHits.slice(0, 3).join(", ")}`], + signals: { + triggeredOverride: true, + complexCount: complexHits.length, + trivialCount: trivialHits.length, + fileCountHint, + }, + }; + } + + // Rule 3: trivial signals — require ALL of: trivial-keyword, low file + // hint (or nothing suggesting high count), no test mention, no backend + // mention. + const fileCountOk = fileCountHint === null || fileCountHint <= 2; + const trivial = + trivialHits.length > 0 && + fileCountOk && + !hasTests && + !hasBackend; + + if (trivial) { + reasons.push(`trivial keywords: ${trivialHits.slice(0, 3).join(", ")}`); + if (fileCountHint !== null) reasons.push(`file count hint: ${fileCountHint}`); + reasons.push("no tests mentioned", "no backend mentioned"); + return { + variant: "trivial", + reasons, + signals: { + triggeredOverride: false, + complexCount: complexHits.length, + trivialCount: trivialHits.length, + fileCountHint, + }, + }; + } + + // Rule 4: fallback. + return { + variant: "standard", + reasons: reasons.length > 0 ? 
reasons : ["no strong signals — default"], + signals: { + triggeredOverride: overrideHits.length > 0, + complexCount: complexHits.length, + trivialCount: trivialHits.length, + fileCountHint, + }, + }; +} diff --git a/src/resources/extensions/sf/prompt-cache-optimizer.ts b/src/resources/extensions/sf/prompt-cache-optimizer.ts index f930d2a25..e7712f41c 100644 --- a/src/resources/extensions/sf/prompt-cache-optimizer.ts +++ b/src/resources/extensions/sf/prompt-cache-optimizer.ts @@ -55,6 +55,10 @@ const SEMI_STATIC_LABELS = new Set([ "prior-summaries", "project-context", "overrides", + // KNOWLEDGE is milestone-scoped (stable within a session), so it belongs + // in the cacheable prefix. See issue #4719. + "knowledge", + "project-knowledge", ]); /** Labels that change per-task */ diff --git a/src/resources/extensions/sf/prompts/discuss-headless.md b/src/resources/extensions/sf/prompts/discuss-headless.md index 59e3e0340..f7e55ff75 100644 --- a/src/resources/extensions/sf/prompts/discuss-headless.md +++ b/src/resources/extensions/sf/prompts/discuss-headless.md @@ -133,6 +133,8 @@ Print a structured depth summary in chat covering: This is your audit trail. Print it — do not skip it. +The final gate is the only question in headless mode. It is not an exploratory question round. Ask it only after printing the compact depth summary, and only to confirm whether the already-investigated context is final enough to write or should remain a draft. + Before writing final `CONTEXT.md`, decide confidence: - **HIGH**: You have verified the project knowledge above from actual files/tests/research, and the milestone scope is specific enough for downstream agents. Call `ask_user_questions` once with question ID `depth_verification_{{milestoneId}}_confirm`; make the recommended first option "Proceed with final context (Recommended)" and the second option "Keep as draft". If the confirmed answer is not received, do not bypass the gate. - **MEDIUM or LOW**: Do not call the gate. 
Write `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT-DRAFT.md` with the evidence, assumptions, and open questions, then stop. diff --git a/src/resources/extensions/sf/prompts/discuss.md b/src/resources/extensions/sf/prompts/discuss.md index 6313701ca..783195d8c 100644 --- a/src/resources/extensions/sf/prompts/discuss.md +++ b/src/resources/extensions/sf/prompts/discuss.md @@ -53,12 +53,21 @@ For subsequent rounds, continue investigating between rounds — check docs, sea Questions are organized into four layers. Each layer targets a specific depth dimension. At each layer: ask 1-3 open questions per round, investigate between rounds as needed, and gate before advancing. +**Question round shape:** Every question round must start with a compact progress header: +- **Current understanding** — 2-5 bullets using the user's terminology and the evidence you just found +- **Blocked decision** — the specific choice or uncertainty that prevents the next artifact from being strong +- **Why these questions** — one sentence explaining how the answers advance the milestone, roadmap, or requirements + +If an uncertainty is low-risk or would not change the next artifact, do not ask about it. Continue with a documented assumption instead. + **Default to open questions.** Use `ask_user_questions` only when there are 2-3 genuinely distinct paths with clear tradeoffs (e.g., "REST vs GraphQL" or "Postgres vs SQLite"). For nuanced design questions, ask in plain text and let the user explain. **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for binary/ternary choices. Keep option labels short (3-5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. 
Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.** **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1-3 focused questions. Wait for answers before asking the next round. +After each answer, summarize what materially changed in one concise sentence before continuing. Then update the working context, investigate any newly-opened unknown, and either advance to the next gate/artifact or ask the next focused round. + **Incremental persistence:** After every 2 question rounds (across any layer), silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` using `sf_summary_save` with `artifact_type: "CONTEXT-DRAFT"` and `milestone_id: "{{milestoneId}}"`. This protects confirmed work against session crashes. Do NOT mention this save to the user. ### Identify Work Type diff --git a/src/resources/extensions/sf/prompts/doctor-heal.md b/src/resources/extensions/sf/prompts/doctor-heal.md index d4b67a4e5..0f5e38e29 100644 --- a/src/resources/extensions/sf/prompts/doctor-heal.md +++ b/src/resources/extensions/sf/prompts/doctor-heal.md @@ -7,9 +7,10 @@ Rules: 2. Read before edit. 3. Prefer fixing authoritative artifacts over masking warnings. 4. For missing summaries or UAT files, generate the real artifact from existing slice/task context when possible — do not leave placeholders if you can reconstruct the real content. -5. After each repair cluster, verify the relevant invariant directly from disk. -6. When done, rerun `/sf doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared. -7. Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `sf_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes. +5. 
For a missing milestone `CONTEXT.md` when the milestone is already past `pre-planning` (phase is `executing`, `summarizing`, `validating-milestone`, or `completing-milestone`): the artifact was skipped during bootstrap and must be reconstructed before execution can resume. Read `PROJECT.md`, `REQUIREMENTS.md`, the milestone's `ROADMAP.md`, and any slice-level context on disk, then write `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT.md` with the real context. Do not leave a stub — the plan gate will reject it on the next cycle. +6. After each repair cluster, verify the relevant invariant directly from disk. +7. When done, rerun `/sf doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared. +8. Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `sf_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes. ## Doctor Summary diff --git a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md index ea85b076f..094cdd4cf 100644 --- a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md @@ -35,13 +35,20 @@ Ask **1–3 questions per round**. 
Keep each question focused on one of: - **The biggest technical unknowns / risks** — what could fail, what hasn't been proven - **What external systems/services this touches** — APIs, databases, third-party services +**Understanding + progress preface:** Before each question round, write a compact progress header in chat: +- **Current understanding** — 2–5 bullets using the user's terminology plus the evidence you just found +- **Blocked decision** — the specific choice or uncertainty that prevents a strong context file or roadmap +- **Why these questions** — one sentence explaining how the answers advance the milestone + +If an uncertainty is low-risk or would not change the context file, do not ask about it. Continue with a documented assumption instead. + **Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing. **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.** **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round. -After the user answers, investigate further if any answer opens a new unknown, then ask the next round. 
+After each answer, summarize what materially changed in one concise sentence, update your working understanding, investigate further if the answer opens a new unknown, then either continue to the next concrete artifact step or ask the next focused round. ### Round cadence diff --git a/src/resources/extensions/sf/prompts/guided-discuss-slice.md b/src/resources/extensions/sf/prompts/guided-discuss-slice.md index 59d5c4c29..f9760ee04 100644 --- a/src/resources/extensions/sf/prompts/guided-discuss-slice.md +++ b/src/resources/extensions/sf/prompts/guided-discuss-slice.md @@ -24,6 +24,13 @@ Do **not** go deep — just enough that your questions reflect what's actually t **Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing. +**Understanding + progress preface:** Before each question round, write a compact progress header in chat: +- **Current understanding** — 2–5 bullets using the user's terminology plus the evidence you just found +- **Blocked decision** — the specific choice or uncertainty that prevents a strong slice context +- **Why these questions** — one sentence explaining how the answers advance the slice + +If an uncertainty is low-risk or would not change the slice context, do not ask about it. Continue with a documented assumption instead. + **If `{{structuredQuestionsAvailable}}` is `true`:** Ask **1–3 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.** **If `{{structuredQuestionsAvailable}}` is `false`:** Ask **1–3 questions per round** in plain text. Number them and wait for the user's response before asking the next round. 
Keep each question focused on one of: @@ -32,7 +39,7 @@ Keep each question focused on one of: - **Scope boundaries** — what is explicitly in vs out for this slice? What deferred to later? - **Feel and experience** — tone, responsiveness, feedback, transitions, what "done" feels like to the user -After the user answers, investigate further if any answer opens a new unknown, then ask the next round. +After each answer, summarize what materially changed in one concise sentence, update your working understanding, investigate further if the answer opens a new unknown, then either continue to the next concrete artifact step or ask the next focused round. ### Round cadence diff --git a/src/resources/extensions/sf/prompts/system.md b/src/resources/extensions/sf/prompts/system.md index fbb61de2f..d42f4b916 100644 --- a/src/resources/extensions/sf/prompts/system.md +++ b/src/resources/extensions/sf/prompts/system.md @@ -41,6 +41,19 @@ SF ships with bundled skills. Load the relevant skill file with the `read` tool - In enduring files, write current state only unless the file is explicitly historical. - **Never take outward-facing actions on GitHub (or any external service) without explicit user confirmation.** This includes: creating issues, closing issues, merging PRs, approving PRs, posting comments, pushing to remote branches, publishing packages, or any other action that affects state outside the local filesystem. Read-only operations (listing, viewing, diffing) are fine. Always present what you intend to do and get a clear "yes" before executing. **Non-bypassable:** If the user does not respond, gives an ambiguous answer, or `ask_user_questions` fails, you MUST re-ask — never rationalize past the block ("tool not responding, I'll proceed" is forbidden). A missing "yes" is a "no." +### Question Efficiency Contract + +When you need user input, make the question round move the work forward: + +- State current understanding in 2-5 concise bullets before asking. 
+- Name the blocked decision: the one choice that cannot be resolved safely from code, docs, or reasonable assumptions. +- Ask only 1-3 high-leverage questions, each tied to a decision that materially changes plan, context, proof, scope, integration, or risk. +- Do not ask for facts you can infer by investigating; use tools first. +- Prefer recommended defaults and short options when using `ask_user_questions`; include the impact/tradeoff in each option and keep a freeform "Other / let me explain" path. +- If the answer would not change the next artifact or risk is low, continue with a documented assumption instead of blocking. +- After each answer, summarize what changed, persist or update the relevant context/draft when appropriate, and move to the next concrete step or next focused question round. +- Never ask a meta "ready?" question unless the depth gate or wrap-up criteria are satisfied. + If a `SF Skill Preferences` block is present below this contract, treat it as explicit durable guidance for which skills to use, prefer, or avoid during SF work. Follow it where it does not conflict with required SF artifact rules, verification requirements, or higher-priority system/developer instructions. ### Naming Convention diff --git a/src/resources/extensions/sf/tests/context-store.test.ts b/src/resources/extensions/sf/tests/context-store.test.ts index 5bb779b56..261dc66de 100644 --- a/src/resources/extensions/sf/tests/context-store.test.ts +++ b/src/resources/extensions/sf/tests/context-store.test.ts @@ -627,4 +627,83 @@ Integration tests mock external services. assert.strictEqual(result, '', 'empty content returns empty string'); }); + + // ── Regression: issue #4719 — single-H2 with many H3 entries ────────────── + // A KNOWLEDGE.md structured as one top-level H2 with many H3 entries must + // filter at H3 granularity; otherwise one keyword match against the H2 + // header or first paragraph returns the entire file. 
+ test("single H2 with many H3 entries filters at H3 level (issue #4719)", async () => { + const singleH2Knowledge = `# Project Knowledge + +## Patterns + +### Database: prepared statements +Always use prepared statements with SQLite. + +### API: versioned paths +Use /v1/resource style versioning. + +### Testing: node:test +Prefer node:test over external frameworks. + +### Deployment: blue-green +Blue-green deployment for zero-downtime releases. +`; + + const result = await queryKnowledge(singleH2Knowledge, ['database']); + + // Should include only the matching H3 entry, not the whole file + assert.match(result, /Database: prepared statements/, 'includes matching H3 entry'); + assert.ok( + !result.includes('API: versioned paths'), + 'does not include non-matching H3 entry', + ); + assert.ok( + !result.includes('Testing: node:test'), + 'does not include non-matching H3 entry', + ); + assert.ok( + !result.includes('Deployment: blue-green'), + 'does not include non-matching H3 entry', + ); + // The returned payload must be dramatically smaller than the full content + assert.ok( + result.length < singleH2Knowledge.length / 2, + `scoped result (${result.length} chars) should be <50% of full content (${singleH2Knowledge.length} chars)`, + ); + }); + + test("single H2 with H3 entries returns empty when no H3 matches (issue #4719)", async () => { + const singleH2Knowledge = `# Project Knowledge + +## Patterns + +### Database: prepared statements +Always use prepared statements with SQLite. + +### API: versioned paths +Use /v1/resource style versioning. +`; + + const result = await queryKnowledge(singleH2Knowledge, ['nonexistent']); + + assert.strictEqual(result, '', 'no H3 match returns empty string'); + }); + + test("falls back to H2 when no H3 headings exist at all", async () => { + // Backwards-compat: files with only H2 topic headers must still filter. + const h2OnlyKnowledge = `# Project Knowledge + +## Database Patterns +Use prepared statements. 
+ +## API Design +REST with OpenAPI. +`; + + const result = await queryKnowledge(h2OnlyKnowledge, ['database']); + + assert.match(result, /Database Patterns/, 'H2-only file falls back to H2 filtering'); + assert.ok(!result.includes('API Design'), 'non-matching H2 section excluded'); + }); }); diff --git a/src/resources/extensions/sf/tests/integration/state-machine-edge-cases.test.ts b/src/resources/extensions/sf/tests/integration/state-machine-edge-cases.test.ts index 23de467ed..c3ad90ff8 100644 --- a/src/resources/extensions/sf/tests/integration/state-machine-edge-cases.test.ts +++ b/src/resources/extensions/sf/tests/integration/state-machine-edge-cases.test.ts @@ -839,6 +839,24 @@ describe("dispatch failure modes", () => { assert.equal((result as any).unitType, "discuss-milestone"); }); + test("dispatch: incomplete milestone roadmap re-runs plan-milestone instead of missing-slice stop", async () => { + base = createFullFixture(); + openDatabase(join(base, ".sf", "sf.db")); + + const ctx = buildDispatchCtx(base, "M001", { + phase: "planning", + activeSlice: null, + activeTask: null, + nextAction: "Milestone M001 roadmap is incomplete (missing vision alignment meeting). 
Re-run plan-milestone with a weighted vision alignment meeting before execution.", + }); + + const result = await resolveDispatch(ctx); + assert.equal(result.action, "dispatch"); + assert.equal((result as any).unitType, "plan-milestone"); + assert.equal((result as any).unitId, "M001"); + assert.equal((result as any).matchedRule, "planning (roadmap incomplete) → plan-milestone"); + }); + test("dispatch: complete phase → stop with info level", async () => { base = createFullFixture(); openDatabase(join(base, ".sf", "sf.db")); @@ -862,11 +880,14 @@ describe("dispatch failure modes", () => { const runUatIdx = ruleNames.indexOf("run-uat (post-completion)"); const uatGateIdx = ruleNames.indexOf("uat-verdict-gate (non-PASS blocks progression)"); const executeIdx = ruleNames.indexOf("executing → execute-task"); + const repairIdx = ruleNames.indexOf("planning (roadmap incomplete) → plan-milestone"); + const planSliceIdx = ruleNames.indexOf("planning → plan-slice"); // summarizing should come before execute-task assert.ok(summarizeIdx < executeIdx, "summarizing rule should precede execute-task"); // run-uat should come before uat-verdict-gate assert.ok(runUatIdx < uatGateIdx, "run-uat should precede uat-verdict-gate"); + assert.ok(repairIdx < planSliceIdx, "milestone-plan repair should precede slice planning"); }); }); diff --git a/src/resources/extensions/sf/tests/knowledge.test.ts b/src/resources/extensions/sf/tests/knowledge.test.ts index 04ccd3342..323481f4f 100644 --- a/src/resources/extensions/sf/tests/knowledge.test.ts +++ b/src/resources/extensions/sf/tests/knowledge.test.ts @@ -15,7 +15,7 @@ import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, rmSync, realpathSy import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { SF_ROOT_FILES, resolveSfRootFile } from '../paths.ts'; -import { inlineGsdRootFile } from '../auto-prompts.ts'; +import { inlineGsdRootFile, inlineKnowledgeBudgeted } from '../auto-prompts.ts'; import { appendKnowledge } 
from '../files.ts'; import { loadKnowledgeBlock } from '../bootstrap/system-context.ts'; @@ -248,3 +248,95 @@ test('loadKnowledgeBlock: reports globalSizeKb above 4KB threshold', () => { rmSync(tmp, { recursive: true, force: true }); }); + +// ─── inlineKnowledgeBudgeted — issue #4719 ───────────────────────────────── +// Milestone-phase prompts must not inject the full KNOWLEDGE.md. The budgeted +// helper scopes by milestone-level keywords and caps the injected size. + +test('inlineKnowledgeBudgeted: returns scoped H3 entries for single-H2 file', async () => { + const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-'))); + const gsdDir = join(tmp, '.gsd'); + mkdirSync(gsdDir, { recursive: true }); + + const content = `# Project Knowledge + +## Patterns + +### Database: prepared statements +Always use prepared statements with SQLite. + +### API: versioned paths +Use /v1/resource style versioning. + +### Testing: node:test +Prefer node:test over external frameworks. +`; + writeFileSync(join(gsdDir, 'KNOWLEDGE.md'), content); + + const result = await inlineKnowledgeBudgeted(tmp, ['database']); + assert.ok(result !== null, 'should return content'); + assert.ok(result!.includes('Database: prepared statements'), 'includes matching H3'); + assert.ok(!result!.includes('API: versioned paths'), 'excludes non-matching H3'); + + rmSync(tmp, { recursive: true, force: true }); +}); + +test('inlineKnowledgeBudgeted: caps payload below budget for large files', async () => { + const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-'))); + const gsdDir = join(tmp, '.gsd'); + mkdirSync(gsdDir, { recursive: true }); + + // Build a 200KB KNOWLEDGE with 500 H3 entries all matching 'shared' + const entries = Array.from({ length: 500 }, (_, i) => + `### Entry ${i}: shared topic\n${'filler text '.repeat(30)}\n`, + ).join('\n'); + const content = `# Project Knowledge\n\n## Patterns\n\n${entries}`; + writeFileSync(join(gsdDir, 'KNOWLEDGE.md'), content); + + const 
BUDGET_CHARS = 30_000; + const result = await inlineKnowledgeBudgeted(tmp, ['shared'], { maxChars: BUDGET_CHARS }); + assert.ok(result !== null, 'should return content'); + // Allow some overhead for header formatting, but must stay close to budget + assert.ok( + result!.length <= BUDGET_CHARS + 500, + `payload ${result!.length} chars should be <= budget ${BUDGET_CHARS} (+overhead)`, + ); + // Far smaller than the raw file + assert.ok( + result!.length < content.length / 4, + `payload should be much smaller than full content (${content.length} chars)`, + ); + assert.match( + result!, + /\[\.\.\.truncated \d+ chars; rerun with narrower scope if needed\]/, + 'should include truncation note when budget is exceeded', + ); + + rmSync(tmp, { recursive: true, force: true }); +}); + +test('inlineKnowledgeBudgeted: returns null when no KNOWLEDGE.md exists', async () => { + const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-'))); + const gsdDir = join(tmp, '.gsd'); + mkdirSync(gsdDir, { recursive: true }); + + const result = await inlineKnowledgeBudgeted(tmp, ['database']); + assert.strictEqual(result, null); + + rmSync(tmp, { recursive: true, force: true }); +}); + +test('inlineKnowledgeBudgeted: returns null when no entries match', async () => { + const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-'))); + const gsdDir = join(tmp, '.gsd'); + mkdirSync(gsdDir, { recursive: true }); + writeFileSync( + join(gsdDir, 'KNOWLEDGE.md'), + '# Project Knowledge\n\n## Patterns\n\n### Database\nuse it\n', + ); + + const result = await inlineKnowledgeBudgeted(tmp, ['nonexistent']); + assert.strictEqual(result, null); + + rmSync(tmp, { recursive: true, force: true }); +}); diff --git a/src/resources/extensions/sf/tests/milestone-scope-classifier.test.ts b/src/resources/extensions/sf/tests/milestone-scope-classifier.test.ts new file mode 100644 index 000000000..1dd8f98ca --- /dev/null +++ 
b/src/resources/extensions/sf/tests/milestone-scope-classifier.test.ts @@ -0,0 +1,188 @@ +// GSD-2 — #4781: classifier behavior matrix. Pure-function tests, no I/O. + +import test from "node:test"; +import assert from "node:assert/strict"; + +import { + classifyMilestoneScope, + type MilestoneScopeInput, +} from "../milestone-scope-classifier.ts"; + +// ─── Classification matrix ──────────────────────────────────────────────── + +test("#4781 classifier: single static HTML to-do app → trivial (b23 forensic case)", () => { + const input: MilestoneScopeInput = { + title: "To-Do App", + vision: "A minimal, clean browser-based to-do app. Pure HTML/CSS/JS, no build step, no backend. Tasks persist in localStorage.", + successCriteria: [ + "Open index.html in any browser without a server", + "Add tasks by typing and pressing Enter or clicking Add", + "Mark tasks complete (toggleable)", + "Delete individual tasks", + "Tasks survive a page reload via localStorage", + ], + }; + const r = classifyMilestoneScope(input); + assert.strictEqual(r.variant, "trivial", `expected trivial, got ${r.variant} — reasons: ${r.reasons.join("; ")}`); + assert.ok(r.reasons.some(s => s.includes("trivial keywords")), "should cite trivial keywords"); +}); + +test("#4781 classifier: readme typo fix → trivial", () => { + const r = classifyMilestoneScope({ + title: "Fix README typo", + vision: "Correct spelling error in the installation section.", + successCriteria: ["Typo fixed", "README renders correctly"], + }); + assert.strictEqual(r.variant, "trivial"); +}); + +test("#4781 classifier: auth flow single file → standard (override beats trivial)", () => { + const r = classifyMilestoneScope({ + title: "Add login", + vision: "Implement authentication flow in a single file with OAuth credentials.", + successCriteria: ["User can log in"], + }); + assert.strictEqual(r.variant, "standard", `override should beat single-file signal. 
reasons: ${r.reasons.join("; ")}`);
+  assert.ok(r.signals.triggeredOverride, "override signals should be flagged");
+  assert.ok(r.reasons.some(s => s.includes("override keywords")));
+});
+
+test("#4781 classifier: security review scope → standard (even if small)", () => {
+  const r = classifyMilestoneScope({
+    title: "Harden session tokens",
+    vision: "Review and patch security vulnerability in one session token helper.",
+    successCriteria: ["No XSS via token"],
+  });
+  assert.strictEqual(r.variant, "standard");
+  assert.ok(r.signals.triggeredOverride);
+});
+
+test("#4781 classifier: schema migration mentioned → standard (override-level, not complex)", () => {
+  const r = classifyMilestoneScope({
+    title: "User profile v2",
+    vision: "Perform schema migration to split user.name into first_name and last_name across the users table.",
+    successCriteria: ["Migration lands", "Existing rows backfilled", "Rollback path validated"],
+  });
+  // "migration" appears only in OVERRIDE_KEYWORDS, not in COMPLEX_KEYWORDS
+  // ("schema redesign" / "breaking change" are the complex triggers, and this
+  // copy contains neither). With no complex keyword firing, the override
+  // signal decides the outcome, so the classifier returns standard.
+  // This is the intended safe behavior: migration is override-level, not complex-level. 
+ assert.strictEqual(r.variant, "standard", `reasons: ${r.reasons.join("; ")}`); +}); + +test("#4781 classifier: architecture keyword → complex", () => { + const r = classifyMilestoneScope({ + title: "Redesign plugin registry", + vision: "Refactor core architecture of the plugin registry to support versioned contracts.", + }); + assert.strictEqual(r.variant, "complex"); + assert.ok(r.reasons.some(s => s.includes("complex keywords"))); +}); + +test("#4781 classifier: >=8 files hint → complex", () => { + const r = classifyMilestoneScope({ + title: "Multi-file refactor", + vision: "Touch 12 files to extract shared helpers.", + }); + assert.strictEqual(r.variant, "complex"); + assert.strictEqual(r.signals.fileCountHint, 12); +}); + +test("#4781 classifier: backend API mention → standard (not trivial)", () => { + const r = classifyMilestoneScope({ + title: "Health endpoint", + vision: "Add a single-file API endpoint returning status.", + successCriteria: ["/health returns 200"], + }); + // Single file + no override + but backend mentioned → not trivial + assert.strictEqual(r.variant, "standard"); +}); + +test("#4781 classifier: tests mentioned → standard (not trivial)", () => { + const r = classifyMilestoneScope({ + title: "Landing page", + vision: "Ship a static one-page landing page with unit tests for the form validation.", + }); + assert.strictEqual(r.variant, "standard", `reasons: ${r.reasons.join("; ")}`); +}); + +test("#4781 classifier: ambiguous prose → standard (safe default)", () => { + const r = classifyMilestoneScope({ + title: "Generic improvements", + vision: "Make the system better.", + successCriteria: ["It's better"], + }); + assert.strictEqual(r.variant, "standard"); + assert.ok(r.reasons.includes("no strong signals — default")); +}); + +test("#4781 classifier: empty input → standard (safe default)", () => { + const r = classifyMilestoneScope({}); + assert.strictEqual(r.variant, "standard"); +}); + +// ─── Override precedence over trivial 
────────────────────────────────────── + +test("#4781 classifier: override + trivial keyword → standard (override wins)", () => { + const r = classifyMilestoneScope({ + title: "Token rotation", + vision: "Single file change to rotate the oauth token expiry schedule.", + }); + // "single file" is trivial signal; "oauth" is override signal. Override wins. + assert.strictEqual(r.variant, "standard"); + assert.ok(r.signals.triggeredOverride); +}); + +test("#4781 classifier: complex + override → complex (complex wins, flagged)", () => { + const r = classifyMilestoneScope({ + title: "Auth service refactor", + vision: "Refactor core authentication architecture across services.", + }); + // Complex (architecture, refactor core) wins over override (auth). + assert.strictEqual(r.variant, "complex"); + // Override still recorded in signals for telemetry. + assert.ok(r.signals.triggeredOverride, "override hits should still be tracked in signals"); +}); + +// ─── File count hint extraction ─────────────────────────────────────────── + +test("#4781 classifier: 'a single file' hint parsed as 1", () => { + const r = classifyMilestoneScope({ + title: "Tweak", + vision: "Update a single file to flip the copy.", + }); + assert.strictEqual(r.signals.fileCountHint, 1); +}); + +test("#4781 classifier: 'two files' hint parsed as 2", () => { + const r = classifyMilestoneScope({ + title: "Minor", + vision: "Touch two files.", + }); + assert.strictEqual(r.signals.fileCountHint, 2); +}); + +test("#4781 classifier: '12 files' hint parsed as 12", () => { + const r = classifyMilestoneScope({ + title: "Bulk", + vision: "Update 12 files.", + }); + assert.strictEqual(r.signals.fileCountHint, 12); +}); + +// ─── Reasons surface useful debugging info ───────────────────────────────── + +test("#4781 classifier: reasons array populated for every branch", () => { + const branches: Array<[string, MilestoneScopeInput]> = [ + ["trivial", { title: "Readme typo", vision: "Fix a single file typo." 
}], + ["standard (override)", { title: "Auth", vision: "Touch auth helper." }], + ["complex (keyword)", { title: "Arch", vision: "Refactor core system design." }], + ["complex (file count)", { title: "Bulk", vision: "Update 9 files." }], + ["standard (default)", { title: "Generic", vision: "General work." }], + ]; + for (const [label, input] of branches) { + const r = classifyMilestoneScope(input); + assert.ok(r.reasons.length > 0, `${label}: reasons must not be empty`); + } +}); diff --git a/src/resources/extensions/sf/tests/prompt-cache-optimizer.test.ts b/src/resources/extensions/sf/tests/prompt-cache-optimizer.test.ts index 67e01d685..6199ec174 100644 --- a/src/resources/extensions/sf/tests/prompt-cache-optimizer.test.ts +++ b/src/resources/extensions/sf/tests/prompt-cache-optimizer.test.ts @@ -64,6 +64,18 @@ describe("prompt-cache-optimizer: classifySection", () => { assert.equal(classifySection("overrides"), "semi-static"); }); + // Regression: issue #4719 — KNOWLEDGE falls through to dynamic default. + // Knowledge content is reused across all tasks within a milestone, so it + // must be classified as semi-static to qualify for prefix caching when the + // cache optimizer is wired into the prompt path. 
+ it("classifies knowledge as semi-static (issue #4719)", () => { + assert.equal(classifySection("knowledge"), "semi-static"); + }); + + it("classifies project-knowledge as semi-static (issue #4719)", () => { + assert.equal(classifySection("project-knowledge"), "semi-static"); + }); + it("classifies task-plan as dynamic", () => { assert.equal(classifySection("task-plan"), "dynamic"); }); diff --git a/src/resources/extensions/sf/tests/prompt-contracts.test.ts b/src/resources/extensions/sf/tests/prompt-contracts.test.ts index 9a63a65c2..49d57c37b 100644 --- a/src/resources/extensions/sf/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/sf/tests/prompt-contracts.test.ts @@ -57,6 +57,15 @@ test("system prompt hard rules forbid fabricating user responses", () => { assert.match(prompt, /ask_user_questions.*only valid structured user input/i); }); +test("system prompt makes question rounds efficient and progress-oriented", () => { + const prompt = readPrompt("system"); + assert.match(prompt, /Question Efficiency Contract/i); + assert.match(prompt, /State current understanding in 2-5 concise bullets/i); + assert.match(prompt, /Name the blocked decision/i); + assert.match(prompt, /continue with a documented assumption instead of blocking/i); + assert.match(prompt, /After each answer, summarize what changed/i); +}); + test("discuss prompt allows implementation questions when they materially matter", () => { const prompt = readPrompt("discuss"); assert.match(prompt, /Lead with experience, but ask implementation when it materially matters/i); @@ -77,6 +86,29 @@ test("guided discussion prompts avoid wrap-up prompts after every round", () => assert.match(slicePrompt, /Never fabricate or simulate user input/i); }); +test("guided discussion prompts require understanding and progress before questions", () => { + const milestonePrompt = readPrompt("guided-discuss-milestone"); + const slicePrompt = readPrompt("guided-discuss-slice"); + assert.match(milestonePrompt, 
/Understanding \+ progress preface/i); + assert.match(slicePrompt, /Understanding \+ progress preface/i); + assert.match(milestonePrompt, /Current understanding/i); + assert.match(slicePrompt, /Current understanding/i); + assert.match(milestonePrompt, /Blocked decision/i); + assert.match(slicePrompt, /Blocked decision/i); + assert.match(milestonePrompt, /After each answer, summarize what materially changed/i); + assert.match(slicePrompt, /After each answer, summarize what materially changed/i); +}); + +test("discuss prompt keeps each question round tied to progress", () => { + const prompt = readPrompt("discuss"); + assert.match(prompt, /Question round shape/i); + assert.match(prompt, /Current understanding/i); + assert.match(prompt, /Blocked decision/i); + assert.match(prompt, /Why these questions/i); + assert.match(prompt, /documented assumption/i); + assert.match(prompt, /After each answer, summarize what materially changed/i); +}); + test("guided milestone discussion scopes depth verification to the milestone id", () => { const prompt = readPrompt("guided-discuss-milestone"); assert.match(prompt, /depth_verification_\{\{milestoneId\}\}/, "depth verification id should include the milestone id"); @@ -104,6 +136,14 @@ test("headless milestone creation preserves depth gate and draft fallback", () = assert.doesNotMatch(prompt, /\*\*DO NOT ask the user any questions\*\*/); }); +test("headless milestone creation uses one final question gate, not exploratory questions", () => { + const prompt = readPrompt("discuss-headless"); + assert.match(prompt, /The final gate is the only question in headless mode/i); + assert.match(prompt, /not an exploratory question round/i); + assert.match(prompt, /compact depth summary/i); + assert.match(prompt, /write or should remain a draft/i); +}); + test("queue prompt requires waiting for user response between rounds", () => { const prompt = readPrompt("queue"); assert.match(prompt, /Never fabricate or simulate user input during this 
discussion/i); diff --git a/src/resources/extensions/sf/tests/write-gate.test.ts b/src/resources/extensions/sf/tests/write-gate.test.ts index 8b847a376..0102a6478 100644 --- a/src/resources/extensions/sf/tests/write-gate.test.ts +++ b/src/resources/extensions/sf/tests/write-gate.test.ts @@ -225,6 +225,7 @@ import { setPendingGate, clearPendingGate, getPendingGate, + loadWriteGateSnapshot, } from '../bootstrap/write-gate.ts'; // ─── Scenario 19: isGateQuestionId recognizes all gate patterns ── @@ -333,6 +334,8 @@ test('write-gate: shouldBlockPendingGateBash allows read-only commands during pe assert.strictEqual(shouldBlockPendingGateBash('git log --oneline', 'M001').block, false); assert.strictEqual(shouldBlockPendingGateBash('grep -r pattern .', 'M001').block, false); assert.strictEqual(shouldBlockPendingGateBash('ls -la', 'M001').block, false); + assert.strictEqual(shouldBlockPendingGateBash('npm run test', 'M001').block, false); + assert.strictEqual(shouldBlockPendingGateBash('npm run typecheck', 'M001').block, false); clearDiscussionFlowState(); }); @@ -367,6 +370,27 @@ test('write-gate: resetWriteGateState clears pending gate', () => { assert.strictEqual(getPendingGate(), null); }); +test('write-gate: persisted snapshot deletion clears hard block when persistence is enabled', () => { + const previous = process.env.SF_PERSIST_WRITE_GATE_STATE; + process.env.SF_PERSIST_WRITE_GATE_STATE = '1'; + try { + setPendingGate('depth_verification'); + const snapshot = loadWriteGateSnapshot(`/tmp/sf-write-gate-missing-${process.pid}`); + assert.deepStrictEqual(snapshot, { + verifiedDepthMilestones: [], + activeQueuePhase: false, + pendingGateId: null, + }); + } finally { + if (previous === undefined) { + delete process.env.SF_PERSIST_WRITE_GATE_STATE; + } else { + process.env.SF_PERSIST_WRITE_GATE_STATE = previous; + } + clearDiscussionFlowState(); + } +}); + // ─── Standard options fixture used across depth confirmation tests ── const STANDARD_OPTIONS = [ diff --git 
a/src/resources/extensions/sf/uok/plan-v2.ts b/src/resources/extensions/sf/uok/plan-v2.ts index 8440a1f0c..af0046b22 100644 --- a/src/resources/extensions/sf/uok/plan-v2.ts +++ b/src/resources/extensions/sf/uok/plan-v2.ts @@ -7,13 +7,17 @@ import { isDbAvailable, getMilestoneSlices, getSliceTasks, type SliceRow } from import type { UokGraphNode } from "./contracts.js"; const PLAN_V2_CLARIFY_ROUND_LIMIT = 3; -const EXECUTION_ENTRY_PHASES: ReadonlySet = new Set([ +export const EXECUTION_ENTRY_PHASES: ReadonlySet = new Set([ "executing", "summarizing", "validating-milestone", "completing-milestone", ]); +export function isExecutionEntryPhase(phase: Phase): boolean { + return EXECUTION_ENTRY_PHASES.has(phase); +} + export interface PlanV2CompileResult { ok: boolean; reason?: string; @@ -48,10 +52,6 @@ function countSliceResearchArtifacts(basePath: string, milestoneId: string, slic return count; } -function isExecutionEntryPhase(phase: Phase): boolean { - return EXECUTION_ENTRY_PHASES.has(phase); -} - export function compileUnitGraphFromState(basePath: string, state: SFState): PlanV2CompileResult { const mid = state.activeMilestone?.id; if (!mid) return { ok: false, reason: "no active milestone" }; @@ -146,6 +146,14 @@ export function compileUnitGraphFromState(basePath: string, state: SFState): Pla }; } +export function hasFinalizedMilestoneContext(basePath: string, milestoneId: string): boolean { + return hasFileContent(resolveMilestoneFile(basePath, milestoneId, "CONTEXT")); +} + +export function isMissingFinalizedContextResult(result: PlanV2CompileResult): boolean { + return !result.ok && result.finalizedContextIncluded === false; +} + export function ensurePlanV2Graph(basePath: string, state: SFState): PlanV2CompileResult { const compiled = compileUnitGraphFromState(basePath, state); if (!compiled.ok) return compiled; diff --git a/src/tests/resource-loader-content-hash.test.ts b/src/tests/resource-loader-content-hash.test.ts new file mode 100644 index 
000000000..50d2dfd18 --- /dev/null +++ b/src/tests/resource-loader-content-hash.test.ts @@ -0,0 +1,83 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +/** + * Regression test for gsd-build/gsd-2 #4787. + * + * Background: `computeResourceFingerprint` previously hashed the relative + * file path + file size only. Same-byte-length edits to bundled prompt + * templates (e.g. the #4570 retry-cap fix to parallel-research-slices.md) + * slipped through the fingerprint gate in `initResources`, so existing + * installs silently kept serving the stale cached copy from + * `~/.gsd/agent/extensions/gsd/prompts/`. + * + * The fix hashes file CONTENTS (sha256) instead of just size — any edit, + * regardless of length, produces a different fingerprint and triggers a + * resync on next launch. + */ + +test("computeResourceFingerprint detects same-size content edits (#4787)", async (t) => { + const { computeResourceFingerprint } = await import("../resource-loader.ts"); + + const tmp = mkdtempSync(join(tmpdir(), "gsd-fingerprint-content-")); + t.after(() => { rmSync(tmp, { recursive: true, force: true }); }); + + const dirA = join(tmp, "bundled-a"); + const dirB = join(tmp, "bundled-b"); + mkdirSync(join(dirA, "prompts"), { recursive: true }); + mkdirSync(join(dirB, "prompts"), { recursive: true }); + + // Same byte length (32 bytes each), different content — mirrors the + // real-world #4787 scenario where a hotfix edit keeps the file size + // stable but changes load-bearing instructions. 
+ const contentA = "retry subagent once then BLOCKER"; // 32 bytes + const contentB = "retry subagent forever never stp"; // 32 bytes + assert.equal(Buffer.byteLength(contentA), Buffer.byteLength(contentB)); + + writeFileSync(join(dirA, "prompts", "foo.md"), contentA); + writeFileSync(join(dirB, "prompts", "foo.md"), contentB); + + const hashA = computeResourceFingerprint(dirA); + const hashB = computeResourceFingerprint(dirB); + + assert.notEqual( + hashA, + hashB, + "same-size, different-content trees must yield different fingerprints", + ); +}); + +test("syncResourceDir overwrites same-size stale content on refresh (#4787)", async (t) => { + const { syncResourceDir } = await import("../resource-loader.ts"); + + const tmp = mkdtempSync(join(tmpdir(), "gsd-sync-samesize-")); + t.after(() => { rmSync(tmp, { recursive: true, force: true }); }); + + const bundled = join(tmp, "bundled", "prompts"); + const installed = join(tmp, "installed", "prompts"); + mkdirSync(bundled, { recursive: true }); + mkdirSync(installed, { recursive: true }); + + // Bundled (new): the post-#4570 fix template + const newContent = "retry subagent once then BLOCKER"; + // Installed (stale): pre-#4570 template with the same byte length + const staleContent = "retry subagent forever never stp"; + assert.equal(Buffer.byteLength(newContent), Buffer.byteLength(staleContent)); + + writeFileSync(join(bundled, "parallel-research-slices.md"), newContent); + writeFileSync(join(installed, "parallel-research-slices.md"), staleContent); + + // syncResourceDir always force-copies; this guards that the copy path + // itself overwrites regardless of size. + syncResourceDir(join(tmp, "bundled"), join(tmp, "installed")); + + const actual = readFileSync(join(installed, "parallel-research-slices.md"), "utf-8"); + assert.equal( + actual, + newContent, + "installed prompt must be overwritten with bundled content even when sizes match", + ); +});