From 4fdd8700a3c8b986aabd0b9c893941e607289d36 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 25 Apr 2026 08:08:11 +0200 Subject: [PATCH] port gsd2 upstream features: scope classifier, composer v2, GPT-5.5, test timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - milestone-scope-classifier: add getMilestonePipelineVariant + milestoneRowToScopeInput wired into auto-dispatch trivial-skip for research/validation phases (#4781) - auto-prompts: rename GSD→SF identifiers, add isSummaryCleanForSkip, prefs param on checkNeedsReassessment, buildExtractionStepsBlock from commands-extract-learnings - unit-context-manifest + unit-context-composer: port v2 typed computed artifacts (#4924) - skill-manifest: per-unit-type skill filter resolver (#4788, #4792) - escalation: stub for ADR-011 mid-execution escalation (full port deferred) - auto-start: extract decideSurvivorAction for testability (#4832) - models: add gpt-5.5 + gpt-5.4-mini to cost table, router, and models.generated.ts - types: EscalationArtifact, context_window_override, skip_clean_reassess, mid_execution_escalation, sketch_scope on SliceRow - tool-execution: add visibleWidth import (was undefined) - package.json: add --test-timeout=30000 to prevent parallel tests from freezing machine Co-Authored-By: Claude Sonnet 4.6 --- package.json | 2 +- packages/pi-ai/src/models.generated.ts | 17 + .../interactive/components/tool-execution.ts | 1 + src/resources/extensions/sf/auto-dispatch.ts | 16 +- src/resources/extensions/sf/auto-prompts.ts | 145 +++-- src/resources/extensions/sf/auto-start.ts | 21 +- .../extensions/sf/code-intelligence.ts | 393 ++++++++++++ .../sf/commands-extract-learnings.ts | 84 +++ src/resources/extensions/sf/escalation.ts | 9 + .../extensions/sf/milestone-quality.ts | 153 +++++ .../sf/milestone-scope-classifier.ts | 45 ++ .../extensions/sf/model-cost-table.ts | 4 + src/resources/extensions/sf/model-router.ts | 9 + 
src/resources/extensions/sf/plan-quality.ts | 140 +++++ .../extensions/sf/preferences-types.ts | 10 + src/resources/extensions/sf/service-tier.ts | 3 + src/resources/extensions/sf/sf-db.ts | 2 + src/resources/extensions/sf/skill-manifest.ts | 175 ++++++ .../sf/tests/code-intelligence.test.ts | 194 ++++++ .../sf/tests/complete-slice-composer.test.ts | 147 +++++ .../sf/tests/remote-questions-manager.test.ts | 69 +++ .../tests/research-milestone-composer.test.ts | 97 +++ .../sf/tests/run-uat-composer.test.ts | 113 ++++ .../sf/tests/unit-context-composer.test.ts | 175 ++++++ .../sf/tests/unit-context-manifest.test.ts | 169 ++++++ src/resources/extensions/sf/types.ts | 26 + .../extensions/sf/unit-context-composer.ts | 197 ++++++ .../extensions/sf/unit-context-manifest.ts | 574 ++++++++++++++++++ 28 files changed, 2927 insertions(+), 63 deletions(-) create mode 100644 src/resources/extensions/sf/code-intelligence.ts create mode 100644 src/resources/extensions/sf/escalation.ts create mode 100644 src/resources/extensions/sf/milestone-quality.ts create mode 100644 src/resources/extensions/sf/plan-quality.ts create mode 100644 src/resources/extensions/sf/skill-manifest.ts create mode 100644 src/resources/extensions/sf/tests/code-intelligence.test.ts create mode 100644 src/resources/extensions/sf/tests/complete-slice-composer.test.ts create mode 100644 src/resources/extensions/sf/tests/remote-questions-manager.test.ts create mode 100644 src/resources/extensions/sf/tests/research-milestone-composer.test.ts create mode 100644 src/resources/extensions/sf/tests/run-uat-composer.test.ts create mode 100644 src/resources/extensions/sf/tests/unit-context-composer.test.ts create mode 100644 src/resources/extensions/sf/tests/unit-context-manifest.test.ts create mode 100644 src/resources/extensions/sf/unit-context-composer.ts create mode 100644 src/resources/extensions/sf/unit-context-manifest.ts diff --git a/package.json b/package.json index fb2fe80fa..ab26698e1 100644 --- 
a/package.json +++ b/package.json @@ -58,7 +58,7 @@ "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", "test:compile": "node scripts/compile-tests.mjs", - "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test \"dist-test/src/tests/*.test.js\" \"dist-test/src/resources/extensions/sf/tests/*.test.js\" \"dist-test/src/resources/extensions/sf/tests/*.test.mjs\" \"dist-test/src/resources/extensions/shared/tests/*.test.js\" \"dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js\" \"dist-test/src/resources/extensions/github-sync/tests/*.test.js\" \"dist-test/src/resources/extensions/universal-config/tests/*.test.js\" \"dist-test/src/resources/extensions/voice/tests/*.test.js\" \"dist-test/src/resources/extensions/mcp-client/tests/*.test.js\"", + "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-timeout=30000 --test-reporter=./scripts/test-reporter-compact.mjs --test \"dist-test/src/tests/*.test.js\" \"dist-test/src/resources/extensions/sf/tests/*.test.js\" \"dist-test/src/resources/extensions/sf/tests/*.test.mjs\" \"dist-test/src/resources/extensions/shared/tests/*.test.js\" \"dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js\" \"dist-test/src/resources/extensions/github-sync/tests/*.test.js\" \"dist-test/src/resources/extensions/universal-config/tests/*.test.js\" \"dist-test/src/resources/extensions/voice/tests/*.test.js\" \"dist-test/src/resources/extensions/mcp-client/tests/*.test.js\"", "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js packages/pi-coding-agent/dist/core/tools/spawn-shell-windows.test.js", "test:marketplace": "node scripts/with-env.mjs SF_TEST_CLONE_MARKETPLACES=1 -- node --import ./src/resources/extensions/sf/tests/resolve-ts.mjs 
--experimental-strip-types --test src/resources/extensions/sf/tests/claude-import-tui.test.ts src/resources/extensions/sf/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", "test:coverage": "c8 --reporter=text --reporter=lcov --exclude=\"src/resources/extensions/sf/tests/**\" --exclude=\"src/tests/**\" --exclude=\"scripts/**\" --exclude=\"native/**\" --exclude=\"node_modules/**\" --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/sf/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/sf/tests/*.test.ts src/resources/extensions/sf/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index a3a5c11a1..2c11c7a6a 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -5834,6 +5834,23 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.5": { + id: "gpt-5.5", + name: "GPT-5.5", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 30, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-nano": { id: "gpt-5.4-nano", name: "GPT-5.4 nano", diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts index e45330c70..2c3047453 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts @@ -9,6 +9,7 @@ import { Text, type TUI, truncateToWidth, + visibleWidth, } from 
"@singularity-forge/pi-tui"; import stripAnsi from "strip-ansi"; import type { ToolDefinition } from "../../../core/extensions/types.js"; diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index 0733d983c..df3290076 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -417,6 +417,8 @@ export const DISPATCH_RULES: DispatchRule[] = [ if (state.phase !== "pre-planning") return null; // Phase skip: skip research when preference or profile says so if (prefs?.phases?.skip_research) return null; + // #4781 phase 2: trivial-scope milestones skip dedicated milestone research + if (await getMilestonePipelineVariant(mid) === "trivial") return null; const researchFile = resolveMilestoneFile(basePath, mid, "RESEARCH"); if (researchFile) return null; // has research, fall through return { @@ -458,6 +460,8 @@ export const DISPATCH_RULES: DispatchRule[] = [ match: async ({ state, mid, midTitle, basePath, prefs }) => { if (state.phase !== "planning") return null; if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research) return null; + // #4781 phase 2: trivial-scope milestones skip dedicated slice research + if (await getMilestonePipelineVariant(mid) === "trivial") return null; // Load roadmap to find all slices const roadmapFile = resolveMilestoneFile(basePath, mid, "ROADMAP"); @@ -818,8 +822,12 @@ export const DISPATCH_RULES: DispatchRule[] = [ }; } - // Skip preference: write a minimal pass-through VALIDATION file - if (prefs?.phases?.skip_milestone_validation) { + // Skip preference or trivial-scope pipeline variant: write a minimal pass-through VALIDATION file + const trivialVariant = await getMilestonePipelineVariant(mid) === "trivial"; + const skipSource = trivialVariant + ? 
"trivial-scope pipeline variant (#4781)" + : "`skip_milestone_validation` preference"; + if (prefs?.phases?.skip_milestone_validation || trivialVariant) { const mDir = resolveMilestonePath(basePath, mid); if (mDir) { if (!existsSync(mDir)) mkdirSync(mDir, { recursive: true }); @@ -833,9 +841,9 @@ export const DISPATCH_RULES: DispatchRule[] = [ "remediation_round: 0", "---", "", - "# Milestone Validation (skipped by preference)", + "# Milestone Validation (skipped)", "", - "Milestone validation was skipped via `skip_milestone_validation` preference.", + `Milestone validation was skipped via ${skipSource}.`, ].join("\n"); writeFileSync(validationPath, content, "utf-8"); } diff --git a/src/resources/extensions/sf/auto-prompts.ts b/src/resources/extensions/sf/auto-prompts.ts index bb2376480..ecd56988f 100644 --- a/src/resources/extensions/sf/auto-prompts.ts +++ b/src/resources/extensions/sf/auto-prompts.ts @@ -14,17 +14,17 @@ import { resolveMilestoneFile, resolveSliceFile, resolveSlicePath, resolveTasksDir, resolveTaskFiles, resolveTaskFile, relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath, - resolveGsdRootFile, relGsdRootFile, resolveRuntimeFile, + resolveSfRootFile, relSfRootFile, resolveRuntimeFile, } from "./paths.js"; -import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveGSDPreferences, resolveAllSkillReferences } from "./preferences.js"; +import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveSFPreferences, resolveAllSkillReferences } from "./preferences.js"; import { parseRoadmap } from "./parsers-legacy.js"; -import type { GSDState, InlineLevel } from "./types.js"; -import type { GSDPreferences } from "./preferences.js"; -import { getLoadedSkills, type Skill } from "@gsd/pi-coding-agent"; +import type { SFState, InlineLevel } from "./types.js"; +import type { SFPreferences } from "./preferences.js"; +import { getLoadedSkills, type Skill } from "@singularity-forge/pi-coding-agent"; import { join, basename } from 
"node:path"; import { existsSync } from "node:fs"; import { computeBudgets, resolveExecutorContextWindow, truncateAtSectionBoundary, type MinimalModelRegistry } from "./context-budget.js"; -import { getPendingGates, getPendingGatesForTurn } from "./gsd-db.js"; +import { getPendingGates, getPendingGatesForTurn } from "./sf-db.js"; import { GATE_REGISTRY, assertGateCoverage, @@ -61,7 +61,7 @@ const MAX_PREAMBLE_CHARS = 30_000; */ function resolvePromptBudgets(): ReturnType { try { - const prefs = loadEffectiveGSDPreferences(); + const prefs = loadEffectiveSFPreferences(); const sessionWindow = prefs?.preferences.context_window_override; const windowTokens = resolveExecutorContextWindow(undefined, prefs?.preferences, sessionWindow); return computeBudgets(windowTokens); @@ -102,7 +102,7 @@ function formatExecutorConstraints( ): string { let windowTokens: number; try { - const prefs = loadEffectiveGSDPreferences(); + const prefs = loadEffectiveSFPreferences(); windowTokens = resolveExecutorContextWindow(modelRegistry, prefs?.preferences, sessionContextWindow); } catch (e) { logWarning("prompt", `resolveExecutorContextWindow failed: ${(e as Error).message}`); @@ -142,24 +142,24 @@ export function buildSourceFilePaths( ): string { const paths: string[] = []; - const projectPath = resolveGsdRootFile(base, "PROJECT"); + const projectPath = resolveSfRootFile(base, "PROJECT"); if (existsSync(projectPath)) { - paths.push(`- **Project**: \`${relGsdRootFile("PROJECT")}\``); + paths.push(`- **Project**: \`${relSfRootFile("PROJECT")}\``); } - const requirementsPath = resolveGsdRootFile(base, "REQUIREMENTS"); + const requirementsPath = resolveSfRootFile(base, "REQUIREMENTS"); if (existsSync(requirementsPath)) { - paths.push(`- **Requirements**: \`${relGsdRootFile("REQUIREMENTS")}\``); + paths.push(`- **Requirements**: \`${relSfRootFile("REQUIREMENTS")}\``); } - const decisionsPath = resolveGsdRootFile(base, "DECISIONS"); + const decisionsPath = resolveSfRootFile(base, "DECISIONS"); 
if (existsSync(decisionsPath)) { - paths.push(`- **Decisions**: \`${relGsdRootFile("DECISIONS")}\``); + paths.push(`- **Decisions**: \`${relSfRootFile("DECISIONS")}\``); } - const queuePath = resolveGsdRootFile(base, "QUEUE"); + const queuePath = resolveSfRootFile(base, "QUEUE"); if (existsSync(queuePath)) { - paths.push(`- **Queue**: \`${relGsdRootFile("QUEUE")}\``); + paths.push(`- **Queue**: \`${relSfRootFile("QUEUE")}\``); } const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); @@ -339,7 +339,7 @@ export async function inlineDependencySummaries( // DB primary path — get slice depends directly let depends: string[] | null = null; try { - const { isDbAvailable, getSlice } = await import("./gsd-db.js"); + const { isDbAvailable, getSlice } = await import("./sf-db.js"); if (isDbAvailable()) { const slice = getSlice(mid, sid); if (slice) { @@ -400,9 +400,9 @@ export async function inlineGsdRootFile( base: string, filename: string, label: string, ): Promise { const key = filename.replace(/\.md$/i, "").toUpperCase() as "PROJECT" | "DECISIONS" | "QUEUE" | "STATE" | "REQUIREMENTS" | "KNOWLEDGE"; - const absPath = resolveGsdRootFile(base, key); + const absPath = resolveSfRootFile(base, key); if (!existsSync(absPath)) return null; - return inlineFileOptional(absPath, relGsdRootFile(key), label); + return inlineFileOptional(absPath, relSfRootFile(key), label); } // ─── DB-Aware Inline Helpers ────────────────────────────────────────────── @@ -421,7 +421,7 @@ export async function inlineDecisionsFromDb( ): Promise { const inlineLevel = level ?? resolveInlineLevel(); try { - const { isDbAvailable } = await import("./gsd-db.js"); + const { isDbAvailable } = await import("./sf-db.js"); if (isDbAvailable()) { const { queryDecisions, formatDecisionsForPrompt } = await import("./context-store.js"); @@ -459,7 +459,7 @@ export async function inlineRequirementsFromDb( ): Promise { const inlineLevel = level ?? 
resolveInlineLevel(); try { - const { isDbAvailable } = await import("./gsd-db.js"); + const { isDbAvailable } = await import("./sf-db.js"); if (isDbAvailable()) { const { queryRequirements, formatRequirementsForPrompt } = await import("./context-store.js"); const requirements = queryRequirements({ milestoneId, sliceId }); @@ -485,7 +485,7 @@ export async function inlineProjectFromDb( base: string, ): Promise { try { - const { isDbAvailable } = await import("./gsd-db.js"); + const { isDbAvailable } = await import("./sf-db.js"); if (isDbAvailable()) { const { queryProject } = await import("./context-store.js"); const content = queryProject(); @@ -581,7 +581,7 @@ export async function inlineKnowledgeScoped( base: string, keywords: string[], ): Promise { - const knowledgePath = resolveGsdRootFile(base, "KNOWLEDGE"); + const knowledgePath = resolveSfRootFile(base, "KNOWLEDGE"); if (!existsSync(knowledgePath)) return null; const content = await loadFile(knowledgePath); @@ -594,7 +594,7 @@ export async function inlineKnowledgeScoped( // Return null if no sections matched (empty string from queryKnowledge) if (!scoped) return null; - return `### Project Knowledge (scoped)\nSource: \`${relGsdRootFile("KNOWLEDGE")}\`\n\n${scoped.trim()}`; + return `### Project Knowledge (scoped)\nSource: \`${relSfRootFile("KNOWLEDGE")}\`\n\n${scoped.trim()}`; } /** @@ -620,7 +620,7 @@ export async function inlineKnowledgeBudgeted( ? Math.max(0, Math.min(Math.floor(raw), HARD_MAX_CHARS)) : DEFAULT_MAX_CHARS; - const knowledgePath = resolveGsdRootFile(base, "KNOWLEDGE"); + const knowledgePath = resolveSfRootFile(base, "KNOWLEDGE"); if (!existsSync(knowledgePath)) return null; const content = await loadFile(knowledgePath); @@ -636,7 +636,7 @@ export async function inlineKnowledgeBudgeted( ? 
`${trimmed.slice(0, maxChars)}\n\n[...truncated ${trimmed.length - maxChars} chars; rerun with narrower scope if needed]` : trimmed; - return `### Project Knowledge (scoped)\nSource: \`${relGsdRootFile("KNOWLEDGE")}\`\n\n${truncated}`; + return `### Project Knowledge (scoped)\nSource: \`${relSfRootFile("KNOWLEDGE")}\`\n\n${truncated}`; } /** @@ -717,7 +717,7 @@ function skillMatchesContext(skill: Skill, contextTokens: Set): boolean function resolvePreferenceSkillNames(refs: string[], base: string): string[] { if (refs.length === 0) return []; - const prefs: GSDPreferences = { always_use_skills: refs }; + const prefs: SFPreferences = { always_use_skills: refs }; const report = resolveAllSkillReferences(prefs, base); return refs.map(ref => { const resolution = report.resolutions.get(ref); @@ -733,7 +733,7 @@ function ruleMatchesContext(when: string, contextTokens: Set): boolean { } function resolveSkillRuleMatches( - prefs: GSDPreferences | undefined, + prefs: SFPreferences | undefined, contextTokens: Set, base: string, ): { include: string[]; avoid: string[] } { @@ -750,7 +750,7 @@ function resolveSkillRuleMatches( } function resolvePreferredSkillNames( - prefs: GSDPreferences | undefined, + prefs: SFPreferences | undefined, visibleSkills: Skill[], contextTokens: Set, base: string, @@ -786,7 +786,7 @@ export function buildSkillActivationBlock(params: { taskTitle?: string; extraContext?: string[]; taskPlanContent?: string | null; - preferences?: GSDPreferences; + preferences?: SFPreferences; /** * Unit type dispatching this prompt. When provided, skills are filtered * through the per-unit-type manifest (see `skill-manifest.ts`). Unknown @@ -794,7 +794,7 @@ export function buildSkillActivationBlock(params: { */ unitType?: string; }): string { - const prefs = params.preferences ?? loadEffectiveGSDPreferences(params.base)?.preferences; + const prefs = params.preferences ?? 
loadEffectiveSFPreferences()?.preferences; const contextTokens = tokenizeSkillContext( params.milestoneId, params.milestoneTitle, @@ -1070,11 +1070,11 @@ export async function getDependencyTaskSummaryPaths( * - All slices are complete (milestone done — no point reassessing) */ export async function checkNeedsReassessment( - base: string, mid: string, state: GSDState, + base: string, mid: string, state: SFState, prefs?: SFPreferences, ): Promise<{ sliceId: string } | null> { // DB primary path — fall through to file-based when DB has no data for this milestone try { - const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); if (isDbAvailable()) { const slices = getMilestoneSlices(mid); if (slices.length > 0) { @@ -1086,8 +1086,9 @@ export async function checkNeedsReassessment( const hasAssessment = !!(assessmentFile && await loadFile(assessmentFile)); if (hasAssessment) return null; const summaryFile = resolveSliceFile(base, mid, lastCompleted, "SUMMARY"); - const hasSummary = !!(summaryFile && await loadFile(summaryFile)); - if (!hasSummary) return null; + const summaryContent = summaryFile ? await loadFile(summaryFile) : null; + if (!summaryContent) return null; + if (prefs?.skip_clean_reassess && isSummaryCleanForSkip(summaryContent)) return null; return { sliceId: lastCompleted }; } } @@ -1109,11 +1110,43 @@ export async function checkNeedsReassessment( const hasAssess = !!(assessFile && await loadFile(assessFile)); if (hasAssess) return null; const summFile = resolveSliceFile(base, mid, lastDone, "SUMMARY"); - const hasSumm = !!(summFile && await loadFile(summFile)); - if (!hasSumm) return null; + const summContent = summFile ? 
await loadFile(summFile) : null; + if (!summContent) return null; + if (prefs?.skip_clean_reassess && isSummaryCleanForSkip(summContent)) return null; return { sliceId: lastDone }; } +/** + * Return true when a slice SUMMARY signals a structurally clean completion + * that makes reassess-roadmap dispatch unnecessary. Gated behind the + * `skip_clean_reassess` preference (#4778). + */ +export function isSummaryCleanForSkip(content: string): boolean { + try { + const summary = parseSummary(content); + if (!summary.frontmatter.id) return false; + if (summary.frontmatter.blocker_discovered === true) return false; + + const decisions = (summary.frontmatter.key_decisions ?? []) + .map((d: string) => d.trim()) + .filter((d: string) => d.length > 0 && d.toLowerCase() !== "(none)"); + if (decisions.length > 0) return false; + + const ROADMAP_CHANGE_MARKERS = [ + "add slice", "added slice", "remove slice", "removed slice", + "new slice", "scope expansion", "scope change", "scope widened", + "dependency discovered", "added dependency", "new dependency", + ]; + const haystack = content.toLowerCase(); + for (const marker of ROADMAP_CHANGE_MARKERS) { + if (haystack.includes(marker)) return false; + } + return true; + } catch { + return false; + } +} + /** * Check if the most recently completed slice needs a UAT run. * Returns { sliceId, uatType } if UAT should be dispatched, null otherwise. 
@@ -1126,11 +1159,11 @@ export async function checkNeedsReassessment( * - UAT result file already exists (idempotent — already ran) */ export async function checkNeedsRunUat( - base: string, mid: string, state: GSDState, prefs: GSDPreferences | undefined, + base: string, mid: string, state: SFState, prefs: SFPreferences | undefined, ): Promise<{ sliceId: string; uatType: UatType } | null> { // DB primary path — fall through to file-based when DB has no data for this milestone try { - const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); if (isDbAvailable()) { const slices = getMilestoneSlices(mid); if (slices.length > 0) { @@ -1327,11 +1360,11 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); } - const queuePath = resolveGsdRootFile(base, "QUEUE"); + const queuePath = resolveSfRootFile(base, "QUEUE"); if (existsSync(queuePath)) { const queueInline = await inlineFileSmart( queuePath, - relGsdRootFile("QUEUE"), + relSfRootFile("QUEUE"), "Project Queue", `${mid} ${midTitle}`, ); @@ -1629,7 +1662,7 @@ export async function buildRefineSlicePrompt( // Pull the stored sketch scope from the DB — the hard constraint we plan within. let sketchScope = ""; try { - const { isDbAvailable, getSlice } = await import("./gsd-db.js"); + const { isDbAvailable, getSlice } = await import("./sf-db.js"); if (isDbAvailable()) { sketchScope = getSlice(mid, sid)?.sketch_scope ?? 
""; } @@ -1725,11 +1758,11 @@ export async function buildExecuteTaskPrompt( const carryForwardSection = await buildCarryForwardSection(effectivePriorSummaries, base); // Inline project knowledge if available (smart-chunked for relevance) - const knowledgeAbsPath = resolveGsdRootFile(base, "KNOWLEDGE"); + const knowledgeAbsPath = resolveSfRootFile(base, "KNOWLEDGE"); const knowledgeInlineET = existsSync(knowledgeAbsPath) ? await inlineFileSmart( knowledgeAbsPath, - relGsdRootFile("KNOWLEDGE"), + relSfRootFile("KNOWLEDGE"), "Project Knowledge", `${tTitle} ${sTitle}`, // use task + slice title as relevance query ) @@ -1755,7 +1788,7 @@ export async function buildExecuteTaskPrompt( const overridesSection = formatOverridesSection(activeOverrides); // Compute verification budget for the executor's context window (issue #707) - const prefs = loadEffectiveGSDPreferences(); + const prefs = loadEffectiveSFPreferences(); const contextWindow = resolveExecutorContextWindow(opts.modelRegistry, prefs?.preferences, opts.sessionContextWindow); const budgets = computeBudgets(contextWindow); const verificationBudget = `~${Math.round(budgets.verificationBudgetChars / 1000)}K chars`; @@ -1977,7 +2010,7 @@ export async function buildCompleteMilestonePrompt( // Inline all slice summaries (deduplicated by slice ID) let sliceIds: string[] = []; try { - const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); if (isDbAvailable()) { sliceIds = getMilestoneSlices(mid) .filter(s => s.status !== "skipped") @@ -2073,7 +2106,7 @@ export async function buildValidateMilestonePrompt( // Inline verification classes from planning (if available in DB) try { - const { isDbAvailable, getMilestone } = await import("./gsd-db.js"); + const { isDbAvailable, getMilestone } = await import("./sf-db.js"); if (isDbAvailable()) { const milestone = getMilestone(mid); if (milestone) { @@ -2094,7 +2127,7 @@ export async 
function buildValidateMilestonePrompt( // Inline all slice summaries and assessment results let valSliceIds: string[] = []; try { - const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); + const { isDbAvailable, getMilestoneSlices } = await import("./sf-db.js"); if (isDbAvailable()) { valSliceIds = getMilestoneSlices(mid) .filter(s => s.status !== "skipped") @@ -2687,7 +2720,7 @@ export async function buildRewriteDocsPrompt( // DB primary path — get incomplete tasks let incompleteTasks: { id: string }[] | null = null; try { - const { isDbAvailable, getSliceTasks } = await import("./gsd-db.js"); + const { isDbAvailable, getSliceTasks } = await import("./sf-db.js"); if (isDbAvailable()) { incompleteTasks = getSliceTasks(mid, sid) .filter(t => t.status !== "complete" && t.status !== "done") @@ -2715,12 +2748,12 @@ export async function buildRewriteDocsPrompt( } } - const decisionsPath = resolveGsdRootFile(base, "DECISIONS"); - if (existsSync(decisionsPath)) docList.push(`- Decisions: \`${relGsdRootFile("DECISIONS")}\``); - const requirementsPath = resolveGsdRootFile(base, "REQUIREMENTS"); - if (existsSync(requirementsPath)) docList.push(`- Requirements: \`${relGsdRootFile("REQUIREMENTS")}\``); - const projectPath = resolveGsdRootFile(base, "PROJECT"); - if (existsSync(projectPath)) docList.push(`- Project: \`${relGsdRootFile("PROJECT")}\``); + const decisionsPath = resolveSfRootFile(base, "DECISIONS"); + if (existsSync(decisionsPath)) docList.push(`- Decisions: \`${relSfRootFile("DECISIONS")}\``); + const requirementsPath = resolveSfRootFile(base, "REQUIREMENTS"); + if (existsSync(requirementsPath)) docList.push(`- Requirements: \`${relSfRootFile("REQUIREMENTS")}\``); + const projectPath = resolveSfRootFile(base, "PROJECT"); + if (existsSync(projectPath)) docList.push(`- Project: \`${relSfRootFile("PROJECT")}\``); const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); const contextRel = relMilestoneFile(base, mid, "CONTEXT"); if 
(contextPath) docList.push(`- Milestone context (reference only): \`${contextRel}\``); @@ -2744,6 +2777,6 @@ export async function buildRewriteDocsPrompt( sliceTitle: sTitle, overrideContent, documentList, - overridesPath: relGsdRootFile("OVERRIDES"), + overridesPath: relSfRootFile("OVERRIDES"), }); } diff --git a/src/resources/extensions/sf/auto-start.ts b/src/resources/extensions/sf/auto-start.ts index dd2d32834..9fd5e5a1c 100644 --- a/src/resources/extensions/sf/auto-start.ts +++ b/src/resources/extensions/sf/auto-start.ts @@ -112,6 +112,23 @@ export interface BootstrapDeps { // Counter moved to AutoSession.consecutiveCompleteBootstraps so s.reset() clears it. const MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS = 2; +export type SurvivorAction = "none" | "discuss" | "finalize"; + +/** + * Decide which survivor-branch recovery action bootstrapAutoSession must + * run for the current (hasSurvivorBranch, phase) combination. Pure function, + * extracted for testability. + */ +export function decideSurvivorAction( + hasSurvivorBranch: boolean, + phase: string | null | undefined, +): SurvivorAction { + if (!hasSurvivorBranch) return "none"; + if (phase === "needs-discussion") return "discuss"; + if (phase === "complete") return "finalize"; + return "none"; +} + export async function openProjectDbIfPresent(basePath: string): Promise { const sfDbPath = resolveProjectRootDbPath(basePath); if (!existsSync(sfDbPath) || isDbAvailable()) return; @@ -488,7 +505,7 @@ export async function bootstrapAutoSession( // The worktree/branch was created but the milestone only has CONTEXT-DRAFT.md. // Route to the interactive discussion handler instead of falling through to // auto-mode, which would immediately stop with "needs discussion". 
- if (hasSurvivorBranch && state.phase === "needs-discussion") { + if (decideSurvivorAction(hasSurvivorBranch, state.phase) === "discuss") { const { showWorkflowEntry } = await import("./guided-flow.js"); await showWorkflowEntry(ctx, pi, base, { step: requestedStepMode }); @@ -514,7 +531,7 @@ export async function bootstrapAutoSession( // The milestone artifacts were written but finalization (merge, worktree // cleanup) never ran. Run mergeAndExit to finalize, then re-derive state // so the normal "all milestones complete" or "next milestone" path runs. - if (hasSurvivorBranch && state.phase === "complete") { + if (decideSurvivorAction(hasSurvivorBranch, state.phase) === "finalize") { const mid = state.activeMilestone!.id; ctx.ui.notify( `Milestone ${mid} is complete but branch/worktree was not finalized. Running merge now.`, diff --git a/src/resources/extensions/sf/code-intelligence.ts b/src/resources/extensions/sf/code-intelligence.ts new file mode 100644 index 000000000..7279ae862 --- /dev/null +++ b/src/resources/extensions/sf/code-intelligence.ts @@ -0,0 +1,393 @@ +/** + * Optional code-intelligence backends for SF. + * + * CODEBASE.md stays the durable baseline. Project RAG is an optional MCP + * accelerator for local hybrid vector + BM25 code retrieval. + */ + +import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { spawnSync } from "node:child_process"; +import { delimiter, join, resolve } from "node:path"; + +import type { CodebaseMapPreferences } from "./preferences-types.js"; + +export const PROJECT_RAG_MCP_SERVER_NAME = "project-rag"; +const PROJECT_RAG_BINARY_NAME = process.platform === "win32" ? 
"project-rag.exe" : "project-rag"; + +const PROJECT_RAG_SOURCE_CANDIDATES = [ + "vendor/project-rag", + "vendor/brainwires/project-rag", + "third_party/project-rag", + "third_party/brainwires/project-rag", + "tools/project-rag", + "project-rag", +] as const; + +export interface ProjectRagMcpConfig { + command?: string; + args?: string[]; + cwd?: string; + env?: Record; + url?: string; +} + +interface McpConfigFile { + mcpServers?: Record; + servers?: Record; + [key: string]: unknown; +} + +export interface ProjectRagDetection { + status: "disabled" | "configured" | "missing"; + serverName?: string; + configPath?: string; + command?: string; + binaryPath?: string; + sourceDir?: string; + reason: string; +} + +export interface EnsureProjectRagMcpConfigResult { + configPath: string; + serverName: string; + status: "created" | "updated" | "unchanged"; +} + +export interface ProjectRagBuildResult { + sourceDir: string; + binaryPath: string; + buildJobs: string; + stdout: string; + stderr: string; +} + +function readJsonConfig(configPath: string): McpConfigFile { + if (!existsSync(configPath)) return {}; + const raw = readFileSync(configPath, "utf-8"); + const parsed = JSON.parse(raw) as McpConfigFile; + return parsed && typeof parsed === "object" ? parsed : {}; +} + +function readMcpConfigEntries(projectRoot: string): Array<{ name: string; config: ProjectRagMcpConfig; configPath: string }> { + const entries: Array<{ name: string; config: ProjectRagMcpConfig; configPath: string }> = []; + const seen = new Set(); + for (const configPath of [join(projectRoot, ".mcp.json"), join(projectRoot, ".sf", "mcp.json")]) { + try { + const data = readJsonConfig(configPath); + const servers = data.mcpServers ?? 
data.servers; + if (!servers || typeof servers !== "object") continue; + for (const [name, config] of Object.entries(servers)) { + if (seen.has(name)) continue; + seen.add(name); + entries.push({ name, config, configPath }); + } + } catch { + // Malformed optional MCP config should not block SF startup. + } + } + return entries; +} + +function configLooksLikeProjectRag(name: string, config: ProjectRagMcpConfig): boolean { + const haystack = [ + name, + config.command ?? "", + ...(config.args ?? []), + config.cwd ?? "", + ].join(" ").toLowerCase(); + return /project[-_]?rag|brainwires/.test(haystack); +} + +function normalizeProjectRoot(projectRoot: string): string { + return resolve(projectRoot); +} + +function commandExists(command: string | undefined, env: NodeJS.ProcessEnv = process.env): boolean { + if (!command) return false; + return lookupExecutable(command, env) !== null; +} + +export function detectProjectRag( + projectRoot: string, + prefs?: CodebaseMapPreferences, +): ProjectRagDetection { + const mode = prefs?.project_rag ?? "auto"; + if (mode === "off") { + return { status: "disabled", reason: "codebase.project_rag is off" }; + } + + const configuredServer = prefs?.project_rag_server?.trim(); + const normalizedRoot = normalizeProjectRoot(projectRoot); + const binaryPath = resolveProjectRagBinaryForProject(normalizedRoot, process.env) ?? undefined; + const sourceDir = findProjectRagSourceDir(normalizedRoot, process.env) ?? undefined; + const entries = readMcpConfigEntries(normalizedRoot); + const match = entries.find(({ name, config }) => + configuredServer ? name === configuredServer : configLooksLikeProjectRag(name, config) + ); + + if (match) { + const configuredCommandExists = commandExists(match.config.command); + return { + status: "configured", + serverName: match.name, + configPath: match.configPath, + command: match.config.command, + binaryPath, + sourceDir, + reason: configuredCommandExists + ? 
"project-rag MCP server configured" + : "project-rag MCP server configured but command is not currently executable", + }; + } + + return { + status: "missing", + binaryPath, + sourceDir, + reason: mode === "required" + ? "codebase.project_rag is required but no project-rag MCP server is configured" + : "no project-rag MCP server configured", + }; +} + +function lookupExecutable(command: string, env: NodeJS.ProcessEnv = process.env): string | null { + if (command.includes("/") && existsSync(command)) return command; + const pathValue = env.PATH ?? ""; + for (const dir of pathValue.split(delimiter).filter(Boolean)) { + const candidate = join(dir, command); + if (existsSync(candidate)) return candidate; + } + return null; +} + +export function resolveProjectRagBinary(env: NodeJS.ProcessEnv = process.env): string | null { + const explicit = env.SF_PROJECT_RAG_BIN?.trim() || env.PROJECT_RAG_BIN?.trim(); + if (explicit) return explicit; + return lookupExecutable("project-rag", env); +} + +function projectRagBinaryFromSource(sourceDir: string): string | null { + const candidate = join(sourceDir, "target", "release", PROJECT_RAG_BINARY_NAME); + return existsSync(candidate) ? candidate : null; +} + +export function resolveProjectRagBuildJobs(env: NodeJS.ProcessEnv = process.env): string { + const configured = env.SF_PROJECT_RAG_BUILD_JOBS?.trim() || env.CARGO_BUILD_JOBS?.trim(); + if (!configured) return "2"; + + const parsed = Number.parseInt(configured, 10); + return Number.isFinite(parsed) && parsed > 0 ? String(parsed) : "2"; +} + +export function findProjectRagSourceDir( + projectRoot: string, + env: NodeJS.ProcessEnv = process.env, +): string | null { + const explicit = env.SF_PROJECT_RAG_SOURCE?.trim() || env.PROJECT_RAG_SOURCE?.trim(); + const candidates = [ + ...(explicit ? 
[explicit] : []), + ...PROJECT_RAG_SOURCE_CANDIDATES.map(relativePath => join(normalizeProjectRoot(projectRoot), relativePath)), + ]; + for (const candidate of candidates) { + const manifestPath = join(candidate, "Cargo.toml"); + if (!existsSync(manifestPath)) continue; + try { + const manifest = readFileSync(manifestPath, "utf-8"); + if (/name\s*=\s*"project-rag"/.test(manifest) || /project-rag/i.test(candidate)) { + return resolve(candidate); + } + } catch { + // Optional vendored source discovery should never block SF startup. + } + } + return null; +} + +export function resolveProjectRagBinaryForProject( + projectRoot: string, + env: NodeJS.ProcessEnv = process.env, +): string | null { + const explicitOrPath = resolveProjectRagBinary(env); + if (explicitOrPath) return explicitOrPath; + + const sourceDir = findProjectRagSourceDir(projectRoot, env); + if (sourceDir) { + const builtBinary = projectRagBinaryFromSource(sourceDir); + if (builtBinary) return builtBinary; + } + + for (const relativePath of [ + join("target", "release", PROJECT_RAG_BINARY_NAME), + join(".bin", PROJECT_RAG_BINARY_NAME), + join("bin", PROJECT_RAG_BINARY_NAME), + ]) { + const candidate = join(normalizeProjectRoot(projectRoot), relativePath); + if (existsSync(candidate)) return candidate; + } + + return null; +} + +export function buildProjectRagMcpServerConfig( + projectRoot: string = process.cwd(), + env: NodeJS.ProcessEnv = process.env, +): ProjectRagMcpConfig { + const command = resolveProjectRagBinaryForProject(projectRoot, env); + if (!command) { + const sourceDir = findProjectRagSourceDir(projectRoot, env); + throw new Error( + sourceDir + ? `project-rag source found at ${sourceDir}, but no release binary exists. Run /sf codebase rag build first.` + : "project-rag binary not found. Set SF_PROJECT_RAG_BIN, install project-rag on PATH, or vendor Brainwires/project-rag under vendor/project-rag.", + ); + } + return { + command, + env: { + RUST_LOG: env.RUST_LOG ?? 
"info", + }, + }; +} + +export function buildProjectRagBinary( + projectRoot: string, + env: NodeJS.ProcessEnv = process.env, +): ProjectRagBuildResult { + const sourceDir = findProjectRagSourceDir(projectRoot, env); + if (!sourceDir) { + throw new Error( + "project-rag source not found. Vendor Brainwires/project-rag under vendor/project-rag or set SF_PROJECT_RAG_SOURCE.", + ); + } + const cargo = lookupExecutable("cargo", env); + if (!cargo) { + throw new Error("cargo not found in PATH; cannot build vendored project-rag."); + } + + const buildJobs = resolveProjectRagBuildJobs(env); + const result = spawnSync(cargo, ["build", "--release"], { + cwd: sourceDir, + env: { ...process.env, ...env, CARGO_BUILD_JOBS: buildJobs }, + encoding: "utf-8", + maxBuffer: 20 * 1024 * 1024, + }); + const stdout = result.stdout ?? ""; + const stderr = result.stderr ?? ""; + if (result.error) { + throw new Error(`cargo build failed to start: ${result.error.message}`); + } + if (result.status !== 0) { + throw new Error( + `cargo build --release failed with exit ${result.status ?? "unknown"}:\n${stderr || stdout}`.trim(), + ); + } + + const binaryPath = projectRagBinaryFromSource(sourceDir); + if (!binaryPath) { + throw new Error(`cargo build completed, but ${join(sourceDir, "target", "release", PROJECT_RAG_BINARY_NAME)} was not found.`); + } + + return { sourceDir, binaryPath, buildJobs, stdout, stderr }; +} + +export function ensureProjectRagMcpConfig( + projectRoot: string, + env: NodeJS.ProcessEnv = process.env, +): EnsureProjectRagMcpConfigResult { + const resolvedProjectRoot = normalizeProjectRoot(projectRoot); + const configPath = join(resolvedProjectRoot, ".mcp.json"); + const alreadyPresent = existsSync(configPath); + const existing = readJsonConfig(configPath); + const desiredServer = buildProjectRagMcpServerConfig(resolvedProjectRoot, env); + const previousServers = existing.mcpServers ?? 
{}; + const current = previousServers[PROJECT_RAG_MCP_SERVER_NAME]; + const unchanged = + JSON.stringify(current ?? null) === JSON.stringify(desiredServer) + && existing.mcpServers !== undefined; + + if (unchanged) { + return { configPath, serverName: PROJECT_RAG_MCP_SERVER_NAME, status: "unchanged" }; + } + + const nextConfig: McpConfigFile = { + ...existing, + mcpServers: { + ...previousServers, + [PROJECT_RAG_MCP_SERVER_NAME]: desiredServer, + }, + }; + writeFileSync(configPath, `${JSON.stringify(nextConfig, null, 2)}\n`, "utf-8"); + return { + configPath, + serverName: PROJECT_RAG_MCP_SERVER_NAME, + status: alreadyPresent ? "updated" : "created", + }; +} + +function formatToolPrefix(serverName: string): string { + return `mcp__${serverName.replace(/[^A-Za-z0-9_]/g, "_")}__`; +} + +export function buildCodeIntelligenceContextBlock( + projectRoot: string, + prefs?: CodebaseMapPreferences, +): string { + const detection = detectProjectRag(projectRoot, prefs); + const lines = [ + "[PROJECT CODE INTELLIGENCE]", + "", + "- Durable baseline: use `.sf/CODEBASE.md` for structural orientation and persistent project knowledge.", + ]; + + if (detection.status === "disabled") { + lines.push("- Project RAG: disabled by `codebase.project_rag: off`."); + } else if (detection.status === "configured" && detection.serverName) { + const prefix = formatToolPrefix(detection.serverName); + lines.push(`- Project RAG: configured as MCP server \`${detection.serverName}\`.`); + lines.push( + "- Use Project RAG for broad code retrieval before manual file-by-file reading, " + + "especially conceptual queries, exact identifiers, schema fields, and git-history questions.", + ); + lines.push( + `- Expected MCP tool prefix: \`${prefix}\` ` + + `(for example \`${prefix}index_codebase\`, \`${prefix}query_codebase\`, ` + + `\`${prefix}search_by_filters\`, \`${prefix}find_definition\`, ` + + `\`${prefix}find_references\`, \`${prefix}get_call_graph\`).`, + ); + 
lines.push(prefs?.project_rag_auto_index === false + ? "- Do not auto-index unless explicitly needed; query existing indexes first. " + + "If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout." + : "- Index first if the backend is stale or empty; use incremental indexing when available. " + + "If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout."); + } else { + lines.push("- Project RAG: not configured. This is optional; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout."); + lines.push("- To enable later: build/install Brainwires/project-rag, then run `/sf codebase rag init` or set `SF_PROJECT_RAG_BIN` before initializing MCP config."); + } + + return `\n\n${lines.join("\n")}`; +} + +export function formatProjectRagStatus(projectRoot: string, prefs?: CodebaseMapPreferences): string { + const detection = detectProjectRag(projectRoot, prefs); + const lines = ["Project RAG Status", ""]; + lines.push(`Status: ${detection.status}`); + lines.push(`Reason: ${detection.reason}`); + if (detection.serverName) lines.push(`Server: ${detection.serverName}`); + if (detection.configPath) lines.push(`Config: ${detection.configPath}`); + if (detection.command) lines.push(`Command: ${detection.command}`); + if (detection.binaryPath) lines.push(`Binary: ${detection.binaryPath}`); + if (detection.sourceDir) lines.push(`Source: ${detection.sourceDir}`); + if (detection.status === "configured" && detection.command) { + lines.push(`Operational: ${commandExists(detection.command) ? 
"yes" : "no - configured command is missing"}`); + } else if (detection.binaryPath) { + lines.push("Operational: no - binary exists but MCP config is missing; run /sf codebase rag init."); + } else if (detection.sourceDir) { + lines.push("Operational: no - source exists but release binary is missing; run /sf codebase rag build."); + } else { + lines.push("Operational: no - binary/source not found."); + } + lines.push(""); + lines.push("Project RAG is optional. SF falls back to CODEBASE.md, rg, lsp, and scout when it is unavailable."); + lines.push("When configured, agents should use index_codebase, query_codebase, search_by_filters, find_definition, find_references, and get_call_graph before manual file-by-file reading."); + return lines.join("\n"); +} diff --git a/src/resources/extensions/sf/commands-extract-learnings.ts b/src/resources/extensions/sf/commands-extract-learnings.ts index a6c1f0eb7..ad3b97147 100644 --- a/src/resources/extensions/sf/commands-extract-learnings.ts +++ b/src/resources/extensions/sf/commands-extract-learnings.ts @@ -302,3 +302,87 @@ export async function handleExtractLearnings( { triggerTurn: true }, ); } + +export interface ExtractionStepsContext { + milestoneId: string; + outputPath: string; + relativeOutputPath: string; +} + +/** + * Canonical structured-extraction instructions, shared by the manual + * `/sf extract-learnings` path and the auto-mode complete-milestone turn. + */ +export function buildExtractionStepsBlock(ctx: ExtractionStepsContext): string { + return `## Structured Learnings Extraction + +Perform the following steps IN ORDER. Each step is mandatory unless explicitly +marked optional. These instructions are the single source of truth shared by +\`/sf extract-learnings\` and the auto-mode milestone-completion turn. 
+ +### Step 1 — Classify findings into four categories + +Review the milestone artefacts (roadmap, slice summaries, verification report, +UAT report) and structure your findings into exactly four categories: + +- **Decisions** — architectural or design choices made during this milestone, including rationale and alternatives considered. +- **Lessons** — technical discoveries, process insights, knowledge gaps that were filled. +- **Patterns** — reusable approaches or solutions that emerged and should be applied in future work. +- **Surprises** — unexpected challenges, discoveries, or outcomes that deviated from assumptions. + +Every item MUST carry a \`Source:\` line using the format +\`Source: {artifact-filename}/{section}\` (e.g. +\`Source: ${ctx.milestoneId}-ROADMAP.md/Architecture Decisions\`). +Items without a source attribution are invalid — drop them. + +### Step 2 — Write the LEARNINGS.md audit trail + +Using the \`write\` tool, persist the full structured report to +\`${ctx.relativeOutputPath}\` with this shape: + +- YAML frontmatter with keys: \`phase\`, \`phase_name\`, \`project\`, \`generated\` (ISO-8601 UTC), \`counts\` (decisions / lessons / patterns / surprises), \`missing_artifacts\`. +- Four H3 sections (\`### Decisions\`, \`### Lessons\`, \`### Patterns\`, \`### Surprises\`) containing bullet points. Each bullet is followed by its \`Source:\` line. + +LEARNINGS.md is the full, cited audit trail. Write it first — subsequent steps +feed from its content. + +### Step 3 — Optionally pre-query the memory store for semantic duplicates + +Before persisting any extracted item in Steps 4–6, you may call +\`memory_query\` with 2–3 keywords from the item to check whether the +memory store already holds a semantically equivalent entry at high +confidence. Skip those items in their respective steps. 
+ +### Step 4 — Persist Patterns via \`capture_thought\` + +For each extracted Pattern, call \`capture_thought\` exactly once with: +- \`category: "pattern"\` +- \`content\`: a 1–2 sentence restatement combining the Pattern, Where, and any non-obvious notes +- \`scope: "${ctx.milestoneId}"\` + +### Step 5 — Persist Lessons via \`capture_thought\` + +For each extracted Lesson, call \`capture_thought\` exactly once with: +- \`category: "gotcha"\` when the Lesson describes a pitfall, surprise root cause, or recurring failure mode; \`category: "convention"\` when it describes a project-wide rule or normative practice +- \`content\`: a 1–3 sentence restatement of What Happened + Root Cause + Fix +- \`scope: "${ctx.milestoneId}"\` + +### Step 6 — Persist Decisions via \`capture_thought\` + +For each extracted Decision, call \`capture_thought\` exactly once with: +- \`category: "architecture"\` +- \`content\`: a 1–3 sentence restatement combining decision + choice + rationale +- \`scope: "${ctx.milestoneId}"\` +- \`structuredFields\`: an object preserving the original decision schema + +### Step 7 — Deduplication rule (applies to Steps 4, 5, 6) + +Before each \`capture_thought\` call, optionally call \`memory_query\` with 2–3 +keywords from the entry. If a semantically equivalent memory is returned at +high confidence, skip the capture entirely. + +### Step 8 — Surprises stay only in LEARNINGS.md + +Surprises are milestone-local context and are NOT cross-session-reusable. Do +not persist them via \`capture_thought\` or any other MCP tool.`; +} diff --git a/src/resources/extensions/sf/escalation.ts b/src/resources/extensions/sf/escalation.ts new file mode 100644 index 000000000..a532e2012 --- /dev/null +++ b/src/resources/extensions/sf/escalation.ts @@ -0,0 +1,9 @@ +// ADR-011 Phase 2 Mid-Execution Escalation — stub pending full DB schema port. 
+ +export function claimOverrideForInjection( + _basePath: string, + _milestoneId: string, + _sliceId: string, +): { injectionBlock: string; sourceTaskId: string } | null { + return null; +} diff --git a/src/resources/extensions/sf/milestone-quality.ts b/src/resources/extensions/sf/milestone-quality.ts new file mode 100644 index 000000000..3ea1d2052 --- /dev/null +++ b/src/resources/extensions/sf/milestone-quality.ts @@ -0,0 +1,153 @@ +export type VisionMeetingRoute = "discussing" | "researching" | "planning"; + +export interface VisionAlignmentMeetingRecord { + trigger: string; + pm: string; + userAdvocate: string; + customerPanel: string; + business: string; + researcher: string; + deliveryLead: string; + partner: string; + combatant: string; + architect: string; + moderator: string; + weightedSynthesis: string; + confidenceByArea: string; + recommendedRoute: VisionMeetingRoute; +} + +export interface MilestonePlanQualityCheck { + issues: string[]; +} + +const PLACEHOLDER_VALUES = new Set([ + "", + "not provided.", + "missing vision alignment meeting.", + "missing weighted synthesis.", + "missing confidence by area.", +]); + +function isMeaningful(value: string | null | undefined): boolean { + const normalized = (value ?? 
"").trim().toLowerCase(); + return normalized.length > 0 && !PLACEHOLDER_VALUES.has(normalized); +} + +function extractSection(content: string, heading: string): string { + const lines = content.split("\n"); + const start = lines.findIndex((line) => line.trim() === `## ${heading}`); + if (start === -1) return ""; + + let end = lines.length; + for (let index = start + 1; index < lines.length; index += 1) { + if (/^##\s+/.test(lines[index])) { + end = index; + break; + } + } + + return lines.slice(start + 1, end).join("\n").trim(); +} + +function extractSubsection(content: string, heading: string): string { + const lines = content.split("\n"); + const start = lines.findIndex((line) => line.trim() === `### ${heading}`); + if (start === -1) return ""; + + let end = lines.length; + for (let index = start + 1; index < lines.length; index += 1) { + if (/^###\s+/.test(lines[index]) || /^##\s+/.test(lines[index])) { + end = index; + break; + } + } + + return lines.slice(start + 1, end).join("\n").trim(); +} + +export function hasStructuredVisionAlignmentMeeting( + meeting: Partial | null | undefined, +): meeting is VisionAlignmentMeetingRecord { + if (!meeting) return false; + const route = meeting.recommendedRoute; + return isMeaningful(meeting.trigger) + && isMeaningful(meeting.pm) + && isMeaningful(meeting.userAdvocate) + && isMeaningful(meeting.customerPanel) + && isMeaningful(meeting.business) + && isMeaningful(meeting.researcher) + && isMeaningful(meeting.deliveryLead) + && isMeaningful(meeting.partner) + && isMeaningful(meeting.combatant) + && isMeaningful(meeting.architect) + && isMeaningful(meeting.moderator) + && isMeaningful(meeting.weightedSynthesis) + && isMeaningful(meeting.confidenceByArea) + && (route === "discussing" || route === "researching" || route === "planning"); +} + +export function getVisionAlignmentBlockingIssue( + meeting: Partial | null | undefined, +): string | null { + if (!meeting) return "missing vision alignment meeting"; + if 
(!isMeaningful(meeting.trigger)) return "missing vision meeting trigger";
+  if (!isMeaningful(meeting.pm)) return "missing vision meeting pm view";
+  if (!isMeaningful(meeting.userAdvocate)) return "missing vision meeting user advocate view";
+  if (!isMeaningful(meeting.customerPanel)) return "missing vision meeting customer panel view";
+  if (!isMeaningful(meeting.business)) return "missing vision meeting business view";
+  if (!isMeaningful(meeting.researcher)) return "missing vision meeting researcher view";
+  if (!isMeaningful(meeting.deliveryLead)) return "missing vision meeting delivery lead view";
+  if (!isMeaningful(meeting.partner)) return "missing vision meeting partner view";
+  if (!isMeaningful(meeting.combatant)) return "missing vision meeting combatant view";
+  if (!isMeaningful(meeting.architect)) return "missing vision meeting architect view";
+  if (!isMeaningful(meeting.moderator)) return "missing vision meeting moderator decision";
+  if (!isMeaningful(meeting.weightedSynthesis)) return "missing weighted synthesis";
+  if (!isMeaningful(meeting.confidenceByArea)) return "missing confidence by area";
+  if (
+    meeting.recommendedRoute !== "planning"
+    && meeting.recommendedRoute !== "researching"
+    && meeting.recommendedRoute !== "discussing"
+  ) {
+    return "invalid vision meeting route";
+  }
+  if (meeting.recommendedRoute !== "planning") {
+    return `vision meeting routed back to ${meeting.recommendedRoute}`;
+  }
+  return null;
+}
+
+export function inspectMilestoneRoadmapMarkdown(content: string): MilestonePlanQualityCheck {
+  const issues: string[] = [];
+  const meetingSection = extractSection(content, "Vision Alignment Meeting");
+  if (!meetingSection) {
+    issues.push("missing vision alignment meeting");
+    return { issues };
+  }
+
+  const meeting: Partial<VisionAlignmentMeetingRecord> = {
+    trigger: extractSubsection(meetingSection, "Trigger"),
+    pm: extractSubsection(meetingSection, "Product Manager"),
+    userAdvocate:
extractSubsection(meetingSection, "User Advocate"), + customerPanel: extractSubsection(meetingSection, "Customer Panel"), + business: extractSubsection(meetingSection, "Business"), + researcher: extractSubsection(meetingSection, "Researcher"), + deliveryLead: extractSubsection(meetingSection, "Delivery Lead"), + partner: extractSubsection(meetingSection, "Partner"), + combatant: extractSubsection(meetingSection, "Combatant"), + architect: extractSubsection(meetingSection, "Architect"), + moderator: extractSubsection(meetingSection, "Moderator"), + weightedSynthesis: extractSubsection(meetingSection, "Weighted Synthesis"), + confidenceByArea: extractSubsection(meetingSection, "Confidence By Area"), + recommendedRoute: extractSubsection(meetingSection, "Recommended Route").toLowerCase() as VisionMeetingRoute, + }; + + const blockingIssue = getVisionAlignmentBlockingIssue(meeting); + if (blockingIssue) issues.push(blockingIssue); + return { issues }; +} + +export function getMilestonePlanBlockingIssue(content: string): string | null { + const check = inspectMilestoneRoadmapMarkdown(content); + return check.issues[0] ?? 
null; +} diff --git a/src/resources/extensions/sf/milestone-scope-classifier.ts b/src/resources/extensions/sf/milestone-scope-classifier.ts index bfe558474..b944ec594 100644 --- a/src/resources/extensions/sf/milestone-scope-classifier.ts +++ b/src/resources/extensions/sf/milestone-scope-classifier.ts @@ -300,3 +300,48 @@ export function classifyMilestoneScope(input: MilestoneScopeInput): ScopeClassif }, }; } + +export function milestoneRowToScopeInput(row: { + title?: string; + vision?: string; + success_criteria?: string[]; + key_risks?: Array<{ risk?: string; whyItMatters?: string }>; + definition_of_done?: string[]; + requirement_coverage?: string; + verification_contract?: string; + verification_integration?: string; + verification_operational?: string; + verification_uat?: string; +}): MilestoneScopeInput { + return { + title: row.title, + vision: row.vision, + successCriteria: row.success_criteria, + keyRisks: row.key_risks, + definitionOfDone: row.definition_of_done, + requirementCoverage: row.requirement_coverage, + verificationContract: row.verification_contract, + verificationIntegration: row.verification_integration, + verificationOperational: row.verification_operational, + verificationUat: row.verification_uat, + }; +} + +/** + * Compute the pipeline variant for a milestone by reading its planning + * fields from the DB and running the classifier. Returns `null` when + * classification is unavailable (DB closed, milestone missing, unexpected + * error) — callers MUST treat null as "run the full pipeline" so a + * classification failure never silently downshifts dispatch. 
+ */
+export async function getMilestonePipelineVariant(mid: string): Promise<ReturnType<typeof classifyMilestoneScope>["variant"] | null> {
+  try {
+    const { isDbAvailable, getMilestone } = await import("./sf-db.js");
+    if (!isDbAvailable()) return null;
+    const row = getMilestone(mid);
+    if (!row) return null;
+    return classifyMilestoneScope(milestoneRowToScopeInput(row)).variant;
+  } catch {
+    return null;
+  }
+}
diff --git a/src/resources/extensions/sf/model-cost-table.ts b/src/resources/extensions/sf/model-cost-table.ts
index bc6d15574..a48a21102 100644
--- a/src/resources/extensions/sf/model-cost-table.ts
+++ b/src/resources/extensions/sf/model-cost-table.ts
@@ -55,6 +55,10 @@ export const BUNDLED_COST_TABLE: ModelCostEntry[] = [
   { id: "gpt-5.3-codex", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" },
   { id: "gpt-5.3-codex-spark", inputPer1k: 0.0003, outputPer1k: 0.0012, updatedAt: "2026-03-29" },
   { id: "gpt-5.4", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" },
+  { id: "gpt-5.4-mini", inputPer1k: 0.00075, outputPer1k: 0.0045, updatedAt: "2026-04-18" },
+  // GPT-5.5 API list price, also used for live Codex OAuth routing.
+ // Source: https://openai.com/api/pricing/ + { id: "gpt-5.5", inputPer1k: 0.005, outputPer1k: 0.03, updatedAt: "2026-04-23" }, // Google { id: "gemini-2.0-flash", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2025-03-15" }, diff --git a/src/resources/extensions/sf/model-router.ts b/src/resources/extensions/sf/model-router.ts index 40ff1df0c..a992685b4 100644 --- a/src/resources/extensions/sf/model-router.ts +++ b/src/resources/extensions/sf/model-router.ts @@ -99,6 +99,8 @@ export const MODEL_CAPABILITY_TIER: Record = { "gpt-5.2-codex": "heavy", "gpt-5.3-codex": "heavy", "gpt-5.4": "heavy", + "gpt-5.4-mini": "standard", + "gpt-5.5": "heavy", "o1": "heavy", "o3": "heavy", "o4-mini": "heavy", @@ -132,6 +134,8 @@ const MODEL_COST_PER_1K_INPUT: Record = { "gpt-5.3-codex": 0.005, "gpt-5.3-codex-spark": 0.0003, "gpt-5.4": 0.005, + "gpt-5.4-mini": 0.00075, + "gpt-5.5": 0.005, "o4-mini": 0.005, "o4-mini-deep-research": 0.005, "gemini-2.0-flash": 0.0001, @@ -173,6 +177,11 @@ export const MODEL_CAPABILITY_PROFILES: Record = { "gpt-5.3-codex": { coding: 94, debugging: 91, research: 74, reasoning: 89, speed: 50, longContext: 80, instruction: 89 }, "gpt-5.3-codex-spark": { coding: 68, debugging: 58, research: 42, reasoning: 52, speed: 90, longContext: 50, instruction: 74 }, "gpt-5.4": { coding: 95, debugging: 92, research: 88, reasoning: 94, speed: 42, longContext: 88, instruction: 92 }, + "gpt-5.4-mini": { coding: 80, debugging: 75, research: 70, reasoning: 78, speed: 72, longContext: 72, instruction: 80 }, + // GPT-5.5 scores are relative to the existing gpt-5.4 profile and backed by + // OpenAI's 2026-04-23 published eval deltas across coding, tool use, and long context. 
+ // Source: https://openai.com/index/introducing-gpt-5-5/ + "gpt-5.5": { coding: 96, debugging: 93, research: 89, reasoning: 95, speed: 42, longContext: 90, instruction: 93 }, // ── OpenAI o-series (reasoning-first) ────────────────────────────────────── "o1": { coding: 78, debugging: 82, research: 78, reasoning: 90, speed: 20, longContext: 65, instruction: 82 }, diff --git a/src/resources/extensions/sf/plan-quality.ts b/src/resources/extensions/sf/plan-quality.ts new file mode 100644 index 000000000..4b66f887b --- /dev/null +++ b/src/resources/extensions/sf/plan-quality.ts @@ -0,0 +1,140 @@ +export interface AdversarialReviewRecord { + partner: string; + combatant: string; + architect: string; +} + +export type PlanningMeetingRoute = "discussing" | "researching" | "planning"; + +export interface PlanningMeetingRecord { + trigger: string; + pm: string; + researcher: string; + partner: string; + combatant: string; + architect: string; + moderator: string; + recommendedRoute: PlanningMeetingRoute; + confidenceSummary: string; +} + +export interface SlicePlanQualityCheck { + issues: string[]; +} + +const PLACEHOLDER_VALUES = new Set([ + "", + "not provided.", + "missing adversarial review.", + "missing partner review.", + "missing combatant review.", + "missing architect review.", +]); + +function isMeaningfulReviewBody(value: string | null | undefined): boolean { + const normalized = (value ?? 
"").trim().toLowerCase(); + return normalized.length > 0 && !PLACEHOLDER_VALUES.has(normalized); +} + +function extractSection(content: string, heading: string): string { + const lines = content.split("\n"); + const start = lines.findIndex((line) => line.trim() === `## ${heading}`); + if (start === -1) return ""; + + let end = lines.length; + for (let index = start + 1; index < lines.length; index += 1) { + if (/^##\s+/.test(lines[index])) { + end = index; + break; + } + } + + return lines.slice(start + 1, end).join("\n").trim(); +} + +function extractSubsection(content: string, heading: string): string { + const lines = content.split("\n"); + const start = lines.findIndex((line) => line.trim() === `### ${heading}`); + if (start === -1) return ""; + + let end = lines.length; + for (let index = start + 1; index < lines.length; index += 1) { + if (/^###\s+/.test(lines[index]) || /^##\s+/.test(lines[index])) { + end = index; + break; + } + } + + return lines.slice(start + 1, end).join("\n").trim(); +} + +export function hasCompleteAdversarialReview(review: Partial | null | undefined): boolean { + return isMeaningfulReviewBody(review?.partner) + && isMeaningfulReviewBody(review?.combatant) + && isMeaningfulReviewBody(review?.architect); +} + +export function hasStructuredPlanningMeeting(meeting: Partial | null | undefined): meeting is PlanningMeetingRecord { + if (!meeting) return false; + const route = meeting.recommendedRoute; + return isMeaningfulReviewBody(meeting.trigger) + && isMeaningfulReviewBody(meeting.pm) + && isMeaningfulReviewBody(meeting.researcher) + && isMeaningfulReviewBody(meeting.partner) + && isMeaningfulReviewBody(meeting.combatant) + && isMeaningfulReviewBody(meeting.architect) + && isMeaningfulReviewBody(meeting.moderator) + && isMeaningfulReviewBody(meeting.confidenceSummary) + && (route === "discussing" || route === "researching" || route === "planning"); +} + +export function inspectSlicePlanMarkdown(content: string): SlicePlanQualityCheck { + 
const issues: string[] = []; + const adversarialSection = extractSection(content, "Adversarial Review"); + if (!adversarialSection) { + issues.push("missing adversarial review"); + return { issues }; + } + + const partner = extractSubsection(adversarialSection, "Partner Review"); + const combatant = extractSubsection(adversarialSection, "Combatant Review"); + const architect = extractSubsection(adversarialSection, "Architect Review"); + + if (!isMeaningfulReviewBody(partner)) issues.push("missing partner review"); + if (!isMeaningfulReviewBody(combatant)) issues.push("missing combatant review"); + if (!isMeaningfulReviewBody(architect)) issues.push("missing architect review"); + + const planningMeeting = extractSection(content, "Planning Meeting"); + if (planningMeeting) { + const trigger = extractSubsection(planningMeeting, "Trigger"); + const pm = extractSubsection(planningMeeting, "Product Manager"); + const researcher = extractSubsection(planningMeeting, "Researcher"); + const meetingPartner = extractSubsection(planningMeeting, "Partner"); + const meetingCombatant = extractSubsection(planningMeeting, "Combatant"); + const meetingArchitect = extractSubsection(planningMeeting, "Architect"); + const moderator = extractSubsection(planningMeeting, "Moderator"); + const route = extractSubsection(planningMeeting, "Recommended Route").toLowerCase(); + const confidence = extractSubsection(planningMeeting, "Confidence"); + + if (!isMeaningfulReviewBody(trigger)) issues.push("missing planning meeting trigger"); + if (!isMeaningfulReviewBody(pm)) issues.push("missing planning meeting pm review"); + if (!isMeaningfulReviewBody(researcher)) issues.push("missing planning meeting researcher review"); + if (!isMeaningfulReviewBody(meetingPartner)) issues.push("missing planning meeting partner review"); + if (!isMeaningfulReviewBody(meetingCombatant)) issues.push("missing planning meeting combatant review"); + if (!isMeaningfulReviewBody(meetingArchitect)) issues.push("missing 
planning meeting architect review"); + if (!isMeaningfulReviewBody(moderator)) issues.push("missing planning meeting moderator decision"); + if (!isMeaningfulReviewBody(confidence)) issues.push("missing planning meeting confidence"); + if (route && route !== "planning" && route !== "researching" && route !== "discussing") { + issues.push("invalid planning meeting route"); + } else if (route && route !== "planning") { + issues.push(`planning meeting routed back to ${route}`); + } + } + + return { issues }; +} + +export function getSlicePlanBlockingIssue(content: string): string | null { + const check = inspectSlicePlanMarkdown(content); + return check.issues[0] ?? null; +} diff --git a/src/resources/extensions/sf/preferences-types.ts b/src/resources/extensions/sf/preferences-types.ts index ed2cdad0d..6c3e44220 100644 --- a/src/resources/extensions/sf/preferences-types.ts +++ b/src/resources/extensions/sf/preferences-types.ts @@ -420,6 +420,16 @@ export interface SFPreferences { * Default: false (warnings only for non-critical failures). */ enhanced_verification_strict?: boolean; + /** + * Override the executor context window size (tokens). When set, overrides + * the model's default context window for budget calculations. + */ + context_window_override?: number; + /** + * When true, skip reassess-roadmap dispatch for slices whose SUMMARY + * signals a structurally clean completion (ADR-003 §4, #4778). Default: false. + */ + skip_clean_reassess?: boolean; /** * Enable the preparation phase before discussion sessions. * Preparation analyzes the codebase, reviews prior context, and optionally researches the ecosystem. 
diff --git a/src/resources/extensions/sf/service-tier.ts b/src/resources/extensions/sf/service-tier.ts index 8e146c07d..13ade1a8d 100644 --- a/src/resources/extensions/sf/service-tier.ts +++ b/src/resources/extensions/sf/service-tier.ts @@ -36,6 +36,9 @@ const SERVICE_TIER_SCOPE_NOTE = "Only affects gpt-5.4 models, regardless of prov * (set via CAPABILITY_PATCHES in packages/pi-ai/src/models.ts). When callers * have access to the full Model object, prefer reading capabilities directly. * + * GPT-5.5 is intentionally excluded until we verify its provider payload + * contract instead of assuming `service_tier` support. + * * See: https://github.com/singularity-forge/sf-run/issues/2546 */ const SERVICE_TIER_MODEL_PREFIXES = ["gpt-5.4"] as const; diff --git a/src/resources/extensions/sf/sf-db.ts b/src/resources/extensions/sf/sf-db.ts index 4e5372172..8c42ca7e7 100644 --- a/src/resources/extensions/sf/sf-db.ts +++ b/src/resources/extensions/sf/sf-db.ts @@ -1840,6 +1840,8 @@ export interface SliceRow { planning_meeting?: PlanningMeetingRecord | null; sequence: number; replan_triggered_at: string | null; + /** Optional freeform scope sketch written at plan-slice time. */ + sketch_scope?: string | null; } function parsePlanningMeeting(raw: unknown): PlanningMeetingRecord | null { diff --git a/src/resources/extensions/sf/skill-manifest.ts b/src/resources/extensions/sf/skill-manifest.ts new file mode 100644 index 000000000..a57242d6e --- /dev/null +++ b/src/resources/extensions/sf/skill-manifest.ts @@ -0,0 +1,175 @@ +// GSD2 + skill-manifest — per-unit-type skill allowlist resolver (RFC #4779) +// +// Each auto-mode unit type can declare which skills are relevant to it. This +// trims the set of skills considered for activation in the per-unit prompt, +// reducing prompt token bloat and sharpening model focus. +// +// Contract: +// - Unknown unit types fall through to "all skills" (current behavior). 
+// - A manifest entry referencing a skill that is not installed is a silent
+//   no-op at filter time — the filter passes through installed skills only.
+// - The allowlist is an inclusion list: only skills whose normalized name
+//   appears in the allowlist are retained. Order is not preserved.
+//
+// Phase 1 scope: seed manifests for a small number of unit types as proof.
+// Additional unit types can be added incrementally; each addition is a pure
+// data change with no wiring cost.
+
+import { logWarning } from "./workflow-logger.js";
+
+/** Normalize a skill reference the same way callers do (lowercase, trim). */
+function normalize(name: string): string {
+  return name.trim().toLowerCase();
+}
+
+/**
+ * Allowlist per unit type. Keys match unit type identifiers used by auto-mode
+ * dispatch. Values are normalized skill names.
+ *
+ * Wildcard semantics: a unit type absent from this map resolves to `null`
+ * (wildcard) — meaning "all installed skills are eligible". Prefer absence
+ * over an exhaustive list when uncertain.
+ */
+const UNIT_TYPE_SKILL_MANIFEST: Record<string, string[]> = {
+  // Milestone-level planning / meta flows — predictable skill sets.
+ "research-milestone": [ + "write-docs", + "write-milestone-brief", + "decompose-into-slices", + "grill-me", + "design-an-interface", + "api-design", + "observability", + ], + "plan-milestone": [ + "write-milestone-brief", + "decompose-into-slices", + "design-an-interface", + "grill-me", + "write-docs", + "api-design", + "tdd", + "verify-before-complete", + ], + "complete-milestone": [ + "verify-before-complete", + "write-docs", + "handoff", + "forensics", + "observability", + "security-review", + ], + "validate-milestone": [ + "verify-before-complete", + "review", + "test", + "lint", + "security-review", + "accessibility", + "forensics", + "observability", + ], + "reassess-roadmap": [ + "decompose-into-slices", + "grill-me", + "write-milestone-brief", + "write-docs", + "forensics", + ], + // Slice-level research / planning. + "research-slice": [ + "write-docs", + "decompose-into-slices", + "design-an-interface", + "grill-me", + "api-design", + "observability", + ], + "plan-slice": [ + "decompose-into-slices", + "design-an-interface", + "grill-me", + "write-docs", + "api-design", + "tdd", + "verify-before-complete", + ], + "refine-slice": [ + "decompose-into-slices", + "design-an-interface", + "grill-me", + "write-docs", + "api-design", + "tdd", + "verify-before-complete", + ], + "replan-slice": [ + "decompose-into-slices", + "grill-me", + "design-an-interface", + "write-docs", + "api-design", + ], + "run-uat": [ + "verify-before-complete", + "test", + "review", + "accessibility", + ], + // `execute-task` intentionally omitted — implementation hot path covers a + // wide surface of technologies; wildcard fallback preserves today's + // behavior until per-task skill hints can be derived from task-plan + // frontmatter. See RFC #4779. +}; + +/** + * Resolve the skill allowlist for a unit type. + * + * @returns Array of normalized skill names when an entry exists, or `null` + * when the unit type is unknown (wildcard — caller should not filter). 
+ */
+export function resolveSkillManifest(unitType: string | undefined): string[] | null {
+  if (!unitType) return null;
+  const entry = UNIT_TYPE_SKILL_MANIFEST[unitType];
+  if (!entry) return null;
+  return entry.map(normalize);
+}
+
+/**
+ * Filter a skill list by the manifest for `unitType`. Pass-through when the
+ * manifest is wildcard (unknown unit type) or `unitType` is undefined.
+ */
+export function filterSkillsByManifest<T extends { name: string }>(
+  skills: T[],
+  unitType: string | undefined,
+): T[] {
+  const allowlist = resolveSkillManifest(unitType);
+  if (allowlist === null) return skills;
+  const allowed = new Set(allowlist);
+  return skills.filter(skill => allowed.has(normalize(skill.name)));
+}
+
+/**
+ * Dev-mode guard: warn once per process if a manifest entry references a name
+ * that is not currently installed. Silent in production.
+ */
+const warnedMissing = new Set<string>();
+
+export function warnIfManifestHasMissingSkills(
+  unitType: string | undefined,
+  installedNames: Set<string>,
+): void {
+  // Strict mode is intentionally opt-in via exactly "1"; values like "0" or
+  // "false" must preserve the normal silent manifest behavior.
+ if (process.env.GSD_SKILL_MANIFEST_STRICT !== "1") return; + const allowlist = resolveSkillManifest(unitType); + if (!allowlist) return; + for (const name of allowlist) { + const key = `${unitType}:${name}`; + if (warnedMissing.has(key)) continue; + if (!installedNames.has(name)) { + warnedMissing.add(key); + logWarning("prompt", `skill-manifest: references uninstalled skill '${name}' for unit '${unitType}'`); + } + } +} diff --git a/src/resources/extensions/sf/tests/code-intelligence.test.ts b/src/resources/extensions/sf/tests/code-intelligence.test.ts new file mode 100644 index 000000000..fa4d7259b --- /dev/null +++ b/src/resources/extensions/sf/tests/code-intelligence.test.ts @@ -0,0 +1,194 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { + buildCodeIntelligenceContextBlock, + detectProjectRag, + ensureProjectRagMcpConfig, + findProjectRagSourceDir, + formatProjectRagStatus, + PROJECT_RAG_MCP_SERVER_NAME, + resolveProjectRagBuildJobs, + resolveProjectRagBinaryForProject, +} from "../code-intelligence.ts"; + +function makeProject(): string { + const projectRoot = mkdtempSync(join(tmpdir(), "sf-code-intel-")); + mkdirSync(join(projectRoot, ".sf"), { recursive: true }); + return projectRoot; +} + +function cleanup(projectRoot: string): void { + rmSync(projectRoot, { recursive: true, force: true }); +} + +test("detectProjectRag finds a project-rag server even when the MCP name is generic", () => { + const projectRoot = makeProject(); + try { + writeFileSync( + join(projectRoot, ".mcp.json"), + `${JSON.stringify({ + mcpServers: { + project: { + command: "/opt/project-rag/target/release/project-rag", + }, + }, + }, null, 2)}\n`, + "utf-8", + ); + + const detection = detectProjectRag(projectRoot); + assert.equal(detection.status, "configured"); + 
assert.equal(detection.serverName, "project");
+    assert.match(detection.command ?? "", /project-rag/);
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test("detectProjectRag honors explicit server preference and disabled mode", () => {
+  const projectRoot = makeProject();
+  try {
+    writeFileSync(
+      join(projectRoot, ".mcp.json"),
+      `${JSON.stringify({
+        mcpServers: {
+          code: {
+            command: "/bin/project-rag",
+          },
+        },
+      }, null, 2)}\n`,
+      "utf-8",
+    );
+
+    assert.equal(detectProjectRag(projectRoot, { project_rag_server: "code" }).serverName, "code");
+    assert.equal(detectProjectRag(projectRoot, { project_rag: "off" }).status, "disabled");
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test("ensureProjectRagMcpConfig preserves existing servers and writes project-rag", () => {
+  const projectRoot = makeProject();
+  try {
+    writeFileSync(
+      join(projectRoot, ".mcp.json"),
+      `${JSON.stringify({
+        mcpServers: {
+          existing: {
+            command: "npx",
+            args: ["other-mcp"],
+          },
+        },
+      }, null, 2)}\n`,
+      "utf-8",
+    );
+
+    const result = ensureProjectRagMcpConfig(projectRoot, {
+      ...process.env,
+      SF_PROJECT_RAG_BIN: "/tmp/project-rag",
+      RUST_LOG: "warn",
+    });
+    assert.equal(result.status, "updated");
+    assert.equal(result.serverName, PROJECT_RAG_MCP_SERVER_NAME);
+
+    const parsed = JSON.parse(readFileSync(result.configPath, "utf-8")) as {
+      mcpServers?: Record<string, { command?: string; args?: string[]; env?: Record<string, string> }>;
+    };
+    assert.deepEqual(parsed.mcpServers?.existing, {
+      command: "npx",
+      args: ["other-mcp"],
+    });
+    assert.equal(parsed.mcpServers?.[PROJECT_RAG_MCP_SERVER_NAME]?.command, "/tmp/project-rag");
+    assert.equal(parsed.mcpServers?.[PROJECT_RAG_MCP_SERVER_NAME]?.env?.RUST_LOG, "warn");
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test("resolveProjectRagBinaryForProject finds a vendored release binary", () => {
+  const projectRoot = makeProject();
+  try {
+    const sourceDir = join(projectRoot, "vendor", "project-rag");
+    const binaryPath = join(sourceDir, "target", "release", process.platform === "win32" ?
"project-rag.exe" : "project-rag"); + mkdirSync(join(sourceDir, "target", "release"), { recursive: true }); + writeFileSync(join(sourceDir, "Cargo.toml"), '[package]\nname = "project-rag"\n', "utf-8"); + writeFileSync(binaryPath, "", "utf-8"); + + assert.equal(findProjectRagSourceDir(projectRoot), sourceDir); + assert.equal(resolveProjectRagBinaryForProject(projectRoot, { PATH: "" }), binaryPath); + } finally { + cleanup(projectRoot); + } +}); + +test("ensureProjectRagMcpConfig uses vendored release binary when available", () => { + const projectRoot = makeProject(); + try { + const sourceDir = join(projectRoot, "vendor", "project-rag"); + const binaryPath = join(sourceDir, "target", "release", process.platform === "win32" ? "project-rag.exe" : "project-rag"); + mkdirSync(join(sourceDir, "target", "release"), { recursive: true }); + writeFileSync(join(sourceDir, "Cargo.toml"), '[package]\nname = "project-rag"\n', "utf-8"); + writeFileSync(binaryPath, "", "utf-8"); + + const result = ensureProjectRagMcpConfig(projectRoot, { PATH: "" }); + const parsed = JSON.parse(readFileSync(result.configPath, "utf-8")) as { + mcpServers?: Record; + }; + assert.equal(parsed.mcpServers?.[PROJECT_RAG_MCP_SERVER_NAME]?.command, binaryPath); + } finally { + cleanup(projectRoot); + } +}); + +test("resolveProjectRagBuildJobs caps project-rag builds by default and supports overrides", () => { + assert.equal(resolveProjectRagBuildJobs({}), "2"); + assert.equal(resolveProjectRagBuildJobs({ CARGO_BUILD_JOBS: "4" }), "4"); + assert.equal(resolveProjectRagBuildJobs({ CARGO_BUILD_JOBS: "4", SF_PROJECT_RAG_BUILD_JOBS: "1" }), "1"); + assert.equal(resolveProjectRagBuildJobs({ SF_PROJECT_RAG_BUILD_JOBS: "nope" }), "2"); +}); + +test("formatProjectRagStatus reports source-only project-rag as not operational", () => { + const projectRoot = makeProject(); + try { + const sourceDir = join(projectRoot, "vendor", "project-rag"); + mkdirSync(sourceDir, { recursive: true }); + 
writeFileSync(join(sourceDir, "Cargo.toml"), '[package]\nname = "project-rag"\n', "utf-8"); + + const status = formatProjectRagStatus(projectRoot); + assert.match(status, /Source:/); + assert.match(status, /release binary is missing/i); + assert.match(status, /\/sf codebase rag build/i); + } finally { + cleanup(projectRoot); + } +}); + +test("buildCodeIntelligenceContextBlock injects project-rag usage guidance when configured", () => { + const projectRoot = makeProject(); + try { + writeFileSync( + join(projectRoot, ".mcp.json"), + `${JSON.stringify({ + mcpServers: { + "project-rag": { + command: "/tmp/project-rag", + }, + }, + }, null, 2)}\n`, + "utf-8", + ); + + const block = buildCodeIntelligenceContextBlock(projectRoot); + assert.match(block, /PROJECT CODE INTELLIGENCE/); + assert.match(block, /Project RAG: configured/); + assert.match(block, /query_codebase/); + assert.match(block, /search_by_filters/); + assert.match(block, /If any Project RAG tool is missing or fails/); + } finally { + cleanup(projectRoot); + } +}); diff --git a/src/resources/extensions/sf/tests/complete-slice-composer.test.ts b/src/resources/extensions/sf/tests/complete-slice-composer.test.ts new file mode 100644 index 000000000..052168c75 --- /dev/null +++ b/src/resources/extensions/sf/tests/complete-slice-composer.test.ts @@ -0,0 +1,147 @@ +// GSD-2 — #4782 phase 3 batch 3: complete-slice migrated through composer. 
+ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { buildCompleteSlicePrompt } from "../auto-prompts.ts"; +import { invalidateAllCaches } from "../cache.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + upsertMilestonePlanning, + insertSlice, + insertTask, +} from "../sf-db.ts"; + +function makeBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-completeslice-composer-")); + mkdirSync(join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + try { closeDatabase(); } catch { /* noop */ } + invalidateAllCaches(); + rmSync(base, { recursive: true, force: true }); +} + +function seed(base: string, mid: string): void { + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: mid, title: "Composer Test", status: "active", depends_on: [] }); + upsertMilestonePlanning(mid, { + title: "Composer Test", + status: "active", + vision: "Validate complete-slice migration", + successCriteria: ["Prompt compiles"], + keyRisks: [], + proofStrategy: [], + verificationContract: "", + verificationIntegration: "", + verificationOperational: "", + verificationUat: "", + definitionOfDone: [], + requirementCoverage: "", + boundaryMapMarkdown: "", + }); + insertSlice({ + id: "S01", + milestoneId: mid, + title: "First", + status: "complete", + risk: "low", + depends: [], + demo: "", + sequence: 1, + }); + insertTask({ + id: "T01", + sliceId: "S01", + milestoneId: mid, + title: "Task one", + status: "complete", + }); +} + +function writeArtifacts(base: string): void { + writeFileSync( + join(base, ".gsd", "milestones", "M001", "M001-ROADMAP.md"), + "# M001 Roadmap\n## Slices\n- [x] **S01: First** `risk:low` `depends:[]`\n", + ); + writeFileSync( + join(base, ".gsd", "milestones", 
"M001", "slices", "S01", "S01-PLAN.md"), + "# S01 Plan\n\nSlice plan body.\n", + ); + writeFileSync( + join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md"), + "---\nid: T01\n---\n# T01 Summary\n\nTask one did the thing.\n", + ); +} + +test("#4782 phase 3: buildCompleteSlicePrompt composes roadmap → plan → task summaries → templates in declared order", async (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + writeArtifacts(base); + + const prompt = await buildCompleteSlicePrompt("M001", "Composer Test", "S01", "First", base); + + // Context wrapper present + assert.match(prompt, /## Inlined Context \(preloaded — do not re-read these files\)/); + + // Manifest-declared artifacts present + assert.match(prompt, /### Milestone Roadmap/); + assert.match(prompt, /### Slice Plan/); + assert.match(prompt, /### Task Summary: T01/); + assert.match(prompt, /### Output Template: Slice Summary/); + + // Ordering: roadmap → slice plan → task summaries → slice summary template + const roadmapIdx = prompt.indexOf("### Milestone Roadmap"); + const planIdx = prompt.indexOf("### Slice Plan"); + const taskSummaryIdx = prompt.indexOf("### Task Summary: T01"); + const sliceSummaryTemplateIdx = prompt.indexOf("### Output Template: Slice Summary"); + + assert.ok(roadmapIdx > -1 && planIdx > roadmapIdx, "roadmap precedes slice plan"); + assert.ok(planIdx > -1 && taskSummaryIdx > planIdx, "slice plan precedes task summaries"); + assert.ok( + taskSummaryIdx > -1 && sliceSummaryTemplateIdx > taskSummaryIdx, + "task summaries precede slice-summary template", + ); + + // Task body inlined + assert.match(prompt, /Task one did the thing/); +}); + +test("#4782 phase 3: buildCompleteSlicePrompt handles missing task summaries gracefully", async (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + // Write roadmap + plan but no task 
summaries + writeFileSync( + join(base, ".gsd", "milestones", "M001", "M001-ROADMAP.md"), + "# M001 Roadmap\n## Slices\n- [x] **S01: First** `risk:low` `depends:[]`\n", + ); + writeFileSync( + join(base, ".gsd", "milestones", "M001", "slices", "S01", "S01-PLAN.md"), + "# S01 Plan\n", + ); + + const prompt = await buildCompleteSlicePrompt("M001", "Composer Test", "S01", "First", base); + + // Still succeeds — prior-task-summaries resolver returns null when dir is empty + assert.match(prompt, /### Milestone Roadmap/); + assert.match(prompt, /### Slice Plan/); + // No task summary blocks — they'd have a "### Task Summary:" prefix + assert.ok(!prompt.includes("### Task Summary:")); + // Roadmap still precedes slice plan despite the missing block + const roadmapIdx = prompt.indexOf("### Milestone Roadmap"); + const planIdx = prompt.indexOf("### Slice Plan"); + assert.ok(roadmapIdx > -1 && planIdx > roadmapIdx); +}); diff --git a/src/resources/extensions/sf/tests/remote-questions-manager.test.ts b/src/resources/extensions/sf/tests/remote-questions-manager.test.ts new file mode 100644 index 000000000..a1233bb8e --- /dev/null +++ b/src/resources/extensions/sf/tests/remote-questions-manager.test.ts @@ -0,0 +1,69 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { tryAutoResolveQuestions } from "../../remote-questions/manager.ts"; +import { isUsableRemoteQuestionResult } from "../../ask-user-questions.ts"; + +test("remote question timeout auto-resolution picks the single recommended option for each single-select question", () => { + const answer = tryAutoResolveQuestions([ + { + id: "lane", + header: "Lane", + question: "Which lane?", + options: [ + { label: "Use the safer path (Recommended)", description: "Lower risk" }, + { label: "Use the aggressive path", description: "Higher risk" }, + ], + }, + { + id: "depth", + header: "Depth", + question: "How deep?", + options: [ + { label: "Standard (Recommended)", description: "Balanced" }, + { 
label: "Thorough", description: "Deeper" }, + ], + }, + ]); + + assert.deepEqual(answer, { + answers: { + lane: { answers: ["Use the safer path (Recommended)"] }, + depth: { answers: ["Standard (Recommended)"] }, + }, + }); +}); + +test("remote question timeout auto-resolution refuses ambiguous or multi-select prompts", () => { + assert.equal(tryAutoResolveQuestions([ + { + id: "ambiguous", + header: "Ambiguous", + question: "Pick one", + options: [ + { label: "Alpha (Recommended)", description: "A" }, + { label: "Beta (Recommended)", description: "B" }, + ], + }, + ]), null); + + assert.equal(tryAutoResolveQuestions([ + { + id: "multi", + header: "Multi", + question: "Pick many", + allowMultiple: true, + options: [ + { label: "Alpha (Recommended)", description: "A" }, + { label: "Beta", description: "B" }, + ], + }, + ]), null); +}); + +test("ask_user_questions treats timeout auto-resolution as a usable remote answer", () => { + assert.equal(isUsableRemoteQuestionResult({ timed_out: true, autoResolved: true }), true); + assert.equal(isUsableRemoteQuestionResult({ timed_out: true }), false); + assert.equal(isUsableRemoteQuestionResult({ error: true, autoResolved: true }), false); + assert.equal(isUsableRemoteQuestionResult({ cancelled: true, autoResolved: true }), false); +}); diff --git a/src/resources/extensions/sf/tests/research-milestone-composer.test.ts b/src/resources/extensions/sf/tests/research-milestone-composer.test.ts new file mode 100644 index 000000000..96311d9a2 --- /dev/null +++ b/src/resources/extensions/sf/tests/research-milestone-composer.test.ts @@ -0,0 +1,97 @@ +// GSD-2 — #4782 phase 3 batch 2: research-milestone migrated through composer. 
+ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { buildResearchMilestonePrompt } from "../auto-prompts.ts"; +import { invalidateAllCaches } from "../cache.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + upsertMilestonePlanning, +} from "../sf-db.ts"; + +function makeBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-research-ms-composer-")); + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + try { closeDatabase(); } catch { /* noop */ } + invalidateAllCaches(); + rmSync(base, { recursive: true, force: true }); +} + +function seed(base: string, mid: string): void { + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: mid, title: "Research Test", status: "active", depends_on: [] }); + upsertMilestonePlanning(mid, { + title: "Research Test", + status: "active", + vision: "Research composer migration", + successCriteria: ["Prompt compiles"], + keyRisks: [], + proofStrategy: [], + verificationContract: "", + verificationIntegration: "", + verificationOperational: "", + verificationUat: "", + definitionOfDone: [], + requirementCoverage: "", + boundaryMapMarkdown: "", + }); +} + +test("#4782 phase 3: buildResearchMilestonePrompt emits milestone-context then research template via composer", async (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + + writeFileSync( + join(base, ".gsd", "milestones", "M001", "M001-CONTEXT.md"), + "# M001 Context\n\nA research test milestone.\n", + ); + + const prompt = await buildResearchMilestonePrompt("M001", "Research Test", base); + + // Context wrapper present + assert.match(prompt, /## Inlined Context \(preloaded — do not re-read these files\)/); + + // 
Milestone context inlined first (manifest order) + assert.match(prompt, /### Milestone Context/); + assert.match(prompt, /A research test milestone/); + + // Research template inlined as the templates artifact + assert.match(prompt, /### Output Template: Research/); + + // Ordering: milestone-context precedes the research template + const contextIdx = prompt.indexOf("### Milestone Context"); + const researchIdx = prompt.indexOf("### Output Template: Research"); + assert.ok(contextIdx > -1 && researchIdx > contextIdx, + `milestone-context (${contextIdx}) must precede research template (${researchIdx})`); +}); + +test("#4782 phase 3: buildResearchMilestonePrompt still includes project + requirements + decisions in declared order", async (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + writeFileSync(join(base, ".gsd", "milestones", "M001", "M001-CONTEXT.md"), "# M001 Context\n"); + + const prompt = await buildResearchMilestonePrompt("M001", "Research Test", base); + + // Manifest-declared order: milestone-context, project, requirements, decisions, templates. + // Any projections that resolve to content must preserve that order. + const contextIdx = prompt.indexOf("### Milestone Context"); + const researchIdx = prompt.indexOf("### Output Template: Research"); + assert.ok(contextIdx > -1 && researchIdx > contextIdx, + "milestone-context must come before research template regardless of which optional artifacts are present"); +}); diff --git a/src/resources/extensions/sf/tests/run-uat-composer.test.ts b/src/resources/extensions/sf/tests/run-uat-composer.test.ts new file mode 100644 index 000000000..d01d2b876 --- /dev/null +++ b/src/resources/extensions/sf/tests/run-uat-composer.test.ts @@ -0,0 +1,113 @@ +// GSD-2 — #4782 phase 3: run-uat migrated to compose context via manifest. +// Regression test: prompt still carries the declared artifacts in the +// expected shape after the migration. 
+ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { buildRunUatPrompt } from "../auto-prompts.ts"; +import { invalidateAllCaches } from "../cache.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + upsertMilestonePlanning, + insertSlice, +} from "../sf-db.ts"; + +function makeBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-runuat-composer-")); + mkdirSync(join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + try { closeDatabase(); } catch { /* noop */ } + invalidateAllCaches(); + rmSync(base, { recursive: true, force: true }); +} + +function seed(base: string, mid: string): void { + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: mid, title: "Test", status: "active", depends_on: [] }); + upsertMilestonePlanning(mid, { + title: "Test Milestone", + status: "active", + vision: "Demo the composer migration", + successCriteria: ["Prompt compiles", "UAT passes"], + keyRisks: [], + proofStrategy: [], + verificationContract: "", + verificationIntegration: "", + verificationOperational: "", + verificationUat: "", + definitionOfDone: [], + requirementCoverage: "", + boundaryMapMarkdown: "", + }); + insertSlice({ + id: "S01", + milestoneId: mid, + title: "First", + status: "complete", + risk: "low", + depends: [], + demo: "", + sequence: 1, + }); +} + +test("#4782 phase 3: buildRunUatPrompt inlines slice UAT, slice summary, project via composer", async (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + + // Write UAT + SUMMARY files for the slice + const uatRel = ".gsd/milestones/M001/slices/S01/S01-UAT.md"; + writeFileSync(join(base, uatRel), "# S01 UAT\n\n- Check X\n- Check Y\n"); + 
writeFileSync( + join(base, ".gsd", "milestones", "M001", "slices", "S01", "S01-SUMMARY.md"), + "---\nid: S01\nparent: M001\n---\n# S01 Summary\n**One-liner**\n\n## What Happened\nShip.\n", + ); + + const uatContent = "# S01 UAT\n\n- Check X\n- Check Y\n"; + const prompt = await buildRunUatPrompt("M001", "S01", uatRel, uatContent, base); + + // Context wrapper present + assert.match(prompt, /## Inlined Context \(preloaded — do not re-read these files\)/); + + // Artifacts from the manifest inline list, in declared order + assert.match(prompt, /### S01 UAT[\s\S]*### S01 Summary/); + + // UAT body content inlined + assert.match(prompt, /Check X[\s\S]*Check Y/); + + // Summary body content inlined + assert.match(prompt, /What Happened[\s\S]*Ship/); +}); + +test("#4782 phase 3: buildRunUatPrompt omits optional slice summary when file is missing", async (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + + const uatRel = ".gsd/milestones/M001/slices/S01/S01-UAT.md"; + writeFileSync(join(base, uatRel), "# S01 UAT\n"); + // No SUMMARY.md written — composer should skip the slice-summary key. + + const prompt = await buildRunUatPrompt("M001", "S01", uatRel, "# S01 UAT\n", base); + + // UAT still present + assert.match(prompt, /### S01 UAT/); + // No empty "S01 Summary" section — section body would be blank without a file + assert.ok(!prompt.includes("### S01 Summary")); + // No double separator from a skipped block + assert.ok(!prompt.includes("---\n\n---")); +}); diff --git a/src/resources/extensions/sf/tests/unit-context-composer.test.ts b/src/resources/extensions/sf/tests/unit-context-composer.test.ts new file mode 100644 index 000000000..8985a16d5 --- /dev/null +++ b/src/resources/extensions/sf/tests/unit-context-composer.test.ts @@ -0,0 +1,175 @@ +// GSD-2 — #4782 phase 2 composer tests. 
Pure-function tests using mock +// resolvers plus an integration check that reassess-roadmap's migrated +// builder produces a prompt matching expectations. + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { + composeInlinedContext, + manifestBudgetChars, + type ArtifactResolver, +} from "../unit-context-composer.ts"; +import type { ArtifactKey } from "../unit-context-manifest.ts"; +import { buildReassessRoadmapPrompt } from "../auto-prompts.ts"; +import { invalidateAllCaches } from "../cache.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + upsertMilestonePlanning, + insertSlice, +} from "../sf-db.ts"; + +// ─── Pure composer tests ────────────────────────────────────────────────── + +test("#4782 composer: returns empty string for unknown unit type", async () => { + const out = await composeInlinedContext("never-dispatched", async () => "body"); + assert.strictEqual(out, ""); +}); + +test("#4782 composer: walks the manifest's inline list in declared order", async () => { + // reassess-roadmap manifest: [roadmap, slice-context, slice-summary, project, requirements, decisions] + const calls: ArtifactKey[] = []; + const resolver: ArtifactResolver = async (key) => { + calls.push(key); + return `BODY:${key}`; + }; + const out = await composeInlinedContext("reassess-roadmap", resolver); + assert.deepEqual(calls, [ + "roadmap", + "slice-context", + "slice-summary", + "project", + "requirements", + "decisions", + ]); + // Output joins blocks with the "---" separator. 
+ assert.match(out, /BODY:roadmap\n\n---\n\nBODY:slice-context/); +}); + +test("#4782 composer: null-returning resolvers are silently omitted", async () => { + const resolver: ArtifactResolver = async (key) => { + if (key === "slice-context" || key === "project") return null; + return `BODY:${key}`; + }; + const out = await composeInlinedContext("reassess-roadmap", resolver); + // slice-context + project skipped — not in output, no empty blocks + assert.ok(!out.includes("BODY:slice-context")); + assert.ok(!out.includes("BODY:project")); + // Remaining keys still emitted in declared order + assert.match(out, /BODY:roadmap\n\n---\n\nBODY:slice-summary\n\n---\n\nBODY:requirements\n\n---\n\nBODY:decisions/); +}); + +test("#4782 composer: empty-string resolvers are omitted (treated as no-op)", async () => { + const resolver: ArtifactResolver = async (key) => { + if (key === "slice-context") return ""; + if (key === "slice-summary") return null; + return `BODY:${key}`; + }; + const out = await composeInlinedContext("reassess-roadmap", resolver); + assert.ok(!out.includes("BODY:slice-context")); + assert.ok(!out.includes("BODY:slice-summary")); + // Must not leave double-separators when blocks are skipped + assert.ok(!out.includes("---\n\n---")); +}); + +test("#4782 composer: resolver errors surface to caller", async () => { + const resolver: ArtifactResolver = async () => { + throw new Error("resolver boom"); + }; + await assert.rejects( + () => composeInlinedContext("reassess-roadmap", resolver), + /resolver boom/, + ); +}); + +test("#4782 composer: manifestBudgetChars returns declared budget", () => { + const small = manifestBudgetChars("reassess-roadmap"); + assert.ok(small !== null && small > 0); + assert.strictEqual(manifestBudgetChars("never-dispatched"), null); +}); + +// ─── Integration: migrated buildReassessRoadmapPrompt ───────────────────── + +function makeFixtureBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-composer-pilot-")); + 
mkdirSync(join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + try { closeDatabase(); } catch { /* noop */ } + invalidateAllCaches(); + rmSync(base, { recursive: true, force: true }); +} + +function seed(base: string, mid: string): void { + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: mid, title: "Test", status: "active", depends_on: [] }); + upsertMilestonePlanning(mid, { + title: "Test", + status: "active", + vision: "Ship it", + successCriteria: ["It ships"], + keyRisks: [], + proofStrategy: [], + verificationContract: "", + verificationIntegration: "", + verificationOperational: "", + verificationUat: "", + definitionOfDone: [], + requirementCoverage: "", + boundaryMapMarkdown: "", + }); + insertSlice({ + id: "S01", + milestoneId: mid, + title: "First", + status: "complete", + risk: "low", + depends: [], + demo: "", + sequence: 1, + }); +} + +function writeArtifacts(base: string): void { + writeFileSync( + join(base, ".gsd", "milestones", "M001", "M001-ROADMAP.md"), + "# M001\n## Slices\n- [x] **S01: First** `risk:low` `depends:[]`\n", + ); + writeFileSync( + join(base, ".gsd", "milestones", "M001", "slices", "S01", "S01-SUMMARY.md"), + "---\nid: S01\nparent: M001\n---\n# S01 Summary\n**One-liner**\n\n## What Happened\nDone.\n", + ); +} + +test("#4782 phase 2: buildReassessRoadmapPrompt emits composer-shaped context with manifest-declared artifacts", async (t) => { + const base = makeFixtureBase(); + t.after(() => cleanup(base)); + invalidateAllCaches(); + + seed(base, "M001"); + writeArtifacts(base); + + const prompt = await buildReassessRoadmapPrompt("M001", "Test", "S01", base); + + // Context block wrapper from capPreamble + assert.match(prompt, /## Inlined Context \(preloaded — do not re-read these files\)/); + + // Roadmap inlined first (manifest order) + assert.match(prompt, /### Current Roadmap/); + assert.match(prompt, /S01: 
First/); + + // Slice summary present + assert.match(prompt, /### S01 Summary/); + assert.match(prompt, /One-liner/); + + // Slice context is optional and not present in this fixture — must not + // leave a stray empty section + assert.ok(!prompt.includes("Slice Context (from discussion)")); +}); diff --git a/src/resources/extensions/sf/tests/unit-context-manifest.test.ts b/src/resources/extensions/sf/tests/unit-context-manifest.test.ts new file mode 100644 index 000000000..4ca746841 --- /dev/null +++ b/src/resources/extensions/sf/tests/unit-context-manifest.test.ts @@ -0,0 +1,169 @@ +// GSD-2 — #4782 phase 1: schema tests + CI coverage guard for manifests. + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { + ARTIFACT_KEYS, + KNOWN_UNIT_TYPES, + UNIT_MANIFESTS, + resolveManifest, + type ArtifactKey, + type SkillsPolicy, + type UnitContextManifest, +} from "../unit-context-manifest.ts"; + +// ─── Coverage: every known unit type has a manifest ────────────────────── + +test("#4782 phase 1: every KNOWN_UNIT_TYPES entry has a UNIT_MANIFESTS entry", () => { + for (const unitType of KNOWN_UNIT_TYPES) { + assert.ok( + UNIT_MANIFESTS[unitType], + `unit type "${unitType}" is declared in KNOWN_UNIT_TYPES but has no manifest`, + ); + } +}); + +test("#4782 phase 1: every UNIT_MANIFESTS entry corresponds to a known unit type", () => { + const known = new Set(KNOWN_UNIT_TYPES as readonly string[]); + for (const unitType of Object.keys(UNIT_MANIFESTS)) { + assert.ok( + known.has(unitType), + `manifest entry "${unitType}" is not in KNOWN_UNIT_TYPES — add it there or remove the manifest`, + ); + } +}); + +// ─── Coverage: every unitType stringly-typed in auto-dispatch.ts is known ─ + +test("#4782 phase 1: every unitType string in auto-dispatch.ts has a manifest", () => { + // Source-only coverage check — read the 
dispatcher and enumerate its + // unitType literals. This is a CI guard against manifest drift: if a + // new dispatch rule is added without a corresponding manifest entry, + // this test fails loudly. Read-only check of source text; the cheapest + // way to enumerate declared unit types without running the dispatcher. + // allow-source-grep: enumerate unitType literals for CI coverage guard + const __dirname = dirname(fileURLToPath(import.meta.url)); + const dispatchSrc = readFileSync(join(__dirname, "..", "auto-dispatch.ts"), "utf-8"); + const matches = Array.from(dispatchSrc.matchAll(/unitType:\s*"([^"]+)"/g)); + const seen = new Set(); + for (const m of matches) { + const t = m[1]; + if (!t) continue; + seen.add(t); + } + const missing: string[] = []; + for (const t of seen) { + if (!UNIT_MANIFESTS[t as keyof typeof UNIT_MANIFESTS]) { + missing.push(t); + } + } + assert.deepEqual(missing, [], `unit types dispatched in auto-dispatch.ts but missing from UNIT_MANIFESTS: ${missing.join(", ")}`); +}); + +// ─── Shape: every manifest conforms to the schema invariants ────────────── + +test("#4782 phase 1: every manifest's artifacts reference known ArtifactKey values", () => { + const validKeys = new Set(ARTIFACT_KEYS as readonly string[]); + for (const [unitType, manifest] of Object.entries(UNIT_MANIFESTS)) { + const all: ArtifactKey[] = [ + ...manifest.artifacts.inline, + ...manifest.artifacts.excerpt, + ...manifest.artifacts.onDemand, + ]; + for (const key of all) { + assert.ok( + validKeys.has(key), + `manifest "${unitType}" references unknown artifact key "${key}"`, + ); + } + } +}); + +test("#4782 phase 1: no manifest has the same artifact key in inline AND excerpt (mutually exclusive)", () => { + for (const [unitType, manifest] of Object.entries(UNIT_MANIFESTS)) { + const inline = new Set(manifest.artifacts.inline as readonly string[]); + const clashes = (manifest.artifacts.excerpt as readonly string[]).filter(k => inline.has(k)); + assert.deepEqual( + clashes, 
+ [], + `manifest "${unitType}" has overlapping inline+excerpt artifact keys: ${clashes.join(", ")}. Pick one.`, + ); + } +}); + +test("#4782 phase 1: every manifest has a positive maxSystemPromptChars", () => { + for (const [unitType, manifest] of Object.entries(UNIT_MANIFESTS)) { + assert.ok( + typeof manifest.maxSystemPromptChars === "number" && manifest.maxSystemPromptChars > 0, + `manifest "${unitType}" has invalid maxSystemPromptChars: ${manifest.maxSystemPromptChars}`, + ); + } +}); + +test("#4782 phase 1: skills policy shapes are valid discriminated-union members", () => { + for (const [unitType, manifest] of Object.entries(UNIT_MANIFESTS)) { + const p = manifest.skills as SkillsPolicy; + switch (p.mode) { + case "none": + case "all": + break; + case "allowlist": + assert.ok( + Array.isArray(p.skills) && p.skills.every(s => typeof s === "string"), + `manifest "${unitType}" has allowlist policy with invalid skills[]`, + ); + break; + default: { + const _exhaustive: never = p; + void _exhaustive; + assert.fail(`manifest "${unitType}" has unrecognized skills.mode`); + } + } + } +}); + +// ─── Lookup helper ──────────────────────────────────────────────────────── + +test("#4782 phase 1: resolveManifest returns null for an unknown unit type", () => { + assert.strictEqual(resolveManifest("never-dispatched-unit-type"), null); +}); + +test("#4782 phase 1: resolveManifest returns a manifest for every known unit type", () => { + for (const unitType of KNOWN_UNIT_TYPES) { + const m = resolveManifest(unitType); + assert.ok(m, `resolveManifest("${unitType}") should return a manifest`); + // Identity check — the helper should return the exact object, not a copy. 
+ assert.strictEqual(m, UNIT_MANIFESTS[unitType]); + } +}); + +// ─── Phase-2 target: complete-milestone manifest reflects #4780's excerpt shape ─ + +test("#4782 phase 1: complete-milestone manifest declares slice-summary as excerpt (matches #4780)", () => { + const m = UNIT_MANIFESTS["complete-milestone"]; + assert.ok( + m.artifacts.excerpt.includes("slice-summary"), + "complete-milestone should declare slice-summary as excerpt (alignment with #4780)", + ); + assert.ok( + !m.artifacts.inline.includes("slice-summary"), + "complete-milestone should NOT declare slice-summary as inline — that was the #4780 bloat", + ); +}); + +// ─── Phase-2 target: reassess-roadmap manifest is the tightest budget ──── + +test("#4782 phase 1: reassess-roadmap manifest has the smallest budget among manifests", () => { + const m = UNIT_MANIFESTS["reassess-roadmap"]; + for (const [unitType, other] of Object.entries(UNIT_MANIFESTS)) { + if (unitType === "reassess-roadmap") continue; + assert.ok( + m.maxSystemPromptChars <= other.maxSystemPromptChars, + `reassess-roadmap budget (${m.maxSystemPromptChars}) should be ≤ ${unitType} budget (${other.maxSystemPromptChars})`, + ); + } +}); diff --git a/src/resources/extensions/sf/types.ts b/src/resources/extensions/sf/types.ts index 4142635fa..951a43443 100644 --- a/src/resources/extensions/sf/types.ts +++ b/src/resources/extensions/sf/types.ts @@ -338,6 +338,8 @@ export interface PhaseSkipPreferences { reassess_after_slice?: boolean; /** When true, auto-mode pauses before each slice for discussion (#789). */ require_slice_discussion?: boolean; + /** ADR-011 Phase 2: when true, mid-execution escalation overrides are injected into the execute-task prompt. */ + mid_execution_escalation?: boolean; } export interface NotificationPreferences { @@ -647,3 +649,27 @@ export interface GateEvaluationConfig { /** Whether to evaluate task-level gates (Q5/Q6/Q7) via reactive-execute. Default: true when enabled. 
*/ task_gates?: boolean; } + +// ─── ADR-011 Phase 2 Escalation ────────────────────────────────────────── + +export interface EscalationOption { + id: string; + label: string; + tradeoffs: string; +} + +export interface EscalationArtifact { + version: 1; + taskId: string; + sliceId: string; + milestoneId: string; + question: string; + options: EscalationOption[]; + recommendation: string; + recommendationRationale: string; + continueWithDefault: boolean; + createdAt: string; + respondedAt?: string; + userChoice?: string; + userRationale?: string; +} diff --git a/src/resources/extensions/sf/unit-context-composer.ts b/src/resources/extensions/sf/unit-context-composer.ts new file mode 100644 index 000000000..8306171a8 --- /dev/null +++ b/src/resources/extensions/sf/unit-context-composer.ts @@ -0,0 +1,197 @@ +// GSD-2 — UnitContextComposer (#4782 phase 2). +// +// Reads a unit type's manifest and orchestrates artifact inlining through +// a caller-provided resolver. Returns a joined context block suitable for +// substitution into the unit's prompt template. +// +// Design rationale: +// - Pure dependency on the manifest module — no circular import with +// `auto-prompts.ts` where the per-artifact-key resolver lives. +// - Caller-supplied resolver means the composer can be unit-tested with +// trivial mocks; production wiring in `auto-prompts.ts` dispatches to +// the existing `inlineFile` / `inline*FromDb` helpers. +// - Null-returning resolvers are skipped silently: they model the +// "artifact is optional / missing / not applicable to this milestone" +// case. The composer never errors on a missing artifact. +// +// Scope: phase 2 pilot shipped `composeInlinedContext` for static-key +// inlining. Phase 3.5 (#4924) adds the v2 surface — `composeUnitContext` +// — which also handles excerpts, computed artifacts, and prepended blocks. +// `composeInlinedContext` stays for backward compatibility with the +// already-migrated simple builders. 
+// +// ─── Composer boundary invariant (#4924) ───────────────────────────────── +// +// The composer is allowed to: +// - order named sections per the manifest's declared sequence +// - resolve registered artifacts (static / computed / excerpt / on-demand) +// - apply typed policies (knowledge / memory / codebase-map / preferences) +// +// The composer must NOT grow: +// - arbitrary conditionals on unit state +// - loops over caller-supplied data +// - string templating beyond section composition (join + separator) +// +// Logic that needs those belongs in a typed computed-artifact builder +// owned by the unit, not in the composer. Reviews must enforce this — it +// is the difference between an orchestrator and a runaway DSL. + +import { + resolveManifest, + type ArtifactKey, + type BaseResolverContext, + type ComputedArtifactId, + type ComputedArtifactRegistry, + type UnitContextManifest, +} from "./unit-context-manifest.js"; + +/** + * Async function mapping an artifact key to its inlined-content string, + * or `null` when the artifact does not apply to the current milestone + * (missing file, empty table, etc). + */ +export type ArtifactResolver = (key: ArtifactKey) => Promise; + +/** + * Produce the inlined-context portion of a unit's system prompt by + * walking the manifest's `artifacts.inline` list in order and calling + * the provided resolver for each key. + * + * Returns an empty string when the unit type has no manifest registered, + * so callers can guard their wiring with a simple truthy check. Unknown + * unit types do not error — this mirrors `resolveManifest`'s contract. + * + * The separator between inlined blocks matches the in-tree convention + * (`\n\n---\n\n`) so composer output slots into existing prompt templates + * without visible diff. 
+ */ +export async function composeInlinedContext( + unitType: string, + resolveArtifact: ArtifactResolver, +): Promise { + const manifest: UnitContextManifest | null = resolveManifest(unitType); + if (!manifest) return ""; + + const blocks: string[] = []; + for (const key of manifest.artifacts.inline) { + const body = await resolveArtifact(key); + if (body !== null && body.length > 0) { + blocks.push(body); + } + } + return blocks.join("\n\n---\n\n"); +} + +/** + * Convenience helper returning the manifest's declared budget so callers + * can telemetry a mismatch between actual prompt size and declared budget. + * Returns null for unknown unit types. + */ +export function manifestBudgetChars(unitType: string): number | null { + const manifest = resolveManifest(unitType); + return manifest ? manifest.maxSystemPromptChars : null; +} + +// ─── v2 surface (#4924) ─────────────────────────────────────────────────── + +/** + * Resolver for excerpt-class artifacts. Returns the compact block body + * (per-unit excerpt rendering — e.g. `buildSliceSummaryExcerpt` for the + * complete-milestone closer) or `null` to omit. Mirrors `ArtifactResolver` + * shape so consumers can reuse the same registry pattern. + */ +export type ExcerptResolver = (key: ArtifactKey) => Promise; + +/** + * Inputs to the v2 composer entrypoint. The base context is required; + * each resolver/registry is optional and absent ones are treated as + * "manifest declares no entries of that class for this unit." + */ +export interface ComposeUnitContextOptions { + readonly base: BaseResolverContext; + readonly resolveArtifact?: ArtifactResolver; + readonly resolveExcerpt?: ExcerptResolver; + readonly computed?: ComputedArtifactRegistry; +} + +/** + * Composer output. Kept structured (rather than a single joined string) + * because some builders need to splice the prepend block above their own + * preamble while keeping the main context block in its existing position. 
+ * + * Both fields are joined with the in-tree `\n\n---\n\n` separator. Empty + * string means "no content for this section" — callers branch on truthy + * to decide whether to render any wrapper headers. + */ +export interface ComposedUnitContext { + readonly prepend: string; + readonly inline: string; +} + +const SECTION_SEPARATOR = "\n\n---\n\n"; + +/** + * Compose all manifest-declared context for a unit type using the v2 + * surface. Walks `prepend` first (computed-only), then the `inline` list + * (static keys via `resolveArtifact`), then `excerpt` (via `resolveExcerpt`), + * then `artifacts.computed` (via the typed registry). Order within each + * section follows the manifest's declared sequence. + * + * Unknown unit types return empty strings for both sections — callers can + * fall back to existing imperative wiring without a special case. + * + * Resolver / registry omissions: if the manifest declares an entry but no + * resolver / registry entry is provided, the composer skips it silently. + * This matches the v1 contract where a null body is a no-op, and lets + * partial migrations land without forcing every consumer to register + * every artifact class up-front. + */ +export async function composeUnitContext( + unitType: string, + opts: ComposeUnitContextOptions, +): Promise { + const manifest: UnitContextManifest | null = resolveManifest(unitType); + if (!manifest) return { prepend: "", inline: "" }; + + const prependBlocks = await runComputed(manifest.prepend ?? 
[], opts); + const inlineBlocks: string[] = []; + + for (const key of manifest.artifacts.inline) { + if (!opts.resolveArtifact) break; + const body = await opts.resolveArtifact(key); + if (body && body.length > 0) inlineBlocks.push(body); + } + for (const key of manifest.artifacts.excerpt) { + if (!opts.resolveExcerpt) break; + const body = await opts.resolveExcerpt(key); + if (body && body.length > 0) inlineBlocks.push(body); + } + inlineBlocks.push(...await runComputed(manifest.artifacts.computed ?? [], opts)); + + return { + prepend: prependBlocks.join(SECTION_SEPARATOR), + inline: inlineBlocks.join(SECTION_SEPARATOR), + }; +} + +/** + * Invoke the registered builder for each declared computed id, in order. + * Missing registry entries (manifest declares the id but caller didn't + * register it) are skipped silently — see composeUnitContext rationale. + */ +async function runComputed( + ids: readonly ComputedArtifactId[], + opts: ComposeUnitContextOptions, +): Promise { + if (ids.length === 0 || !opts.computed) return []; + const out: string[] = []; + for (const id of ids) { + const entry = opts.computed[id] as + | { build: (i: unknown, b: BaseResolverContext) => Promise; inputs: unknown } + | undefined; + if (!entry) continue; + const body = await entry.build(entry.inputs, opts.base); + if (body && body.length > 0) out.push(body); + } + return out; +} diff --git a/src/resources/extensions/sf/unit-context-manifest.ts b/src/resources/extensions/sf/unit-context-manifest.ts new file mode 100644 index 000000000..6327ec387 --- /dev/null +++ b/src/resources/extensions/sf/unit-context-manifest.ts @@ -0,0 +1,574 @@ +// GSD-2 — UnitContextManifest (#4782 phase 1). +// +// Declarative description of what context each auto-mode unit type needs +// in its system prompt. Establishes the contract that later phases will +// use to drive a single composeSystemPromptForUnit() — replacing the +// per-unit-type branching currently spread across `auto-prompts.ts`. 
+// +// **Phase 1 ships the type + the data + a CI coverage guard.** It adds +// zero wiring — no caller reads a manifest yet. Every unit type gets a +// manifest that describes today's behavior as faithfully as possible, so +// when the composer lands in phase 2 the migration can proceed manifest- +// by-manifest without behavior change. +// +// Phased rollout tracking: +// - Phase 1 (this PR): schema + manifests + coverage test. +// - Phase 2: add composeSystemPromptForUnit(); migrate one low-risk +// unit type (e.g. reassess-roadmap) as the pilot. +// - Phase 3: migrate remaining unit types, tighten manifests per +// empirical usage, introduce skipWhen predicates absorbing the +// reassess opt-in gate from #4778. +// - Phase 4: introduce pipeline variants as declared sequences, +// absorbing the scope-classifier gates from #4781. +// +// Naming: +// - Artifact keys are STABLE strings (not paths). Path resolution is +// the composer's job; manifests describe intent, not disk layout. +// - Char budgets are nominal — blown budgets log a telemetry event, +// they do not truncate or error (the composer decides fallback). + +// ─── Artifact registry ──────────────────────────────────────────────────── + +/** + * Stable identifiers for every artifact class a unit might inline, excerpt, + * or reference on-demand. Adding a new artifact class requires (a) a key + * here, (b) path/body resolution in the composer, and (c) updates to any + * manifest that should surface it. 
+ */ +export const ARTIFACT_KEYS = [ + // Milestone-scoped + "roadmap", + "milestone-context", + "milestone-summary", + "milestone-validation", + "milestone-research", + "milestone-plan", + // Slice-scoped + "slice-context", + "slice-research", + "slice-plan", + "slice-summary", + "slice-uat", + "slice-assessment", + // Task-scoped + "task-plan", + "task-summary", + "prior-task-summaries", + "dependency-summaries", + // Project-scoped + "requirements", + "decisions", + "project", + "templates", +] as const; + +export type ArtifactKey = typeof ARTIFACT_KEYS[number]; + +// ─── Policy types ───────────────────────────────────────────────────────── + +/** + * Skill catalog policy. `all` preserves today's default: the full catalog + * is stamped into the prompt. `allowlist` narrows to the named skills. + * `none` suppresses the catalog entirely. + * + * The allowlist mode pairs with `skill-manifest.ts` (#4779) — entries + * there are the source of truth for "which skills are dispatched for a + * unit type"; this manifest carries the policy shape so the composer + * can unify the two surfaces in phase 2. + */ +export type SkillsPolicy = + | { readonly mode: "none" } + | { readonly mode: "all" } + | { readonly mode: "allowlist"; readonly skills: readonly string[] }; + +/** Knowledge block policy — see `bootstrap/system-context.ts` loadKnowledgeBlock. */ +export type KnowledgePolicy = "none" | "critical-only" | "scoped" | "full"; + +/** Memory store policy — see `bootstrap/system-context.ts` loadMemoryBlock. */ +export type MemoryPolicy = "none" | "critical-only" | "prompt-relevant"; + +/** Preferences block policy. */ +export type PreferencesPolicy = "none" | "active-only" | "full"; + +/** + * Tool-access policy per unit type (#4934). + * + * Declarative-only in this PR — runtime enforcement (write-gate.ts predicate + * + dispatch-time isolation) lands in follow-up PRs. 
The shape is the + * agreement between manifest authors and enforcement; surfacing it now lets + * reviewers ratify per-unit policy intent before any blocking logic ships. + * + * Modes: + * - "all" — Read + Edit/Write/MultiEdit/NotebookEdit + Bash + Task. + * The unit may modify any file in the working tree. + * Reserved for execute-task / reactive-execute, which run + * in worktrees today and whose writes are committed. + * - "read-only" — Read tools only. No file mutation. No shell. No subagent + * dispatch. Reserved for future units that should be + * strictly observational (none today). + * - "planning" — Read tools always; writes restricted to .gsd/** under + * basePath; Bash limited to a per-unit safe allowlist; + * Task subagent dispatch denied. Catches the bug class + * where a discuss-milestone turn modifies user source + * files (forensics: ~/Github/test-apps/b23, #4934). + * - "docs" — Read tools always; writes restricted to .gsd/** AND + * the explicit `allowedPathGlobs` set; Bash safe-allowlist; + * no subagents. Reserved for rewrite-docs, which legitimately + * edits project markdown outside .gsd/. + * + * The allowlist for "docs" is declared per-manifest rather than hardcoded so + * projects with non-standard doc layouts can extend it without forking the + * enforcement code (open question for the wiring PR — exact representation + * may shift). Globs are interpreted relative to the project basePath. + */ +export type ToolsPolicy = + | { readonly mode: "all" } + | { readonly mode: "read-only" } + | { readonly mode: "planning" } + | { readonly mode: "docs"; readonly allowedPathGlobs: readonly string[] }; + +// ─── Computed-artifact registry (#4924 v2 contract) ─────────────────────── + +/** + * Typed registry of computed-artifact ids → their per-call input shape. + * + * **This is the core anti-`extra: Record` surface.** Each + * computed block a unit may emit is registered here with an explicit input + * type. 
Adding a new computed block requires extending this interface — a + * deliberate, reviewable change rather than a silent ad-hoc field. + * + * Consumers extend via module augmentation if a downstream package needs to + * register new computed ids (rare in-tree; no public API today). The repo's + * own computed blocks are declared inline below. + * + * Invariant: the value type for each id MUST be a plain serializable shape. + * No closures, no class instances, no `any`. If a builder needs framework + * state, declare the specific fields it needs — don't smuggle objects. + */ +// eslint-disable-next-line @typescript-eslint/no-empty-interface +export interface ComputedArtifactInputs { + // Phase 3.5 (v2 contract PR — #4924): no computed ids are registered yet. + // Each follow-up batch (slice prompt, replan-slice, gate-evaluate, etc.) + // adds the ids it needs as part of its migration commit. + // + // Example shape an upcoming batch will register: + // "slice-handoff-anchors": { sliceId: string; phase: string }; + // "roadmap-excerpt": { milestoneId: string; aroundSlice: string }; + // "graph-subgraph": { rootArtifact: ArtifactKey }; + // "blocker-task-summary": { sliceId: string }; + // "overrides-banner": { /* basePath via BaseResolverContext */ }; +} + +/** Stable string ids for registered computed artifacts. */ +export type ComputedArtifactId = keyof ComputedArtifactInputs & string; + +/** + * Always-present context the composer hands every computed-artifact builder. + * Carries unit-shape fields that don't belong in per-id input types because + * every builder needs them (path resolution, dispatch identity). + */ +export interface BaseResolverContext { + readonly unitType: string; + readonly basePath: string; + readonly milestoneId?: string; + readonly sliceId?: string; + readonly taskId?: string; +} + +/** + * Builder signature for one computed artifact id. 
Returns the rendered + * block body (joined into the composed prompt at the manifest-declared + * position) or `null` to omit the block entirely. + */ +export type ComputedArtifactBuilder = ( + inputs: ComputedArtifactInputs[K], + base: BaseResolverContext, +) => Promise; + +/** + * Per-call registry: for each computed id the manifest declares, the + * caller supplies the matching builder + the input value for this call. + * + * Runtime shape: `{ [id]: { build, inputs } }`. Type narrowing per key is + * handled inside the composer via the `ComputedArtifactInputs` map — calls + * stay type-safe across the registration boundary. + */ +export type ComputedArtifactRegistry = { + readonly [K in ComputedArtifactId]?: { + readonly build: ComputedArtifactBuilder; + readonly inputs: ComputedArtifactInputs[K]; + }; +}; + +// ─── Manifest ───────────────────────────────────────────────────────────── + +export interface UnitContextManifest { + /** Skills catalog shape to surface. */ + readonly skills: SkillsPolicy; + /** Knowledge block policy. */ + readonly knowledge: KnowledgePolicy; + /** Memory store policy. */ + readonly memory: MemoryPolicy; + /** Whether CODEBASE.md is inlined. */ + readonly codebaseMap: boolean; + /** Preferences block policy. */ + readonly preferences: PreferencesPolicy; + /** + * Tool-access policy (#4934). Declarative in this PR; runtime enforcement + * (path-scoped write blocking + subagent denial + bash allowlist) lands + * in follow-ups. Required on every manifest so missing entries fail loud + * via the CI invariant test rather than defaulting to "all" silently. + */ + readonly tools: ToolsPolicy; + /** Artifact handling: inline (full body), excerpt (compact), or on-demand (path only). 
*/ + readonly artifacts: { + readonly inline: readonly ArtifactKey[]; + readonly excerpt: readonly ArtifactKey[]; + readonly onDemand: readonly ArtifactKey[]; + /** + * Ordered list of computed-block ids emitted in the inline position + * (interleaved with `inline` in declared order — see composer for the + * exact merge rule). v2 contract addition (#4924). Unknown ids fail + * the manifest validator; absent registry entries are skipped silently. + */ + readonly computed?: readonly ComputedArtifactId[]; + }; + /** + * Ordered list of computed-block ids emitted ABOVE the main inlined + * context block. Models the existing pattern of overrides / banners + * that some builders prepend with `inlined.unshift(...)`. v2 contract + * addition (#4924). + */ + readonly prepend?: readonly ComputedArtifactId[]; + /** + * Nominal upper bound for composer-generated system prompt size, in + * characters. Phase 2 composer logs telemetry when a unit exceeds its + * budget; truncation is not enforced. Set conservatively — today's + * observed maxima come from `complete-milestone` (~1.2M tokens cached; + * ~4.8M chars) and `validate-milestone` (~300K tokens; ~1.2M chars). + */ + readonly maxSystemPromptChars: number; +} + +// ─── Manifests ──────────────────────────────────────────────────────────── + +// Phase 1 policy: every manifest encodes today's behavior. Skills = "all" +// unless the unit type was already narrowed via the existing skill-manifest +// resolver (#4779). Memory/knowledge policies reflect the defaults in +// `bootstrap/system-context.ts`. Artifact classifications follow what +// `auto-prompts.ts` inlines today for each unit type. 
+ +const COMMON_BUDGET_LARGE = 1_500_000; // ~400K tokens +const COMMON_BUDGET_MEDIUM = 750_000; // ~200K tokens +const COMMON_BUDGET_SMALL = 250_000; // ~65K tokens + +// ─── Tool policy constants (#4934) ──────────────────────────────────────── +// Reused across manifests so per-unit assignment stays declarative and the +// allowed-path set for the docs policy lives in one reviewable place. + +const TOOLS_ALL: ToolsPolicy = { mode: "all" }; +const TOOLS_PLANNING: ToolsPolicy = { mode: "planning" }; +const TOOLS_DOCS: ToolsPolicy = { + mode: "docs", + // Globs are resolved relative to project basePath. The set is intentionally + // narrow: top-level docs/, README, CHANGELOG, and any markdown at the + // project root. Projects with non-standard layouts (e.g. mintlify-docs/) + // will need this list extended in a follow-up; landed conservative now, + // expand on demand. + allowedPathGlobs: [ + "docs/**", + "README.md", + "README.*.md", + "CHANGELOG.md", + "*.md", + ], +}; + +/** + * Canonical unit types handled by auto-mode dispatch. The coverage test + * enumerates these against `UNIT_MANIFESTS` to catch manifest drift when + * a new unit type lands. 
+ */ +export const KNOWN_UNIT_TYPES = [ + "research-milestone", + "plan-milestone", + "discuss-milestone", + "validate-milestone", + "complete-milestone", + "research-slice", + "plan-slice", + "refine-slice", + "replan-slice", + "complete-slice", + "reassess-roadmap", + "execute-task", + "reactive-execute", + "run-uat", + "gate-evaluate", + "rewrite-docs", +] as const; + +export type UnitType = typeof KNOWN_UNIT_TYPES[number]; + +export const UNIT_MANIFESTS: Record = { + // ─── Milestone-scoped ──────────────────────────────────────────────── + "research-milestone": { + skills: { mode: "all" }, + knowledge: "full", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + // Phase 3 migration (#4782): matches today's actual + // buildResearchMilestonePrompt inlining order. + inline: ["milestone-context", "project", "requirements", "decisions", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + "plan-milestone": { + skills: { mode: "all" }, + knowledge: "full", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["project", "requirements", "decisions", "milestone-research", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_LARGE, + }, + "discuss-milestone": { + skills: { mode: "all" }, + knowledge: "full", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["project", "requirements", "decisions", "milestone-context", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + "validate-milestone": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: false, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["roadmap", "slice-summary", "slice-uat", 
"requirements", "decisions", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_LARGE, + }, + "complete-milestone": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: false, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + // #4780 landed slice-summary as excerpt for this unit; phase 2 of + // the architecture will read this manifest as the source of truth + // and retire the special-case wiring in auto-prompts.ts. + inline: ["roadmap", "milestone-context", "requirements", "decisions", "project", "templates"], + excerpt: ["slice-summary"], + onDemand: ["slice-summary"], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + + // ─── Slice-scoped ──────────────────────────────────────────────────── + "research-slice": { + skills: { mode: "all" }, + knowledge: "full", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["roadmap", "milestone-research", "dependency-summaries", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + "plan-slice": { + skills: { mode: "all" }, + knowledge: "full", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["roadmap", "slice-research", "dependency-summaries", "requirements", "decisions", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_LARGE, + }, + "refine-slice": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["slice-plan", "slice-research", "dependency-summaries", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + "replan-slice": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: 
"prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["slice-plan", "slice-research", "dependency-summaries", "prior-task-summaries", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + "complete-slice": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: false, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + // Phase 3 migration (#4782): matches today's actual + // buildCompleteSlicePrompt inlining order. Overrides prepend + + // knowledge splice stay in the builder imperatively (see RFC + // #4924 — computed/prepend blocks are phase-4 composer work). + inline: ["roadmap", "slice-context", "slice-plan", "requirements", "prior-task-summaries", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_LARGE, + }, + "reassess-roadmap": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "critical-only", + codebaseMap: false, + preferences: "none", + tools: TOOLS_PLANNING, + artifacts: { + // Phase 2 pilot (#4782): manifest now matches today's actual + // buildReassessRoadmapPrompt behavior for equivalence. Phase 3 + // will tighten this list once the composer reports real telemetry. 
+ inline: ["roadmap", "slice-context", "slice-summary", "project", "requirements", "decisions"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, + + // ─── Task-scoped ───────────────────────────────────────────────────── + "execute-task": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_ALL, + artifacts: { + inline: ["task-plan", "slice-plan", "prior-task-summaries", "templates"], + excerpt: [], + onDemand: ["slice-research"], + }, + maxSystemPromptChars: COMMON_BUDGET_LARGE, + }, + "reactive-execute": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_ALL, + artifacts: { + inline: ["slice-plan", "prior-task-summaries", "templates"], + excerpt: [], + onDemand: ["slice-research"], + }, + maxSystemPromptChars: COMMON_BUDGET_LARGE, + }, + + // ─── Ancillary units ───────────────────────────────────────────────── + "run-uat": { + skills: { mode: "all" }, + knowledge: "critical-only", + memory: "critical-only", + codebaseMap: false, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + // Phase 3 migration (#4782): manifest matches today's actual + // buildRunUatPrompt inlining. Prior phase-1 entry listed + // `slice-plan` aspirationally — the real builder inlines the UAT + // file, the slice SUMMARY (optional), and the project row. 
+ inline: ["slice-uat", "slice-summary", "project"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_SMALL, + }, + "gate-evaluate": { + skills: { mode: "all" }, + knowledge: "critical-only", + memory: "critical-only", + codebaseMap: false, + preferences: "active-only", + tools: TOOLS_PLANNING, + artifacts: { + inline: ["slice-plan", "prior-task-summaries"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_SMALL, + }, + "rewrite-docs": { + skills: { mode: "all" }, + knowledge: "scoped", + memory: "prompt-relevant", + codebaseMap: true, + preferences: "active-only", + tools: TOOLS_DOCS, + artifacts: { + inline: ["project", "requirements", "decisions", "templates"], + excerpt: [], + onDemand: [], + }, + maxSystemPromptChars: COMMON_BUDGET_MEDIUM, + }, +}; + +// ─── Lookup helper ──────────────────────────────────────────────────────── + +/** + * Return the manifest for a unit type, or null when the type is unknown. + * + * Callers MUST treat null as "fall through to today's default behavior" + * rather than erroring — unknown unit types may be experimental and + * should not crash the composer. + */ +export function resolveManifest(unitType: string): UnitContextManifest | null { + return (UNIT_MANIFESTS as Record)[unitType] ?? null; +}