diff --git a/package.json b/package.json index 2e96166e0..fb2fe80fa 100644 --- a/package.json +++ b/package.json @@ -53,6 +53,7 @@ "build": "npm run build:core && node scripts/build-web-if-stale.cjs", "stage:web-host": "node scripts/stage-web-standalone.cjs", "build:web-host": "npm --prefix web run build && npm run stage:web-host", + "docs:features": "node scripts/generate-features-inventory.mjs", "copy-resources": "node scripts/copy-resources.cjs", "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", diff --git a/packages/mcp-server/src/tool-credentials.test.ts b/packages/mcp-server/src/tool-credentials.test.ts index 0827ee52a..3c8777dc4 100644 --- a/packages/mcp-server/src/tool-credentials.test.ts +++ b/packages/mcp-server/src/tool-credentials.test.ts @@ -16,7 +16,10 @@ describe("tool credentials", () => { writeFileSync(authPath, JSON.stringify({ anthropic: { type: "api_key", key: "sk-ant-secret" }, openai: { type: "api_key", key: "sk-openai-secret" }, + xiaomi: { type: "api_key", key: "xiaomi-secret" }, tavily: { type: "api_key", key: "tvly-secret" }, + serper: { type: "api_key", key: "serper-secret" }, + exa: { type: "api_key", key: "exa-secret" }, context7: [{ type: "api_key", key: "ctx7-secret" }], })); @@ -24,13 +27,19 @@ describe("tool credentials", () => { assert.deepEqual(loaded.sort(), [ "ANTHROPIC_API_KEY", "CONTEXT7_API_KEY", + "EXA_API_KEY", "OPENAI_API_KEY", + "SERPER_API_KEY", "TAVILY_API_KEY", + "XIAOMI_API_KEY", ]); assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-secret"); assert.equal(env.OPENAI_API_KEY, "sk-openai-secret"); assert.equal(env.TAVILY_API_KEY, "tvly-secret"); + assert.equal(env.SERPER_API_KEY, "serper-secret"); + assert.equal(env.EXA_API_KEY, "exa-secret"); assert.equal(env.CONTEXT7_API_KEY, "ctx7-secret"); + assert.equal(env.XIAOMI_API_KEY, "xiaomi-secret"); } finally { rmSync(tempRoot, { recursive: true, force: true }); } diff --git a/packages/mcp-server/src/tool-credentials.ts 
b/packages/mcp-server/src/tool-credentials.ts index d9f2821b8..e84d9f074 100644 --- a/packages/mcp-server/src/tool-credentials.ts +++ b/packages/mcp-server/src/tool-credentials.ts @@ -17,6 +17,7 @@ const AUTH_ENV_KEYS = [ ["xai", "XAI_API_KEY"], ["openrouter", "OPENROUTER_API_KEY"], ["mistral", "MISTRAL_API_KEY"], + ["xiaomi", "XIAOMI_API_KEY"], ["ollama-cloud", "OLLAMA_API_KEY"], ["custom-openai", "CUSTOM_OPENAI_API_KEY"], ["cerebras", "CEREBRAS_API_KEY"], @@ -32,6 +33,8 @@ const AUTH_ENV_KEYS = [ ["alibaba-coding-plan", "ALIBABA_API_KEY"], ["brave", "BRAVE_API_KEY"], ["brave_answers", "BRAVE_ANSWERS_KEY"], + ["serper", "SERPER_API_KEY"], + ["exa", "EXA_API_KEY"], ["context7", "CONTEXT7_API_KEY"], ["jina", "JINA_API_KEY"], ["tavily", "TAVILY_API_KEY"], diff --git a/packages/mcp-server/src/workflow-tools.ts b/packages/mcp-server/src/workflow-tools.ts index fa6b8e1e2..73b54d5fe 100644 --- a/packages/mcp-server/src/workflow-tools.ts +++ b/packages/mcp-server/src/workflow-tools.ts @@ -14,7 +14,7 @@ type WorkflowToolExecutors = { milestoneId: string; title: string; vision: string; - slices: Array<{ + slices?: Array<{ sliceId: string; title: string; risk: string; @@ -26,6 +26,7 @@ type WorkflowToolExecutors = { integrationClosure: string; observabilityImpact: string; }>; + templateId?: string; status?: string; dependsOn?: string[]; successCriteria?: string[]; @@ -38,6 +39,22 @@ type WorkflowToolExecutors = { definitionOfDone?: string[]; requirementCoverage?: string; boundaryMapMarkdown?: string; + visionMeeting?: { + trigger: string; + pm: string; + userAdvocate: string; + customerPanel: string; + business: string; + researcher: string; + deliveryLead: string; + partner: string; + combatant: string; + architect: string; + moderator: string; + weightedSynthesis: string; + confidenceByArea: string; + recommendedRoute: "discussing" | "researching" | "planning"; + }; }, basePath?: string, ) => Promise; @@ -682,7 +699,8 @@ const planMilestoneParams = { proofLevel: 
z.string(), integrationClosure: z.string(), observabilityImpact: z.string(), - })).describe("Planned slices for the milestone"), + })).optional().describe("Planned slices for the milestone. Optional when templateId is used for scaffolding."), + templateId: z.string().optional().describe("Optional milestone template scaffold (e.g. bugfix, small-feature, refactor)"), status: z.string().optional().describe("Milestone status"), dependsOn: z.array(z.string()).optional().describe("Milestone dependencies"), successCriteria: z.array(z.string()).optional().describe("Top-level success criteria bullets"), @@ -702,6 +720,22 @@ const planMilestoneParams = { definitionOfDone: z.array(z.string()).optional(), requirementCoverage: z.string().optional(), boundaryMapMarkdown: z.string().optional(), + visionMeeting: z.object({ + trigger: z.string(), + pm: z.string(), + userAdvocate: z.string(), + customerPanel: z.string(), + business: z.string(), + researcher: z.string(), + deliveryLead: z.string(), + partner: z.string(), + combatant: z.string(), + architect: z.string(), + moderator: z.string(), + weightedSynthesis: z.string(), + confidenceByArea: z.string(), + recommendedRoute: z.enum(["discussing", "researching", "planning"]), + }).optional().describe("Structured top-level vision and roadmap alignment meeting with weighted synthesis"), }; const planMilestoneSchema = z.object(planMilestoneParams); @@ -710,6 +744,22 @@ const planSliceParams = { milestoneId: z.string().describe("Milestone ID (e.g. M001)"), sliceId: z.string().describe("Slice ID (e.g. 
S01)"), goal: z.string().describe("Slice goal"), + adversarialReview: z.object({ + partner: z.string(), + combatant: z.string(), + architect: z.string(), + }).optional().describe("Adversarial review summary with partner, combatant, and architect perspectives"), + planningMeeting: z.object({ + trigger: z.string(), + pm: z.string(), + researcher: z.string(), + partner: z.string(), + combatant: z.string(), + architect: z.string(), + moderator: z.string(), + recommendedRoute: z.enum(["discussing", "researching", "planning"]), + confidenceSummary: z.string(), + }).optional().describe("Optional structured planning meeting for ambiguous or higher-complexity slices"), tasks: z.array(z.object({ taskId: z.string(), title: z.string(), @@ -802,6 +852,27 @@ const replanSliceParams = { blockerTaskId: z.string().describe("Task ID that discovered the blocker"), blockerDescription: z.string().describe("Description of the blocker"), whatChanged: z.string().describe("Summary of what changed in the plan"), + goal: z.string().optional().describe("Updated slice goal when the replan changes the slice contract"), + successCriteria: z.string().optional().describe("Updated slice success criteria block"), + proofLevel: z.string().optional().describe("Updated slice proof level"), + integrationClosure: z.string().optional().describe("Updated slice integration closure"), + observabilityImpact: z.string().optional().describe("Updated slice observability impact"), + adversarialReview: z.object({ + partner: z.string(), + combatant: z.string(), + architect: z.string(), + }).optional().describe("Updated adversarial review summary for the replanned slice"), + planningMeeting: z.object({ + trigger: z.string(), + pm: z.string(), + researcher: z.string(), + partner: z.string(), + combatant: z.string(), + architect: z.string(), + moderator: z.string(), + recommendedRoute: z.enum(["discussing", "researching", "planning"]), + confidenceSummary: z.string(), + }).optional().describe("Updated structured 
planning meeting artifact for the replanned slice"), updatedTasks: z.array(z.object({ taskId: z.string(), title: z.string(), diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index 1fa055125..41513e0ee 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -136,6 +136,7 @@ export function getEnvApiKey(provider: any): string | undefined { opencode: "OPENCODE_API_KEY", "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", + xiaomi: "XIAOMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", "alibaba-dashscope": "DASHSCOPE_API_KEY", ollama: "OLLAMA_API_KEY", diff --git a/packages/pi-ai/src/models.custom.ts b/packages/pi-ai/src/models.custom.ts index 7a64d0dfe..e425a887c 100644 --- a/packages/pi-ai/src/models.custom.ts +++ b/packages/pi-ai/src/models.custom.ts @@ -292,4 +292,78 @@ export const CUSTOM_MODELS = { compat: { thinkingFormat: "zai", supportsDeveloperRole: false }, } satisfies Model<"openai-completions">, }, + + // ─── Xiaomi MiMo ───────────────────────────────────────────────────── + // Direct Xiaomi Token Plan AMS endpoint (Anthropic-compatible). + // Uses Bearer auth with XIAOMI_API_KEY against /anthropic. 
+ "xiaomi": { + "mimo-v2-omni": { + id: "mimo-v2-omni", + name: "MiMo V2 Omni", + api: "anthropic-messages", + provider: "xiaomi", + baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"anthropic-messages">, + "mimo-v2-pro": { + id: "mimo-v2-pro", + name: "MiMo V2 Pro", + api: "anthropic-messages", + provider: "xiaomi", + baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "mimo-v2.5": { + id: "mimo-v2.5", + name: "MiMo V2.5", + api: "anthropic-messages", + provider: "xiaomi", + baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "mimo-v2.5-pro": { + id: "mimo-v2.5-pro", + name: "MiMo V2.5 Pro", + api: "anthropic-messages", + provider: "xiaomi", + baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + }, } as const; diff --git a/packages/pi-ai/src/models.test.ts b/packages/pi-ai/src/models.test.ts index d8a3a20af..2969ae59a 100644 --- a/packages/pi-ai/src/models.test.ts +++ b/packages/pi-ai/src/models.test.ts @@ -96,7 +96,7 @@ describe("model registry — custom zai provider (GLM-5.1)", () => { const model = getModel("zai" as any, "glm-5.1" as any); assert.ok(model); assert.equal(model.reasoning, true); - assert.equal(model.contextWindow, 204800); + 
assert.equal(model.contextWindow, 200000); assert.equal(model.maxTokens, 131072); }); @@ -109,6 +109,64 @@ describe("model registry — custom zai provider (GLM-5.1)", () => { }); }); +describe("model registry — xiaomi provider", () => { + it("xiaomi is a registered provider", () => { + const providers = getProviders(); + assert.ok( + providers.includes("xiaomi"), + `Expected "xiaomi" in providers, got: ${providers.join(", ")}`, + ); + }); + + it("xiaomi includes the expected chat models from the direct Anthropic-compatible endpoint", () => { + const models = getModels("xiaomi" as any); + const ids = models.map((m) => m.id).sort(); + assert.deepEqual(ids, [ + "mimo-v2-omni", + "mimo-v2-pro", + "mimo-v2.5", + "mimo-v2.5-pro", + ]); + }); + + it("xiaomi models use the Anthropic-compatible endpoint and provider identity", () => { + const models = getModels("xiaomi" as any); + for (const model of models) { + assert.equal(model.provider, "xiaomi"); + assert.equal(model.api, "anthropic-messages"); + assert.equal(model.baseUrl, "https://token-plan-ams.xiaomimimo.com/anthropic"); + } + }); + + it("getModel retrieves xiaomi MiMo models by provider+id", () => { + const model = getModel("xiaomi" as any, "mimo-v2-pro" as any); + assert.ok(model, "Expected getModel to return a model for xiaomi/mimo-v2-pro"); + assert.equal(model.id, "mimo-v2-pro"); + assert.equal(model.provider, "xiaomi"); + assert.equal(model.api, "anthropic-messages"); + }); +}); + +describe("model registry — kimi-coding provider", () => { + it("kimi-coding is a registered provider", () => { + const providers = getProviders(); + assert.ok( + providers.includes("kimi-coding"), + `Expected \"kimi-coding\" in providers, got: ${providers.join(", ")}`, + ); + }); + + it("kimi-coding exposes the canonical live model id", () => { + const model = getModel("kimi-coding" as any, "kimi-for-coding" as any); + assert.ok(model, "Expected getModel to return kimi-coding/kimi-for-coding"); + assert.equal(model.id, 
"kimi-for-coding"); + assert.equal(model.provider, "kimi-coding"); + assert.equal(model.api, "anthropic-messages"); + assert.equal(model.baseUrl, "https://api.kimi.com/coding"); + assert.equal(model.contextWindow, 262144); + }); +}); + // ═══════════════════════════════════════════════════════════════════════════ // New provider: alibaba-dashscope (feat: #3891) // diff --git a/packages/pi-ai/src/providers/anthropic-auth.test.ts b/packages/pi-ai/src/providers/anthropic-auth.test.ts index f95ebafab..917039321 100644 --- a/packages/pi-ai/src/providers/anthropic-auth.test.ts +++ b/packages/pi-ai/src/providers/anthropic-auth.test.ts @@ -13,6 +13,7 @@ test("usesAnthropicBearerAuth covers Bearer-only Anthropic-compatible providers assert.equal(usesAnthropicBearerAuth("minimax"), true); assert.equal(usesAnthropicBearerAuth("minimax-cn"), true); assert.equal(usesAnthropicBearerAuth("longcat"), true); + assert.equal(usesAnthropicBearerAuth("xiaomi"), true); assert.equal(usesAnthropicBearerAuth("anthropic"), false); }); diff --git a/packages/pi-ai/src/providers/anthropic.ts b/packages/pi-ai/src/providers/anthropic.ts index e644045b8..55a53b4cc 100644 --- a/packages/pi-ai/src/providers/anthropic.ts +++ b/packages/pi-ai/src/providers/anthropic.ts @@ -67,7 +67,8 @@ export function usesAnthropicBearerAuth(provider: Model<"anthropic-messages">["p provider === "alibaba-coding-plan" || provider === "minimax" || provider === "minimax-cn" || - provider === "longcat" + provider === "longcat" || + provider === "xiaomi" ); } @@ -123,7 +124,7 @@ async function createClient( const client = new AnthropicClass({ apiKey: usesBearerAuth ? null : apiKey, authToken: usesBearerAuth ? 
apiKey : undefined, - baseURL: model.baseUrl, + baseURL: resolveAnthropicBaseUrl(model), dangerouslyAllowBrowser: true, defaultHeaders: mergeHeaders( { diff --git a/packages/pi-ai/src/providers/google-gemini-cli.ts b/packages/pi-ai/src/providers/google-gemini-cli.ts index 33d67706b..e13618d4b 100644 --- a/packages/pi-ai/src/providers/google-gemini-cli.ts +++ b/packages/pi-ai/src/providers/google-gemini-cli.ts @@ -82,7 +82,14 @@ function buildCodeAssistServer(token: string, projectId: string): CodeAssistServ authClient.setCredentials({ access_token: token }); // httpOptions is an empty-headers pass-through — cli-core sets the // correct User-Agent / Client-Metadata / X-Goog-Api-Client itself. - return new CodeAssistServer(authClient, projectId, { headers: {} }); + // cli-core vendors its own google-auth-library copy, so TypeScript sees + // a package-identity mismatch even though the runtime object shape is + // compatible with the constructor's AuthClient contract. + return new CodeAssistServer( + authClient as unknown as ConstructorParameters[0], + projectId, + { headers: {} }, + ); } diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index 9ef901033..c0f258197 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -42,6 +42,7 @@ export type KnownProvider = | "opencode" | "opencode-go" | "kimi-coding" + | "xiaomi" | "alibaba-coding-plan" | "alibaba-dashscope" | "ollama" diff --git a/packages/pi-ai/src/web-runtime-env-api-keys.ts b/packages/pi-ai/src/web-runtime-env-api-keys.ts index d97c101cc..107485c15 100644 --- a/packages/pi-ai/src/web-runtime-env-api-keys.ts +++ b/packages/pi-ai/src/web-runtime-env-api-keys.ts @@ -78,6 +78,7 @@ export function getEnvApiKey(provider: string): string | undefined { opencode: "OPENCODE_API_KEY", "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", + xiaomi: "XIAOMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", }; diff --git a/src/headless.ts b/src/headless.ts index 
808388d42..33d8bfd78 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -192,6 +192,8 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { if (options.outputFormat === 'text') { options.outputFormat = 'stream-json' } + } else if (arg === '--no-supervised') { + options.supervised = false } else if (arg === '--response-timeout' && i + 1 < args.length) { options.responseTimeout = parseInt(args[++i], 10) if (Number.isNaN(options.responseTimeout) || options.responseTimeout <= 0) { @@ -264,6 +266,22 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): // discuss and plan are multi-turn: they involve multiple question rounds, // codebase scanning, and artifact writing before the workflow completes (#3547). const isMultiTurnCommand = options.command === 'auto' || options.command === 'next' || options.command === 'discuss' || options.command === 'plan' + + // Auto-mode defaults to supervised: wait for user input instead of exiting on questions + // This is the desired behavior - auto should wait, not exit on blocked + // Can be disabled via --no-supervised or preferences.auto_supervisor.supervised_mode: false + if (options.command === 'auto' && options.supervised === undefined) { + // Check preferences for default + try { + const { loadEffectiveSFPreferences } = await import('./resources/extensions/sf/preferences.js'); + const prefs = loadEffectiveSFPreferences(); + // Default to true unless explicitly set to false in preferences + options.supervised = prefs?.preferences?.auto_supervisor?.supervised_mode ?? 
true; + } catch { + options.supervised = true; + } + } + if (isAutoMode && options.timeout === 300_000) { options.timeout = 0 } diff --git a/src/onboarding.ts b/src/onboarding.ts index 5d4ca6e16..f5149f35c 100644 --- a/src/onboarding.ts +++ b/src/onboarding.ts @@ -75,6 +75,7 @@ const LLM_PROVIDER_IDS = [ 'xai', 'openrouter', 'mistral', + 'xiaomi', 'ollama', 'ollama-cloud', 'custom-openai', @@ -92,6 +93,7 @@ const OTHER_PROVIDERS = [ { value: 'xai', label: 'xAI (Grok)', hint: 'console.x.ai' }, { value: 'openrouter', label: 'OpenRouter', hint: '200+ models — openrouter.ai/keys' }, { value: 'mistral', label: 'Mistral', hint: 'console.mistral.ai/api-keys' }, + { value: 'xiaomi', label: 'Xiaomi MiMo', hint: 'token-plan-ams.xiaomimimo.com' }, { value: 'ollama-cloud', label: 'Ollama Cloud' }, { value: 'custom-openai', label: 'Custom (OpenAI-compatible)', hint: 'Ollama, LM Studio, vLLM, proxies — see docs/providers.md' }, ] @@ -658,7 +660,9 @@ async function runWebSearchStep( // Check if web search is already configured const hasBrave = !!process.env.BRAVE_API_KEY || authStorage.has('brave') const hasTavily = !!process.env.TAVILY_API_KEY || authStorage.has('tavily') - const existingSearch = hasBrave ? 'Brave Search' : hasTavily ? 'Tavily' : null + const hasSerper = !!process.env.SERPER_API_KEY || authStorage.has('serper') + const hasExa = !!process.env.EXA_API_KEY || authStorage.has('exa') + const existingSearch = hasBrave ? 'Brave Search' : hasTavily ? 'Tavily' : hasSerper ? 'Serper' : hasExa ? 
'Exa' : null // Build options based on what's available type SearchOption = { value: string; label: string; hint?: string } @@ -679,6 +683,8 @@ async function runWebSearchStep( options.push( { value: 'brave', label: 'Brave Search', hint: 'requires API key — brave.com/search/api' }, { value: 'tavily', label: 'Tavily', hint: 'requires API key — tavily.com' }, + { value: 'serper', label: 'Serper', hint: 'requires API key — serper.dev' }, + { value: 'exa', label: 'Exa Search', hint: 'requires API key — dashboard.exa.ai' }, { value: 'skip', label: 'Skip for now', hint: 'use /search-provider inside SF later' }, ) @@ -721,6 +727,32 @@ async function runWebSearchStep( return 'Tavily' } + if (choice === 'serper') { + const key = await p.password({ + message: `Paste your Serper API key ${pc.dim('(serper.dev)')}:`, + mask: '●', + }) + if (p.isCancel(key) || !(key as string)?.trim()) return null + const trimmed = (key as string).trim() + authStorage.set('serper', { type: 'api_key', key: trimmed }) + process.env.SERPER_API_KEY = trimmed + p.log.success(`Web search: ${pc.green('Serper')} configured`) + return 'Serper' + } + + if (choice === 'exa') { + const key = await p.password({ + message: `Paste your Exa API key ${pc.dim('(dashboard.exa.ai)')}:`, + mask: '●', + }) + if (p.isCancel(key) || !(key as string)?.trim()) return null + const trimmed = (key as string).trim() + authStorage.set('exa', { type: 'api_key', key: trimmed }) + process.env.EXA_API_KEY = trimmed + p.log.success(`Web search: ${pc.green('Exa')} configured`) + return 'Exa' + } + return null } @@ -1041,4 +1073,3 @@ async function runDiscordChannelStep(p: ClackModule, pc: PicoModule, token: stri p.log.success(`Discord channel: ${pc.green(channelName ? `#${channelName}` : channelId)}`) return channelName ?? 
null } - diff --git a/src/remote-questions-config.ts b/src/remote-questions-config.ts index 7a66543a4..8d9a69347 100644 --- a/src/remote-questions-config.ts +++ b/src/remote-questions-config.ts @@ -15,8 +15,9 @@ import { appRoot } from "./app-paths.js"; // Inlined from preferences.ts to avoid crossing the compiled/uncompiled // boundary — this file is compiled by tsc, but preferences.ts is loaded // via jiti at runtime. Importing it as .js fails because no .js exists -// in dist/. See #592, #1110. -const GLOBAL_PREFERENCES_PATH = join(appRoot, "PREFERENCES.md"); +// in dist/. See #592, #1110. Global preferences are canonical lowercase; +// uppercase remains a project-file legacy fallback in preferences.ts. +const GLOBAL_PREFERENCES_PATH = join(appRoot, "preferences.md"); export function saveRemoteQuestionsConfig(channel: "slack" | "discord" | "telegram", channelId: string): void { const prefsPath = GLOBAL_PREFERENCES_PATH; diff --git a/src/resources/extensions/ask-user-questions.ts b/src/resources/extensions/ask-user-questions.ts index 77922f498..1378d91ae 100644 --- a/src/resources/extensions/ask-user-questions.ts +++ b/src/resources/extensions/ask-user-questions.ts @@ -36,6 +36,8 @@ interface RemoteResultDetails { promptId?: string; threadUrl?: string; status?: string; + autoResolved?: boolean; + autoResolveStrategy?: string; questions?: Question[]; response?: import("./remote-questions/types.js").RemoteAnswer; error?: boolean; @@ -114,6 +116,13 @@ interface RaceableResult { details?: unknown; } +/** @internal Exported for tests. */ +export function isUsableRemoteQuestionResult(details: Record | undefined): boolean { + if (details?.error || details?.cancelled) return false; + if (details?.timed_out && details.autoResolved !== true) return false; + return true; +} + /** * Race a remote channel dispatch against the local TUI. The first to produce * a valid (non-error, non-timeout) result wins. 
The loser is cancelled via @@ -143,8 +152,9 @@ async function raceRemoteAndLocal( const remotePromise = startRemote().then((result): RaceableResult | null => { if (!result) return null; const details = result.details as Record | undefined; - // Treat timeouts and errors as non-wins — let the local TUI win instead - if (details?.timed_out || details?.error) return null; + // Plain timeouts/errors are non-wins, but timeout auto-resolution is a + // real answer and must win in headless/supervised flows. + if (!isUsableRemoteQuestionResult(details)) return null; return result; }).catch(() => null); @@ -263,7 +273,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) { if (raceResult) { const details = raceResult.details as Record | undefined; - if (details && !details.timed_out && !details.error && !details.cancelled) { + if (details && isUsableRemoteQuestionResult(details)) { turnCache.set(sig, raceResult as unknown as CachedResult); } return { ...raceResult, details: raceResult.details as unknown }; @@ -277,7 +287,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) { const remoteResult = await tryRemoteQuestions(params.questions, signal); if (remoteResult) { const remoteDetails = remoteResult.details as Record | undefined; - if (remoteDetails && !remoteDetails.timed_out && !remoteDetails.error) { + if (remoteDetails && isUsableRemoteQuestionResult(remoteDetails)) { turnCache.set(sig, remoteResult as unknown as CachedResult); } return { ...remoteResult, details: remoteResult.details as unknown }; @@ -402,7 +412,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) { // Remote channel result (discriminated on details.remote === true) if (details.remote) { - if (details.timed_out) { + if (details.timed_out && !details.autoResolved) { return new Text( `${theme.fg("warning", `${details.channel} — timed out`)}${details.threadUrl ? 
theme.fg("dim", ` ${details.threadUrl}`) : ""}`, 0, @@ -412,7 +422,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) { const questions = (details.questions ?? []) as Question[]; const lines: string[] = []; - lines.push(theme.fg("dim", details.channel)); + lines.push(theme.fg("dim", details.autoResolved ? `${details.channel} — auto-resolved on timeout` : details.channel)); if (details.response) { for (const q of questions) { const answer = details.response.answers[q.id]; diff --git a/src/resources/extensions/aws-auth/index.ts b/src/resources/extensions/aws-auth/index.ts index 5c209a1b9..3097ebb7c 100644 --- a/src/resources/extensions/aws-auth/index.ts +++ b/src/resources/extensions/aws-auth/index.ts @@ -68,7 +68,7 @@ function getAwsAuthRefreshCommand(): string | undefined { try { const settings = JSON.parse(readFileSync(settingsPath, "utf-8")); if (settings.awsAuthRefresh) return settings.awsAuthRefresh; - } catch {} + } catch {} // file missing or corrupt → skip, try next location } return undefined; } diff --git a/src/resources/extensions/bg-shell/bg-shell-lifecycle.ts b/src/resources/extensions/bg-shell/bg-shell-lifecycle.ts index e4ea2b8a1..f1fcc3f82 100644 --- a/src/resources/extensions/bg-shell/bg-shell-lifecycle.ts +++ b/src/resources/extensions/bg-shell/bg-shell-lifecycle.ts @@ -46,9 +46,9 @@ export function registerBgShellLifecycle(pi: ExtensionAPI, state: BgShellSharedS const { listDescendants } = require("@singularity-forge/native") as typeof import("@singularity-forge/native"); const descendants = listDescendants(process.pid); for (const childPid of descendants) { - try { process.kill(childPid, "SIGKILL"); } catch {} + try { process.kill(childPid, "SIGKILL"); } catch {} // child already dead → harmless } - } catch {} + } catch {} // native not available → can't track descendants, continue }; process.on("SIGTERM", signalCleanup); process.on("SIGINT", signalCleanup); diff --git a/src/resources/extensions/remote-questions/config.ts 
b/src/resources/extensions/remote-questions/config.ts index 282f71068..33416555b 100644 --- a/src/resources/extensions/remote-questions/config.ts +++ b/src/resources/extensions/remote-questions/config.ts @@ -12,6 +12,8 @@ export interface ResolvedConfig { timeoutMs: number; pollIntervalMs: number; token: string; + autoResolveOnTimeout: boolean; + autoResolveStrategy: "recommended-option"; } const ENV_KEYS: Record = { @@ -96,6 +98,8 @@ export function resolveRemoteConfig(): ResolvedConfig | null { timeoutMs: timeoutMinutes * 60 * 1000, pollIntervalMs: pollIntervalSeconds * 1000, token, + autoResolveOnTimeout: rq.auto_resolve_on_timeout === true, + autoResolveStrategy: rq.auto_resolve_strategy ?? "recommended-option", }; } diff --git a/src/resources/extensions/remote-questions/manager.ts b/src/resources/extensions/remote-questions/manager.ts index 338e354f1..63ec413d7 100644 --- a/src/resources/extensions/remote-questions/manager.ts +++ b/src/resources/extensions/remote-questions/manager.ts @@ -24,6 +24,23 @@ interface QuestionInput { allowMultiple?: boolean; } +export function tryAutoResolveQuestions( + questions: QuestionInput[], + strategy: "recommended-option" = "recommended-option", +): RemoteAnswer | null { + if (strategy !== "recommended-option") return null; + const answers: RemoteAnswer["answers"] = {}; + + for (const question of questions) { + if (question.allowMultiple) return null; + const recommended = question.options.filter((option) => option.label.includes("(Recommended)")); + if (recommended.length !== 1) return null; + answers[question.id] = { answers: [recommended[0].label] }; + } + + return { answers }; +} + /** * Check whether a remote channel is configured without triggering any * side effects (no HTTP requests, no prompt records). 
Used by the race @@ -62,6 +79,28 @@ export async function tryRemoteQuestions( const answer = await pollUntilDone(adapter, prompt, dispatch.ref, signal); if (!answer) { + const autoResolved = !signal?.aborted && config.autoResolveOnTimeout + ? tryAutoResolveQuestions(questions, config.autoResolveStrategy) + : null; + if (autoResolved) { + markPromptAnswered(prompt.id, autoResolved); + return { + content: [{ type: "text", text: JSON.stringify({ answers: formatForTool(autoResolved) }) }], + details: { + remote: true, + channel: config.channel, + timed_out: true, + promptId: prompt.id, + threadUrl: dispatch.ref.threadUrl ?? null, + status: "auto-resolved", + autoResolved: true, + autoResolveStrategy: config.autoResolveStrategy, + questions, + response: autoResolved, + }, + }; + } + markPromptStatus(prompt.id, signal?.aborted ? "cancelled" : "timed_out"); return { content: [{ diff --git a/src/resources/extensions/search-the-web/command-search-provider.ts b/src/resources/extensions/search-the-web/command-search-provider.ts index 0db6c2579..191dd7268 100644 --- a/src/resources/extensions/search-the-web/command-search-provider.ts +++ b/src/resources/extensions/search-the-web/command-search-provider.ts @@ -1,7 +1,7 @@ /** * /search-provider slash command. * - * Lets users switch between tavily, brave, ollama, combosearch, and auto search backends. + * Lets users switch between tavily, brave, serper, exa, ollama, combosearch, and auto search backends. * Supports direct arg (`/search-provider tavily`) or interactive select UI. * Tab completion provides the three valid options with key status. 
* @@ -13,6 +13,8 @@ import type { AutocompleteItem } from '@singularity-forge/pi-tui' import { getTavilyApiKey, getBraveApiKey, + getSerperApiKey, + getExaApiKey, getOllamaApiKey, getSearchProviderPreference, setSearchProviderPreference, @@ -20,10 +22,12 @@ import { type SearchProviderPreference, } from './provider.js' -const VALID_PREFERENCES: SearchProviderPreference[] = ['tavily', 'brave', 'ollama', 'combosearch', 'auto'] +const VALID_PREFERENCES: SearchProviderPreference[] = ['tavily', 'brave', 'serper', 'exa', 'ollama', 'combosearch', 'auto'] -function keyStatus(provider: 'tavily' | 'brave' | 'ollama'): string { +function keyStatus(provider: 'tavily' | 'brave' | 'serper' | 'exa' | 'ollama'): string { if (provider === 'tavily') return getTavilyApiKey() ? '✓' : '✗' + if (provider === 'serper') return getSerperApiKey() ? '✓' : '✗' + if (provider === 'exa') return getExaApiKey() ? '✓' : '✗' if (provider === 'ollama') return getOllamaApiKey() ? '✓' : '✗' return getBraveApiKey() ? '✓' : '✗' } @@ -32,6 +36,8 @@ function comboStatus(): string { const available = [ getTavilyApiKey() ? 'tavily' : null, getBraveApiKey() ? 'brave' : null, + getSerperApiKey() ? 'serper' : null, + getExaApiKey() ? 'exa' : null, getOllamaApiKey() ? 'ollama' : null, ].filter(Boolean) as string[] return available.length > 0 ? `${available.length} source${available.length === 1 ? 
'' : 's'}` : '✗' @@ -41,6 +47,8 @@ function buildSelectOptions(): string[] { return [ `tavily (key: ${keyStatus('tavily')})`, `brave (key: ${keyStatus('brave')})`, + `serper (key: ${keyStatus('serper')})`, + `exa (key: ${keyStatus('exa')})`, `ollama (key: ${keyStatus('ollama')})`, `combosearch (${comboStatus()})`, `auto`, @@ -50,6 +58,8 @@ function buildSelectOptions(): string[] { function parseSelectChoice(choice: string): SearchProviderPreference { if (choice.startsWith('tavily')) return 'tavily' if (choice.startsWith('brave')) return 'brave' + if (choice.startsWith('serper')) return 'serper' + if (choice.startsWith('exa')) return 'exa' if (choice.startsWith('ollama')) return 'ollama' if (choice.startsWith('combosearch')) return 'combosearch' return 'auto' @@ -57,7 +67,7 @@ function parseSelectChoice(choice: string): SearchProviderPreference { export function registerSearchProviderCommand(pi: ExtensionAPI): void { pi.registerCommand('search-provider', { - description: 'Switch search provider (tavily, brave, ollama, combosearch, auto)', + description: 'Switch search provider (tavily, brave, serper, exa, ollama, combosearch, auto)', getArgumentCompletions(prefix: string): AutocompleteItem[] | null { const trimmed = prefix.trim().toLowerCase() @@ -66,7 +76,7 @@ export function registerSearchProviderCommand(pi: ExtensionAPI): void { .map((p) => { let description: string if (p === 'auto') { - description = `Auto-select (tavily: ${keyStatus('tavily')}, brave: ${keyStatus('brave')}, ollama: ${keyStatus('ollama')})` + description = `Auto-select (tavily: ${keyStatus('tavily')}, brave: ${keyStatus('brave')}, serper: ${keyStatus('serper')}, exa: ${keyStatus('exa')}, ollama: ${keyStatus('ollama')})` } else if (p === 'combosearch') { description = `fan-out aggregator (${comboStatus()})` } else { diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index 65a772bf1..d830c0cb5 100644 --- 
a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -30,7 +30,7 @@ export const MAX_NATIVE_SEARCHES_PER_SESSION = 15; export function preferBraveSearch(): boolean { // PREFERENCES.md takes priority over env var const prefsPref = resolveSearchProviderFromPreferences(); - if (prefsPref === "brave" || prefsPref === "tavily" || prefsPref === "ollama" || prefsPref === "combosearch") return true; + if (prefsPref === "brave" || prefsPref === "tavily" || prefsPref === "serper" || prefsPref === "exa" || prefsPref === "ollama" || prefsPref === "combosearch") return true; if (prefsPref === "native") return false; // Fall back to env var return process.env.PREFER_BRAVE_SEARCH === "1" || process.env.PREFER_BRAVE_SEARCH === "true"; @@ -96,7 +96,7 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: const wasAnthropic = isAnthropicProvider; isAnthropicProvider = event.model.provider === "anthropic"; - const hasSearchKey = !!(process.env.BRAVE_API_KEY || process.env.TAVILY_API_KEY || process.env.OLLAMA_API_KEY); + const hasSearchKey = !!(process.env.BRAVE_API_KEY || process.env.TAVILY_API_KEY || process.env.SERPER_API_KEY || process.env.EXA_API_KEY || process.env.OLLAMA_API_KEY); // When Anthropic (and not preferring Brave): disable custom search tools — // native web_search is server-side and more reliable. 
@@ -123,7 +123,7 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: ctx.ui.notify("Brave search active (PREFER_BRAVE_SEARCH)", "info"); } else if (!isAnthropicProvider && !hasSearchKey) { ctx.ui.notify( - "Web search: Set BRAVE_API_KEY, TAVILY_API_KEY, or OLLAMA_API_KEY, or use an Anthropic model for built-in search", + "Web search: Set BRAVE_API_KEY, TAVILY_API_KEY, SERPER_API_KEY, EXA_API_KEY, or OLLAMA_API_KEY, or use an Anthropic model for built-in search", "warning" ); } diff --git a/src/resources/extensions/search-the-web/provider.ts b/src/resources/extensions/search-the-web/provider.ts index 42fd6d279..51189bd39 100644 --- a/src/resources/extensions/search-the-web/provider.ts +++ b/src/resources/extensions/search-the-web/provider.ts @@ -1,11 +1,11 @@ /** * Search provider selection and preference management. * - * Single source of truth for which search backend (Tavily vs Brave) to use. + * Single source of truth for which search backend to use. * Reads API keys from process.env at call time (not module load time) so * hot-reloaded keys work. Preference is stored in auth.json under the * synthetic provider key `search_provider` as - * { type: "api_key", key: "tavily" | "brave" | "ollama" | "combosearch" | "auto" }. + * { type: "api_key", key: "tavily" | "brave" | "serper" | "exa" | "ollama" | "combosearch" | "auto" }. * * @see S01-RESEARCH.md for the storage decision rationale (D002). 
*/ @@ -21,10 +21,10 @@ import { resolveSearchProviderFromPreferences } from '../sf/preferences.js' const sfHome = process.env.SF_HOME || join(homedir(), '.sf') const authFilePath = join(sfHome, 'agent', 'auth.json') -export type SearchProvider = 'tavily' | 'brave' | 'ollama' | 'combosearch' +export type SearchProvider = 'tavily' | 'brave' | 'serper' | 'exa' | 'ollama' | 'combosearch' export type SearchProviderPreference = SearchProvider | 'auto' -const VALID_PREFERENCES = new Set(['tavily', 'brave', 'ollama', 'combosearch', 'auto']) +const VALID_PREFERENCES = new Set(['tavily', 'brave', 'serper', 'exa', 'ollama', 'combosearch', 'auto']) const PREFERENCE_KEY = 'search_provider' /** Returns the Tavily API key from the environment, or empty string if not set. */ @@ -51,6 +51,16 @@ export function getOllamaApiKey(): string { return process.env.OLLAMA_API_KEY || '' } +/** Returns the Serper API key from the environment, or empty string if not set. */ +export function getSerperApiKey(): string { + return process.env.SERPER_API_KEY || '' +} + +/** Returns the Exa API key from the environment, or empty string if not set. */ +export function getExaApiKey(): string { + return process.env.EXA_API_KEY || '' +} + /** * Read the user's search provider preference from auth.json. * Returns 'auto' if no preference is stored or the stored value is invalid. 
@@ -95,12 +105,16 @@ export function setSearchProviderPreference(pref: SearchProviderPreference, auth export function resolveSearchProvider(overridePreference?: string): SearchProvider | null { const tavilyKey = getTavilyApiKey() const braveKey = getBraveApiKey() + const serperKey = getSerperApiKey() + const exaKey = getExaApiKey() const ollamaKey = getOllamaApiKey() const hasTavily = tavilyKey.length > 0 const hasBrave = braveKey.length > 0 + const hasSerper = serperKey.length > 0 + const hasExa = exaKey.length > 0 const hasOllama = ollamaKey.length > 0 - const hasAny = hasTavily || hasBrave || hasOllama + const hasAny = hasTavily || hasBrave || hasSerper || hasExa || hasOllama // Determine effective preference let pref: SearchProviderPreference @@ -122,6 +136,8 @@ export function resolveSearchProvider(overridePreference?: string): SearchProvid if (pref === 'auto') { if (hasTavily) return 'tavily' if (hasBrave) return 'brave' + if (hasSerper) return 'serper' + if (hasExa) return 'exa' if (hasOllama) return 'ollama' return null } @@ -133,6 +149,8 @@ export function resolveSearchProvider(overridePreference?: string): SearchProvid if (pref === 'tavily') { if (hasTavily) return 'tavily' if (hasBrave) return 'brave' + if (hasSerper) return 'serper' + if (hasExa) return 'exa' if (hasOllama) return 'ollama' return null } @@ -140,6 +158,26 @@ export function resolveSearchProvider(overridePreference?: string): SearchProvid if (pref === 'brave') { if (hasBrave) return 'brave' if (hasTavily) return 'tavily' + if (hasSerper) return 'serper' + if (hasExa) return 'exa' + if (hasOllama) return 'ollama' + return null + } + + if (pref === 'serper') { + if (hasSerper) return 'serper' + if (hasTavily) return 'tavily' + if (hasBrave) return 'brave' + if (hasExa) return 'exa' + if (hasOllama) return 'ollama' + return null + } + + if (pref === 'exa') { + if (hasExa) return 'exa' + if (hasSerper) return 'serper' + if (hasTavily) return 'tavily' + if (hasBrave) return 'brave' if 
(hasOllama) return 'ollama' return null } @@ -148,6 +186,8 @@ export function resolveSearchProvider(overridePreference?: string): SearchProvid if (hasOllama) return 'ollama' if (hasTavily) return 'tavily' if (hasBrave) return 'brave' + if (hasSerper) return 'serper' + if (hasExa) return 'exa' return null } diff --git a/src/resources/extensions/search-the-web/tool-llm-context.ts b/src/resources/extensions/search-the-web/tool-llm-context.ts index 073760375..f7da9bfbd 100644 --- a/src/resources/extensions/search-the-web/tool-llm-context.ts +++ b/src/resources/extensions/search-the-web/tool-llm-context.ts @@ -5,9 +5,11 @@ * Unlike search-the-web → fetch_page (two steps), this returns pre-extracted, * relevance-scored page content in one API call. * - * Supports two backends: + * Supports multiple backends: * - Tavily: POST-based, client-side token budgeting via budgetContent() * - Brave: GET-based LLM Context API with server-side budgeting + * - Serper: search API + Jina Reader extraction + * - Exa: search API with built-in extracted contents * * Provider is selected by resolveSearchProvider() — same as tool-search.ts. 
* @@ -27,7 +29,7 @@ import { normalizeQuery, extractDomain } from "./url-utils.js"; import { formatLLMContext, type LLMContextSnippet, type LLMContextSource } from "./format.js"; import type { TavilyResult, TavilySearchResponse } from "./tavily.js"; import { publishedDateToAge } from "./tavily.js"; -import { getTavilyApiKey, getOllamaApiKey, getBraveApiKey, braveHeaders, resolveSearchProvider } from "./provider.js"; +import { getTavilyApiKey, getOllamaApiKey, getBraveApiKey, getSerperApiKey, getExaApiKey, braveHeaders, resolveSearchProvider } from "./provider.js"; // ============================================================================= // Types @@ -79,7 +81,29 @@ interface LLMContextDetails { errorKind?: string; error?: string; retryAfterMs?: number; - provider?: 'tavily' | 'brave' | 'ollama' | 'combosearch'; + provider?: 'tavily' | 'brave' | 'serper' | 'exa' | 'ollama' | 'combosearch'; +} + +interface SerperOrganicResult { + title?: string; + link?: string; + snippet?: string; + date?: string; +} + +interface SerperSearchResponse { + organic?: SerperOrganicResult[]; +} + +interface ExaLLMContextResult { + title?: string; + url?: string; + text?: string; + publishedDate?: string; +} + +interface ExaLLMContextResponse { + results?: ExaLLMContextResult[]; } // ============================================================================= @@ -337,10 +361,149 @@ async function executeBraveLLMContext( return { cached: { grounding, sources, estimatedTokens }, latencyMs: timed.latencyMs, rateLimit: timed.rateLimit }; } -function availableComboProviders(): Array<'tavily' | 'brave' | 'ollama'> { - const providers: Array<'tavily' | 'brave' | 'ollama'> = []; +async function fetchSerperPageMarkdown(url: string, signal?: AbortSignal): Promise { + const response = await fetch(`https://r.jina.ai/${url}`, { + method: "GET", + headers: { + "Accept": "text/plain", + "X-Return-Format": "markdown", + "X-No-Cache": "false", + }, + signal, + }); + if (!response.ok) { + throw new 
Error(`Jina fetch failed for ${url}: ${response.status}`); + } + let text = await response.text(); + text = text.replace(/^Title:\s*.+\n/, ""); + text = text.replace(/^URL Source:\s*.+\n\n?/, ""); + text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, ""); + text = text.replace(/\n{4,}/g, "\n\n\n"); + return text.trim(); +} + +async function executeSerperLLMContext( + params: { query: string; maxTokens: number; maxUrls: number; threshold: string; count: number }, + signal?: AbortSignal, +): Promise<{ cached: CachedLLMContext; latencyMs: number; rateLimit?: RateLimitInfo }> { + const timed = await fetchWithRetryTimed("https://google.serper.dev/search", { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-API-KEY": getSerperApiKey(), + }, + body: JSON.stringify({ q: params.query, num: Math.max(1, Math.min(10, params.count)) }), + signal, + }, 2); + + const data: SerperSearchResponse = await timed.response.json(); + const candidates = (data.organic || []) + .filter((r) => typeof r.link === "string" && r.link.length > 0) + .slice(0, params.maxUrls); + + const pageResults = await Promise.allSettled( + candidates.map(async (r) => ({ + url: r.link!, + title: r.title || "(untitled)", + age: r.date || null, + content: await fetchSerperPageMarkdown(r.link!, signal), + })), + ); + + const sources: Record = {}; + const grounding: LLMContextSnippet[] = []; + let totalTokens = 0; + const effectiveBudget = Math.max(1, Math.floor(params.maxTokens * 0.8)); + + for (const page of pageResults) { + if (page.status !== "fulfilled") continue; + if (totalTokens >= effectiveBudget) break; + const remainingTokens = effectiveBudget - totalTokens; + const maxChars = remainingTokens * 4; + let text = page.value.content; + if (text.length > maxChars) { + text = text.slice(0, maxChars); + } + const tokens = estimateTokens(text); + if (tokens <= 0) continue; + totalTokens += tokens; + grounding.push({ + url: page.value.url, + title: page.value.title, + snippets: [text], + }); + 
sources[page.value.url] = { + title: page.value.title, + hostname: extractDomain(page.value.url), + age: page.value.age ? [null as unknown as string, null as unknown as string, page.value.age] : null, + }; + } + + return { cached: { grounding, sources, estimatedTokens: totalTokens }, latencyMs: timed.latencyMs, rateLimit: timed.rateLimit }; +} + +async function executeExaLLMContext( + params: { query: string; maxTokens: number; maxUrls: number; threshold: string; count: number }, + signal?: AbortSignal, +): Promise<{ cached: CachedLLMContext; latencyMs: number; rateLimit?: RateLimitInfo }> { + const timed = await fetchWithRetryTimed("https://api.exa.ai/search", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": getExaApiKey(), + }, + body: JSON.stringify({ + query: params.query, + numResults: Math.max(1, Math.min(10, params.count)), + contents: { + text: true, + }, + }), + signal, + }, 2); + + const data: ExaLLMContextResponse = await timed.response.json(); + const sources: Record = {}; + const grounding: LLMContextSnippet[] = []; + let totalTokens = 0; + const effectiveBudget = Math.max(1, Math.floor(params.maxTokens * 0.8)); + + for (const result of (data.results || []).slice(0, params.maxUrls)) { + if (!result.url || !result.text) continue; + if (totalTokens >= effectiveBudget) break; + + const remainingTokens = effectiveBudget - totalTokens; + const maxChars = remainingTokens * 4; + let text = result.text; + if (text.length > maxChars) { + text = text.slice(0, maxChars); + } + + const tokens = estimateTokens(text); + if (tokens <= 0) continue; + totalTokens += tokens; + grounding.push({ + url: result.url, + title: result.title || "(untitled)", + snippets: [text], + }); + const ageString = result.publishedDate ? publishedDateToAge(result.publishedDate) : undefined; + sources[result.url] = { + title: result.title || "(untitled)", + hostname: extractDomain(result.url), + age: ageString ? 
[null as unknown as string, null as unknown as string, ageString] : null, + }; + } + + return { cached: { grounding, sources, estimatedTokens: totalTokens }, latencyMs: timed.latencyMs, rateLimit: timed.rateLimit }; +} + +function availableComboProviders(): Array<'tavily' | 'brave' | 'serper' | 'exa' | 'ollama'> { + const providers: Array<'tavily' | 'brave' | 'serper' | 'exa' | 'ollama'> = []; if (getTavilyApiKey()) providers.push('tavily'); if (getBraveApiKey()) providers.push('brave'); + if (getSerperApiKey()) providers.push('serper'); + if (getExaApiKey()) providers.push('exa'); if (getOllamaApiKey()) providers.push('ollama'); return providers; } @@ -391,6 +554,12 @@ async function executeComboLLMContext( signal, ); } + if (provider === 'serper') { + return executeSerperLLMContext(params, signal); + } + if (provider === 'exa') { + return executeExaLLMContext(params, signal); + } return executeBraveLLMContext(params, signal); }); @@ -489,7 +658,7 @@ export function registerLLMContextTool(pi: ExtensionAPI) { const provider = resolveSearchProvider(); if (!provider) { return { - content: [{ type: "text", text: "search_and_read unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY, BRAVE_API_KEY, or OLLAMA_API_KEY." }], + content: [{ type: "text", text: "search_and_read unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY, BRAVE_API_KEY, SERPER_API_KEY, EXA_API_KEY, or OLLAMA_API_KEY." 
}], isError: true, details: { errorKind: "auth_error", error: "No search API key set" } satisfies Partial, }; @@ -568,6 +737,22 @@ export function registerLLMContextTool(pi: ExtensionAPI) { result = ollamaResult.cached; latencyMs = ollamaResult.latencyMs; rateLimit = ollamaResult.rateLimit; + } else if (provider === "serper") { + const serperResult = await executeSerperLLMContext( + { query: params.query, maxTokens, maxUrls, threshold, count }, + signal, + ); + result = serperResult.cached; + latencyMs = serperResult.latencyMs; + rateLimit = serperResult.rateLimit; + } else if (provider === "exa") { + const exaResult = await executeExaLLMContext( + { query: params.query, maxTokens, maxUrls, threshold, count }, + signal, + ); + result = exaResult.cached; + latencyMs = exaResult.latencyMs; + rateLimit = exaResult.rateLimit; } else { let braveResult; try { diff --git a/src/resources/extensions/search-the-web/tool-search.ts b/src/resources/extensions/search-the-web/tool-search.ts index d33bdca0c..6502eae65 100644 --- a/src/resources/extensions/search-the-web/tool-search.ts +++ b/src/resources/extensions/search-the-web/tool-search.ts @@ -20,7 +20,7 @@ import { LRUTTLCache } from "./cache.js"; import { fetchWithRetryTimed, fetchWithRetry, classifyError, type RateLimitInfo } from "./http.js"; import { normalizeQuery, toDedupeKey, detectFreshness } from "./url-utils.js"; import { formatSearchResults, type SearchResultFormatted, type FormatSearchOptions } from "./format.js"; -import { getTavilyApiKey, getOllamaApiKey, getBraveApiKey, braveHeaders, resolveSearchProvider } from "./provider.js"; +import { getTavilyApiKey, getOllamaApiKey, getBraveApiKey, getSerperApiKey, getExaApiKey, braveHeaders, resolveSearchProvider } from "./provider.js"; import { normalizeTavilyResult, mapFreshnessToTavily, type TavilySearchResponse } from "./tavily.js"; // ============================================================================= @@ -93,7 +93,7 @@ interface SearchDetails { 
errorKind?: string; error?: string; retryAfterMs?: number; - provider?: 'tavily' | 'brave' | 'ollama' | 'combosearch'; + provider?: 'tavily' | 'brave' | 'serper' | 'exa' | 'ollama' | 'combosearch'; } // ============================================================================= @@ -156,6 +156,19 @@ function deduplicateResults(results: SearchResultFormatted[]): SearchResultForma return Array.from(seen.values()); } +function freshnessToExaStartPublishedDate(freshness: string | null): string | undefined { + if (!freshness) return undefined; + const now = Date.now(); + const offsets: Record = { + pd: 24 * 60 * 60 * 1000, + pw: 7 * 24 * 60 * 60 * 1000, + pm: 30 * 24 * 60 * 60 * 1000, + py: 365 * 24 * 60 * 60 * 1000, + }; + const offset = offsets[freshness]; + return offset ? new Date(now - offset).toISOString() : undefined; +} + /** * Fetch AI summary from Brave Summarizer API (best-effort, free). */ @@ -265,6 +278,29 @@ interface OllamaWebSearchResponse { results: OllamaWebSearchResult[]; } +interface SerperOrganicResult { + title?: string; + link?: string; + snippet?: string; + date?: string; +} + +interface SerperSearchResponse { + organic?: SerperOrganicResult[]; +} + +interface ExaSearchResult { + title?: string; + url?: string; + text?: string; + summary?: string; + publishedDate?: string; +} + +interface ExaSearchResponse { + results?: ExaSearchResult[]; +} + /** * Execute a search against the Ollama web_search API. * Returns a CachedSearchResult with normalized, deduplicated results. @@ -302,6 +338,106 @@ async function executeOllamaSearch( }; } +async function executeSerperSearch( + params: { query: string; domain?: string; count: number }, + signal?: AbortSignal +): Promise<{ results: CachedSearchResult; latencyMs: number; rateLimit?: RateLimitInfo }> { + const query = params.domain ? 
`site:${params.domain} ${params.query}` : params.query; + const timed = await fetchWithRetryTimed("https://google.serper.dev/search", { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-API-KEY": getSerperApiKey(), + }, + body: JSON.stringify({ q: query, num: Math.max(1, Math.min(10, params.count)) }), + signal, + }, 2); + + const data: SerperSearchResponse = await timed.response.json(); + const normalized: SearchResultFormatted[] = (data.organic || []) + .map((r) => ({ + title: r.title || "(untitled)", + url: r.link || "", + description: r.snippet || "", + age: r.date || undefined, + })) + .filter((r) => r.url.length > 0); + const deduplicated = deduplicateResults(normalized); + + return { + results: { + results: deduplicated, + queryCorrected: false, + moreResultsAvailable: false, + }, + latencyMs: timed.latencyMs, + rateLimit: timed.rateLimit, + }; +} + +async function executeExaSearch( + params: { query: string; freshness: string | null; domain?: string; wantSummary: boolean; count: number }, + signal?: AbortSignal +): Promise<{ results: CachedSearchResult; latencyMs: number; rateLimit?: RateLimitInfo }> { + const requestBody: Record = { + query: params.query, + numResults: Math.max(1, Math.min(10, params.count)), + }; + if (params.domain) { + requestBody.includeDomains = [params.domain]; + } + const startPublishedDate = freshnessToExaStartPublishedDate(params.freshness); + if (startPublishedDate) { + requestBody.startPublishedDate = startPublishedDate; + } + if (params.wantSummary) { + requestBody.contents = { + summary: { + query: params.query, + }, + }; + } + + const timed = await fetchWithRetryTimed("https://api.exa.ai/search", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": getExaApiKey(), + }, + body: JSON.stringify(requestBody), + signal, + }, 2); + + const data: ExaSearchResponse = await timed.response.json(); + const normalized: SearchResultFormatted[] = (data.results || []) + .map((r) => ({ 
+ title: r.title || "(untitled)", + url: r.url || "", + description: r.summary || r.text || "", + age: r.publishedDate || undefined, + })) + .filter((r) => r.url.length > 0); + const deduplicated = deduplicateResults(normalized); + const summaryText = params.wantSummary + ? (data.results || []) + .map((r) => r.summary) + .filter((value): value is string => typeof value === "string" && value.trim().length > 0) + .slice(0, 3) + .join("\n\n") || undefined + : undefined; + + return { + results: { + results: deduplicated, + summaryText, + queryCorrected: false, + moreResultsAvailable: false, + }, + latencyMs: timed.latencyMs, + rateLimit: timed.rateLimit, + }; +} + async function executeBraveSearch( params: { query: string; effectiveQuery: string; freshness: string | null; wantSummary: boolean }, signal?: AbortSignal, @@ -352,10 +488,12 @@ async function executeBraveSearch( }; } -function availableComboProviders(): Array<'tavily' | 'brave' | 'ollama'> { - const providers: Array<'tavily' | 'brave' | 'ollama'> = []; +function availableComboProviders(): Array<'tavily' | 'brave' | 'serper' | 'exa' | 'ollama'> { + const providers: Array<'tavily' | 'brave' | 'serper' | 'exa' | 'ollama'> = []; if (getTavilyApiKey()) providers.push('tavily'); if (getBraveApiKey()) providers.push('brave'); + if (getSerperApiKey()) providers.push('serper'); + if (getExaApiKey()) providers.push('exa'); if (getOllamaApiKey()) providers.push('ollama'); return providers; } @@ -375,6 +513,12 @@ async function executeComboSearch( if (provider === 'ollama') { return executeOllamaSearch({ query: params.query, count: Math.max(10, params.count) }, signal); } + if (provider === 'serper') { + return executeSerperSearch({ query: params.query, domain: params.domain, count: Math.max(10, params.count) }, signal); + } + if (provider === 'exa') { + return executeExaSearch({ query: params.query, freshness: params.freshness, domain: params.domain, wantSummary: params.wantSummary, count: Math.max(10, params.count) }, 
signal); + } let effectiveQuery = params.query; if (params.domain && !effectiveQuery.toLowerCase().includes("site:")) { effectiveQuery = `site:${params.domain} ${effectiveQuery}`; @@ -473,7 +617,7 @@ export function registerSearchTool(pi: ExtensionAPI) { const provider = resolveSearchProvider(); if (!provider) { return { - content: [{ type: "text", text: "Web search unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY, BRAVE_API_KEY, or OLLAMA_API_KEY." }], + content: [{ type: "text", text: "Web search unavailable: No search API key is set. Use secure_env_collect to set TAVILY_API_KEY, BRAVE_API_KEY, SERPER_API_KEY, EXA_API_KEY, or OLLAMA_API_KEY." }], isError: true, details: { errorKind: "auth_error", error: "No search API key set" } satisfies Partial, }; @@ -510,7 +654,7 @@ export function registerSearchTool(pi: ExtensionAPI) { // Handle domain filter (provider-specific) // ------------------------------------------------------------------ let effectiveQuery = params.query; - if (provider === "brave" && params.domain) { + if ((provider === "brave" || provider === "serper") && params.domain) { if (!effectiveQuery.toLowerCase().includes("site:")) { effectiveQuery = `site:${params.domain} ${effectiveQuery}`; } @@ -630,6 +774,22 @@ export function registerSearchTool(pi: ExtensionAPI) { searchResult = ollamaResult.results; latencyMs = ollamaResult.latencyMs; rateLimit = ollamaResult.rateLimit; + } else if (provider === "serper") { + const serperResult = await executeSerperSearch( + { query: params.query, domain: params.domain, count: 10 }, + signal, + ); + searchResult = serperResult.results; + latencyMs = serperResult.latencyMs; + rateLimit = serperResult.rateLimit; + } else if (provider === "exa") { + const exaResult = await executeExaSearch( + { query: params.query, freshness, domain: params.domain, wantSummary, count: 10 }, + signal, + ); + searchResult = exaResult.results; + latencyMs = exaResult.latencyMs; + rateLimit = 
exaResult.rateLimit; } else { const braveResult = await executeBraveSearch( { query: params.query, effectiveQuery, freshness, wantSummary }, diff --git a/src/resources/extensions/sf-notify/index.ts b/src/resources/extensions/sf-notify/index.ts index 2c1e30cc0..201c1546c 100644 --- a/src/resources/extensions/sf-notify/index.ts +++ b/src/resources/extensions/sf-notify/index.ts @@ -69,7 +69,7 @@ async function readSettingsFile(): Promise { try { const content = await fs.readFile(p, "utf8"); return JSON.parse(content); - } catch {} + } catch {} // settings not found or corrupt -> try next path } return {}; } diff --git a/src/resources/extensions/sf-permissions/permission-core.ts b/src/resources/extensions/sf-permissions/permission-core.ts index c6c844551..873075f4e 100644 --- a/src/resources/extensions/sf-permissions/permission-core.ts +++ b/src/resources/extensions/sf-permissions/permission-core.ts @@ -318,14 +318,14 @@ function loadSettings(): Record { if (fs.existsSync(sfPath)) { return JSON.parse(fs.readFileSync(sfPath, "utf-8")); } - } catch {} + } catch {} // settings file not found or invalid JSON → fall through to PI path const piPath = getPiSettingsPath(); try { if (fs.existsSync(piPath)) { return JSON.parse(fs.readFileSync(piPath, "utf-8")); } - } catch {} + } catch {} // PI settings absent or corrupt → return empty defaults return {}; } diff --git a/src/resources/extensions/sf-tui/color-band.ts b/src/resources/extensions/sf-tui/color-band.ts index b6cd9f752..b4467f657 100644 --- a/src/resources/extensions/sf-tui/color-band.ts +++ b/src/resources/extensions/sf-tui/color-band.ts @@ -176,7 +176,7 @@ function readColorState(): ColorState | null { if (fs.existsSync(STATE_FILE)) { return JSON.parse(fs.readFileSync(STATE_FILE, "utf8")) as ColorState; } - } catch {} + } catch {} // file missing or corrupt → return null (no saved state) return null; } @@ -185,7 +185,7 @@ function writeColorState(s: ColorState): void { const dir = path.dirname(STATE_FILE); if 
(!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); fs.writeFileSync(STATE_FILE, JSON.stringify(s, null, 2), "utf8"); - } catch {} + } catch {} // write failure → state not persisted, but operation continues } // ───────────────────────────────────────────────────────────────────────────── diff --git a/src/resources/extensions/sf-usage-bar/index.ts b/src/resources/extensions/sf-usage-bar/index.ts index 64a0c979b..1632d7020 100644 --- a/src/resources/extensions/sf-usage-bar/index.ts +++ b/src/resources/extensions/sf-usage-bar/index.ts @@ -25,16 +25,14 @@ function loadAuthJson(): Record | undefined { if (fs.existsSync(sfAuthPath)) { return JSON.parse(fs.readFileSync(sfAuthPath, "utf-8")); } - } catch {} + } catch {} // file missing or invalid → try PI path const piAuthPath = path.join(os.homedir(), ".pi", "agent", "auth.json"); try { if (fs.existsSync(piAuthPath)) { return JSON.parse(fs.readFileSync(piAuthPath, "utf-8")); } - } catch {} - - return undefined; + } catch {} // file missing or invalid → return undefined } // ============================================================================ @@ -346,7 +344,7 @@ async function fetchGeminiUsage(_modelRegistry: any): Promise { const geminiData = JSON.parse(fs.readFileSync(credPath, "utf-8")); token = geminiData.access_token; } - } catch {} + } catch {} // missing or invalid JSON → continue } if (!token) { @@ -421,7 +419,7 @@ async function fetchCodexUsage(modelRegistry: any): Promise { if (cred?.type === "oauth") { accountId = (cred as any).accountId; } - } catch {} + } catch {} // missing or invalid JSON → continue to codex fallback // Fallback to ~/.codex/auth.json if not in sf's auth if (!accessToken) { @@ -439,7 +437,7 @@ async function fetchCodexUsage(modelRegistry: any): Promise { accountId = data.tokens.account_id; } } - } catch {} + } catch {} // codex auth missing or invalid → continue } if (!accessToken) { @@ -633,7 +631,7 @@ async function fetchZaiUsage(): Promise { if (data) { apiKey = 
data["z-ai"]?.access || data["zai"]?.access; } - } catch {} + } catch {} // missing or invalid → continue to error } if (!apiKey) { diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index d15c4dfa5..c97eb1dd3 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -215,6 +215,33 @@ export const DISPATCH_RULES: DispatchRule[] = [ }; }, }, + { + name: "initial-roadmap-meeting (first dispatch)", + match: async ({ state, mid, midTitle, basePath }) => { + // Only on first dispatch: when phase is pre-planning AND no roadmap exists yet + // This ensures roadmap meeting happens BEFORE discuss/research/plan + if (state.phase !== "pre-planning") return null; + // resolveMilestoneFile returns path string if file exists, null if not + const roadmapFile = resolveMilestoneFile(basePath, mid, "ROADMAP"); + if (roadmapFile && existsSync(roadmapFile)) return null; // roadmap already exists + return { + action: "dispatch", + unitType: "roadmap-meeting", + unitId: mid, + prompt: "You are facilitating the **initial roadmap meeting** for milestone " + mid + ".\n\n" + + "Before any detailed planning, work with the user to agree on:\n" + + "1. **What done looks like** — the milestone definition of success\n" + + "2. **Rough scope** — what slices (vertical increments) make up this milestone\n" + + "3. **Key risks** — what could go wrong or cause re-planning\n" + + "4. 
**First slice** — which slice should go first (lowest risk)\n\n" + + "Then write a **ROADMAP.md** in `.sf/milestones/" + mid + "/` with the agreed slices.\n" + + "Do NOT write detailed plans — that's for later after the roadmap is aligned.\n\n" + + "## Session Context\n" + + "- Working directory: `" + basePath + "`\n" + + "- Project goals/description: See PROJECT.md if it exists", + }; + }, + }, { name: "summarizing → complete-slice", match: async ({ state, mid, midTitle, basePath }) => { @@ -951,5 +978,11 @@ export async function resolveDispatch( /** Exposed for testing — returns the rule names in evaluation order. */ export function getDispatchRuleNames(): string[] { + if (hasRegistry()) { + return getRegistry() + .listRules() + .filter((rule) => rule.when === "dispatch") + .map((rule) => rule.name); + } return DISPATCH_RULES.map((r) => r.name); } diff --git a/src/resources/extensions/sf/auto-recovery.ts b/src/resources/extensions/sf/auto-recovery.ts index 4f83659b3..af92bb07c 100644 --- a/src/resources/extensions/sf/auto-recovery.ts +++ b/src/resources/extensions/sf/auto-recovery.ts @@ -17,6 +17,8 @@ import { isDbAvailable, getTask, getSlice, getSliceTasks, getPendingGates, updat import { isValidationTerminal } from "./state.js"; import { getErrorMessage } from "./error-utils.js"; import { logWarning, logError } from "./workflow-logger.js"; +import { getSlicePlanBlockingIssue } from "./plan-quality.js"; +import { getMilestonePlanBlockingIssue } from "./milestone-quality.js"; import { nativeConflictFiles, nativeCommit, @@ -313,7 +315,9 @@ export function verifyExpectedArtifact( if (unitType === "plan-milestone") { try { - const roadmap = parseLegacyRoadmap(readFileSync(absPath, "utf-8")); + const roadmapContent = readFileSync(absPath, "utf-8"); + if (getMilestonePlanBlockingIssue(roadmapContent)) return false; + const roadmap = parseLegacyRoadmap(roadmapContent); if (roadmap.slices.length === 0) return false; } catch (err) { logWarning("recovery", `plan-milestone 
roadmap verification failed: ${err instanceof Error ? err.message : String(err)}`); @@ -332,6 +336,7 @@ export function verifyExpectedArtifact( const hasCheckboxTask = /^- \[[xX ]\] \*\*T\d+:/m.test(planContent); const hasHeadingTask = /^#{2,4}\s+T\d+\s*(?:--|—|:)/m.test(planContent); if (!hasCheckboxTask && !hasHeadingTask) return false; + if (getSlicePlanBlockingIssue(planContent)) return false; } // execute-task: DB status is authoritative. Fall back to checked-checkbox diff --git a/src/resources/extensions/sf/auto-start.ts b/src/resources/extensions/sf/auto-start.ts index 738988b5b..dd2d32834 100644 --- a/src/resources/extensions/sf/auto-start.ts +++ b/src/resources/extensions/sf/auto-start.ts @@ -13,7 +13,7 @@ import type { ExtensionAPI, ExtensionCommandContext, } from "@singularity-forge/pi-coding-agent"; -import { deriveState } from "./state.js"; +import { deriveState, isGhostMilestone } from "./state.js"; import { loadFile, getManifestStatus } from "./files.js"; import type { InterruptedSessionAssessment } from "./interrupted-session.js"; import { @@ -558,7 +558,28 @@ export async function bootstrapAutoSession( }); invalidateAllCaches(); - const postState = await deriveState(base); + let postState = await deriveState(base); + if (!postState.activeMilestone) { + ctx.ui.notify( + `Headless bootstrap for ${nextId} returned without artifacts. Starting roadmap planning repair session.`, + "warning", + ); + await dispatchNewMilestoneDiscuss(ctx, pi, base, nextId, { + auto: true, + preamble: injectTodoContext( + base, + [ + `This is an autonomous roadmap bootstrap repair for ${nextId}.`, + "The previous bootstrap turn ended without writing CONTEXT, CONTEXT-DRAFT, or ROADMAP artifacts.", + "Start the roadmap planning session now: build project knowledge, run the planning meeting, and persist artifacts.", + "Do not stop after reflection. 
At minimum write CONTEXT-DRAFT with evidence and open questions.", + "If confidence is high enough, write CONTEXT and call sf_plan_milestone so auto-mode can continue.", + ].join("\n"), + ), + }); + invalidateAllCaches(); + postState = await deriveState(base); + } if ( postState.activeMilestone && postState.phase !== "complete" && @@ -579,13 +600,74 @@ export async function bootstrapAutoSession( if (hasContext) { state = postState; } else { + const repairId = postState.activeMilestone.id; ctx.ui.notify( - "Headless bootstrap completed but no milestone context was written. Retrying.", + `Headless bootstrap created ${repairId} without context. Starting roadmap planning repair session.`, "warning", ); - return releaseLockAndReturn(); + await dispatchNewMilestoneDiscuss(ctx, pi, base, repairId, { + auto: true, + preamble: injectTodoContext( + base, + [ + `This is an autonomous roadmap bootstrap repair for existing milestone ${repairId}.`, + "The previous bootstrap created a milestone shell but did not write CONTEXT.md, CONTEXT-DRAFT.md, or ROADMAP.md.", + "Reuse this milestone ID. 
Do not create a new milestone for the same bootstrap work.", + "Run the roadmap planning session now and persist CONTEXT or CONTEXT-DRAFT at minimum.", + "If confidence is high enough, write CONTEXT and call sf_plan_milestone so auto-mode can continue.", + ].join("\n"), + ), + }); + invalidateAllCaches(); + postState = await deriveState(base); + if ( + postState.activeMilestone && + postState.phase !== "complete" && + postState.phase !== "pre-planning" + ) { + s.consecutiveCompleteBootstraps = 0; + state = postState; + } else if ( + postState.activeMilestone && + postState.phase === "pre-planning" + ) { + const repairedContextFile = resolveMilestoneFile( + base, + postState.activeMilestone.id, + "CONTEXT", + ); + const repairedHasContext = !!( + repairedContextFile && (await loadFile(repairedContextFile)) + ); + if (repairedHasContext) { + state = postState; + } else { + ctx.ui.notify( + "Headless bootstrap repair completed but milestone context is still missing.", + "warning", + ); + return releaseLockAndReturn(); + } + } else { + ctx.ui.notify( + "Headless bootstrap repair completed but no milestone artifacts were written. Auto cannot continue without a context or draft.", + "warning", + ); + return releaseLockAndReturn(); + } } } else { + if (isGhostMilestone(base, nextId)) { + rmSync(join(sfRoot(base), "milestones", nextId), { + recursive: true, + force: true, + }); + invalidateAllCaches(); + } + ctx.ui.notify( + "Headless bootstrap repair completed but no milestone artifacts were written. Auto cannot continue without a context or draft.", + "warning", + ); return releaseLockAndReturn(); } } @@ -597,17 +679,42 @@ export async function bootstrapAutoSession( const hasContext = !!(contextFile && (await loadFile(contextFile))); if (!hasContext) { ctx.ui.notify(`Milestone ${mid} has no context. 
Bootstrapping from codebase analysis.`, "info"); - const { bootstrapNewMilestone, dispatchNewMilestoneDiscuss, injectTodoContext } = await import("./guided-flow.js"); - const nextId = bootstrapNewMilestone(base); - await dispatchNewMilestoneDiscuss(ctx, pi, base, nextId, { + const { dispatchNewMilestoneDiscuss, injectTodoContext } = await import("./guided-flow.js"); + await dispatchNewMilestoneDiscuss(ctx, pi, base, mid, { auto: true, - preamble: injectTodoContext(base, "This is an autonomous session."), + preamble: injectTodoContext( + base, + [ + `This is an autonomous roadmap bootstrap repair for existing milestone ${mid}.`, + "The milestone exists but has no CONTEXT.md yet.", + "Reuse this milestone ID. Do not create a new milestone for the same bootstrap work.", + "Build project knowledge, run the planning meeting, and persist CONTEXT or CONTEXT-DRAFT.", + ].join("\n"), + ), }); invalidateAllCaches(); const postState = await deriveState(base); if (postState.activeMilestone && postState.phase !== "pre-planning") { state = postState; + } else if (postState.activeMilestone && postState.phase === "pre-planning") { + const repairedContextFile = resolveMilestoneFile( + base, + postState.activeMilestone.id, + "CONTEXT", + ); + const repairedHasContext = !!( + repairedContextFile && (await loadFile(repairedContextFile)) + ); + if (repairedHasContext) { + state = postState; + } else { + ctx.ui.notify( + "Discussion completed but milestone context is still missing. Run /sf to try again.", + "warning", + ); + return releaseLockAndReturn(); + } } else { ctx.ui.notify( "Discussion completed but milestone context is still missing. 
Run /sf to try again.", diff --git a/src/resources/extensions/sf/bootstrap/db-tools.ts b/src/resources/extensions/sf/bootstrap/db-tools.ts index b8ba1f38a..0caa249a7 100644 --- a/src/resources/extensions/sf/bootstrap/db-tools.ts +++ b/src/resources/extensions/sf/bootstrap/db-tools.ts @@ -433,7 +433,7 @@ export function registerDbTools(pi: ExtensionAPI): void { promptSnippet: "Plan a milestone via DB write + roadmap render + cache invalidation", promptGuidelines: [ "Use sf_plan_milestone for milestone planning instead of writing ROADMAP.md directly.", - "Keep parameters flat and provide the full milestone planning payload, including slices.", + "Keep parameters flat and provide the full milestone planning payload. Use either explicit slices or templateId-based scaffolding for common feat/fix/refactor patterns.", "The tool validates input, writes milestone and slice planning data transactionally, renders ROADMAP.md from DB, and clears both state and parse caches after success.", "Use the canonical name sf_plan_milestone; sf_milestone_plan is only an alias.", ], @@ -442,7 +442,7 @@ export function registerDbTools(pi: ExtensionAPI): void { milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }), title: Type.String({ description: "Milestone title" }), vision: Type.String({ description: "Milestone vision" }), - slices: Type.Array(Type.Object({ + slices: Type.Optional(Type.Array(Type.Object({ sliceId: Type.String({ description: "Slice ID (e.g. 
S01)" }), title: Type.String({ description: "Slice title" }), risk: Type.String({ description: "Slice risk" }), @@ -453,7 +453,8 @@ export function registerDbTools(pi: ExtensionAPI): void { proofLevel: Type.String({ description: "Slice proof level" }), integrationClosure: Type.String({ description: "Slice integration closure" }), observabilityImpact: Type.String({ description: "Slice observability impact" }), - }), { description: "Planned slices for the milestone" }), + }), { description: "Planned slices for the milestone. Optional when templateId is used for scaffolding." })), + templateId: Type.Optional(Type.String({ description: "Optional milestone template scaffold (e.g. bugfix, small-feature, refactor)" })), // ── Enrichment metadata (optional — defaults to empty) ──────────── status: Type.Optional(Type.String({ description: "Milestone status (defaults to active)" })), dependsOn: Type.Optional(Type.Array(Type.String(), { description: "Milestone dependencies" })), @@ -474,6 +475,26 @@ export function registerDbTools(pi: ExtensionAPI): void { definitionOfDone: Type.Optional(Type.Array(Type.String(), { description: "Definition of done bullets" })), requirementCoverage: Type.Optional(Type.String({ description: "Requirement coverage text" })), boundaryMapMarkdown: Type.Optional(Type.String({ description: "Boundary map markdown block" })), + visionMeeting: Type.Optional(Type.Object({ + trigger: Type.String({ description: "Why a top-level roadmap meeting was needed" }), + pm: Type.String({ description: "Product manager framing of the milestone and roadmap" }), + userAdvocate: Type.String({ description: "User advocate view of what must matter for the end user" }), + customerPanel: Type.String({ description: "Nuanced customer panel summary across multiple likely customer viewpoints" }), + business: Type.String({ description: "Business view on viability, wedge, retention, or monetizable direction" }), + researcher: Type.String({ description: "Comparable products, OSS 
tools, market expectations, and external research" }), + deliveryLead: Type.String({ description: "Sequencing and scope-cut view from a delivery perspective" }), + partner: Type.String({ description: "Strengthened best-case roadmap proposal" }), + combatant: Type.String({ description: "Strongest objection, overbuild warning, or alternative framing" }), + architect: Type.String({ description: "System-fit and architecture synthesis" }), + moderator: Type.String({ description: "Final moderator decision after weighing the participants" }), + weightedSynthesis: Type.String({ description: "Weighted synthesis of the strongest claims, additions, cuts, and sequencing changes" }), + confidenceByArea: Type.String({ description: "Confidence by area, not one fake overall score" }), + recommendedRoute: Type.Union([ + Type.Literal("discussing"), + Type.Literal("researching"), + Type.Literal("planning"), + ], { description: "Where the system should route next after weighing the meeting" }), + }, { description: "Structured vision and roadmap alignment meeting for top-level milestone planning" })), }), execute: planMilestoneExecute, }; @@ -504,6 +525,26 @@ export function registerDbTools(pi: ExtensionAPI): void { milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }), sliceId: Type.String({ description: "Slice ID (e.g. 
S01)" }), goal: Type.String({ description: "Slice goal" }), + adversarialReview: Type.Optional(Type.Object({ + partner: Type.String({ description: "Strongest case for the plan and confirmed mechanism" }), + combatant: Type.String({ description: "Attacks the premise first, then the proposal and alternatives" }), + architect: Type.String({ description: "System-fit review after partner and combatant passes" }), + }, { description: "Adversarial review summary for this slice plan" })), + planningMeeting: Type.Optional(Type.Object({ + trigger: Type.String({ description: "Why a planning meeting was needed" }), + pm: Type.String({ description: "PM/product framing and scope cut" }), + researcher: Type.String({ description: "Research and evidence summary, including docs/code findings" }), + partner: Type.String({ description: "Strengthened best-case plan" }), + combatant: Type.String({ description: "Strongest objection or alternative root cause/approach" }), + architect: Type.String({ description: "System-fit and sequencing resolution" }), + moderator: Type.String({ description: "Moderator synthesis and decision" }), + recommendedRoute: Type.Union([ + Type.Literal("discussing"), + Type.Literal("researching"), + Type.Literal("planning"), + ], { description: "Where the workflow should route after the meeting" }), + confidenceSummary: Type.String({ description: "Confidence rationale after the meeting" }), + }, { description: "Optional structured planning meeting artifact for ambiguous or higher-complexity slices" })), tasks: Type.Array(Type.Object({ taskId: Type.String({ description: "Task ID (e.g. 
T01)" }), title: Type.String({ description: "Task title" }), @@ -931,8 +972,9 @@ export function registerDbTools(pi: ExtensionAPI): void { promptGuidelines: [ "Use sf_replan_slice (canonical) or sf_slice_replan (alias) when a blocker is discovered and the slice plan needs rewriting.", "The tool structurally enforces that completed tasks cannot be updated or removed — violations return specific error payloads naming the blocked task ID.", - "Parameters: milestoneId, sliceId, blockerTaskId, blockerDescription, whatChanged, updatedTasks (array), removedTaskIds (array).", + "Parameters: milestoneId, sliceId, blockerTaskId, blockerDescription, whatChanged, optional slice-level planning/ceremony updates, updatedTasks (array), removedTaskIds (array).", "updatedTasks items: taskId, title, description, estimate, files, verify, inputs, expectedOutput.", + "When the blocker changes the slice-level rationale or execution readiness, update adversarialReview and planningMeeting as part of the same replan.", ], parameters: Type.Object({ milestoneId: Type.String({ description: "Milestone ID (e.g. 
M001)" }), @@ -940,6 +982,31 @@ export function registerDbTools(pi: ExtensionAPI): void { blockerTaskId: Type.String({ description: "Task ID that discovered the blocker" }), blockerDescription: Type.String({ description: "Description of the blocker" }), whatChanged: Type.String({ description: "Summary of what changed in the plan" }), + goal: Type.Optional(Type.String({ description: "Updated slice goal when the replan changes the slice contract" })), + successCriteria: Type.Optional(Type.String({ description: "Updated slice success criteria block" })), + proofLevel: Type.Optional(Type.String({ description: "Updated slice proof level" })), + integrationClosure: Type.Optional(Type.String({ description: "Updated slice integration closure" })), + observabilityImpact: Type.Optional(Type.String({ description: "Updated slice observability impact" })), + adversarialReview: Type.Optional(Type.Object({ + partner: Type.String({ description: "Updated strongest case for the replanned slice" }), + combatant: Type.String({ description: "Updated strongest objection or alternative cause/path" }), + architect: Type.String({ description: "Updated system-fit review after the replan" }), + }, { description: "Updated adversarial review for the replanned slice" })), + planningMeeting: Type.Optional(Type.Object({ + trigger: Type.String({ description: "Why a planning meeting was needed during replan" }), + pm: Type.String({ description: "PM/product framing and scope cut" }), + researcher: Type.String({ description: "Updated evidence summary for the replan" }), + partner: Type.String({ description: "Updated strengthened best-case plan" }), + combatant: Type.String({ description: "Updated strongest objection or alternative" }), + architect: Type.String({ description: "Updated system-fit and sequencing resolution" }), + moderator: Type.String({ description: "Moderator synthesis and route after replan" }), + recommendedRoute: Type.Union([ + Type.Literal("discussing"), + 
Type.Literal("researching"), + Type.Literal("planning"), + ], { description: "Where the workflow should route after the replanning meeting" }), + confidenceSummary: Type.String({ description: "Confidence rationale after the replanning meeting" }), + }, { description: "Updated planning meeting artifact for the replanned slice" })), updatedTasks: Type.Array( Type.Object({ taskId: Type.String({ description: "Task ID (e.g. T01)" }), diff --git a/src/resources/extensions/sf/bootstrap/system-context.ts b/src/resources/extensions/sf/bootstrap/system-context.ts index 0284fe996..891eb9cc7 100644 --- a/src/resources/extensions/sf/bootstrap/system-context.ts +++ b/src/resources/extensions/sf/bootstrap/system-context.ts @@ -13,6 +13,7 @@ import { resolveModelWithFallbacksForUnit } from "../preferences-models.js"; import { resolveSkillReference } from "../preferences-skills.js"; import { resolveSfRootFile, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTaskFiles, resolveTasksDir, relSliceFile, relSlicePath, relTaskFile } from "../paths.js"; import { ensureCodebaseMapFresh, readCodebaseMap } from "../codebase-generator.js"; +import { buildCodeIntelligenceContextBlock } from "../code-intelligence.js"; import { hasSkillSnapshot, detectNewSkills, formatSkillsXml } from "../skill-discovery.js"; import { getActiveAutoWorktreeContext } from "../auto-worktree.js"; import { getActiveWorktreeName, getWorktreeOriginalCwd } from "../worktree-command.js"; @@ -134,6 +135,7 @@ export async function buildBeforeAgentStartResult( } let codebaseBlock = ""; + let codeIntelligenceBlock = ""; try { const codebaseOptions = loadedPreferences?.preferences?.codebase ? 
{ @@ -147,6 +149,15 @@ export async function buildBeforeAgentStartResult( logWarning("bootstrap", `CODEBASE refresh failed: ${(e as Error).message}`); } + try { + codeIntelligenceBlock = buildCodeIntelligenceContextBlock( + process.cwd(), + loadedPreferences?.preferences?.codebase, + ); + } catch (e) { + logWarning("bootstrap", `code intelligence block failed: ${(e as Error).message}`); + } + const codebasePath = resolveSfRootFile(process.cwd(), "CODEBASE"); const rawCodebase = readCodebaseMap(process.cwd()); if (existsSync(codebasePath) && rawCodebase) { @@ -182,7 +193,7 @@ export async function buildBeforeAgentStartResult( ? `\n\n## Subagent Model\n\nWhen spawning subagents via the \`subagent\` tool, always pass \`model: "${subagentModelConfig.primary}"\` in the tool call parameters. Never omit this — always specify it explicitly.` : ""; - const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${subagentModelBlock}`; + const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}${subagentModelBlock}`; stopContextTimer({ systemPromptSize: fullSystem.length, diff --git a/src/resources/extensions/sf/codebase-generator.ts b/src/resources/extensions/sf/codebase-generator.ts index 820cc9b2d..4e8ce0176 100644 --- a/src/resources/extensions/sf/codebase-generator.ts +++ b/src/resources/extensions/sf/codebase-generator.ts @@ -56,6 +56,14 @@ interface DirectoryGroup { collapsed: boolean; } +interface ProjectKnowledge { + stackSignals: string[]; + criticalPathHints: string[]; + verificationCommands: string[]; + skillNeeds: string[]; + knowledgeGaps: string[]; +} + interface ResolvedCodebaseMapOptions { excludes: string[]; maxFiles: number; @@ -294,6 +302,147 @@ function groupByDirectory( return groups; 
} +function hasFile(files: string[], fileName: string): boolean { + return files.includes(fileName) || files.some((file) => file.endsWith(`/${fileName}`)); +} + +function hasDir(files: string[], dirName: string): boolean { + const prefix = dirName.endsWith("/") ? dirName : `${dirName}/`; + return files.some((file) => file.startsWith(prefix) || file.includes(`/${prefix}`)); +} + +function hasExt(files: string[], extensions: string[]): boolean { + const wanted = new Set(extensions); + return files.some((file) => wanted.has(extname(file).toLowerCase())); +} + +function hasTestFile(files: string[]): boolean { + return files.some((file) => + /(^|\/)(test|tests|spec|__tests__)(\/|$)/i.test(file) + || /\.(test|spec)\.[cm]?[jt]sx?$/i.test(file) + || /_test\.go$/i.test(file) + || /test_.*\.py$/i.test(file) + || /_spec\.rb$/i.test(file) + ); +} + +function pushUnique(target: string[], value: string): void { + if (!target.includes(value)) target.push(value); +} + +function inferProjectKnowledge(files: string[]): ProjectKnowledge { + const stackSignals: string[] = []; + const criticalPathHints: string[] = []; + const verificationCommands: string[] = []; + const skillNeeds: string[] = []; + const knowledgeGaps: string[] = []; + + if (hasFile(files, "package.json")) { + pushUnique(stackSignals, "Node.js package manifest present"); + pushUnique(verificationCommands, "npm test or the package.json test script"); + if (hasFile(files, "tsconfig.json") || hasExt(files, [".ts", ".tsx"])) { + pushUnique(stackSignals, "TypeScript source detected"); + pushUnique(skillNeeds, "TypeScript/Node project maintenance"); + } else { + pushUnique(skillNeeds, "JavaScript/Node project maintenance"); + } + } + if (hasFile(files, "go.mod")) { + pushUnique(stackSignals, "Go module present"); + pushUnique(verificationCommands, "go test ./..."); + pushUnique(skillNeeds, "Go service development and testing"); + } + if (hasFile(files, "Cargo.toml")) { + pushUnique(stackSignals, "Rust crate/workspace 
manifest present"); + pushUnique(verificationCommands, "cargo test"); + pushUnique(skillNeeds, "Rust implementation and ownership review"); + } + if (hasFile(files, "pyproject.toml") || hasFile(files, "requirements.txt")) { + pushUnique(stackSignals, "Python project manifest present"); + pushUnique(verificationCommands, "pytest or the project quality command"); + pushUnique(skillNeeds, "Python packaging, typing, and tests"); + } + if (hasFile(files, "Dockerfile") || hasFile(files, "docker-compose.yml") || hasFile(files, "compose.yaml")) { + pushUnique(stackSignals, "Container/runtime configuration present"); + pushUnique(skillNeeds, "Containerized runtime and deployment review"); + } + if (hasFile(files, "flake.nix") || hasDir(files, "nix") || hasDir(files, "nixos")) { + pushUnique(stackSignals, "Nix/NixOS configuration present"); + pushUnique(skillNeeds, "Nix build and deployment review"); + } + if (hasDir(files, "migrations") || hasDir(files, "db") || hasDir(files, "database") || hasExt(files, [".sql"])) { + pushUnique(stackSignals, "Database schema or migration files present"); + pushUnique(skillNeeds, "Database migration and persistence review"); + pushUnique(criticalPathHints, "Database migrations and persistence code need schema/runtime alignment checks"); + } + + for (const dir of ["src", "app", "cmd", "internal", "pkg", "server", "services", "packages"]) { + if (hasDir(files, dir)) { + pushUnique(criticalPathHints, `${dir}/ is a likely implementation boundary to map before planning`); + } + } + if (hasDir(files, "api") || hasDir(files, "routes") || hasDir(files, "handlers")) { + pushUnique(criticalPathHints, "API/handler directories exist; trace request paths end-to-end before changing behavior"); + } + if (hasDir(files, "scripts")) { + pushUnique(criticalPathHints, "scripts/ may contain repo-owned build, test, or deploy entrypoints"); + } + if (hasDir(files, "docs")) { + pushUnique(criticalPathHints, "docs/ may contain product or architecture decisions 
that constrain roadmap scope"); + } + + if (hasTestFile(files)) { + pushUnique(criticalPathHints, "Tracked tests exist; map coverage against the primary user/runtime flows"); + } else { + pushUnique(knowledgeGaps, "No tracked test files detected by filename convention; verify actual quality gates before planning"); + } + if (!hasDir(files, ".github/workflows") && !hasFile(files, ".gitlab-ci.yml") && !hasFile(files, "Jenkinsfile")) { + pushUnique(knowledgeGaps, "No common CI workflow file detected; identify the authoritative quality command"); + } + if (!hasFile(files, "README.md") && !hasFile(files, "README.rst")) { + pushUnique(knowledgeGaps, "No README detected; infer product intent from code, docs, or user-provided specification"); + } + if (stackSignals.length === 0) { + pushUnique(knowledgeGaps, "No common runtime manifest detected; inspect entrypoints manually before planning"); + } + + pushUnique(knowledgeGaps, "Fill descriptions for active milestone files after reading them, not from filenames alone"); + pushUnique(knowledgeGaps, "Record verified runtime boundaries, external services, data stores, and missing skills before final CONTEXT.md"); + + return { + stackSignals: stackSignals.length ? stackSignals : ["No stack signals inferred from common manifests"], + criticalPathHints: criticalPathHints.length ? criticalPathHints : ["Map entrypoints manually; no common source directories detected"], + verificationCommands: verificationCommands.length ? verificationCommands : ["Identify and run the repo-owned quality/test command"], + skillNeeds: skillNeeds.length ? skillNeeds : ["General codebase exploration skill; add domain-specific skills after stack discovery"], + knowledgeGaps, + }; +} + +function renderProjectKnowledge(lines: string[], knowledge: ProjectKnowledge): void { + lines.push("## Project Knowledge"); + lines.push(""); + lines.push( + "Generated orientation scaffold. 
SF should enrich these sections with verified findings before promoting milestone context.", + ); + lines.push(""); + + const sections: Array<[string, string[]]> = [ + ["Stack Signals", knowledge.stackSignals], + ["Critical Paths To Investigate", knowledge.criticalPathHints], + ["Verification Commands To Prove", knowledge.verificationCommands], + ["Skill Needs", knowledge.skillNeeds], + ["Knowledge Gaps To Close", knowledge.knowledgeGaps], + ]; + + for (const [heading, items] of sections) { + lines.push(`### ${heading}`); + for (const item of items) { + lines.push(`- ${item}`); + } + lines.push(""); + } +} + // ─── Rendering ─────────────────────────────────────────────────────────────── function renderCodebaseMap( @@ -301,6 +450,7 @@ function renderCodebaseMap( totalFiles: number, truncated: boolean, metadata: CodebaseMapMetadata, + files: string[], ): string { const lines: string[] = []; const described = groups.reduce((sum, g) => sum + g.files.filter((f) => f.description).length, 0); @@ -314,6 +464,11 @@ function renderCodebaseMap( } lines.push(""); + renderProjectKnowledge(lines, inferProjectKnowledge(files)); + + lines.push("## File Map"); + lines.push(""); + for (const group of groups) { const heading = group.path || "(root)"; lines.push(`### ${heading}/`); @@ -379,7 +534,7 @@ function buildCodebaseMap( fileCount: listed.files.length, truncated: listed.truncated, }; - const content = renderCodebaseMap(groups, listed.files.length, listed.truncated, metadata); + const content = renderCodebaseMap(groups, listed.files.length, listed.truncated, metadata, listed.files); return { content, diff --git a/src/resources/extensions/sf/commands-bootstrap.ts b/src/resources/extensions/sf/commands-bootstrap.ts index ffbd531b8..0f5e92a78 100644 --- a/src/resources/extensions/sf/commands-bootstrap.ts +++ b/src/resources/extensions/sf/commands-bootstrap.ts @@ -224,6 +224,7 @@ function getGsdArgumentCompletions(prefix: string) { { cmd: "generate", desc: "Generate or regenerate 
CODEBASE.md" }, { cmd: "update", desc: "Refresh the CODEBASE.md cache immediately" }, { cmd: "stats", desc: "Show codebase-map coverage and generation time" }, + { cmd: "rag", desc: "Inspect optional project-rag code search backend" }, { cmd: "help", desc: "Show usage and subcommands" }, ], "codebase"); } diff --git a/src/resources/extensions/sf/commands-codebase.ts b/src/resources/extensions/sf/commands-codebase.ts index 62469824a..9b66f8d74 100644 --- a/src/resources/extensions/sf/commands-codebase.ts +++ b/src/resources/extensions/sf/commands-codebase.ts @@ -2,7 +2,7 @@ * SF Command — /sf codebase * * Generate and manage the codebase map (.sf/CODEBASE.md). - * Subcommands: generate, update, stats, help + * Subcommands: generate, update, stats, rag, help */ import type { ExtensionAPI, ExtensionCommandContext } from "@singularity-forge/pi-coding-agent"; @@ -14,14 +14,19 @@ import { getCodebaseMapStats, readCodebaseMap, } from "./codebase-generator.js"; +import { + ensureProjectRagMcpConfig, + formatProjectRagStatus, +} from "./code-intelligence.js"; import { loadEffectiveSFPreferences } from "./preferences.js"; import type { CodebaseMapOptions } from "./codebase-generator.js"; const USAGE = - "Usage: /sf codebase [generate|update|stats]\n\n" + + "Usage: /sf codebase [generate|update|stats|rag]\n\n" + " generate [--max-files N] [--collapse-threshold N] — Generate or regenerate CODEBASE.md\n" + " update [--max-files N] [--collapse-threshold N] — Refresh the CODEBASE.md cache immediately\n" + " stats — Show file count, coverage, and generation time\n" + + " rag [status|init] — Inspect or configure optional project-rag MCP backend\n" + " help — Show this help\n\n" + "With no subcommand, shows stats if a map exists or help if not.\n" + "SF also refreshes CODEBASE.md automatically before prompt injection and after completed units when tracked files change.\n\n" + @@ -29,7 +34,9 @@ const USAGE = " codebase:\n" + " exclude_patterns: [\"docs/\", \"fixtures/\"]\n" + " 
max_files: 1000\n" + - " collapse_threshold: 15"; + " collapse_threshold: 15\n" + + " project_rag: auto # auto | off | required\n" + + " project_rag_auto_index: true"; export async function handleCodebase( args: string, @@ -101,6 +108,43 @@ export async function handleCodebase( return; } + case "rag": { + const action = (parts[1] ?? "status").toLowerCase(); + const prefs = loadEffectiveSFPreferences()?.preferences?.codebase; + if (action === "status") { + ctx.ui.notify(formatProjectRagStatus(basePath, prefs), "info"); + return; + } + if (action === "init") { + try { + const result = ensureProjectRagMcpConfig(basePath); + ctx.ui.notify( + [ + result.status === "created" + ? "Created project-rag MCP config." + : result.status === "updated" + ? "Updated project-rag MCP config." + : "Project-rag MCP config is already up to date.", + "", + `Server: ${result.serverName}`, + `Config: ${result.configPath}`, + "", + "Restart the MCP client session so the new server and tools are loaded.", + ].join("\n"), + "success", + ); + } catch (err) { + ctx.ui.notify( + `Could not initialize project-rag MCP config: ${err instanceof Error ? err.message : String(err)}`, + "warning", + ); + } + return; + } + ctx.ui.notify(`Unknown /sf codebase rag action "${action}". 
Use status or init.`, "warning"); + return; + } + case "help": ctx.ui.notify(USAGE, "info"); return; diff --git a/src/resources/extensions/sf/commands/catalog.ts b/src/resources/extensions/sf/commands/catalog.ts index 7229bb33a..1e52e1669 100644 --- a/src/resources/extensions/sf/commands/catalog.ts +++ b/src/resources/extensions/sf/commands/catalog.ts @@ -248,6 +248,8 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "update --max-files", desc: "Update with custom file limit" }, { cmd: "update --collapse-threshold", desc: "Update with custom collapse threshold" }, { cmd: "stats", desc: "Show file count, description coverage, and generation time" }, + { cmd: "rag status", desc: "Show optional project-rag MCP backend status" }, + { cmd: "rag init", desc: "Write .mcp.json entry for project-rag when a binary is available" }, { cmd: "help", desc: "Show usage and available subcommands" }, ], ship: [ diff --git a/src/resources/extensions/sf/commands/handlers/core.ts b/src/resources/extensions/sf/commands/handlers/core.ts index 986940393..e321f8e30 100644 --- a/src/resources/extensions/sf/commands/handlers/core.ts +++ b/src/resources/extensions/sf/commands/handlers/core.ts @@ -80,7 +80,7 @@ export function showHelp(ctx: ExtensionCommandContext, args = ""): void { "", "PROJECT KNOWLEDGE", " /sf knowledge Add rule, pattern, or lesson to KNOWLEDGE.md", - " /sf codebase [generate|update|stats] Manage the CODEBASE.md cache used in prompt context", + " /sf codebase [generate|update|stats|rag] Manage CODEBASE.md and optional code search", "", "SETUP & CONFIGURATION", " /sf init Project init wizard — detect, configure, bootstrap .sf/", diff --git a/src/resources/extensions/sf/docs/preferences-reference.md b/src/resources/extensions/sf/docs/preferences-reference.md index df70b65b8..ae2d14d7c 100644 --- a/src/resources/extensions/sf/docs/preferences-reference.md +++ b/src/resources/extensions/sf/docs/preferences-reference.md @@ -163,6 +163,14 @@ Setting `prefer_skills: []` does 
**not** disable skill discovery — it just mea - `skip_reassess`: boolean — force-disable roadmap reassessment even if `reassess_after_slice` is enabled. Default: `false`. - `skip_slice_research`: boolean — skip per-slice research. Default: `false`. +- `codebase`: configures `.sf/CODEBASE.md` and the optional Project RAG code-intelligence backend. Keys: + - `exclude_patterns`: string[] — extra file or directory patterns to omit from CODEBASE.md. + - `max_files`: number — maximum files to include in CODEBASE.md. Default: `500`. + - `collapse_threshold`: number — files-per-directory threshold before collapsing a directory summary. Default: `20`. + - `project_rag`: `"auto"`, `"off"`, or `"required"` — use Brainwires/project-rag MCP search when configured. Default: `"auto"`. + - `project_rag_server`: string — explicit MCP server name when the server cannot be detected from command or args. + - `project_rag_auto_index`: boolean — whether agents should prefer indexing before querying a configured Project RAG backend. Default: `true`. + - `remote_questions`: route interactive questions to Slack/Discord for headless auto-mode. Keys: - `channel`: `"slack"` or `"discord"` — channel type. - `channel_id`: string or number — channel ID. @@ -216,7 +224,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `auto_report`: boolean — generate an HTML report snapshot after each milestone completion. Default: `true`. -- `search_provider`: `"brave"`, `"tavily"`, `"ollama"`, `"combosearch"`, `"native"`, or `"auto"` — selects the search backend for research phases. `"combosearch"` fans out across all configured custom search backends and merges the results. `"native"` forces Anthropic's built-in web search only; provider values force that backend and disable native search; `"auto"` uses the default heuristic. Default: `"auto"`. 
+- `search_provider`: `"brave"`, `"tavily"`, `"serper"`, `"exa"`, `"ollama"`, `"combosearch"`, `"native"`, or `"auto"` — selects the search backend for research phases. `"combosearch"` fans out across all configured custom search backends and merges the results. `"native"` forces Anthropic's built-in web search only; provider values force that backend and disable native search; `"auto"` uses the default heuristic. Default: `"auto"`. - `context_selection`: `"full"` or `"smart"` — controls how files are inlined into context. `"full"` inlines entire files; `"smart"` uses semantic chunking to include only the most relevant sections. Default is derived from `token_profile`. diff --git a/src/resources/extensions/sf/guided-flow.ts b/src/resources/extensions/sf/guided-flow.ts index 31d3c7079..0a1e3a467 100644 --- a/src/resources/extensions/sf/guided-flow.ts +++ b/src/resources/extensions/sf/guided-flow.ts @@ -598,7 +598,8 @@ export function bootstrapProject(basePath: string): void { /** * Headless milestone creation from a seed specification document. * Bootstraps the project if needed, generates the next milestone ID, - * and dispatches the headless discuss prompt (no Q&A rounds). + * and dispatches the headless discuss prompt. Headless mode may ask only + * the final depth-verification gate before promoting draft knowledge. */ export async function showHeadlessMilestoneCreation( ctx: ExtensionCommandContext, @@ -621,7 +622,7 @@ export async function showHeadlessMilestoneCreation( const milestoneDir = join(sfRoot(basePath), "milestones", nextId, "slices"); mkdirSync(milestoneDir, { recursive: true }); - // Build and dispatch the headless discuss prompt + // Build and dispatch the headless discuss prompt. const prompt = buildHeadlessDiscussPrompt(nextId, seedContext, basePath); // Set pending auto start (auto-mode triggers on "Milestone X ready." via checkAutoStartAfterDiscuss) @@ -666,7 +667,8 @@ export async function dispatchNewMilestoneDiscuss( "2. 
Run existing tests (go test, cargo test, npm test, etc.) to measure current quality", "3. Web search for industry best practices for this type of software — testing strategies, architecture patterns, operational requirements", "4. Research any libraries, frameworks, or external services involved — get current API docs and constraints", - "5. Identify gaps: missing tests, incomplete features, error handling, observability, security, documentation", + "5. Update .sf/CODEBASE.md with verified project knowledge: stack signals, critical paths, file descriptions, verification commands, and skill needs", + "6. Identify gaps: missing tests, incomplete features, error handling, observability, security, documentation", "", "Goal: define milestones that represent the highest-value work to make this software production-ready, well-tested, and complete.", "Use all available models and research tools. Treat your findings as the specification.", diff --git a/src/resources/extensions/sf/health-widget-core.ts b/src/resources/extensions/sf/health-widget-core.ts index 243855afe..bdfcbff7d 100644 --- a/src/resources/extensions/sf/health-widget-core.ts +++ b/src/resources/extensions/sf/health-widget-core.ts @@ -23,6 +23,8 @@ export interface HealthWidgetData { /** Subject line of the last commit, or null if unavailable. */ lastCommitMessage: string | null; lastRefreshed: number; + /** Whether a remote questions channel (Telegram/Discord/Slack) is configured. 
*/ + remoteQuestionsConfigured: boolean; } export function detectHealthWidgetProjectState(basePath: string): HealthWidgetProjectState { @@ -107,5 +109,10 @@ export function buildHealthLines(data: HealthWidgetData): string[] { parts.push(`Last commit: ${relTime}${msg}`); } + // Suggest remote questions if not configured (helps auto-mode users) + if (data.projectState === "active" && !data.remoteQuestionsConfigured) { + parts.push("/sf remote telegram"); + } + return [` ${parts.join(" │ ")}`]; } diff --git a/src/resources/extensions/sf/health-widget.ts b/src/resources/extensions/sf/health-widget.ts index fa32b1043..a8d1db4f8 100644 --- a/src/resources/extensions/sf/health-widget.ts +++ b/src/resources/extensions/sf/health-widget.ts @@ -18,6 +18,7 @@ import { loadLedgerFromDisk, getProjectTotals } from "./metrics.js"; import { describeNextUnit, estimateTimeRemaining, updateSliceProgressCache } from "./auto-dashboard.js"; import { projectRoot } from "./commands/context.js"; import { deriveState, invalidateStateCache } from "./state.js"; +import { isRemoteConfigured } from "../remote-questions/manager.js"; import { buildHealthLines, detectHealthWidgetProjectState, @@ -83,6 +84,7 @@ function loadHealthWidgetData(basePath: string): HealthWidgetData { lastCommitEpoch, lastCommitMessage, lastRefreshed: Date.now(), + remoteQuestionsConfigured: isRemoteConfigured(), }; } diff --git a/src/resources/extensions/sf/key-manager.ts b/src/resources/extensions/sf/key-manager.ts index 2b1a62c61..78e38a34d 100644 --- a/src/resources/extensions/sf/key-manager.ts +++ b/src/resources/extensions/sf/key-manager.ts @@ -44,6 +44,7 @@ export const PROVIDER_REGISTRY: ProviderInfo[] = [ { id: "xai", label: "xAI (Grok)", category: "llm", envVar: "XAI_API_KEY", dashboardUrl: "console.x.ai" }, { id: "openrouter", label: "OpenRouter", category: "llm", envVar: "OPENROUTER_API_KEY", dashboardUrl: "openrouter.ai/keys" }, { id: "mistral", label: "Mistral", category: "llm", envVar: "MISTRAL_API_KEY", 
dashboardUrl: "console.mistral.ai" }, + { id: "xiaomi", label: "Xiaomi MiMo", category: "llm", envVar: "XIAOMI_API_KEY", dashboardUrl: "token-plan-ams.xiaomimimo.com" }, { id: "ollama-cloud", label: "Ollama Cloud", category: "llm", envVar: "OLLAMA_API_KEY" }, { id: "opencode-go", label: "OpenCode Go", category: "llm", envVar: "OPENCODE_API_KEY", dashboardUrl: "opencode.ai/zen" }, { id: "custom-openai", label: "Custom (OpenAI-compat)", category: "llm", envVar: "CUSTOM_OPENAI_API_KEY" }, @@ -59,6 +60,8 @@ export const PROVIDER_REGISTRY: ProviderInfo[] = [ // Search Providers { id: "tavily", label: "Tavily Search", category: "search", envVar: "TAVILY_API_KEY", dashboardUrl: "tavily.com/app/api-keys" }, { id: "brave", label: "Brave Search", category: "search", envVar: "BRAVE_API_KEY", dashboardUrl: "brave.com/search/api" }, + { id: "serper", label: "Serper", category: "search", envVar: "SERPER_API_KEY", dashboardUrl: "serper.dev" }, + { id: "exa", label: "Exa Search", category: "search", envVar: "EXA_API_KEY", dashboardUrl: "dashboard.exa.ai" }, // Remote Integrations { id: "discord_bot", label: "Discord Bot", category: "remote", envVar: "DISCORD_BOT_TOKEN" }, diff --git a/src/resources/extensions/sf/learning/data/model-benchmarks.json b/src/resources/extensions/sf/learning/data/model-benchmarks.json index c8f3bc474..4e5d58343 100644 --- a/src/resources/extensions/sf/learning/data/model-benchmarks.json +++ b/src/resources/extensions/sf/learning/data/model-benchmarks.json @@ -174,6 +174,25 @@ "context_window": 262144, "max_output_tokens": 32768 }, + "kimi-for-coding": { + "swe_bench": null, + "swe_bench_verified": null, + "live_code_bench": 85, + "human_eval": 99, + "hle": 44.9, + "aime_2026": null, + "gpqa": 74, + "mmlu_pro": null, + "bbh": null, + "browse_comp": 60.2, + "simple_qa": null, + "long_context_ruler": null, + "arena_elo": null, + "instruction_following": null, + "source": "Kimi For Coding live endpoint — canonical ID backed by K2.6 coding surface", + 
"context_window": 262144, + "max_output_tokens": 32768 + }, "kimi-k2.5": { "swe_bench": null, "swe_bench_verified": null, @@ -904,4 +923,4 @@ "context_window": 1048576, "max_output_tokens": 65536 } -} \ No newline at end of file +} diff --git a/src/resources/extensions/sf/learning/data/primary-provider-chain.json b/src/resources/extensions/sf/learning/data/primary-provider-chain.json index 83893ea4d..578d14081 100644 --- a/src/resources/extensions/sf/learning/data/primary-provider-chain.json +++ b/src/resources/extensions/sf/learning/data/primary-provider-chain.json @@ -1,5 +1,5 @@ [ - {"provider": "kimi-coding", "model": "k2p5", "priority": 0}, + {"provider": "kimi-coding", "model": "kimi-for-coding", "priority": 0}, {"provider": "ollama-cloud", "model": "kimi-k2.5:cloud", "priority": 1}, {"provider": "opencode-go", "model": "kimi-k2.5", "priority": 2} ] diff --git a/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs b/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs index a2e95a0cf..85b6086e0 100644 --- a/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs +++ b/src/resources/extensions/sf/learning/fallback-chain-writer.test.mjs @@ -257,7 +257,7 @@ test("writeFallbackChains warns via log when project-level .sf/agent/settings.js } }); -test("writeFallbackChains always emits the hardcoded main chain with three kimi-k2.5 provider routes", () => { +test("writeFallbackChains always emits the hardcoded main chain with canonical kimi-for-coding primary route", () => { const { dir, settingsPath } = makeTempSettingsDir(); try { // Deps deliberately minimal — no overrides, no enabledModels — so @@ -273,7 +273,7 @@ test("writeFallbackChains always emits the hardcoded main chain with three kimi- assert.equal(mainChain.length, 3, "main chain has exactly 3 entries"); assert.equal(mainChain[0].provider, "kimi-coding"); - assert.equal(mainChain[0].model, "k2p5"); + assert.equal(mainChain[0].model, "kimi-for-coding"); 
assert.equal(mainChain[0].priority, 0); assert.equal(mainChain[1].provider, "ollama-cloud"); diff --git a/src/resources/extensions/sf/markdown-renderer.ts b/src/resources/extensions/sf/markdown-renderer.ts index e96c1c967..34df49456 100644 --- a/src/resources/extensions/sf/markdown-renderer.ts +++ b/src/resources/extensions/sf/markdown-renderer.ts @@ -157,6 +157,67 @@ function renderRoadmapMarkdown(milestone: MilestoneRow, slices: SliceRow[]): str lines.push(`**Vision:** ${milestone.vision}`); lines.push(""); + if (milestone.vision_meeting) { + lines.push("## Vision Alignment Meeting"); + lines.push(""); + lines.push("### Trigger"); + lines.push(""); + lines.push(milestone.vision_meeting.trigger.trim()); + lines.push(""); + lines.push("### Product Manager"); + lines.push(""); + lines.push(milestone.vision_meeting.pm.trim()); + lines.push(""); + lines.push("### User Advocate"); + lines.push(""); + lines.push(milestone.vision_meeting.userAdvocate.trim()); + lines.push(""); + lines.push("### Customer Panel"); + lines.push(""); + lines.push(milestone.vision_meeting.customerPanel.trim()); + lines.push(""); + lines.push("### Business"); + lines.push(""); + lines.push(milestone.vision_meeting.business.trim()); + lines.push(""); + lines.push("### Researcher"); + lines.push(""); + lines.push(milestone.vision_meeting.researcher.trim()); + lines.push(""); + lines.push("### Delivery Lead"); + lines.push(""); + lines.push(milestone.vision_meeting.deliveryLead.trim()); + lines.push(""); + lines.push("### Partner"); + lines.push(""); + lines.push(milestone.vision_meeting.partner.trim()); + lines.push(""); + lines.push("### Combatant"); + lines.push(""); + lines.push(milestone.vision_meeting.combatant.trim()); + lines.push(""); + lines.push("### Architect"); + lines.push(""); + lines.push(milestone.vision_meeting.architect.trim()); + lines.push(""); + lines.push("### Moderator"); + lines.push(""); + lines.push(milestone.vision_meeting.moderator.trim()); + lines.push(""); + 
lines.push("### Weighted Synthesis"); + lines.push(""); + lines.push(milestone.vision_meeting.weightedSynthesis.trim()); + lines.push(""); + lines.push("### Confidence By Area"); + lines.push(""); + lines.push(milestone.vision_meeting.confidenceByArea.trim()); + lines.push(""); + lines.push("### Recommended Route"); + lines.push(""); + lines.push(milestone.vision_meeting.recommendedRoute); + lines.push(""); + } + if (milestone.success_criteria.length > 0) { lines.push("## Success Criteria"); lines.push(""); @@ -301,6 +362,62 @@ function renderSlicePlanMarkdown(slice: SliceRow, tasks: TaskRow[], gates: GateR lines.push(""); } + lines.push("## Adversarial Review"); + lines.push(""); + lines.push("### Partner Review"); + lines.push(""); + lines.push(slice.adversarial_partner?.trim() || "Missing partner review."); + lines.push(""); + lines.push("### Combatant Review"); + lines.push(""); + lines.push(slice.adversarial_combatant?.trim() || "Missing combatant review."); + lines.push(""); + lines.push("### Architect Review"); + lines.push(""); + lines.push(slice.adversarial_architect?.trim() || "Missing architect review."); + lines.push(""); + + if (slice.planning_meeting) { + lines.push("## Planning Meeting"); + lines.push(""); + lines.push("### Trigger"); + lines.push(""); + lines.push(slice.planning_meeting.trigger.trim()); + lines.push(""); + lines.push("### Product Manager"); + lines.push(""); + lines.push(slice.planning_meeting.pm.trim()); + lines.push(""); + lines.push("### Researcher"); + lines.push(""); + lines.push(slice.planning_meeting.researcher.trim()); + lines.push(""); + lines.push("### Partner"); + lines.push(""); + lines.push(slice.planning_meeting.partner.trim()); + lines.push(""); + lines.push("### Combatant"); + lines.push(""); + lines.push(slice.planning_meeting.combatant.trim()); + lines.push(""); + lines.push("### Architect"); + lines.push(""); + lines.push(slice.planning_meeting.architect.trim()); + lines.push(""); + lines.push("### Moderator"); + 
lines.push(""); + lines.push(slice.planning_meeting.moderator.trim()); + lines.push(""); + lines.push("### Recommended Route"); + lines.push(""); + lines.push(slice.planning_meeting.recommendedRoute); + lines.push(""); + lines.push("### Confidence"); + lines.push(""); + lines.push(slice.planning_meeting.confidenceSummary.trim()); + lines.push(""); + } + if (slice.proof_level.trim()) { lines.push("## Proof Level"); lines.push(""); diff --git a/src/resources/extensions/sf/metrics.ts b/src/resources/extensions/sf/metrics.ts index c622e7fdd..1b4bdec6e 100644 --- a/src/resources/extensions/sf/metrics.ts +++ b/src/resources/extensions/sf/metrics.ts @@ -35,11 +35,11 @@ export { formatTokenCount } from "../shared/format-utils.js"; */ function inferProviderFromBareModelId(modelId: string): string { const lower = modelId.toLowerCase(); - if (lower === "k2p5" || lower === "kimi-k2-thinking") return "kimi-coding"; + if (lower === "kimi-for-coding" || lower === "k2p5" || lower === "kimi-k2-thinking") return "kimi-coding"; if (lower.startsWith("minimax-m")) return "ollama-cloud"; if (lower.startsWith("minimax-") || modelId.startsWith("MiniMax-")) return "minimax"; if (lower.startsWith("glm-")) return "zai"; - if (lower.startsWith("mimo-")) return "xiaomi-token-plan-ams"; + if (lower.startsWith("mimo-")) return "xiaomi"; if (lower.startsWith("gemini-")) return "google-gemini-cli"; if (lower.startsWith("magistral-") || lower.startsWith("mistral-") || lower.startsWith("devstral-") || lower.startsWith("codestral-") || lower.startsWith("ministral-") || lower.startsWith("pixtral-")) return "mistral"; return "unknown"; diff --git a/src/resources/extensions/sf/preferences-types.ts b/src/resources/extensions/sf/preferences-types.ts index 74df5980e..ed2cdad0d 100644 --- a/src/resources/extensions/sf/preferences-types.ts +++ b/src/resources/extensions/sf/preferences-types.ts @@ -196,6 +196,8 @@ export interface AutoSupervisorConfig { idle_timeout_minutes?: number; hard_timeout_minutes?: 
number; phase_timeout_minutes?: number; + /** Enable supervised mode in headless auto - wait for stdin instead of exiting on questions. Default: true */ + supervised_mode?: boolean; } export interface RemoteQuestionsConfig { @@ -203,6 +205,23 @@ export interface RemoteQuestionsConfig { channel_id: string | number; timeout_minutes?: number; // clamped to 1-30 poll_interval_seconds?: number; // clamped to 2-30 + auto_resolve_on_timeout?: boolean; + auto_resolve_strategy?: "recommended-option"; +} + +export interface DispatchExperimentPreferences { + /** + * Explicit rule-name order override. Unknown names are ignored and omitted + * rules keep their original relative order after the listed rules. + */ + order?: string[]; + /** + * Named dispatch-order variants for lightweight A/B experimentation. + * Each variant is an order array with the same semantics as `order`. + */ + variants?: Record; + /** Active variant name. If set and found in `variants`, it wins over `order`. */ + active_variant?: string; } export interface CmuxPreferences { @@ -262,6 +281,7 @@ export interface ExperimentalPreferences { * Default: false (opt-in required). */ rtk?: boolean; + dispatch_rules?: DispatchExperimentPreferences; } /** Configuration for the codebase map generator (/sf codebase). */ @@ -272,6 +292,12 @@ export interface CodebaseMapPreferences { max_files?: number; /** Files-per-directory threshold before collapsing to a summary line. Default: 20. */ collapse_threshold?: number; + /** Optional Brainwires/project-rag MCP backend. Default: "auto" (use if configured, never block if missing). */ + project_rag?: "auto" | "off" | "required"; + /** MCP server name for project-rag when it cannot be detected from command/args. */ + project_rag_server?: string; + /** Whether prompts should prefer indexing before querying when project-rag is configured. Default: true. 
*/ + project_rag_auto_index?: boolean; } export interface SFPreferences { @@ -314,8 +340,8 @@ export interface SFPreferences { verification_commands?: string[]; verification_auto_fix?: boolean; verification_max_retries?: number; - /** Search provider preference. "brave"/"tavily"/"ollama"/"combosearch" force that backend and disable native Anthropic search. "native" forces native only. "auto" = current default behavior. */ - search_provider?: "brave" | "tavily" | "ollama" | "combosearch" | "native" | "auto"; + /** Search provider preference. "brave"/"tavily"/"serper"/"exa"/"ollama"/"combosearch" force that backend and disable native Anthropic search. "native" forces native only. "auto" = current default behavior. */ + search_provider?: "brave" | "tavily" | "serper" | "exa" | "ollama" | "combosearch" | "native" | "auto"; /** Context selection mode for file inlining. "full" inlines entire files, "smart" uses semantic chunking. Default derived from token profile. */ context_selection?: ContextSelectionMode; /** Default widget display mode for auto-mode dashboard. "full" | "small" | "min" | "off". Default: "full". 
*/ diff --git a/src/resources/extensions/sf/preferences-validation.ts b/src/resources/extensions/sf/preferences-validation.ts index 0da858b58..8b40a0558 100644 --- a/src/resources/extensions/sf/preferences-validation.ts +++ b/src/resources/extensions/sf/preferences-validation.ts @@ -17,6 +17,7 @@ import { KNOWN_UNIT_TYPES, SKILL_ACTIONS, + type DispatchExperimentPreferences, type WorkflowMode, type SFPreferences, type SFSkillRule, @@ -384,11 +385,11 @@ export function validatePreferences(preferences: SFPreferences): { // ─── Search Provider ───────────────────────────────────────────── if (preferences.search_provider !== undefined) { - const validSearchProviders = new Set(["brave", "tavily", "ollama", "combosearch", "native", "auto"]); + const validSearchProviders = new Set(["brave", "tavily", "serper", "exa", "ollama", "combosearch", "native", "auto"]); if (typeof preferences.search_provider === "string" && validSearchProviders.has(preferences.search_provider)) { validated.search_provider = preferences.search_provider as SFPreferences["search_provider"]; } else { - errors.push(`search_provider must be one of: brave, tavily, ollama, combosearch, native, auto`); + errors.push(`search_provider must be one of: brave, tavily, serper, exa, ollama, combosearch, native, auto`); } } @@ -495,7 +496,39 @@ export function validatePreferences(preferences: SFPreferences): { // ─── Auto Supervisor ──────────────────────────────────────────────── if (preferences.auto_supervisor !== undefined) { if (preferences.auto_supervisor && typeof preferences.auto_supervisor === "object") { - validated.auto_supervisor = preferences.auto_supervisor; + const as = preferences.auto_supervisor as Record; + const validatedAs: NonNullable = {}; + + if (as.model !== undefined) { + if (typeof as.model === "string") validatedAs.model = as.model; + else errors.push("auto_supervisor.model must be a string"); + } + if (as.supervised_mode !== undefined) { + if (typeof as.supervised_mode === "boolean") 
validatedAs.supervised_mode = as.supervised_mode; + else errors.push("auto_supervisor.supervised_mode must be a boolean (true/false)"); + } + if (as.soft_timeout_minutes !== undefined) { + const val = Number(as.soft_timeout_minutes); + if (!Number.isNaN(val) && val >= 0) validatedAs.soft_timeout_minutes = val; + else errors.push("auto_supervisor.soft_timeout_minutes must be a non-negative number"); + } + if (as.idle_timeout_minutes !== undefined) { + const val = Number(as.idle_timeout_minutes); + if (!Number.isNaN(val) && val >= 0) validatedAs.idle_timeout_minutes = val; + else errors.push("auto_supervisor.idle_timeout_minutes must be a non-negative number"); + } + if (as.hard_timeout_minutes !== undefined) { + const val = Number(as.hard_timeout_minutes); + if (!Number.isNaN(val) && val >= 0) validatedAs.hard_timeout_minutes = val; + else errors.push("auto_supervisor.hard_timeout_minutes must be a non-negative number"); + } + if (as.phase_timeout_minutes !== undefined) { + const val = Number(as.phase_timeout_minutes); + if (!Number.isNaN(val) && val >= 0) validatedAs.phase_timeout_minutes = val; + else errors.push("auto_supervisor.phase_timeout_minutes must be a non-negative number"); + } + + validated.auto_supervisor = validatedAs; } else { errors.push("auto_supervisor must be an object"); } @@ -539,7 +572,52 @@ export function validatePreferences(preferences: SFPreferences): { // ─── Remote Questions ─────────────────────────────────────────────── if (preferences.remote_questions !== undefined) { if (preferences.remote_questions && typeof preferences.remote_questions === "object") { - validated.remote_questions = preferences.remote_questions; + const rq = preferences.remote_questions as unknown as Record; + const validRq: NonNullable = { + channel: rq.channel as NonNullable["channel"], + channel_id: rq.channel_id as string | number, + }; + + if (rq.timeout_minutes !== undefined) { + const timeout = Number(rq.timeout_minutes); + if (Number.isFinite(timeout)) 
validRq.timeout_minutes = timeout; + else errors.push("remote_questions.timeout_minutes must be a number"); + } + if (rq.poll_interval_seconds !== undefined) { + const poll = Number(rq.poll_interval_seconds); + if (Number.isFinite(poll)) validRq.poll_interval_seconds = poll; + else errors.push("remote_questions.poll_interval_seconds must be a number"); + } + if (rq.auto_resolve_on_timeout !== undefined) { + if (typeof rq.auto_resolve_on_timeout === "boolean") { + validRq.auto_resolve_on_timeout = rq.auto_resolve_on_timeout; + } else { + errors.push("remote_questions.auto_resolve_on_timeout must be a boolean"); + } + } + if (rq.auto_resolve_strategy !== undefined) { + if (rq.auto_resolve_strategy === "recommended-option") { + validRq.auto_resolve_strategy = "recommended-option"; + } else { + errors.push('remote_questions.auto_resolve_strategy must be "recommended-option"'); + } + } + + const knownRemoteKeys = new Set([ + "channel", + "channel_id", + "timeout_minutes", + "poll_interval_seconds", + "auto_resolve_on_timeout", + "auto_resolve_strategy", + ]); + for (const key of Object.keys(rq)) { + if (!knownRemoteKeys.has(key)) { + warnings.push(`unknown remote_questions key "${key}" — ignored`); + } + } + + validated.remote_questions = validRq; } else { errors.push("remote_questions must be an object"); } @@ -1125,7 +1203,63 @@ export function validatePreferences(preferences: SFPreferences): { else errors.push("experimental.rtk must be a boolean"); } - const knownExpKeys = new Set(["rtk"]); + if (exp.dispatch_rules !== undefined) { + if (typeof exp.dispatch_rules === "object" && exp.dispatch_rules !== null) { + const rawDispatch = exp.dispatch_rules as Record; + const validDispatch: DispatchExperimentPreferences = {}; + + if (rawDispatch.order !== undefined) { + if (Array.isArray(rawDispatch.order) && rawDispatch.order.every((item) => typeof item === "string")) { + validDispatch.order = rawDispatch.order + .map((item) => item.trim()) + .filter((item) => item.length > 
0); + } else { + errors.push("experimental.dispatch_rules.order must be an array of strings"); + } + } + + if (rawDispatch.variants !== undefined) { + if (typeof rawDispatch.variants === "object" && rawDispatch.variants !== null && !Array.isArray(rawDispatch.variants)) { + const validVariants: Record = {}; + for (const [variantName, variantOrder] of Object.entries(rawDispatch.variants)) { + if (!Array.isArray(variantOrder) || variantOrder.some((item) => typeof item !== "string")) { + errors.push(`experimental.dispatch_rules.variants.${variantName} must be an array of strings`); + continue; + } + validVariants[variantName] = variantOrder + .map((item) => item.trim()) + .filter((item) => item.length > 0); + } + validDispatch.variants = validVariants; + } else { + errors.push("experimental.dispatch_rules.variants must be an object mapping variant names to string arrays"); + } + } + + if (rawDispatch.active_variant !== undefined) { + if (typeof rawDispatch.active_variant === "string" && rawDispatch.active_variant.trim().length > 0) { + validDispatch.active_variant = rawDispatch.active_variant.trim(); + } else { + errors.push("experimental.dispatch_rules.active_variant must be a non-empty string"); + } + } + + const knownDispatchKeys = new Set(["order", "variants", "active_variant"]); + for (const key of Object.keys(rawDispatch)) { + if (!knownDispatchKeys.has(key)) { + warnings.push(`unknown experimental.dispatch_rules key "${key}" — ignored`); + } + } + + if (Object.keys(validDispatch).length > 0) { + validExp.dispatch_rules = validDispatch; + } + } else { + errors.push("experimental.dispatch_rules must be an object"); + } + } + + const knownExpKeys = new Set(["rtk", "dispatch_rules"]); for (const key of Object.keys(exp)) { if (!knownExpKeys.has(key)) { warnings.push(`unknown experimental key "${key}" — ignored`); @@ -1169,8 +1303,36 @@ export function validatePreferences(preferences: SFPreferences): { errors.push("codebase.collapse_threshold must be a positive 
integer"); } } + if (cb.project_rag !== undefined) { + if (cb.project_rag === "auto" || cb.project_rag === "off" || cb.project_rag === "required") { + validCb.project_rag = cb.project_rag; + } else { + errors.push('codebase.project_rag must be one of "auto", "off", or "required"'); + } + } + if (cb.project_rag_server !== undefined) { + if (typeof cb.project_rag_server === "string" && cb.project_rag_server.trim().length > 0) { + validCb.project_rag_server = cb.project_rag_server.trim(); + } else { + errors.push("codebase.project_rag_server must be a non-empty string"); + } + } + if (cb.project_rag_auto_index !== undefined) { + if (typeof cb.project_rag_auto_index === "boolean") { + validCb.project_rag_auto_index = cb.project_rag_auto_index; + } else { + errors.push("codebase.project_rag_auto_index must be a boolean"); + } + } - const knownCbKeys = new Set(["exclude_patterns", "max_files", "collapse_threshold"]); + const knownCbKeys = new Set([ + "exclude_patterns", + "max_files", + "collapse_threshold", + "project_rag", + "project_rag_server", + "project_rag_auto_index", + ]); for (const key of Object.keys(cb)) { if (!knownCbKeys.has(key)) { warnings.push(`unknown codebase key "${key}" — ignored`); diff --git a/src/resources/extensions/sf/prompts/complete-milestone.md b/src/resources/extensions/sf/prompts/complete-milestone.md index 288635348..85ca40167 100644 --- a/src/resources/extensions/sf/prompts/complete-milestone.md +++ b/src/resources/extensions/sf/prompts/complete-milestone.md @@ -24,6 +24,14 @@ Then: 7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.sf/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone. 8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. 
Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof. +### Parallel Follow-up Classification + +Before completion, classify leftover work into one of two buckets: +- **Blocking completeness work** — required to truthfully claim the milestone is done. Missing contract tests, unresolved bugs, missing proof for promised behavior, required launch/safety checks, or anything needed for the milestone's stated outcome. These BLOCK completion. +- **Parallel hardening / follow-up work** — useful but not required for the milestone's honest claim. Examples: broader regression coverage, extra negative tests, load/perf expansion, docs improvements, cleanup, deeper observability, additional adapters. + +If work falls into the second bucket, do not fail the milestone just because it exists. Record it as a follow-up for a parallel track or later milestone. If it falls into the first bucket, the milestone is not complete. + **DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `sf_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `sf_*` tools — never via direct SQL. ### Verification Gate — STOP if verification failed diff --git a/src/resources/extensions/sf/prompts/discuss-headless.md b/src/resources/extensions/sf/prompts/discuss-headless.md index 414c0c1e0..59e3e0340 100644 --- a/src/resources/extensions/sf/prompts/discuss-headless.md +++ b/src/resources/extensions/sf/prompts/discuss-headless.md @@ -1,13 +1,13 @@ # Headless Milestone Creation -You are creating a SF milestone from a provided specification document. This is a **headless** (non-interactive) flow — do NOT ask the user any questions. Wherever the interactive flow would ask the user, make your best-judgment call and document it as an assumption. 
+You are creating a SF milestone from a provided specification document. This is a **headless** flow: do not ask exploratory user questions. Make best-judgment calls and document them as assumptions. The only allowed user question is the final `depth_verification_{{milestoneId}}_confirm` write gate when your evidence is strong enough to promote a final `CONTEXT.md`; if that gate is unavailable or not confirmed, write a draft and stop. ## Active Skills Apply these skills throughout this session: ### `codebase-analysis` -Run the full four-phase codebase analysis before planning any milestones: (1) orientation map, (2) ultra-granular critical path analysis, (3) technical debt inventory with priority scores, (4) test coverage gaps. Write `.sf/CODEBASE-ANALYSIS.md` with findings. This is the evidence base for all planning decisions. +Run the full four-phase codebase analysis before planning any milestones: (1) orientation map, (2) ultra-granular critical path analysis, (3) technical debt inventory with priority scores, (4) test coverage gaps. Update `.sf/CODEBASE.md` with verified findings, file descriptions for active paths, critical runtime boundaries, verification commands, and skill needs. This is the canonical project-knowledge base for all planning decisions. ### `architecture-planning` Map the architecture (C4 Level 1-2) before designing milestones. Identify deep vs shallow modules, coupling problems, boundary violations. Every significant architectural decision made during planning gets an ADR in `docs/adr/`. Update `.sf/DECISIONS.md` via `sf_decision_save` for architectural decisions. @@ -51,6 +51,8 @@ Summarize your understanding of the specification concretely — not abstractly: Print this reflection in chat. Do not skip this step. +Do not stop after the reflection. Continue through vision mapping, mandatory investigation, project-knowledge updates, requirements, strategy, depth verification/draft fallback, and artifact writes in this same run. 
A headless run that only prints reflection has failed its contract. + ## Vision Mapping Decide the approach based on the actual scope: @@ -73,6 +75,8 @@ Before anything else, form a diagnosis: What is the core challenge? What is brok - **Run tests**: `go test ./...`, `cargo test`, `npm test`, `pytest` — failing tests are requirements - **Measure coverage**: find untested critical paths - **Scan for dead code, stubs, and commented-out features** — abandoned attempts are signals +- **Discover needed skills**: identify repo languages, frameworks, data stores, external services, build tools, and domain-specific competencies. Check installed skills first; record installed, missing, and potentially useful skills in `.sf/CODEBASE.md` and `.sf/PM-STRATEGY.md`. +- **Use code intelligence when available**: if the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, index/query it for broad concept, symbol, schema, and git-history searches before manually reading files. If it is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout. - Use `rg`, `find`, `ast-grep`, `ls -la` for broad codebase mapping ### Step 2: Check library and ecosystem facts @@ -123,11 +127,18 @@ If the spec leaves any of these unresolved, make your best-judgment call and doc Print a structured depth summary in chat covering: - What you understood the spec to describe - Key technical findings from investigation +- Project knowledge now recorded in `.sf/CODEBASE.md`, including stack signals, critical paths, verification commands, skill needs, and unresolved knowledge gaps - Assumptions you made and why - Areas where you're least confident This is your audit trail. Print it — do not skip it. +Before writing final `CONTEXT.md`, decide confidence: +- **HIGH**: You have verified the project knowledge above from actual files/tests/research, and the milestone scope is specific enough for downstream agents. 
Call `ask_user_questions` once with question ID `depth_verification_{{milestoneId}}_confirm`; make the first option "Proceed with final context (Recommended)" and the second option "Keep as draft". If confirmation is not received, do not bypass the gate — write the draft instead. +- **MEDIUM or LOW**: Do not call the gate. Write `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT-DRAFT.md` with the evidence, assumptions, and open questions, then stop. + +The write gate is intentional. It prevents final milestone context from being created from shallow repo knowledge. + ## Focused Research Do a focused research pass before roadmap creation. @@ -219,12 +230,13 @@ In a single pass: **Depth-Preservation Guidance for context.md:** Preserve the specification's exact terminology, emphasis, and specific framing. Do not paraphrase domain-specific language into generics. If the spec said "craft feel," write "craft feel" — not "high-quality user experience." The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision. -4. Write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during research. Include an "Assumptions" section documenting every judgment call. -5. Call `sf_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters. -6.
For each architectural or pattern decision, call `sf_decision_save` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically. -7. {{commitInstruction}} +4. If `depth_verification_{{milestoneId}}_confirm` was confirmed, write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during research. Include an "Assumptions" section documenting every judgment call. +5. If depth verification was not confirmed, write `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT-DRAFT.md` instead. Include the project-knowledge evidence, confidence level, assumptions, open questions, and what must be researched next. Do **not** call `sf_plan_milestone`. End with: "Milestone {{milestoneId}} drafted for discussion." +6. Only after confirmed final context, call `sf_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters. +7. For each architectural or pattern decision, call `sf_decision_save` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically. +8. {{commitInstruction}} -After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically. +After writing final context and roadmap, say exactly: "Milestone {{milestoneId}} ready." — nothing else. If you wrote a draft, say exactly: "Milestone {{milestoneId}} drafted for discussion." — nothing else. 
### Multi-Milestone @@ -237,8 +249,8 @@ After writing the files, say exactly: "Milestone {{milestoneId}} ready." — not #### Phase 2: Primary milestone -5. Write a full `CONTEXT.md` for the primary milestone (the first in sequence). Include an "Assumptions" section. -6. Call `sf_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done. +5. For the primary milestone (the first in sequence), write a full `CONTEXT.md` only if `depth_verification_{{milestoneId}}_confirm` was confirmed. If not confirmed, write `CONTEXT-DRAFT.md`, do not call `sf_plan_milestone`, and stop. +6. After confirmed final context, call `sf_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done. #### MANDATORY: depends_on Frontmatter in CONTEXT.md @@ -296,20 +308,21 @@ For single-milestone projects, do NOT write this file. 7. {{multiMilestoneCommitInstruction}} -After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically. +After writing final context and roadmap, say exactly: "Milestone {{milestoneId}} ready." — nothing else. If you wrote a draft, say exactly: "Milestone {{milestoneId}} drafted for discussion." — nothing else. ## Critical Rules -- **DO NOT ask the user any questions** — this is headless mode. Make judgment calls and document them. +- **Do not ask exploratory user questions** — this is headless mode. Make judgment calls and document them. The only allowed user question is `depth_verification_{{milestoneId}}_confirm`, and only when evidence is strong enough to finalize. 
- **Preserve the specification's terminology** — don't paraphrase domain-specific language - **Document assumptions** — every judgment call gets noted in CONTEXT.md under "Assumptions" with reasoning - **Investigate thoroughly** — scout codebase, check library docs, web search. Same rigor as interactive mode. +- **Build project knowledge first** — update `.sf/CODEBASE.md` with stack signals, critical paths, verification commands, skill needs, file descriptions, and unresolved gaps before writing context. - **Do focused research** — identify table stakes, domain standards, omissions, scope traps. Same rigor as interactive mode. - **Use proper tools** — `sf_plan_milestone` for roadmaps, `sf_decision_save` for decisions, `sf_milestone_generate_id` for IDs - **Print artifacts in chat** — requirements table, roadmap preview, depth summary. The TUI scrollback is the user's audit trail. - **Use depends_on frontmatter** for multi-milestone sequences - **Anti-reduction rule** — if the spec describes a big vision, plan the big vision. Phase complexity — don't cut it. - **Naming convention** — always use `sf_milestone_generate_id` for IDs. Directories use bare IDs, files use ID-SUFFIX format. -- **End with "Milestone {{milestoneId}} ready."** — this triggers auto-start detection +- **End with "Milestone {{milestoneId}} ready." only after final context and roadmap exist.** Draft output must end with "Milestone {{milestoneId}} drafted for discussion." so auto-mode does not start from shallow knowledge. 
{{inlinedTemplates}} diff --git a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md index c3ba272cb..ea85b076f 100644 --- a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md @@ -16,6 +16,7 @@ Apply `pm-planning` skill thinking throughout: use Working Backwards to anchor o Do a lightweight targeted investigation so your questions are grounded in reality: - Scout the codebase (`rg`, `find`, or `scout`) to understand what already exists that this milestone touches or builds on +- If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP search tools for broad concept, symbol, schema, and git-history lookup before manually reading files - Check the roadmap context above (if present) to understand what surrounds this milestone - **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier). - Identify the 3–5 biggest behavioural and architectural unknowns: things where the user's answer will materially change what gets built diff --git a/src/resources/extensions/sf/prompts/guided-plan-milestone.md b/src/resources/extensions/sf/prompts/guided-plan-milestone.md index bfaca1496..f54a76838 100644 --- a/src/resources/extensions/sf/prompts/guided-plan-milestone.md +++ b/src/resources/extensions/sf/prompts/guided-plan-milestone.md @@ -1,5 +1,27 @@ Plan milestone {{milestoneId}} ("{{milestoneTitle}}"). Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists and treat Active requirements as the capability contract. 
If `REQUIREMENTS.md` is missing, continue in legacy compatibility mode but explicitly note missing requirement coverage. Use the **Roadmap** output template below to shape the milestone planning payload you send to `sf_plan_milestone`. Call `sf_plan_milestone` to persist the milestone planning fields and render `{{milestoneId}}-ROADMAP.md` from DB state. Do **not** write `{{milestoneId}}-ROADMAP.md`, `ROADMAP.md`, or other planning artifacts manually. If planning produces structural decisions, append them to `.sf/DECISIONS.md`. {{skillActivation}} Fill the Horizontal Checklist section with cross-cutting concerns considered during planning (requirements re-read, decisions re-evaluated, graceful shutdown, revenue paths, auth boundary, shared resources, reconnection). Omit for trivial milestones. +Before calling `sf_plan_milestone`, run a bounded **Vision Alignment Meeting** for the milestone and roadmap. This is allowed to be broader and more nuanced than slice planning. Include at least these participant lenses: +- Product Manager +- User Advocate +- Customer Panel +- Business +- Researcher +- Delivery Lead +- Partner +- Combatant +- Architect +- Moderator + +All of them may suggest roadmap additions, cuts, sequencing changes, or deferrals. The roadmap is decided by the moderator's **weighted synthesis**, not raw vote count. Record the meeting in the `visionMeeting` payload with: +- `trigger` +- all participant views above +- `weightedSynthesis` +- `confidenceByArea` +- `recommendedRoute` + +If the meeting concludes `researching` or `discussing`, persist that honestly. The system will keep the milestone in planning until the weighted route becomes `planning`. +If confidence remains low after the meeting, treat that as a signal that the milestone is probably too big or too entangled. Cut scope or split the roadmap instead of pretending it is execution-ready. 
+ ## Requirement Rules - Every relevant Active requirement must be mapped to a slice, deferred, blocked with reason, or moved out of scope. diff --git a/src/resources/extensions/sf/prompts/plan-milestone.md b/src/resources/extensions/sf/prompts/plan-milestone.md index 0fbc67f7e..fbf8f233e 100644 --- a/src/resources/extensions/sf/prompts/plan-milestone.md +++ b/src/resources/extensions/sf/prompts/plan-milestone.md @@ -24,6 +24,7 @@ Before decomposing, build your understanding: 2. **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) as the default for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) for npm/pypi/crates packages DeepWiki doesn't have. Context7 free tier is capped at 1000 req/month — spend those on cases DeepWiki can't cover. Skip both for libraries already used in this codebase. 3. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}} 4. **Requirements analysis.** If `.sf/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors. +5. **Comparable systems and nuanced stakeholder scan.** Before locking the roadmap, research what similar products, OSS tools, and production teams do in this category. Surface table stakes, common failure modes, customer expectations, business constraints, and differentiators worth preserving. ### Strategic Questions to Answer @@ -42,6 +43,28 @@ If milestone research exists (inlined above), trust those findings and skip redu Narrate your decomposition reasoning — why you're grouping work this way, what risks are driving the order, what verification strategy you're choosing and why. Use complete sentences rather than planner shorthand or fragmentary notes. +Before you persist the roadmap, run a bounded **Vision Alignment Meeting**. 
This is broader than slice planning, and it is allowed to be chatty and nuanced. Gather the strongest additions, cuts, and ordering changes from these participant lenses: +- **Product Manager:** what is the real product move and what should the roadmap prove? +- **User Advocate:** what must matter for the user experience and trust surface? +- **Customer Panel:** multiple likely customer viewpoints, not a single flattened “user”. +- **Business:** wedge, retention, expansion path, or viability concerns. +- **Researcher:** comparable products, OSS tools, market expectations, DeepWiki/Context7 findings, and focused web research. +- **Delivery Lead:** smallest credible milestone sequence and scope cuts. +- **Partner:** strongest case for the roadmap. +- **Combatant:** why the roadmap is wrong, overbuilt, or solving the wrong thing. +- **Architect:** system-fit and sequencing synthesis. +- **Moderator:** weigh the claims after the meeting; do NOT majority-vote. + +Every participant may propose roadmap additions, removals, sequencing changes, or deferrals. The roadmap is driven by the **weighted synthesis**, not raw vote count. Record: +- `trigger` +- each participant view above +- `weightedSynthesis` +- `confidenceByArea` +- `recommendedRoute` (`planning`, `researching`, or `discussing`) + +If confidence is weak in a core area, route back truthfully instead of pretending the roadmap is ready. A roadmap with `recommendedRoute=researching` or `discussing` should still be persisted as a draft, but it is not execution-ready and the system will keep the milestone in planning. +If confidence stays low after research and weighted discussion, assume the milestone is probably too big or too entangled. Cut scope, split milestones, or defer lower-value slices rather than forcing a vague roadmap through. + Then: 1. Use the **Roadmap** output template from the inlined context above 2.
{{skillActivation}} diff --git a/src/resources/extensions/sf/prompts/plan-slice.md b/src/resources/extensions/sf/prompts/plan-slice.md index 3ea8bf304..c40838791 100644 --- a/src/resources/extensions/sf/prompts/plan-slice.md +++ b/src/resources/extensions/sf/prompts/plan-slice.md @@ -67,8 +67,22 @@ Then: - a matching task plan file with description, steps, must-haves, verification, inputs, and expected output - **Inputs and Expected Output must list concrete backtick-wrapped file paths** (e.g. `` `src/types.ts` ``). These are machine-parsed to derive task dependencies — vague prose without paths breaks parallel execution. Every task must have at least one output file path. - Observability Impact section **only if the task touches runtime boundaries, async flows, or error paths** — omit it otherwise -7. **Persist planning state through `sf_plan_slice`.** Call it with the full slice planning payload (goal, demo, must-haves, verification, tasks, and metadata). The tool inserts all tasks in the same transaction, writes to the DB, and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` files automatically. Do **not** call `sf_plan_task` separately — `sf_plan_slice` handles task persistence. Do **not** rely on direct `PLAN.md` writes as the source of truth; the DB-backed tool is the canonical write path for slice and task planning state. -8. **Self-audit the plan.** Walk through each check — if any fail, fix the plan files before moving on: +7. **Run adversarial review before persisting the plan.** Record all three lenses in the `adversarialReview` payload you send to `sf_plan_slice`: + - **Partner:** strongest case for why this plan is sufficient, grounded in the actual code and evidence you explored. + - **Combatant:** attack the premise first. Name at least 3 plausible alternative root causes, failure modes, or plan-shape mistakes, plus the cheapest falsifier for each. 
+ - **Architect:** after reading partner + combatant, state the system-fit risk, sequencing risk, or missing integration proof. + - If any of the three reviews exposes a problem, change the plan before persisting it. Do not treat the review as commentary-only. +8. **When ambiguity is still real, run a bounded planning meeting before persistence.** This is for standard/heavy slices, low-confidence plans, multiple plausible approaches, or automatic feature planning where PM framing matters. Record it in the optional `planningMeeting` payload: + - **Trigger:** why the meeting was needed + - **Product Manager:** diagnosis, user value, scope cut, and what would count as a useful increment + - **Researcher:** the strongest evidence from code, docs, DeepWiki, Context7, or focused web research + - **Partner / Combatant / Architect:** the same roles as above, but in meeting form + - **Moderator:** synthesize the disagreement and set a route + - **Recommended Route:** one of `discussing`, `researching`, `planning` + - **Confidence:** concise post-meeting confidence summary + - Keep it bounded: one round is normal, two is the limit. If the meeting route is `discussing` or `researching`, persist the draft anyway so the system keeps the context, but do not pretend the slice is execution-ready. +9. **Persist planning state through `sf_plan_slice`.** Call it with the full slice planning payload (goal, adversarialReview, optional planningMeeting, demo, must-haves, verification, tasks, and metadata). The tool inserts all tasks in the same transaction, writes to the DB, and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` files automatically. Do **not** call `sf_plan_task` separately — `sf_plan_slice` handles task persistence. Do **not** rely on direct `PLAN.md` writes as the source of truth; the DB-backed tool is the canonical write path for slice and task planning state. +10.
**Self-audit the plan.** Walk through each check — if any fail, fix the plan files before moving on: - **Completion semantics:** If every task were completed exactly as written, the slice goal/demo should actually be true. - **Requirement coverage:** Every must-have in the slice maps to at least one task. No must-have is orphaned. If `REQUIREMENTS.md` exists, every Active requirement this slice owns maps to at least one task. - **Task completeness:** Every task has steps, must-haves, verification, inputs, and expected output — none are blank or vague. Inputs and Expected Output list backtick-wrapped file paths, not prose descriptions. @@ -77,8 +91,10 @@ Then: - **Scope sanity:** Target 2–5 steps and 3–8 files per task. 10+ steps or 12+ files — must split. Each task must be completable in a single fresh context window. - **Feature completeness:** Every task produces real, user-facing progress — not just internal scaffolding. - **Quality gate coverage:** For non-trivial slices, Threat Surface and Requirement Impact sections are present and specific (not placeholder text). For non-trivial tasks, Failure Modes, Load Profile, and Negative Tests are addressed in the task plan. -10. If planning produced structural decisions, append them to `.sf/DECISIONS.md` -11. {{commitInstruction}} + - **Adversarial completeness:** The persisted plan includes non-placeholder Partner, Combatant, and Architect review sections. If combatant only agrees, you did not push hard enough. + - **Meeting honesty:** If a planningMeeting exists and its route is `discussing` or `researching`, the moderator decision and confidence must explain why. Do not write a “planning” route just to get unstuck. +11. If planning produced structural decisions, append them to `.sf/DECISIONS.md` +12. {{commitInstruction}} The slice directory and tasks/ subdirectory already exist. Do NOT mkdir. All work stays in your working directory: `{{workingDirectory}}`. 
diff --git a/src/resources/extensions/sf/prompts/reassess-roadmap.md b/src/resources/extensions/sf/prompts/reassess-roadmap.md index 7738227f7..8dd988a34 100644 --- a/src/resources/extensions/sf/prompts/reassess-roadmap.md +++ b/src/resources/extensions/sf/prompts/reassess-roadmap.md @@ -39,6 +39,7 @@ Ask yourself: - Are the Threat Surface and Requirement Impact sections in completed slice plans still accurate for remaining slices? - Did this slice's Operational Readiness reveal monitoring gaps that remaining slices should address? - Should any Horizontal Checklist items be updated based on what was actually built? +- Did this slice reveal valuable follow-up work that is real but **non-blocking** for the current milestone claim, such as broader regression tests, load tests, docs, observability hardening, or cleanup? ### Success-Criterion Coverage Check @@ -55,6 +56,16 @@ If all criteria have at least one remaining owning slice, the coverage check pas Use `sf_reassess_roadmap` with `verdict: "roadmap-confirmed"`, an empty `sliceChanges` object, and the assessment text — the tool writes the assessment to the DB and renders `{{assessmentPath}}`. If requirements exist, explicitly note whether requirement coverage remains sound. +### Parallel Follow-up Rule + +Do not reflexively stuff all newly discovered test or hardening work back into the current milestone. + +Classify each follow-up honestly: +- **Required completeness work** — needed to prove the current milestone's promise, close a missing contract, fix a real bug, or satisfy launch/safety claims. This stays in the current milestone. +- **Non-blocking hardening work** — valuable but not required for the current milestone to be truthfully considered done. Examples: broader regression coverage, additional negative tests, extra load testing, docs polish, observability expansion, cleanup refactors. This may be deferred into a parallel follow-up track or later milestone. 
+ +If the milestone is already on track to be honestly complete, prefer creating a follow-up track rather than bloating the current roadmap. Treat persistent low confidence plus lots of follow-up work as a signal that the original milestone may have been too large or under-scoped. + **If changes are needed:** **Persist changes through `sf_reassess_roadmap`.** Pass: `milestoneId`, `completedSliceId`, `verdict` (e.g. "roadmap-adjusted"), `assessment` (text explaining the decision), and `sliceChanges` with `modified` (array of sliceId, title, risk, depends, demo), `added` (same shape), `removed` (array of slice ID strings). The tool structurally enforces preservation of completed slices, writes the assessment to the DB, re-renders `{{roadmapPath}}`, and renders `{{assessmentPath}}`. diff --git a/src/resources/extensions/sf/prompts/replan-slice.md b/src/resources/extensions/sf/prompts/replan-slice.md index ed5ca2e0b..418358ab2 100644 --- a/src/resources/extensions/sf/prompts/replan-slice.md +++ b/src/resources/extensions/sf/prompts/replan-slice.md @@ -32,7 +32,7 @@ Consider these captures when rewriting the remaining tasks — they represent th 1. Read the blocker task summary carefully. Understand exactly what was discovered and why it blocks the current plan. 2. Analyze the remaining `[ ]` tasks in the slice plan. Determine which are still valid, which need modification, and which should be replaced. -3. **Persist replan state through `sf_replan_slice`.** Call it with: `milestoneId`, `sliceId`, `blockerTaskId`, `blockerDescription`, `whatChanged`, `updatedTasks` (array of task objects with taskId, title, description, estimate, files, verify, inputs, expectedOutput), `removedTaskIds` (array of task ID strings). The tool structurally enforces preservation of completed tasks, writes replan history to the DB, re-renders `{{planPath}}`, and renders `{{replanPath}}`. 
Preserve or update the Threat Surface and Requirement Impact sections if the replan changes the slice's security posture or requirement coverage. +3. **Persist replan state through `sf_replan_slice`.** Call it with: `milestoneId`, `sliceId`, `blockerTaskId`, `blockerDescription`, `whatChanged`, optional updated slice-level fields (`goal`, `successCriteria`, `proofLevel`, `integrationClosure`, `observabilityImpact`, `adversarialReview`, `planningMeeting`), `updatedTasks` (array of task objects with taskId, title, description, estimate, files, verify, inputs, expectedOutput), and `removedTaskIds` (array of task ID strings). The tool structurally enforces preservation of completed tasks, writes replan history to the DB, re-renders `{{planPath}}`, and renders `{{replanPath}}`. Preserve or update the Threat Surface and Requirement Impact sections if the replan changes the slice's security posture or requirement coverage. If the blocker changes the strongest objection, partner case, or moderator route, update the slice-level ceremony state in the same call so replanning does not leave stale review context behind. 4. If any incomplete task had a `T0x-PLAN.md`, remove or rewrite it to match the new task description. 5. Do not commit manually — the system auto-commits your changes after this unit completes. 
diff --git a/src/resources/extensions/sf/prompts/system.md b/src/resources/extensions/sf/prompts/system.md index b3d835e7b..fbb61de2f 100644 --- a/src/resources/extensions/sf/prompts/system.md +++ b/src/resources/extensions/sf/prompts/system.md @@ -134,7 +134,7 @@ Templates showing the expected format for each artifact type are in: - `/sf status` - progress dashboard overlay - `/sf queue` - queue future milestones (safe while auto-mode is running) - `/sf quick ` - quick task with SF guarantees (atomic commits, state tracking) but no milestone ceremony -- `/sf codebase [generate|update|stats]` - manage the `.sf/CODEBASE.md` cache used for prompt context +- `/sf codebase [generate|update|stats|rag]` - manage `.sf/CODEBASE.md` and optional code search - `{{shortcutDashboard}}` - toggle dashboard overlay - `{{shortcutShell}}` - show shell processes @@ -148,7 +148,7 @@ Templates showing the expected format for each artifact type are in: **Code navigation:** Use `lsp` for definition, type_definition, implementation, references, incoming_calls, outgoing_calls, hover, signature, symbols, rename, code_actions, format, and diagnostics. Falls back gracefully if no server is available. Never `grep` for a symbol definition when `lsp` can resolve it semantically. Never shell out to prettier/rustfmt/gofmt when `lsp format` is available. After editing code, use `lsp diagnostics` to verify no type errors were introduced. -**Codebase exploration:** Use `subagent` with `scout` for broad unfamiliar subsystem mapping. Use `rg` for text search across files. Use `lsp` for structural navigation. Never read files one-by-one to "explore" — search first, then read what's relevant. +**Codebase exploration:** Use `subagent` with `scout` for broad unfamiliar subsystem mapping. Use `.sf/CODEBASE.md` for durable orientation. If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. 
Use `rg` for text search across files. Use `lsp` for structural navigation. Never read files one-by-one to "explore" — search first, then read what's relevant. **Documentation lookup:** Prefer `ask_question` / `read_wiki_contents` (DeepWiki) as the default — it's AI-indexed, covers any GitHub repo, and has no free-tier cap. Fall back to `resolve_library` → `get_library_docs` (Context7) for npm/pypi/crates packages when DeepWiki doesn't have the repo or you need the package-registry view. **Context7 free tier is capped at 1000 requests/month — spend those on cases DeepWiki can't cover.** Start Context7 calls with `tokens=5000`. Never guess at API signatures from memory when docs are available. diff --git a/src/resources/extensions/sf/prompts/validate-milestone.md b/src/resources/extensions/sf/prompts/validate-milestone.md index fb28d6219..8babca374 100644 --- a/src/resources/extensions/sf/prompts/validate-milestone.md +++ b/src/resources/extensions/sf/prompts/validate-milestone.md @@ -42,6 +42,12 @@ After all reviewers complete, aggregate their verdicts: - If any reviewer says NEEDS-ATTENTION → overall verdict: `needs-attention` - If any reviewer says FAIL → overall verdict: `needs-remediation` +When reviewers surface missing tests or hardening work, classify it before deciding the verdict: +- If the missing work is required to honestly prove the milestone's stated promise, treat it as remediation. +- If the missing work is additional hardening beyond the milestone's honest proof boundary, record it as follow-up work rather than forcing a remediation verdict. + +Do not use validation to smuggle every useful future test into the current milestone. Validation should protect truth, not prevent parallel follow-up hardening. + ### Step 3 — Persist Validation Prepare the validation content you will pass to `sf_validate_milestone`. Do **not** manually write `{{validationPath}}` — the DB-backed tool is the canonical write path and renders the validation file for you. 
diff --git a/src/resources/extensions/sf/rule-registry.ts b/src/resources/extensions/sf/rule-registry.ts index 3068c9393..2b6cde2b0 100644 --- a/src/resources/extensions/sf/rule-registry.ts +++ b/src/resources/extensions/sf/rule-registry.ts @@ -18,7 +18,7 @@ import type { PersistedHookState, HookStatusEntry, } from "./types.js"; -import { resolvePostUnitHooks, resolvePreDispatchHooks } from "./preferences.js"; +import { loadEffectiveSFPreferences, resolvePostUnitHooks, resolvePreDispatchHooks } from "./preferences.js"; import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs"; import { join } from "node:path"; import { parseUnitId } from "./unit-id.js"; @@ -77,6 +77,38 @@ export class RuleRegistry { this.dispatchRules = dispatchRules; } + private getEffectiveDispatchRules(): RegistryRule[] { + const prefs = loadEffectiveSFPreferences()?.preferences; + const dispatchPrefs = prefs?.experimental?.dispatch_rules; + const variantOrder = + dispatchPrefs?.active_variant && dispatchPrefs.variants?.[dispatchPrefs.active_variant] + ? dispatchPrefs.variants[dispatchPrefs.active_variant] + : undefined; + const requestedOrder = variantOrder ?? dispatchPrefs?.order; + + if (!requestedOrder || requestedOrder.length === 0) { + return this.dispatchRules; + } + + const indexed = new Map(this.dispatchRules.map((rule, index) => [rule.name, { rule, index }] as const)); + const ordered: RegistryRule[] = []; + const seen = new Set(); + + for (const name of requestedOrder) { + const entry = indexed.get(name); + if (!entry || seen.has(name)) continue; + ordered.push(entry.rule); + seen.add(name); + } + + for (const rule of this.dispatchRules) { + if (seen.has(rule.name)) continue; + ordered.push(rule); + } + + return ordered; + } + // ── Core query ─────────────────────────────────────────────────────── /** @@ -84,7 +116,7 @@ export class RuleRegistry { * Hook rules are loaded fresh from preferences on each call (not cached). 
*/ listRules(): RegistryRule[] { - const rules: RegistryRule[] = [...this.dispatchRules]; + const rules: RegistryRule[] = [...this.getEffectiveDispatchRules()]; // Convert post-unit hooks to unified rules const postHooks = resolvePostUnitHooks(); @@ -127,7 +159,7 @@ export class RuleRegistry { * Returns stop action if no rule matches (unhandled phase). */ async evaluateDispatch(ctx: DispatchContext): Promise { - for (const rule of this.dispatchRules) { + for (const rule of this.getEffectiveDispatchRules()) { const result = await rule.where(ctx); if (result) { if (result.action !== "skip") result.matchedRule = rule.name; diff --git a/src/resources/extensions/sf/safety/content-validator.ts b/src/resources/extensions/sf/safety/content-validator.ts index 51d07d6b1..c5c9ed3ad 100644 --- a/src/resources/extensions/sf/safety/content-validator.ts +++ b/src/resources/extensions/sf/safety/content-validator.ts @@ -7,6 +7,8 @@ import { existsSync, readFileSync } from "node:fs"; import { logWarning } from "../workflow-logger.js"; +import { inspectSlicePlanMarkdown } from "../plan-quality.js"; +import { inspectMilestoneRoadmapMarkdown } from "../milestone-quality.js"; // ─── Types ────────────────────────────────────────────────────────────────── @@ -79,6 +81,13 @@ function validatePlanSlice(content: string): ContentViolation[] { }); } + for (const issue of inspectSlicePlanMarkdown(content).issues) { + violations.push({ + severity: "warning", + reason: `Slice plan ${issue}`, + }); + } + return violations; } @@ -94,5 +103,12 @@ function validatePlanMilestone(content: string): ContentViolation[] { }); } + for (const issue of inspectMilestoneRoadmapMarkdown(content).issues) { + violations.push({ + severity: "warning", + reason: `Milestone roadmap ${issue}`, + }); + } + return violations; } diff --git a/src/resources/extensions/sf/sf-db.ts b/src/resources/extensions/sf/sf-db.ts index 8c58ee291..4e5372172 100644 --- a/src/resources/extensions/sf/sf-db.ts +++ 
b/src/resources/extensions/sf/sf-db.ts @@ -24,6 +24,8 @@ import { createRequire } from "node:module"; import { existsSync, copyFileSync, mkdirSync, realpathSync } from "node:fs"; import { dirname } from "node:path"; import type { Decision, Requirement, GateRow, GateId, GateScope, GateStatus, GateVerdict } from "./types.js"; +import type { PlanningMeetingRecord } from "./plan-quality.js"; +import type { VisionAlignmentMeetingRecord } from "./milestone-quality.js"; import { SFError, SF_STALE_STATE } from "./errors.js"; import { getGateIdsForTurn, type OwnerTurn } from "./gate-registry.js"; import { logError, logWarning } from "./workflow-logger.js"; @@ -180,7 +182,7 @@ function openRawDb(path: string): unknown { return new Database(path); } -const SCHEMA_VERSION = 16; +const SCHEMA_VERSION = 20; function indexExists(db: DbAdapter, name: string): boolean { return !!db.prepare( @@ -310,7 +312,8 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void { verification_uat TEXT NOT NULL DEFAULT '', definition_of_done TEXT NOT NULL DEFAULT '[]', requirement_coverage TEXT NOT NULL DEFAULT '', - boundary_map_markdown TEXT NOT NULL DEFAULT '' + boundary_map_markdown TEXT NOT NULL DEFAULT '', + vision_meeting_json TEXT NOT NULL DEFAULT '' ) `); @@ -332,6 +335,10 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void { proof_level TEXT NOT NULL DEFAULT '', integration_closure TEXT NOT NULL DEFAULT '', observability_impact TEXT NOT NULL DEFAULT '', + adversarial_partner TEXT NOT NULL DEFAULT '', + adversarial_combatant TEXT NOT NULL DEFAULT '', + adversarial_architect TEXT NOT NULL DEFAULT '', + planning_meeting_json TEXT NOT NULL DEFAULT '', sequence INTEGER DEFAULT 0, -- Ordering hint: tools may set this to control execution order replan_triggered_at TEXT DEFAULT NULL, PRIMARY KEY (milestone_id, id), @@ -365,6 +372,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void { expected_output TEXT NOT NULL DEFAULT '[]', observability_impact TEXT NOT NULL 
DEFAULT '', full_plan_md TEXT NOT NULL DEFAULT '', + verification_status TEXT NOT NULL DEFAULT '', sequence INTEGER DEFAULT 0, -- Ordering hint: tools may set this to control execution order PRIMARY KEY (milestone_id, slice_id, id), FOREIGN KEY (milestone_id, slice_id) REFERENCES slices(milestone_id, id) @@ -1035,6 +1043,32 @@ function migrateSchema(db: DbAdapter): void { }); } + if (currentVersion < 18) { + ensureColumn(db, "slices", "adversarial_partner", `ALTER TABLE slices ADD COLUMN adversarial_partner TEXT NOT NULL DEFAULT ''`); + ensureColumn(db, "slices", "adversarial_combatant", `ALTER TABLE slices ADD COLUMN adversarial_combatant TEXT NOT NULL DEFAULT ''`); + ensureColumn(db, "slices", "adversarial_architect", `ALTER TABLE slices ADD COLUMN adversarial_architect TEXT NOT NULL DEFAULT ''`); + db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({ + ":version": 18, + ":applied_at": new Date().toISOString(), + }); + } + + if (currentVersion < 19) { + ensureColumn(db, "slices", "planning_meeting_json", `ALTER TABLE slices ADD COLUMN planning_meeting_json TEXT NOT NULL DEFAULT ''`); + db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({ + ":version": 19, + ":applied_at": new Date().toISOString(), + }); + } + + if (currentVersion < 20) { + ensureColumn(db, "milestones", "vision_meeting_json", `ALTER TABLE milestones ADD COLUMN vision_meeting_json TEXT NOT NULL DEFAULT ''`); + db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({ + ":version": 20, + ":applied_at": new Date().toISOString(), + }); + } + db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -1434,6 +1468,7 @@ export interface MilestonePlanningRecord { definitionOfDone: string[]; requirementCoverage: string; boundaryMapMarkdown: string; + visionMeeting?: VisionAlignmentMeetingRecord; } export interface SlicePlanningRecord { @@ -1442,6 +1477,12 @@ export 
interface SlicePlanningRecord { proofLevel: string; integrationClosure: string; observabilityImpact: string; + adversarialReview: { + partner: string; + combatant: string; + architect: string; + }; + planningMeeting?: PlanningMeetingRecord; } export interface TaskPlanningRecord { @@ -1469,12 +1510,12 @@ export function insertMilestone(m: { id, title, status, depends_on, created_at, vision, success_criteria, key_risks, proof_strategy, verification_contract, verification_integration, verification_operational, verification_uat, - definition_of_done, requirement_coverage, boundary_map_markdown + definition_of_done, requirement_coverage, boundary_map_markdown, vision_meeting_json ) VALUES ( :id, :title, :status, :depends_on, :created_at, :vision, :success_criteria, :key_risks, :proof_strategy, :verification_contract, :verification_integration, :verification_operational, :verification_uat, - :definition_of_done, :requirement_coverage, :boundary_map_markdown + :definition_of_done, :requirement_coverage, :boundary_map_markdown, :vision_meeting_json )`, ).run({ ":id": m.id, @@ -1495,6 +1536,7 @@ export function insertMilestone(m: { ":definition_of_done": JSON.stringify(m.planning?.definitionOfDone ?? []), ":requirement_coverage": m.planning?.requirementCoverage ?? "", ":boundary_map_markdown": m.planning?.boundaryMapMarkdown ?? "", + ":vision_meeting_json": m.planning?.visionMeeting ? JSON.stringify(m.planning.visionMeeting) : "", }); } @@ -1514,7 +1556,8 @@ export function upsertMilestonePlanning(milestoneId: string, planning: Partial): SliceRow { return { milestone_id: row["milestone_id"] as string, @@ -1790,6 +1869,10 @@ function rowToSlice(row: Record): SliceRow { proof_level: (row["proof_level"] as string) ?? "", integration_closure: (row["integration_closure"] as string) ?? "", observability_impact: (row["observability_impact"] as string) ?? "", + adversarial_partner: (row["adversarial_partner"] as string) ?? 
"", + adversarial_combatant: (row["adversarial_combatant"] as string) ?? "", + adversarial_architect: (row["adversarial_architect"] as string) ?? "", + planning_meeting: parsePlanningMeeting(row["planning_meeting_json"]), sequence: (row["sequence"] as number) ?? 0, replan_triggered_at: (row["replan_triggered_at"] as string) ?? null, }; @@ -2010,6 +2093,16 @@ export interface MilestoneRow { definition_of_done: string[]; requirement_coverage: string; boundary_map_markdown: string; + vision_meeting: VisionAlignmentMeetingRecord | null; +} + +function parseVisionMeeting(raw: unknown): VisionAlignmentMeetingRecord | null { + if (typeof raw !== "string" || raw.trim().length === 0) return null; + try { + return JSON.parse(raw) as VisionAlignmentMeetingRecord; + } catch { + return null; + } } function rowToMilestone(row: Record): MilestoneRow { @@ -2031,6 +2124,7 @@ function rowToMilestone(row: Record): MilestoneRow { definition_of_done: JSON.parse((row["definition_of_done"] as string) || "[]"), requirement_coverage: (row["requirement_coverage"] as string) ?? "", boundary_map_markdown: (row["boundary_map_markdown"] as string) ?? 
"", + vision_meeting: parseVisionMeeting(row["vision_meeting_json"]), }; } @@ -2314,12 +2408,12 @@ export function reconcileWorktreeDb( id, title, status, depends_on, created_at, completed_at, vision, success_criteria, key_risks, proof_strategy, verification_contract, verification_integration, verification_operational, verification_uat, - definition_of_done, requirement_coverage, boundary_map_markdown + definition_of_done, requirement_coverage, boundary_map_markdown, vision_meeting_json ) SELECT id, title, status, depends_on, created_at, completed_at, vision, success_criteria, key_risks, proof_strategy, verification_contract, verification_integration, verification_operational, verification_uat, - definition_of_done, requirement_coverage, boundary_map_markdown + definition_of_done, requirement_coverage, boundary_map_markdown, vision_meeting_json FROM wt.milestones `).run()); @@ -2330,7 +2424,8 @@ export function reconcileWorktreeDb( INSERT OR REPLACE INTO slices ( milestone_id, id, title, status, risk, depends, demo, created_at, completed_at, full_summary_md, full_uat_md, goal, success_criteria, proof_level, - integration_closure, observability_impact, sequence, replan_triggered_at + integration_closure, observability_impact, adversarial_partner, adversarial_combatant, + adversarial_architect, planning_meeting_json, sequence, replan_triggered_at ) SELECT w.milestone_id, w.id, w.title, CASE @@ -2343,7 +2438,8 @@ export function reconcileWorktreeDb( THEN m.completed_at ELSE w.completed_at END, w.full_summary_md, w.full_uat_md, w.goal, w.success_criteria, w.proof_level, - w.integration_closure, w.observability_impact, w.sequence, w.replan_triggered_at + w.integration_closure, w.observability_impact, w.adversarial_partner, w.adversarial_combatant, + w.adversarial_architect, w.planning_meeting_json, w.sequence, w.replan_triggered_at FROM wt.slices w LEFT JOIN slices m ON m.milestone_id = w.milestone_id AND m.id = w.id `).run()); @@ -3165,8 +3261,8 @@ export function 
restoreManifest(manifest: StateManifest): void { `INSERT INTO milestones (id, title, status, depends_on, created_at, completed_at, vision, success_criteria, key_risks, proof_strategy, verification_contract, verification_integration, verification_operational, verification_uat, - definition_of_done, requirement_coverage, boundary_map_markdown) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + definition_of_done, requirement_coverage, boundary_map_markdown, vision_meeting_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, ); for (const m of manifest.milestones) { msStmt.run( @@ -3176,6 +3272,7 @@ export function restoreManifest(manifest: StateManifest): void { JSON.stringify(m.proof_strategy), m.verification_contract, m.verification_integration, m.verification_operational, m.verification_uat, JSON.stringify(m.definition_of_done), m.requirement_coverage, m.boundary_map_markdown, + m.vision_meeting ? JSON.stringify(m.vision_meeting) : "", ); } @@ -3184,8 +3281,9 @@ export function restoreManifest(manifest: StateManifest): void { `INSERT INTO slices (milestone_id, id, title, status, risk, depends, demo, created_at, completed_at, full_summary_md, full_uat_md, goal, success_criteria, proof_level, integration_closure, observability_impact, + adversarial_partner, adversarial_combatant, adversarial_architect, planning_meeting_json, sequence, replan_triggered_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, ); for (const s of manifest.slices) { slStmt.run( @@ -3193,6 +3291,8 @@ export function restoreManifest(manifest: StateManifest): void { JSON.stringify(s.depends), s.demo, s.created_at, s.completed_at, s.full_summary_md, s.full_uat_md, s.goal, s.success_criteria, s.proof_level, s.integration_closure, s.observability_impact, + s.adversarial_partner ?? "", s.adversarial_combatant ?? "", s.adversarial_architect ?? "", + s.planning_meeting ? 
JSON.stringify(s.planning_meeting) : "", s.sequence, s.replan_triggered_at, ); } @@ -3203,8 +3303,8 @@ export function restoreManifest(manifest: StateManifest): void { one_liner, narrative, verification_result, duration, completed_at, blocker_discovered, deviations, known_issues, key_files, key_decisions, full_summary_md, description, estimate, files, verify, - inputs, expected_output, observability_impact, sequence) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + inputs, expected_output, observability_impact, full_plan_md, sequence) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, ); for (const t of manifest.tasks) { tkStmt.run( @@ -3214,7 +3314,7 @@ export function restoreManifest(manifest: StateManifest): void { JSON.stringify(t.key_files), JSON.stringify(t.key_decisions), t.full_summary_md, t.description, t.estimate, JSON.stringify(t.files), t.verify, JSON.stringify(t.inputs), JSON.stringify(t.expected_output), - t.observability_impact, t.sequence, + t.observability_impact, t.full_plan_md, t.sequence, ); } diff --git a/src/resources/extensions/sf/state.ts b/src/resources/extensions/sf/state.ts index 49d88810f..2827a3c66 100644 --- a/src/resources/extensions/sf/state.ts +++ b/src/resources/extensions/sf/state.ts @@ -44,6 +44,8 @@ import { existsSync, readdirSync, readFileSync } from 'node:fs'; import { debugCount, debugTime } from './debug-logger.js'; import { logWarning, logError } from './workflow-logger.js'; import { extractVerdict } from './verdict-parser.js'; +import { getSlicePlanBlockingIssue } from './plan-quality.js'; +import { getVisionAlignmentBlockingIssue } from './milestone-quality.js'; import { isDbAvailable, @@ -89,6 +91,7 @@ export function isGhostMilestone(basePath: string, mid: string): boolean { if (dbRow) { if (dbRow.status === 'queued') { const hasContent = resolveMilestoneFile(basePath, mid, "CONTEXT") + || resolveMilestoneFile(basePath, mid, "CONTEXT-DRAFT") || 
resolveMilestoneFile(basePath, mid, "ROADMAP") || resolveMilestoneFile(basePath, mid, "SUMMARY"); return !hasContent; @@ -839,6 +842,34 @@ export async function deriveStateFromDb(basePath: string): Promise { }; } + const activeMilestoneRow = getMilestone(activeMilestone.id); + const shouldEnforceVisionMeeting = !!activeMilestoneRow && ( + activeMilestoneRow.vision_meeting !== null + || activeMilestoneRow.vision.trim().length > 0 + || activeMilestoneRow.success_criteria.length > 0 + || activeMilestoneRow.key_risks.length > 0 + || activeMilestoneRow.proof_strategy.length > 0 + || activeMilestoneRow.verification_contract.trim().length > 0 + || activeMilestoneRow.verification_integration.trim().length > 0 + || activeMilestoneRow.verification_operational.trim().length > 0 + || activeMilestoneRow.verification_uat.trim().length > 0 + || activeMilestoneRow.definition_of_done.length > 0 + || activeMilestoneRow.requirement_coverage.trim().length > 0 + || activeMilestoneRow.boundary_map_markdown.trim().length > 0 + ); + const milestonePlanningIssue = shouldEnforceVisionMeeting + ? getVisionAlignmentBlockingIssue(activeMilestoneRow?.vision_meeting ?? null) + : null; + if (milestonePlanningIssue) { + return { + activeMilestone, activeSlice: null, activeTask: null, + phase: 'planning', recentDecisions: [], blockers: [], + nextAction: `Milestone ${activeMilestone.id} roadmap is incomplete (${milestonePlanningIssue}). 
Re-run plan-milestone with a weighted vision alignment meeting before execution.`, + registry, requirements, + progress: { milestones: milestoneProgress, slices: { done: 0, total: activeMilestoneSlices.length } }, + }; + } + const allSlicesDone = activeMilestoneSlices.every(s => isStatusDone(s.status)); const sliceProgress = { done: activeMilestoneSlices.filter(s => isStatusDone(s.status)).length, @@ -882,6 +913,18 @@ export async function deriveStateFromDb(basePath: string): Promise { }; } + const planContent = await loadFile(planFile); + const planQualityIssue = planContent ? getSlicePlanBlockingIssue(planContent) : 'missing slice plan content'; + if (planQualityIssue) { + return { + activeMilestone, activeSlice, activeTask: null, + phase: 'planning', recentDecisions: [], blockers: [], + nextAction: `Slice ${activeSlice.id} plan is incomplete (${planQualityIssue}). Re-run plan-slice with partner/combatant/architect review.`, + registry, requirements, + progress: { milestones: milestoneProgress, slices: sliceProgress }, + }; + } + const tasks = await reconcileSliceTasks(basePath, activeMilestone.id, activeSlice.id, planFile); const taskProgress = { @@ -1544,6 +1587,26 @@ export async function _deriveStateImpl(basePath: string): Promise { }; } + const planQualityIssue = getSlicePlanBlockingIssue(slicePlanContent); + if (planQualityIssue) { + return { + activeMilestone, + activeSlice, + activeTask: null, + phase: 'planning', + recentDecisions: [], + blockers: [], + nextAction: `Slice ${activeSlice.id} plan is incomplete (${planQualityIssue}). 
Re-run plan-slice with partner/combatant/architect review.`, + + registry, + requirements, + progress: { + milestones: milestoneProgress, + slices: sliceProgress, + }, + }; + } + const slicePlan = parsePlan(slicePlanContent); // ── Reconcile stale task status for filesystem-based projects (#2514) ── diff --git a/src/resources/extensions/sf/tests/artifact-corruption-2630.test.ts b/src/resources/extensions/sf/tests/artifact-corruption-2630.test.ts index 42be01aa3..357f5beaf 100644 --- a/src/resources/extensions/sf/tests/artifact-corruption-2630.test.ts +++ b/src/resources/extensions/sf/tests/artifact-corruption-2630.test.ts @@ -101,6 +101,7 @@ function makeMilestoneRow(overrides?: Partial): MilestoneRow { requirement_coverage: '', boundary_map_markdown: '', ...overrides, + vision_meeting: overrides?.vision_meeting ?? null, }; } diff --git a/src/resources/extensions/sf/tests/auto-start-needs-discussion.test.ts b/src/resources/extensions/sf/tests/auto-start-needs-discussion.test.ts index 4bac55746..76bc58e12 100644 --- a/src/resources/extensions/sf/tests/auto-start-needs-discussion.test.ts +++ b/src/resources/extensions/sf/tests/auto-start-needs-discussion.test.ts @@ -215,4 +215,33 @@ describe("auto-start-needs-discussion (#1726)", () => { ); } }); + + test("8. 
headless bootstrap without artifacts starts a roadmap repair dispatch before aborting", () => { + const source = readAutoStartSource(); + const repairMessageIdx = source.indexOf("Starting roadmap planning repair session"); + const repairDispatchIdx = source.indexOf("dispatchNewMilestoneDiscuss(ctx, pi, base, nextId", repairMessageIdx); + const finalAbortIdx = source.indexOf("Headless bootstrap repair completed but no milestone artifacts", repairDispatchIdx); + + assert.ok(repairMessageIdx > -1, + "bootstrap should announce a roadmap planning repair session when no artifacts are written"); + assert.ok(repairDispatchIdx > repairMessageIdx, + "bootstrap should dispatch a repair turn for the same reserved milestone ID"); + assert.ok(finalAbortIdx > repairDispatchIdx, + "bootstrap should only abort after the repair dispatch also fails to produce artifacts"); + }); + + test("9. pre-planning milestone without context is repaired in-place instead of allocating a new ID", () => { + const source = readAutoStartSource(); + const noContextIdx = source.indexOf("Milestone ${mid} has no context. 
Bootstrapping from codebase analysis."); + const noContextBlock = source.slice(noContextIdx, source.indexOf("// Active milestone has CONTEXT-DRAFT", noContextIdx)); + + assert.ok(noContextIdx > -1, + "pre-planning no-context branch should exist"); + assert.ok(noContextBlock.includes("dispatchNewMilestoneDiscuss(ctx, pi, base, mid"), + "no-context repair should reuse the existing milestone id"); + assert.ok(!noContextBlock.includes("bootstrapNewMilestone(base)"), + "no-context repair must not allocate a new milestone id"); + assert.ok(noContextBlock.includes("Reuse this milestone ID"), + "repair prompt should explicitly forbid creating a replacement milestone"); + }); }); diff --git a/src/resources/extensions/sf/tests/codebase-generator.test.ts b/src/resources/extensions/sf/tests/codebase-generator.test.ts index ec077f69e..d2be52eaf 100644 --- a/src/resources/extensions/sf/tests/codebase-generator.test.ts +++ b/src/resources/extensions/sf/tests/codebase-generator.test.ts @@ -126,6 +126,28 @@ test("generateCodebaseMap: generates from git ls-files", () => { } }); +test("generateCodebaseMap: includes project knowledge scaffold from repo signals", () => { + const base = makeTmpRepo(); + try { + addFile(base, "go.mod", "module example.com/dr\n"); + addFile(base, "migrations/001_init.sql", "create table checks(id int);\n"); + addFile(base, "cmd/server/main.go", "package main\n"); + addFile(base, "internal/handlers/failover.go", "package handlers\n"); + addFile(base, "README.md", "# DR\n"); + + const result = generateCodebaseMap(base); + assert.match(result.content, /## Project Knowledge/); + assert.match(result.content, /Go module present/); + assert.match(result.content, /Database schema or migration files present/); + assert.match(result.content, /go test \.\/\.\.\./); + assert.match(result.content, /Go service development and testing/); + assert.match(result.content, /cmd\/ is a likely implementation boundary/); + assert.match(result.content, /Knowledge Gaps To 
Close/); + } finally { + cleanup(base); + } +}); + test("generateCodebaseMap: excludes .sf/ files", () => { const base = makeTmpRepo(); try { diff --git a/src/resources/extensions/sf/tests/derive-state-db.test.ts b/src/resources/extensions/sf/tests/derive-state-db.test.ts index cb69eff76..9179f4184 100644 --- a/src/resources/extensions/sf/tests/derive-state-db.test.ts +++ b/src/resources/extensions/sf/tests/derive-state-db.test.ts @@ -72,6 +72,20 @@ const PLAN_CONTENT = `# S01: First Slice **Goal:** Test executing. **Demo:** Tests pass. +## Adversarial Review + +### Partner Review + +The slice is executable because the tasks already capture the real delivery steps and one task is intentionally already complete. + +### Combatant Review + +The failure mode is stale plan metadata. The DB-backed task status and plan artifact need to stay aligned, which these tests exercise. + +### Architect Review + +The plan only advances to execution when both the plan artifact and task artifacts agree, which is the system boundary we care about here. + ## Tasks - [ ] **T01: First Task** \`est:10m\` @@ -508,6 +522,20 @@ describe('derive-state-db', async () => { **Goal:** Test summarizing. **Demo:** Tests pass. +## Adversarial Review + +### Partner Review + +Both tasks are complete, so the slice should advance to summarizing. + +### Combatant Review + +Without a complete review block, this would be a shallow plan artifact rather than a valid summarizing fixture. + +### Architect Review + +This fixture exists to prove the summarize transition, not to bypass plan completeness. 
+ ## Tasks - [x] **T01: First Task** \`est:10m\` @@ -1126,4 +1154,20 @@ describe('derive-state-db', async () => { cleanup(base); } }); + + test('derive-state-db: queued milestone with CONTEXT-DRAFT is not a ghost', async () => { + const base = createFixtureBase(); + try { + mkdirSync(join(base, '.sf', 'milestones', 'M001', 'slices'), { recursive: true }); + writeFile(base, 'milestones/M001/M001-CONTEXT-DRAFT.md', '# M001 Draft\n\nNeeds discussion.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Draft milestone', status: 'queued' }); + + assert.ok(!isGhostMilestone(base, 'M001'), 'queued milestone with CONTEXT-DRAFT is not a ghost'); + } finally { + closeDatabase(); + cleanup(base); + } + }); }); diff --git a/src/resources/extensions/sf/tests/derive-state-helpers.test.ts b/src/resources/extensions/sf/tests/derive-state-helpers.test.ts index ea1fc5118..5753a355c 100644 --- a/src/resources/extensions/sf/tests/derive-state-helpers.test.ts +++ b/src/resources/extensions/sf/tests/derive-state-helpers.test.ts @@ -60,6 +60,20 @@ const PLAN_CONTENT = `# S01: First Slice **Goal:** Test executing. **Demo:** Tests pass. +## Adversarial Review + +### Partner Review + +The task list is concrete enough for the helper path to derive active work correctly. + +### Combatant Review + +If the plan artifact is missing real review content, helper-based state derivation should not treat it as execution-ready. + +### Architect Review + +This fixture should prove the helper path respects the same planning contract as the main state path. 
+ ## Tasks - [ ] **T01: First Task** \`est:10m\` @@ -162,7 +176,7 @@ describe('derive-state-helpers', () => { try { writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); // S02 depends on S01 but we lock to S02 directly - writeFile(base, 'milestones/M001/slices/S02/S02-PLAN.md', `# S02\n\n**Goal:** Test.\n**Demo:** Pass.\n\n## Tasks\n\n- [ ] **T01: Task** \`est:5m\`\n Do thing.\n`); + writeFile(base, 'milestones/M001/slices/S02/S02-PLAN.md', `# S02\n\n**Goal:** Test.\n**Demo:** Pass.\n\n## Adversarial Review\n\n### Partner Review\n\nThe slice lock should allow this valid plan to execute even though its dependency is unmet.\n\n### Combatant Review\n\nThe lock bypass must not accidentally accept a shallow scaffold; this fixture includes the full review block.\n\n### Architect Review\n\nThis isolates slice-lock behavior without weakening the plan-quality contract.\n\n## Tasks\n\n- [ ] **T01: Task** \`est:5m\`\n Do thing.\n`); writeFile(base, 'milestones/M001/slices/S02/tasks/.gitkeep', ''); writeFile(base, 'milestones/M001/slices/S02/tasks/T01-PLAN.md', '# T01 Plan'); diff --git a/src/resources/extensions/sf/tests/flag-file-db.test.ts b/src/resources/extensions/sf/tests/flag-file-db.test.ts index 95acb0262..6a9fbc468 100644 --- a/src/resources/extensions/sf/tests/flag-file-db.test.ts +++ b/src/resources/extensions/sf/tests/flag-file-db.test.ts @@ -59,6 +59,20 @@ const PLAN_CONTENT = `# S01: Test Slice **Goal:** Test replanning detection. **Demo:** Tests pass. +## Adversarial Review + +### Partner Review + +The completed and active tasks provide enough structure to test blocker and flag-file transitions. + +### Combatant Review + +If the plan lacks real review content, replanning tests would be exercising an invalid execution state. + +### Architect Review + +This keeps the replanning fixture aligned with the same plan-validity contract as normal execution. 
+ ## Tasks - [x] **T01: Done Task** \`est:10m\` diff --git a/src/resources/extensions/sf/tests/health-widget.test.ts b/src/resources/extensions/sf/tests/health-widget.test.ts index a638fbd4c..d750a798c 100644 --- a/src/resources/extensions/sf/tests/health-widget.test.ts +++ b/src/resources/extensions/sf/tests/health-widget.test.ts @@ -1,6 +1,6 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { @@ -39,6 +39,7 @@ function activeData(overrides: Partial = {}): HealthWidgetData lastCommitEpoch: null, lastCommitMessage: null, lastRefreshed: Date.now(), + remoteQuestionsConfigured: false, ...overrides, }; } @@ -102,6 +103,20 @@ test("buildHealthLines: active state with issues reports issue summary", (t) => assert.match(lines[0]!, /Env: 1 error/); }); +test("buildHealthLines: active state suggests remote setup only when missing", () => { + const missing = buildHealthLines(activeData({ remoteQuestionsConfigured: false })); + assert.match(missing[0]!, /\/sf remote telegram/); + + const configured = buildHealthLines(activeData({ remoteQuestionsConfigured: true })); + assert.doesNotMatch(configured[0]!, /\/sf remote telegram/); +}); + +test("health widget runtime checks sibling remote-questions extension", () => { + const source = readFileSync(join(import.meta.dirname, "..", "health-widget.ts"), "utf-8"); + assert.match(source, /from "\.\.\/remote-questions\/manager\.js"/); + assert.doesNotMatch(source, /require\("\.\/remote-questions\/manager\.js"\)/); +}); + // ── Last commit display ────────────────────────────────────────────────── test("buildHealthLines: shows last commit with relative time and message", (t) => { diff --git a/src/resources/extensions/sf/tests/integration/auto-recovery.test.ts 
b/src/resources/extensions/sf/tests/integration/auto-recovery.test.ts index a6447fc0f..053ca1a0d 100644 --- a/src/resources/extensions/sf/tests/integration/auto-recovery.test.ts +++ b/src/resources/extensions/sf/tests/integration/auto-recovery.test.ts @@ -299,6 +299,20 @@ test("verifyExpectedArtifact accepts plan-slice with actual tasks", (t) => { writeFileSync(join(sliceDir, "S01-PLAN.md"), [ "# S01: Test Slice", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "The task list is concrete enough to execute.", + "", + "### Combatant Review", + "", + "A scaffold without review would be too weak, so this fixture includes the required pushback.", + "", + "### Architect Review", + "", + "The plan is valid only because it can hand off execution with real task artifacts.", + "", "## Tasks", "", "- [ ] **T01: Implement feature** `est:2h`", @@ -323,6 +337,20 @@ test("verifyExpectedArtifact accepts plan-slice with completed tasks", (t) => { writeFileSync(join(sliceDir, "S01-PLAN.md"), [ "# S01: Test Slice", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "Checked tasks should still count as valid plan entries.", + "", + "### Combatant Review", + "", + "Completion markers alone are not enough without the review block.", + "", + "### Architect Review", + "", + "The fixture should still represent a complete handoff artifact.", + "", "## Tasks", "", "- [x] **T01: Implement feature** `est:2h`", @@ -348,6 +376,20 @@ test("verifyExpectedArtifact plan-slice passes when all task plan files exist", const planContent = [ "# S01: Test Slice", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "The slice has concrete tasks and matching task plans.", + "", + "### Combatant Review", + "", + "Missing task plan files must still fail even when the slice plan looks complete.", + "", + "### Architect Review", + "", + "This keeps artifact verification strict at both the slice and task layer.", + "", "## Tasks", "", "- [ ] **T01: First 
task** `est:1h`", @@ -370,6 +412,20 @@ test("verifyExpectedArtifact plan-slice fails when a task plan file is missing ( const planContent = [ "# S01: Test Slice", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "The slice plan is otherwise valid, so the missing task plan should be the deciding failure.", + "", + "### Combatant Review", + "", + "A valid slice plan should not mask missing task artifacts.", + "", + "### Architect Review", + "", + "This keeps the regression narrowly focused on per-task plan existence.", + "", "## Tasks", "", "- [ ] **T01: First task** `est:1h`", @@ -413,6 +469,20 @@ test("verifyExpectedArtifact accepts plan-slice with heading-style tasks (### T0 writeFileSync(join(sliceDir, "S01-PLAN.md"), [ "# S01: Test Slice", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "Heading-style task plans are valid if the review is still present.", + "", + "### Combatant Review", + "", + "The parser must not confuse heading-style tasks with a shallow scaffold.", + "", + "### Architect Review", + "", + "Allowing both plan formats keeps migration compatibility without weakening the gate.", + "", "## Tasks", "", "### T01 -- Implement feature", @@ -442,6 +512,20 @@ test("verifyExpectedArtifact accepts plan-slice with colon-style heading tasks ( writeFileSync(join(sliceDir, "S01-PLAN.md"), [ "# S01: Test Slice", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "Colon-style heading tasks are also valid when the plan is otherwise complete.", + "", + "### Combatant Review", + "", + "This guards against accepting bare headings without review.", + "", + "### Architect Review", + "", + "The validation rule stays format-flexible but review-strict.", + "", "## Tasks", "", "### T01: Implement feature", @@ -500,6 +584,11 @@ test("verifyExpectedArtifact plan-slice passes for rendered slice/task plan arti proofLevel: "integration", integrationClosure: "DB rows are the source of truth for PLAN artifacts.", 
observabilityImpact: "- Recovery verification fails if a task plan file is missing", + adversarialReview: { + partner: "The DB-backed renderer already owns the slice contract, so persisting the review in the same row keeps plan intent and artifact generation aligned.", + combatant: "A shallow plan could still render. The recovery gate must reject missing review data so auto-mode does not treat scaffolding as execution-ready.", + architect: "This keeps plan quality enforcement on the same boundary as artifact verification instead of relying on prompt discipline alone.", + }, }, }); insertTask({ @@ -573,6 +662,11 @@ test("verifyExpectedArtifact plan-slice fails after deleting a rendered task pla proofLevel: "integration", integrationClosure: "DB rows are the source of truth for PLAN artifacts.", observabilityImpact: "- Recovery verification fails if a task plan file is missing", + adversarialReview: { + partner: "A rendered plan should remain the canonical artifact for downstream recovery checks.", + combatant: "Deleting a task plan file should still fail verification even when the slice plan itself looks complete.", + architect: "This keeps recovery sensitive to both plan completeness and task artifact completeness.", + }, }, }); insertTask({ @@ -619,6 +713,102 @@ test("verifyExpectedArtifact plan-slice fails after deleting a rendered task pla } }); +test("verifyExpectedArtifact plan-slice fails when adversarial review is missing", (t) => { + const base = makeTmpBase(); + t.after(() => cleanup(base)); + + writeFileSync(join(base, ".sf", "milestones", "M001", "slices", "S01", "S01-PLAN.md"), [ + "# S01: First Slice", + "", + "**Goal:** Test plan quality.", + "**Demo:** Task artifacts exist.", + "", + "## Tasks", + "", + "- [ ] **T01: Do thing**", + " - Files: `src/example.ts`", + " - Verify: `npm test`", + ].join("\n")); + writeFileSync(join(base, ".sf", "milestones", "M001", "slices", "S01", "tasks", "T01-PLAN.md"), "# T01 PLAN\n"); + + const result = 
verifyExpectedArtifact("plan-slice", "M001/S01", base); + assert.equal(result, false, "plan-slice verification should fail without adversarial review"); +}); + +test("verifyExpectedArtifact plan-slice fails when planning meeting routes back to researching", (t) => { + const base = makeTmpBase(); + t.after(() => cleanup(base)); + + writeFileSync(join(base, ".sf", "milestones", "M001", "slices", "S01", "S01-PLAN.md"), [ + "# S01: First Slice", + "", + "**Goal:** Test plan quality.", + "**Demo:** Task artifacts exist.", + "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "The current plan could work if the premise holds.", + "", + "### Combatant Review", + "", + "The premise may still be wrong, so this should not execute yet.", + "", + "### Architect Review", + "", + "The route should stay conservative until the premise is rechecked.", + "", + "## Planning Meeting", + "", + "### Trigger", + "", + "Multiple plausible approaches remained after the first pass.", + "", + "### Product Manager", + "", + "The increment is still unclear enough that shipping now would be premature.", + "", + "### Researcher", + "", + "The current evidence does not yet narrow the best approach enough.", + "", + "### Partner", + "", + "One path looks viable if the assumptions hold.", + "", + "### Combatant", + "", + "Those assumptions are still too weak.", + "", + "### Architect", + "", + "The system boundary is not yet proven.", + "", + "### Moderator", + "", + "Return to research before planning execution.", + "", + "### Recommended Route", + "", + "researching", + "", + "### Confidence", + "", + "Post-meeting confidence is not high enough for execution planning.", + "", + "## Tasks", + "", + "- [ ] **T01: Do thing**", + " - Files: `src/example.ts`", + " - Verify: `npm test`", + ].join("\n")); + writeFileSync(join(base, ".sf", "milestones", "M001", "slices", "S01", "tasks", "T01-PLAN.md"), "# T01 PLAN\n"); + + const result = verifyExpectedArtifact("plan-slice", "M001/S01", 
base); + assert.equal(result, false, "plan-slice verification should fail when the meeting routes back to researching"); +}); + // ─── #793: invalidateAllCaches unblocks skip-loop ───────────────────────── // When the skip-loop breaker fires, it must call invalidateAllCaches() (not // just invalidateStateCache()) to clear path/parse caches that deriveState @@ -639,7 +829,7 @@ test("#793: invalidateAllCaches clears all caches so deriveState sees fresh disk join(base, ".sf", "milestones", mid, `${mid}-ROADMAP.md`), `# M001: Test Milestone\n\n**Vision:** test.\n\n## Slices\n\n- [ ] **${sid}: Slice One** \`risk:low\` \`depends:[]\`\n > After this: done.\n`, ); - const planUnchecked = `# ${sid}: Slice One\n\n**Goal:** test.\n\n## Tasks\n\n- [ ] **T01: Task One** \`est:10m\`\n- [ ] **T02: Task Two** \`est:10m\`\n`; + const planUnchecked = `# ${sid}: Slice One\n\n**Goal:** test.\n\n## Adversarial Review\n\n### Partner Review\n\nThe plan is concrete enough to execute and exercise cache invalidation.\n\n### Combatant Review\n\nA stale cache must not cause us to accept a plan that was never actually reread.\n\n### Architect Review\n\nThis fixture proves cache invalidation while still meeting the plan-quality contract.\n\n## Tasks\n\n- [ ] **T01: Task One** \`est:10m\`\n- [ ] **T02: Task Two** \`est:10m\`\n`; writeFileSync(join(planDir, `${sid}-PLAN.md`), planUnchecked); writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01: Task One\n\n**Goal:** t\n\n## Steps\n- step\n\n## Verification\n- v\n"); writeFileSync(join(tasksDir, "T02-PLAN.md"), "# T02: Task Two\n\n**Goal:** t\n\n## Steps\n- step\n\n## Verification\n- v\n"); @@ -649,7 +839,7 @@ test("#793: invalidateAllCaches clears all caches so deriveState sees fresh disk assert.equal(state1.activeTask?.id, "T01", "initial: T01 is active"); // Simulate task completion on disk (what the LLM does) - const planChecked = `# ${sid}: Slice One\n\n**Goal:** test.\n\n## Tasks\n\n- [x] **T01: Task One** \`est:10m\`\n- [ ] **T02: Task Two** 
\`est:10m\`\n`; + const planChecked = `# ${sid}: Slice One\n\n**Goal:** test.\n\n## Adversarial Review\n\n### Partner Review\n\nThe plan remains valid after T01 completes.\n\n### Combatant Review\n\nThe cache invalidation path must prove we reread the updated plan rather than trusting stale state.\n\n### Architect Review\n\nThis keeps the regression focused on cache behavior, not on plan completeness.\n\n## Tasks\n\n- [x] **T01: Task One** \`est:10m\`\n- [ ] **T02: Task Two** \`est:10m\`\n`; writeFileSync(join(planDir, `${sid}-PLAN.md`), planChecked); writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "---\nid: T01\n---\n# Summary\n"); diff --git a/src/resources/extensions/sf/tests/plan-milestone-artifact-verification.test.ts b/src/resources/extensions/sf/tests/plan-milestone-artifact-verification.test.ts index 229c84846..6d7a878d0 100644 --- a/src/resources/extensions/sf/tests/plan-milestone-artifact-verification.test.ts +++ b/src/resources/extensions/sf/tests/plan-milestone-artifact-verification.test.ts @@ -39,18 +39,87 @@ test("#3405: plan-milestone roadmap stub does not count as a verified artifact", } }); +test("plan-milestone roadmap without a vision alignment meeting does not count as a verified artifact", () => { + const base = createFixtureBase(); + try { + writeRoadmap(base, "M001", [ + "# M001: Missing ceremony", + "", + "## Vision", + "Real work, but no meeting contract.", + "", + "## Slice Overview", + "| ID | Slice | Risk | Depends | Done | After this |", + "|----|-------|------|---------|------|------------|", + "| S01 | First slice | low | — | ⬜ | a real slice exists. 
|", + "", + ].join("\n")); + + const result = verifyExpectedArtifact("plan-milestone", "M001", base); + assert.equal(result, false, "roadmaps missing the vision meeting must fail verification"); + } finally { + rmSync(base, { recursive: true, force: true }); + } +}); + test("#3405: plan-milestone roadmap with real slices still passes artifact verification", () => { const base = createFixtureBase(); try { writeRoadmap(base, "M001", [ "# M001: Real roadmap", "", - "**Vision:** Real work.", + "## Vision", + "Real work.", "", - "## Slices", + "## Vision Alignment Meeting", "", - "- [ ] **S01: First slice** `risk:low` `depends:[]`", - " > After this: a real slice exists.", + "### Trigger", + "Need a top-level roadmap synthesis.", + "", + "### Product Manager", + "Ship the smallest credible milestone.", + "", + "### User Advocate", + "Keep the roadmap understandable.", + "", + "### Customer Panel", + "Advanced users want fidelity; maintainers want consistency.", + "", + "### Business", + "This should create a credible planning surface.", + "", + "### Researcher", + "Comparable systems treat roadmap coherence as table stakes.", + "", + "### Delivery Lead", + "Keep milestone one narrow.", + "", + "### Partner", + "The roadmap is right-sized.", + "", + "### Combatant", + "Avoid speculative automation.", + "", + "### Architect", + "Persist and render the meeting.", + "", + "### Moderator", + "Weighted synthesis says this is ready to plan.", + "", + "### Weighted Synthesis", + "This milestone is ready.", + "", + "### Confidence By Area", + "- User need: high", + "- Sequencing confidence: high", + "", + "### Recommended Route", + "planning", + "", + "## Slice Overview", + "| ID | Slice | Risk | Depends | Done | After this |", + "|----|-------|------|---------|------|------------|", + "| S01 | First slice | low | — | ⬜ | a real slice exists. 
|", "", ].join("\n")); @@ -60,3 +129,71 @@ test("#3405: plan-milestone roadmap with real slices still passes artifact verif rmSync(base, { recursive: true, force: true }); } }); + +test("plan-milestone roadmap routed back to research does not count as a verified artifact", () => { + const base = createFixtureBase(); + try { + writeRoadmap(base, "M001", [ + "# M001: Research draft", + "", + "## Vision", + "Real work.", + "", + "## Vision Alignment Meeting", + "", + "### Trigger", + "Need a top-level roadmap synthesis.", + "", + "### Product Manager", + "Ship the smallest credible milestone.", + "", + "### User Advocate", + "Keep the roadmap understandable.", + "", + "### Customer Panel", + "Advanced users want fidelity; maintainers want consistency.", + "", + "### Business", + "This should create a credible planning surface.", + "", + "### Researcher", + "Comparable systems are still unclear.", + "", + "### Delivery Lead", + "Keep milestone one narrow.", + "", + "### Partner", + "There is a plausible roadmap.", + "", + "### Combatant", + "Market expectations are not grounded enough yet.", + "", + "### Architect", + "Do not start execution on a weak roadmap.", + "", + "### Moderator", + "Weighted synthesis says keep researching.", + "", + "### Weighted Synthesis", + "The roadmap is still a draft.", + "", + "### Confidence By Area", + "- User need: high", + "- Comparable-system fit: low", + "", + "### Recommended Route", + "researching", + "", + "## Slice Overview", + "| ID | Slice | Risk | Depends | Done | After this |", + "|----|-------|------|---------|------|------------|", + "| S01 | First slice | low | — | ⬜ | a real slice exists. 
|", + "", + ].join("\n")); + + const result = verifyExpectedArtifact("plan-milestone", "M001", base); + assert.equal(result, false, "roadmaps routed back to research must fail verification"); + } finally { + rmSync(base, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/sf/tests/plan-milestone.test.ts b/src/resources/extensions/sf/tests/plan-milestone.test.ts index c7fe52782..2b7972792 100644 --- a/src/resources/extensions/sf/tests/plan-milestone.test.ts +++ b/src/resources/extensions/sf/tests/plan-milestone.test.ts @@ -5,7 +5,7 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { openDatabase, closeDatabase, getMilestone, getMilestoneSlices, getSlice, updateSliceStatus, deleteSlice, insertMilestone } from '../sf-db.ts'; -import { handlePlanMilestone } from '../tools/plan-milestone.ts'; +import { handlePlanMilestone, type PlanMilestoneParams } from '../tools/plan-milestone.ts'; import { parseRoadmap } from '../parsers-legacy.ts'; function makeTmpBase(): string { @@ -19,7 +19,7 @@ function cleanup(base: string): void { try { rmSync(base, { recursive: true, force: true }); } catch { /* noop */ } } -function validParams() { +function validParams(): PlanMilestoneParams { return { milestoneId: 'M001', title: 'DB-backed planning', @@ -38,6 +38,22 @@ function validParams() { definitionOfDone: ['Tests pass', 'Tool reruns cleanly'], requirementCoverage: 'Covers R015.', boundaryMapMarkdown: '| From | To | Produces | Consumes |\n|------|----|----------|----------|\n| S01 | terminal | roadmap | nothing |', + visionMeeting: { + trigger: 'Top-level roadmap spans multiple user, business, and delivery concerns.', + pm: 'Primary product move is making DB-backed planning the real source of truth.', + userAdvocate: 'Users need planning artifacts that stay coherent after repeated planning turns.', + customerPanel: 'Power users care about fidelity, maintainers care about consistency, and new adopters care about understandable 
roadmap output.', + business: 'The system needs a credible planning surface that can scale to more guided automation.', + researcher: 'Comparable planning systems treat requirements, roadmap, and state as a connected contract rather than separate notes.', + deliveryLead: 'Keep the first milestone narrow: DB-backed write path, projection, and prompt migration.', + partner: 'The proposed roadmap is small enough to land and strong enough to prove the architecture shift.', + combatant: 'Do not overbuild planning metadata before the DB-backed write path is working.', + architect: 'Persist the meeting and render it into the roadmap so the state machine can reason over it.', + moderator: 'Weighted synthesis: preserve the narrow DB-backed milestone, but require explicit stakeholder and market reasoning before execution.', + weightedSynthesis: 'User trust, product coherence, and business viability all point to a small but fully real DB-backed planning milestone. The strongest cut is avoiding speculative extra automation in this milestone.', + confidenceByArea: '- User need: high\n- Architecture fit: high\n- Comparable-system fit: medium\n- Sequencing confidence: high', + recommendedRoute: 'planning', + }, slices: [ { sliceId: 'S01', @@ -81,6 +97,8 @@ test('handlePlanMilestone writes milestone and slice planning state and renders assert.equal(milestone?.vision, 'Make planning write through the database.'); assert.deepEqual(milestone?.success_criteria, ['Planning persists', 'Roadmap renders from DB']); assert.equal(milestone?.verification_contract, 'Contract verification text'); + assert.equal(milestone?.vision_meeting?.recommendedRoute, 'planning'); + assert.match(milestone?.vision_meeting?.customerPanel ?? 
'', /Power users care about fidelity/); const slices = getMilestoneSlices('M001'); assert.equal(slices.length, 2); @@ -94,6 +112,9 @@ test('handlePlanMilestone writes milestone and slice planning state and renders assert.match(roadmap, /# M001: DB-backed planning/); assert.match(roadmap, /## Vision/); assert.match(roadmap, /Make planning write through the database\./); + assert.match(roadmap, /## Vision Alignment Meeting/); + assert.match(roadmap, /### Customer Panel/); + assert.match(roadmap, /### Weighted Synthesis/); assert.match(roadmap, /## Slice Overview/); assert.match(roadmap, /\| S01 \| Tool wiring \| medium \|/); assert.match(roadmap, /\| S02 \| Prompt migration \| low \| S01 \|/); @@ -102,6 +123,36 @@ test('handlePlanMilestone writes milestone and slice planning state and renders } }); +test('handlePlanMilestone persists weighted roadmap draft even when the meeting routes back to research', async () => { + const base = makeTmpBase(); + const dbPath = join(base, '.sf', 'sf.db'); + openDatabase(dbPath); + + try { + const baseVisionMeeting = validParams().visionMeeting!; + const result = await handlePlanMilestone({ + ...validParams(), + visionMeeting: { + ...baseVisionMeeting, + moderator: 'Weighted synthesis says comparable-product expectations are still too fuzzy for a final roadmap.', + confidenceByArea: '- User need: high\n- Architecture fit: medium\n- Comparable-system fit: low\n- Sequencing confidence: low', + recommendedRoute: 'researching', + }, + }, base); + assert.ok(!('error' in result), `unexpected error: ${'error' in result ? 
result.error : ''}`); + + const milestone = getMilestone('M001'); + assert.equal(milestone?.vision_meeting?.recommendedRoute, 'researching'); + + const roadmapPath = join(base, '.sf', 'milestones', 'M001', 'M001-ROADMAP.md'); + const roadmap = readFileSync(roadmapPath, 'utf-8'); + assert.match(roadmap, /### Recommended Route/); + assert.match(roadmap, /researching/); + } finally { + cleanup(base); + } +}); + test('handlePlanMilestone rejects invalid payloads', async () => { const base = makeTmpBase(); const dbPath = join(base, '.sf', 'sf.db'); @@ -117,6 +168,37 @@ test('handlePlanMilestone rejects invalid payloads', async () => { } }); +test('handlePlanMilestone scaffolds common milestone slices from templateId', async () => { + const base = makeTmpBase(); + const dbPath = join(base, '.sf', 'sf.db'); + openDatabase(dbPath); + + try { + const params = validParams(); + const result = await handlePlanMilestone({ + milestoneId: params.milestoneId, + title: params.title, + vision: params.vision, + templateId: 'bugfix', + successCriteria: params.successCriteria, + }, base); + assert.ok(!('error' in result), `unexpected error: ${'error' in result ? result.error : ''}`); + + const slices = getMilestoneSlices('M001'); + assert.equal(slices.length, 3); + assert.equal(slices[0]?.id, 'S01'); + assert.equal(slices[1]?.depends[0], 'S01'); + assert.match(slices[0]?.goal ?? 
'', /Capture the failing boundary/i); + + const roadmapPath = join(base, '.sf', 'milestones', 'M001', 'M001-ROADMAP.md'); + const roadmap = readFileSync(roadmapPath, 'utf-8'); + assert.match(roadmap, /Reproduce and bound the failure/); + assert.match(roadmap, /Verify and guard against regression/); + } finally { + cleanup(base); + } +}); + test('handlePlanMilestone surfaces render failures and does not clear parse-visible state on failure', async () => { const base = makeTmpBase(); const dbPath = join(base, '.sf', 'sf.db'); @@ -171,17 +253,18 @@ test('handlePlanMilestone reruns idempotently and updates existing planning stat try { const first = await handlePlanMilestone(validParams(), base); assert.ok(!('error' in first)); + const baseParams = validParams(); const second = await handlePlanMilestone({ - ...validParams(), + ...baseParams, vision: 'Updated vision', slices: [ { - ...validParams().slices[0], + ...baseParams.slices![0]!, goal: 'Updated goal', observabilityImpact: 'Updated observability', }, - validParams().slices[1], + baseParams.slices![1]!, ], }, base); assert.ok(!('error' in second)); @@ -207,6 +290,7 @@ test('handlePlanMilestone preserves completed slice status on re-plan (#2558)', // Initial plan — both slices start as "pending" const first = await handlePlanMilestone(validParams(), base); assert.ok(!('error' in first), `unexpected error: ${'error' in first ? first.error : ''}`); + const baseParams = validParams(); // Mark S01 as complete (simulates work done in a worktree) updateSliceStatus('M001', 'S01', 'complete', new Date().toISOString()); @@ -237,6 +321,7 @@ test('plan-milestone re-plan preserves completed status and updates slice fields // Initial plan — both slices start as "pending" const first = await handlePlanMilestone(validParams(), base); assert.ok(!('error' in first), `unexpected error: ${'error' in first ? 
first.error : ''}`); + const baseParams = validParams(); // Mark S01 as complete (simulates work done in worktree, then reconciled) updateSliceStatus('M001', 'S01', 'complete', new Date().toISOString()); @@ -248,10 +333,10 @@ test('plan-milestone re-plan preserves completed status and updates slice fields // 2. Update S01's non-status fields (title, risk, depends, demo) // 3. Keep S02 as "pending" const updatedParams = { - ...validParams(), + ...baseParams, slices: [ - { ...validParams().slices[0], title: 'Updated S01 title', risk: 'high' }, - validParams().slices[1], + { ...baseParams.slices![0]!, title: 'Updated S01 title', risk: 'high' }, + baseParams.slices![1]!, ], }; const second = await handlePlanMilestone(updatedParams, base); diff --git a/src/resources/extensions/sf/tests/plan-slice.test.ts b/src/resources/extensions/sf/tests/plan-slice.test.ts index acdd14883..a8fa7f62f 100644 --- a/src/resources/extensions/sf/tests/plan-slice.test.ts +++ b/src/resources/extensions/sf/tests/plan-slice.test.ts @@ -5,7 +5,7 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { openDatabase, closeDatabase, insertMilestone, insertSlice, getSlice, getSliceTasks, getTask } from '../sf-db.ts'; -import { handlePlanSlice } from '../tools/plan-slice.ts'; +import { handlePlanSlice, type PlanSliceParams } from '../tools/plan-slice.ts'; import { parsePlan } from '../parsers-legacy.ts'; import { parseTaskPlanFile } from '../files.ts'; @@ -25,7 +25,7 @@ function seedParentSlice(): void { insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Planning slice', status: 'pending', demo: 'Rendered plans exist.' 
}); } -function validParams() { +function validParams(): PlanSliceParams { return { milestoneId: 'M001', sliceId: 'S02', @@ -34,6 +34,22 @@ function validParams() { proofLevel: 'integration', integrationClosure: 'Planning handlers now write DB rows and render plan artifacts.', observabilityImpact: '- Validation failures return structured errors\n- Cache invalidation is proven by parse-visible state updates', + adversarialReview: { + partner: 'The slice is tightly scoped and the task split aligns with the actual write/render boundary.', + combatant: 'The biggest risk is letting ceremony state exist in the prompt but disappear from persisted slice state.', + architect: 'The plan must round-trip through DB rows, markdown renders, and recovery flows without losing its moderation context.', + }, + planningMeeting: { + trigger: 'The slice changes planning-system behavior and needs an explicit route before execution.', + pm: 'Keep the slice focused on persistence and render boundaries, not broader planner redesign.', + researcher: 'The surrounding workflow already reads ceremony sections from rendered plan artifacts and DB-backed state.', + partner: 'The current task split is enough if the review state is stored and rendered faithfully.', + combatant: 'Without persistence proof, the meeting is just commentary and the system can drift.', + architect: 'Persist both the adversarial review and the meeting route in the same transactional planning write.', + moderator: 'Weighted synthesis says this slice is ready to plan and execute once the ceremony state is preserved.', + recommendedRoute: 'planning', + confidenceSummary: 'High confidence after proving DB + markdown persistence of the ceremony fields.', + }, tasks: [ { taskId: 'T01', @@ -75,6 +91,9 @@ test('handlePlanSlice writes slice/task planning state and renders plan artifact assert.ok(slice); assert.equal(slice?.goal, 'Persist slice planning through the DB.'); assert.equal(slice?.proof_level, 'integration'); + 
assert.match(slice?.adversarial_partner ?? '', /tightly scoped/); + assert.equal(slice?.planning_meeting?.recommendedRoute, 'planning'); + assert.match(slice?.planning_meeting?.confidenceSummary ?? '', /High confidence/); const tasks = getSliceTasks('M001', 'S02'); assert.equal(tasks.length, 2); @@ -88,6 +107,11 @@ test('handlePlanSlice writes slice/task planning state and renders plan artifact assert.equal(parsedPlan.goal, 'Persist slice planning through the DB.'); assert.equal(parsedPlan.tasks.length, 2); assert.equal(parsedPlan.tasks[0]?.id, 'T01'); + const renderedPlan = readFileSync(planPath, 'utf-8'); + assert.match(renderedPlan, /## Adversarial Review/); + assert.match(renderedPlan, /## Planning Meeting/); + assert.match(renderedPlan, /### Recommended Route/); + assert.match(renderedPlan, /planning/); const taskPlanPath = join(base, '.sf', 'milestones', 'M001', 'slices', 'S02', 'tasks', 'T01-PLAN.md'); assert.ok(existsSync(taskPlanPath), 'task plan should be rendered to disk'); @@ -98,6 +122,43 @@ test('handlePlanSlice writes slice/task planning state and renders plan artifact } }); +test('handlePlanSlice persists a routed-back planning meeting without pretending the slice is execution-ready', async () => { + const base = makeTmpBase(); + openDatabase(join(base, '.sf', 'sf.db')); + + try { + seedParentSlice(); + const baseParams = validParams(); + const baseMeeting = baseParams.planningMeeting!; + + const result = await handlePlanSlice({ + ...baseParams, + planningMeeting: { + trigger: baseMeeting.trigger, + pm: baseMeeting.pm, + researcher: baseMeeting.researcher, + partner: baseMeeting.partner, + combatant: baseMeeting.combatant, + architect: baseMeeting.architect, + moderator: 'Keep researching before execution; the premise is still too weak.', + recommendedRoute: 'researching', + confidenceSummary: 'Low confidence until the premise is narrowed.', + }, + }, base); + assert.ok(!('error' in result), `unexpected error: ${'error' in result ? 
result.error : ''}`); + + const slice = getSlice('M001', 'S02'); + assert.equal(slice?.planning_meeting?.recommendedRoute, 'researching'); + + const renderedPlan = readFileSync(join(base, '.sf', 'milestones', 'M001', 'slices', 'S02', 'S02-PLAN.md'), 'utf-8'); + assert.match(renderedPlan, /## Planning Meeting/); + assert.match(renderedPlan, /researching/); + assert.match(renderedPlan, /Low confidence/); + } finally { + cleanup(base); + } +}); + test('handlePlanSlice rejects invalid payloads', async () => { const base = makeTmpBase(); openDatabase(join(base, '.sf', 'sf.db')); diff --git a/src/resources/extensions/sf/tests/preferences.test.ts b/src/resources/extensions/sf/tests/preferences.test.ts index 6e78beea5..eea20a9e4 100644 --- a/src/resources/extensions/sf/tests/preferences.test.ts +++ b/src/resources/extensions/sf/tests/preferences.test.ts @@ -609,6 +609,40 @@ test("experimental.rtk defaults to off in new project preferences", () => { assert.equal(prefs!.experimental?.rtk, undefined); }); +test("experimental dispatch rules validate with variants and active variant", () => { + const result = validatePreferences({ + experimental: { + dispatch_rules: { + order: ["rule-a", "rule-b"], + variants: { + beta: ["rule-b", "rule-a"], + }, + active_variant: "beta", + }, + }, + }); + + assert.equal(result.errors.length, 0); + assert.deepEqual(result.preferences.experimental?.dispatch_rules?.order, ["rule-a", "rule-b"]); + assert.deepEqual(result.preferences.experimental?.dispatch_rules?.variants?.beta, ["rule-b", "rule-a"]); + assert.equal(result.preferences.experimental?.dispatch_rules?.active_variant, "beta"); +}); + +test("remote questions preferences validate timeout auto-resolution settings", () => { + const result = validatePreferences({ + remote_questions: { + channel: "discord", + channel_id: "123456789012345678", + auto_resolve_on_timeout: true, + auto_resolve_strategy: "recommended-option", + }, + }); + + assert.equal(result.errors.length, 0); + 
assert.equal(result.preferences.remote_questions?.auto_resolve_on_timeout, true); + assert.equal(result.preferences.remote_questions?.auto_resolve_strategy, "recommended-option"); +}); + // ── Codebase Map Preferences ───────────────────────────────────────────────── test("codebase preferences validate and pass through correctly", () => { @@ -617,12 +651,18 @@ test("codebase preferences validate and pass through correctly", () => { exclude_patterns: ["docs/", "fixtures/"], max_files: 1000, collapse_threshold: 15, + project_rag: "auto", + project_rag_server: "project", + project_rag_auto_index: false, }, }); assert.equal(result.errors.length, 0); assert.deepEqual(result.preferences.codebase?.exclude_patterns, ["docs/", "fixtures/"]); assert.equal(result.preferences.codebase?.max_files, 1000); assert.equal(result.preferences.codebase?.collapse_threshold, 15); + assert.equal(result.preferences.codebase?.project_rag, "auto"); + assert.equal(result.preferences.codebase?.project_rag_server, "project"); + assert.equal(result.preferences.codebase?.project_rag_auto_index, false); }); test("codebase preferences reject invalid types", () => { @@ -631,11 +671,17 @@ test("codebase preferences reject invalid types", () => { exclude_patterns: "not-an-array" as any, max_files: -5, collapse_threshold: 0, + project_rag: "yes" as any, + project_rag_server: "", + project_rag_auto_index: "yes" as any, }, }); assert.ok(result.errors.some(e => e.includes("exclude_patterns must be an array"))); assert.ok(result.errors.some(e => e.includes("max_files must be a positive"))); assert.ok(result.errors.some(e => e.includes("collapse_threshold must be a positive"))); + assert.ok(result.errors.some(e => e.includes("project_rag must be one of"))); + assert.ok(result.errors.some(e => e.includes("project_rag_server must be a non-empty string"))); + assert.ok(result.errors.some(e => e.includes("project_rag_auto_index must be a boolean"))); }); test("codebase preferences warn on unknown keys", () => { 
@@ -660,6 +706,9 @@ test("codebase preferences parse from markdown frontmatter", () => { ' - ".cache/"', " max_files: 800", " collapse_threshold: 10", + " project_rag: required", + " project_rag_server: project", + " project_rag_auto_index: true", "---", ].join("\n"); const prefs = parsePreferencesMarkdown(content); @@ -669,4 +718,7 @@ test("codebase preferences parse from markdown frontmatter", () => { assert.deepEqual(result.preferences.codebase?.exclude_patterns, ["docs/", ".cache/"]); assert.equal(result.preferences.codebase?.max_files, 800); assert.equal(result.preferences.codebase?.collapse_threshold, 10); + assert.equal(result.preferences.codebase?.project_rag, "required"); + assert.equal(result.preferences.codebase?.project_rag_server, "project"); + assert.equal(result.preferences.codebase?.project_rag_auto_index, true); }); diff --git a/src/resources/extensions/sf/tests/projection-regression.test.ts b/src/resources/extensions/sf/tests/projection-regression.test.ts index 5ca9bbed5..fa075dac1 100644 --- a/src/resources/extensions/sf/tests/projection-regression.test.ts +++ b/src/resources/extensions/sf/tests/projection-regression.test.ts @@ -84,6 +84,7 @@ function makeMilestoneRow() { definition_of_done: [], requirement_coverage: '', boundary_map_markdown: '', + vision_meeting: null, }; } diff --git a/src/resources/extensions/sf/tests/prompt-contracts.test.ts b/src/resources/extensions/sf/tests/prompt-contracts.test.ts index 3ed62c305..9a63a65c2 100644 --- a/src/resources/extensions/sf/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/sf/tests/prompt-contracts.test.ts @@ -38,10 +38,17 @@ test("workflow-start prompt defaults to autonomy instead of per-phase confirmati test("system prompt references CODEBASE.md and /sf codebase", () => { const prompt = readPrompt("system"); assert.match(prompt, /CODEBASE\.md/); - assert.match(prompt, /\/sf codebase \[generate\|update\|stats\]/); + assert.match(prompt, /\/sf codebase 
\[generate\|update\|stats\|rag\]/); assert.match(prompt, /auto-refreshes it when tracked files change/i); }); +test("system prompt routes broad code search through optional Project RAG when available", () => { + const prompt = readPrompt("system"); + assert.match(prompt, /PROJECT CODE INTELLIGENCE/); + assert.match(prompt, /Project RAG is configured/); + assert.match(prompt, /hybrid semantic \+ BM25 code retrieval/i); +}); + test("system prompt hard rules forbid fabricating user responses", () => { const prompt = readPrompt("system"); assert.match(prompt, /never fabricate, simulate, or role-play user responses/i); @@ -76,6 +83,27 @@ test("guided milestone discussion scopes depth verification to the milestone id" assert.doesNotMatch(prompt, /depth_verification_confirm" — this enables the write-gate downstream/i, "legacy global depth gate wording should be gone"); }); +test("headless milestone creation builds project knowledge before final context", () => { + const prompt = readPrompt("discuss-headless"); + assert.match(prompt, /\.sf\/CODEBASE\.md/); + assert.match(prompt, /stack signals, critical paths, verification commands, skill needs/i); + assert.match(prompt, /Discover needed skills/i); + assert.match(prompt, /Use code intelligence when available/i); + assert.match(prompt, /A headless run that only prints reflection has failed its contract/i); + assert.match(prompt, /Build project knowledge first/i); + assert.doesNotMatch(prompt, /\.sf\/CODEBASE-ANALYSIS\.md/); +}); + +test("headless milestone creation preserves depth gate and draft fallback", () => { + const prompt = readPrompt("discuss-headless"); + assert.match(prompt, /depth_verification_\{\{milestoneId\}\}_confirm/); + assert.match(prompt, /Proceed with final context \(Recommended\)/); + assert.match(prompt, /\{\{milestoneId\}\}-CONTEXT-DRAFT\.md/); + assert.match(prompt, /Do \*\*not\*\* call `sf_plan_milestone`/); + assert.match(prompt, /Milestone \{\{milestoneId\}\} drafted for discussion\./); + 
assert.doesNotMatch(prompt, /\*\*DO NOT ask the user any questions\*\*/); +}); + test("queue prompt requires waiting for user response between rounds", () => { const prompt = readPrompt("queue"); assert.match(prompt, /Never fabricate or simulate user input during this discussion/i); diff --git a/src/resources/extensions/sf/tests/remote-questions.test.ts b/src/resources/extensions/sf/tests/remote-questions.test.ts index b97a7de03..52cfb1ce9 100644 --- a/src/resources/extensions/sf/tests/remote-questions.test.ts +++ b/src/resources/extensions/sf/tests/remote-questions.test.ts @@ -174,6 +174,11 @@ test("sanitizeError preserves short safe messages", () => { assert.equal(sanitizeError("Connection refused"), "Connection refused"); }); +test("onboarding remote config writes canonical lowercase global preferences", () => { + const source = readFileSync(join(process.cwd(), "src/remote-questions-config.ts"), "utf-8"); + assert.match(source, /join\(appRoot,\s*"preferences\.md"\)/); + assert.doesNotMatch(source, /join\(appRoot,\s*"PREFERENCES\.md"\)/); +}); // ═══════════════════════════════════════════════════════════════════════════ // Discord Parity Tests @@ -804,23 +809,28 @@ test("ask-user-questions source-level: race path checks both hasRemote and ctx.h ); }); -test("ask-user-questions source-level: race treats remote timeout as non-win", () => { - // Regression: the whole point of the race is that a remote timeout should NOT - // block the local TUI. The race helper must filter out timed_out results. +test("ask-user-questions source-level: race accepts only usable remote results", () => { + // Plain remote timeouts must not block the local TUI, but supervised + // timeout auto-resolution is a real answer and must be accepted. 
const src = readFileSync( join(__dirname, "..", "..", "ask-user-questions.ts"), "utf-8", ); - const raceFnStart = src.indexOf("async function raceRemoteAndLocal("); - const raceFnEnd = src.indexOf("\n}", raceFnStart); - const raceFnBody = src.slice(raceFnStart, raceFnEnd); assert.ok( - raceFnBody.includes("timed_out"), - "raceRemoteAndLocal should check for timed_out in remote results", + src.includes("export function isUsableRemoteQuestionResult"), + "ask-user-questions.ts should centralize remote result usability", ); assert.ok( - raceFnBody.includes("details?.error"), - "raceRemoteAndLocal should check for error in remote results", + src.includes("details?.timed_out && details.autoResolved !== true"), + "plain timed_out results should remain unusable unless auto-resolved", + ); + assert.ok( + src.includes("details?.error"), + "remote errors should remain unusable", + ); + assert.ok( + src.includes("isUsableRemoteQuestionResult(details)"), + "raceRemoteAndLocal should use the shared remote result usability check", ); }); diff --git a/src/resources/extensions/sf/tests/replan-handler.test.ts b/src/resources/extensions/sf/tests/replan-handler.test.ts index a24f2583a..38d1ed756 100644 --- a/src/resources/extensions/sf/tests/replan-handler.test.ts +++ b/src/resources/extensions/sf/tests/replan-handler.test.ts @@ -244,6 +244,60 @@ test('handleReplanSlice succeeds when modifying only incomplete tasks', async () } }); +test('handleReplanSlice updates slice-level ceremony state when the replan changes the reasoning', async () => { + const base = makeTmpBase(); + openDatabase(join(base, '.sf', 'sf.db')); + + try { + seedSliceWithTasks({ t01Status: 'complete', t02Status: 'pending', t03Status: 'pending' }); + + const result = await handleReplanSlice({ + ...validReplanParams(), + goal: 'Re-scope the slice around the newly discovered API boundary.', + successCriteria: '- Remaining tasks use the new API\n- Replan preserves the updated rationale', + proofLevel: 'integration', + 
integrationClosure: 'The replanned slice proves the new API path end-to-end.', + observabilityImpact: 'The replan makes the changed API boundary explicit in planning artifacts.', + adversarialReview: { + partner: 'The replan is sufficient because it narrows the slice to the now-known API boundary.', + combatant: 'The old plan shape is invalid because it assumed the previous API contract still held.', + architect: 'The slice contract and the task plan both need to reflect the new moderator route.', + }, + planningMeeting: { + trigger: 'The blocker changed the slice-level rationale, not just the next task.', + pm: 'Cut scope to the new API boundary and avoid carrying stale assumptions.', + researcher: 'The blocker summary showed the original API contract was wrong.', + partner: 'The replanned slice is now grounded in observed behavior.', + combatant: 'Do not leave the old ceremony state behind or the plan will lie.', + architect: 'Persist the replanning decision in the same slice row that renders PLAN.md.', + moderator: 'Proceed with the narrower replan and keep execution-ready status explicit.', + recommendedRoute: 'planning', + confidenceSummary: 'High confidence after narrowing the slice to the observed API boundary.', + }, + }, base); + assert.ok(!('error' in result), `unexpected error: ${'error' in result ? result.error : ''}`); + + const sliceRow = _getAdapter()!.prepare(` + SELECT goal, success_criteria, proof_level, integration_closure, observability_impact, + adversarial_partner, adversarial_combatant, adversarial_architect, planning_meeting_json + FROM slices WHERE milestone_id = 'M001' AND id = 'S01' + `).get() as Record; + + assert.equal(sliceRow['goal'], 'Re-scope the slice around the newly discovered API boundary.'); + assert.match(String(sliceRow['adversarial_partner'] ?? ''), /narrows the slice/); + assert.match(String(sliceRow['adversarial_combatant'] ?? ''), /old plan shape is invalid/); + assert.match(String(sliceRow['planning_meeting_json'] ?? 
''), /observed API boundary/); + + const planPath = join(base, '.sf', 'milestones', 'M001', 'slices', 'S01', 'S01-PLAN.md'); + const renderedPlan = readFileSync(planPath, 'utf-8'); + assert.match(renderedPlan, /## Adversarial Review/); + assert.match(renderedPlan, /## Planning Meeting/); + assert.match(renderedPlan, /Re-scope the slice around the newly discovered API boundary/); + } finally { + cleanup(base); + } +}); + test('handleReplanSlice cache invalidation: re-parsing PLAN.md reflects mutations', async () => { const base = makeTmpBase(); openDatabase(join(base, '.sf', 'sf.db')); diff --git a/src/resources/extensions/sf/tests/rule-registry.test.ts b/src/resources/extensions/sf/tests/rule-registry.test.ts index d0425937c..c72507e8c 100644 --- a/src/resources/extensions/sf/tests/rule-registry.test.ts +++ b/src/resources/extensions/sf/tests/rule-registry.test.ts @@ -5,6 +5,9 @@ import assert from 'node:assert/strict'; import { test, describe, beforeEach } from "node:test"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; import { RuleRegistry, getRegistry, @@ -61,6 +64,37 @@ function makeContext(phase: string): DispatchContext { }; } +function withProjectPreferences(content: string, run: () => Promise | T): Promise | T { + const originalCwd = process.cwd(); + const originalSfHome = process.env.SF_HOME; + const tempProject = mkdtempSync(join(tmpdir(), "sf-dispatch-prefs-")); + const tempHome = mkdtempSync(join(tmpdir(), "sf-dispatch-home-")); + mkdirSync(join(tempProject, ".sf"), { recursive: true }); + writeFileSync(join(tempProject, ".sf", "preferences.md"), content, "utf-8"); + process.env.SF_HOME = tempHome; + process.chdir(tempProject); + + const cleanup = () => { + process.chdir(originalCwd); + if (originalSfHome === undefined) delete process.env.SF_HOME; + else process.env.SF_HOME = originalSfHome; + rmSync(tempProject, { recursive: true, force: true }); + 
rmSync(tempHome, { recursive: true, force: true }); + }; + + try { + const result = run(); + if (result && typeof (result as Promise).then === "function") { + return (result as Promise).finally(cleanup); + } + cleanup(); + return result; + } catch (error) { + cleanup(); + throw error; + } +} + // ─── Tests ──────────────────────────────────────────────────────────────── describe("RuleRegistry", () => { @@ -262,6 +296,68 @@ describe("RuleRegistry", () => { } }); + test("evaluateDispatch live-reloads dispatch rule order from preferences", async () => { + const ruleFirst = mockDispatchRule("rule-first", "planning"); + const ruleSecond = mockDispatchRule("rule-second", "planning"); + const registry = new RuleRegistry([ruleFirst, ruleSecond]); + + await withProjectPreferences([ + "---", + "version: 1", + "experimental:", + " dispatch_rules:", + " order:", + " - rule-second", + "---", + "", + ].join("\n"), async () => { + const result = await registry.evaluateDispatch(makeContext("planning")); + assert.equal(result.action, "dispatch"); + if (result.action === "dispatch") { + assert.equal(result.unitType, "test-planning"); + assert.equal(result.prompt, "Prompt for planning"); + assert.equal(result.matchedRule, "rule-second"); + } + assert.deepEqual( + registry.listRules().filter((rule) => rule.when === "dispatch").map((rule) => rule.name).slice(0, 2), + ["rule-second", "rule-first"], + ); + }); + }); + + test("getDispatchRuleNames reflects active dispatch variant without registry re-init", async () => { + const first: RegistryRule = { + name: "variant-a", + when: "dispatch", + evaluation: "first-match", + where: async () => ({ action: "dispatch", unitType: "a", unitId: "id", prompt: "a" }), + then: () => {}, + }; + const second: RegistryRule = { + name: "variant-b", + when: "dispatch", + evaluation: "first-match", + where: async () => ({ action: "dispatch", unitType: "b", unitId: "id", prompt: "b" }), + then: () => {}, + }; + initRegistry([first, second]); + + await 
withProjectPreferences([ + "---", + "version: 1", + "experimental:", + " dispatch_rules:", + " active_variant: beta", + " variants:", + " beta:", + " - variant-b", + "---", + "", + ].join("\n"), async () => { + assert.deepEqual(getDispatchRuleNames().slice(0, 2), ["variant-b", "variant-a"]); + }); + }); + // ── Dispatch rule conversion tests ───────────────────────────────── test("convertDispatchRules produces correct count of RegistryRule objects", () => { diff --git a/src/resources/extensions/sf/tests/state-corruption-2945.test.ts b/src/resources/extensions/sf/tests/state-corruption-2945.test.ts index 5ab7efd58..7e705c34d 100644 --- a/src/resources/extensions/sf/tests/state-corruption-2945.test.ts +++ b/src/resources/extensions/sf/tests/state-corruption-2945.test.ts @@ -70,6 +70,7 @@ function makeMilestoneRow(overrides: Partial = {}): MilestoneRow { requirement_coverage: "", boundary_map_markdown: "", ...overrides, + vision_meeting: overrides.vision_meeting ?? null, }; } diff --git a/src/resources/extensions/sf/tests/state-machine-full-walkthrough.test.ts b/src/resources/extensions/sf/tests/state-machine-full-walkthrough.test.ts index 65fe52cc2..d8d1babc2 100644 --- a/src/resources/extensions/sf/tests/state-machine-full-walkthrough.test.ts +++ b/src/resources/extensions/sf/tests/state-machine-full-walkthrough.test.ts @@ -193,6 +193,20 @@ function standardPlan(): string { "**Goal:** Test.", "**Demo:** Tests pass.", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "The task split matches the actual artifact boundaries, so finishing the tasks should make the slice demonstrably true.", + "", + "### Combatant Review", + "", + "The main failure mode is a plan that lists tasks without enough execution detail. 
The per-task plans and verification steps are the cheap falsifier for that risk.", + "", + "### Architect Review", + "", + "The slice stays safe because it proves the local boundary and does not claim broader milestone completion on its own.", + "", "## Tasks", "", "- [ ] **T01: First Task** `est:10m`", @@ -211,6 +225,20 @@ function allDonePlan(): string { "**Goal:** Test.", "**Demo:** Tests pass.", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "Completing both tasks is enough to satisfy the slice contract.", + "", + "### Combatant Review", + "", + "If the summaries do not exist, the checked boxes would be lying. The tests around execution and summarizing catch that.", + "", + "### Architect Review", + "", + "This plan only closes the slice-level boundary and still requires explicit slice completion.", + "", "## Tasks", "", "- [x] **T01: First Task** `est:10m`", @@ -229,6 +257,20 @@ function partialDonePlan(): string { "**Goal:** Test.", "**Demo:** Tests pass.", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "The remaining unchecked task correctly keeps the slice in execution.", + "", + "### Combatant Review", + "", + "If the plan artifact were incomplete, execution should not start. 
This fixture includes the review to isolate task-progress behavior.", + "", + "### Architect Review", + "", + "This isolates progress-state behavior without weakening the planning contract.", + "", "## Tasks", "", "- [x] **T01: First Task** `est:10m`", @@ -352,6 +394,67 @@ describe("state-machine-full-walkthrough", () => { // ═══════════════════════════════════════════════════════════════════════════ describe("Phase 5: planning", () => { + test("DB path: milestone with slices but missing vision meeting stays in planning", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".sf", "sf.db"); + openDatabase(dbPath); + + insertMilestone({ + id: "M001", + title: "Test Milestone", + status: "active", + planning: { + vision: "Test state machine.", + }, + }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First Slice", status: "pending", risk: "low", depends: [] }); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "planning"); + assert.match(state.nextAction, /weighted vision alignment meeting/i); + assert.match(state.nextAction, /missing vision alignment meeting/i); + }); + + test("DB path: milestone routed back to researching stays in planning", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".sf", "sf.db"); + openDatabase(dbPath); + + insertMilestone({ + id: "M001", + title: "Test Milestone", + status: "active", + planning: { + vision: "Test state machine.", + visionMeeting: { + trigger: "Need roadmap agreement.", + pm: "Keep the milestone focused.", + userAdvocate: "Users need clarity.", + customerPanel: "Operators want confidence before execution.", + business: "Do not start weak roadmaps.", + researcher: "Comparable approaches are still unclear.", + deliveryLead: "Cut scope before execution.", + partner: "One path might work.", + combatant: "The premise is still too weak.", + architect: "Route back until the roadmap is grounded.", + moderator: "Weighted 
synthesis says keep researching.", + weightedSynthesis: "The roadmap is still a draft.", + confidenceByArea: "- sequencing: low", + recommendedRoute: "researching", + }, + }, + }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First Slice", status: "pending", risk: "low", depends: [] }); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "planning"); + assert.match(state.nextAction, /routed back to researching/i); + }); + test("roadmap with slice, no PLAN file → planning", async () => { const base = createFixtureBase(); writeRoadmap(base, "M001", standardRoadmap()); @@ -399,6 +502,29 @@ describe("state-machine-full-walkthrough", () => { assert.equal(state.phase, "planning", "missing task plan files should stay in planning"); }); + test("PLAN with tasks but missing adversarial review → planning", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + const dir = join(base, ".sf", "milestones", "M001", "slices", "S01"); + mkdirSync(join(dir, "tasks"), { recursive: true }); + writeFileSync(join(dir, "S01-PLAN.md"), [ + "# S01: First Slice", + "", + "**Goal:** Test.", + "**Demo:** Tests pass.", + "", + "## Tasks", + "", + "- [ ] **T01: First Task** `est:10m`", + " First task description.", + ].join("\n")); + writeFileSync(join(dir, "tasks", "T01-PLAN.md"), "# T01 Plan\n"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "planning", "plan without adversarial review should remain in planning"); + }); + test("PLAN with all task plan files → NOT planning", async () => { const base = createFixtureBase(); writeRoadmap(base, "M001", standardRoadmap()); @@ -558,6 +684,20 @@ describe("state-machine-full-walkthrough", () => { "**Goal:** Test.", "**Demo:** Tests pass.", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "Heading-style task plans can still reconcile via SUMMARY files.", + "", + "### Combatant 
Review", + "", + "Without review, the parser could mistake headings for scaffolding.", + "", + "### Architect Review", + "", + "This keeps the reconciliation test focused on task completion rather than plan validity.", + "", "## Tasks", "", "### T01: First Task", @@ -1596,6 +1736,20 @@ describe("state-machine-full-walkthrough", () => { "**Goal:** Test.", "**Demo:** Tests pass.", "", + "## Adversarial Review", + "", + "### Partner Review", + "", + "All three tasks are concrete and can reconcile via their SUMMARY artifacts.", + "", + "### Combatant Review", + "", + "A three-task plan without review would be too easy to accept accidentally, so keep the gate explicit here.", + "", + "### Architect Review", + "", + "This fixture proves multiple stale tasks reconcile in one pass without bypassing plan quality.", + "", "## Tasks", "", "- [ ] **T01: First** `est:10m`", diff --git a/src/resources/extensions/sf/tests/workflow-manifest.test.ts b/src/resources/extensions/sf/tests/workflow-manifest.test.ts index 2576b828b..40056c964 100644 --- a/src/resources/extensions/sf/tests/workflow-manifest.test.ts +++ b/src/resources/extensions/sf/tests/workflow-manifest.test.ts @@ -117,6 +117,79 @@ test('workflow-manifest: snapshotState captures tasks', () => { } }); +test('workflow-manifest: snapshotState preserves planning ceremony fields', () => { + const base = tempDir(); + openDatabase(tempDbPath(base)); + try { + insertMilestone({ + id: 'M001', + title: 'Ceremony Milestone', + planning: { + vision: 'Keep planning context intact.', + visionMeeting: { + trigger: 'Roadmap needs weighted synthesis.', + pm: 'Keep the milestone tight.', + userAdvocate: 'Users need stable planning state.', + customerPanel: 'Operators want fidelity.', + business: 'Planning should survive recovery.', + researcher: 'Comparable systems persist review context.', + deliveryLead: 'Avoid losing context between turns.', + partner: 'This is enough to move forward.', + combatant: 'Do not drop the discussion state 
on restore.', + architect: 'Persist the meeting in the same state snapshot.', + moderator: 'Weighted synthesis says this is ready to plan.', + weightedSynthesis: 'Keep the meeting state with the roadmap.', + confidenceByArea: '- sequencing: high', + recommendedRoute: 'planning', + }, + }, + }); + insertSlice({ + id: 'S01', + milestoneId: 'M001', + title: 'Ceremony Slice', + planning: { + goal: 'Keep slice planning context intact.', + adversarialReview: { + partner: 'The slice plan is grounded enough.', + combatant: 'The missing integration point is manifest restore.', + architect: 'Persist both review and meeting fields together.', + }, + planningMeeting: { + trigger: 'Ambiguity remained after adversarial review.', + pm: 'Keep the execution increment narrow.', + researcher: 'The current code drops planning state in export/restore.', + partner: 'The current plan is close.', + combatant: 'Not if restore loses the meeting.', + architect: 'Fix manifest and reconcile paths together.', + moderator: 'Do the persistence fix before calling this execution-ready.', + recommendedRoute: 'planning', + confidenceSummary: 'High confidence after export/restore proof.', + }, + }, + }); + insertTask({ + id: 'T01', + sliceId: 'S01', + milestoneId: 'M001', + title: 'Preserve full plan', + status: 'pending', + }); + _getAdapter()!.prepare(`UPDATE tasks SET full_plan_md = ? WHERE milestone_id = ? AND slice_id = ? AND id = ?`) + .run('# T01 Plan\n\nKeep the task plan body.', 'M001', 'S01', 'T01'); + + const snap = snapshotState(); + assert.equal(snap.milestones[0]?.vision_meeting?.recommendedRoute, 'planning'); + assert.match(snap.milestones[0]?.vision_meeting?.customerPanel ?? '', /Operators want fidelity/); + assert.match(snap.slices[0]?.adversarial_partner ?? '', /grounded enough/); + assert.equal(snap.slices[0]?.planning_meeting?.recommendedRoute, 'planning'); + assert.match(snap.tasks[0]?.full_plan_md ?? 
'', /Keep the task plan body/); + } finally { + closeDatabase(); + cleanupDir(base); + } +}); + // ─── bootstrapFromManifest ──────────────────────────────────────────────── test('workflow-manifest: bootstrapFromManifest returns false when no manifest file', () => { @@ -166,6 +239,79 @@ test('workflow-manifest: bootstrapFromManifest restores DB from manifest (round- } }); +test('workflow-manifest: bootstrapFromManifest restores planning ceremony fields', () => { + const base = tempDir(); + openDatabase(tempDbPath(base)); + try { + insertMilestone({ + id: 'M001', + title: 'Restored Ceremony Milestone', + planning: { + vision: 'Round-trip the planning context.', + visionMeeting: { + trigger: 'Need roadmap agreement.', + pm: 'Keep the roadmap focused.', + userAdvocate: 'Do not lose planning context.', + customerPanel: 'Advanced users expect nuance.', + business: 'Recovery should preserve planning fidelity.', + researcher: 'The export/restore path is part of the workflow contract.', + deliveryLead: 'Keep the fix narrow and testable.', + partner: 'The meeting is specific enough.', + combatant: 'It is useless if restore drops it.', + architect: 'Round-trip the meeting JSON as part of milestone restore.', + moderator: 'Planning can proceed once restore is lossless.', + weightedSynthesis: 'Persist the whole meeting.', + confidenceByArea: '- restore fidelity: high', + recommendedRoute: 'planning', + }, + }, + }); + insertSlice({ + id: 'S01', + milestoneId: 'M001', + title: 'Restored Ceremony Slice', + planning: { + goal: 'Round-trip slice planning context.', + adversarialReview: { + partner: 'The slice is focused enough.', + combatant: 'Restore has to keep the review body.', + architect: 'Manifest loss here would break replans and worktrees.', + }, + planningMeeting: { + trigger: 'There was still ambiguity.', + pm: 'Cut to the restore boundary.', + researcher: 'The DB stores the meeting already.', + partner: 'The change is local.', + combatant: 'Only if export and restore 
both carry it.', + architect: 'Test both directions.', + moderator: 'Proceed after round-trip proof.', + recommendedRoute: 'planning', + confidenceSummary: 'High after round-trip.', + }, + }, + }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'Restore it', status: 'complete' }); + _getAdapter()!.prepare(`UPDATE tasks SET full_plan_md = ? WHERE milestone_id = ? AND slice_id = ? AND id = ?`) + .run('# T01 Plan\n\nRestored full plan.', 'M001', 'S01', 'T01'); + writeManifest(base); + closeDatabase(); + + openDatabase(path.join(base, 'roundtrip.db')); + const result = bootstrapFromManifest(base); + assert.equal(result, true); + + const snap = snapshotState(); + assert.equal(snap.milestones[0]?.vision_meeting?.recommendedRoute, 'planning'); + assert.match(snap.milestones[0]?.vision_meeting?.researcher ?? '', /workflow contract/); + assert.match(snap.slices[0]?.adversarial_architect ?? '', /worktrees/); + assert.equal(snap.slices[0]?.planning_meeting?.recommendedRoute, 'planning'); + assert.match(snap.tasks[0]?.full_plan_md ?? 
'', /Restored full plan/); + } finally { + closeDatabase(); + cleanupDir(base); + } +}); + // ─── snapshotState: numeric column coercion (#2962) ───────────────────── test('workflow-manifest: snapshotState coerces string placeholders in numeric columns to null (#2962)', () => { diff --git a/src/resources/extensions/sf/tests/workflow-projections.test.ts b/src/resources/extensions/sf/tests/workflow-projections.test.ts index fc02c32cb..f44c60c26 100644 --- a/src/resources/extensions/sf/tests/workflow-projections.test.ts +++ b/src/resources/extensions/sf/tests/workflow-projections.test.ts @@ -3,8 +3,8 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { renderPlanContent } from '../workflow-projections.ts'; -import type { SliceRow, TaskRow } from '../sf-db.ts'; +import { renderPlanContent, renderRoadmapContent } from '../workflow-projections.ts'; +import type { MilestoneRow, SliceRow, TaskRow } from '../sf-db.ts'; // ─── Test fixtures ──────────────────────────────────────────────────────── @@ -24,6 +24,10 @@ function makeSlice(overrides: Partial = {}): SliceRow { proof_level: '', integration_closure: '', observability_impact: '', + adversarial_partner: '', + adversarial_combatant: '', + adversarial_architect: '', + planning_meeting: null, created_at: '2026-01-01T00:00:00Z', completed_at: null, sequence: 1, @@ -32,6 +36,30 @@ function makeSlice(overrides: Partial = {}): SliceRow { }; } +function makeMilestone(overrides: Partial = {}): MilestoneRow { + return { + id: 'M001', + title: 'Auth Foundation', + status: 'active', + depends_on: [], + created_at: '2026-01-01T00:00:00Z', + completed_at: null, + vision: 'Ship the authentication foundation.', + success_criteria: [], + key_risks: [], + proof_strategy: [], + verification_contract: '', + verification_integration: '', + verification_operational: '', + verification_uat: '', + definition_of_done: [], + requirement_coverage: '', + boundary_map_markdown: '', + vision_meeting: null, + 
...overrides, + }; +} + function makeTask(overrides: Partial = {}): TaskRow { return { id: 'T01', @@ -99,6 +127,60 @@ test('workflow-projections: renderPlanContent includes ## Tasks section', () => assert.ok(content.includes('## Tasks')); }); +test('workflow-projections: renderPlanContent includes adversarial review and planning meeting sections', () => { + const slice = makeSlice({ + adversarial_partner: 'The slice is narrowly scoped and grounded in real boundaries.', + adversarial_combatant: 'The main risk is dropping ceremony state during persistence.', + adversarial_architect: 'The plan must survive DB, projection, and restore paths.', + planning_meeting: { + trigger: 'There was still ambiguity after the first pass.', + pm: 'Keep the slice focused on state persistence.', + researcher: 'The surrounding workflow already reads these sections back.', + partner: 'The current shape is enough to move forward.', + combatant: 'Only if the projection output preserves the route honestly.', + architect: 'Projection output is part of the workflow contract.', + moderator: 'Proceed, but keep the route explicit.', + recommendedRoute: 'planning', + confidenceSummary: 'High confidence after projection coverage.', + }, + }); + + const content = renderPlanContent(slice, []); + assert.ok(content.includes('## Adversarial Review')); + assert.ok(content.includes('## Planning Meeting')); + assert.ok(content.includes('### Recommended Route')); + assert.ok(content.includes('planning')); + assert.ok(content.includes('High confidence after projection coverage.')); +}); + +test('workflow-projections: renderRoadmapContent includes vision alignment meeting sections', () => { + const milestone = makeMilestone({ + vision_meeting: { + trigger: 'Need top-level roadmap agreement before execution.', + pm: 'Keep the milestone narrow.', + userAdvocate: 'Users need a roadmap that stays coherent.', + customerPanel: 'Operators want confidence before execution.', + business: 'The roadmap should be 
defensible.', + researcher: 'Comparable systems preserve this reasoning explicitly.', + deliveryLead: 'Sequence the smallest credible slice first.', + partner: 'The current roadmap is viable.', + combatant: 'Do not overstate confidence.', + architect: 'Persist the moderation context with the roadmap.', + moderator: 'Weighted synthesis says the roadmap is ready.', + weightedSynthesis: 'This roadmap is narrow enough to execute.', + confidenceByArea: '- sequencing: high', + recommendedRoute: 'planning', + }, + }); + + const content = renderRoadmapContent(milestone, [makeSlice()]); + assert.ok(content.includes('## Vision Alignment Meeting')); + assert.ok(content.includes('### Weighted Synthesis')); + assert.ok(content.includes('### Confidence By Area')); + assert.ok(content.includes('### Recommended Route')); + assert.ok(content.includes('planning')); +}); + // ─── renderPlanContent: task checkboxes ────────────────────────────────── test('workflow-projections: pending task renders with [ ] checkbox', () => { diff --git a/src/resources/extensions/sf/tests/workflow-templates.test.ts b/src/resources/extensions/sf/tests/workflow-templates.test.ts index 8e93300bc..f04830db6 100644 --- a/src/resources/extensions/sf/tests/workflow-templates.test.ts +++ b/src/resources/extensions/sf/tests/workflow-templates.test.ts @@ -11,6 +11,7 @@ import { listTemplates, getTemplateInfo, loadWorkflowTemplate, + scaffoldMilestoneSlices, } from '../workflow-templates.ts'; @@ -169,3 +170,24 @@ console.log('\n── Load Workflow Template ──'); } // ═══════════════════════════════════════════════════════════════════════════ +// Milestone Scaffolding +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n── Milestone Scaffolding ──'); + +{ + const bugfixSlices = scaffoldMilestoneSlices('bugfix'); + assert.ok(bugfixSlices !== null, 'Should scaffold bugfix milestone slices'); + assert.equal(bugfixSlices!.length, 3, 'Bugfix scaffold should create 3 slices'); + 
assert.equal(bugfixSlices![0].sliceId, 'S01'); + assert.ok(bugfixSlices![1].depends.includes('S01')); + + const featureSlices = scaffoldMilestoneSlices('feat'); + assert.ok(featureSlices !== null, 'Should scaffold via alias'); + assert.equal(featureSlices![0].title, 'Define the user-facing contract'); + + const missingScaffold = scaffoldMilestoneSlices('nonexistent'); + assert.equal(missingScaffold, null, 'Unknown template should not scaffold'); +} + +// ═══════════════════════════════════════════════════════════════════════════ diff --git a/src/resources/extensions/sf/tests/worktree-db.test.ts b/src/resources/extensions/sf/tests/worktree-db.test.ts index 626450598..976e9f258 100644 --- a/src/resources/extensions/sf/tests/worktree-db.test.ts +++ b/src/resources/extensions/sf/tests/worktree-db.test.ts @@ -318,6 +318,126 @@ console.log('\n=== worktree-db: reconcileWorktreeDb ==='); cleanup(mainDir, wtDir); } +// Test: preserves ceremony state when reconciling worktree milestone/slice rows +{ + const mainDir = tempDir(); + const wtDir = tempDir(); + const mainDb = path.join(mainDir, 'sf.db'); + const wtDb = path.join(wtDir, 'sf.db'); + + openDatabase(mainDb); + _getAdapter()!.prepare(` + INSERT INTO milestones ( + id, title, status, depends_on, created_at, completed_at, + vision, success_criteria, key_risks, proof_strategy, + verification_contract, verification_integration, verification_operational, verification_uat, + definition_of_done, requirement_coverage, boundary_map_markdown, vision_meeting_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `).run( + 'M001', 'Main Milestone', 'active', '[]', new Date().toISOString(), null, + 'Main vision', '[]', '[]', '[]', '', '', '', '', '[]', '', '', + JSON.stringify({ + trigger: 'Main trigger', + pm: 'Main pm', + userAdvocate: 'Main user', + customerPanel: 'Main customer', + business: 'Main business', + researcher: 'Main researcher', + deliveryLead: 'Main delivery', + partner: 'Main partner', + combatant: 'Main combatant', + architect: 'Main architect', + moderator: 'Main moderator', + weightedSynthesis: 'Main synthesis', + confidenceByArea: '- restore: medium', + recommendedRoute: 'researching', + }), + ); + _getAdapter()!.prepare(` + INSERT INTO slices ( + milestone_id, id, title, status, risk, depends, demo, created_at, completed_at, + full_summary_md, full_uat_md, goal, success_criteria, proof_level, integration_closure, + observability_impact, adversarial_partner, adversarial_combatant, adversarial_architect, + planning_meeting_json, sequence, replan_triggered_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `).run( + 'M001', 'S01', 'Main Slice', 'pending', 'low', '[]', '', new Date().toISOString(), null, + '', '', 'Main goal', '', '', '', '', + 'Main partner', 'Main combatant', 'Main architect', + JSON.stringify({ + trigger: 'Main trigger', + pm: 'Main pm', + researcher: 'Main researcher', + partner: 'Main partner', + combatant: 'Main combatant', + architect: 'Main architect', + moderator: 'Main moderator', + recommendedRoute: 'researching', + confidenceSummary: 'Main confidence', + }), + 1, null, + ); + closeDatabase(); + copyWorktreeDb(mainDb, wtDb); + + openDatabase(wtDb); + _getAdapter()!.prepare(`UPDATE milestones SET vision_meeting_json = ? 
WHERE id = 'M001'`).run(JSON.stringify({ + trigger: 'Worktree trigger', + pm: 'Worktree pm', + userAdvocate: 'Worktree user', + customerPanel: 'Worktree customer', + business: 'Worktree business', + researcher: 'Worktree researcher', + deliveryLead: 'Worktree delivery', + partner: 'Worktree partner', + combatant: 'Worktree combatant', + architect: 'Worktree architect', + moderator: 'Worktree moderator', + weightedSynthesis: 'Worktree synthesis', + confidenceByArea: '- restore: high', + recommendedRoute: 'planning', + })); + _getAdapter()!.prepare(` + UPDATE slices + SET adversarial_partner = ?, adversarial_combatant = ?, adversarial_architect = ?, planning_meeting_json = ? + WHERE milestone_id = 'M001' AND id = 'S01' + `).run( + 'Worktree partner', + 'Worktree combatant', + 'Worktree architect', + JSON.stringify({ + trigger: 'Worktree trigger', + pm: 'Worktree pm', + researcher: 'Worktree researcher', + partner: 'Worktree partner', + combatant: 'Worktree combatant', + architect: 'Worktree architect', + moderator: 'Worktree moderator', + recommendedRoute: 'planning', + confidenceSummary: 'Worktree confidence', + }), + ); + closeDatabase(); + + openDatabase(mainDb); + const result = reconcileWorktreeDb(mainDb, wtDb); + assert.ok(result.milestones > 0, 'milestone rows merged count > 0'); + assert.ok(result.slices > 0, 'slice rows merged count > 0'); + + const milestoneRow = _getAdapter()!.prepare(`SELECT vision_meeting_json FROM milestones WHERE id = 'M001'`).get() as Record; + const sliceRow = _getAdapter()!.prepare(` + SELECT adversarial_partner, adversarial_combatant, adversarial_architect, planning_meeting_json + FROM slices WHERE milestone_id = 'M001' AND id = 'S01' + `).get() as Record; + assert.match(String(milestoneRow['vision_meeting_json'] ?? 
''), /Worktree synthesis/); + assert.equal(sliceRow['adversarial_partner'], 'Worktree partner'); + assert.equal(sliceRow['adversarial_combatant'], 'Worktree combatant'); + assert.equal(sliceRow['adversarial_architect'], 'Worktree architect'); + assert.match(String(sliceRow['planning_meeting_json'] ?? ''), /Worktree confidence/); + + cleanup(mainDir, wtDir); +} + // Test: handles missing worktree DB gracefully { const mainDir = tempDir(); diff --git a/src/resources/extensions/sf/token-counter.ts b/src/resources/extensions/sf/token-counter.ts index 5dc137239..13eb6e756 100644 --- a/src/resources/extensions/sf/token-counter.ts +++ b/src/resources/extensions/sf/token-counter.ts @@ -103,7 +103,11 @@ async function buildGoogleGeminiCliServer(apiKeyRaw: string) { ]); const authClient = new OAuth2Client(); authClient.setCredentials({ access_token: credentials.token }); - return new CodeAssistServer(authClient, credentials.projectId, { headers: {} }); + return new CodeAssistServer( + authClient as unknown as ConstructorParameters[0], + credentials.projectId, + { headers: {} }, + ); } export function isGoogleGeminiCountablePayload(payload: unknown): payload is CountTokensParameters { diff --git a/src/resources/extensions/sf/tools/plan-milestone.ts b/src/resources/extensions/sf/tools/plan-milestone.ts index e270245a0..7227e1f92 100644 --- a/src/resources/extensions/sf/tools/plan-milestone.ts +++ b/src/resources/extensions/sf/tools/plan-milestone.ts @@ -1,6 +1,7 @@ import { clearParseCache } from "../files.js"; import { isClosedStatus } from "../status-guards.js"; import { isNonEmptyString, validateStringArray } from "../validation.js"; +import { hasStructuredVisionAlignmentMeeting, type VisionAlignmentMeetingRecord } from "../milestone-quality.js"; import { transaction, getMilestone, @@ -17,6 +18,7 @@ import { renderAllProjections } from "../workflow-projections.js"; import { writeManifest } from "../workflow-manifest.js"; import { appendEvent } from "../workflow-events.js"; 
import { logWarning } from "../workflow-logger.js"; +import { scaffoldMilestoneSlices } from "../workflow-templates.js"; export interface PlanMilestoneSliceInput { sliceId: string; @@ -35,7 +37,8 @@ export interface PlanMilestoneParams { milestoneId: string; title: string; vision: string; - slices: PlanMilestoneSliceInput[]; + slices?: PlanMilestoneSliceInput[]; + templateId?: string; status?: string; dependsOn?: string[]; /** Optional caller-provided identity for audit trail */ @@ -62,6 +65,8 @@ export interface PlanMilestoneParams { requirementCoverage?: string; /** @optional — defaults to "Not provided." when omitted */ boundaryMapMarkdown?: string; + /** Optional structured top-level vision/roadmap alignment meeting. */ + visionMeeting?: VisionAlignmentMeetingRecord; } export interface PlanMilestoneResult { @@ -161,6 +166,15 @@ function validateParams(params: PlanMilestoneParams): PlanMilestoneParams { if (!isNonEmptyString(params?.title)) throw new Error("title is required"); if (!isNonEmptyString(params?.vision)) throw new Error("vision is required"); + let slicesInput = params.slices; + if ((!Array.isArray(slicesInput) || slicesInput.length === 0) && params.templateId) { + const scaffolded = scaffoldMilestoneSlices(params.templateId); + if (!scaffolded) { + throw new Error(`unknown milestone template: ${params.templateId}`); + } + slicesInput = scaffolded; + } + return { ...params, dependsOn: params.dependsOn ? validateStringArray(params.dependsOn, "dependsOn") : [], @@ -175,7 +189,8 @@ function validateParams(params: PlanMilestoneParams): PlanMilestoneParams { definitionOfDone: params.definitionOfDone ? validateStringArray(params.definitionOfDone, "definitionOfDone") : [], requirementCoverage: params.requirementCoverage ?? "Not provided.", boundaryMapMarkdown: params.boundaryMapMarkdown ?? "Not provided.", - slices: validateSlices(params.slices), + visionMeeting: hasStructuredVisionAlignmentMeeting(params.visionMeeting) ? 
params.visionMeeting : undefined, + slices: validateSlices(slicesInput), }; } @@ -189,6 +204,7 @@ export async function handlePlanMilestone( } catch (err) { return { error: `validation failed: ${(err as Error).message}` }; } + const slices = params.slices ?? []; // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ─── // Guards must be inside the transaction so the state they check cannot @@ -210,7 +226,7 @@ export async function handlePlanMilestone( const existingSlices = getMilestoneSlices(params.milestoneId); const completedSlices = existingSlices.filter(s => isClosedStatus(s.status)); if (completedSlices.length > 0) { - const incomingSliceIds = new Set(params.slices.map(s => s.sliceId)); + const incomingSliceIds = new Set(slices.map(s => s.sliceId)); const droppedCompleted = completedSlices.filter(s => !incomingSliceIds.has(s.id)); if (droppedCompleted.length > 0) { guardError = `cannot re-plan milestone ${params.milestoneId}: ${droppedCompleted.length} completed slice(s) would be dropped (${droppedCompleted.map(s => s.id).join(", ")}). Use sf_reassess_roadmap to modify the roadmap.`; @@ -254,10 +270,11 @@ export async function handlePlanMilestone( definitionOfDone: params.definitionOfDone, requirementCoverage: params.requirementCoverage, boundaryMapMarkdown: params.boundaryMapMarkdown, + visionMeeting: params.visionMeeting, }); - for (let i = 0; i < params.slices.length; i++) { - const slice = params.slices[i]!; + for (let i = 0; i < slices.length; i++) { + const slice = slices[i]!; // Preserve completed/done status on re-plan (#2558). // Without this, a re-plan after milestone transition would reset // already-completed slices back to "pending". 
diff --git a/src/resources/extensions/sf/tools/plan-slice.ts b/src/resources/extensions/sf/tools/plan-slice.ts index 911e81de9..fb571289d 100644 --- a/src/resources/extensions/sf/tools/plan-slice.ts +++ b/src/resources/extensions/sf/tools/plan-slice.ts @@ -17,6 +17,12 @@ import { renderAllProjections } from "../workflow-projections.js"; import { writeManifest } from "../workflow-manifest.js"; import { appendEvent } from "../workflow-events.js"; import { logWarning } from "../workflow-logger.js"; +import { + hasCompleteAdversarialReview, + hasStructuredPlanningMeeting, + type AdversarialReviewRecord, + type PlanningMeetingRecord, +} from "../plan-quality.js"; export interface PlanSliceTaskInput { taskId: string; @@ -36,6 +42,8 @@ export interface PlanSliceParams { sliceId: string; goal: string; tasks: PlanSliceTaskInput[]; + adversarialReview?: AdversarialReviewRecord; + planningMeeting?: PlanningMeetingRecord; /** @optional — defaults to "Not provided." when omitted by models with limited tool-calling */ successCriteria?: string; /** @optional — defaults to "Not provided." when omitted */ @@ -124,6 +132,30 @@ function validateParams(params: PlanSliceParams): PlanSliceParams { proofLevel: params.proofLevel ?? "Not provided.", integrationClosure: params.integrationClosure ?? "Not provided.", observabilityImpact: params.observabilityImpact ?? "Not provided.", + adversarialReview: hasCompleteAdversarialReview(params.adversarialReview) + ? { + partner: params.adversarialReview!.partner!.trim(), + combatant: params.adversarialReview!.combatant!.trim(), + architect: params.adversarialReview!.architect!.trim(), + } + : { + partner: "Missing partner review.", + combatant: "Missing combatant review.", + architect: "Missing architect review.", + }, + planningMeeting: hasStructuredPlanningMeeting(params.planningMeeting) + ? 
{ + trigger: params.planningMeeting.trigger.trim(), + pm: params.planningMeeting.pm.trim(), + researcher: params.planningMeeting.researcher.trim(), + partner: params.planningMeeting.partner.trim(), + combatant: params.planningMeeting.combatant.trim(), + architect: params.planningMeeting.architect.trim(), + moderator: params.planningMeeting.moderator.trim(), + recommendedRoute: params.planningMeeting.recommendedRoute, + confidenceSummary: params.planningMeeting.confidenceSummary.trim(), + } + : undefined, tasks: validateTasks(params.tasks), }; } @@ -172,6 +204,8 @@ export async function handlePlanSlice( proofLevel: params.proofLevel, integrationClosure: params.integrationClosure, observabilityImpact: params.observabilityImpact, + adversarialReview: params.adversarialReview, + planningMeeting: params.planningMeeting, }); for (const task of params.tasks) { diff --git a/src/resources/extensions/sf/tools/replan-slice.ts b/src/resources/extensions/sf/tools/replan-slice.ts index 526a6ee12..6e8e71c3e 100644 --- a/src/resources/extensions/sf/tools/replan-slice.ts +++ b/src/resources/extensions/sf/tools/replan-slice.ts @@ -6,12 +6,19 @@ import { getTask, insertTask, upsertTaskPlanning, + upsertSlicePlanning, insertReplanHistory, deleteTask, } from "../sf-db.js"; import { invalidateStateCache } from "../state.js"; import { isClosedStatus } from "../status-guards.js"; import { isNonEmptyString } from "../validation.js"; +import { + hasCompleteAdversarialReview, + hasStructuredPlanningMeeting, + type AdversarialReviewRecord, + type PlanningMeetingRecord, +} from "../plan-quality.js"; import { renderPlanFromDb, renderReplanFromDb } from "../markdown-renderer.js"; import { renderAllProjections } from "../workflow-projections.js"; import { writeManifest } from "../workflow-manifest.js"; @@ -36,6 +43,13 @@ export interface ReplanSliceParams { blockerTaskId: string; blockerDescription: string; whatChanged: string; + goal?: string; + successCriteria?: string; + proofLevel?: string; + 
integrationClosure?: string; + observabilityImpact?: string; + adversarialReview?: AdversarialReviewRecord; + planningMeeting?: PlanningMeetingRecord; updatedTasks: ReplanSliceTaskInput[]; removedTaskIds: string[]; /** Optional caller-provided identity for audit trail */ @@ -152,6 +166,34 @@ export async function handleReplanSlice( summary: params.whatChanged, }); + upsertSlicePlanning(params.milestoneId, params.sliceId, { + goal: params.goal, + successCriteria: params.successCriteria, + proofLevel: params.proofLevel, + integrationClosure: params.integrationClosure, + observabilityImpact: params.observabilityImpact, + adversarialReview: hasCompleteAdversarialReview(params.adversarialReview) + ? { + partner: params.adversarialReview!.partner.trim(), + combatant: params.adversarialReview!.combatant.trim(), + architect: params.adversarialReview!.architect.trim(), + } + : undefined, + planningMeeting: hasStructuredPlanningMeeting(params.planningMeeting) + ? { + trigger: params.planningMeeting.trigger.trim(), + pm: params.planningMeeting.pm.trim(), + researcher: params.planningMeeting.researcher.trim(), + partner: params.planningMeeting.partner.trim(), + combatant: params.planningMeeting.combatant.trim(), + architect: params.planningMeeting.architect.trim(), + moderator: params.planningMeeting.moderator.trim(), + recommendedRoute: params.planningMeeting.recommendedRoute, + confidenceSummary: params.planningMeeting.confidenceSummary.trim(), + } + : undefined, + }); + // Apply task updates (upsert existing, insert new) for (const updatedTask of params.updatedTasks) { if (existingTaskIds.has(updatedTask.taskId)) { diff --git a/src/resources/extensions/sf/workflow-manifest.ts b/src/resources/extensions/sf/workflow-manifest.ts index b37912d4a..7edc74f80 100644 --- a/src/resources/extensions/sf/workflow-manifest.ts +++ b/src/resources/extensions/sf/workflow-manifest.ts @@ -95,6 +95,9 @@ export function snapshotState(): StateManifest { definition_of_done: 
JSON.parse((r["definition_of_done"] as string) || "[]"), requirement_coverage: (r["requirement_coverage"] as string) ?? "", boundary_map_markdown: (r["boundary_map_markdown"] as string) ?? "", + vision_meeting: typeof r["vision_meeting_json"] === "string" && (r["vision_meeting_json"] as string).trim().length > 0 + ? JSON.parse(r["vision_meeting_json"] as string) + : null, })); const rawSlices = db.prepare("SELECT * FROM slices ORDER BY milestone_id, sequence, id").all() as Record[]; @@ -115,6 +118,12 @@ export function snapshotState(): StateManifest { proof_level: (r["proof_level"] as string) ?? "", integration_closure: (r["integration_closure"] as string) ?? "", observability_impact: (r["observability_impact"] as string) ?? "", + adversarial_partner: (r["adversarial_partner"] as string) ?? "", + adversarial_combatant: (r["adversarial_combatant"] as string) ?? "", + adversarial_architect: (r["adversarial_architect"] as string) ?? "", + planning_meeting: typeof r["planning_meeting_json"] === "string" && (r["planning_meeting_json"] as string).trim().length > 0 + ? JSON.parse(r["planning_meeting_json"] as string) + : null, sequence: toNumeric(r["sequence"], 0) as number, replan_triggered_at: (r["replan_triggered_at"] as string) ?? null, })); diff --git a/src/resources/extensions/sf/workflow-projections.ts b/src/resources/extensions/sf/workflow-projections.ts index 01f0208e4..6035573b0 100644 --- a/src/resources/extensions/sf/workflow-projections.ts +++ b/src/resources/extensions/sf/workflow-projections.ts @@ -53,6 +53,94 @@ export function renderPlanContent(sliceRow: SliceRow, taskRows: TaskRow[]): stri lines.push(`**Goal:** ${sliceRow.goal || "TBD"}`); lines.push(`**Demo:** After this: ${sliceRow.demo || "TBD"}`); lines.push(""); + lines.push("## Must-Haves"); + lines.push(""); + if (sliceRow.success_criteria.trim()) { + for (const line of sliceRow.success_criteria.split(/\n+/).map((entry) => entry.trim()).filter(Boolean)) { + lines.push(line.startsWith("-") ? 
line : `- ${line}`); + } + } else { + lines.push("- Complete the planned slice outcomes."); + } + lines.push(""); + + lines.push("## Adversarial Review"); + lines.push(""); + lines.push("### Partner Review"); + lines.push(""); + lines.push(sliceRow.adversarial_partner?.trim() || "Missing partner review."); + lines.push(""); + lines.push("### Combatant Review"); + lines.push(""); + lines.push(sliceRow.adversarial_combatant?.trim() || "Missing combatant review."); + lines.push(""); + lines.push("### Architect Review"); + lines.push(""); + lines.push(sliceRow.adversarial_architect?.trim() || "Missing architect review."); + lines.push(""); + + if (sliceRow.planning_meeting) { + lines.push("## Planning Meeting"); + lines.push(""); + lines.push("### Trigger"); + lines.push(""); + lines.push(sliceRow.planning_meeting.trigger.trim()); + lines.push(""); + lines.push("### Product Manager"); + lines.push(""); + lines.push(sliceRow.planning_meeting.pm.trim()); + lines.push(""); + lines.push("### Researcher"); + lines.push(""); + lines.push(sliceRow.planning_meeting.researcher.trim()); + lines.push(""); + lines.push("### Partner"); + lines.push(""); + lines.push(sliceRow.planning_meeting.partner.trim()); + lines.push(""); + lines.push("### Combatant"); + lines.push(""); + lines.push(sliceRow.planning_meeting.combatant.trim()); + lines.push(""); + lines.push("### Architect"); + lines.push(""); + lines.push(sliceRow.planning_meeting.architect.trim()); + lines.push(""); + lines.push("### Moderator"); + lines.push(""); + lines.push(sliceRow.planning_meeting.moderator.trim()); + lines.push(""); + lines.push("### Recommended Route"); + lines.push(""); + lines.push(sliceRow.planning_meeting.recommendedRoute); + lines.push(""); + lines.push("### Confidence"); + lines.push(""); + lines.push(sliceRow.planning_meeting.confidenceSummary.trim()); + lines.push(""); + } + + if (sliceRow.proof_level.trim()) { + lines.push("## Proof Level"); + lines.push(""); + lines.push(`- This slice proves: 
${sliceRow.proof_level.trim()}`); + lines.push(""); + } + + if (sliceRow.integration_closure.trim()) { + lines.push("## Integration Closure"); + lines.push(""); + lines.push(sliceRow.integration_closure.trim()); + lines.push(""); + } + + if (sliceRow.observability_impact.trim()) { + lines.push("## Observability / Diagnostics"); + lines.push(""); + lines.push(sliceRow.observability_impact.trim()); + lines.push(""); + } + lines.push("## Tasks"); for (const task of taskRows) { @@ -121,6 +209,54 @@ export function renderRoadmapContent(milestoneRow: MilestoneRow, sliceRows: Slic lines.push("## Vision"); lines.push(milestoneRow.vision || milestoneRow.title || "TBD"); lines.push(""); + + if (milestoneRow.vision_meeting) { + lines.push("## Vision Alignment Meeting"); + lines.push(""); + lines.push("### Trigger"); + lines.push(milestoneRow.vision_meeting.trigger); + lines.push(""); + lines.push("### Product Manager"); + lines.push(milestoneRow.vision_meeting.pm); + lines.push(""); + lines.push("### User Advocate"); + lines.push(milestoneRow.vision_meeting.userAdvocate); + lines.push(""); + lines.push("### Customer Panel"); + lines.push(milestoneRow.vision_meeting.customerPanel); + lines.push(""); + lines.push("### Business"); + lines.push(milestoneRow.vision_meeting.business); + lines.push(""); + lines.push("### Researcher"); + lines.push(milestoneRow.vision_meeting.researcher); + lines.push(""); + lines.push("### Delivery Lead"); + lines.push(milestoneRow.vision_meeting.deliveryLead); + lines.push(""); + lines.push("### Partner"); + lines.push(milestoneRow.vision_meeting.partner); + lines.push(""); + lines.push("### Combatant"); + lines.push(milestoneRow.vision_meeting.combatant); + lines.push(""); + lines.push("### Architect"); + lines.push(milestoneRow.vision_meeting.architect); + lines.push(""); + lines.push("### Moderator"); + lines.push(milestoneRow.vision_meeting.moderator); + lines.push(""); + lines.push("### Weighted Synthesis"); + 
/**
 * One pre-filled slice of a milestone template. The fields are the same ones
 * a caller would otherwise provide by hand for each entry of a milestone's
 * `slices` list.
 */
export interface MilestoneTemplateSliceScaffold {
  sliceId: string;
  title: string;
  risk: string;
  depends: string[];
  demo: string;
  goal: string;
  successCriteria: string;
  proofLevel: string;
  integrationClosure: string;
  observabilityImpact: string;
}

/** A scaffold entry before its position-derived `sliceId` / `depends` are filled in. */
type MilestoneTemplateStep = Omit<MilestoneTemplateSliceScaffold, "sliceId" | "depends">;

/** Steps shared by the "bugfix" and "hotfix" templates. */
const BUGFIX_TEMPLATE_STEPS: readonly MilestoneTemplateStep[] = [
  {
    title: "Reproduce and bound the failure",
    risk: "high",
    demo: "The team can reproduce the failure and point to the exact broken boundary.",
    goal: "Capture the failing boundary, root-cause scope, and the contract that proves the bug is real.",
    successCriteria: "A concrete repro path and failing proof exist before any fix is treated as valid.",
    proofLevel: "runtime repro",
    integrationClosure: "The same repro path is available for post-fix verification.",
    observabilityImpact: "Logs, traces, or repro notes make the failure boundary explicit.",
  },
  {
    title: "Implement the fix",
    risk: "medium",
    demo: "The broken behavior is corrected without widening scope.",
    goal: "Change the implementation to satisfy the failing contract and remove the root cause.",
    successCriteria: "The contract test passes and the fix path is minimal and purpose-aligned.",
    proofLevel: "contract + integration",
    integrationClosure: "Callers continue to work on the corrected boundary with no workaround branch left behind.",
    observabilityImpact: "Failures surface clearly if the same path regresses.",
  },
  {
    title: "Verify and guard against regression",
    risk: "low",
    demo: "The solved boundary is proven end-to-end and the fix stays in place.",
    goal: "Re-run the repro, add regression coverage, and verify the shipped behavior.",
    successCriteria: "The original failure path is green and regression evidence is captured.",
    proofLevel: "runtime + regression",
    integrationClosure: "Verification proves the real consumer path, not just a unit-level surrogate.",
    observabilityImpact: "Follow-up diagnostics and regression checks are documented.",
  },
];

/** Steps for the "refactor" template. */
const REFACTOR_TEMPLATE_STEPS: readonly MilestoneTemplateStep[] = [
  {
    title: "Map the current boundary",
    risk: "medium",
    demo: "The team can explain what is being reshaped and what must not regress.",
    goal: "Capture current responsibilities, callers, and the contract that must survive the refactor.",
    successCriteria: "The pre-refactor contract and blast radius are explicit before code movement starts.",
    proofLevel: "contract inventory",
    integrationClosure: "Downstream boundaries and unchanged behavior are named up front.",
    observabilityImpact: "Audit notes capture the before-state and likely regression surfaces.",
  },
  {
    title: "Restructure the implementation",
    risk: "medium",
    demo: "The code is reorganized into the intended shape without changing the consumer contract.",
    goal: "Apply the structural change while preserving verified behavior.",
    successCriteria: "The code layout improves and the existing contract stays green.",
    proofLevel: "contract + quality",
    integrationClosure: "Callers, imports, and runtime paths remain coherent after the move.",
    observabilityImpact: "Lint/type/test failures clearly point to missed migration edges.",
  },
  {
    title: "Stabilize and verify",
    risk: "low",
    demo: "The new structure is validated and safe to build on.",
    goal: "Prove the refactor is complete, regression-safe, and simpler to extend.",
    successCriteria: "Quality gates and targeted regression checks pass on the new structure.",
    proofLevel: "quality + regression",
    integrationClosure: "No stale compatibility shims or duplicate paths remain without reason.",
    observabilityImpact: "Future maintainers can see the intended shape and the critical checks.",
  },
];

/** Steps for the "small-feature" template. */
const SMALL_FEATURE_TEMPLATE_STEPS: readonly MilestoneTemplateStep[] = [
  {
    title: "Define the user-facing contract",
    risk: "medium",
    demo: "The feature outcome, acceptance path, and boundary are clear before implementation.",
    goal: "State what the feature must do, for whom, and how success will be verified.",
    successCriteria: "A concrete contract exists for the intended feature behavior.",
    proofLevel: "contract",
    integrationClosure: "The consumer path and integration boundary are identified before code is written.",
    observabilityImpact: "Acceptance criteria and diagnostics are explicit.",
  },
  {
    title: "Implement the core behavior",
    risk: "medium",
    demo: "The feature works on its main path.",
    goal: "Build the smallest complete implementation that satisfies the agreed contract.",
    successCriteria: "The main feature path works and the contract test passes.",
    proofLevel: "contract + integration",
    integrationClosure: "Primary callers and dependent boundaries work together on the new path.",
    observabilityImpact: "Failures in the feature path are diagnosable.",
  },
  {
    title: "Polish, verify, and close",
    risk: "low",
    demo: "The feature is verified, documented, and safe to hand over.",
    goal: "Finish verification, edge handling, and any required docs or follow-up notes.",
    successCriteria: "The feature is truthfully ready for users or the next milestone.",
    proofLevel: "verification",
    integrationClosure: "The end-to-end user path is proven and any gaps are recorded explicitly.",
    observabilityImpact: "Tests/docs/notes make the feature supportable.",
  },
];

/**
 * Turns an ordered step list into slice scaffolds: step N gets id `S0N`
 * (two-digit, 1-based) and depends on the step directly before it; the first
 * step has no dependencies. A fresh array and fresh objects are produced on
 * every call, so callers may mutate the result freely.
 */
function toSequentialSlices(steps: readonly MilestoneTemplateStep[]): MilestoneTemplateSliceScaffold[] {
  const idFor = (position: number): string => `S${String(position).padStart(2, "0")}`;
  return steps.map((step, index) => ({
    sliceId: idFor(index + 1),
    title: step.title,
    risk: step.risk,
    depends: index === 0 ? [] : [idFor(index)],
    demo: step.demo,
    goal: step.goal,
    successCriteria: step.successCriteria,
    proofLevel: step.proofLevel,
    integrationClosure: step.integrationClosure,
    observabilityImpact: step.observabilityImpact,
  }));
}

/**
 * Produces the default three-slice plan for a workflow template.
 *
 * @param templateId Template name, resolved through `resolveByName`.
 * @returns The scaffolded slices for "bugfix"/"hotfix", "refactor" or
 *   "small-feature"; `null` when the name does not resolve or the resolved
 *   template has no milestone scaffold.
 */
export function scaffoldMilestoneSlices(templateId: string): MilestoneTemplateSliceScaffold[] | null {
  const resolved = resolveByName(templateId);
  if (!resolved) return null;

  const resolvedId = resolved.id;
  if (resolvedId === "bugfix" || resolvedId === "hotfix") return toSequentialSlices(BUGFIX_TEMPLATE_STEPS);
  if (resolvedId === "refactor") return toSequentialSlices(REFACTOR_TEMPLATE_STEPS);
  if (resolvedId === "small-feature") return toSequentialSlices(SMALL_FEATURE_TEMPLATE_STEPS);
  return null;
}
"@singularity-forge/pi-coding-agent"; import { Container, Markdown, Spacer, Text } from "@singularity-forge/pi-tui"; -import { Type } from "@sinclair/typebox"; +import { Static, Type } from "@sinclair/typebox"; import { formatTokenCount } from "../shared/mod.js"; import { getCurrentPhase } from "../shared/sf-phase-state.js"; import { type AgentConfig, type AgentScope, discoverAgents } from "./agents.js"; @@ -37,6 +37,7 @@ import { import { registerWorker, updateWorker } from "./worker-registry.js"; import { loadEffectiveSFPreferences } from "../sf/preferences.js"; import { CmuxClient, shellEscape } from "../cmux/index.js"; +import { SubagentBackgroundJobManager } from "./background-jobs.js"; const MAX_PARALLEL_TASKS = 8; const MAX_CONCURRENCY = 4; @@ -205,6 +206,285 @@ interface SubagentDetails { results: SingleResult[]; } +interface SubagentToolResult extends AgentToolResult { + isError?: boolean; +} + +interface SubagentExecutionContext { + defaultCwd: string; + agents: AgentConfig[]; + agentScope: AgentScope; + projectAgentsDir: string | null; + params: Static; + signal: AbortSignal | undefined; + onUpdate: OnUpdateCallback | undefined; + cmuxClient: CmuxClient; + cmuxSplitsEnabled: boolean; + useIsolation: boolean; +} + +function getPrimaryTextContent(result: AgentToolResult): string { + const first = result.content.find((item) => item.type === "text"); + return first?.type === "text" ? 
first.text : "(no output)"; +} + +function summarizeBackgroundInvocation(params: Static): string { + if (params.chain && params.chain.length > 0) return `chain:${params.chain.map((step) => step.agent).join("→")}`; + if (params.tasks && params.tasks.length > 0) return `parallel:${params.tasks.map((task) => task.agent).join(",")}`; + if (params.agent) return `single:${params.agent}`; + return "subagent"; +} + +async function executeSubagentInvocation({ + defaultCwd, + agents, + agentScope, + projectAgentsDir, + params, + signal, + onUpdate, + cmuxClient, + cmuxSplitsEnabled, + useIsolation, +}: SubagentExecutionContext): Promise { + const makeDetails = + (mode: "single" | "parallel" | "chain") => + (results: SingleResult[]): SubagentDetails => ({ + mode, + agentScope, + projectAgentsDir, + results, + }); + + if (params.chain && params.chain.length > 0) { + const results: SingleResult[] = []; + let previousOutput = ""; + + for (let i = 0; i < params.chain.length; i++) { + const step = params.chain[i]; + const taskWithContext = step.task.replace(/\{previous\}/g, previousOutput); + const chainUpdate: OnUpdateCallback | undefined = onUpdate + ? (partial) => { + const currentResult = partial.details?.results[0]; + if (!currentResult) return; + onUpdate({ + content: partial.content, + details: makeDetails("chain")([...results, currentResult]), + }); + } + : undefined; + + const result = await runSingleAgent( + defaultCwd, + agents, + step.agent, + taskWithContext, + step.cwd, + i + 1, + signal, + chainUpdate, + makeDetails("chain"), + step.model ?? 
params.model, + ); + results.push(result); + + const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted"; + if (isError) { + const errorMsg = result.errorMessage || result.stderr || getFinalOutput(result.messages) || "(no output)"; + return { + content: [{ type: "text", text: `Chain stopped at step ${i + 1} (${step.agent}): ${errorMsg}` }], + details: makeDetails("chain")(results), + isError: true, + }; + } + previousOutput = getFinalOutput(result.messages); + } + + return { + content: [{ type: "text", text: getFinalOutput(results[results.length - 1].messages) || "(no output)" }], + details: makeDetails("chain")(results), + }; + } + + if (params.tasks && params.tasks.length > 0) { + if (params.tasks.length > MAX_PARALLEL_TASKS) { + return { + content: [{ type: "text", text: `Too many parallel tasks (${params.tasks.length}). Max is ${MAX_PARALLEL_TASKS}.` }], + details: makeDetails("parallel")([]), + isError: true, + }; + } + + const allResults: SingleResult[] = new Array(params.tasks.length); + for (let i = 0; i < params.tasks.length; i++) { + allResults[i] = { + agent: params.tasks[i].agent, + agentSource: "unknown", + task: params.tasks[i].task, + exitCode: -1, + messages: [], + stderr: "", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, contextTokens: 0, turns: 0 }, + }; + } + + const emitParallelUpdate = () => { + if (!onUpdate) return; + const running = allResults.filter((r) => r.exitCode === -1).length; + const done = allResults.filter((r) => r.exitCode !== -1).length; + onUpdate({ + content: [{ type: "text", text: `Parallel: ${done}/${allResults.length} done, ${running} running...` }], + details: makeDetails("parallel")([...allResults]), + }); + }; + + const MAX_RETRIES = 1; + const batchId = crypto.randomUUID(); + const batchSize = params.tasks.length; + const gridSurfaces = cmuxSplitsEnabled + ? 
await cmuxClient.createGridLayout(Math.min(batchSize, MAX_CONCURRENCY)) + : []; + const results = await mapWithConcurrencyLimit(params.tasks, MAX_CONCURRENCY, async (t, index) => { + const workerId = registerWorker(t.agent, t.task, index, batchSize, batchId); + const taskModelOverride = t.model ?? params.model; + const runTask = () => + cmuxSplitsEnabled + ? runSingleAgentInCmuxSplit( + cmuxClient, + gridSurfaces[index] ?? (index % 2 === 0 ? "right" : "down"), + defaultCwd, + agents, + t.agent, + t.task, + t.cwd, + undefined, + signal, + (partial) => { + if (partial.details?.results[0]) { + allResults[index] = partial.details.results[0]; + emitParallelUpdate(); + } + }, + makeDetails("parallel"), + taskModelOverride, + ) + : runSingleAgent( + defaultCwd, + agents, + t.agent, + t.task, + t.cwd, + undefined, + signal, + (partial) => { + if (partial.details?.results[0]) { + allResults[index] = partial.details.results[0]; + emitParallelUpdate(); + } + }, + makeDetails("parallel"), + taskModelOverride, + ); + let result = await runTask(); + const isFailed = result.exitCode !== 0 || (result.messages.length === 0 && !signal?.aborted); + if (isFailed && MAX_RETRIES > 0 && !signal?.aborted) { + result = await runTask(); + } + + updateWorker(workerId, result.exitCode === 0 ? "completed" : "failed"); + allResults[index] = result; + emitParallelUpdate(); + return result; + }); + + const successCount = results.filter((r) => r.exitCode === 0).length; + const summaries = results.map((r) => { + const isError = r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted"; + const output = isError + ? (r.errorMessage || r.stderr || getFinalOutput(r.messages) || "(no output)") + : getFinalOutput(r.messages); + return `[${r.agent}] ${r.exitCode === 0 ? 
"completed" : `failed (exit ${r.exitCode})`}: ${output || "(no output)"}`; + }); + return { + content: [{ type: "text", text: `Parallel: ${successCount}/${results.length} succeeded\n\n${summaries.join("\n\n")}` }], + details: makeDetails("parallel")(results), + }; + } + + if (params.agent && params.task) { + let isolation: IsolationEnvironment | null = null; + let mergeResult: MergeResult | undefined; + try { + const effectiveCwd = params.cwd ?? defaultCwd; + if (useIsolation) { + const taskId = crypto.randomUUID(); + isolation = await createIsolation(effectiveCwd, taskId, readIsolationMode()); + } + + const result = cmuxSplitsEnabled + ? await runSingleAgentInCmuxSplit( + cmuxClient, + "right", + defaultCwd, + agents, + params.agent, + params.task, + isolation ? isolation.workDir : params.cwd, + undefined, + signal, + onUpdate, + makeDetails("single"), + params.model, + ) + : await runSingleAgent( + defaultCwd, + agents, + params.agent, + params.task, + isolation ? isolation.workDir : params.cwd, + undefined, + signal, + onUpdate, + makeDetails("single"), + params.model, + ); + + if (isolation) { + const patches = await isolation.captureDelta(); + if (patches.length > 0) mergeResult = await mergeDeltaPatches(effectiveCwd, patches); + } + + const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted"; + if (isError) { + const errorMsg = result.errorMessage || result.stderr || getFinalOutput(result.messages) || "(no output)"; + return { + content: [{ type: "text", text: `Agent ${result.stopReason || "failed"}: ${errorMsg}` }], + details: makeDetails("single")([result]), + isError: true, + }; + } + + let outputText = getFinalOutput(result.messages) || "(no output)"; + if (mergeResult && !mergeResult.success) { + outputText += `\n\n⚠ Patch merge failed: ${mergeResult.error || "unknown error"}`; + } + return { + content: [{ type: "text", text: outputText }], + details: makeDetails("single")([result]), + }; + } finally { + if 
(isolation) await isolation.cleanup(); + } + } + + const available = agents.map((a) => `${a.name} (${a.source})`).join(", ") || "none"; + return { + content: [{ type: "text", text: `Invalid parameters. Available agents: ${available}` }], + details: makeDetails("single")([]), + isError: true, + }; +} + function getFinalOutput(messages: Message[]): string { for (let i = messages.length - 1; i >= 0; i--) { const msg = messages[i]; @@ -617,6 +897,14 @@ const AgentScopeSchema = StringEnum(["user", "project", "both"] as const, { const SubagentParams = Type.Object({ agent: Type.Optional(Type.String({ description: "Name of the agent to invoke (for single mode)" })), task: Type.Optional(Type.String({ description: "Task to delegate (for single mode)" })), + background: Type.Optional( + Type.Boolean({ + description: + "Launch the subagent run in the background for later retrieval via await_subagent. " + + "Useful for longer autonomous test or research waves.", + default: false, + }), + ), model: Type.Optional(Type.String({ description: "Override the agent's default model. Applies to single mode, or as a default for all tasks/chain steps unless they set their own `model`.", })), @@ -639,7 +927,53 @@ const SubagentParams = Type.Object({ }); export default function (pi: ExtensionAPI) { + let backgroundJobs: SubagentBackgroundJobManager | null = null; + + function getBackgroundJobs(): SubagentBackgroundJobManager { + if (!backgroundJobs) throw new Error("Subagent background job manager not initialized. Wait for session_start."); + return backgroundJobs; + } + + pi.on("session_start", async () => { + backgroundJobs = new SubagentBackgroundJobManager({ + onJobComplete: (job) => { + if (job.awaited) return; + const elapsed = ((Date.now() - job.startTime) / 1000).toFixed(1); + const output = job.result + ? getPrimaryTextContent(job.result) + : `Error: ${job.errorText ?? "unknown error"}`; + const maxLen = 2000; + const truncated = output.length > maxLen + ? 
`${output.slice(0, maxLen)}\n\n[... truncated, use await_subagent for full output]` + : output; + pi.sendMessage( + { + customType: "subagent_job_result", + content: [ + `**Background subagent ${job.status}: ${job.id}** (${job.label}, ${elapsed}s)`, + "", + truncated, + ].join("\n"), + display: true, + }, + { deliverAs: "followUp" }, + ); + }, + }); + }); + + pi.on("session_before_switch", async () => { + if (!backgroundJobs) return; + for (const job of backgroundJobs.getRunningJobs()) { + backgroundJobs.cancel(job.id); + } + }); + pi.on("session_shutdown", async () => { + if (backgroundJobs) { + backgroundJobs.shutdown(); + backgroundJobs = null; + } await stopLiveSubagents(); }); @@ -742,262 +1076,46 @@ export default function (pi: ExtensionAPI) { }; } } - - if (params.chain && params.chain.length > 0) { - const results: SingleResult[] = []; - let previousOutput = ""; - - for (let i = 0; i < params.chain.length; i++) { - const step = params.chain[i]; - const taskWithContext = step.task.replace(/\{previous\}/g, previousOutput); - - // Create update callback that includes all previous results - const chainUpdate: OnUpdateCallback | undefined = onUpdate - ? (partial) => { - // Combine completed results with current streaming result - const currentResult = partial.details?.results[0]; - if (currentResult) { - const allResults = [...results, currentResult]; - onUpdate({ - content: partial.content, - details: makeDetails("chain")(allResults), - }); - } - } - : undefined; - - const result = await runSingleAgent( - ctx.cwd, + if (params.background) { + const manager = getBackgroundJobs(); + const jobId = manager.register(summarizeBackgroundInvocation(params), (backgroundSignal) => + executeSubagentInvocation({ + defaultCwd: ctx.cwd, agents, - step.agent, - taskWithContext, - step.cwd, - i + 1, - signal, - chainUpdate, - makeDetails("chain"), - step.model ?? 
params.model, - ); - results.push(result); - - const isError = - result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted"; - if (isError) { - const errorMsg = - result.errorMessage || result.stderr || getFinalOutput(result.messages) || "(no output)"; - return { - content: [{ type: "text", text: `Chain stopped at step ${i + 1} (${step.agent}): ${errorMsg}` }], - details: makeDetails("chain")(results), - isError: true, - }; - } - previousOutput = getFinalOutput(result.messages); - } + agentScope, + projectAgentsDir: discovery.projectAgentsDir, + params: { ...params, confirmProjectAgents: false, background: false }, + signal: backgroundSignal, + onUpdate: undefined, + cmuxClient, + cmuxSplitsEnabled, + useIsolation, + }), + ); return { - content: [{ type: "text", text: getFinalOutput(results[results.length - 1].messages) || "(no output)" }], - details: makeDetails("chain")(results), + content: [{ + type: "text", + text: + `Background subagent job started: **${jobId}**\n` + + `Invocation: \`${summarizeBackgroundInvocation(params)}\`\n\n` + + "Use `await_subagent` to retrieve the result or `cancel_subagent` to stop it.", + }], + details: undefined, }; } - if (params.tasks && params.tasks.length > 0) { - if (params.tasks.length > MAX_PARALLEL_TASKS) - return { - content: [ - { - type: "text", - text: `Too many parallel tasks (${params.tasks.length}). 
Max is ${MAX_PARALLEL_TASKS}.`, - }, - ], - details: makeDetails("parallel")([]), - }; - - // Track all results for streaming updates - const allResults: SingleResult[] = new Array(params.tasks.length); - - // Initialize placeholder results - for (let i = 0; i < params.tasks.length; i++) { - allResults[i] = { - agent: params.tasks[i].agent, - agentSource: "unknown", - task: params.tasks[i].task, - exitCode: -1, // -1 = still running - messages: [], - stderr: "", - usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, contextTokens: 0, turns: 0 }, - }; - } - - const emitParallelUpdate = () => { - if (onUpdate) { - const running = allResults.filter((r) => r.exitCode === -1).length; - const done = allResults.filter((r) => r.exitCode !== -1).length; - onUpdate({ - content: [ - { type: "text", text: `Parallel: ${done}/${allResults.length} done, ${running} running...` }, - ], - details: makeDetails("parallel")([...allResults]), - }); - } - }; - - const MAX_RETRIES = 1; // Retry failed tasks once - const batchId = crypto.randomUUID(); - const batchSize = params.tasks.length; - // Pre-create a grid layout for cmux splits so agents get a clean tiled arrangement - const gridSurfaces = cmuxSplitsEnabled - ? await cmuxClient.createGridLayout(Math.min(batchSize, MAX_CONCURRENCY)) - : []; - const results = await mapWithConcurrencyLimit(params.tasks, MAX_CONCURRENCY, async (t, index) => { - const workerId = registerWorker(t.agent, t.task, index, batchSize, batchId); - const taskModelOverride = t.model ?? params.model; - const runTask = () => cmuxSplitsEnabled - ? runSingleAgentInCmuxSplit( - cmuxClient, - gridSurfaces[index] ?? (index % 2 === 0 ? 
"right" : "down"), - ctx.cwd, - agents, - t.agent, - t.task, - t.cwd, - undefined, - signal, - (partial) => { - if (partial.details?.results[0]) { - allResults[index] = partial.details.results[0]; - emitParallelUpdate(); - } - }, - makeDetails("parallel"), - taskModelOverride, - ) - : runSingleAgent( - ctx.cwd, - agents, - t.agent, - t.task, - t.cwd, - undefined, - signal, - (partial) => { - if (partial.details?.results[0]) { - allResults[index] = partial.details.results[0]; - emitParallelUpdate(); - } - }, - makeDetails("parallel"), - taskModelOverride, - ); - let result = await runTask(); - - // Auto-retry failed tasks (likely API rate limit or transient error) - const isFailed = result.exitCode !== 0 || (result.messages.length === 0 && !signal?.aborted); - if (isFailed && MAX_RETRIES > 0 && !signal?.aborted) { - result = await runTask(); - } - - updateWorker(workerId, result.exitCode === 0 ? "completed" : "failed"); - allResults[index] = result; - emitParallelUpdate(); - return result; - }); - - const successCount = results.filter((r) => r.exitCode === 0).length; - const summaries = results.map((r) => { - const isError = r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted"; - const output = isError - ? (r.errorMessage || r.stderr || getFinalOutput(r.messages) || "(no output)") - : getFinalOutput(r.messages); - return `[${r.agent}] ${r.exitCode === 0 ? "completed" : `failed (exit ${r.exitCode})`}: ${output || "(no output)"}`; - }); - return { - content: [ - { - type: "text", - text: `Parallel: ${successCount}/${results.length} succeeded\n\n${summaries.join("\n\n")}`, - }, - ], - details: makeDetails("parallel")(results), - }; - } - - if (params.agent && params.task) { - let isolation: IsolationEnvironment | null = null; - let mergeResult: MergeResult | undefined; - try { - const effectiveCwd = params.cwd ?? 
ctx.cwd; - - if (useIsolation) { - const taskId = crypto.randomUUID(); - isolation = await createIsolation(effectiveCwd, taskId, isolationMode); - } - - const result = cmuxSplitsEnabled - ? await runSingleAgentInCmuxSplit( - cmuxClient, - "right", - ctx.cwd, - agents, - params.agent, - params.task, - isolation ? isolation.workDir : params.cwd, - undefined, - signal, - onUpdate, - makeDetails("single"), - params.model, - ) - : await runSingleAgent( - ctx.cwd, - agents, - params.agent, - params.task, - isolation ? isolation.workDir : params.cwd, - undefined, - signal, - onUpdate, - makeDetails("single"), - params.model, - ); - - // Capture and merge delta if isolated - if (isolation) { - const patches = await isolation.captureDelta(); - if (patches.length > 0) { - mergeResult = await mergeDeltaPatches(effectiveCwd, patches); - } - } - - const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted"; - if (isError) { - const errorMsg = - result.errorMessage || result.stderr || getFinalOutput(result.messages) || "(no output)"; - return { - content: [{ type: "text", text: `Agent ${result.stopReason || "failed"}: ${errorMsg}` }], - details: makeDetails("single")([result]), - isError: true, - }; - } - - let outputText = getFinalOutput(result.messages) || "(no output)"; - if (mergeResult && !mergeResult.success) { - outputText += `\n\n⚠ Patch merge failed: ${mergeResult.error || "unknown error"}`; - } - return { - content: [{ type: "text", text: outputText }], - details: makeDetails("single")([result]), - }; - } finally { - if (isolation) { - await isolation.cleanup(); - } - } - } - - const available = agents.map((a) => `${a.name} (${a.source})`).join(", ") || "none"; - return { - content: [{ type: "text", text: `Invalid parameters. 
Available agents: ${available}` }], - details: makeDetails("single")([]), - }; + return executeSubagentInvocation({ + defaultCwd: ctx.cwd, + agents, + agentScope, + projectAgentsDir: discovery.projectAgentsDir, + params, + signal, + onUpdate, + cmuxClient, + cmuxSplitsEnabled, + useIsolation, + }); }, renderCall(args, theme) { @@ -1315,4 +1433,83 @@ export default function (pi: ExtensionAPI) { return new Text(text?.type === "text" ? text.text : "(no output)", 0, 0); }, }); + + pi.registerTool({ + name: "await_subagent", + label: "Await Subagent", + description: "Wait for a background subagent job to complete and return its full result.", + parameters: Type.Object({ + job_id: Type.String({ description: "Background subagent job ID (for example sub_a1b2c3d4)" }), + timeout: Type.Optional(Type.Number({ + description: + "Maximum seconds to wait before returning control. Defaults to 120. " + + "If the timeout is reached the subagent keeps running in the background.", + })), + }), + async execute(_toolCallId, params) { + const manager = getBackgroundJobs(); + const job = manager.getJob(params.job_id); + if (!job) { + return { content: [{ type: "text", text: `Background subagent job not found: ${params.job_id}` }], details: undefined }; + } + + manager.suppressFollowUp(job.id); + if (job.status !== "running") { + if (job.result) return job.result; + return { + content: [{ type: "text", text: `Background subagent ${job.status}: ${job.errorText ?? "unknown error"}` }], + details: undefined, + isError: job.status !== "completed", + }; + } + + const timeoutSeconds = params.timeout ?? 
120; + const timeoutMs = timeoutSeconds * 1000; + const TIMEOUT_SENTINEL = Symbol("timeout"); + const timeoutPromise = new Promise((resolve) => { + const timer = setTimeout(() => resolve(TIMEOUT_SENTINEL), timeoutMs); + if (typeof timer === "object" && "unref" in timer) timer.unref(); + }); + const raceResult = await Promise.race([job.promise.then(() => "completed" as const), timeoutPromise]); + if (raceResult === TIMEOUT_SENTINEL) { + const elapsed = ((Date.now() - job.startTime) / 1000).toFixed(1); + return { + content: [{ + type: "text", + text: + `Background subagent still running: **${job.id}** (${job.label}, ${elapsed}s)\n\n` + + `Timed out after ${timeoutSeconds}s. Call \`await_subagent\` again later or \`cancel_subagent\` to stop it.`, + }], + details: undefined, + }; + } + + const finished = manager.getJob(job.id); + if (finished?.result) return finished.result; + return { + content: [{ type: "text", text: `Background subagent ${finished?.status ?? "failed"}: ${finished?.errorText ?? "unknown error"}` }], + details: undefined, + isError: true, + }; + }, + }); + + pi.registerTool({ + name: "cancel_subagent", + label: "Cancel Subagent", + description: "Cancel a running background subagent job by ID.", + parameters: Type.Object({ + job_id: Type.String({ description: "Background subagent job ID (for example sub_a1b2c3d4)" }), + }), + async execute(_toolCallId, params) { + const manager = getBackgroundJobs(); + const result = manager.cancel(params.job_id); + const messages: Record = { + cancelled: `Background subagent ${params.job_id} has been cancelled.`, + not_found: `Background subagent ${params.job_id} not found.`, + already_completed: `Background subagent ${params.job_id} has already completed (or failed/cancelled).`, + }; + return { content: [{ type: "text", text: messages[result] ?? 
`Unknown result: ${result}` }], details: undefined }; + }, + }); } diff --git a/src/tests/provider.test.ts b/src/tests/provider.test.ts index 147cbe497..5fc36752f 100644 --- a/src/tests/provider.test.ts +++ b/src/tests/provider.test.ts @@ -2,7 +2,7 @@ * Tests for search provider selection, preference persistence, and key helpers. * * Covers: - * - All 8 resolveSearchProvider() scenarios (keys × preferences) + * - resolveSearchProvider() scenarios (keys × preferences) * - Preference get/set round-trip via AuthStorage * - Key helper functions */ @@ -76,6 +76,26 @@ test('resolveSearchProvider returns brave when only BRAVE_API_KEY is set', async }) }) +test('resolveSearchProvider returns serper when only SERPER_API_KEY is set', async () => { + const { resolveSearchProvider } = await import( + '../resources/extensions/search-the-web/provider.ts' + ) + withEnv({ TAVILY_API_KEY: undefined, BRAVE_API_KEY: undefined, SERPER_API_KEY: 'serper-test' }, () => { + const result = resolveSearchProvider('auto') + assert.equal(result, 'serper') + }) +}) + +test('resolveSearchProvider returns exa when only EXA_API_KEY is set', async () => { + const { resolveSearchProvider } = await import( + '../resources/extensions/search-the-web/provider.ts' + ) + withEnv({ TAVILY_API_KEY: undefined, BRAVE_API_KEY: undefined, SERPER_API_KEY: undefined, EXA_API_KEY: 'exa-test' }, () => { + const result = resolveSearchProvider('auto') + assert.equal(result, 'exa') + }) +}) + test('resolveSearchProvider returns tavily when both keys set and preference is auto', async () => { const { resolveSearchProvider } = await import( '../resources/extensions/search-the-web/provider.ts' @@ -150,6 +170,14 @@ test('resolveSearchProvider falls back to other provider when preferred key miss const result = resolveSearchProvider('brave') assert.equal(result, 'tavily', 'falls back to tavily when brave preferred but key missing') }) + withEnv({ TAVILY_API_KEY: undefined, BRAVE_API_KEY: undefined, SERPER_API_KEY: 
'serper-test' }, () => { + const result = resolveSearchProvider('brave') + assert.equal(result, 'serper', 'falls back to serper when brave preferred but only serper key exists') + }) + withEnv({ TAVILY_API_KEY: undefined, BRAVE_API_KEY: undefined, SERPER_API_KEY: undefined, EXA_API_KEY: 'exa-test' }, () => { + const result = resolveSearchProvider('brave') + assert.equal(result, 'exa', 'falls back to exa when brave preferred but only exa key exists') + }) }) // ═══════════════════════════════════════════════════════════════════════════ @@ -244,11 +272,35 @@ test('getBraveApiKey reads from process.env.BRAVE_API_KEY', async () => { }) }) +test('getSerperApiKey reads from process.env.SERPER_API_KEY', async () => { + const { getSerperApiKey } = await import( + '../resources/extensions/search-the-web/provider.ts' + ) + withEnv({ SERPER_API_KEY: 'serper-test-key' }, () => { + assert.equal(getSerperApiKey(), 'serper-test-key') + }) + withEnv({ SERPER_API_KEY: undefined }, () => { + assert.equal(getSerperApiKey(), '') + }) +}) + +test('getExaApiKey reads from process.env.EXA_API_KEY', async () => { + const { getExaApiKey } = await import( + '../resources/extensions/search-the-web/provider.ts' + ) + withEnv({ EXA_API_KEY: 'exa-test-key' }, () => { + assert.equal(getExaApiKey(), 'exa-test-key') + }) + withEnv({ EXA_API_KEY: undefined }, () => { + assert.equal(getExaApiKey(), '') + }) +}) + // ═══════════════════════════════════════════════════════════════════════════ // 4. 
Boundary contract — S01→S02 public API surface // ═══════════════════════════════════════════════════════════════════════════ -test('provider.ts exports exactly the 7 expected functions', async () => { +test('provider.ts exports exactly the expected functions', async () => { const provider = await import( '../resources/extensions/search-the-web/provider.ts' ) @@ -259,6 +311,8 @@ test('provider.ts exports exactly the 7 expected functions', async () => { 'getBraveApiKey', 'braveHeaders', 'getOllamaApiKey', + 'getSerperApiKey', + 'getExaApiKey', 'getSearchProviderPreference', 'setSearchProviderPreference', ] as const @@ -275,6 +329,6 @@ test('provider.ts exports exactly the 7 expected functions', async () => { assert.deepEqual( actualFunctions.sort(), [...expectedExports].sort(), - 'provider.ts should export exactly the 7 expected functions (no extra function exports)', + 'provider.ts should export exactly the expected functions (no extra function exports)', ) }) diff --git a/src/tests/search-provider-command.test.ts b/src/tests/search-provider-command.test.ts index b246a1928..e3fd2457e 100644 --- a/src/tests/search-provider-command.test.ts +++ b/src/tests/search-provider-command.test.ts @@ -2,7 +2,7 @@ * Contract tests for /search-provider slash command. 
* * Covers: - * - Direct arg application (tavily, brave, auto) + * - Direct arg application (tavily, brave, serper, exa, auto) * - Interactive select UI when no arg given * - Cancel (Esc) produces no side effects * - Invalid arg falls back to interactive select @@ -18,21 +18,31 @@ import { tmpdir } from 'node:os' // ─── Helpers (reused from provider.test.ts pattern) ──────────────────────── -function withEnv( +const SEARCH_ENV_KEYS = [ + 'TAVILY_API_KEY', + 'BRAVE_API_KEY', + 'SERPER_API_KEY', + 'EXA_API_KEY', + 'OLLAMA_API_KEY', +] as const + +async function withEnv( vars: Record, - fn: () => void, -): void { + fn: () => T | Promise, +): Promise { const originals: Record = {} - for (const key of Object.keys(vars)) { + const keys = new Set([...SEARCH_ENV_KEYS, ...Object.keys(vars)]) + for (const key of keys) { originals[key] = process.env[key] - if (vars[key] === undefined) { + const value = vars[key] + if (value === undefined) { delete process.env[key] } else { - process.env[key] = vars[key] + process.env[key] = value } } try { - fn() + return await fn() } finally { for (const key of Object.keys(originals)) { if (originals[key] === undefined) { @@ -191,7 +201,7 @@ test('direct arg "auto" sets preference and notifies', async (t) => { // 4. 
No arg — shows select UI, user picks one // ═══════════════════════════════════════════════════════════════════════════ -test('no arg shows select UI with 5 options, user picks brave', async () => { +test('no arg shows select UI with 7 options, user picks brave', async () => { const cmd = await loadCommand() await withEnv({ TAVILY_API_KEY: 'tvly-test', BRAVE_API_KEY: 'BSA-test' }, async () => { @@ -200,14 +210,16 @@ test('no arg shows select UI with 5 options, user picks brave', async () => { // Select UI shown assert.equal(ctx.ui.selectCalls.length, 1, 'should show select UI') - assert.equal(ctx.ui.selectCalls[0].options.length, 5) + assert.equal(ctx.ui.selectCalls[0].options.length, 7) // Options show key status assert.match(ctx.ui.selectCalls[0].options[0], /tavily \(key: ✓\)/) assert.match(ctx.ui.selectCalls[0].options[1], /brave \(key: ✓\)/) - assert.match(ctx.ui.selectCalls[0].options[2], /ollama \(key:/) - assert.match(ctx.ui.selectCalls[0].options[3], /combosearch \(/) - assert.equal(ctx.ui.selectCalls[0].options[4], 'auto') + assert.match(ctx.ui.selectCalls[0].options[2], /serper \(key:/) + assert.match(ctx.ui.selectCalls[0].options[3], /exa \(key:/) + assert.match(ctx.ui.selectCalls[0].options[4], /ollama \(key:/) + assert.match(ctx.ui.selectCalls[0].options[5], /combosearch \(/) + assert.equal(ctx.ui.selectCalls[0].options[6], 'auto') // Title shows current preference assert.match(ctx.ui.selectCalls[0].title, /current:/) @@ -267,16 +279,16 @@ test('invalid arg "google" falls back to interactive select', async () => { // 7. 
Tab completion — all options when prefix is empty // ═══════════════════════════════════════════════════════════════════════════ -test('tab completion returns all 5 options when prefix is empty', async () => { +test('tab completion returns all 7 options when prefix is empty', async () => { const cmd = await loadCommand() - withEnv({ TAVILY_API_KEY: 'tvly-test', BRAVE_API_KEY: 'BSA-test' }, () => { + await withEnv({ TAVILY_API_KEY: 'tvly-test', BRAVE_API_KEY: 'BSA-test' }, () => { const items = cmd.getArgumentCompletions!('') assert.ok(items, 'completions should not be null') - assert.equal(items!.length, 5) + assert.equal(items!.length, 7) const values = items!.map((i: any) => i.value) - assert.deepEqual(values, ['tavily', 'brave', 'ollama', 'combosearch', 'auto']) + assert.deepEqual(values, ['tavily', 'brave', 'serper', 'exa', 'ollama', 'combosearch', 'auto']) // Each item has label and description assert.ok(items!.every((i: any) => i.label), 'every item should have a label') @@ -291,7 +303,7 @@ test('tab completion returns all 5 options when prefix is empty', async () => { test('tab completion filters by prefix: "t" returns only tavily', async () => { const cmd = await loadCommand() - withEnv({ TAVILY_API_KEY: 'tvly-test', BRAVE_API_KEY: undefined }, () => { + await withEnv({ TAVILY_API_KEY: 'tvly-test', BRAVE_API_KEY: undefined }, () => { const items = cmd.getArgumentCompletions!('t') assert.ok(items) assert.equal(items!.length, 1) diff --git a/src/web/onboarding-service.ts b/src/web/onboarding-service.ts index 44b6644f9..5b9148bd8 100644 --- a/src/web/onboarding-service.ts +++ b/src/web/onboarding-service.ts @@ -160,6 +160,8 @@ const OPTIONAL_SECTION_CATALOG: OptionalSectionCatalogEntry[] = [ label: "Web search", providers: [ { id: "brave", label: "Brave Search", envVar: "BRAVE_API_KEY" }, + { id: "serper", label: "Serper", envVar: "SERPER_API_KEY" }, + { id: "exa", label: "Exa Search", envVar: "EXA_API_KEY" }, { id: "tavily", label: "Tavily", envVar: 
"TAVILY_API_KEY" }, ], }, diff --git a/src/wizard.ts b/src/wizard.ts index 654d11a28..77d9b9088 100644 --- a/src/wizard.ts +++ b/src/wizard.ts @@ -11,6 +11,8 @@ export function loadStoredEnvKeys(authStorage: AuthStorage): void { const providers: Array<[string, string]> = [ ['brave', 'BRAVE_API_KEY'], ['brave_answers', 'BRAVE_ANSWERS_KEY'], + ['serper', 'SERPER_API_KEY'], + ['exa', 'EXA_API_KEY'], ['context7', 'CONTEXT7_API_KEY'], ['jina', 'JINA_API_KEY'], ['tavily', 'TAVILY_API_KEY'],