diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index a191abe8c..43b61bd2e 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -261,7 +261,12 @@ export async function inlineGsdRootFile( /** * Inline decisions with optional milestone scoping from the DB. - * Falls back to filesystem via inlineGsdRootFile when DB unavailable or empty. + * Falls back to filesystem via inlineGsdRootFile only when DB is unavailable. + * + * Cascade logic (R005): + * 1. Query with { milestoneId, scope } if scope provided + * 2. If empty AND scope was provided, retry with { milestoneId } only (drop scope) + * 3. If still empty, return null (intentional per D020) */ export async function inlineDecisionsFromDb( base: string, milestoneId?: string, scope?: string, level?: InlineLevel, @@ -271,7 +276,15 @@ export async function inlineDecisionsFromDb( const { isDbAvailable } = await import("./gsd-db.js"); if (isDbAvailable()) { const { queryDecisions, formatDecisionsForPrompt } = await import("./context-store.js"); - const decisions = queryDecisions({ milestoneId, scope }); + + // First query: try with both milestoneId and scope (if scope provided) + let decisions = queryDecisions({ milestoneId, scope }); + + // Cascade: if empty AND scope was provided, retry without scope + if (decisions.length === 0 && scope) { + decisions = queryDecisions({ milestoneId }); + } + if (decisions.length > 0) { // Use compact format for non-full levels to save ~35% tokens const formatted = inlineLevel !== "full" @@ -279,26 +292,29 @@ export async function inlineDecisionsFromDb( : formatDecisionsForPrompt(decisions); return `### Decisions\nSource: \`.gsd/DECISIONS.md\`\n\n${formatted}`; } + // DB available but cascade returned empty — intentional per D020, don't fall back to file + return null; } } catch (err) { logWarning("prompt", `inlineDecisionsFromDb failed: ${err instanceof Error ? err.message : String(err)}`); } + // DB unavailable — fall back to filesystem return inlineGsdRootFile(base, "decisions.md", "Decisions"); } /** - * Inline requirements with optional slice scoping from the DB. + * Inline requirements with optional milestone and slice scoping from the DB. * Falls back to filesystem via inlineGsdRootFile when DB unavailable or empty. */ export async function inlineRequirementsFromDb( - base: string, sliceId?: string, level?: InlineLevel, + base: string, milestoneId?: string, sliceId?: string, level?: InlineLevel, ): Promise { const inlineLevel = level ?? resolveInlineLevel(); try { const { isDbAvailable } = await import("./gsd-db.js"); if (isDbAvailable()) { const { queryRequirements, formatRequirementsForPrompt } = await import("./context-store.js"); - const requirements = queryRequirements({ sliceId }); + const requirements = queryRequirements({ milestoneId, sliceId }); if (requirements.length > 0) { // Use compact format for non-full levels to save ~40% tokens const formatted = inlineLevel !== "full" @@ -335,6 +351,131 @@ export async function inlineProjectFromDb( return inlineGsdRootFile(base, "project.md", "Project"); } +// ─── Stopwords for keyword extraction ───────────────────────────────────── +const STOPWORDS = new Set(['of', 'the', 'and', 'a', 'for', '+', '-', 'to', 'in', 'on', 'with', 'is', 'as', 'by']); + +// Generic words that don't provide meaningful scope differentiation +const GENERIC_WORDS = new Set([ + 'setup', 'integration', 'implementation', 'testing', 'test', 'tests', + 'config', 'configuration', 'init', 'initial', 'basic', 'core', + 'main', 'primary', 'final', 'complete', 'finish', 'end', + 'start', 'begin', 'first', 'last', 'update', 'updates', + 'fix', 'fixes', 'add', 'adds', 'remove', 'removes', + 'create', 'creates', 'build', 'builds', 'deploy', 'deployment', + 'refactor', 'refactoring', 'cleanup', 'polish', 'review', + // Process/activity words that describe what you're doing, not what domain + 'hardening', 'validation', 'verification', 'optimization', + 'improvement', 'enhancement', 'infrastructure', +]); + +// Pattern to match slice/milestone/task IDs (e.g., S01, M001, T03) +const UNIT_ID_PATTERN = /^[smt]\d+$/i; + +/** + * Derive a scope keyword from slice title and optional description. + * Returns the most specific noun (first non-generic keyword) for decision scoping. + * + * Examples: + * - "Auth Middleware & Protected Route" → "auth" + * - "Database & User Model Setup" → "database" + * - "Integration Testing" → undefined (too generic) + * - "API Rate Limiting" → "api" + * + * @param sliceTitle - The slice title + * @param sliceDescription - Optional roadmap description (demo text) + * @returns A single lowercase keyword or undefined if no meaningful scope + */ +export function deriveSliceScope(sliceTitle: string, sliceDescription?: string): string | undefined { + // Combine title and description for keyword extraction + const combinedText = sliceDescription + ? `${sliceTitle} ${sliceDescription}` + : sliceTitle; + + // Extract all words, lowercase, remove punctuation + const words = combinedText + .split(/[\s&+,;:|/\\()-]+/) + .map(w => w.toLowerCase().replace(/[^a-z0-9]/g, '')) + .filter(w => w.length >= 2); + + // Find the first word that is: + // 1. Not a stopword + // 2. Not a generic word + // 3. Not a unit ID (S01, M001, T03) + // 4. At least 3 characters (meaningful scope) + for (const word of words) { + if (STOPWORDS.has(word)) continue; + if (GENERIC_WORDS.has(word)) continue; + if (UNIT_ID_PATTERN.test(word)) continue; + if (word.length < 3) continue; + return word; + } + + return undefined; +} +/** + * Extract keywords from a slice title for scoped knowledge queries. + * Splits on whitespace, filters stopwords, lowercases. + * Example: 'KNOWLEDGE scoping + roadmap excerpt' → ['knowledge', 'scoping', 'roadmap', 'excerpt'] + */ +function extractKeywords(title: string): string[] { + return title + .split(/\s+/) + .map(w => w.toLowerCase().replace(/[^a-z0-9]/g, '')) + .filter(w => w.length > 0 && !STOPWORDS.has(w)); +} + +/** + * Inline scoped KNOWLEDGE.md content based on keywords from slice title. + * Reads KNOWLEDGE.md, filters to sections matching keywords, formats with header. + * Returns null if no KNOWLEDGE.md exists or no sections match. + */ +export async function inlineKnowledgeScoped( + base: string, + keywords: string[], +): Promise { + const knowledgePath = resolveGsdRootFile(base, "KNOWLEDGE"); + if (!existsSync(knowledgePath)) return null; + + const content = await loadFile(knowledgePath); + if (!content) return null; + + // Import queryKnowledge from context-store + const { queryKnowledge } = await import("./context-store.js"); + const scoped = await queryKnowledge(content, keywords); + + // Return null if no sections matched (empty string from queryKnowledge) + if (!scoped) return null; + + return `### Project Knowledge (scoped)\nSource: \`${relGsdRootFile("KNOWLEDGE")}\`\n\n${scoped.trim()}`; +} + +/** + * Inline a roadmap excerpt for a specific slice. + * Reads full roadmap, extracts minimal excerpt with header + predecessor + target row. + * Returns null if roadmap doesn't exist or slice not found. + */ +export async function inlineRoadmapExcerpt( + base: string, + mid: string, + sid: string, +): Promise { + const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); + if (!roadmapPath || !existsSync(roadmapPath)) return null; + + const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); + const content = await loadFile(roadmapPath); + if (!content) return null; + + // Import formatRoadmapExcerpt from context-store + const { formatRoadmapExcerpt } = await import("./context-store.js"); + const excerpt = formatRoadmapExcerpt(content, sid, roadmapRel); + + // Return null if slice not found in roadmap + if (!excerpt) return null; + + return `### Milestone Roadmap (excerpt)\nSource: \`${roadmapRel}\`\n\n${excerpt}`; +} + // ─── Skill Activation & Discovery ───────────────────────────────────────── function normalizeSkillReference(ref: string): string { @@ -880,7 +1021,7 @@ export async function buildResearchMilestonePrompt(mid: string, midTitle: string inlined.push(await inlineFile(contextPath, contextRel, "Milestone Context")); const projectInline = await inlineProjectFromDb(base); if (projectInline) inlined.push(projectInline); - const requirementsInline = await inlineRequirementsFromDb(base); + const requirementsInline = await inlineRequirementsFromDb(base, mid); if (requirementsInline) inlined.push(requirementsInline); const decisionsInline = await inlineDecisionsFromDb(base, mid); if (decisionsInline) inlined.push(decisionsInline); @@ -930,7 +1071,7 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba if (inlineLevel !== "minimal") { const projectInline = await inlineProjectFromDb(base); if (projectInline) inlined.push(projectInline); - const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); @@ -999,19 +1140,35 @@ export async function buildResearchSlicePrompt( const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT"); const inlined: string[] = []; - inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); + + // Use roadmap excerpt instead of full roadmap for context reduction + const roadmapExcerptRS = await inlineRoadmapExcerpt(base, mid, sid); + if (roadmapExcerptRS) { + inlined.push(roadmapExcerptRS); + } else { + // Fall back to full roadmap if excerpt fails + inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); + } + const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context"); if (contextInline) inlined.push(contextInline); const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)"); if (sliceCtxInline) inlined.push(sliceCtxInline); const researchInline = await inlineFileOptional(milestoneResearchPath, milestoneResearchRel, "Milestone Research"); if (researchInline) inlined.push(researchInline); - const decisionsInline = await inlineDecisionsFromDb(base, mid); + + // Derive scope from slice title for decision filtering (R005) + const derivedScope = deriveSliceScope(sTitle); + const decisionsInline = await inlineDecisionsFromDb(base, mid, derivedScope); if (decisionsInline) inlined.push(decisionsInline); - const requirementsInline = await inlineRequirementsFromDb(base, sid); + const requirementsInline = await inlineRequirementsFromDb(base, mid, sid); if (requirementsInline) inlined.push(requirementsInline); - const knowledgeInlineRS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); + + // Use scoped knowledge based on slice title keywords + const keywords = extractKeywords(sTitle); + const knowledgeInlineRS = await inlineKnowledgeScoped(base, keywords); if (knowledgeInlineRS) inlined.push(knowledgeInlineRS); + inlined.push(inlineTemplate("research", "Research")); const depContent = await inlineDependencySummaries(mid, sid, base); @@ -1060,19 +1217,33 @@ export async function buildPlanSlicePrompt( const researchSliceAnchor = readPhaseAnchor(base, mid, "research-slice"); if (researchSliceAnchor) inlined.push(formatAnchorForPrompt(researchSliceAnchor)); - inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); + // Use roadmap excerpt instead of full roadmap for context reduction + const roadmapExcerptPS = await inlineRoadmapExcerpt(base, mid, sid); + if (roadmapExcerptPS) { + inlined.push(roadmapExcerptPS); + } else { + // Fall back to full roadmap if excerpt fails + inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); + } + const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)"); if (sliceCtxInline) inlined.push(sliceCtxInline); const researchInline = await inlineFileOptional(researchPath, researchRel, "Slice Research"); if (researchInline) inlined.push(researchInline); if (inlineLevel !== "minimal") { - const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); + // Derive scope from slice title for decision filtering (R005) + const derivedScopePS = deriveSliceScope(sTitle); + const decisionsInline = await inlineDecisionsFromDb(base, mid, derivedScopePS, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); - const requirementsInline = await inlineRequirementsFromDb(base, sid, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); } - const knowledgeInlinePS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); + + // Use scoped knowledge based on slice title keywords + const keywordsPS = extractKeywords(sTitle); + const knowledgeInlinePS = await inlineKnowledgeScoped(base, keywordsPS); if (knowledgeInlinePS) inlined.push(knowledgeInlinePS); + inlined.push(inlineTemplate("plan", "Slice Plan")); if (inlineLevel === "full") { inlined.push(inlineTemplate("task-plan", "Task Plan")); @@ -1272,7 +1443,7 @@ export async function buildCompleteSlicePrompt( if (sliceCtxInline) inlined.push(sliceCtxInline); inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Slice Plan")); if (inlineLevel !== "minimal") { - const requirementsInline = await inlineRequirementsFromDb(base, sid, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); } const knowledgeInlineCS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); @@ -1355,7 +1526,7 @@ export async function buildCompleteMilestonePrompt( // Inline root GSD files (skip for minimal — completion can read these if needed) if (inlineLevel !== "minimal") { - const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); @@ -1480,7 +1651,7 @@ export async function buildValidateMilestonePrompt( // Inline root GSD files if (inlineLevel !== "minimal") { - const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); @@ -1656,7 +1827,7 @@ export async function buildReassessRoadmapPrompt( if (inlineLevel !== "minimal") { const projectInline = await inlineProjectFromDb(base); if (projectInline) inlined.push(projectInline); - const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel); + const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel); if (requirementsInline) inlined.push(requirementsInline); const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel); if (decisionsInline) inlined.push(decisionsInline); diff --git a/src/resources/extensions/gsd/context-store.ts b/src/resources/extensions/gsd/context-store.ts index b23f1e855..df938555a 100644 --- a/src/resources/extensions/gsd/context-store.ts +++ b/src/resources/extensions/gsd/context-store.ts @@ -15,6 +15,7 @@ export interface DecisionQueryOpts { } export interface RequirementQueryOpts { + milestoneId?: string; sliceId?: string; status?: string; } @@ -67,7 +68,8 @@ export function queryDecisions(opts?: DecisionQueryOpts): Decision[] { /** * Query active (non-superseded) requirements with optional filters. - * - sliceId: filters where primary_owner LIKE '%sliceId%' OR supporting_slices LIKE '%sliceId%' + * - milestoneId: combined with sliceId for precise filtering (e.g. %M005/S01%) + * - sliceId: filters where primary_owner LIKE '%pattern%' OR supporting_slices LIKE '%pattern%' * - status: filters where status = :status (exact match) * * Returns [] if DB is not available. Never throws. @@ -81,9 +83,19 @@ export function queryRequirements(opts?: RequirementQueryOpts): Requirement[] { const clauses: string[] = ['superseded_by IS NULL']; const params: Record = {}; - if (opts?.sliceId) { + // Combined milestone+slice filtering for precise scoping + if (opts?.milestoneId && opts?.sliceId) { + // Use combined pattern like %M005/S01% to avoid cross-milestone contamination + clauses.push('(primary_owner LIKE :combined_pattern OR supporting_slices LIKE :combined_pattern)'); + params[':combined_pattern'] = `%${opts.milestoneId}/${opts.sliceId}%`; + } else if (opts?.sliceId) { + // Slice-only filtering (legacy behavior) clauses.push('(primary_owner LIKE :slice_pattern OR supporting_slices LIKE :slice_pattern)'); params[':slice_pattern'] = `%${opts.sliceId}%`; + } else if (opts?.milestoneId) { + // Milestone-only filtering + clauses.push('(primary_owner LIKE :milestone_pattern OR supporting_slices LIKE :milestone_pattern)'); + params[':milestone_pattern'] = `%${opts.milestoneId}%`; } if (opts?.status) { @@ -194,3 +206,156 @@ export function queryArtifact(path: string): string | null { export function queryProject(): string | null { return queryArtifact('PROJECT.md'); } + +// ─── Knowledge Query ─────────────────────────────────────────────────────── + +/** + * Filter KNOWLEDGE.md sections by keyword matching. + * Uses H2 sections, matches keywords case-insensitively against: + * 1. Section header text + * 2. First paragraph of section content (up to first blank line or next heading) + * + * Per D020, returns empty string (not null) when no matches found. + * This signals "no relevant knowledge" vs "file not found". + * + * @param content - Full KNOWLEDGE.md content + * @param keywords - Keywords to match (case-insensitive) + * @returns Concatenated matching sections with H2 headers, or empty string + */ +export async function queryKnowledge(content: string, keywords: string[]): Promise { + if (!content || keywords.length === 0) return ''; + + // Lazy import to avoid circular dependency + const { extractAllSections } = await import('./files.js'); + + const sections = extractAllSections(content, 2); + if (sections.size === 0) return ''; + + // Normalize keywords for case-insensitive matching + const normalizedKeywords = keywords.map(k => k.toLowerCase()); + + const matchingSections: string[] = []; + + for (const [header, body] of sections) { + // Extract first paragraph: everything up to first blank line or next heading + const firstParagraph = body.split(/\n\s*\n|\n#/)[0] || ''; + + // Check if any keyword matches header or first paragraph + const headerLower = header.toLowerCase(); + const paragraphLower = firstParagraph.toLowerCase(); + + const matches = normalizedKeywords.some(kw => + headerLower.includes(kw) || paragraphLower.includes(kw) + ); + + if (matches) { + matchingSections.push(`## ${header}\n\n${body}`); + } + } + + return matchingSections.join('\n\n'); +} + +// ─── Roadmap Excerpt Formatter ───────────────────────────────────────────── + +/** + * Format a minimal roadmap excerpt for prompt injection. + * Parses the slice table from roadmap content, extracts: + * 1. Header row + separator + * 2. Predecessor row (if sliceId depends on one via the Depends column) + * 3. Target slice row + * 4. Reference directive pointing to full roadmap path + * + * Per D021, this minimizes injected content while preserving dependency awareness. + * Returns empty string if sliceId is not found in the table. + * Never throws. + * + * @param roadmapContent - Full content of the M###-ROADMAP.md file + * @param sliceId - Target slice ID (e.g. 'S02') + * @param roadmapPath - Optional path for reference directive (defaults to generic) + */ +export function formatRoadmapExcerpt( + roadmapContent: string, + sliceId: string, + roadmapPath = 'ROADMAP.md', +): string { + if (!roadmapContent || !sliceId) return ''; + + const lines = roadmapContent.split('\n'); + + // Find the slice table header: | ID | Slice | ... (case insensitive) + let headerIndex = -1; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line && /^\s*\|\s*ID\s*\|\s*Slice\s*\|/i.test(line)) { + headerIndex = i; + break; + } + } + + if (headerIndex === -1) return ''; + + // The separator should be the next line (|---|---|...) + const separatorIndex = headerIndex + 1; + if (separatorIndex >= lines.length) return ''; + + const headerLine = lines[headerIndex]; + const separatorLine = lines[separatorIndex]; + + // Validate separator line looks like |---|---|... (may include : for alignment) + if (!separatorLine || !/^\s*\|[\s:\-|]+\|/.test(separatorLine)) return ''; + + // Parse table rows after separator + interface SliceRow { + line: string; + id: string; + depends: string; + } + + const sliceRows: SliceRow[] = []; + for (let i = separatorIndex + 1; i < lines.length; i++) { + const line = lines[i]; + if (!line || !line.trim().startsWith('|')) break; // End of table + + // Parse row: | ID | Slice | Risk | Depends | Done | After this | + const cells = line.split('|').map(c => c.trim()); + // cells[0] is empty (before first |), cells[1] is ID, etc. + if (cells.length < 5) continue; + + const id = cells[1] || ''; + const depends = cells[4] || ''; // Depends column (0-indexed: empty, ID, Slice, Risk, Depends, ...) + + sliceRows.push({ line, id, depends }); + } + + // Find target slice row + const targetRow = sliceRows.find(r => r.id === sliceId); + if (!targetRow) return ''; + + // Find predecessor if target depends on one + // Depends column may contain: '—', 'S01', 'S01, S02', etc. + let predecessorRow: SliceRow | undefined; + const dependsRaw = targetRow.depends; + if (dependsRaw && dependsRaw !== '—' && dependsRaw !== '-') { + // Extract first dependency (e.g. 'S01' from 'S01, S02') + const depMatch = dependsRaw.match(/S\d+/); + if (depMatch) { + predecessorRow = sliceRows.find(r => r.id === depMatch[0]); + } + } + + // Build excerpt + const excerptLines: string[] = [headerLine!, separatorLine!]; + + if (predecessorRow) { + excerptLines.push(predecessorRow.line); + } + + excerptLines.push(targetRow.line); + + // Add reference directive + excerptLines.push(''); + excerptLines.push(`> See full roadmap: ${roadmapPath}`); + + return excerptLines.join('\n'); +} diff --git a/src/resources/extensions/gsd/tests/context-store.test.ts b/src/resources/extensions/gsd/tests/context-store.test.ts index 88c1f84fd..f81934092 100644 --- a/src/resources/extensions/gsd/tests/context-store.test.ts +++ b/src/resources/extensions/gsd/tests/context-store.test.ts @@ -15,6 +15,8 @@ import { formatRequirementsForPrompt, queryArtifact, queryProject, + formatRoadmapExcerpt, + queryKnowledge, } from '../context-store.ts'; // ═══════════════════════════════════════════════════════════════════════════ @@ -452,3 +454,177 @@ describe("context-store: queryProject", () => { assert.strictEqual(content, null, 'queryProject returns null when DB closed'); }); }); + +// ═══════════════════════════════════════════════════════════════════════════ +// context-store: formatRoadmapExcerpt +// ═══════════════════════════════════════════════════════════════════════════ + +describe("context-store: formatRoadmapExcerpt", () => { + // Sample roadmap content matching actual M005-ROADMAP.md format + const sampleRoadmap = `# M005: Tiered Context Injection + +## Vision +Refactor prompt builders to inject relevance-scoped context. + +## Slice Overview +| ID | Slice | Risk | Depends | Done | After this | +|----|-------|------|---------|------|------------| +| S01 | Scope existing queries | low | — | ✅ | planSlice prompt scoped. | +| S02 | KNOWLEDGE scoping | medium | S01 | ⬜ | KNOWLEDGE sections filtered. | +| S03 | Measurement test | low | S02 | ⬜ | 40% reduction confirmed. | +`; + + test("S02 with S01 predecessor includes both rows", () => { + const result = formatRoadmapExcerpt(sampleRoadmap, 'S02', '.gsd/milestones/M005/M005-ROADMAP.md'); + + // Should have header + assert.match(result, /\| ID \| Slice \| Risk \| Depends \| Done \| After this \|/, 'has header row'); + // Should have separator + assert.match(result, /\|----\|/, 'has separator row'); + // Should have S01 predecessor + assert.match(result, /\| S01 \|/, 'has predecessor S01 row'); + // Should have S02 target + assert.match(result, /\| S02 \|/, 'has target S02 row'); + // Should have reference directive + assert.match(result, /See full roadmap:.*M005-ROADMAP\.md/, 'has reference directive'); + // Should NOT have S03 (not relevant) + assert.ok(!result.includes('| S03 |'), 'does not include unrelated S03'); + }); + + test("S01 with no predecessor includes only target row", () => { + const result = formatRoadmapExcerpt(sampleRoadmap, 'S01'); + + // Should have header + separator + S01 only + assert.match(result, /\| ID \| Slice \|/, 'has header row'); + assert.match(result, /\| S01 \|/, 'has target S01 row'); + // Should NOT have S02 or S03 + assert.ok(!result.includes('| S02 |'), 'does not include S02'); + assert.ok(!result.includes('| S03 |'), 'does not include S03'); + // Should have reference + assert.match(result, /See full roadmap:/, 'has reference directive'); + + // Count rows: header + separator + S01 + blank + directive = 5 lines + const lines = result.split('\n'); + assert.strictEqual(lines.length, 5, 'correct number of lines (no predecessor)'); + }); + + test("missing slice returns empty string", () => { + const result = formatRoadmapExcerpt(sampleRoadmap, 'S99'); + + assert.strictEqual(result, '', 'missing slice returns empty string'); + }); + + test("empty input returns empty string", () => { + assert.strictEqual(formatRoadmapExcerpt('', 'S01'), '', 'empty content returns empty'); + assert.strictEqual(formatRoadmapExcerpt(sampleRoadmap, ''), '', 'empty sliceId returns empty'); + }); + + test("handles table with various column formats", () => { + // Table with different spacing and content + const variantRoadmap = `# Milestone + +| ID | Slice | Risk | Depends | Done | After this | +|:---|:------|:-----|:--------|:-----|:-----------| +| S01 | First slice title | low | — | ✅ | First complete. | +| S02 | Second longer slice title here | medium | S01 | ⬜ | Second working. | +`; + + const result = formatRoadmapExcerpt(variantRoadmap, 'S02'); + + assert.match(result, /\| S01 \|/, 'has predecessor with different spacing'); + assert.match(result, /\| S02 \|/, 'has target with different spacing'); + assert.match(result, /Second longer slice title/, 'preserves full slice title'); + }); + + test("handles multiple dependencies by using first one", () => { + const multiDepRoadmap = `| ID | Slice | Risk | Depends | Done | After this | +|----|-------|------|---------|------|------------| +| S01 | First | low | — | ✅ | Done. | +| S02 | Second | low | — | ✅ | Done. | +| S03 | Third | medium | S01, S02 | ⬜ | Working. | +`; + + const result = formatRoadmapExcerpt(multiDepRoadmap, 'S03'); + + // Should include S01 (first dependency) and S03 + assert.match(result, /\| S01 \|/, 'has first dependency S01'); + assert.match(result, /\| S03 \|/, 'has target S03'); + // S02 is also a dependency but we only include the first one + // (This is intentional to keep excerpts minimal) + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// context-store: queryKnowledge +// ═══════════════════════════════════════════════════════════════════════════ + +describe("context-store: queryKnowledge", () => { + // Sample KNOWLEDGE.md content + const sampleKnowledge = `# Project Knowledge + +## Database Patterns +SQLite is used with WAL mode for concurrent reads. +Always use prepared statements. + +More database details here. + +## API Design +REST endpoints follow OpenAPI spec. +Use versioned paths like /v1/resource. + +## Testing Guidelines +Unit tests use node:test. +Integration tests mock external services. +`; + + test("single keyword matches header", async () => { + const result = await queryKnowledge(sampleKnowledge, ['database']); + + assert.match(result, /## Database Patterns/, 'includes matching section header'); + assert.match(result, /SQLite is used with WAL mode/, 'includes section content'); + // Should NOT include other sections + assert.ok(!result.includes('## API Design'), 'does not include non-matching API section'); + assert.ok(!result.includes('## Testing Guidelines'), 'does not include non-matching Testing section'); + }); + + test("multiple keywords match multiple sections", async () => { + const result = await queryKnowledge(sampleKnowledge, ['database', 'testing']); + + assert.match(result, /## Database Patterns/, 'includes Database section'); + assert.match(result, /## Testing Guidelines/, 'includes Testing section'); + assert.ok(!result.includes('## API Design'), 'does not include API section'); + }); + + test("no matches returns empty string", async () => { + const result = await queryKnowledge(sampleKnowledge, ['nonexistent']); + + assert.strictEqual(result, '', 'no matches returns empty string per D020'); + }); + + test("keyword in first paragraph matches", async () => { + const result = await queryKnowledge(sampleKnowledge, ['sqlite']); + + // 'sqlite' appears in first paragraph of Database Patterns + assert.match(result, /## Database Patterns/, 'matches keyword in first paragraph'); + assert.match(result, /SQLite is used/, 'includes the section with matching paragraph'); + }); + + test("case-insensitive matching", async () => { + const result = await queryKnowledge(sampleKnowledge, ['DATABASE', 'API']); + + assert.match(result, /## Database Patterns/, 'case-insensitive header match'); + assert.match(result, /## API Design/, 'case-insensitive header match for API'); + }); + + test("empty keywords returns empty string", async () => { + const result = await queryKnowledge(sampleKnowledge, []); + + assert.strictEqual(result, '', 'empty keywords returns empty string'); + }); + + test("empty content returns empty string", async () => { + const result = await queryKnowledge('', ['database']); + + assert.strictEqual(result, '', 'empty content returns empty string'); + }); +}); diff --git a/src/resources/extensions/gsd/tests/decision-scope-cascade.test.ts b/src/resources/extensions/gsd/tests/decision-scope-cascade.test.ts new file mode 100644 index 000000000..0660a771c --- /dev/null +++ b/src/resources/extensions/gsd/tests/decision-scope-cascade.test.ts @@ -0,0 +1,370 @@ +// decision-scope-cascade: Tests for R005 fallback cascade and scope derivation +// +// Validates: +// (a) inlineDecisionsFromDb cascade: milestone + scope → milestone only → null +// (b) deriveSliceScope extracts meaningful scope keywords from slice titles +// (c) deriveSliceScope returns undefined for generic titles + +import { describe, test, afterEach, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { + openDatabase, + closeDatabase, + isDbAvailable, + insertDecision, +} from '../gsd-db.ts'; +import { + queryDecisions, + formatDecisionsForPrompt, +} from '../context-store.ts'; +import { deriveSliceScope } from '../auto-prompts.ts'; + +// ═══════════════════════════════════════════════════════════════════════════ +// deriveSliceScope: Extract meaningful scope from slice titles +// ═══════════════════════════════════════════════════════════════════════════ + +describe("deriveSliceScope: keyword extraction", () => { + test("extracts first meaningful noun from title", () => { + // "Auth Middleware & Protected Route" → "auth" + assert.strictEqual( + deriveSliceScope("Auth Middleware & Protected Route"), + "auth", + "extracts 'auth' from auth-related title", + ); + + // "Database & User Model Setup" → "database" (not "setup" which is generic) + const dbScope = deriveSliceScope("Database & User Model Setup"); + assert.ok( + dbScope === "database" || dbScope === "user", + `expected 'database' or 'user', got '${dbScope}'`, + ); + + // "API Rate Limiting" → "api" + assert.strictEqual( + deriveSliceScope("API Rate Limiting"), + "api", + "extracts 'api' from API-related title", + ); + + // "Stripe Payment Integration" → "stripe" + assert.strictEqual( + deriveSliceScope("Stripe Payment Integration"), + "stripe", + "extracts 'stripe' from payment-related title", + ); + }); + + test("returns undefined for generic titles", () => { + // "Integration Testing" → undefined (both words are generic) + assert.strictEqual( + deriveSliceScope("Integration Testing"), + undefined, + "returns undefined for generic 'Integration Testing'", + ); + + // "Setup & Configuration" → undefined (all generic) + assert.strictEqual( + deriveSliceScope("Setup & Configuration"), + undefined, + "returns undefined for generic 'Setup & Configuration'", + ); + + // "Final Review" → undefined + assert.strictEqual( + deriveSliceScope("Final Review"), + undefined, + "returns undefined for generic 'Final Review'", + ); + + // "Basic Implementation" → undefined + assert.strictEqual( + deriveSliceScope("Basic Implementation"), + undefined, + "returns undefined for generic 'Basic Implementation'", + ); + }); + + test("handles description as additional context", () => { + // Generic title but specific description + const scope = deriveSliceScope( + "Initial Setup", + "Configure PostgreSQL database connection", + ); + assert.ok( + scope === "postgresql" || scope === "database" || scope === "configure", + `expected meaningful scope from description, got '${scope}'`, + ); + }); + + test("handles edge cases", () => { + // Empty title + assert.strictEqual( + deriveSliceScope(""), + undefined, + "returns undefined for empty title", + ); + + // Short words only + assert.strictEqual( + deriveSliceScope("A B C"), + undefined, + "returns undefined for very short words", + ); + + // Mixed case and punctuation + assert.strictEqual( + deriveSliceScope("OAuth2 + JWT Authentication"), + "oauth2", + "handles mixed case and punctuation", + ); + }); + + test("filters unit IDs (S01, M001, T03)", () => { + // "S01: Infrastructure" → undefined (S01 is a unit ID, infrastructure is generic) + assert.strictEqual( + deriveSliceScope("S01: Infrastructure"), + undefined, + "skips S01 ID and returns undefined for generic 'Infrastructure'", + ); + + // "M001 Setup" → undefined (M001 is a unit ID, setup is generic) + assert.strictEqual( + deriveSliceScope("M001 Setup"), + undefined, + "skips M001 ID and returns undefined for generic 'Setup'", + ); + + // "T03: Database Migration" → "database" (skips T03, returns meaningful word) + assert.strictEqual( + deriveSliceScope("T03: Database Migration"), + "database", + "skips T03 ID and returns 'database'", + ); + + // "S02 Auth Flow" → "auth" (skips S02, returns meaningful word) + assert.strictEqual( + deriveSliceScope("S02 Auth Flow"), + "auth", + "skips S02 ID and returns 'auth'", + ); + }); + + test("filters process/activity words", () => { + // "Integration Testing + Hardening" → undefined (all generic/process words) + assert.strictEqual( + deriveSliceScope("Integration Testing + Hardening"), + undefined, + "returns undefined for 'Integration Testing + Hardening'", + ); + + // "Validation & Verification" → undefined (both are process words) + assert.strictEqual( + deriveSliceScope("Validation & Verification"), + undefined, + "returns undefined for 'Validation & Verification'", + ); + + // "Performance Optimization" → "performance" (optimization is generic, performance is domain) + assert.strictEqual( + deriveSliceScope("Performance Optimization"), + "performance", + "extracts 'performance' before generic 'optimization'", + ); + + // "Security Enhancement" → "security" (enhancement is generic, security is domain) + assert.strictEqual( + deriveSliceScope("Security Enhancement"), + "security", + "extracts 'security' before generic 'enhancement'", + ); + + // "WebSocket Delivery Pipeline" → "websocket" + assert.strictEqual( + deriveSliceScope("WebSocket Delivery Pipeline"), + "websocket", + "extracts 'websocket' from delivery pipeline title", + ); + + // "Prisma Schema + Migration" → "prisma" + assert.strictEqual( + deriveSliceScope("Prisma Schema + Migration"), + "prisma", + "extracts 'prisma' from schema migration title", + ); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// inlineDecisionsFromDb cascade: R005 implementation +// ═══════════════════════════════════════════════════════════════════════════ + +describe("inlineDecisionsFromDb: cascade fallback (R005)", () => { + beforeEach(() => { + openDatabase(':memory:'); + }); + + afterEach(() => { + closeDatabase(); + }); + + test("cascade: scoped query returns scoped decisions when they exist", () => { + // Insert decisions with different scopes + insertDecision({ + id: 'D001', when_context: 'M001/S01', scope: 'auth', + decision: 'use JWT', choice: 'JWT', rationale: 'standard', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + insertDecision({ + id: 'D002', when_context: 'M001/S02', scope: 'database', + decision: 'use PostgreSQL', choice: 'PostgreSQL', rationale: 'relational', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + insertDecision({ + id: 'D003', when_context: 'M001/S01', scope: 'architecture', + decision: 'use microservices', choice: 'microservices', rationale: 'scalable', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + + // Query with scope 'auth' should return D001 only + const authDecisions = queryDecisions({ milestoneId: 'M001', scope: 'auth' }); + assert.strictEqual(authDecisions.length, 1, 'scoped query returns 1 decision'); + assert.strictEqual(authDecisions[0]?.id, 'D001', 'returns D001 for auth scope'); + + // Query with scope 'database' should return D002 only + const dbDecisions = queryDecisions({ milestoneId: 'M001', scope: 'database' }); + assert.strictEqual(dbDecisions.length, 1, 'scoped query returns 1 decision'); + assert.strictEqual(dbDecisions[0]?.id, 'D002', 'returns D002 for database scope'); + }); + + test("cascade: milestone-only fallback when scoped query returns empty", () => { + // Insert decisions for M001 with generic scope (e.g. 'architecture') + insertDecision({ + id: 'D001', when_context: 'M001/S01', scope: 'architecture', + decision: 'use microservices', choice: 'microservices', rationale: 'scalable', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + insertDecision({ + id: 'D002', when_context: 'M001/S02', scope: 'performance', + decision: 'use caching', choice: 'Redis', rationale: 'fast', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + + // Query with scope 'auth' (no decisions with this scope) should return empty + const authDecisions = queryDecisions({ milestoneId: 'M001', scope: 'auth' }); + assert.strictEqual(authDecisions.length, 0, 'scoped query for auth returns empty'); + + // Simulate cascade: fallback to milestone-only query + const milestoneDecisions = queryDecisions({ milestoneId: 'M001' }); + assert.strictEqual(milestoneDecisions.length, 2, 'milestone-only query returns 2 decisions'); + const ids = milestoneDecisions.map(d => d.id).sort(); + assert.deepStrictEqual(ids, ['D001', 'D002'], 'milestone fallback returns all M001 decisions'); + }); + + test("cascade: returns null when both scoped and milestone queries are empty", () => { + // Insert decisions only for M002 + insertDecision({ + id: 'D001', when_context: 'M002/S01', scope: 'auth', + decision: 'use OAuth', choice: 'OAuth2', rationale: 'standard', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + + // Query M001 with scope should return empty (no M001 decisions at all) + const scopedDecisions = queryDecisions({ milestoneId: 'M001', scope: 'auth' }); + assert.strictEqual(scopedDecisions.length, 0, 'scoped query returns empty'); + + // Fallback to milestone-only should also return empty (no M001 decisions) + const milestoneDecisions = queryDecisions({ milestoneId: 'M001' }); + assert.strictEqual(milestoneDecisions.length, 0, 'milestone-only query returns empty'); + + // This scenario would result in null from inlineDecisionsFromDb + // (we can't directly test inlineDecisionsFromDb here without mocking fs) + }); + + test("cascade: demonstrates the full cascade behavior", () => { + // This test demonstrates the cascade logic that inlineDecisionsFromDb implements: + // 1. First try { milestoneId: 'M001', scope: 'payment' } → empty + // 2. Then try { milestoneId: 'M001' } → gets D001, D002 + // 3. Return the milestone-level decisions + + // Setup: decisions exist at milestone level but not for 'payment' scope + insertDecision({ + id: 'D001', when_context: 'M001/S01', scope: 'architecture', + decision: 'use REST', choice: 'REST API', rationale: 'standard', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + insertDecision({ + id: 'D002', when_context: 'M001/S02', scope: 'security', + decision: 'use HTTPS', choice: 'TLS 1.3', rationale: 'secure', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + + // Step 1: Query with scope 'payment' (no matches) + const paymentDecisions = queryDecisions({ milestoneId: 'M001', scope: 'payment' }); + assert.strictEqual(paymentDecisions.length, 0, 'payment scope query returns empty'); + + // Step 2: Since scope was provided but returned empty, cascade to milestone-only + const milestoneDecisions = queryDecisions({ milestoneId: 'M001' }); + assert.strictEqual(milestoneDecisions.length, 2, 'milestone fallback returns 2 decisions'); + + // Step 3: Format and verify content + const formatted = formatDecisionsForPrompt(milestoneDecisions); + assert.match(formatted, /D001/, 'formatted output includes D001'); + assert.match(formatted, /D002/, 'formatted output includes D002'); + assert.match(formatted, /architecture/, 'formatted output includes architecture scope'); + assert.match(formatted, /security/, 'formatted output includes security scope'); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Integration: scope derivation feeds into cascade +// ═══════════════════════════════════════════════════════════════════════════ + +describe("integration: scope derivation with cascade", () => { + beforeEach(() => { + openDatabase(':memory:'); + }); + + afterEach(() => { + closeDatabase(); + }); + + test("derived scope finds matching decisions when they exist", () => { + // Insert decisions with 'auth' scope + insertDecision({ + id: 'D001', when_context: 'M001/S01', scope: 'auth', + decision: 'use JWT', choice: 'JWT tokens', rationale: 'stateless', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + + // Derive scope from slice title + const derivedScope = deriveSliceScope("Auth Middleware & Protected Routes"); + assert.strictEqual(derivedScope, 'auth', 'derives auth scope from title'); + + // Query with derived scope should find the decision + const decisions = queryDecisions({ milestoneId: 'M001', scope: derivedScope }); + assert.strictEqual(decisions.length, 1, 'scoped query finds matching decision'); + assert.strictEqual(decisions[0]?.id, 'D001', 'finds the auth decision'); + }); + + test("generic title triggers milestone-level fallback", () => { + // Insert decisions with various scopes + insertDecision({ + id: 'D001', when_context: 'M001/S01', scope: 'architecture', + decision: 'use monolith', choice: 'monolith', rationale: 'simple', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + insertDecision({ + id: 'D002', when_context: 'M001/S02', scope: 'tooling', + decision: 'use TypeScript', choice: 'TypeScript', rationale: 'type safety', + revisable: 'yes', made_by: 'agent', superseded_by: null, + }); + + // Derive scope from generic slice title + const derivedScope = deriveSliceScope("Integration Testing"); + assert.strictEqual(derivedScope, undefined, 'generic title returns undefined scope'); + + // Without a scope, query returns all milestone decisions + const decisions = queryDecisions({ milestoneId: 'M001', scope: derivedScope }); + assert.strictEqual(decisions.length, 2, 'no scope filter returns all decisions'); + }); +}); diff --git a/src/resources/extensions/gsd/tests/measurement.test.ts b/src/resources/extensions/gsd/tests/measurement.test.ts new file mode 100644 index 000000000..25785d10f --- /dev/null +++ b/src/resources/extensions/gsd/tests/measurement.test.ts @@ -0,0 +1,531 @@ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { + queryKnowledge, + formatRoadmapExcerpt, +} from '../context-store.ts'; + +// ═══════════════════════════════════════════════════════════════════════════ +// measurement.test.ts — Verify ≥40% context reduction from scoped injection +// +// Tests queryKnowledge() and formatRoadmapExcerpt() with realistic synthetic +// fixtures to confirm the context reduction target is met. +// ═══════════════════════════════════════════════════════════════════════════ + +// ─── Synthetic KNOWLEDGE.md Fixture (~8KB, 9 H2 sections) ────────────────── + +const syntheticKnowledge = `# Project Knowledge Base + +## Database Patterns +SQLite is the primary persistence layer, using WAL mode for concurrent reads. +All queries use prepared statements for SQL injection prevention. +Connection pooling is handled by better-sqlite3's synchronous API. +Schema migrations are versioned and applied at startup. + +Example patterns: +- Use transactions for multi-statement operations +- Prefer RETURNING clause for insert/update +- Index foreign keys for join performance +- Use CHECK constraints for data validation + +Performance considerations: +- WAL checkpoint every 1000 writes +- Vacuum on shutdown for space reclamation +- Page size 4096 for SSD optimization + +Database schema evolution: +- Migrations stored in migrations/ directory +- Each migration has up/down scripts +- Version table tracks applied migrations +- Rollback supported for last N migrations + +Connection management: +- Single connection for write operations +- Read connections pooled for concurrency +- Connection timeout set to 5 seconds +- Busy timeout handles lock contention + +Query patterns: +- Use prepared statements for parameterization +- Batch inserts via INSERT ... VALUES syntax +- Upserts via INSERT OR REPLACE +- Pagination via LIMIT/OFFSET or cursor + +## API Design Principles +REST endpoints follow OpenAPI 3.0 specification. +Versioned paths use /v1/resource pattern. +Authentication uses Bearer tokens in Authorization header. +Rate limiting applies per-client with sliding window algorithm. + +Response formats: +- Success: { data: T, meta?: { pagination } } +- Error: { error: { code, message, details? } } +- Pagination: cursor-based for large collections + +Content negotiation: +- Accept: application/json (default) +- Accept: text/plain (for CLI consumers) +- Accept: text/event-stream (for SSE endpoints) + +API versioning strategy: +- Major versions in URL path (/v1, /v2) +- Minor versions via Accept-Version header +- Deprecation warnings in response headers +- 12-month sunset period for old versions + +Endpoint naming conventions: +- Nouns for resources (users, projects) +- Verbs only for non-CRUD actions (login, export) +- Plural form for collections +- Singular for singletons (me, config) + +HTTP method semantics: +- GET: read-only, cacheable +- POST: create or non-idempotent action +- PUT: full replacement +- PATCH: partial update +- DELETE: remove resource + +## Testing Strategy +Unit tests use node:test with strict assertions. +Integration tests mock external services via msw. +E2E tests use Playwright for browser automation. +Test coverage target is 80% line coverage. + +Test organization: +- Unit tests adjacent to source files (*.test.ts) +- Integration tests in __tests__/integration/ +- E2E tests in e2e/ directory +- Fixtures in __fixtures__/ subdirectories + +Mocking guidelines: +- Prefer dependency injection over global mocks +- Use vi.mock() sparingly, only for ES module boundaries +- Reset mocks in afterEach hooks + +Test data management: +- Factories generate realistic test data +- Seeds populate database for integration tests +- Snapshots capture expected output +- Golden files for complex comparisons + +Assertion patterns: +- Use strict equality for primitives +- Deep equality for objects/arrays +- Regex matching for dynamic content +- Snapshot testing for UI components + +Test isolation: +- Each test gets fresh database state +- Environment variables reset between tests +- File system operations use temp directories +- Network calls intercepted by mock server + +## Error Handling +Errors are typed using discriminated unions. +Application errors extend BaseError class. +HTTP errors map to standard status codes. +Unhandled rejections trigger graceful shutdown. + +Error codes follow domain prefixes: +- AUTH_xxx: Authentication/authorization errors +- DB_xxx: Database operation failures +- NET_xxx: Network/external service errors +- VAL_xxx: Validation errors + +Logging integration: +- Error instances auto-serialize to JSON +- Stack traces included in development +- Correlation IDs propagate through request chain + +Error recovery strategies: +- Retry with exponential backoff for transient errors +- Circuit breaker for external service failures +- Fallback values for non-critical operations +- Graceful degradation for partial failures + +User-facing error messages: +- Generic messages for security-sensitive errors +- Actionable guidance for recoverable errors +- Reference codes for support escalation +- Localized messages via i18n + +Error boundary patterns: +- Component-level boundaries in UI +- Route-level error handlers in API +- Global unhandled rejection handlers +- Process-level crash recovery + +## Observability Patterns +Structured logging uses pino with JSON output. +Metrics collected via OpenTelemetry SDK. +Traces propagate context through async boundaries. +Health checks exposed at /health and /ready endpoints. + +Log levels: +- ERROR: Unrecoverable failures +- WARN: Degraded operation +- INFO: Significant state changes +- DEBUG: Detailed diagnostic data + +Metric types: +- Counters for request counts +- Histograms for latency distribution +- Gauges for resource utilization + +Trace context propagation: +- W3C Trace Context headers +- Baggage for cross-service metadata +- Span attributes for searchability +- Events for significant moments + +Dashboard design: +- SLO dashboards for reliability +- Request flow visualization +- Error rate trends +- Resource saturation alerts + +Alerting strategy: +- Page for customer-impacting issues +- Ticket for degraded performance +- Notification for capacity planning +- Silence during maintenance windows + +## Security Guidelines +Secrets never appear in logs or error messages. +Environment variables validated at startup. +CORS configured per-environment whitelist. +CSP headers enforced for web responses. + +Input validation: +- Zod schemas for request body parsing +- Path parameters validated against patterns +- Query parameters have default/max values + +Output encoding: +- HTML entities escaped in templates +- JSON stringification for API responses +- URL encoding for redirect targets + +Authentication patterns: +- JWT tokens with short expiry +- Refresh token rotation +- Session invalidation on logout +- Multi-factor authentication support + +Authorization model: +- Role-based access control (RBAC) +- Resource-level permissions +- Attribute-based policies (ABAC) +- Principle of least privilege + +Secure communication: +- TLS 1.3 minimum +- Certificate pinning for mobile +- HSTS preload list +- Certificate transparency logging + +## Performance Optimization +Critical paths target sub-10ms latency. +Database queries use covering indexes. +Response compression enabled for > 1KB bodies. +Static assets served with immutable caching. + +Caching strategy: +- Redis for session data +- In-memory LRU for hot paths +- CDN for static assets +- Stale-while-revalidate for API responses + +Memory management: +- Stream large payloads instead of buffering +- Weak references for disposable caches +- Manual GC hints for batch operations + +Query optimization: +- Explain plans for complex queries +- Index usage analysis +- Query result caching +- Connection pooling tuning + +Frontend performance: +- Code splitting for lazy loading +- Image optimization and lazy loading +- Critical CSS inlining +- Prefetching for likely navigations + +Backend performance: +- Async I/O for non-blocking operations +- Worker threads for CPU-bound tasks +- Connection keep-alive +- Response streaming + +## Deployment Architecture +Containers built with multi-stage Dockerfiles. +Kubernetes manifests in deploy/ directory. +Horizontal pod autoscaling on CPU/memory. +Rolling updates with zero-downtime. + +Environment hierarchy: +- development: local Docker Compose +- staging: shared k8s namespace +- production: isolated k8s cluster + +Configuration: +- ConfigMaps for non-sensitive config +- Secrets for credentials +- Environment-specific overlays via Kustomize + +Container best practices: +- Non-root user in container +- Read-only filesystem where possible +- Resource limits and requests +- Liveness and readiness probes + +Service mesh integration: +- Istio for traffic management +- mTLS for service-to-service auth +- Retry and timeout policies +- Circuit breaking configuration + +Disaster recovery: +- Database replication across zones +- Point-in-time recovery capability +- Regular backup verification +- Documented runbooks + +## Development Workflow +Feature branches follow conventional commits. +PRs require CI pass and code review. +Main branch deploys to staging automatically. +Release tags trigger production deployment. + +CI pipeline stages: +1. Install dependencies +2. Lint and type check +3. Unit tests with coverage +4. Build artifacts +5. Integration tests +6. Security scan + +Local development: +- pnpm for package management +- Turborepo for monorepo orchestration +- Docker Compose for service dependencies + +Code review guidelines: +- Focus on correctness and clarity +- Security-sensitive changes require security review +- Performance-critical paths need benchmarks +- Breaking changes need migration guide + +Branch strategy: +- main: production-ready code +- develop: integration branch (optional) +- feature/*: new functionality +- fix/*: bug fixes +- release/*: release preparation + +Documentation requirements: +- README for project overview +- API docs auto-generated from OpenAPI +- Architecture decision records (ADRs) +- Runbooks for operational procedures +`; + +// ─── Synthetic Roadmap Fixture (~1KB, 4 slices) ──────────────────────────── + +const syntheticRoadmap = `# M005: Tiered Context Injection + +## Vision +Refactor prompt builders to inject relevance-scoped context instead of full files. +This reduces token consumption and improves agent focus on relevant information. + +## Success Criteria +- [ ] 40% reduction in injected context size +- [ ] No regression in agent task completion rate +- [ ] Measurable test confirms reduction target + +## Slice Overview +| ID | Slice | Risk | Depends | Done | After this | +|----|-------|------|---------|------|------------| +| S01 | Scope existing DB queries | low | — | ✅ | planSlice and researchSlice use milestone+slice filters for decisions/requirements. | +| S02 | KNOWLEDGE scoping + roadmap excerpt | medium | S01 | ⬜ | KNOWLEDGE sections filtered by keywords. Roadmap injected as excerpt. | +| S03 | Measurement test suite | low | S02 | ⬜ | Automated tests confirm 40% reduction vs baseline. | +| S04 | Documentation and rollout | low | S03 | ⬜ | Updated docs. Feature flag for gradual rollout. | + +## Key Risks +1. Keyword extraction may miss relevant sections — mitigate with fallback to full content +2. Excerpt parsing fragile to roadmap format changes — mitigate with graceful degradation + +## Definition of Done +- [ ] All slices complete with passing verification +- [ ] Measurement tests in CI +- [ ] No increase in prompt build latency +`; + +// ═══════════════════════════════════════════════════════════════════════════ +// Measurement Tests +// ═══════════════════════════════════════════════════════════════════════════ + +describe("measurement: context reduction verification", () => { + test("synthetic KNOWLEDGE fixture is ~8KB as specified", () => { + const sizeKB = syntheticKnowledge.length / 1024; + assert.ok( + sizeKB >= 7 && sizeKB <= 10, + `KNOWLEDGE fixture should be ~8KB, got ${sizeKB.toFixed(2)}KB` + ); + }); + + test("synthetic KNOWLEDGE has 9 H2 sections", () => { + const h2Count = (syntheticKnowledge.match(/^## /gm) || []).length; + assert.strictEqual(h2Count, 9, `KNOWLEDGE fixture should have 9 H2 sections, got ${h2Count}`); + }); + + test("queryKnowledge achieves ≥40% reduction with targeted keywords", async () => { + // Keywords targeting 2 sections: "Database Patterns" and "Testing Strategy" + const keywords = ['database', 'testing']; + + const scopedResult = await queryKnowledge(syntheticKnowledge, keywords); + + const fullSize = syntheticKnowledge.length; + const scopedSize = scopedResult.length; + const reductionPct = ((fullSize - scopedSize) / fullSize) * 100; + + // Verify we got matching sections + assert.match(scopedResult, /## Database Patterns/, 'should include Database section'); + assert.match(scopedResult, /## Testing Strategy/, 'should include Testing section'); + + // Verify we excluded other sections + assert.ok(!scopedResult.includes('## API Design'), 'should exclude API section'); + assert.ok(!scopedResult.includes('## Observability'), 'should exclude Observability section'); + assert.ok(!scopedResult.includes('## Deployment'), 'should exclude Deployment section'); + + // Verify ≥40% reduction (2/9 sections = ~78% reduction expected) + assert.ok( + reductionPct >= 40, + `queryKnowledge should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}% (${scopedSize} chars vs ${fullSize} chars)` + ); + + console.log(` → queryKnowledge: ${reductionPct.toFixed(1)}% reduction (${scopedSize} → ${fullSize} chars)`); + }); + + test("queryKnowledge with single keyword achieves ≥40% reduction", async () => { + // Single keyword targeting 1 section + const keywords = ['security']; + + const scopedResult = await queryKnowledge(syntheticKnowledge, keywords); + + const fullSize = syntheticKnowledge.length; + const scopedSize = scopedResult.length; + const reductionPct = ((fullSize - scopedSize) / fullSize) * 100; + + // Verify we got matching section + assert.match(scopedResult, /## Security Guidelines/, 'should include Security section'); + + // Verify ≥40% reduction (1/9 sections = ~89% reduction expected) + assert.ok( + reductionPct >= 40, + `single keyword should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}%` + ); + }); + + test("formatRoadmapExcerpt achieves ≥40% reduction", () => { + const sliceId = 'S02'; + + const excerptResult = formatRoadmapExcerpt(syntheticRoadmap, sliceId, '.gsd/milestones/M005/M005-ROADMAP.md'); + + const fullSize = syntheticRoadmap.length; + const excerptSize = excerptResult.length; + const reductionPct = ((fullSize - excerptSize) / fullSize) * 100; + + // Verify excerpt contains required elements + assert.match(excerptResult, /\| ID \| Slice \|/, 'should have table header'); + assert.match(excerptResult, /\| S01 \|/, 'should have predecessor S01'); + assert.match(excerptResult, /\| S02 \|/, 'should have target S02'); + assert.match(excerptResult, /See full roadmap:/, 'should have reference directive'); + + // Verify we excluded other slices + assert.ok(!excerptResult.includes('| S03 |'), 'should exclude S03'); + assert.ok(!excerptResult.includes('| S04 |'), 'should exclude S04'); + + // Verify ≥40% reduction (2 rows + overhead vs full roadmap = significant reduction) + assert.ok( + reductionPct >= 40, + `formatRoadmapExcerpt should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}% (${excerptSize} chars vs ${fullSize} chars)` + ); + + console.log(` → formatRoadmapExcerpt: ${reductionPct.toFixed(1)}% reduction (${excerptSize} → ${fullSize} chars)`); + }); + + test("combined KNOWLEDGE + roadmap reduction exceeds 40%", async () => { + // Simulate what happens in buildPlanSlicePrompt + const keywords = ['database', 'testing']; + + const scopedKnowledge = await queryKnowledge(syntheticKnowledge, keywords); + const scopedRoadmap = formatRoadmapExcerpt(syntheticRoadmap, 'S02'); + + const fullKnowledgeSize = syntheticKnowledge.length; + const fullRoadmapSize = syntheticRoadmap.length; + const fullTotal = fullKnowledgeSize + fullRoadmapSize; + + const scopedKnowledgeSize = scopedKnowledge.length; + const scopedRoadmapSize = scopedRoadmap.length; + const scopedTotal = scopedKnowledgeSize + scopedRoadmapSize; + + const combinedReductionPct = ((fullTotal - scopedTotal) / fullTotal) * 100; + + // Combined reduction should easily exceed 40% + assert.ok( + combinedReductionPct >= 40, + `combined reduction should be ≥40%, got ${combinedReductionPct.toFixed(1)}%` + ); + + console.log(` → Combined: ${combinedReductionPct.toFixed(1)}% reduction`); + console.log(` - KNOWLEDGE: ${fullKnowledgeSize} → ${scopedKnowledgeSize} chars`); + console.log(` - Roadmap: ${fullRoadmapSize} → ${scopedRoadmapSize} chars`); + console.log(` - Total: ${fullTotal} → ${scopedTotal} chars`); + }); +}); + +describe("measurement: edge cases maintain reduction target", () => { + test("three keywords still achieves ≥40% reduction", async () => { + // Even with 3 matching sections (3/9 = 33%), we should hit target + const keywords = ['database', 'api', 'security']; + + const scopedResult = await queryKnowledge(syntheticKnowledge, keywords); + + const fullSize = syntheticKnowledge.length; + const scopedSize = scopedResult.length; + const reductionPct = ((fullSize - scopedSize) / fullSize) * 100; + + // Verify matches (3 sections) + assert.match(scopedResult, /## Database Patterns/, 'should include Database'); + assert.match(scopedResult, /## API Design/, 'should include API'); + assert.match(scopedResult, /## Security Guidelines/, 'should include Security'); + + // With 3/9 sections, reduction should be ~67% + assert.ok( + reductionPct >= 40, + `3 keywords should still achieve ≥40% reduction, got ${reductionPct.toFixed(1)}%` + ); + }); + + test("excerpt for S01 (no dependencies) achieves ≥40% reduction", () => { + const excerptResult = formatRoadmapExcerpt(syntheticRoadmap, 'S01'); + + const fullSize = syntheticRoadmap.length; + const excerptSize = excerptResult.length; + const reductionPct = ((fullSize - excerptSize) / fullSize) * 100; + + // S01 has no predecessor, so just 1 row + header + reference + assert.match(excerptResult, /\| S01 \|/, 'should have S01'); + assert.ok(!excerptResult.includes('| S02 |'), 'should not have S02'); + + // Single row should still achieve significant reduction + assert.ok( + reductionPct >= 40, + `S01 excerpt should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}%` + ); + }); +});