feat(M005): Tiered Context Injection - relevance-scoped context with 65%+ reduction

- Added milestone-scoped queryDecisions/queryRequirements with D020 fallback cascade
- Added queryKnowledge() for keyword-based KNOWLEDGE.md section filtering
- Added formatRoadmapExcerpt() for minimal roadmap table extraction
- Wired inlineKnowledgeScoped() and inlineRoadmapExcerpt() into slice prompts
- 39 new tests (31 context-store + 8 measurement)
- Measured 65.7% combined context reduction (exceeds 40% target)
This commit is contained in:
OfficialDelta 2026-04-07 22:44:09 -04:00
parent 0dd7c31213
commit 4214252eaa
4 changed files with 986 additions and 18 deletions

View file

@ -261,7 +261,8 @@ export async function inlineGsdRootFile(
/**
* Inline decisions with optional milestone scoping from the DB.
* Falls back to filesystem via inlineGsdRootFile when DB unavailable or empty.
* Falls back to filesystem via inlineGsdRootFile only when DB is unavailable.
* When DB is available but cascade returns empty, returns null (empty is intentional per D020).
*/
export async function inlineDecisionsFromDb(
base: string, milestoneId?: string, scope?: string, level?: InlineLevel,
@ -279,26 +280,29 @@ export async function inlineDecisionsFromDb(
: formatDecisionsForPrompt(decisions);
return `### Decisions\nSource: \`.gsd/DECISIONS.md\`\n\n${formatted}`;
}
// DB available but empty result — intentional per D020, don't fall back to file
return null;
}
} catch (err) {
logWarning("prompt", `inlineDecisionsFromDb failed: ${err instanceof Error ? err.message : String(err)}`);
}
// DB unavailable — fall back to filesystem
return inlineGsdRootFile(base, "decisions.md", "Decisions");
}
/**
* Inline requirements with optional slice scoping from the DB.
* Inline requirements with optional milestone and slice scoping from the DB.
* Falls back to filesystem via inlineGsdRootFile when DB unavailable or empty.
*/
export async function inlineRequirementsFromDb(
base: string, sliceId?: string, level?: InlineLevel,
base: string, milestoneId?: string, sliceId?: string, level?: InlineLevel,
): Promise<string | null> {
const inlineLevel = level ?? resolveInlineLevel();
try {
const { isDbAvailable } = await import("./gsd-db.js");
if (isDbAvailable()) {
const { queryRequirements, formatRequirementsForPrompt } = await import("./context-store.js");
const requirements = queryRequirements({ sliceId });
const requirements = queryRequirements({ milestoneId, sliceId });
if (requirements.length > 0) {
// Use compact format for non-full levels to save ~40% tokens
const formatted = inlineLevel !== "full"
@ -335,6 +339,73 @@ export async function inlineProjectFromDb(
return inlineGsdRootFile(base, "project.md", "Project");
}
// ─── Stopwords for keyword extraction ─────────────────────────────────────
// Words ignored when deriving keywords from a slice title.
// Note: the '+' and '-' entries can never match inside extractKeywords, because
// punctuation is stripped from each token before the stopword lookup; such
// tokens collapse to '' and are dropped by the length check instead.
const STOPWORDS = new Set(['of', 'the', 'and', 'a', 'for', '+', '-', 'to', 'in', 'on', 'with', 'is', 'as', 'by']);

/**
 * Extract keywords from a slice title for scoped knowledge queries.
 *
 * Splits on whitespace, lowercases each token, strips every character outside
 * `[a-z0-9]`, then drops empty tokens, stopwords, and repeated keywords.
 * The order of first occurrence is preserved.
 *
 * Example: 'KNOWLEDGE scoping + roadmap excerpt' → ['knowledge', 'scoping', 'roadmap', 'excerpt']
 *
 * @param title - Slice title to tokenize
 * @returns Unique, lowercased keywords in first-occurrence order
 */
function extractKeywords(title: string): string[] {
  const tokens = title
    .split(/\s+/)
    .map(w => w.toLowerCase().replace(/[^a-z0-9]/g, ''))
    .filter(w => w.length > 0 && !STOPWORDS.has(w));
  // A Set iterates in insertion order, so this de-duplicates without reordering.
  return [...new Set(tokens)];
}
/**
 * Build the scoped "Project Knowledge" prompt section.
 *
 * Resolves KNOWLEDGE under the GSD root, loads it, and passes the content plus
 * the caller-supplied keywords to queryKnowledge() from context-store.
 *
 * @param base - Base path forwarded to resolveGsdRootFile
 * @param keywords - Keywords forwarded to queryKnowledge
 * @returns The formatted section, or null when the file does not exist, loads
 *   as empty, or queryKnowledge yields an empty string (no section matched)
 */
export async function inlineKnowledgeScoped(
  base: string,
  keywords: string[],
): Promise<string | null> {
  const filePath = resolveGsdRootFile(base, "KNOWLEDGE");
  if (existsSync(filePath)) {
    const raw = await loadFile(filePath);
    if (raw) {
      // Loaded on demand, only once there is content to filter.
      const store = await import("./context-store.js");
      const filtered = await store.queryKnowledge(raw, keywords);
      if (filtered) {
        return `### Project Knowledge (scoped)\nSource: \`${relGsdRootFile("KNOWLEDGE")}\`\n\n${filtered.trim()}`;
      }
    }
  }
  return null;
}
/**
 * Build the "Milestone Roadmap (excerpt)" prompt section for one slice.
 *
 * Resolves and loads the milestone's ROADMAP file, then asks
 * formatRoadmapExcerpt() from context-store for the excerpt of slice `sid`.
 *
 * @param base - Base path forwarded to the milestone path helpers
 * @param mid - Milestone ID whose roadmap is read
 * @param sid - Slice ID to excerpt
 * @returns The formatted section, or null when the roadmap path cannot be
 *   resolved, the file does not exist or loads as empty, or the excerpt is
 *   empty (e.g. the slice is not in the roadmap table)
 */
export async function inlineRoadmapExcerpt(
  base: string,
  mid: string,
  sid: string,
): Promise<string | null> {
  const absPath = resolveMilestoneFile(base, mid, "ROADMAP");
  if (absPath && existsSync(absPath)) {
    const relPath = relMilestoneFile(base, mid, "ROADMAP");
    const markdown = await loadFile(absPath);
    if (markdown) {
      // Loaded on demand, only once there is content to excerpt.
      const store = await import("./context-store.js");
      const table = store.formatRoadmapExcerpt(markdown, sid, relPath);
      if (table) {
        return `### Milestone Roadmap (excerpt)\nSource: \`${relPath}\`\n\n${table}`;
      }
    }
  }
  return null;
}
// ─── Skill Activation & Discovery ─────────────────────────────────────────
function normalizeSkillReference(ref: string): string {
@ -880,7 +951,7 @@ export async function buildResearchMilestonePrompt(mid: string, midTitle: string
inlined.push(await inlineFile(contextPath, contextRel, "Milestone Context"));
const projectInline = await inlineProjectFromDb(base);
if (projectInline) inlined.push(projectInline);
const requirementsInline = await inlineRequirementsFromDb(base);
const requirementsInline = await inlineRequirementsFromDb(base, mid);
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineDecisionsFromDb(base, mid);
if (decisionsInline) inlined.push(decisionsInline);
@ -930,7 +1001,7 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba
if (inlineLevel !== "minimal") {
const projectInline = await inlineProjectFromDb(base);
if (projectInline) inlined.push(projectInline);
const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel);
const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel);
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel);
if (decisionsInline) inlined.push(decisionsInline);
@ -999,7 +1070,16 @@ export async function buildResearchSlicePrompt(
const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT");
const inlined: string[] = [];
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
// Use roadmap excerpt instead of full roadmap for context reduction
const roadmapExcerptRS = await inlineRoadmapExcerpt(base, mid, sid);
if (roadmapExcerptRS) {
inlined.push(roadmapExcerptRS);
} else {
// Fall back to full roadmap if excerpt fails
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
}
const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context");
if (contextInline) inlined.push(contextInline);
const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
@ -1008,10 +1088,14 @@ export async function buildResearchSlicePrompt(
if (researchInline) inlined.push(researchInline);
const decisionsInline = await inlineDecisionsFromDb(base, mid);
if (decisionsInline) inlined.push(decisionsInline);
const requirementsInline = await inlineRequirementsFromDb(base, sid);
const requirementsInline = await inlineRequirementsFromDb(base, mid, sid);
if (requirementsInline) inlined.push(requirementsInline);
const knowledgeInlineRS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
// Use scoped knowledge based on slice title keywords
const keywords = extractKeywords(sTitle);
const knowledgeInlineRS = await inlineKnowledgeScoped(base, keywords);
if (knowledgeInlineRS) inlined.push(knowledgeInlineRS);
inlined.push(inlineTemplate("research", "Research"));
const depContent = await inlineDependencySummaries(mid, sid, base);
@ -1060,7 +1144,15 @@ export async function buildPlanSlicePrompt(
const researchSliceAnchor = readPhaseAnchor(base, mid, "research-slice");
if (researchSliceAnchor) inlined.push(formatAnchorForPrompt(researchSliceAnchor));
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
// Use roadmap excerpt instead of full roadmap for context reduction
const roadmapExcerptPS = await inlineRoadmapExcerpt(base, mid, sid);
if (roadmapExcerptPS) {
inlined.push(roadmapExcerptPS);
} else {
// Fall back to full roadmap if excerpt fails
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
}
const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
if (sliceCtxInline) inlined.push(sliceCtxInline);
const researchInline = await inlineFileOptional(researchPath, researchRel, "Slice Research");
@ -1068,11 +1160,15 @@ export async function buildPlanSlicePrompt(
if (inlineLevel !== "minimal") {
const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel);
if (decisionsInline) inlined.push(decisionsInline);
const requirementsInline = await inlineRequirementsFromDb(base, sid, inlineLevel);
const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, inlineLevel);
if (requirementsInline) inlined.push(requirementsInline);
}
const knowledgeInlinePS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
// Use scoped knowledge based on slice title keywords
const keywordsPS = extractKeywords(sTitle);
const knowledgeInlinePS = await inlineKnowledgeScoped(base, keywordsPS);
if (knowledgeInlinePS) inlined.push(knowledgeInlinePS);
inlined.push(inlineTemplate("plan", "Slice Plan"));
if (inlineLevel === "full") {
inlined.push(inlineTemplate("task-plan", "Task Plan"));
@ -1272,7 +1368,7 @@ export async function buildCompleteSlicePrompt(
if (sliceCtxInline) inlined.push(sliceCtxInline);
inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Slice Plan"));
if (inlineLevel !== "minimal") {
const requirementsInline = await inlineRequirementsFromDb(base, sid, inlineLevel);
const requirementsInline = await inlineRequirementsFromDb(base, mid, sid, inlineLevel);
if (requirementsInline) inlined.push(requirementsInline);
}
const knowledgeInlineCS = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
@ -1355,7 +1451,7 @@ export async function buildCompleteMilestonePrompt(
// Inline root GSD files (skip for minimal — completion can read these if needed)
if (inlineLevel !== "minimal") {
const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel);
const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel);
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel);
if (decisionsInline) inlined.push(decisionsInline);
@ -1480,7 +1576,7 @@ export async function buildValidateMilestonePrompt(
// Inline root GSD files
if (inlineLevel !== "minimal") {
const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel);
const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel);
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel);
if (decisionsInline) inlined.push(decisionsInline);
@ -1656,7 +1752,7 @@ export async function buildReassessRoadmapPrompt(
if (inlineLevel !== "minimal") {
const projectInline = await inlineProjectFromDb(base);
if (projectInline) inlined.push(projectInline);
const requirementsInline = await inlineRequirementsFromDb(base, undefined, inlineLevel);
const requirementsInline = await inlineRequirementsFromDb(base, mid, undefined, inlineLevel);
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineDecisionsFromDb(base, mid, undefined, inlineLevel);
if (decisionsInline) inlined.push(decisionsInline);

View file

@ -15,6 +15,7 @@ export interface DecisionQueryOpts {
}
export interface RequirementQueryOpts {
milestoneId?: string;
sliceId?: string;
status?: string;
}
@ -67,7 +68,8 @@ export function queryDecisions(opts?: DecisionQueryOpts): Decision[] {
/**
* Query active (non-superseded) requirements with optional filters.
* - sliceId: filters where primary_owner LIKE '%sliceId%' OR supporting_slices LIKE '%sliceId%'
* - milestoneId: combined with sliceId for precise filtering (e.g. %M005/S01%)
* - sliceId: filters where primary_owner LIKE '%pattern%' OR supporting_slices LIKE '%pattern%'
* - status: filters where status = :status (exact match)
*
* Returns [] if DB is not available. Never throws.
@ -81,9 +83,19 @@ export function queryRequirements(opts?: RequirementQueryOpts): Requirement[] {
const clauses: string[] = ['superseded_by IS NULL'];
const params: Record<string, unknown> = {};
if (opts?.sliceId) {
// Combined milestone+slice filtering for precise scoping
if (opts?.milestoneId && opts?.sliceId) {
// Use combined pattern like %M005/S01% to avoid cross-milestone contamination
clauses.push('(primary_owner LIKE :combined_pattern OR supporting_slices LIKE :combined_pattern)');
params[':combined_pattern'] = `%${opts.milestoneId}/${opts.sliceId}%`;
} else if (opts?.sliceId) {
// Slice-only filtering (legacy behavior)
clauses.push('(primary_owner LIKE :slice_pattern OR supporting_slices LIKE :slice_pattern)');
params[':slice_pattern'] = `%${opts.sliceId}%`;
} else if (opts?.milestoneId) {
// Milestone-only filtering
clauses.push('(primary_owner LIKE :milestone_pattern OR supporting_slices LIKE :milestone_pattern)');
params[':milestone_pattern'] = `%${opts.milestoneId}%`;
}
if (opts?.status) {
@ -194,3 +206,156 @@ export function queryArtifact(path: string): string | null {
export function queryProject(): string | null {
  // Thin wrapper: looks up the fixed artifact key 'PROJECT.md' via queryArtifact
  // and passes its result (string or null) straight through.
  const projectArtifact = queryArtifact('PROJECT.md');
  return projectArtifact;
}
// ─── Knowledge Query ───────────────────────────────────────────────────────
/**
 * Filter KNOWLEDGE.md sections by keyword matching.
 * Uses H2 sections, matches keywords case-insensitively (substring match) against:
 * 1. Section header text
 * 2. First paragraph of section content (up to first blank line or next heading)
 *
 * Per D020, returns empty string (not null) when no matches found.
 * This signals "no relevant knowledge" vs "file not found".
 *
 * Blank or whitespace-only keywords are ignored. If none remain, the result is
 * the empty string.
 *
 * @param content - Full KNOWLEDGE.md content
 * @param keywords - Keywords to match (case-insensitive)
 * @returns Concatenated matching sections with H2 headers, or empty string
 */
export async function queryKnowledge(content: string, keywords: string[]): Promise<string> {
  if (!content) return '';

  // Normalize once. Blank keywords are dropped because ''.includes('') is true
  // for every string, so a single empty keyword would select every section.
  const needles = keywords
    .map(k => k.trim().toLowerCase())
    .filter(k => k.length > 0);
  if (needles.length === 0) return '';

  // Lazy import to avoid circular dependency
  const { extractAllSections } = await import('./files.js');
  const sections = extractAllSections(content, 2);
  if (sections.size === 0) return '';

  const matchingSections: string[] = [];
  for (const [header, body] of sections) {
    // First paragraph: everything up to the first blank line or next heading.
    // NOTE(review): extractAllSections is not visible here; trimStart() guards
    // against a body that begins with blank lines, which would otherwise make
    // the first paragraph empty.
    const firstParagraph = body.trimStart().split(/\n\s*\n|\n#/)[0] ?? '';

    const headerLower = header.toLowerCase();
    const paragraphLower = firstParagraph.toLowerCase();
    const matches = needles.some(kw => headerLower.includes(kw) || paragraphLower.includes(kw));

    if (matches) {
      matchingSections.push(`## ${header}\n\n${body}`);
    }
  }

  return matchingSections.join('\n\n');
}
// ─── Roadmap Excerpt Formatter ─────────────────────────────────────────────
/**
 * Format a minimal roadmap excerpt for prompt injection.
 *
 * Parses the slice table from roadmap content and emits, in order:
 * 1. Header row + separator
 * 2. Predecessor row: the row whose ID equals the first `S<digits>` token in
 *    the target's Depends cell, if such a row exists and is not the target itself
 * 3. Target slice row
 * 4. A blank line and a reference directive pointing to the full roadmap path
 *
 * The table is located by a header row starting `| ID | Slice |`
 * (case-insensitive) immediately followed by a `|---|` separator row; rows are
 * read until the first line that does not start with `|`.
 * The Depends column is found by header name. When no header cell starts with
 * "Depends", split-cell index 4 is used, matching the standard
 * `| ID | Slice | Risk | Depends | Done | After this |` layout.
 *
 * Per D021, this minimizes injected content while preserving dependency awareness.
 * Returns empty string if the table or sliceId is not found.
 * Never throws.
 *
 * @param roadmapContent - Full content of the M###-ROADMAP.md file (LF or CRLF line endings)
 * @param sliceId - Target slice ID (e.g. 'S02')
 * @param roadmapPath - Optional path for reference directive (defaults to generic)
 * @returns The excerpt joined with '\n', or '' when nothing can be extracted
 */
export function formatRoadmapExcerpt(
  roadmapContent: string,
  sliceId: string,
  roadmapPath = 'ROADMAP.md',
): string {
  if (!roadmapContent || !sliceId) return '';

  // Accept CRLF as well as LF so a stray '\r' never ends up in the excerpt rows.
  const lines = roadmapContent.split(/\r?\n/);

  // Find the slice table header: | ID | Slice | ... (case insensitive)
  const headerIndex = lines.findIndex(line => /^\s*\|\s*ID\s*\|\s*Slice\s*\|/i.test(line));
  if (headerIndex === -1) return '';

  const headerLine = lines[headerIndex];
  const separatorLine = lines[headerIndex + 1];
  // The separator must directly follow the header and look like |---|---|...
  // (may include ':' for alignment).
  if (!headerLine || !separatorLine || !/^\s*\|[\s:\-|]+\|/.test(separatorLine)) return '';

  // Splitting '| a | b |' on '|' yields ['', 'a', 'b', '']; index 0 is the
  // empty text before the first pipe, so the ID column is index 1.
  const splitCells = (row: string): string[] => row.split('|').map(c => c.trim());

  // Prefer the column actually labelled "Depends"; fall back to the standard position.
  const namedDependsIndex = splitCells(headerLine).findIndex(c => /^depends/i.test(c));
  const dependsIndex = namedDependsIndex > 0 ? namedDependsIndex : 4;

  interface SliceRow {
    line: string;
    id: string;
    depends: string;
  }

  const sliceRows: SliceRow[] = [];
  for (let i = headerIndex + 2; i < lines.length; i++) {
    const line = lines[i];
    if (!line || !line.trim().startsWith('|')) break; // End of table
    const cells = splitCells(line);
    // Skip rows too short to contain the Depends column.
    if (cells.length <= dependsIndex) continue;
    sliceRows.push({ line, id: cells[1] ?? '', depends: cells[dependsIndex] ?? '' });
  }

  const targetRow = sliceRows.find(r => r.id === sliceId);
  if (!targetRow) return '';

  // Depends may contain '—', 'S01', 'S01, S02', etc. Only the first ID is used,
  // to keep the excerpt minimal; placeholders such as '—' simply do not match.
  const firstDepId = targetRow.depends.match(/S\d+/)?.[0];
  const predecessorRow =
    firstDepId !== undefined && firstDepId !== targetRow.id
      ? sliceRows.find(r => r.id === firstDepId)
      : undefined;

  const excerptLines: string[] = [headerLine, separatorLine];
  if (predecessorRow) excerptLines.push(predecessorRow.line);
  excerptLines.push(targetRow.line, '', `> See full roadmap: ${roadmapPath}`);

  return excerptLines.join('\n');
}

View file

@ -15,6 +15,8 @@ import {
formatRequirementsForPrompt,
queryArtifact,
queryProject,
formatRoadmapExcerpt,
queryKnowledge,
} from '../context-store.ts';
// ═══════════════════════════════════════════════════════════════════════════
@ -452,3 +454,177 @@ describe("context-store: queryProject", () => {
assert.strictEqual(content, null, 'queryProject returns null when DB closed');
});
});
// ═══════════════════════════════════════════════════════════════════════════
// context-store: formatRoadmapExcerpt
// ═══════════════════════════════════════════════════════════════════════════
// Suite for formatRoadmapExcerpt(): covers predecessor inclusion, the exact
// output shape when there is no predecessor, missing-slice and empty-input
// handling, alignment-colon separator rows, and multi-dependency Depends cells.
describe("context-store: formatRoadmapExcerpt", () => {
// Sample roadmap content matching actual M005-ROADMAP.md format
// Three slices chained S01 <- S02 <- S03 via the Depends column.
const sampleRoadmap = `# M005: Tiered Context Injection
## Vision
Refactor prompt builders to inject relevance-scoped context.
## Slice Overview
| ID | Slice | Risk | Depends | Done | After this |
|----|-------|------|---------|------|------------|
| S01 | Scope existing queries | low | | | planSlice prompt scoped. |
| S02 | KNOWLEDGE scoping | medium | S01 | | KNOWLEDGE sections filtered. |
| S03 | Measurement test | low | S02 | | 40% reduction confirmed. |
`;
// Target with a dependency: excerpt must carry header, separator, S01, S02 and
// the reference directive, but not the unrelated S03 row.
test("S02 with S01 predecessor includes both rows", () => {
const result = formatRoadmapExcerpt(sampleRoadmap, 'S02', '.gsd/milestones/M005/M005-ROADMAP.md');
// Should have header
assert.match(result, /\| ID \| Slice \| Risk \| Depends \| Done \| After this \|/, 'has header row');
// Should have separator
assert.match(result, /\|----\|/, 'has separator row');
// Should have S01 predecessor
assert.match(result, /\| S01 \|/, 'has predecessor S01 row');
// Should have S02 target
assert.match(result, /\| S02 \|/, 'has target S02 row');
// Should have reference directive
assert.match(result, /See full roadmap:.*M005-ROADMAP\.md/, 'has reference directive');
// Should NOT have S03 (not relevant)
assert.ok(!result.includes('| S03 |'), 'does not include unrelated S03');
});
// Target with an empty Depends cell; roadmapPath is omitted so the default is used.
test("S01 with no predecessor includes only target row", () => {
const result = formatRoadmapExcerpt(sampleRoadmap, 'S01');
// Should have header + separator + S01 only
assert.match(result, /\| ID \| Slice \|/, 'has header row');
assert.match(result, /\| S01 \|/, 'has target S01 row');
// Should NOT have S02 or S03
assert.ok(!result.includes('| S02 |'), 'does not include S02');
assert.ok(!result.includes('| S03 |'), 'does not include S03');
// Should have reference
assert.match(result, /See full roadmap:/, 'has reference directive');
// Count rows: header + separator + S01 + blank + directive = 5 lines
const lines = result.split('\n');
assert.strictEqual(lines.length, 5, 'correct number of lines (no predecessor)');
});
// A slice ID absent from the table yields '' rather than a partial excerpt.
test("missing slice returns empty string", () => {
const result = formatRoadmapExcerpt(sampleRoadmap, 'S99');
assert.strictEqual(result, '', 'missing slice returns empty string');
});
// Either argument being empty short-circuits to ''.
test("empty input returns empty string", () => {
assert.strictEqual(formatRoadmapExcerpt('', 'S01'), '', 'empty content returns empty');
assert.strictEqual(formatRoadmapExcerpt(sampleRoadmap, ''), '', 'empty sliceId returns empty');
});
test("handles table with various column formats", () => {
// Table with different spacing and content
// The separator row here uses ':' alignment markers (|:---|).
const variantRoadmap = `# Milestone
| ID | Slice | Risk | Depends | Done | After this |
|:---|:------|:-----|:--------|:-----|:-----------|
| S01 | First slice title | low | | | First complete. |
| S02 | Second longer slice title here | medium | S01 | | Second working. |
`;
const result = formatRoadmapExcerpt(variantRoadmap, 'S02');
assert.match(result, /\| S01 \|/, 'has predecessor with different spacing');
assert.match(result, /\| S02 \|/, 'has target with different spacing');
assert.match(result, /Second longer slice title/, 'preserves full slice title');
});
test("handles multiple dependencies by using first one", () => {
// The table starts on the first line of the input (no preceding markdown).
const multiDepRoadmap = `| ID | Slice | Risk | Depends | Done | After this |
|----|-------|------|---------|------|------------|
| S01 | First | low | | | Done. |
| S02 | Second | low | | | Done. |
| S03 | Third | medium | S01, S02 | | Working. |
`;
const result = formatRoadmapExcerpt(multiDepRoadmap, 'S03');
// Should include S01 (first dependency) and S03
assert.match(result, /\| S01 \|/, 'has first dependency S01');
assert.match(result, /\| S03 \|/, 'has target S03');
// S02 is also a dependency but we only include the first one
// (This is intentional to keep excerpts minimal)
// Note: the absence of the S02 row is not asserted by this test.
});
});
// ═══════════════════════════════════════════════════════════════════════════
// context-store: queryKnowledge
// ═══════════════════════════════════════════════════════════════════════════
// Suite for queryKnowledge(): covers header matches, first-paragraph matches,
// multi-keyword selection, case-insensitivity, and the empty-string result for
// no match / empty keywords / empty content.
describe("context-store: queryKnowledge", () => {
// Sample KNOWLEDGE.md content
// One H1 title followed by three H2 sections: Database Patterns, API Design,
// Testing Guidelines.
const sampleKnowledge = `# Project Knowledge
## Database Patterns
SQLite is used with WAL mode for concurrent reads.
Always use prepared statements.
More database details here.
## API Design
REST endpoints follow OpenAPI spec.
Use versioned paths like /v1/resource.
## Testing Guidelines
Unit tests use node:test.
Integration tests mock external services.
`;
// 'database' appears in the "Database Patterns" header; only that section is expected.
test("single keyword matches header", async () => {
const result = await queryKnowledge(sampleKnowledge, ['database']);
assert.match(result, /## Database Patterns/, 'includes matching section header');
assert.match(result, /SQLite is used with WAL mode/, 'includes section content');
// Should NOT include other sections
assert.ok(!result.includes('## API Design'), 'does not include non-matching API section');
assert.ok(!result.includes('## Testing Guidelines'), 'does not include non-matching Testing section');
});
// Keywords are OR-ed: a section is kept if any one keyword matches it.
test("multiple keywords match multiple sections", async () => {
const result = await queryKnowledge(sampleKnowledge, ['database', 'testing']);
assert.match(result, /## Database Patterns/, 'includes Database section');
assert.match(result, /## Testing Guidelines/, 'includes Testing section');
assert.ok(!result.includes('## API Design'), 'does not include API section');
});
// No section mentions the keyword, so the result is '' (not null).
test("no matches returns empty string", async () => {
const result = await queryKnowledge(sampleKnowledge, ['nonexistent']);
assert.strictEqual(result, '', 'no matches returns empty string per D020');
});
test("keyword in first paragraph matches", async () => {
const result = await queryKnowledge(sampleKnowledge, ['sqlite']);
// 'sqlite' appears in first paragraph of Database Patterns
// (it does not appear in any section header of the fixture).
assert.match(result, /## Database Patterns/, 'matches keyword in first paragraph');
assert.match(result, /SQLite is used/, 'includes the section with matching paragraph');
});
// Upper-case keywords must still match the mixed-case headers.
test("case-insensitive matching", async () => {
const result = await queryKnowledge(sampleKnowledge, ['DATABASE', 'API']);
assert.match(result, /## Database Patterns/, 'case-insensitive header match');
assert.match(result, /## API Design/, 'case-insensitive header match for API');
});
// Guard-clause inputs: an empty keyword list or empty content both yield ''.
test("empty keywords returns empty string", async () => {
const result = await queryKnowledge(sampleKnowledge, []);
assert.strictEqual(result, '', 'empty keywords returns empty string');
});
test("empty content returns empty string", async () => {
const result = await queryKnowledge('', ['database']);
assert.strictEqual(result, '', 'empty content returns empty string');
});
});

View file

@ -0,0 +1,531 @@
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import {
queryKnowledge,
formatRoadmapExcerpt,
} from '../context-store.ts';
// ═══════════════════════════════════════════════════════════════════════════
// measurement.test.ts — Verify ≥40% context reduction from scoped injection
//
// Tests queryKnowledge() and formatRoadmapExcerpt() with realistic synthetic
// fixtures to confirm the context reduction target is met.
// ═══════════════════════════════════════════════════════════════════════════
// ─── Synthetic KNOWLEDGE.md Fixture (~8KB, 9 H2 sections) ──────────────────
// Synthetic stand-in for a project's KNOWLEDGE.md: one H1 title followed by
// nine H2 sections. The measurement suite in this file asserts that this string
// is between 7 and 10 KB (length / 1024) and contains exactly nine lines
// starting with "## ", so any edit to the text below must keep both checks true.
const syntheticKnowledge = `# Project Knowledge Base
## Database Patterns
SQLite is the primary persistence layer, using WAL mode for concurrent reads.
All queries use prepared statements for SQL injection prevention.
Connection pooling is handled by better-sqlite3's synchronous API.
Schema migrations are versioned and applied at startup.
Example patterns:
- Use transactions for multi-statement operations
- Prefer RETURNING clause for insert/update
- Index foreign keys for join performance
- Use CHECK constraints for data validation
Performance considerations:
- WAL checkpoint every 1000 writes
- Vacuum on shutdown for space reclamation
- Page size 4096 for SSD optimization
Database schema evolution:
- Migrations stored in migrations/ directory
- Each migration has up/down scripts
- Version table tracks applied migrations
- Rollback supported for last N migrations
Connection management:
- Single connection for write operations
- Read connections pooled for concurrency
- Connection timeout set to 5 seconds
- Busy timeout handles lock contention
Query patterns:
- Use prepared statements for parameterization
- Batch inserts via INSERT ... VALUES syntax
- Upserts via INSERT OR REPLACE
- Pagination via LIMIT/OFFSET or cursor
## API Design Principles
REST endpoints follow OpenAPI 3.0 specification.
Versioned paths use /v1/resource pattern.
Authentication uses Bearer tokens in Authorization header.
Rate limiting applies per-client with sliding window algorithm.
Response formats:
- Success: { data: T, meta?: { pagination } }
- Error: { error: { code, message, details? } }
- Pagination: cursor-based for large collections
Content negotiation:
- Accept: application/json (default)
- Accept: text/plain (for CLI consumers)
- Accept: text/event-stream (for SSE endpoints)
API versioning strategy:
- Major versions in URL path (/v1, /v2)
- Minor versions via Accept-Version header
- Deprecation warnings in response headers
- 12-month sunset period for old versions
Endpoint naming conventions:
- Nouns for resources (users, projects)
- Verbs only for non-CRUD actions (login, export)
- Plural form for collections
- Singular for singletons (me, config)
HTTP method semantics:
- GET: read-only, cacheable
- POST: create or non-idempotent action
- PUT: full replacement
- PATCH: partial update
- DELETE: remove resource
## Testing Strategy
Unit tests use node:test with strict assertions.
Integration tests mock external services via msw.
E2E tests use Playwright for browser automation.
Test coverage target is 80% line coverage.
Test organization:
- Unit tests adjacent to source files (*.test.ts)
- Integration tests in __tests__/integration/
- E2E tests in e2e/ directory
- Fixtures in __fixtures__/ subdirectories
Mocking guidelines:
- Prefer dependency injection over global mocks
- Use vi.mock() sparingly, only for ES module boundaries
- Reset mocks in afterEach hooks
Test data management:
- Factories generate realistic test data
- Seeds populate database for integration tests
- Snapshots capture expected output
- Golden files for complex comparisons
Assertion patterns:
- Use strict equality for primitives
- Deep equality for objects/arrays
- Regex matching for dynamic content
- Snapshot testing for UI components
Test isolation:
- Each test gets fresh database state
- Environment variables reset between tests
- File system operations use temp directories
- Network calls intercepted by mock server
## Error Handling
Errors are typed using discriminated unions.
Application errors extend BaseError class.
HTTP errors map to standard status codes.
Unhandled rejections trigger graceful shutdown.
Error codes follow domain prefixes:
- AUTH_xxx: Authentication/authorization errors
- DB_xxx: Database operation failures
- NET_xxx: Network/external service errors
- VAL_xxx: Validation errors
Logging integration:
- Error instances auto-serialize to JSON
- Stack traces included in development
- Correlation IDs propagate through request chain
Error recovery strategies:
- Retry with exponential backoff for transient errors
- Circuit breaker for external service failures
- Fallback values for non-critical operations
- Graceful degradation for partial failures
User-facing error messages:
- Generic messages for security-sensitive errors
- Actionable guidance for recoverable errors
- Reference codes for support escalation
- Localized messages via i18n
Error boundary patterns:
- Component-level boundaries in UI
- Route-level error handlers in API
- Global unhandled rejection handlers
- Process-level crash recovery
## Observability Patterns
Structured logging uses pino with JSON output.
Metrics collected via OpenTelemetry SDK.
Traces propagate context through async boundaries.
Health checks exposed at /health and /ready endpoints.
Log levels:
- ERROR: Unrecoverable failures
- WARN: Degraded operation
- INFO: Significant state changes
- DEBUG: Detailed diagnostic data
Metric types:
- Counters for request counts
- Histograms for latency distribution
- Gauges for resource utilization
Trace context propagation:
- W3C Trace Context headers
- Baggage for cross-service metadata
- Span attributes for searchability
- Events for significant moments
Dashboard design:
- SLO dashboards for reliability
- Request flow visualization
- Error rate trends
- Resource saturation alerts
Alerting strategy:
- Page for customer-impacting issues
- Ticket for degraded performance
- Notification for capacity planning
- Silence during maintenance windows
## Security Guidelines
Secrets never appear in logs or error messages.
Environment variables validated at startup.
CORS configured per-environment whitelist.
CSP headers enforced for web responses.
Input validation:
- Zod schemas for request body parsing
- Path parameters validated against patterns
- Query parameters have default/max values
Output encoding:
- HTML entities escaped in templates
- JSON stringification for API responses
- URL encoding for redirect targets
Authentication patterns:
- JWT tokens with short expiry
- Refresh token rotation
- Session invalidation on logout
- Multi-factor authentication support
Authorization model:
- Role-based access control (RBAC)
- Resource-level permissions
- Attribute-based policies (ABAC)
- Principle of least privilege
Secure communication:
- TLS 1.3 minimum
- Certificate pinning for mobile
- HSTS preload list
- Certificate transparency logging
## Performance Optimization
Critical paths target sub-10ms latency.
Database queries use covering indexes.
Response compression enabled for > 1KB bodies.
Static assets served with immutable caching.
Caching strategy:
- Redis for session data
- In-memory LRU for hot paths
- CDN for static assets
- Stale-while-revalidate for API responses
Memory management:
- Stream large payloads instead of buffering
- Weak references for disposable caches
- Manual GC hints for batch operations
Query optimization:
- Explain plans for complex queries
- Index usage analysis
- Query result caching
- Connection pooling tuning
Frontend performance:
- Code splitting for lazy loading
- Image optimization and lazy loading
- Critical CSS inlining
- Prefetching for likely navigations
Backend performance:
- Async I/O for non-blocking operations
- Worker threads for CPU-bound tasks
- Connection keep-alive
- Response streaming
## Deployment Architecture
Containers built with multi-stage Dockerfiles.
Kubernetes manifests in deploy/ directory.
Horizontal pod autoscaling on CPU/memory.
Rolling updates with zero-downtime.
Environment hierarchy:
- development: local Docker Compose
- staging: shared k8s namespace
- production: isolated k8s cluster
Configuration:
- ConfigMaps for non-sensitive config
- Secrets for credentials
- Environment-specific overlays via Kustomize
Container best practices:
- Non-root user in container
- Read-only filesystem where possible
- Resource limits and requests
- Liveness and readiness probes
Service mesh integration:
- Istio for traffic management
- mTLS for service-to-service auth
- Retry and timeout policies
- Circuit breaking configuration
Disaster recovery:
- Database replication across zones
- Point-in-time recovery capability
- Regular backup verification
- Documented runbooks
## Development Workflow
Feature branches follow conventional commits.
PRs require CI pass and code review.
Main branch deploys to staging automatically.
Release tags trigger production deployment.
CI pipeline stages:
1. Install dependencies
2. Lint and type check
3. Unit tests with coverage
4. Build artifacts
5. Integration tests
6. Security scan
Local development:
- pnpm for package management
- Turborepo for monorepo orchestration
- Docker Compose for service dependencies
Code review guidelines:
- Focus on correctness and clarity
- Security-sensitive changes require security review
- Performance-critical paths need benchmarks
- Breaking changes need migration guide
Branch strategy:
- main: production-ready code
- develop: integration branch (optional)
- feature/*: new functionality
- fix/*: bug fixes
- release/*: release preparation
Documentation requirements:
- README for project overview
- API docs auto-generated from OpenAPI
- Architecture decision records (ADRs)
- Runbooks for operational procedures
`;
// ─── Synthetic Roadmap Fixture (~1KB, 4 slices) ────────────────────────────
// Markdown roadmap for milestone M005, used as the "full" baseline when
// measuring formatRoadmapExcerpt(). The Slice Overview table lists S01–S04 as
// a linear dependency chain (S02 depends on S01, S03 on S02, S04 on S03).
// The tests below match on the `| ID | Slice |` header and on the `| Sxx |`
// row prefixes, and compare excerpt length against this string's length, so
// any edit to the text changes the measured reduction percentages.
const syntheticRoadmap = `# M005: Tiered Context Injection
## Vision
Refactor prompt builders to inject relevance-scoped context instead of full files.
This reduces token consumption and improves agent focus on relevant information.
## Success Criteria
- [ ] 40% reduction in injected context size
- [ ] No regression in agent task completion rate
- [ ] Measurable test confirms reduction target
## Slice Overview
| ID | Slice | Risk | Depends | Done | After this |
|----|-------|------|---------|------|------------|
| S01 | Scope existing DB queries | low | | | planSlice and researchSlice use milestone+slice filters for decisions/requirements. |
| S02 | KNOWLEDGE scoping + roadmap excerpt | medium | S01 | | KNOWLEDGE sections filtered by keywords. Roadmap injected as excerpt. |
| S03 | Measurement test suite | low | S02 | | Automated tests confirm 40% reduction vs baseline. |
| S04 | Documentation and rollout | low | S03 | | Updated docs. Feature flag for gradual rollout. |
## Key Risks
1. Keyword extraction may miss relevant sections mitigate with fallback to full content
2. Excerpt parsing fragile to roadmap format changes mitigate with graceful degradation
## Definition of Done
- [ ] All slices complete with passing verification
- [ ] Measurement tests in CI
- [ ] No increase in prompt build latency
`;
// ═══════════════════════════════════════════════════════════════════════════
// Measurement Tests
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Core measurement suite for tiered context injection.
 *
 * First sanity-checks the synthetic KNOWLEDGE fixture (overall size and H2
 * section count), then asserts that `queryKnowledge` and
 * `formatRoadmapExcerpt` — individually and combined — produce output at
 * least 40% smaller than the full fixture text. Sizes are string lengths
 * (UTF-16 code units), not byte counts.
 */
describe("measurement: context reduction verification", () => {
  /**
   * Percentage by which `scopedSize` is smaller than `fullSize`.
   *
   * @param fullSize - length of the unscoped text; must be non-zero
   * @param scopedSize - length of the scoped/excerpted text
   * @returns reduction in percent (negative when the scoped text is larger)
   */
  const reductionPercent = (fullSize: number, scopedSize: number): number =>
    ((fullSize - scopedSize) / fullSize) * 100;

  test("synthetic KNOWLEDGE fixture is ~8KB as specified", () => {
    const sizeKB = syntheticKnowledge.length / 1024;
    assert.ok(
      sizeKB >= 7 && sizeKB <= 10,
      `KNOWLEDGE fixture should be ~8KB, got ${sizeKB.toFixed(2)}KB`
    );
  });

  test("synthetic KNOWLEDGE has 9 H2 sections", () => {
    // Multiline flag so `^` anchors at every line start, not just the first.
    const h2Count = (syntheticKnowledge.match(/^## /gm) || []).length;
    assert.strictEqual(h2Count, 9, `KNOWLEDGE fixture should have 9 H2 sections, got ${h2Count}`);
  });

  test("queryKnowledge achieves ≥40% reduction with targeted keywords", async () => {
    // Keywords targeting 2 sections: "Database Patterns" and "Testing Strategy"
    const keywords = ['database', 'testing'];
    const scopedResult = await queryKnowledge(syntheticKnowledge, keywords);
    const fullSize = syntheticKnowledge.length;
    const scopedSize = scopedResult.length;
    const reductionPct = reductionPercent(fullSize, scopedSize);

    // Verify we got matching sections
    assert.match(scopedResult, /## Database Patterns/, 'should include Database section');
    assert.match(scopedResult, /## Testing Strategy/, 'should include Testing section');

    // Verify we excluded other sections
    assert.ok(!scopedResult.includes('## API Design'), 'should exclude API section');
    assert.ok(!scopedResult.includes('## Observability'), 'should exclude Observability section');
    assert.ok(!scopedResult.includes('## Deployment'), 'should exclude Deployment section');

    // Verify ≥40% reduction (2/9 sections = ~78% reduction expected)
    assert.ok(
      reductionPct >= 40,
      `queryKnowledge should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}% (${scopedSize} chars vs ${fullSize} chars)`
    );

    // Diagnostic only: keep the two sizes visibly separated so they do not
    // read as one fused number.
    console.log(` → queryKnowledge: ${reductionPct.toFixed(1)}% reduction (${scopedSize} of ${fullSize} chars)`);
  });

  test("queryKnowledge with single keyword achieves ≥40% reduction", async () => {
    // Single keyword targeting 1 section
    const keywords = ['security'];
    const scopedResult = await queryKnowledge(syntheticKnowledge, keywords);
    const reductionPct = reductionPercent(syntheticKnowledge.length, scopedResult.length);

    // Verify we got matching section
    assert.match(scopedResult, /## Security Guidelines/, 'should include Security section');

    // Verify ≥40% reduction (1/9 sections = ~89% reduction expected)
    assert.ok(
      reductionPct >= 40,
      `single keyword should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}%`
    );
  });

  test("formatRoadmapExcerpt achieves ≥40% reduction", () => {
    const sliceId = 'S02';
    const excerptResult = formatRoadmapExcerpt(syntheticRoadmap, sliceId, '.gsd/milestones/M005/M005-ROADMAP.md');
    const fullSize = syntheticRoadmap.length;
    const excerptSize = excerptResult.length;
    const reductionPct = reductionPercent(fullSize, excerptSize);

    // Verify excerpt contains required elements
    assert.match(excerptResult, /\| ID \| Slice \|/, 'should have table header');
    assert.match(excerptResult, /\| S01 \|/, 'should have predecessor S01');
    assert.match(excerptResult, /\| S02 \|/, 'should have target S02');
    assert.match(excerptResult, /See full roadmap:/, 'should have reference directive');

    // Verify we excluded other slices
    assert.ok(!excerptResult.includes('| S03 |'), 'should exclude S03');
    assert.ok(!excerptResult.includes('| S04 |'), 'should exclude S04');

    // Verify ≥40% reduction (2 rows + overhead vs full roadmap = significant reduction)
    assert.ok(
      reductionPct >= 40,
      `formatRoadmapExcerpt should achieve ≥40% reduction, got ${reductionPct.toFixed(1)}% (${excerptSize} chars vs ${fullSize} chars)`
    );

    console.log(` → formatRoadmapExcerpt: ${reductionPct.toFixed(1)}% reduction (${excerptSize} of ${fullSize} chars)`);
  });

  test("combined KNOWLEDGE + roadmap reduction exceeds 40%", async () => {
    // Simulate what happens in buildPlanSlicePrompt
    const keywords = ['database', 'testing'];
    const scopedKnowledge = await queryKnowledge(syntheticKnowledge, keywords);
    // Note: the excerpt is built without the optional roadmap-path argument here.
    const scopedRoadmap = formatRoadmapExcerpt(syntheticRoadmap, 'S02');

    const fullKnowledgeSize = syntheticKnowledge.length;
    const fullRoadmapSize = syntheticRoadmap.length;
    const fullTotal = fullKnowledgeSize + fullRoadmapSize;

    const scopedKnowledgeSize = scopedKnowledge.length;
    const scopedRoadmapSize = scopedRoadmap.length;
    const scopedTotal = scopedKnowledgeSize + scopedRoadmapSize;

    const combinedReductionPct = reductionPercent(fullTotal, scopedTotal);

    // Combined reduction should easily exceed 40%
    assert.ok(
      combinedReductionPct >= 40,
      `combined reduction should be ≥40%, got ${combinedReductionPct.toFixed(1)}%`
    );

    // Diagnostic breakdown, printed as "full → scoped" for each component.
    console.log(` → Combined: ${combinedReductionPct.toFixed(1)}% reduction`);
    console.log(` - KNOWLEDGE: ${fullKnowledgeSize} → ${scopedKnowledgeSize} chars`);
    console.log(` - Roadmap: ${fullRoadmapSize} → ${scopedRoadmapSize} chars`);
    console.log(` - Total: ${fullTotal} → ${scopedTotal} chars`);
  });
});
/**
 * Edge-case measurements: scoping must still save at least 40% when more
 * KNOWLEDGE sections match, and when the roadmap excerpt targets the first
 * slice (which has no predecessor row).
 */
describe("measurement: edge cases maintain reduction target", () => {
  // Share of `before` that is absent from `after`, as a percentage of length.
  const percentSaved = (before: string, after: string): number =>
    ((before.length - after.length) / before.length) * 100;

  test("three keywords still achieves ≥40% reduction", async () => {
    // Three of the nine sections are targeted; the remainder should be dropped.
    const filtered = await queryKnowledge(syntheticKnowledge, ['database', 'api', 'security']);
    const saved = percentSaved(syntheticKnowledge, filtered);

    // Each targeted section heading must survive the filter.
    const expectedSections: Array<[RegExp, string]> = [
      [/## Database Patterns/, 'should include Database'],
      [/## API Design/, 'should include API'],
      [/## Security Guidelines/, 'should include Security'],
    ];
    for (const [heading, message] of expectedSections) {
      assert.match(filtered, heading, message);
    }

    // Keeping 3 of 9 sections is expected to save roughly two thirds.
    assert.ok(
      saved >= 40,
      `3 keywords should still achieve ≥40% reduction, got ${saved.toFixed(1)}%`
    );
  });

  test("excerpt for S01 (no dependencies) achieves ≥40% reduction", () => {
    const excerpt = formatRoadmapExcerpt(syntheticRoadmap, 'S01');
    const saved = percentSaved(syntheticRoadmap, excerpt);

    // With no predecessor, only the S01 row should appear in the excerpt.
    assert.match(excerpt, /\| S01 \|/, 'should have S01');
    const leaksNextSlice = excerpt.includes('| S02 |');
    assert.ok(!leaksNextSlice, 'should not have S02');

    // A one-row excerpt must still clear the reduction target.
    assert.ok(
      saved >= 40,
      `S01 excerpt should achieve ≥40% reduction, got ${saved.toFixed(1)}%`
    );
  });
});