From 73916a8c38ed87c09ab22f92782fc71a53ecf9df Mon Sep 17 00:00:00 2001 From: Nils Reeh Date: Wed, 15 Apr 2026 04:52:52 +0200 Subject: [PATCH] feat(graph): parse LEARNINGS.md into knowledge graph and rebuild after extraction --- packages/mcp-server/src/readers/graph.test.ts | 178 ++++++++++++++++++ packages/mcp-server/src/readers/graph.ts | 149 ++++++++++++++- .../gsd/commands-extract-learnings.ts | 8 + 3 files changed, 334 insertions(+), 1 deletion(-) diff --git a/packages/mcp-server/src/readers/graph.test.ts b/packages/mcp-server/src/readers/graph.test.ts index bc329c570..456dd4be4 100644 --- a/packages/mcp-server/src/readers/graph.test.ts +++ b/packages/mcp-server/src/readers/graph.test.ts @@ -98,6 +98,49 @@ function makeProjectWithArtifacts(projectDir: string): void { ].join('\n')); } +// --------------------------------------------------------------------------- +// LEARNINGS.md fixture helpers +// --------------------------------------------------------------------------- + +function writeLearningsFixture(projectDir: string, milestoneId: string, content: string): void { + writeFixture(projectDir, `.gsd/milestones/${milestoneId}/${milestoneId}-LEARNINGS.md`, content); +} + +const SAMPLE_LEARNINGS = `--- +phase: "M001" +phase_name: "User Auth" +project: "my-project" +generated: "2026-04-15T10:00:00Z" +counts: + decisions: 2 + lessons: 1 + patterns: 1 + surprises: 1 +missing_artifacts: [] +--- + +# Learnings: User Auth + +## Decisions +- Use JWT for stateless auth across services. + Source: M001-PLAN.md/Architecture + +- Store refresh tokens in HTTP-only cookies only. + Source: M001-PLAN.md/Security + +## Lessons +- Integration tests need a real DB — mocks missed migration bugs. + Source: M001-SUMMARY.md/Testing + +## Patterns +- Repository pattern abstracts DB access and simplifies testing. + Source: M001-PLAN.md/Design + +## Surprises +- Token expiry edge case caused silent auth failures in prod. 
+  Source: M001-SUMMARY.md/Issues
+`;
+
 // ---------------------------------------------------------------------------
 // buildGraph tests
 // ---------------------------------------------------------------------------
@@ -162,6 +205,141 @@ describe('buildGraph', () => {
   });
 });
 
+// ---------------------------------------------------------------------------
+// buildGraph — LEARNINGS.md parsing tests
+// ---------------------------------------------------------------------------
+
+describe('buildGraph — LEARNINGS.md parsing', () => {
+  let projectDir: string;
+
+  beforeEach(() => {
+    projectDir = tmpProject();
+    // Create minimal milestone directory so parseMilestoneFiles finds it
+    mkdirSync(join(projectDir, '.gsd', 'milestones', 'M001'), { recursive: true });
+    writeLearningsFixture(projectDir, 'M001', SAMPLE_LEARNINGS);
+  });
+
+  afterEach(() => rmSync(projectDir, { recursive: true, force: true }));
+
+  it('extracts decision nodes from ## Decisions section', async () => {
+    const graph = await buildGraph(projectDir);
+    // Decisions are emitted as dedicated 'decision'-typed nodes with ids like 'decision:M001:1'.
+    const decisionNodes = graph.nodes.filter((n) => n.id.includes('decision:M001'));
+    assert.ok(decisionNodes.every((n) => n.type === 'decision'), 'All decision nodes must have type "decision"');
+    assert.ok(decisionNodes.length >= 2, `Expected >= 2 decision nodes, got ${decisionNodes.length}`);
+  });
+
+  it('extracts lesson nodes from ## Lessons section', async () => {
+    const graph = await buildGraph(projectDir);
+    const lessonNodes = graph.nodes.filter((n) => n.id.includes('lesson:M001'));
+    assert.ok(lessonNodes.length >= 1, `Expected >= 1 lesson node, got ${lessonNodes.length}`);
+    assert.ok(lessonNodes.every((n) => n.type === 'lesson'), 'All lesson nodes must have type "lesson"');
+  });
+
+  it('extracts pattern nodes from ## Patterns section', async () => {
+    const graph = await buildGraph(projectDir);
+    const patternNodes = graph.nodes.filter((n) 
=> n.id.includes('pattern:M001'));
+    assert.ok(patternNodes.length >= 1, `Expected >= 1 pattern node, got ${patternNodes.length}`);
+    assert.ok(patternNodes.every((n) => n.type === 'pattern'), 'All pattern nodes must have type "pattern"');
+  });
+
+  it('maps surprises to lesson nodes', async () => {
+    const graph = await buildGraph(projectDir);
+    // Surprises should be mapped to lesson type since no "surprise" NodeType exists
+    const surpriseNodes = graph.nodes.filter((n) => n.id.includes('surprise:M001'));
+    assert.ok(surpriseNodes.length >= 1, `Expected >= 1 surprise node, got ${surpriseNodes.length}`);
+    assert.ok(surpriseNodes.every((n) => n.type === 'lesson'), 'Surprises must be mapped to type "lesson"');
+  });
+
+  it('node labels contain the learning text', async () => {
+    const graph = await buildGraph(projectDir);
+    const hasJwtDecision = graph.nodes.some((n) =>
+      n.label.toLowerCase().includes('jwt') || n.description?.toLowerCase().includes('jwt'),
+    );
+    assert.ok(hasJwtDecision, 'Expected a node describing the JWT decision');
+  });
+
+  it('node description includes source attribution', async () => {
+    const graph = await buildGraph(projectDir);
+    const learningNodes = graph.nodes.filter((n) =>
+      n.id.includes(':M001:') || n.id.match(/^(decision|lesson|pattern|surprise):M001/),
+    );
+    const withSource = learningNodes.filter((n) => n.description?.includes('Source:') || n.description?.includes('M001-PLAN'));
+    assert.ok(withSource.length > 0, 'Expected at least one node with source attribution in description');
+  });
+
+  it('adds relates_to edge from learning node to milestone node', async () => {
+    const graph = await buildGraph(projectDir);
+    const edgesToMilestone = graph.edges.filter(
+      (e) => e.to === 'milestone:M001' || e.from === 'milestone:M001',
+    );
+    // At least one learning node should relate to the milestone
+    const learningEdges = graph.edges.filter(
+      (e) => (e.from.includes('M001') && (e.type === 'relates_to' || e.type === 'contains')) ||
+        
(e.to.includes('M001') && e.type === 'relates_to'), + ); + assert.ok(learningEdges.length > 0 || edgesToMilestone.length > 0, + 'Expected edges connecting learning nodes to milestone'); + }); + + it('skips LEARNINGS.md gracefully when file is malformed', async () => { + const badProject = tmpProject(); + mkdirSync(join(badProject, '.gsd', 'milestones', 'M002'), { recursive: true }); + writeLearningsFixture(badProject, 'M002', '\0\0\0 not valid yaml or markdown \0\0\0'); + // Must not throw + const graph = await buildGraph(badProject); + assert.ok(graph.nodes.length >= 0); + assert.equal(typeof graph.builtAt, 'string'); + rmSync(badProject, { recursive: true, force: true }); + }); + + it('produces no learning nodes when all sections are empty', async () => { + const emptyProject = tmpProject(); + mkdirSync(join(emptyProject, '.gsd', 'milestones', 'M003'), { recursive: true }); + writeLearningsFixture(emptyProject, 'M003', `--- +phase: "M003" +phase_name: "Empty" +project: "test" +generated: "2026-04-15T10:00:00Z" +counts: + decisions: 0 + lessons: 0 + patterns: 0 + surprises: 0 +missing_artifacts: [] +--- + +# Learnings: Empty + +## Decisions + +## Lessons + +## Patterns + +## Surprises +`); + const graph = await buildGraph(emptyProject); + const learningNodes = graph.nodes.filter((n) => + n.id.includes('decision:M003') || + n.id.includes('lesson:M003') || + n.id.includes('pattern:M003') || + n.id.includes('surprise:M003'), + ); + assert.equal(learningNodes.length, 0, 'Empty sections should produce no nodes'); + rmSync(emptyProject, { recursive: true, force: true }); + }); + + it('does not crash when LEARNINGS.md is missing entirely', async () => { + const noLearningsProject = tmpProject(); + mkdirSync(join(noLearningsProject, '.gsd', 'milestones', 'M004'), { recursive: true }); + // No LEARNINGS.md file written + const graph = await buildGraph(noLearningsProject); + assert.ok(graph.nodes.length >= 0); + rmSync(noLearningsProject, { recursive: true, force: true }); 
+ }); +}); + // --------------------------------------------------------------------------- // writeGraph tests // --------------------------------------------------------------------------- diff --git a/packages/mcp-server/src/readers/graph.ts b/packages/mcp-server/src/readers/graph.ts index 8c6c9d4c0..0867a5935 100644 --- a/packages/mcp-server/src/readers/graph.ts +++ b/packages/mcp-server/src/readers/graph.ts @@ -27,7 +27,8 @@ export type NodeType = | 'rule' | 'pattern' | 'lesson' - | 'concept'; + | 'concept' + | 'decision'; export type EdgeType = | 'contains' @@ -386,6 +387,151 @@ function parseTasksFromPlan( } } +// --------------------------------------------------------------------------- +// LEARNINGS.md parser +// --------------------------------------------------------------------------- + +/** + * Parse all *-LEARNINGS.md files found in milestone directories. + * Extracts Decisions, Lessons, Patterns, and Surprises as typed graph nodes. + * Surprises are mapped to the 'lesson' NodeType (no distinct type exists). + * Parse errors per file are caught — the file is skipped, never rethrows. 
+ */ +function parseLearningsFiles(gsdRoot: string, nodes: GraphNode[], edges: GraphEdge[]): void { + const milestoneIds = findMilestoneIds(gsdRoot); + + for (const milestoneId of milestoneIds) { + try { + parseSingleLearningsFile(gsdRoot, milestoneId, nodes, edges); + } catch { + // Skip this milestone's LEARNINGS.md on any error + } + } +} + +function parseSingleLearningsFile( + gsdRoot: string, + milestoneId: string, + nodes: GraphNode[], + edges: GraphEdge[], +): void { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return; + + const learningsPath = join(mDir, `${milestoneId}-LEARNINGS.md`); + if (!existsSync(learningsPath)) return; + + let content: string; + try { + content = readFileSync(learningsPath, 'utf-8'); + } catch { + return; + } + + // Strip YAML frontmatter if present + const withoutFrontmatter = content.replace(/^---[\s\S]*?---\n?/, ''); + + const milestoneNodeId = `milestone:${milestoneId}`; + const sourceFile = `milestones/${milestoneId}/${milestoneId}-LEARNINGS.md`; + + // Parse each section: [sectionName, nodeType, idPrefix] + const sections: Array<[string, NodeType, string]> = [ + ['Decisions', 'decision', 'decision'], + ['Lessons', 'lesson', 'lesson'], + ['Patterns', 'pattern', 'pattern'], + ['Surprises', 'lesson', 'surprise'], + ]; + + for (const [sectionName, nodeType, idPrefix] of sections) { + const sectionMatch = withoutFrontmatter.match( + new RegExp(`##\\s+${sectionName}\\s*\\n([\\s\\S]*?)(?=\\n##\\s|$)`, 'i'), + ); + if (!sectionMatch) continue; + + const sectionContent = sectionMatch[1]; + parseLearningsSection( + sectionContent, + milestoneId, + idPrefix, + nodeType, + milestoneNodeId, + sourceFile, + nodes, + edges, + ); + } +} + +function parseLearningsSection( + sectionContent: string, + milestoneId: string, + idPrefix: string, + nodeType: NodeType, + milestoneNodeId: string, + sourceFile: string, + nodes: GraphNode[], + edges: GraphEdge[], +): void { + // Each item is a bullet line starting with "- " 
followed by optional + // indented "Source: ..." line. + // We collect bullet items and their associated source attribution. + const lines = sectionContent.split('\n'); + let itemIndex = 0; + let currentText: string | null = null; + let currentSource: string | null = null; + + const flushItem = (): void => { + if (!currentText) return; + itemIndex += 1; + const nodeId = `${idPrefix}:${milestoneId}:${itemIndex}`; + const description = currentSource ? `${currentSource}` : undefined; + + nodes.push({ + id: nodeId, + label: currentText, + type: nodeType, + description, + confidence: 'EXTRACTED', + sourceFile, + }); + + // Edge: milestone relates_to this learning node + edges.push({ + from: milestoneNodeId, + to: nodeId, + type: 'relates_to', + confidence: 'EXTRACTED', + }); + + currentText = null; + currentSource = null; + }; + + for (const line of lines) { + const bulletMatch = line.match(/^[-*]\s+(.+)/); + if (bulletMatch) { + flushItem(); + currentText = bulletMatch[1].trim(); + continue; + } + + // Indented source attribution: " Source: ..." 
+ const sourceMatch = line.match(/^\s+Source:\s+(.+)/i); + if (sourceMatch && currentText !== null) { + currentSource = `Source: ${sourceMatch[1].trim()}`; + continue; + } + + // Continuation of current item text (indented non-source line) + const continuationMatch = line.match(/^\s{2,}(.+)/); + if (continuationMatch && currentText !== null && currentSource === null) { + currentText += ' ' + continuationMatch[1].trim(); + } + } + + flushItem(); +} + // --------------------------------------------------------------------------- // buildGraph // --------------------------------------------------------------------------- @@ -407,6 +553,7 @@ export async function buildGraph(projectDir: string): Promise { parseStateFile, parseKnowledgeFile, parseMilestoneFiles, + parseLearningsFiles, ]; for (const parser of parsers) { diff --git a/src/resources/extensions/gsd/commands-extract-learnings.ts b/src/resources/extensions/gsd/commands-extract-learnings.ts index 634cb3936..de23d5422 100644 --- a/src/resources/extensions/gsd/commands-extract-learnings.ts +++ b/src/resources/extensions/gsd/commands-extract-learnings.ts @@ -174,6 +174,14 @@ If the \`capture_thought\` tool is available, call it once for each extracted it - source: {artifact filename} If \`capture_thought\` is not available, skip this step silently — do not report an error. + +--- + +## Rebuild Knowledge Graph + +After writing LEARNINGS.md, call the \`gsd_graph\` tool with \`{ "mode": "build" }\` to rebuild the knowledge graph so the new learnings are immediately queryable by future milestone prompts. + +If the \`gsd_graph\` tool is not available, skip this step silently. `; }