feat(graph): parse LEARNINGS.md into knowledge graph and rebuild after extraction

2026-04-15 04:52:52 +02:00 · 2026-04-15 04:52:52 +02:00 · 73916a8c38
commit 73916a8c38
parent ee922cff59
3 changed files with 334 additions and 1 deletions
--- a/packages/mcp-server/src/readers/graph.test.ts
+++ b/packages/mcp-server/src/readers/graph.test.ts
@ -98,6 +98,49 @@ function makeProjectWithArtifacts(projectDir: string): void {
  ].join('\n'));
 }

+// ---------------------------------------------------------------------------
+// LEARNINGS.md fixture helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Write `content` as the LEARNINGS.md fixture of `milestoneId`, placed at
+ * `.gsd/milestones/<id>/<id>-LEARNINGS.md` inside `projectDir`.
+ */
+function writeLearningsFixture(projectDir: string, milestoneId: string, content: string): void {
+  const relativePath = `.gsd/milestones/${milestoneId}/${milestoneId}-LEARNINGS.md`;
+  writeFixture(projectDir, relativePath, content);
+}
+
+// Well-formed LEARNINGS.md fixture for milestone M001: YAML frontmatter
+// followed by two decisions, one lesson, one pattern and one surprise, each
+// bullet carrying an indented "Source:" attribution line.
+const SAMPLE_LEARNINGS = `---
+phase: "M001"
+phase_name: "User Auth"
+project: "my-project"
+generated: "2026-04-15T10:00:00Z"
+counts:
+  decisions: 2
+  lessons: 1
+  patterns: 1
+  surprises: 1
+missing_artifacts: []
+---
+
+# Learnings: User Auth
+
+## Decisions
+- Use JWT for stateless auth across services.
+  Source: M001-PLAN.md/Architecture
+
+- Store refresh tokens in HTTP-only cookies only.
+  Source: M001-PLAN.md/Security
+
+## Lessons
+- Integration tests need a real DB — mocks missed migration bugs.
+  Source: M001-SUMMARY.md/Testing
+
+## Patterns
+- Repository pattern abstracts DB access and simplifies testing.
+  Source: M001-PLAN.md/Design
+
+## Surprises
+- Token expiry edge case caused silent auth failures in prod.
+  Source: M001-SUMMARY.md/Issues
+`;
+
 // ---------------------------------------------------------------------------
 // buildGraph tests
 // ---------------------------------------------------------------------------
@ -162,6 +205,141 @@ describe('buildGraph', () => {
  });
 });

+// ---------------------------------------------------------------------------
+// buildGraph — LEARNINGS.md parsing tests
+// ---------------------------------------------------------------------------
+
+describe('buildGraph — LEARNINGS.md parsing', () => {
+  let projectDir: string;
+
+  /** Id prefixes used for nodes extracted from a LEARNINGS.md file. */
+  const LEARNING_PREFIXES = ['decision', 'lesson', 'pattern', 'surprise'];
+
+  /** True when `id` names a learning node of the given milestone. */
+  const isLearningId = (id: string, milestoneId: string): boolean =>
+    LEARNING_PREFIXES.some((prefix) => id.startsWith(`${prefix}:${milestoneId}:`));
+
+  /**
+   * Run `body` against a throw-away project that contains one milestone
+   * directory. The project is removed afterwards even when an assertion
+   * inside `body` throws, so failing tests do not leak temp directories.
+   */
+  async function withTempMilestone(
+    milestoneId: string,
+    body: (dir: string) => Promise<void>,
+  ): Promise<void> {
+    const dir = tmpProject();
+    try {
+      mkdirSync(join(dir, '.gsd', 'milestones', milestoneId), { recursive: true });
+      await body(dir);
+    } finally {
+      rmSync(dir, { recursive: true, force: true });
+    }
+  }
+
+  beforeEach(() => {
+    projectDir = tmpProject();
+    // Create minimal milestone directory so parseMilestoneFiles finds it
+    mkdirSync(join(projectDir, '.gsd', 'milestones', 'M001'), { recursive: true });
+    writeLearningsFixture(projectDir, 'M001', SAMPLE_LEARNINGS);
+  });
+
+  afterEach(() => rmSync(projectDir, { recursive: true, force: true }));
+
+  it('extracts decision nodes from ## Decisions section', async () => {
+    const graph = await buildGraph(projectDir);
+    const decisionNodes = graph.nodes.filter((n) => n.id.includes('decision:M001'));
+    assert.ok(decisionNodes.length >= 2, `Expected >= 2 decision nodes, got ${decisionNodes.length}`);
+    assert.ok(decisionNodes.every((n) => n.type === 'decision'), 'All decision nodes must have type "decision"');
+  });
+
+  it('extracts lesson nodes from ## Lessons section', async () => {
+    const graph = await buildGraph(projectDir);
+    const lessonNodes = graph.nodes.filter((n) => n.id.includes('lesson:M001'));
+    assert.ok(lessonNodes.length >= 1, `Expected >= 1 lesson node, got ${lessonNodes.length}`);
+    assert.ok(lessonNodes.every((n) => n.type === 'lesson'), 'All lesson nodes must have type "lesson"');
+  });
+
+  it('extracts pattern nodes from ## Patterns section', async () => {
+    const graph = await buildGraph(projectDir);
+    const patternNodes = graph.nodes.filter((n) => n.id.includes('pattern:M001'));
+    assert.ok(patternNodes.length >= 1, `Expected >= 1 pattern node, got ${patternNodes.length}`);
+    assert.ok(patternNodes.every((n) => n.type === 'pattern'), 'All pattern nodes must have type "pattern"');
+  });
+
+  it('maps surprises to lesson nodes', async () => {
+    const graph = await buildGraph(projectDir);
+    // Surprises keep their own id prefix but reuse the 'lesson' NodeType
+    const surpriseNodes = graph.nodes.filter((n) => n.id.includes('surprise:M001'));
+    assert.ok(surpriseNodes.length >= 1, `Expected >= 1 surprise node, got ${surpriseNodes.length}`);
+    assert.ok(surpriseNodes.every((n) => n.type === 'lesson'), 'Surprises must be mapped to type "lesson"');
+  });
+
+  it('node labels contain the learning text', async () => {
+    const graph = await buildGraph(projectDir);
+    const hasJwtDecision = graph.nodes.some((n) =>
+      n.label.toLowerCase().includes('jwt') || n.description?.toLowerCase().includes('jwt'),
+    );
+    assert.ok(hasJwtDecision, 'Expected a node describing the JWT decision');
+  });
+
+  it('node description includes source attribution', async () => {
+    const graph = await buildGraph(projectDir);
+    const jwtNode = graph.nodes.find((n) => isLearningId(n.id, 'M001') && n.label.includes('JWT'));
+    assert.ok(jwtNode, 'Expected a learning node for the JWT decision');
+    assert.ok(
+      jwtNode?.description?.includes('Source: M001-PLAN.md/Architecture'),
+      'Expected the JWT decision to carry its source attribution in description',
+    );
+  });
+
+  it('adds relates_to edge from milestone node to each learning node', async () => {
+    const graph = await buildGraph(projectDir);
+    const learningNodes = graph.nodes.filter((n) => isLearningId(n.id, 'M001'));
+    assert.ok(learningNodes.length > 0, 'Expected learning nodes for M001');
+    for (const node of learningNodes) {
+      const linked = graph.edges.some(
+        (e) => e.from === 'milestone:M001' && e.to === node.id && e.type === 'relates_to',
+      );
+      assert.ok(linked, `Expected a relates_to edge from milestone:M001 to ${node.id}`);
+    }
+  });
+
+  it('skips LEARNINGS.md gracefully when file is malformed', async () => {
+    await withTempMilestone('M002', async (badProject) => {
+      writeLearningsFixture(badProject, 'M002', '\0\0\0 not valid yaml or markdown \0\0\0');
+      // Must not throw
+      const graph = await buildGraph(badProject);
+      assert.ok(Array.isArray(graph.nodes));
+      assert.equal(typeof graph.builtAt, 'string');
+      const learningNodes = graph.nodes.filter((n) => isLearningId(n.id, 'M002'));
+      assert.equal(learningNodes.length, 0, 'Malformed file should produce no learning nodes');
+    });
+  });
+
+  it('produces no learning nodes when all sections are empty', async () => {
+    await withTempMilestone('M003', async (emptyProject) => {
+      writeLearningsFixture(emptyProject, 'M003', `---
+phase: "M003"
+phase_name: "Empty"
+project: "test"
+generated: "2026-04-15T10:00:00Z"
+counts:
+  decisions: 0
+  lessons: 0
+  patterns: 0
+  surprises: 0
+missing_artifacts: []
+---
+
+# Learnings: Empty
+
+## Decisions
+
+## Lessons
+
+## Patterns
+
+## Surprises
+`);
+      const graph = await buildGraph(emptyProject);
+      const learningNodes = graph.nodes.filter((n) => isLearningId(n.id, 'M003'));
+      assert.equal(learningNodes.length, 0, 'Empty sections should produce no nodes');
+    });
+  });
+
+  it('does not crash when LEARNINGS.md is missing entirely', async () => {
+    await withTempMilestone('M004', async (noLearningsProject) => {
+      // No LEARNINGS.md file written
+      const graph = await buildGraph(noLearningsProject);
+      assert.ok(Array.isArray(graph.nodes));
+      const learningNodes = graph.nodes.filter((n) => isLearningId(n.id, 'M004'));
+      assert.equal(learningNodes.length, 0, 'Missing file should produce no learning nodes');
+    });
+  });
+});
+
 // ---------------------------------------------------------------------------
 // writeGraph tests
 // ---------------------------------------------------------------------------
--- a/packages/mcp-server/src/readers/graph.ts
+++ b/packages/mcp-server/src/readers/graph.ts
@ -27,7 +27,8 @@ export type NodeType =
  | 'rule'
  | 'pattern'
  | 'lesson'
-  | 'concept';
+  | 'concept'
+  | 'decision';

 export type EdgeType =
  | 'contains'
@ -386,6 +387,151 @@ function parseTasksFromPlan(
  }
 }

+// ---------------------------------------------------------------------------
+// LEARNINGS.md parser
+// ---------------------------------------------------------------------------
+
+/**
+ * Feed the `<id>-LEARNINGS.md` of every milestone under `gsdRoot` into the
+ * graph, appending to `nodes` and `edges` in place.
+ *
+ * Decisions, Lessons and Patterns become nodes of the matching type;
+ * Surprises reuse the 'lesson' NodeType because no dedicated type exists.
+ * Parsing is best-effort: an error raised for one milestone is swallowed so
+ * the remaining milestones are still processed and nothing is rethrown.
+ */
+function parseLearningsFiles(gsdRoot: string, nodes: GraphNode[], edges: GraphEdge[]): void {
+  for (const id of findMilestoneIds(gsdRoot)) {
+    try {
+      parseSingleLearningsFile(gsdRoot, id, nodes, edges);
+    } catch {
+      // Deliberately ignored: a broken LEARNINGS.md must not abort the build
+    }
+  }
+}
+
+/**
+ * Split a markdown document into its level-2 (`## Heading`) sections.
+ *
+ * Returns the body lines of each section keyed by the lower-cased heading
+ * text. Only a line that itself starts with `## ` opens a section, so an
+ * empty section ends at the very next heading instead of absorbing the
+ * following section's content. Deeper headings (`###`) stay part of the
+ * enclosing section. When a heading repeats, the first occurrence wins.
+ */
+function splitLearningsSections(markdown: string): Map<string, string[]> {
+  const sections = new Map<string, string[]>();
+  let current: string[] | null = null;
+
+  for (const line of markdown.split('\n')) {
+    const heading = line.match(/^##\s+(.+?)\s*$/);
+    if (heading) {
+      const key = heading[1].toLowerCase();
+      if (sections.has(key)) {
+        // Repeated heading: keep the first section, ignore this one's body
+        current = null;
+      } else {
+        current = [];
+        sections.set(key, current);
+      }
+      continue;
+    }
+    if (current !== null) current.push(line);
+  }
+
+  return sections;
+}
+
+/**
+ * Parse `<milestoneId>-LEARNINGS.md` of one milestone into graph nodes/edges.
+ *
+ * Returns without touching `nodes`/`edges` when the milestone directory
+ * cannot be resolved, the file does not exist, or it cannot be read.
+ * Otherwise the YAML frontmatter is dropped and the Decisions, Lessons,
+ * Patterns and Surprises sections (matched case-insensitively) are handed to
+ * `parseLearningsSection`, always in that order regardless of file order.
+ */
+function parseSingleLearningsFile(
+  gsdRoot: string,
+  milestoneId: string,
+  nodes: GraphNode[],
+  edges: GraphEdge[],
+): void {
+  const mDir = resolveMilestoneDir(gsdRoot, milestoneId);
+  if (!mDir) return;
+
+  const learningsPath = join(mDir, `${milestoneId}-LEARNINGS.md`);
+  if (!existsSync(learningsPath)) return;
+
+  let content: string;
+  try {
+    content = readFileSync(learningsPath, 'utf-8');
+  } catch {
+    return;
+  }
+
+  // Strip YAML frontmatter if present. Both fences must be a line of their
+  // own, so a "---" inside a frontmatter value or a table row further down
+  // is never mistaken for the closing fence.
+  const withoutFrontmatter = content.replace(
+    /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/,
+    '',
+  );
+
+  const milestoneNodeId = `milestone:${milestoneId}`;
+  const sourceFile = `milestones/${milestoneId}/${milestoneId}-LEARNINGS.md`;
+
+  // Parse each section: [sectionName, nodeType, idPrefix]
+  const sections: Array<[string, NodeType, string]> = [
+    ['Decisions', 'decision', 'decision'],
+    ['Lessons', 'lesson', 'lesson'],
+    ['Patterns', 'pattern', 'pattern'],
+    ['Surprises', 'lesson', 'surprise'],
+  ];
+
+  const sectionLines = splitLearningsSections(withoutFrontmatter);
+
+  for (const [sectionName, nodeType, idPrefix] of sections) {
+    const lines = sectionLines.get(sectionName.toLowerCase());
+    if (!lines) continue;
+
+    parseLearningsSection(
+      lines.join('\n'),
+      milestoneId,
+      idPrefix,
+      nodeType,
+      milestoneNodeId,
+      sourceFile,
+      nodes,
+      edges,
+    );
+  }
+}
+
+/**
+ * Convert the body of one LEARNINGS.md section into graph nodes and edges.
+ *
+ * Every top-level bullet (`- ` or `* `) becomes one node with id
+ * `<idPrefix>:<milestoneId>:<n>` (n counts from 1 within the section) plus a
+ * `relates_to` edge from `milestoneNodeId` to that node. Indented lines that
+ * follow a bullet are folded into its label until an indented `Source: ...`
+ * line is seen; that line becomes the node description. Bullets without any
+ * text are ignored and never consume an item number.
+ *
+ * @param sectionContent  Raw text between the section heading and the next one.
+ * @param milestoneId     Milestone the section belongs to; used in node ids.
+ * @param idPrefix        First segment of each generated node id.
+ * @param nodeType        NodeType assigned to every generated node.
+ * @param milestoneNodeId Id of the node the generated edges start from.
+ * @param sourceFile      Value stored in each node's `sourceFile`.
+ * @param nodes           Output array; new nodes are appended.
+ * @param edges           Output array; new edges are appended.
+ */
+function parseLearningsSection(
+  sectionContent: string,
+  milestoneId: string,
+  idPrefix: string,
+  nodeType: NodeType,
+  milestoneNodeId: string,
+  sourceFile: string,
+  nodes: GraphNode[],
+  edges: GraphEdge[],
+): void {
+  const bulletPattern = /^[-*]\s+(.+)/;
+  const sourcePattern = /^\s+Source:\s+(.+)/i;
+  const continuationPattern = /^\s{2,}(.+)/;
+
+  let itemIndex = 0;
+  let currentText: string | null = null;
+  let currentSource: string | null = null;
+
+  const flushItem = (): void => {
+    const text = currentText;
+    const source = currentSource;
+    // Reset before the emptiness check so state from a text-less bullet
+    // (e.g. its Source line) can never leak onto the next item.
+    currentText = null;
+    currentSource = null;
+    if (!text) return;
+
+    itemIndex += 1;
+    const nodeId = `${idPrefix}:${milestoneId}:${itemIndex}`;
+
+    nodes.push({
+      id: nodeId,
+      label: text,
+      type: nodeType,
+      description: source !== null ? source : undefined,
+      confidence: 'EXTRACTED',
+      sourceFile,
+    });
+
+    // Edge: milestone relates_to this learning node
+    edges.push({
+      from: milestoneNodeId,
+      to: nodeId,
+      type: 'relates_to',
+      confidence: 'EXTRACTED',
+    });
+  };
+
+  for (const line of sectionContent.split('\n')) {
+    const bulletMatch = line.match(bulletPattern);
+    if (bulletMatch) {
+      flushItem();
+      const text = bulletMatch[1].trim();
+      // A bullet made only of whitespace opens no item at all
+      currentText = text.length > 0 ? text : null;
+      continue;
+    }
+
+    // Everything below only applies while an item is open
+    if (currentText === null) continue;
+
+    // Indented source attribution: "  Source: ..."
+    const sourceMatch = line.match(sourcePattern);
+    if (sourceMatch) {
+      currentSource = `Source: ${sourceMatch[1].trim()}`;
+      continue;
+    }
+
+    // Continuation of the item text; ignored once the source has been seen
+    const continuationMatch = line.match(continuationPattern);
+    if (continuationMatch && currentSource === null) {
+      currentText += ' ' + continuationMatch[1].trim();
+    }
+  }
+
+  flushItem();
+}
+
 // ---------------------------------------------------------------------------
 // buildGraph
 // ---------------------------------------------------------------------------
@ -407,6 +553,7 @@ export async function buildGraph(projectDir: string): Promise<KnowledgeGraph> {
    parseStateFile,
    parseKnowledgeFile,
    parseMilestoneFiles,
+    parseLearningsFiles,
  ];

  for (const parser of parsers) {
--- a/src/resources/extensions/gsd/commands-extract-learnings.ts
+++ b/src/resources/extensions/gsd/commands-extract-learnings.ts
@ -174,6 +174,14 @@ If the \`capture_thought\` tool is available, call it once for each extracted it
 - source: {artifact filename}

 If \`capture_thought\` is not available, skip this step silently — do not report an error.
+
+---
+
+## Rebuild Knowledge Graph
+
+After writing LEARNINGS.md, call the \`gsd_graph\` tool with \`{ "mode": "build" }\` to rebuild the knowledge graph so the new learnings are immediately queryable by future milestone prompts.
+
+If the \`gsd_graph\` tool is not available, skip this step silently.
 `;
 }