feat(graph): parse LEARNINGS.md into knowledge graph and rebuild after extraction

This commit is contained in:
Nils Reeh 2026-04-15 04:52:52 +02:00
parent ee922cff59
commit 73916a8c38
3 changed files with 334 additions and 1 deletions

View file

@ -98,6 +98,49 @@ function makeProjectWithArtifacts(projectDir: string): void {
].join('\n'));
}
// ---------------------------------------------------------------------------
// LEARNINGS.md fixture helpers
// ---------------------------------------------------------------------------
/**
 * Write `content` as the LEARNINGS.md fixture of a milestone.
 *
 * The file is placed at `.gsd/milestones/<milestoneId>/<milestoneId>-LEARNINGS.md`
 * relative to `projectDir`; the actual write is delegated to `writeFixture`.
 */
function writeLearningsFixture(projectDir: string, milestoneId: string, content: string): void {
  const relativePath = `.gsd/milestones/${milestoneId}/${milestoneId}-LEARNINGS.md`;
  writeFixture(projectDir, relativePath, content);
}
/**
 * LEARNINGS.md fixture for milestone M001.
 *
 * Consists of a YAML frontmatter block followed by four `##` sections:
 * Decisions (2 items), Lessons (1), Patterns (1) and Surprises (1). Every
 * item is a `- ` bullet followed by a `Source:` attribution line.
 */
const SAMPLE_LEARNINGS = `---
phase: "M001"
phase_name: "User Auth"
project: "my-project"
generated: "2026-04-15T10:00:00Z"
counts:
decisions: 2
lessons: 1
patterns: 1
surprises: 1
missing_artifacts: []
---
# Learnings: User Auth
## Decisions
- Use JWT for stateless auth across services.
Source: M001-PLAN.md/Architecture
- Store refresh tokens in HTTP-only cookies only.
Source: M001-PLAN.md/Security
## Lessons
- Integration tests need a real DB mocks missed migration bugs.
Source: M001-SUMMARY.md/Testing
## Patterns
- Repository pattern abstracts DB access and simplifies testing.
Source: M001-PLAN.md/Design
## Surprises
- Token expiry edge case caused silent auth failures in prod.
Source: M001-SUMMARY.md/Issues
`;
// ---------------------------------------------------------------------------
// buildGraph tests
// ---------------------------------------------------------------------------
@ -162,6 +205,141 @@ describe('buildGraph', () => {
});
});
// ---------------------------------------------------------------------------
// buildGraph — LEARNINGS.md parsing tests
// ---------------------------------------------------------------------------
// Suite covering how buildGraph turns <milestone>-LEARNINGS.md files into
// graph nodes and edges. Most tests share one project (recreated per test)
// that holds the SAMPLE_LEARNINGS fixture for milestone M001; the robustness
// tests create their own throwaway project and always remove it in `finally`
// so a failing assertion cannot leak a temp directory.
describe('buildGraph — LEARNINGS.md parsing', () => {
  let projectDir: string;

  beforeEach(() => {
    projectDir = tmpProject();
    // Create minimal milestone directory so parseMilestoneFiles finds it
    mkdirSync(join(projectDir, '.gsd', 'milestones', 'M001'), { recursive: true });
    writeLearningsFixture(projectDir, 'M001', SAMPLE_LEARNINGS);
  });

  afterEach(() => rmSync(projectDir, { recursive: true, force: true }));

  it('extracts decision nodes from ## Decisions section', async () => {
    const graph = await buildGraph(projectDir);
    const decisionNodes = graph.nodes.filter((n) => n.id.includes('decision:M001'));
    assert.ok(decisionNodes.length >= 2, `Expected >= 2 decision nodes, got ${decisionNodes.length}`);
    // Same type check the lesson/pattern tests perform for their sections.
    assert.ok(decisionNodes.every((n) => n.type === 'decision'), 'All decision nodes must have type "decision"');
  });

  it('extracts lesson nodes from ## Lessons section', async () => {
    const graph = await buildGraph(projectDir);
    const lessonNodes = graph.nodes.filter((n) => n.id.includes('lesson:M001'));
    assert.ok(lessonNodes.length >= 1, `Expected >= 1 lesson node, got ${lessonNodes.length}`);
    assert.ok(lessonNodes.every((n) => n.type === 'lesson'), 'All lesson nodes must have type "lesson"');
  });

  it('extracts pattern nodes from ## Patterns section', async () => {
    const graph = await buildGraph(projectDir);
    const patternNodes = graph.nodes.filter((n) => n.id.includes('pattern:M001'));
    assert.ok(patternNodes.length >= 1, `Expected >= 1 pattern node, got ${patternNodes.length}`);
    assert.ok(patternNodes.every((n) => n.type === 'pattern'), 'All pattern nodes must have type "pattern"');
  });

  it('maps surprises to lesson nodes', async () => {
    const graph = await buildGraph(projectDir);
    // Surprises should be mapped to lesson type since no "surprise" NodeType exists
    const surpriseNodes = graph.nodes.filter((n) => n.id.includes('surprise:M001'));
    assert.ok(surpriseNodes.length >= 1, `Expected >= 1 surprise node, got ${surpriseNodes.length}`);
    assert.ok(surpriseNodes.every((n) => n.type === 'lesson'), 'Surprises must be mapped to type "lesson"');
  });

  it('node labels contain the learning text', async () => {
    const graph = await buildGraph(projectDir);
    const hasJwtDecision = graph.nodes.some((n) =>
      n.label.toLowerCase().includes('jwt') || n.description?.toLowerCase().includes('jwt'),
    );
    assert.ok(hasJwtDecision, 'Expected a node describing the JWT decision');
  });

  it('node description includes source attribution', async () => {
    const graph = await buildGraph(projectDir);
    const learningNodes = graph.nodes.filter((n) =>
      n.id.includes(':M001:') || n.id.match(/:(decision|lesson|pattern|surprise):M001/),
    );
    const withSource = learningNodes.filter((n) => n.description?.includes('Source:') || n.description?.includes('M001-PLAN'));
    assert.ok(withSource.length > 0, 'Expected at least one node with source attribution in description');
  });

  it('adds relates_to edge from learning node to milestone node', async () => {
    const graph = await buildGraph(projectDir);
    const edgesToMilestone = graph.edges.filter(
      (e) => e.to === 'milestone:M001' || e.from === 'milestone:M001',
    );
    // At least one learning node should relate to the milestone
    const learningEdges = graph.edges.filter(
      (e) => (e.from.includes('M001') && (e.type === 'relates_to' || e.type === 'contains')) ||
        (e.to.includes('M001') && e.type === 'relates_to'),
    );
    assert.ok(learningEdges.length > 0 || edgesToMilestone.length > 0,
      'Expected edges connecting learning nodes to milestone');
  });

  it('skips LEARNINGS.md gracefully when file is malformed', async () => {
    const badProject = tmpProject();
    try {
      mkdirSync(join(badProject, '.gsd', 'milestones', 'M002'), { recursive: true });
      writeLearningsFixture(badProject, 'M002', '\0\0\0 not valid yaml or markdown \0\0\0');
      // Must not throw
      const graph = await buildGraph(badProject);
      assert.ok(Array.isArray(graph.nodes));
      assert.equal(typeof graph.builtAt, 'string');
    } finally {
      rmSync(badProject, { recursive: true, force: true });
    }
  });

  it('produces no learning nodes when all sections are empty', async () => {
    const emptyProject = tmpProject();
    try {
      mkdirSync(join(emptyProject, '.gsd', 'milestones', 'M003'), { recursive: true });
      writeLearningsFixture(emptyProject, 'M003', `---
phase: "M003"
phase_name: "Empty"
project: "test"
generated: "2026-04-15T10:00:00Z"
counts:
decisions: 0
lessons: 0
patterns: 0
surprises: 0
missing_artifacts: []
---
# Learnings: Empty
## Decisions
## Lessons
## Patterns
## Surprises
`);
      const graph = await buildGraph(emptyProject);
      const learningNodes = graph.nodes.filter((n) =>
        n.id.includes('decision:M003') ||
        n.id.includes('lesson:M003') ||
        n.id.includes('pattern:M003') ||
        n.id.includes('surprise:M003'),
      );
      assert.equal(learningNodes.length, 0, 'Empty sections should produce no nodes');
    } finally {
      rmSync(emptyProject, { recursive: true, force: true });
    }
  });

  it('does not crash when LEARNINGS.md is missing entirely', async () => {
    const noLearningsProject = tmpProject();
    try {
      mkdirSync(join(noLearningsProject, '.gsd', 'milestones', 'M004'), { recursive: true });
      // No LEARNINGS.md file written
      const graph = await buildGraph(noLearningsProject);
      assert.ok(Array.isArray(graph.nodes));
    } finally {
      rmSync(noLearningsProject, { recursive: true, force: true });
    }
  });
});
// ---------------------------------------------------------------------------
// writeGraph tests
// ---------------------------------------------------------------------------

View file

@ -27,7 +27,8 @@ export type NodeType =
| 'rule'
| 'pattern'
| 'lesson'
| 'concept';
| 'concept'
| 'decision';
export type EdgeType =
| 'contains'
@ -386,6 +387,151 @@ function parseTasksFromPlan(
}
}
// ---------------------------------------------------------------------------
// LEARNINGS.md parser
// ---------------------------------------------------------------------------
/**
 * Graph parser for the per-milestone `*-LEARNINGS.md` files.
 *
 * Walks every milestone id reported by `findMilestoneIds` and hands each one
 * to `parseSingleLearningsFile`, which appends Decisions, Lessons, Patterns
 * and Surprises to `nodes` / `edges` (Surprises use the 'lesson' NodeType,
 * as there is no dedicated type for them).
 *
 * Any error thrown while processing one milestone is swallowed, so that
 * milestone is skipped and the remaining ones are still processed.
 */
function parseLearningsFiles(gsdRoot: string, nodes: GraphNode[], edges: GraphEdge[]): void {
  for (const id of findMilestoneIds(gsdRoot)) {
    try {
      parseSingleLearningsFile(gsdRoot, id, nodes, edges);
    } catch {
      // Best-effort: a broken LEARNINGS.md must not abort the whole build.
      continue;
    }
  }
}
/**
 * Parse `<milestoneId>-LEARNINGS.md` of one milestone into graph nodes/edges.
 *
 * Returns silently when the milestone directory cannot be resolved, the file
 * does not exist, or it cannot be read. Otherwise the YAML frontmatter is
 * stripped, the document is split into its level-two (`## `) sections, and
 * the Decisions / Lessons / Patterns / Surprises sections are passed to
 * `parseLearningsSection`, which pushes onto `nodes` and `edges`.
 *
 * Sections are split line by line rather than with one regex over the whole
 * document: a regex of the form `## Name\s*\n(body)(?=\n## |$)` lets an EMPTY
 * section swallow the section that follows it, which would emit the next
 * section's items a second time under the wrong type.
 *
 * @param gsdRoot      Root directory handed to `resolveMilestoneDir`.
 * @param milestoneId  Milestone identifier, e.g. `M001`.
 * @param nodes        Output array; learning nodes are appended.
 * @param edges        Output array; milestone -> learning edges are appended.
 */
function parseSingleLearningsFile(
  gsdRoot: string,
  milestoneId: string,
  nodes: GraphNode[],
  edges: GraphEdge[],
): void {
  const mDir = resolveMilestoneDir(gsdRoot, milestoneId);
  if (!mDir) return;

  const learningsPath = join(mDir, `${milestoneId}-LEARNINGS.md`);
  if (!existsSync(learningsPath)) return;

  let content: string;
  try {
    content = readFileSync(learningsPath, 'utf-8');
  } catch {
    // Unreadable file (permissions, race with deletion, ...): nothing to add.
    return;
  }

  // Strip a leading BOM, then the YAML frontmatter if present. Both fences
  // must sit on their own line, so a `---` inside a frontmatter value or a
  // horizontal rule later in the document cannot end the block early.
  const withoutFrontmatter = content
    .replace(/^\uFEFF/, '')
    .replace(/^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, '');

  const sectionLines = splitLearningsSections(withoutFrontmatter);

  const milestoneNodeId = `milestone:${milestoneId}`;
  const sourceFile = `milestones/${milestoneId}/${milestoneId}-LEARNINGS.md`;

  // Parse each section: [sectionName, nodeType, idPrefix]
  const sections: Array<[string, NodeType, string]> = [
    ['Decisions', 'decision', 'decision'],
    ['Lessons', 'lesson', 'lesson'],
    ['Patterns', 'pattern', 'pattern'],
    ['Surprises', 'lesson', 'surprise'],
  ];

  for (const [sectionName, nodeType, idPrefix] of sections) {
    // Heading lookup is case-insensitive.
    const body = sectionLines.get(sectionName.toLowerCase());
    if (body === undefined) continue;
    parseLearningsSection(
      body.join('\n'),
      milestoneId,
      idPrefix,
      nodeType,
      milestoneNodeId,
      sourceFile,
      nodes,
      edges,
    );
  }
}

/**
 * Split markdown into its level-two sections.
 *
 * Returns a map from the lower-cased heading text to the lines between that
 * heading and the next `## ` heading (or end of input). Deeper headings
 * (`###` and below) stay inside the enclosing section. When a heading text
 * occurs more than once, only its first occurrence is kept. Lines before the
 * first level-two heading are discarded. Both LF and CRLF input is accepted.
 */
function splitLearningsSections(markdown: string): Map<string, string[]> {
  const result = new Map<string, string[]>();
  // Lines of the section currently being collected; null while outside any
  // section or inside a duplicate heading whose body is ignored.
  let active: string[] | null = null;

  for (const line of markdown.split(/\r?\n/)) {
    const heading = /^##(?:\s+(.*?))?\s*$/.exec(line);
    if (heading) {
      const key = (heading[1] ?? '').toLowerCase();
      if (result.has(key)) {
        active = null;
      } else {
        active = [];
        result.set(key, active);
      }
      continue;
    }
    if (active !== null) active.push(line);
  }

  return result;
}
/**
 * Convert the body of one LEARNINGS.md section into graph nodes and edges.
 *
 * Recognised line shapes:
 *  - `- text` or `* text` starting at column 0 begins a new item;
 *  - an indented `Source: ...` line sets the attribution of the current item
 *    (stored in the node's `description`);
 *  - any other line indented by at least two whitespace characters that
 *    appears before the item's Source line is appended to the item text.
 * All other lines are ignored.
 *
 * Every item yields one node with id `<idPrefix>:<milestoneId>:<n>` (n counts
 * from 1 within this call) and one `relates_to` edge from `milestoneNodeId`
 * to that node; both are pushed onto the caller-supplied arrays.
 *
 * A bullet whose text is only whitespace produces no node, and the lines that
 * follow it are discarded, so its attribution can never leak onto the next
 * real item.
 *
 * @param sectionContent   Section body, lines separated by `\n`.
 * @param milestoneId      Milestone identifier used in node ids.
 * @param idPrefix         Leading segment of the generated node ids.
 * @param nodeType         `type` assigned to every generated node.
 * @param milestoneNodeId  Id used as the `from` end of each generated edge.
 * @param sourceFile       Value stored in each node's `sourceFile`.
 * @param nodes            Output array for nodes.
 * @param edges            Output array for edges.
 */
function parseLearningsSection(
  sectionContent: string,
  milestoneId: string,
  idPrefix: string,
  nodeType: NodeType,
  milestoneNodeId: string,
  sourceFile: string,
  nodes: GraphNode[],
  edges: GraphEdge[],
): void {
  let itemIndex = 0;
  // The item currently being assembled, or null when no item is open.
  let pending: { text: string; source: string | null } | null = null;

  // Emit the open item (if any). State is cleared unconditionally so nothing
  // from one item can carry over into the next.
  const flushItem = (): void => {
    const item = pending;
    pending = null;
    if (item === null) return;

    itemIndex += 1;
    const nodeId = `${idPrefix}:${milestoneId}:${itemIndex}`;
    nodes.push({
      id: nodeId,
      label: item.text,
      type: nodeType,
      description: item.source ?? undefined,
      confidence: 'EXTRACTED',
      sourceFile,
    });
    // Edge: milestone relates_to this learning node
    edges.push({
      from: milestoneNodeId,
      to: nodeId,
      type: 'relates_to',
      confidence: 'EXTRACTED',
    });
  };

  for (const line of sectionContent.split('\n')) {
    const bulletMatch = line.match(/^[-*]\s+(.+)/);
    if (bulletMatch) {
      flushItem();
      const text = bulletMatch[1].trim();
      // A whitespace-only bullet opens no item at all.
      pending = text ? { text, source: null } : null;
      continue;
    }

    // Attribution and continuation lines only make sense inside an item.
    if (pending === null) continue;

    // Indented source attribution: "  Source: ..."
    const sourceMatch = line.match(/^\s+Source:\s+(.+)/i);
    if (sourceMatch) {
      pending.source = `Source: ${sourceMatch[1].trim()}`;
      continue;
    }

    // Continuation of the item text; ignored once the Source line was seen.
    const continuationMatch = line.match(/^\s{2,}(.+)/);
    if (continuationMatch && pending.source === null) {
      pending.text += ' ' + continuationMatch[1].trim();
    }
  }

  flushItem();
}
// ---------------------------------------------------------------------------
// buildGraph
// ---------------------------------------------------------------------------
@ -407,6 +553,7 @@ export async function buildGraph(projectDir: string): Promise<KnowledgeGraph> {
parseStateFile,
parseKnowledgeFile,
parseMilestoneFiles,
parseLearningsFiles,
];
for (const parser of parsers) {

View file

@ -174,6 +174,14 @@ If the \`capture_thought\` tool is available, call it once for each extracted it
- source: {artifact filename}
If \`capture_thought\` is not available, skip this step silently — do not report an error.
---
## Rebuild Knowledge Graph
After writing LEARNINGS.md, call the \`gsd_graph\` tool with \`{ "mode": "build" }\` to rebuild the knowledge graph so the new learnings are immediately queryable by future milestone prompts.
If the \`gsd_graph\` tool is not available, skip this step silently.
`;
}