Generator (codebase-generator.ts): - Fix truncation off-by-one: use filtered.length > maxFiles (not >=) - Fix collapsed-directory round-trip: emit <!-- gsd:collapsed-descriptions --> comment blocks so incremental updates recover descriptions for collapsed dirs - Fix double-enumeration race in updateCodebaseMap: reuse files array from generateCodebaseMap instead of calling enumerateFiles a second time - Propagate truncated flag through updateCodebaseMap return type - Fix getCodebaseMapStats to read Files: N from header (accurate for collapsed dirs) - Remove redundant dead catch around lsFiles() in enumerateFiles - parseCodebaseMap: use else-if for bare match (avoid unnecessary double-check) - parseCodebaseMap: scan gsd:collapsed-descriptions comment blocks Command handler (commands-codebase.ts): - Bare /gsd codebase now shows stats (if map exists) or help (if no map) instead of silently running generate - Add explicit help subcommand with info-level output - Guard update: warn if no CODEBASE.md exists instead of silently generating - Validate --max-files: reject NaN, zero, and negative values with clear message - Emit warning (not success) when generate produces 0 files - Propagate truncated flag warning in both generate and update output - Fix extractFlag regex: escape flag name and support --flag=value syntax - Add actionable tip to stats output Catalog (commands/catalog.ts): - Add --max-files and help to codebase tab-completion entries System context (bootstrap/system-context.ts): - Cap CODEBASE.md injection at 8 000 chars (~2 000 tokens) per request - Add generation timestamp and staleness notice to the injected block header Paths (paths.ts): - Fix LEGACY_GSD_ROOT_FILES.CODEBASE to use lowercase codebase.md (matches the pattern of all other legacy root file names) Tests (codebase-generator.test.ts): - 15 new test cases: custom excludePatterns, collapseThreshold option, truncation boundary conditions (below/at/above limit), non-git directory, empty repo, 
collapsed-description round-trip, removed file tracking, binary/lock exclusions, truncated flag propagation, collapsed-dir stats accuracy, .gsd/ auto-creation, corrupted input, parseCodebaseMap comment blocks - Fix collapse assertion to verify individual entries are absent from main body - Fix git rm test to commit first so git rm succeeds - 29/29 tests passing Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
351 lines
12 KiB
TypeScript
351 lines
12 KiB
TypeScript
/**
|
|
* GSD Codebase Map Generator
|
|
*
|
|
* Produces .gsd/CODEBASE.md — a structural table of contents for the project.
|
|
* Gives fresh agent contexts instant orientation without filesystem exploration.
|
|
*
|
|
* Generation: walk `git ls-files`, group by directory, output with descriptions.
|
|
* Maintenance: agent updates descriptions as it works; incremental update preserves them.
|
|
*/
|
|
|
|
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
import { join, dirname, extname } from "node:path";
|
|
|
|
import { execSync } from "node:child_process";
|
|
import { gsdRoot } from "./paths.js";
|
|
|
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
|
|
/** Optional tuning knobs for codebase-map generation; omitted fields fall back to module defaults. */
export interface CodebaseMapOptions {
  /** Additional exclusion patterns, applied on top of the built-in default excludes. */
  excludePatterns?: Array<string>;
  /** Maximum number of files to list; files beyond this count are cut off. */
  maxFiles?: number;
  /** A directory containing more files than this is rendered as a one-line summary. */
  collapseThreshold?: number;
}
|
|
|
|
/** One file listed in the map. */
interface FileEntry {
  /** File path as reported by the file enumeration (relative to the scanned base path). */
  path: string;
  /** Human/agent-written description; the empty string means "not described yet". */
  description: string;
}
|
|
|
|
/** All listed files that share one parent directory. */
interface DirectoryGroup {
  /** Directory path; the empty string denotes the repository root. */
  path: string;
  /** Entries belonging to this directory. */
  files: Array<FileEntry>;
  /** When true the directory is rendered as a summary line instead of per-file entries. */
  collapsed: boolean;
}
|
|
|
|
// ─── Defaults ────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Directory patterns that are always excluded from the map.
 * A trailing slash marks a pattern as a directory match (at the root or nested).
 */
const DEFAULT_EXCLUDES: string[] = [
  // Tooling / VCS metadata
  ".gsd/",
  ".planning/",
  ".git/",
  // Installed dependencies and build output
  "node_modules/",
  "dist/",
  "build/",
  ".next/",
  "coverage/",
  // Python caches and environments, vendored code
  "__pycache__/",
  ".venv/",
  "vendor/",
];

/** File cap used when the caller does not pass `maxFiles`. */
const DEFAULT_MAX_FILES = 500;

/** Collapse threshold used when the caller does not pass `collapseThreshold`. */
const DEFAULT_COLLAPSE_THRESHOLD = 20;
|
|
|
|
// ─── Parsing ─────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Parse an existing CODEBASE.md and extract its file → description mapping.
 *
 * Two line shapes are recognised:
 *   - `path/to/file.ts` — Description here     (stored with its description)
 *   - `path/to/file.ts`                        (stored with an empty description)
 *
 * Lines inside a `<!-- gsd:collapsed-descriptions ... -->` comment block are
 * scanned as well, so descriptions of files in collapsed directories survive
 * incremental updates. Inside such a block only described entries are accepted.
 *
 * Both LF and CRLF line endings are supported.
 *
 * @param content Full text of a CODEBASE.md file.
 * @returns Map from file path to description ("" when the entry has none).
 */
export function parseCodebaseMap(content: string): Map<string, string> {
  const describedEntry = /^- `(.+?)` — (.+)$/;
  const bareEntry = /^- `(.+?)`\s*$/;

  const descriptions = new Map<string, string>();
  let inCollapsedBlock = false;

  // Split on LF or CRLF. With a plain "\n" split a trailing "\r" stays on the
  // line, and since `.` never matches "\r" the described-entry pattern would
  // fail for every line of a CRLF file, silently dropping all descriptions.
  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trimStart();

    // Track collapsed-description comment blocks
    if (trimmed.startsWith("<!-- gsd:collapsed-descriptions")) {
      inCollapsedBlock = true;
      continue;
    }
    if (inCollapsedBlock && trimmed.startsWith("-->")) {
      inCollapsedBlock = false;
      continue;
    }

    // Match: - `path/to/file.ts` — Description here
    const match = describedEntry.exec(line);
    if (match) {
      descriptions.set(match[1], match[2]);
      continue;
    }

    // Match: - `path/to/file.ts` (no description) — only outside collapsed blocks
    if (!inCollapsedBlock) {
      const bareMatch = bareEntry.exec(line);
      if (bareMatch) {
        descriptions.set(bareMatch[1], "");
      }
    }
  }

  return descriptions;
}
|
|
|
|
// ─── File Enumeration ────────────────────────────────────────────────────────
|
|
|
|
/**
 * Decide whether a file path is left out of the map.
 *
 * A pattern ending in "/" is a directory pattern: it matches when the path
 * starts with it or contains it after a "/". Any other pattern matches when the
 * path equals it or ends with "/" + pattern. Independently of the patterns,
 * files with lock/image/font extensions are always excluded.
 *
 * @param filePath Path to test.
 * @param excludes Exclusion patterns.
 * @returns true when the file must be skipped.
 */
function shouldExclude(filePath: string, excludes: string[]): boolean {
  const hitByPattern = excludes.some((pattern) =>
    pattern.endsWith("/")
      ? filePath.startsWith(pattern) || filePath.includes(`/${pattern}`)
      : filePath === pattern || filePath.endsWith(`/${pattern}`),
  );
  if (hitByPattern) {
    return true;
  }

  // Skip binary/lock files
  const skippedExtensions = [
    ".lock",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".ico",
    ".woff",
    ".woff2",
    ".ttf",
    ".eot",
    ".svg",
  ];
  return skippedExtensions.includes(extname(filePath).toLowerCase());
}
|
|
|
|
/**
 * List the files tracked by git under `basePath`.
 *
 * Uses `git ls-files -z` so entries are NUL-separated and emitted verbatim.
 * Without `-z`, git C-quotes paths that contain non-ASCII or special characters
 * (default `core.quotePath`), which would yield names that do not match the
 * real files.
 *
 * Best-effort: any failure (not a git repository, git missing, timeout,
 * output larger than the buffer) results in an empty list.
 *
 * @param basePath Directory in which git is run.
 * @returns Tracked file paths, or [] when they cannot be determined.
 */
function lsFiles(basePath: string): string[] {
  try {
    const output = execSync("git ls-files -z", {
      cwd: basePath,
      encoding: "utf-8",
      timeout: 10000,
      // execSync's default buffer is small enough that a large repository's
      // file list can overflow it, which would throw and be reported as "no files".
      maxBuffer: 64 * 1024 * 1024,
    });
    return output.split("\0").filter(Boolean);
  } catch {
    return [];
  }
}
|
|
|
|
/**
 * Collect the tracked files that survive the exclusion rules, capped at `maxFiles`.
 *
 * @param basePath Directory whose tracked files are listed.
 * @param excludes Exclusion patterns passed to `shouldExclude`.
 * @param maxFiles Maximum number of files to return.
 * @returns The (possibly shortened) file list and a flag telling whether
 *          files were dropped because the cap was exceeded.
 */
function enumerateFiles(basePath: string, excludes: string[], maxFiles: number): { files: string[]; truncated: boolean } {
  const kept: string[] = [];
  for (const candidate of lsFiles(basePath)) {
    if (!shouldExclude(candidate, excludes)) {
      kept.push(candidate);
    }
  }

  // Exactly `maxFiles` files is still within the limit; only more counts as truncation.
  if (kept.length > maxFiles) {
    return { files: kept.slice(0, maxFiles), truncated: true };
  }
  return { files: kept, truncated: false };
}
|
|
|
|
// ─── Grouping ────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Bucket files by parent directory and attach any known descriptions.
 *
 * Directories are returned in default string-sort order (root, keyed "", first);
 * files inside a directory are ordered with `localeCompare`. A directory is
 * flagged as collapsed when it holds more than `collapseThreshold` files.
 *
 * @param files File paths to group.
 * @param descriptions Known path → description mapping; missing paths get "".
 * @param collapseThreshold Largest file count a directory may have before it collapses.
 * @returns One group per directory.
 */
function groupByDirectory(
  files: string[],
  descriptions: Map<string, string>,
  collapseThreshold: number,
): DirectoryGroup[] {
  const buckets = new Map<string, FileEntry[]>();

  for (const filePath of files) {
    const parent = dirname(filePath);
    const key = parent === "." ? "" : parent;
    const entry: FileEntry = { path: filePath, description: descriptions.get(filePath) ?? "" };

    const bucket = buckets.get(key);
    if (bucket === undefined) {
      buckets.set(key, [entry]);
    } else {
      bucket.push(entry);
    }
  }

  return [...buckets.keys()].sort().map((dirPath) => {
    const entries = buckets.get(dirPath) ?? [];
    entries.sort((left, right) => left.path.localeCompare(right.path));
    return {
      path: dirPath,
      files: entries,
      collapsed: entries.length > collapseThreshold,
    };
  });
}
|
|
|
|
// ─── Rendering ───────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Render directory groups as the markdown text of CODEBASE.md.
 *
 * Layout: a title, a header line with generation time / file count / described
 * count, an optional truncation note, then one `### dir/` section per group.
 * Regular groups list every file; collapsed groups show a per-extension summary
 * plus, when any of their files are described, a hidden
 * `<!-- gsd:collapsed-descriptions ... -->` block holding those descriptions.
 *
 * @param groups Directory groups to render, in output order.
 * @param totalFiles File count reported in the header (and in the truncation note).
 * @param truncated Whether to emit the truncation note.
 * @returns The markdown document.
 */
function renderCodebaseMap(groups: DirectoryGroup[], totalFiles: number, truncated: boolean): string {
  // ISO timestamp with the fractional seconds removed, e.g. 2024-01-01T00:00:00Z
  const timestamp = `${new Date().toISOString().split(".")[0]}Z`;

  let describedCount = 0;
  for (const group of groups) {
    for (const entry of group.files) {
      if (entry.description) describedCount += 1;
    }
  }

  const out: string[] = [
    "# Codebase Map",
    "",
    `Generated: ${timestamp} | Files: ${totalFiles} | Described: ${describedCount}/${totalFiles}`,
  ];
  if (truncated) {
    out.push(`Note: Truncated to first ${totalFiles} files. Run with higher --max-files to include all.`);
  }
  out.push("");

  for (const { path: dirPath, files: entries, collapsed } of groups) {
    out.push(`### ${dirPath || "(root)"}/`);

    if (!collapsed) {
      for (const { path: filePath, description } of entries) {
        out.push(description ? `- \`${filePath}\` — ${description}` : `- \`${filePath}\``);
      }
      out.push("");
      continue;
    }

    // Collapsed directory: count files per extension, most frequent first.
    const perExtension = new Map<string, number>();
    for (const { path: filePath } of entries) {
      const key = extname(filePath) || "(no ext)";
      perExtension.set(key, (perExtension.get(key) ?? 0) + 1);
    }
    const summary = Array.from(perExtension.entries())
      .sort((left, right) => right[1] - left[1])
      .map(([ext, count]) => `${count} ${ext}`)
      .join(", ");
    out.push(`- *(${entries.length} files: ${summary})*`);

    // Keep existing descriptions in a hidden comment block so a later parse
    // of this document can recover them.
    const preserved = entries
      .filter((entry) => entry.description)
      .map((entry) => `- \`${entry.path}\` — ${entry.description}`);
    if (preserved.length > 0) {
      out.push("<!-- gsd:collapsed-descriptions", ...preserved, "-->");
    }
    out.push("");
  }

  return out.join("\n");
}
|
|
|
|
// ─── Public API ──────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Build the CODEBASE.md content for `basePath` from the current tracked files.
 *
 * @param basePath Directory to scan.
 * @param options Optional overrides; `excludePatterns` is added to the default
 *                excludes, `maxFiles` and `collapseThreshold` replace their defaults.
 * @param existingDescriptions Known path → description mapping to carry over;
 *                             when omitted every file starts undescribed.
 * @returns The rendered document, the number of listed files, whether the list
 *          was truncated, and the listed file paths themselves.
 */
export function generateCodebaseMap(
  basePath: string,
  options?: CodebaseMapOptions,
  existingDescriptions?: Map<string, string>,
): { content: string; fileCount: number; truncated: boolean; files: string[] } {
  const extraExcludes = options?.excludePatterns ?? [];
  const fileCap = options?.maxFiles ?? DEFAULT_MAX_FILES;
  const threshold = options?.collapseThreshold ?? DEFAULT_COLLAPSE_THRESHOLD;

  const listing = enumerateFiles(basePath, [...DEFAULT_EXCLUDES, ...extraExcludes], fileCap);
  const knownDescriptions = existingDescriptions ?? new Map<string, string>();

  const content = renderCodebaseMap(
    groupByDirectory(listing.files, knownDescriptions, threshold),
    listing.files.length,
    listing.truncated,
  );

  return {
    content,
    fileCount: listing.files.length,
    truncated: listing.truncated,
    files: listing.files,
  };
}
|
|
|
|
/**
 * Incremental update: regenerate the map while carrying over the descriptions
 * found in the existing CODEBASE.md, and report what changed.
 *
 * The change counts compare the freshly enumerated files against the paths
 * parsed from the existing document. Note that files in collapsed directories
 * are only present in the existing document when they have a description, so
 * undescribed files of a collapsed directory are counted as added.
 *
 * @param basePath Project directory.
 * @param options Generation options forwarded to `generateCodebaseMap`.
 * @returns New content plus added / removed / unchanged counts, the listed
 *          file count and the truncation flag. Nothing is written to disk here.
 */
export function updateCodebaseMap(
  basePath: string,
  options?: CodebaseMapOptions,
): { content: string; added: number; removed: number; unchanged: number; fileCount: number; truncated: boolean } {
  const mapPath = join(gsdRoot(basePath), "CODEBASE.md");

  // Descriptions from the previous map, if there is one
  const previous = existsSync(mapPath)
    ? parseCodebaseMap(readFileSync(mapPath, "utf-8"))
    : new Map<string, string>();
  const previousPaths = new Set(previous.keys());

  // The file list comes from the same generation pass as the content, so the
  // counts below always describe exactly what was rendered.
  const { content, files, fileCount, truncated } = generateCodebaseMap(basePath, options, previous);
  const currentPaths = new Set(files);

  const added = files.filter((file) => !previousPaths.has(file)).length;
  const removed = [...previousPaths].filter((file) => !currentPaths.has(file)).length;

  return {
    content,
    added,
    removed,
    unchanged: files.length - added,
    fileCount,
    truncated,
  };
}
|
|
|
|
/**
 * Persist the map as CODEBASE.md inside the project's GSD root directory,
 * creating that directory (and any missing parents) first.
 *
 * @param basePath Project directory.
 * @param content Document text to write (UTF-8).
 * @returns Path of the written file.
 */
export function writeCodebaseMap(basePath: string, content: string): string {
  const gsdDir = gsdRoot(basePath);
  const target = join(gsdDir, "CODEBASE.md");

  mkdirSync(gsdDir, { recursive: true });
  writeFileSync(target, content, "utf-8");

  return target;
}
|
|
|
|
/**
 * Load the text of the existing CODEBASE.md.
 *
 * @param basePath Project directory.
 * @returns The file content, or null when the file is missing or cannot be read.
 */
export function readCodebaseMap(basePath: string): string | null {
  const mapPath = join(gsdRoot(basePath), "CODEBASE.md");
  try {
    return existsSync(mapPath) ? readFileSync(mapPath, "utf-8") : null;
  } catch {
    // Unreadable file is reported the same way as a missing one.
    return null;
  }
}
|
|
|
|
/**
 * Summarise the existing CODEBASE.md.
 *
 * - `fileCount` is taken from the `Files: N` figure in the header, which stays
 *   accurate for collapsed directories whose files are not listed individually
 *   (0 when no such figure is found).
 * - `describedCount` is the number of parsed entries with a non-empty
 *   description, including those stored in collapsed-description blocks.
 * - `undescribedCount` is `fileCount - describedCount`, never below 0: if the
 *   header is missing or stale the described entries can outnumber the header
 *   figure, and a negative count would be meaningless to callers.
 * - `generatedAt` is the token following `Generated:` in the document, if any.
 *
 * @param basePath Project directory.
 * @returns Stats; `exists` is false (all counts 0) when there is no readable,
 *          non-empty map.
 */
export function getCodebaseMapStats(basePath: string): {
  exists: boolean;
  fileCount: number;
  describedCount: number;
  undescribedCount: number;
  generatedAt: string | null;
} {
  const content = readCodebaseMap(basePath);
  if (!content) {
    return { exists: false, fileCount: 0, describedCount: 0, undescribedCount: 0, generatedAt: null };
  }

  // Parse total file count from the header line (accurate even for collapsed dirs)
  const fileCountMatch = content.match(/Files:\s*(\d+)/);
  const totalFiles = fileCountMatch ? parseInt(fileCountMatch[1], 10) : 0;

  // Use parseCodebaseMap to count described files (includes collapsed-description blocks)
  const descriptions = parseCodebaseMap(content);
  const described = [...descriptions.values()].filter((d) => d.length > 0).length;
  const dateMatch = content.match(/Generated: (\S+)/);

  return {
    exists: true,
    fileCount: totalFiles,
    describedCount: described,
    undescribedCount: Math.max(0, totalFiles - described),
    generatedAt: dateMatch?.[1] ?? null,
  };
}
|