Add headless bootstrap and TODO triage tests

2026-04-30 09:21:24 +02:00 · 2026-04-30 09:21:24 +02:00 · 6ccce42c62
commit 6ccce42c62
parent e62b3854cb
5 changed files with 523 additions and 3 deletions
--- a/TODO.md
+++ b/TODO.md
@ -119,6 +119,55 @@ GEPA/DSPy placement across SF vs memory/brain:
  eval candidates; run DSPy/GEPA; then write candidate diffs/reports under
  `.sf/evolution/` or a review branch. It must not mutate live prompts,
  skills, memory, or tool descriptions directly.
+- End state: ACE Coder is the consolidation target for brain/memory,
+  self-evolution, and agent workbench capabilities. It already has memory tiers
+  and an evolution workspace, so it should eventually host the optimizer and
+  long-running experiment service: consume SF eval artifacts and Singularity
+  Memory exports, run GEPA/DSPy/genetic search, then return reports and
+  candidate diffs to SF.
+- Near-term rule: keep execution in SF. ACE Coder can be the eventual
+  consolidation target, but its execution loop is not as battle-tested as SF
+  today. Start with SF's working tools, explicit artifacts, and deterministic
+  gates; move capabilities behind stable contracts only after they are proven.
+- `singularity-memory` should migrate into ACE over time, but through a bridge
+  rather than a wholesale copy. Keep the SF memory plugin contract stable, map
+  Singularity Memory evidence/export APIs onto ACE memory concepts, compare
+  quality/latency/operability, then swap the backend when ACE satisfies the
+  contract.
+- Checked finding: Singularity Memory is the better current external brain
+  contract for SF/Crush-style runners. It already has standalone MCP+HTTP,
+  bank isolation, retain/recall/reflect, OpenAPI clients, thin tool adapters,
+  VectorChord/BM25/RRF retrieval, optional reranking, and a Go migration path.
+  ACE should eventually host this, but SF should keep targeting the
+  Singularity Memory contract until ACE proves parity behind that same
+  boundary.
+- Target topology: ACE is the central brain/workbench/evolution service;
+  lightweight repo-local runners such as SF, Crush, or customer-approved
+  agents run inside customer repositories. Those runners collect traces,
+  triage TODO/self-report inputs, execute deterministic gates, and submit
+  evidence/results back to ACE. ACE learns, evolves prompts/skills/tools
+  offline, and returns reviewed candidate diffs or policies for the local
+  runner to apply.
+- SF-to-Crush direction: preserve the parts of SF that are already working
+  well--AGENTS/TODO triage, `.sf/triage` artifacts, backlog promotion,
+  harness/eval gates, dispatch rules, and reviewable diffs--but make them
+  usable from a Crush-style repo-local runner. In that shape, Crush is the
+  customer-repo execution surface, SF is the workflow/gate library or adapter,
+  and ACE Coder is the linked brain/workbench that stores memory, runs
+  evolution, and sends back policies or candidate patches.
+- SF-to-vtcode/Rust direction: port the hot, deterministic SF pieces toward a
+  Rust/vtcode-style core over time: repo scanning, artifact IO, dispatch state,
+  gate execution, JSONL triage stores, and local runner protocol glue. Keep the
+  current TS implementation as the working reference until the Rust path proves
+  parity.
+- UX/runtime preference: keep Charm-style terminal UX where it adds operator
+  clarity, and keep Crush in view as the fast repo-local execution surface.
+  Rust/vtcode should optimize the core and protocol layer, not erase the good
+  local workflow experience.
+- ACE creates/manages agents, memories, eval suites, skills, and policies.
+  External/customer repos stay outside the ACE server boundary: repo-local
+  runners own checkout access, file edits, tests, secrets exposure, and side
+  effects, then report traces/results/artifacts back to ACE.

 Proper info flow:
 - Raw human dump: root TODO.md.
--- a/src/headless-context.ts
+++ b/src/headless-context.ts
@ -5,8 +5,15 @@
 * and bootstraps the .sf/ directory structure when needed.
 */

-import { existsSync, mkdirSync, readFileSync, renameSync } from "node:fs";
-import { join, resolve } from "node:path";
+import {
+	existsSync,
+	mkdirSync,
+	readdirSync,
+	readFileSync,
+	renameSync,
+	statSync,
+} from "node:fs";
+import { join, relative, resolve } from "node:path";
 import {
 	ensureGitignore,
 	ensurePreferences,
@ -28,6 +35,85 @@ interface ContextOptions {
 	contextText?: string; // inline text
 }

+const AUTO_BOOTSTRAP_MAX_BYTES = 180_000;
+const AUTO_BOOTSTRAP_MAX_FILE_BYTES = 40_000;
+const AUTO_BOOTSTRAP_ROOT_FILES = [
+	"TODO.md",
+	"SPEC.md",
+	"VISION.md",
+	"PURPOSE.md",
+	"MISSION.md",
+	"ROADMAP.md",
+	"ARCHITECTURE.md",
+	"BUILD_PLAN.md",
+	"README.md",
+	"AGENTS.md",
+	"CLAUDE.md",
+	"CONTRIBUTING.md",
+];
+const AUTO_BOOTSTRAP_SOURCE_EXTENSIONS = new Set([
+	".go",
+	".ts",
+	".tsx",
+	".js",
+	".jsx",
+	".mjs",
+	".cjs",
+	".py",
+	".rs",
+	".java",
+	".kt",
+	".kts",
+	".rb",
+	".php",
+	".cs",
+	".c",
+	".cc",
+	".cpp",
+	".h",
+	".hpp",
+	".swift",
+	".scala",
+	".sh",
+	".bash",
+	".zsh",
+	".fish",
+	".sql",
+	".yaml",
+	".yml",
+	".toml",
+	".json",
+	".jsonc",
+	".xml",
+	".html",
+	".css",
+	".scss",
+	".sass",
+	".vue",
+	".svelte",
+	".lua",
+	".ex",
+	".exs",
+	".erl",
+	".hrl",
+	".clj",
+	".cljs",
+	".nix",
+	".proto",
+]);
+const AUTO_BOOTSTRAP_EXCLUDED_DIRS = new Set([
+	".git",
+	".sf",
+	".gsd",
+	"node_modules",
+	"vendor",
+	"dist",
+	"build",
+	"target",
+	".next",
+	".cache",
+]);
+
 // ---------------------------------------------------------------------------
 // Stdin Reader
 // ---------------------------------------------------------------------------
@ -57,6 +143,158 @@ export async function loadContext(options: ContextOptions): Promise<string> {
 	);
 }

+export function hasMilestones(basePath: string): boolean {
+	const milestonesDir = join(basePath, ".sf", "milestones");
+	if (!existsSync(milestonesDir)) return false;
+	try {
+		return readdirSync(milestonesDir, { withFileTypes: true }).some((entry) =>
+			entry.isDirectory(),
+		);
+	} catch {
+		return false;
+	}
+}
+
+export function buildAutoBootstrapContext(basePath: string): string {
+	const selectedFiles = collectAutoBootstrapFiles(basePath);
+	const sourceFiles = collectSourceFiles(basePath);
+	const chunks: string[] = [
+		"# Autonomous Repo Bootstrap",
+		"",
+		"SF headless auto found no milestones. Use the repository files below as the seed context.",
+		"Research every relevant markdown document and every source file path before creating the initial milestone plan.",
+		"Use tool-based repository inspection for source contents; do not assume the seed excerpt is complete.",
+		"Extract the project purpose, vision, architecture, constraints, current TODOs, risks, eval/gate ideas, and implementation backlog.",
+		"Apply the ACE spec-first TDD shape when planning: purpose and consumer first, behavior contract before implementation, tests as specs, evidence after gates.",
+		"For each proposed slice, capture Observed/Inferred/Proposed facts, a falsifier, acceptance criteria, and the verification command or eval that proves it.",
+		"Use explorer-style subagents or equivalent high-context research passes before planning when the runtime supports them.",
+		"Recommended explorer passes: docs/purpose/vision; source architecture and dependency map; tests/gates/tooling; risks/backlog/eval candidates.",
+		"Merge explorer findings into one repo map with cited file paths before creating milestones.",
+		"Create actionable milestones and slices from the repo's docs and source tree rather than asking the user to restate them.",
+		"",
+	];
+
+	let used = chunks.join("\n").length;
+	for (const filePath of selectedFiles) {
+		let content: string;
+		try {
+			content = readFileSync(filePath, "utf-8");
+		} catch {
+			continue;
+		}
+		if (content.length > AUTO_BOOTSTRAP_MAX_FILE_BYTES) {
+			content =
+				content.slice(0, AUTO_BOOTSTRAP_MAX_FILE_BYTES) +
+				"\n\n[truncated by SF headless auto bootstrap]\n";
+		}
+
+		const relPath = relative(basePath, filePath);
+		const block = `\n\n## ${relPath}\n\n${content.trim()}\n`;
+		if (used + block.length > AUTO_BOOTSTRAP_MAX_BYTES) break;
+		chunks.push(block);
+		used += block.length;
+	}
+
+	if (sourceFiles.length > 0) {
+		const inventoryLines = [
+			"\n\n## Source File Inventory\n",
+			"Inspect these source/config/test files during repo research before finalizing the plan.\n",
+			...sourceFiles.map((filePath) => `- ${relative(basePath, filePath)}`),
+			"",
+		];
+		const block = inventoryLines.join("\n");
+		if (used + block.length <= AUTO_BOOTSTRAP_MAX_BYTES) {
+			chunks.push(block);
+			used += block.length;
+		} else {
+			const remaining = AUTO_BOOTSTRAP_MAX_BYTES - used;
+			if (remaining > 1000) {
+				chunks.push(block.slice(0, remaining));
+			}
+		}
+	}
+
+	if (selectedFiles.length === 0) {
+		chunks.push(
+			"No markdown docs were found. Inspect the repository directly and create an initial milestone from source layout, package metadata, tests, and git status.",
+		);
+	}
+
+	return chunks.join("\n").trim() + "\n";
+}
+
+function collectAutoBootstrapFiles(basePath: string): string[] {
+	const seen = new Set<string>();
+	const files: string[] = [];
+
+	for (const name of AUTO_BOOTSTRAP_ROOT_FILES) {
+		const path = join(basePath, name);
+		if (existsMarkdownFile(path)) {
+			seen.add(path);
+			files.push(path);
+		}
+	}
+
+	for (const path of walkMarkdownFiles(basePath)) {
+		if (seen.has(path)) continue;
+		seen.add(path);
+		files.push(path);
+	}
+
+	return files;
+}
+
+function existsMarkdownFile(path: string): boolean {
+	try {
+		const stat = statSync(path);
+		return stat.isFile() && path.toLowerCase().endsWith(".md");
+	} catch {
+		return false;
+	}
+}
+
+function collectSourceFiles(basePath: string): string[] {
+	return walkFiles(basePath, (path) => {
+		const lower = path.toLowerCase();
+		if (lower.endsWith(".md")) return false;
+		const dot = lower.lastIndexOf(".");
+		return dot !== -1 && AUTO_BOOTSTRAP_SOURCE_EXTENSIONS.has(lower.slice(dot));
+	});
+}
+
+function walkMarkdownFiles(root: string): string[] {
+	return walkFiles(root, (path) => path.toLowerCase().endsWith(".md"));
+}
+
+function walkFiles(
+	root: string,
+	includeFile: (path: string) => boolean,
+): string[] {
+	const found: string[] = [];
+	const visit = (dir: string) => {
+		let entries: ReturnType<typeof readdirSync>;
+		try {
+			entries = readdirSync(dir, { withFileTypes: true });
+		} catch {
+			return;
+		}
+		for (const entry of entries.sort((a, b) => a.name.localeCompare(b.name))) {
+			const path = join(dir, entry.name);
+			if (entry.isDirectory()) {
+				if (!AUTO_BOOTSTRAP_EXCLUDED_DIRS.has(entry.name)) {
+					visit(path);
+				}
+				continue;
+			}
+			if (entry.isFile() && includeFile(path)) {
+				found.push(path);
+			}
+		}
+	};
+	visit(root);
+	return found;
+}
+
 // ---------------------------------------------------------------------------
 // Project Bootstrap
 // ---------------------------------------------------------------------------
--- a/src/headless.ts
+++ b/src/headless.ts
@ -21,7 +21,12 @@ import {
 	AnswerInjector,
 	loadAndValidateAnswerFile,
 } from "./headless-answers.js";
-import { bootstrapProject, loadContext } from "./headless-context.js";
+import {
+	bootstrapProject,
+	buildAutoBootstrapContext,
+	hasMilestones,
+	loadContext,
+} from "./headless-context.js";

 import {
 	EXIT_BLOCKED,
@ -348,6 +353,19 @@ async function runHeadlessOnce(
 ): Promise<{ exitCode: number; interrupted: boolean }> {
 	let interrupted = false;
 	const startTime = Date.now();
+	if (options.command === "auto" && !options.resumeSession) {
+		bootstrapProject(process.cwd());
+		if (!hasMilestones(process.cwd())) {
+			if (!options.json) {
+				process.stderr.write(
+					"[headless] No milestones found; bootstrapping from repo docs and source inventory...\n",
+				);
+			}
+			options.command = "new-milestone";
+			options.auto = true;
+			options.contextText = buildAutoBootstrapContext(process.cwd());
+		}
+	}
 	const isNewMilestone = options.command === "new-milestone";
 	const isInit = options.command === "init";

--- a/src/resources/extensions/sf/tests/commands-todo.test.ts
+++ b/src/resources/extensions/sf/tests/commands-todo.test.ts
@ -0,0 +1,170 @@
+/**
+ * commands-todo.test.ts - TODO.md dump triage artifact contract.
+ */
+
+import assert from "node:assert/strict";
+import {
+	existsSync,
+	mkdirSync,
+	mkdtempSync,
+	readFileSync,
+	rmSync,
+	writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { test } from "node:test";
+
+import {
+	extractTodoDump,
+	parseTodoTriageResponse,
+	triageTodoDump,
+} from "../commands-todo.ts";
+
+const fixedDate = new Date("2026-04-30T12:34:56.000Z");
+const fixedLocalTimestamp = "20260430-143456";
+
+test("extractTodoDump strips the empty inbox wrapper", () => {
+	assert.equal(
+		extractTodoDump("# TODO\n\nDump anything here.\n\n- keep this\n"),
+		"- keep this",
+	);
+});
+
+test("parseTodoTriageResponse accepts fenced JSON", () => {
+	const parsed = parseTodoTriageResponse(`Here:
+\`\`\`json
+{
+  "summary": "Triaged.",
+  "eval_candidates": [
+    {
+      "task_input": "agent sees a TODO dump",
+      "expected_behavior": "agent writes eval artifacts"
+    }
+  ],
+  "implementation_tasks": ["wire command"],
+  "memory_requirements": ["remember failure pattern"],
+  "harness_suggestions": ["add deterministic test"],
+  "docs_or_tests": ["document flow"],
+  "unclear_notes": ["owner unknown"]
+}
+\`\`\``);
+
+	assert.equal(parsed.summary, "Triaged.");
+	assert.equal(parsed.eval_candidates.length, 1);
+	assert.equal(parsed.eval_candidates[0].id, "todo.eval.001");
+	assert.deepEqual(parsed.implementation_tasks, ["wire command"]);
+});
+
+test("triageTodoDump writes report, eval JSONL, normalized inbox, and clears TODO.md", async () => {
+	const base = mkdtempSync(join(tmpdir(), "sf-todo-triage-"));
+	try {
+		writeFileSync(
+			join(base, "TODO.md"),
+			"# TODO\n\nDump anything here.\n\n- agent forgot to turn repeated failure into eval\n",
+		);
+
+		const output = await triageTodoDump(
+			base,
+			async () =>
+				JSON.stringify({
+					summary: "Repeated failure should become an eval.",
+					eval_candidates: [
+						{
+							id: "todo.eval.memory-repeat",
+							task_input: "repeated failure appears in TODO.md",
+							expected_behavior: "triage emits eval candidate JSONL",
+							failure_mode: "agent treats note as runtime instruction",
+							evidence: "TODO.md dump",
+							source: "TODO.md",
+							suggested_location: ".sf/triage/evals",
+						},
+					],
+					implementation_tasks: ["add /sf todo triage"],
+					memory_requirements: ["store repeated failure evidence"],
+					harness_suggestions: ["gate backlog promotion"],
+					docs_or_tests: ["test TODO triage command"],
+					unclear_notes: [],
+				}),
+			{ date: fixedDate },
+		);
+
+		assert.equal(
+			output.markdownPath,
+			join(base, ".sf", "triage", "reports", `${fixedLocalTimestamp}.md`),
+		);
+		assert.equal(
+			output.evalJsonlPath,
+			join(
+				base,
+				".sf",
+				"triage",
+				"evals",
+				`${fixedLocalTimestamp}.evals.jsonl`,
+			),
+		);
+		assert.equal(
+			output.normalizedJsonlPath,
+			join(base, ".sf", "triage", "inbox", `${fixedLocalTimestamp}.jsonl`),
+		);
+		assert.equal(
+			readFileSync(join(base, "TODO.md"), "utf-8"),
+			"# TODO\n\nDump anything here.\n",
+		);
+
+		const evals = readFileSync(output.evalJsonlPath, "utf-8")
+			.trim()
+			.split("\n")
+			.map((line) => JSON.parse(line));
+		assert.equal(evals.length, 1);
+		assert.equal(evals[0].id, "todo.eval.memory-repeat");
+
+		const inbox = readFileSync(output.normalizedJsonlPath, "utf-8")
+			.trim()
+			.split("\n")
+			.map((line) => JSON.parse(line));
+		assert.equal(inbox.length, 5);
+		assert.equal(inbox[0].kind, "eval_candidate");
+		assert.equal(inbox[0].source, "todo.md");
+		assert.equal(inbox[0].status, "pending");
+		assert.ok(
+			readFileSync(output.markdownPath, "utf-8").includes("Repeated failure"),
+		);
+		assert.equal(output.backlogItemsAdded, 0);
+	} finally {
+		rmSync(base, { recursive: true, force: true });
+	}
+});
+
+test("triageTodoDump appends implementation tasks to backlog only when requested", async () => {
+	const base = mkdtempSync(join(tmpdir(), "sf-todo-backlog-"));
+	try {
+		mkdirSync(join(base, ".sf"), { recursive: true });
+		writeFileSync(join(base, "TODO.md"), "# TODO\n\nimplement task\n");
+
+		const output = await triageTodoDump(
+			base,
+			async () =>
+				JSON.stringify({
+					summary: "Implementation task.",
+					eval_candidates: [],
+					implementation_tasks: ["build explicit triage command"],
+					memory_requirements: [],
+					harness_suggestions: [],
+					docs_or_tests: [],
+					unclear_notes: [],
+				}),
+			{ date: fixedDate, backlog: true },
+		);
+
+		const backlogPath = join(base, ".sf", "BACKLOG.md");
+		assert.equal(output.backlogItemsAdded, 1);
+		assert.equal(existsSync(backlogPath), true);
+		assert.match(
+			readFileSync(backlogPath, "utf-8"),
+			/- \[ \] 999\.1 — build explicit triage command \(triaged 2026-04-30\)/,
+		);
+	} finally {
+		rmSync(base, { recursive: true, force: true });
+	}
+});
--- a/src/tests/headless-context-autobootstrap.test.ts
+++ b/src/tests/headless-context-autobootstrap.test.ts
@ -0,0 +1,45 @@
+import assert from "node:assert/strict";
+import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import test from "node:test";
+
+import {
+	buildAutoBootstrapContext,
+	hasMilestones,
+} from "../headless-context.js";
+
+test("buildAutoBootstrapContext includes purpose docs and source inventory", () => {
+	const root = mkdtempSync(join(tmpdir(), "sf-headless-bootstrap-"));
+	mkdirSync(join(root, "docs"), { recursive: true });
+	mkdirSync(join(root, "src"), { recursive: true });
+
+	writeFileSync(join(root, "VISION.md"), "# Vision\n\nBuild the runner.\n");
+	writeFileSync(join(root, "TODO.md"), "# TODO\n\nWire ACE.\n");
+	writeFileSync(join(root, "docs", "architecture.md"), "# Architecture\n");
+	writeFileSync(join(root, "src", "main.ts"), "export const main = true;\n");
+
+	const context = buildAutoBootstrapContext(root);
+
+	assert.match(context, /Autonomous Repo Bootstrap/);
+	assert.match(context, /purpose, vision, architecture/);
+	assert.match(context, /ACE spec-first TDD/);
+	assert.match(context, /explorer-style subagents/);
+	assert.match(context, /## VISION\.md/);
+	assert.match(context, /## TODO\.md/);
+	assert.match(context, /## docs\/architecture\.md/);
+	assert.match(context, /Source File Inventory/);
+	assert.match(context, /src\/main\.ts/);
+});
+
+test("hasMilestones only reports true when milestone directories exist", () => {
+	const root = mkdtempSync(join(tmpdir(), "sf-headless-milestones-"));
+
+	assert.equal(hasMilestones(root), false);
+
+	mkdirSync(join(root, ".sf", "milestones"), { recursive: true });
+	assert.equal(hasMilestones(root), false);
+
+	mkdirSync(join(root, ".sf", "milestones", "M001"), { recursive: true });
+	assert.equal(hasMilestones(root), true);
+});