Add headless bootstrap and TODO triage tests

This commit is contained in:
Mikael Hugo 2026-04-30 09:21:24 +02:00
parent e62b3854cb
commit 6ccce42c62
5 changed files with 523 additions and 3 deletions

49
TODO.md
View file

@ -119,6 +119,55 @@ GEPA/DSPy placement across SF vs memory/brain:
eval candidates; run DSPy/GEPA; then write candidate diffs/reports under
`.sf/evolution/` or a review branch. It must not mutate live prompts,
skills, memory, or tool descriptions directly.
- End state: ACE Coder is the consolidation target for brain/memory,
self-evolution, and agent workbench capabilities. It already has memory tiers
and an evolution workspace, so it should eventually host the optimizer and
long-running experiment service: consume SF eval artifacts and Singularity
Memory exports, run GEPA/DSPy/genetic search, then return reports and
candidate diffs to SF.
- Near-term rule: keep execution in SF. ACE Coder can be the eventual
consolidation target, but its execution loop is not as battle-tested as SF
today. Start with SF's working tools, explicit artifacts, and deterministic
gates; move capabilities behind stable contracts only after they are proven.
- `singularity-memory` should migrate into ACE over time, but through a bridge
rather than a wholesale copy. Keep the SF memory plugin contract stable, map
Singularity Memory evidence/export APIs onto ACE memory concepts, compare
quality/latency/operability, then swap the backend when ACE satisfies the
contract.
- Checked finding: Singularity Memory is the better current external brain
contract for SF/Crush-style runners. It already has standalone MCP+HTTP,
bank isolation, retain/recall/reflect, OpenAPI clients, thin tool adapters,
VectorChord/BM25/RRF retrieval, optional reranking, and a Go migration path.
ACE should eventually host this, but SF should keep targeting the
Singularity Memory contract until ACE proves parity behind that same
boundary.
- Target topology: ACE is the central brain/workbench/evolution service;
lightweight repo-local runners such as SF, Crush, or customer-approved
agents run inside customer repositories. Those runners collect traces,
triage TODO/self-report inputs, execute deterministic gates, and submit
evidence/results back to ACE. ACE learns, evolves prompts/skills/tools
offline, and returns reviewed candidate diffs or policies for the local
runner to apply.
- SF-to-Crush direction: preserve the parts of SF that are already working
well--AGENTS/TODO triage, `.sf/triage` artifacts, backlog promotion,
harness/eval gates, dispatch rules, and reviewable diffs--but make them
usable from a Crush-style repo-local runner. In that shape, Crush is the
customer-repo execution surface, SF is the workflow/gate library or adapter,
and ACE Coder is the linked brain/workbench that stores memory, runs
evolution, and sends back policies or candidate patches.
- SF-to-vtcode/Rust direction: port the hot, deterministic SF pieces toward a
Rust/vtcode-style core over time: repo scanning, artifact IO, dispatch state,
gate execution, JSONL triage stores, and local runner protocol glue. Keep the
current TS implementation as the working reference until the Rust path proves
parity.
- UX/runtime preference: keep Charm-style terminal UX where it adds operator
clarity, and keep Crush in view as the fast repo-local execution surface.
Rust/vtcode should optimize the core and protocol layer, not erase the good
local workflow experience.
- ACE creates/manages agents, memories, eval suites, skills, and policies.
External/customer repos stay outside the ACE server boundary: repo-local
runners own checkout access, file edits, tests, secrets exposure, and side
effects, then report traces/results/artifacts back to ACE.
Proper info flow:
- Raw human dump: root TODO.md.

View file

@ -5,8 +5,15 @@
* and bootstraps the .sf/ directory structure when needed.
*/
import { existsSync, mkdirSync, readFileSync, renameSync } from "node:fs";
import { join, resolve } from "node:path";
import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
renameSync,
statSync,
} from "node:fs";
import { join, relative, resolve } from "node:path";
import {
ensureGitignore,
ensurePreferences,
@ -28,6 +35,85 @@ interface ContextOptions {
contextText?: string; // inline text
}
const AUTO_BOOTSTRAP_MAX_BYTES = 180_000;
const AUTO_BOOTSTRAP_MAX_FILE_BYTES = 40_000;
const AUTO_BOOTSTRAP_ROOT_FILES = [
"TODO.md",
"SPEC.md",
"VISION.md",
"PURPOSE.md",
"MISSION.md",
"ROADMAP.md",
"ARCHITECTURE.md",
"BUILD_PLAN.md",
"README.md",
"AGENTS.md",
"CLAUDE.md",
"CONTRIBUTING.md",
];
const AUTO_BOOTSTRAP_SOURCE_EXTENSIONS = new Set([
".go",
".ts",
".tsx",
".js",
".jsx",
".mjs",
".cjs",
".py",
".rs",
".java",
".kt",
".kts",
".rb",
".php",
".cs",
".c",
".cc",
".cpp",
".h",
".hpp",
".swift",
".scala",
".sh",
".bash",
".zsh",
".fish",
".sql",
".yaml",
".yml",
".toml",
".json",
".jsonc",
".xml",
".html",
".css",
".scss",
".sass",
".vue",
".svelte",
".lua",
".ex",
".exs",
".erl",
".hrl",
".clj",
".cljs",
".nix",
".proto",
]);
const AUTO_BOOTSTRAP_EXCLUDED_DIRS = new Set([
".git",
".sf",
".gsd",
"node_modules",
"vendor",
"dist",
"build",
"target",
".next",
".cache",
]);
// ---------------------------------------------------------------------------
// Stdin Reader
// ---------------------------------------------------------------------------
@ -57,6 +143,158 @@ export async function loadContext(options: ContextOptions): Promise<string> {
);
}
export function hasMilestones(basePath: string): boolean {
const milestonesDir = join(basePath, ".sf", "milestones");
if (!existsSync(milestonesDir)) return false;
try {
return readdirSync(milestonesDir, { withFileTypes: true }).some((entry) =>
entry.isDirectory(),
);
} catch {
return false;
}
}
export function buildAutoBootstrapContext(basePath: string): string {
const selectedFiles = collectAutoBootstrapFiles(basePath);
const sourceFiles = collectSourceFiles(basePath);
const chunks: string[] = [
"# Autonomous Repo Bootstrap",
"",
"SF headless auto found no milestones. Use the repository files below as the seed context.",
"Research every relevant markdown document and every source file path before creating the initial milestone plan.",
"Use tool-based repository inspection for source contents; do not assume the seed excerpt is complete.",
"Extract the project purpose, vision, architecture, constraints, current TODOs, risks, eval/gate ideas, and implementation backlog.",
"Apply the ACE spec-first TDD shape when planning: purpose and consumer first, behavior contract before implementation, tests as specs, evidence after gates.",
"For each proposed slice, capture Observed/Inferred/Proposed facts, a falsifier, acceptance criteria, and the verification command or eval that proves it.",
"Use explorer-style subagents or equivalent high-context research passes before planning when the runtime supports them.",
"Recommended explorer passes: docs/purpose/vision; source architecture and dependency map; tests/gates/tooling; risks/backlog/eval candidates.",
"Merge explorer findings into one repo map with cited file paths before creating milestones.",
"Create actionable milestones and slices from the repo's docs and source tree rather than asking the user to restate them.",
"",
];
let used = chunks.join("\n").length;
for (const filePath of selectedFiles) {
let content: string;
try {
content = readFileSync(filePath, "utf-8");
} catch {
continue;
}
if (content.length > AUTO_BOOTSTRAP_MAX_FILE_BYTES) {
content =
content.slice(0, AUTO_BOOTSTRAP_MAX_FILE_BYTES) +
"\n\n[truncated by SF headless auto bootstrap]\n";
}
const relPath = relative(basePath, filePath);
const block = `\n\n## ${relPath}\n\n${content.trim()}\n`;
if (used + block.length > AUTO_BOOTSTRAP_MAX_BYTES) break;
chunks.push(block);
used += block.length;
}
if (sourceFiles.length > 0) {
const inventoryLines = [
"\n\n## Source File Inventory\n",
"Inspect these source/config/test files during repo research before finalizing the plan.\n",
...sourceFiles.map((filePath) => `- ${relative(basePath, filePath)}`),
"",
];
const block = inventoryLines.join("\n");
if (used + block.length <= AUTO_BOOTSTRAP_MAX_BYTES) {
chunks.push(block);
used += block.length;
} else {
const remaining = AUTO_BOOTSTRAP_MAX_BYTES - used;
if (remaining > 1000) {
chunks.push(block.slice(0, remaining));
}
}
}
if (selectedFiles.length === 0) {
chunks.push(
"No markdown docs were found. Inspect the repository directly and create an initial milestone from source layout, package metadata, tests, and git status.",
);
}
return chunks.join("\n").trim() + "\n";
}
function collectAutoBootstrapFiles(basePath: string): string[] {
const seen = new Set<string>();
const files: string[] = [];
for (const name of AUTO_BOOTSTRAP_ROOT_FILES) {
const path = join(basePath, name);
if (existsMarkdownFile(path)) {
seen.add(path);
files.push(path);
}
}
for (const path of walkMarkdownFiles(basePath)) {
if (seen.has(path)) continue;
seen.add(path);
files.push(path);
}
return files;
}
function existsMarkdownFile(path: string): boolean {
try {
const stat = statSync(path);
return stat.isFile() && path.toLowerCase().endsWith(".md");
} catch {
return false;
}
}
function collectSourceFiles(basePath: string): string[] {
return walkFiles(basePath, (path) => {
const lower = path.toLowerCase();
if (lower.endsWith(".md")) return false;
const dot = lower.lastIndexOf(".");
return dot !== -1 && AUTO_BOOTSTRAP_SOURCE_EXTENSIONS.has(lower.slice(dot));
});
}
function walkMarkdownFiles(root: string): string[] {
return walkFiles(root, (path) => path.toLowerCase().endsWith(".md"));
}
function walkFiles(
root: string,
includeFile: (path: string) => boolean,
): string[] {
const found: string[] = [];
const visit = (dir: string) => {
let entries: ReturnType<typeof readdirSync>;
try {
entries = readdirSync(dir, { withFileTypes: true });
} catch {
return;
}
for (const entry of entries.sort((a, b) => a.name.localeCompare(b.name))) {
const path = join(dir, entry.name);
if (entry.isDirectory()) {
if (!AUTO_BOOTSTRAP_EXCLUDED_DIRS.has(entry.name)) {
visit(path);
}
continue;
}
if (entry.isFile() && includeFile(path)) {
found.push(path);
}
}
};
visit(root);
return found;
}
// ---------------------------------------------------------------------------
// Project Bootstrap
// ---------------------------------------------------------------------------

View file

@ -21,7 +21,12 @@ import {
AnswerInjector,
loadAndValidateAnswerFile,
} from "./headless-answers.js";
import { bootstrapProject, loadContext } from "./headless-context.js";
import {
bootstrapProject,
buildAutoBootstrapContext,
hasMilestones,
loadContext,
} from "./headless-context.js";
import {
EXIT_BLOCKED,
@ -348,6 +353,19 @@ async function runHeadlessOnce(
): Promise<{ exitCode: number; interrupted: boolean }> {
let interrupted = false;
const startTime = Date.now();
if (options.command === "auto" && !options.resumeSession) {
bootstrapProject(process.cwd());
if (!hasMilestones(process.cwd())) {
if (!options.json) {
process.stderr.write(
"[headless] No milestones found; bootstrapping from repo docs and source inventory...\n",
);
}
options.command = "new-milestone";
options.auto = true;
options.contextText = buildAutoBootstrapContext(process.cwd());
}
}
const isNewMilestone = options.command === "new-milestone";
const isInit = options.command === "init";

View file

@ -0,0 +1,170 @@
/**
* commands-todo.test.ts - TODO.md dump triage artifact contract.
*/
import assert from "node:assert/strict";
import {
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "node:test";
import {
extractTodoDump,
parseTodoTriageResponse,
triageTodoDump,
} from "../commands-todo.ts";
const fixedDate = new Date("2026-04-30T12:34:56.000Z");
const fixedLocalTimestamp = "20260430-143456";
test("extractTodoDump strips the empty inbox wrapper", () => {
assert.equal(
extractTodoDump("# TODO\n\nDump anything here.\n\n- keep this\n"),
"- keep this",
);
});
test("parseTodoTriageResponse accepts fenced JSON", () => {
const parsed = parseTodoTriageResponse(`Here:
\`\`\`json
{
"summary": "Triaged.",
"eval_candidates": [
{
"task_input": "agent sees a TODO dump",
"expected_behavior": "agent writes eval artifacts"
}
],
"implementation_tasks": ["wire command"],
"memory_requirements": ["remember failure pattern"],
"harness_suggestions": ["add deterministic test"],
"docs_or_tests": ["document flow"],
"unclear_notes": ["owner unknown"]
}
\`\`\``);
assert.equal(parsed.summary, "Triaged.");
assert.equal(parsed.eval_candidates.length, 1);
assert.equal(parsed.eval_candidates[0].id, "todo.eval.001");
assert.deepEqual(parsed.implementation_tasks, ["wire command"]);
});
test("triageTodoDump writes report, eval JSONL, normalized inbox, and clears TODO.md", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-triage-"));
try {
writeFileSync(
join(base, "TODO.md"),
"# TODO\n\nDump anything here.\n\n- agent forgot to turn repeated failure into eval\n",
);
const output = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "Repeated failure should become an eval.",
eval_candidates: [
{
id: "todo.eval.memory-repeat",
task_input: "repeated failure appears in TODO.md",
expected_behavior: "triage emits eval candidate JSONL",
failure_mode: "agent treats note as runtime instruction",
evidence: "TODO.md dump",
source: "TODO.md",
suggested_location: ".sf/triage/evals",
},
],
implementation_tasks: ["add /sf todo triage"],
memory_requirements: ["store repeated failure evidence"],
harness_suggestions: ["gate backlog promotion"],
docs_or_tests: ["test TODO triage command"],
unclear_notes: [],
}),
{ date: fixedDate },
);
assert.equal(
output.markdownPath,
join(base, ".sf", "triage", "reports", `${fixedLocalTimestamp}.md`),
);
assert.equal(
output.evalJsonlPath,
join(
base,
".sf",
"triage",
"evals",
`${fixedLocalTimestamp}.evals.jsonl`,
),
);
assert.equal(
output.normalizedJsonlPath,
join(base, ".sf", "triage", "inbox", `${fixedLocalTimestamp}.jsonl`),
);
assert.equal(
readFileSync(join(base, "TODO.md"), "utf-8"),
"# TODO\n\nDump anything here.\n",
);
const evals = readFileSync(output.evalJsonlPath, "utf-8")
.trim()
.split("\n")
.map((line) => JSON.parse(line));
assert.equal(evals.length, 1);
assert.equal(evals[0].id, "todo.eval.memory-repeat");
const inbox = readFileSync(output.normalizedJsonlPath, "utf-8")
.trim()
.split("\n")
.map((line) => JSON.parse(line));
assert.equal(inbox.length, 5);
assert.equal(inbox[0].kind, "eval_candidate");
assert.equal(inbox[0].source, "todo.md");
assert.equal(inbox[0].status, "pending");
assert.ok(
readFileSync(output.markdownPath, "utf-8").includes("Repeated failure"),
);
assert.equal(output.backlogItemsAdded, 0);
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump appends implementation tasks to backlog only when requested", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-backlog-"));
try {
mkdirSync(join(base, ".sf"), { recursive: true });
writeFileSync(join(base, "TODO.md"), "# TODO\n\nimplement task\n");
const output = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "Implementation task.",
eval_candidates: [],
implementation_tasks: ["build explicit triage command"],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
}),
{ date: fixedDate, backlog: true },
);
const backlogPath = join(base, ".sf", "BACKLOG.md");
assert.equal(output.backlogItemsAdded, 1);
assert.equal(existsSync(backlogPath), true);
assert.match(
readFileSync(backlogPath, "utf-8"),
/- \[ \] 999\.1 — build explicit triage command \(triaged 2026-04-30\)/,
);
} finally {
rmSync(base, { recursive: true, force: true });
}
});

View file

@ -0,0 +1,45 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import test from "node:test";
import {
buildAutoBootstrapContext,
hasMilestones,
} from "../headless-context.js";
test("buildAutoBootstrapContext includes purpose docs and source inventory", () => {
const root = mkdtempSync(join(tmpdir(), "sf-headless-bootstrap-"));
mkdirSync(join(root, "docs"), { recursive: true });
mkdirSync(join(root, "src"), { recursive: true });
writeFileSync(join(root, "VISION.md"), "# Vision\n\nBuild the runner.\n");
writeFileSync(join(root, "TODO.md"), "# TODO\n\nWire ACE.\n");
writeFileSync(join(root, "docs", "architecture.md"), "# Architecture\n");
writeFileSync(join(root, "src", "main.ts"), "export const main = true;\n");
const context = buildAutoBootstrapContext(root);
assert.match(context, /Autonomous Repo Bootstrap/);
assert.match(context, /purpose, vision, architecture/);
assert.match(context, /ACE spec-first TDD/);
assert.match(context, /explorer-style subagents/);
assert.match(context, /## VISION\.md/);
assert.match(context, /## TODO\.md/);
assert.match(context, /## docs\/architecture\.md/);
assert.match(context, /Source File Inventory/);
assert.match(context, /src\/main\.ts/);
});
test("hasMilestones only reports true when milestone directories exist", () => {
const root = mkdtempSync(join(tmpdir(), "sf-headless-milestones-"));
assert.equal(hasMilestones(root), false);
mkdirSync(join(root, ".sf", "milestones"), { recursive: true });
assert.equal(hasMilestones(root), false);
mkdirSync(join(root, ".sf", "milestones", "M001"), { recursive: true });
assert.equal(hasMilestones(root), true);
});