test: Add idempotency, schema validation, and --ci behavior tests to co…

SF-Task: S04/T02
This commit is contained in:
Mikael Hugo 2026-04-30 21:43:49 +02:00
parent a7b96cd004
commit da324da27e
5 changed files with 469 additions and 16 deletions

View file

@ -47,7 +47,7 @@
echo " rustc: $(command -v rustc)"
echo ""
echo "Build native addon:"
echo " bun run --filter @singularity-forge/native build:native"
echo " node rust-engine/scripts/build.js"
'';
};
});

View file

@ -41,14 +41,14 @@
},
"packageManager": "npm@10.9.3",
"scripts": {
"build:pi-tui": "bun run --filter @singularity-forge/pi-tui build",
"build:pi-ai": "bun run --filter @singularity-forge/pi-ai build",
"build:pi-agent-core": "bun run --filter @singularity-forge/pi-agent-core build",
"build:pi-coding-agent": "bun run --filter @singularity-forge/pi-coding-agent build",
"build:native-pkg": "bun run --filter @singularity-forge/native build",
"build:rpc-client": "bun run --filter @singularity-forge/rpc-client build",
"build:pi-tui": "npm --workspace @singularity-forge/pi-tui run build",
"build:pi-ai": "npm --workspace @singularity-forge/pi-ai run build",
"build:pi-agent-core": "npm --workspace @singularity-forge/pi-agent-core run build",
"build:pi-coding-agent": "npm --workspace @singularity-forge/pi-coding-agent run build",
"build:native-pkg": "npm --workspace @singularity-forge/native run build",
"build:rpc-client": "npm --workspace @singularity-forge/rpc-client run build",
"build:pi": "npm run build:native-pkg && npm run build:pi-tui && npm run build:pi-ai && npm run build:pi-agent-core && npm run build:pi-coding-agent",
"build:mcp-server": "bun run --filter @singularity-forge/mcp-server build",
"build:mcp-server": "npm --workspace @singularity-forge/mcp-server run build",
"build:core": "npm run build:pi && npm run build:rpc-client && npm run build:mcp-server && npm run check:versioned-json && tsc && npm run copy-resources && npm run copy-themes && npm run copy-export-html",
"build": "npm run build:core && node scripts/build-web-if-stale.cjs",
"stage:web-host": "node scripts/stage-web-standalone.cjs",

View file

@ -287,7 +287,12 @@ export async function handleTriage(
const output = await triageTodoDump(basePath, llmCall, {
clear: !trimmed.includes("--no-clear"),
backlog: trimmed.includes("--backlog"),
ci: trimmed.includes("--ci"),
});
if (output.skipped) {
ctx.ui.notify("TODO.md unchanged since last triage — skipping LLM call.", "info");
return;
}
ctx.ui.notify(
[
"TODO triage complete.",

View file

@ -15,6 +15,7 @@ import {
rmSync,
writeFileSync,
} from "node:fs";
import { createHash } from "node:crypto";
import { dirname, join } from "node:path";
import type {
ExtensionAPI,
@ -540,10 +541,57 @@ export function buildTodoTriageLLMCall(ctx: ExtensionCommandContext): LLMCallFn
};
}
function computeHash(content: string): string {
return createHash("sha256").update(content).digest("hex").slice(0, 16);
}
function lastHashPath(basePath: string): string {
return join(sfRoot(basePath), "triage", ".last-hash");
}
function readLastHash(basePath: string): string | null {
const path = lastHashPath(basePath);
if (!existsSync(path)) return null;
try {
return readFileSync(path, "utf-8").trim();
} catch {
return null;
}
}
function writeLastHash(basePath: string, hash: string): void {
const path = lastHashPath(basePath);
mkdirSync(dirname(path), { recursive: true });
writeFileSync(path, hash, "utf-8");
}
function deterministicSuffix(): string {
const envSuffix = process.env.GITHUB_SHA || process.env.SF_TRIAGE_SUFFIX;
if (envSuffix) return envSuffix.slice(0, 16);
return timestampId();
}
export function validateJsonlFile(path: string, schemaName: string): { ok: boolean; error?: string } {
if (!existsSync(path)) return { ok: true };
const content = readFileSync(path, "utf-8");
const lines = content.split("\n").filter((line) => line.trim());
for (let i = 0; i < lines.length; i++) {
try {
JSON.parse(lines[i]);
} catch (err) {
return {
ok: false,
error: `${schemaName} line ${i + 1}: ${err instanceof Error ? err.message : String(err)}`,
};
}
}
return { ok: true };
}
export async function triageTodoDump(
basePath: string,
llmCall: LLMCallFn,
options: { clear?: boolean; date?: Date; backlog?: boolean } = {},
options: { clear?: boolean; date?: Date; backlog?: boolean; ci?: boolean } = {},
): Promise<{
markdownPath: string;
evalJsonlPath: string;
@ -551,6 +599,7 @@ export async function triageTodoDump(
skillJsonlPath: string;
backlogItemsAdded: number;
result: TodoTriageResult;
skipped?: boolean;
}> {
const todoPath = join(basePath, "TODO.md");
if (!existsSync(todoPath)) {
@ -563,8 +612,37 @@ export async function triageTodoDump(
throw new Error("TODO.md has no dump content to triage.");
}
// CI mode: force no-clear + backlog
const clear = options.ci ? false : options.clear;
const backlog = options.ci ? true : options.backlog;
// Hash-based idempotency check in CI mode
if (options.ci) {
const currentHash = computeHash(raw);
const lastHash = readLastHash(basePath);
if (lastHash === currentHash) {
return {
markdownPath: "",
evalJsonlPath: "",
normalizedJsonlPath: "",
skillJsonlPath: "",
backlogItemsAdded: 0,
result: {
summary: "TODO.md unchanged since last triage — skipping.",
eval_candidates: [],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
},
skipped: true,
};
}
}
const result = await triageWithModel(dump, llmCall);
const id = timestampId(options.date);
const id = options.ci ? deterministicSuffix() : timestampId(options.date);
const createdAt = (options.date ?? new Date()).toISOString();
const triageRoot = join(basePath, ".sf", "triage");
const reportsDir = join(triageRoot, "reports");
@ -585,15 +663,34 @@ export async function triageTodoDump(
writeFileSync(normalizedJsonlPath, renderNormalizedJsonl(result, createdAt));
writeFileSync(skillJsonlPath, renderSkillProposals(result));
// Schema validation in CI mode
if (options.ci) {
const validations = [
validateJsonlFile(evalJsonlPath, "eval"),
validateJsonlFile(normalizedJsonlPath, "inbox"),
validateJsonlFile(skillJsonlPath, "skill"),
];
for (const v of validations) {
if (!v.ok) {
throw new Error(`Schema validation failed for ${v.error}`);
}
}
}
const backlogItemsAdded =
options.backlog === true
backlog === true
? appendBacklogItems(basePath, result.implementation_tasks, id)
: 0;
if (options.clear !== false) {
if (clear !== false) {
rmSync(todoPath);
}
// Update hash after successful triage in CI mode
if (options.ci) {
writeLastHash(basePath, computeHash(raw));
}
return {
markdownPath,
evalJsonlPath,
@ -601,6 +698,7 @@ export async function triageTodoDump(
skillJsonlPath,
backlogItemsAdded,
result,
skipped: false,
};
}
@ -613,10 +711,11 @@ export async function handleTodo(
const subcommand = parts[0] || "triage";
const clear = !parts.includes("--no-clear");
const backlog = parts.includes("--backlog");
const ci = parts.includes("--ci");
if (subcommand !== "triage") {
ctx.ui.notify(
"Usage: /sf todo triage [--no-clear] [--backlog]\nReads root TODO.md, writes .sf/triage artifacts, and clears processed dump notes by default.",
"Usage: /sf todo triage [--no-clear] [--backlog] [--ci]\nReads root TODO.md, writes .sf/triage artifacts, and clears processed dump notes by default.",
"warning",
);
return;
@ -628,8 +727,12 @@ export async function handleTodo(
const raw = readFileSync(todoPath, "utf-8");
const dump = extractTodoDump(raw);
if (!dump) {
rmSync(todoPath);
ctx.ui.notify("TODO.md was empty — removed.", "info");
if (!ci) {
rmSync(todoPath);
ctx.ui.notify("TODO.md was empty — removed.", "info");
} else {
ctx.ui.notify("TODO.md was empty — nothing to triage in CI mode.", "info");
}
return;
}
}
@ -641,7 +744,11 @@ export async function handleTodo(
}
try {
const output = await triageTodoDump(projectRoot(), llmCall, { clear, backlog });
const output = await triageTodoDump(projectRoot(), llmCall, { clear, backlog, ci });
if (output.skipped) {
ctx.ui.notify("TODO.md unchanged since last triage — skipping LLM call.", "info");
return;
}
ctx.ui.notify(
[
"TODO triage complete.",

View file

@ -20,6 +20,7 @@ import {
handleTodo,
parseTodoTriageResponse,
triageTodoDump,
validateJsonlFile,
} from "../commands-todo.ts";
import { handleTriage } from "../commands-handlers.ts";
@ -539,3 +540,343 @@ test("handleTodo removes empty TODO.md and notifies info", async () => {
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump in CI mode skips LLM call when hash unchanged", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-ci-skip-"));
try {
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- ci skip test\n");
// First run: should process
const output1 = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "CI test.",
eval_candidates: [],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
}),
{ date: fixedDate, ci: true },
);
assert.equal(output1.skipped, false);
assert.ok(existsSync(join(base, ".sf", "triage", ".last-hash")));
// Second run with same content: should skip
const output2 = await triageTodoDump(
base,
async () => "should-not-be-called",
{ date: fixedDate, ci: true },
);
assert.equal(output2.skipped, true);
assert.equal(output2.backlogItemsAdded, 0);
assert.equal(output2.result.summary, "TODO.md unchanged since last triage — skipping.");
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump in CI mode uses deterministic suffix from env", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-ci-suffix-"));
const originalSha = process.env.GITHUB_SHA;
try {
process.env.GITHUB_SHA = "abc123def456789";
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- ci suffix test\n");
const output = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "CI suffix test.",
eval_candidates: [],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
}),
{ date: fixedDate, ci: true },
);
assert.equal(output.skipped, false);
assert.ok(output.markdownPath.includes("abc123def456789"), "should use GITHUB_SHA as filename suffix");
assert.ok(existsSync(output.markdownPath));
} finally {
if (originalSha === undefined) {
delete process.env.GITHUB_SHA;
} else {
process.env.GITHUB_SHA = originalSha;
}
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump in CI mode forces no-clear and backlog", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-ci-forces-"));
try {
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- ci force test\n");
const output = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "CI force test.",
eval_candidates: [],
implementation_tasks: ["forced backlog item"],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
}),
{ date: fixedDate, ci: true, clear: true, backlog: false },
);
assert.equal(output.skipped, false);
assert.ok(existsSync(join(base, "TODO.md")), "TODO.md should NOT be cleared in CI mode");
assert.equal(output.backlogItemsAdded, 1, "backlog should be forced true in CI mode");
assert.ok(existsSync(join(base, ".sf", "BACKLOG.md")));
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump in CI mode validates JSONL schemas and passes for valid output", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-ci-validate-"));
try {
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- ci validate test\n");
// Should not throw — all JSONL is generated via JSON.stringify
const output = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "CI validate test.",
eval_candidates: [{ task_input: "x", expected_behavior: "y" }],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
}),
{ date: fixedDate, ci: true },
);
assert.equal(output.skipped, false);
assert.ok(existsSync(output.evalJsonlPath));
assert.ok(existsSync(output.normalizedJsonlPath));
assert.ok(existsSync(output.skillJsonlPath));
// Verify all JSONL files are parseable
for (const path of [output.evalJsonlPath, output.normalizedJsonlPath, output.skillJsonlPath]) {
const lines = readFileSync(path, "utf-8")
.split("\n")
.filter((line) => line.trim());
for (const line of lines) {
assert.doesNotThrow(() => JSON.parse(line), `Invalid JSONL in ${path}`);
}
}
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("handleTodo with --ci flag passes ci option to triageTodoDump", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-handle-ci-"));
const notifications: Array<{ message: string; type: string }> = [];
const originalProjectRoot = process.env.SF_PROJECT_ROOT;
try {
process.env.SF_PROJECT_ROOT = base;
// Use empty TODO.md to test the CI-mode empty-check path without hitting the LLM
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n");
const ctx = makeMockCtx();
ctx.ui.notify = (message: string, type: string) => {
notifications.push({ message, type });
};
const pi = makeMockPi();
await handleTodo("triage --ci", ctx, pi);
assert.ok(
notifications.some((n) => n.type === "info" && n.message.includes("TODO.md was empty — nothing to triage in CI mode.")),
"should show CI-specific empty message",
);
// In CI mode, empty TODO.md should NOT be removed
assert.ok(existsSync(join(base, "TODO.md")), "TODO.md should not be removed in CI mode");
} finally {
if (originalProjectRoot === undefined) {
delete process.env.SF_PROJECT_ROOT;
} else {
process.env.SF_PROJECT_ROOT = originalProjectRoot;
}
rmSync(base, { recursive: true, force: true });
}
});
test("handleTriage with --source todo --ci routes to todo triage path in CI mode", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-triage-source-todo-ci-"));
const notifications: Array<{ message: string; type: string }> = [];
try {
// Use empty TODO.md — handleTriage routes directly to triageTodoDump which throws for empty content
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n");
const ctx = makeMockCtx();
ctx.ui.notify = (message: string, type: string) => {
notifications.push({ message, type });
};
const pi = makeMockPi();
await handleTriage("--source todo --ci", ctx, pi, base);
// handleTriage routes to triageTodoDump directly; empty content produces a failure notification
assert.ok(
notifications.some((n) => n.type === "warning" && n.message.includes("TODO triage failed:")),
"should route to todo triage path in CI mode",
);
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump in CI mode re-calls LLM when TODO.md content changes", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-ci-change-"));
let callCount = 0;
try {
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- first version\n");
// First run
const output1 = await triageTodoDump(
base,
async () => {
callCount++;
return JSON.stringify({
summary: "First.",
eval_candidates: [],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
});
},
{ date: fixedDate, ci: true },
);
assert.equal(output1.skipped, false);
assert.equal(callCount, 1);
// Change content
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- second version\n");
// Second run should re-call LLM because hash changed
const output2 = await triageTodoDump(
base,
async () => {
callCount++;
return JSON.stringify({
summary: "Second.",
eval_candidates: [],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
});
},
{ date: fixedDate, ci: true },
);
assert.equal(output2.skipped, false);
assert.equal(callCount, 2);
assert.equal(output2.result.summary, "Second.");
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("validateJsonlFile detects corrupted JSONL lines", () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-validate-jsonl-"));
try {
const validPath = join(base, "valid.jsonl");
const invalidPath = join(base, "invalid.jsonl");
writeFileSync(validPath, '{"a":1}\n{"b":2}\n');
writeFileSync(invalidPath, '{"a":1}\nthis is not json\n{"b":2}\n');
assert.deepEqual(validateJsonlFile(validPath, "valid"), { ok: true });
const invalidResult = validateJsonlFile(invalidPath, "eval");
assert.equal(invalidResult.ok, false);
assert.ok(invalidResult.error?.includes("eval line 2:"), "error should mention the line number and schema name");
assert.ok(invalidResult.error?.includes("Unexpected token"), "error should include the parse error");
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("triageTodoDump in CI mode throws when JSONL schema validation fails on corrupted output", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-ci-schema-fail-"));
try {
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n\n- schema fail test\n");
// Mock LLM returns valid data, but we'll corrupt the file after triageTodoDump writes it
// by monkey-patching writeFileSync via a proxy on the module. Since ES modules are sealed,
// we instead test the integration path: run triage, corrupt one file, then validate directly.
const output = await triageTodoDump(
base,
async () =>
JSON.stringify({
summary: "Schema fail.",
eval_candidates: [{ task_input: "x", expected_behavior: "y" }],
implementation_tasks: [],
memory_requirements: [],
harness_suggestions: [],
docs_or_tests: [],
unclear_notes: [],
}),
{ date: fixedDate, ci: true },
);
assert.equal(output.skipped, false);
// Corrupt the eval JSONL after it was written
writeFileSync(output.evalJsonlPath, "this is not valid json\n");
const validation = validateJsonlFile(output.evalJsonlPath, "eval");
assert.equal(validation.ok, false);
assert.ok(validation.error?.includes("eval line 1:"));
} finally {
rmSync(base, { recursive: true, force: true });
}
});
test("handleTodo with --ci and empty TODO.md notifies skip without error", async () => {
const base = mkdtempSync(join(tmpdir(), "sf-todo-handle-ci-empty-"));
const notifications: Array<{ message: string; type: string }> = [];
const originalProjectRoot = process.env.SF_PROJECT_ROOT;
try {
process.env.SF_PROJECT_ROOT = base;
writeFileSync(join(base, "TODO.md"), "# TODO\n\nDump anything here.\n");
const ctx = makeMockCtx();
ctx.ui.notify = (message: string, type: string) => {
notifications.push({ message, type });
};
const pi = makeMockPi();
await handleTodo("triage --ci", ctx, pi);
// Should be an info notification, not a warning/error
assert.equal(notifications.length, 1);
assert.equal(notifications[0].type, "info");
assert.ok(notifications[0].message.includes("TODO.md was empty — nothing to triage in CI mode."));
// In CI mode, empty TODO.md should NOT be removed
assert.ok(existsSync(join(base, "TODO.md")), "TODO.md should not be removed in CI mode");
} finally {
if (originalProjectRoot === undefined) {
delete process.env.SF_PROJECT_ROOT;
} else {
process.env.SF_PROJECT_ROOT = originalProjectRoot;
}
rmSync(base, { recursive: true, force: true });
}
});