singularity-forge/src/tests/trace-export.test.ts

708 lines
21 KiB
TypeScript

/**
* Trace Export Integration Test
*
* Tests the trace collector's SF_TRACE_ENABLED gating, lifecycle (init/flush),
* span creation (root/unit/tool), event recording, error recording, and JSON
* file export — all exercised via direct module imports without requiring a
* live LLM or autonomous mode session.
*
* Run with:
* SF_TRACE_ENABLED=1 npm run test:unit -- trace-export
*/
import assert from "node:assert/strict";
import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
rmSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "vitest";
const projectRoot = process.cwd();
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Walk all spans depth-first (root first). */
function* allSpans(span: { children: unknown[]; kind?: string }): Generator<{
kind?: string;
attributes?: Record<string, unknown>;
events?: unknown[];
}> {
yield span;
for (const child of span.children) {
yield* allSpans(child as typeof span);
}
}
/** Read the most recent trace file in a traces directory. */
function _readLatestTrace(tracesDir: string): object | null {
if (!existsSync(tracesDir)) return null;
const files = readdirSync(tracesDir).filter(
(f) => f.startsWith("trace-") && f.endsWith(".json"),
);
if (files.length === 0) return null;
files.sort();
return JSON.parse(
readFileSync(join(tracesDir, files[files.length - 1]), "utf-8"),
);
}
/** Count spans of a given kind in a trace. */
function _countSpans(trace: object, kind: string): number {
const root = (trace as { rootSpan: { children: unknown[]; kind?: string } })
.rootSpan;
let count = root.kind === kind ? 1 : 0;
for (const span of allSpans(root)) {
if (span.kind === kind) count++;
}
return count;
}
// ---------------------------------------------------------------------------
// Tests: isTraceEnabled gating
// ---------------------------------------------------------------------------
test("isTraceEnabled() is false when SF_TRACE_ENABLED is unset", async () => {
const { isTraceEnabled } = await import(
"../../src/resources/extensions/sf/trace-collector.js"
);
const orig = process.env.SF_TRACE_ENABLED;
delete process.env.SF_TRACE_ENABLED;
try {
assert.equal(isTraceEnabled(), false);
} finally {
if (orig !== undefined) process.env.SF_TRACE_ENABLED = orig;
else delete process.env.SF_TRACE_ENABLED;
}
});
test('isTraceEnabled() is false when SF_TRACE_ENABLED is "0"', async () => {
const { isTraceEnabled } = await import(
"../../src/resources/extensions/sf/trace-collector.js"
);
const orig = process.env.SF_TRACE_ENABLED;
process.env.SF_TRACE_ENABLED = "0";
try {
assert.equal(isTraceEnabled(), false);
} finally {
if (orig !== undefined) process.env.SF_TRACE_ENABLED = orig;
else delete process.env.SF_TRACE_ENABLED;
}
});
test('isTraceEnabled() is true when SF_TRACE_ENABLED is "1"', async () => {
const { isTraceEnabled } = await import(
"../../src/resources/extensions/sf/trace-collector.js"
);
const orig = process.env.SF_TRACE_ENABLED;
process.env.SF_TRACE_ENABLED = "1";
try {
assert.equal(isTraceEnabled(), true);
} finally {
if (orig !== undefined) process.env.SF_TRACE_ENABLED = orig;
else delete process.env.SF_TRACE_ENABLED;
}
});
// ---------------------------------------------------------------------------
// Tests: initTraceCollector lifecycle
// ---------------------------------------------------------------------------
test("initTraceCollector returns null when SF_TRACE_ENABLED is not set", async () => {
const { initTraceCollector, isTraceEnabled } = await import(
"../../src/resources/extensions/sf/trace-collector.js"
);
const orig = process.env.SF_TRACE_ENABLED;
delete process.env.SF_TRACE_ENABLED;
try {
if (isTraceEnabled()) {
// Env already enabled — skip this path
return;
}
const trace = initTraceCollector(projectRoot, "test-session", "status");
assert.equal(
trace,
null,
"Must return null when SF_TRACE_ENABLED is not set",
);
} finally {
if (orig !== undefined) process.env.SF_TRACE_ENABLED = orig;
else delete process.env.SF_TRACE_ENABLED;
}
});
test('initTraceCollector returns null when SF_TRACE_ENABLED is "0"', async () => {
const { initTraceCollector, isTraceEnabled } = await import(
"../../src/resources/extensions/sf/trace-collector.js"
);
const orig = process.env.SF_TRACE_ENABLED;
process.env.SF_TRACE_ENABLED = "0";
try {
if (isTraceEnabled()) return;
const trace = initTraceCollector(projectRoot, "test-session", "status");
assert.equal(trace, null, 'Must return null when SF_TRACE_ENABLED is "0"');
} finally {
if (orig !== undefined) process.env.SF_TRACE_ENABLED = orig;
else delete process.env.SF_TRACE_ENABLED;
}
});
test('initTraceCollector creates trace when SF_TRACE_ENABLED is "1"', async () => {
const { initTraceCollector, flushTrace, getActiveTrace, isTraceEnabled } =
await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-trace-lifecycle-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
// Env mutation may not be reactive in ESM — skip
rmSync(tmpDir, { recursive: true, force: true });
return;
}
const trace = initTraceCollector(
tmpDir,
"test-session-abc",
"status",
"claude-3-5-haiku",
);
assert.ok(trace, "initTraceCollector must return a trace when enabled");
assert.equal(trace!.projectRoot, tmpDir, "projectRoot must match");
assert.equal(trace!.sessionId, "test-session-abc", "sessionId must match");
assert.equal(
trace!.rootSpan.kind,
"session",
"Root span kind must be session",
);
assert.equal(
trace!.rootSpan.attributes.command,
"status",
"command attribute must be recorded",
);
assert.equal(
trace!.rootSpan.attributes.model,
"claude-3-5-haiku",
"model attribute must be recorded",
);
// getActiveTrace returns the same trace
assert.ok(
getActiveTrace() === trace,
"getActiveTrace must return the active trace",
);
// flushTrace writes file and clears active trace
flushTrace(tmpDir);
assert.equal(
getActiveTrace(),
null,
"Active trace must be null after flush",
);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
// ---------------------------------------------------------------------------
// Tests: span hierarchy (root → unit → tool)
// ---------------------------------------------------------------------------
test("startUnitSpan creates a unit span as child of root", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
getActiveTrace,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-span-test-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
initTraceCollector(tmpDir, "session-x", "auto");
const unitSpan = startUnitSpan("slice", "S01", {
unitType: "slice",
unitId: "S01",
});
assert.ok(unitSpan, "startUnitSpan must return a span");
assert.equal(unitSpan!.kind, "unit", "Unit span kind must be unit");
assert.equal(
unitSpan!.attributes.unitId,
"S01",
"unitId attribute must be set",
);
assert.equal(
unitSpan!.attributes.unitType,
"slice",
"unitType attribute must be set",
);
assert.equal(
unitSpan!.status,
"in_progress",
"New unit span must be in_progress",
);
const trace = getActiveTrace()!;
assert.ok(
trace.rootSpan.children.includes(unitSpan!),
"Unit span must be child of root",
);
flushTrace(tmpDir);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
test("startToolSpan creates a tool span as child of unit span", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
startToolSpan,
completeSpan,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-tool-span-test-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
initTraceCollector(tmpDir, "session-y", "auto");
const unitSpan = startUnitSpan("milestone", "M001", {
unitType: "milestone",
unitId: "M001",
});
const toolSpan = startToolSpan(unitSpan!, "bash", "call-123", {
toolName: "bash",
toolCallId: "call-123",
});
assert.ok(toolSpan, "startToolSpan must return a span");
assert.equal(toolSpan.kind, "tool", "Tool span kind must be tool");
assert.equal(
toolSpan.attributes.toolName,
"bash",
"toolName attribute must be set",
);
assert.equal(
toolSpan.attributes.toolCallId,
"call-123",
"toolCallId attribute must be set",
);
assert.ok(
unitSpan!.children.includes(toolSpan),
"Tool span must be child of unit span",
);
// Complete the tool span
completeSpan(toolSpan, "ok");
assert.equal(
toolSpan.status,
"ok",
"Tool span status must be ok after completeSpan(ok)",
);
assert.ok(toolSpan.endTime, "Tool span must have endTime after completion");
flushTrace(tmpDir);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
test("completeSpan with error status marks span as error", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
completeSpan,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-complete-error-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
initTraceCollector(tmpDir, "session-z", "auto");
const unitSpan = startUnitSpan("task", "T01", {
unitType: "task",
unitId: "T01",
});
completeSpan(unitSpan!, "error");
assert.equal(unitSpan!.status, "error", "Unit span must have status error");
assert.ok(unitSpan!.endTime, "Unit span must have endTime");
flushTrace(tmpDir);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
// ---------------------------------------------------------------------------
// Tests: event recording and error recording
// ---------------------------------------------------------------------------
test("traceEvent adds a named event to a span", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
traceEvent,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-event-test-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
initTraceCollector(tmpDir, "session-evt", "auto");
const unitSpan = startUnitSpan("milestone", "M002");
traceEvent(unitSpan!, "unit-failed", { reason: "no-plan", unitId: "M002" });
assert.equal(unitSpan!.events.length, 1, "Unit span must have one event");
assert.equal(unitSpan!.events[0].name, "unit-failed");
assert.equal(unitSpan!.events[0].attributes?.reason, "no-plan");
flushTrace(tmpDir);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
test("traceError adds an error event and sets span status to error", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
traceError,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-err-test-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
initTraceCollector(tmpDir, "session-err", "auto");
const unitSpan = startUnitSpan("slice", "S03");
traceError(
unitSpan!,
"Tool execution failed: ECONNREFUSED",
"Error: ECONNREFUSED\n at ...",
);
assert.equal(
unitSpan!.status,
"error",
"Span must be error after traceError",
);
assert.ok(unitSpan!.endTime, "Span must have endTime after traceError");
assert.equal(unitSpan!.events.length, 1, "Span must have one error event");
assert.equal(unitSpan!.events[0].name, "error");
assert.equal(
unitSpan!.events[0].attributes?.message,
"Tool execution failed: ECONNREFUSED",
);
assert.ok(
unitSpan!.events[0].attributes?.stack,
"Error event must have stack",
);
flushTrace(tmpDir);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
// ---------------------------------------------------------------------------
// Tests: JSON export to .sf/traces/
// ---------------------------------------------------------------------------
test("flushTrace writes a trace JSON file to .sf/traces/", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
startToolSpan,
completeSpan,
setTraceCost,
setTraceExitCode,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-export-test-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
const tracesDir = join(tmpDir, ".sf", "traces");
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
initTraceCollector(tmpDir, "export-session", "auto");
const unitSpan = startUnitSpan("slice", "S04", {
unitType: "slice",
unitId: "S04",
});
const toolSpan = startToolSpan(unitSpan!, "bash", "call-xyz", {
toolName: "bash",
});
completeSpan(toolSpan, "ok");
completeSpan(unitSpan!, "error");
setTraceCost(5000, 1200, 800, 200, 0.042);
setTraceExitCode(1);
flushTrace(tmpDir);
// Verify file was written
assert.ok(existsSync(tracesDir), ".sf/traces/ directory must be created");
const files = readdirSync(tracesDir).filter(
(f) => f.startsWith("trace-") && f.endsWith(".json"),
);
assert.ok(files.length > 0, "At least one trace file must exist");
// Read and verify structure
const trace = JSON.parse(
readFileSync(join(tracesDir, files[0]), "utf-8"),
) as Record<string, unknown>;
assert.equal(trace.version, 1, "Trace version must be 1");
assert.ok(trace.startedAt, "Trace must have startedAt");
assert.ok(
trace.completedAt,
"Trace must have completedAt (flushTrace calls finalizeTrace)",
);
assert.equal(trace.sessionId, "export-session", "sessionId must match");
assert.equal(trace.projectRoot, tmpDir, "projectRoot must match");
// Verify root span
const root = trace.rootSpan as Record<string, unknown>;
assert.equal(root.kind, "session");
assert.equal(root.status, "in_progress"); // finalizeTrace only sets completedAt, not root status
assert.equal(root.attributes?.command, "auto");
assert.equal(root.attributes?.exitCode, 1);
assert.equal(root.attributes?.inputTokens, 5000);
assert.equal(root.attributes?.costUsd, 0.042);
// Verify span hierarchy
assert.ok(Array.isArray(root.children), "Root must have children array");
assert.equal(
root.children.length,
1,
"Root must have exactly one child (unit span)",
);
const unit = root.children[0] as Record<string, unknown>;
assert.equal(unit.kind, "unit");
assert.equal(unit.attributes?.unitId, "S04");
assert.equal(unit.status, "error", "Unit span must have error status");
assert.ok(Array.isArray(unit.children), "Unit must have children array");
assert.equal(unit.children.length, 1, "Unit must have one tool child");
const tool = unit.children[0] as Record<string, unknown>;
assert.equal(tool.kind, "tool");
assert.equal(tool.attributes?.toolName, "bash");
assert.equal(tool.status, "ok", "Tool span must be ok");
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
test("trace contains root span + unit span + error event for failed unit scenario", async () => {
const {
initTraceCollector,
flushTrace,
startUnitSpan,
traceEvent,
traceError,
completeSpan,
isTraceEnabled,
} = await import("../../src/resources/extensions/sf/trace-collector.js");
const orig = process.env.SF_TRACE_ENABLED;
const tmpDir = join(tmpdir(), `sf-failure-trace-${Date.now()}`);
mkdirSync(tmpDir, { recursive: true });
const tracesDir = join(tmpDir, ".sf", "traces");
try {
process.env.SF_TRACE_ENABLED = "1";
if (!isTraceEnabled()) {
rmSync(tmpDir, { recursive: true, force: true });
return;
}
// Simulate autonomous mode with a milestone that has no plan (synthetic failure)
initTraceCollector(tmpDir, "no-plan-session", "auto");
const milestoneSpan = startUnitSpan("milestone", "M003", {
unitType: "milestone",
unitId: "M003",
});
traceEvent(milestoneSpan!, "unit-start", {
unitId: "M003",
unitType: "milestone",
});
// Simulate the failure: unit fails because there's no plan
traceEvent(milestoneSpan!, "unit-failed", { reason: "no-plan-found" });
traceError(
milestoneSpan!,
"Milestone M003 failed: no plan found for milestone M003",
);
// Complete with error status
completeSpan(milestoneSpan!, "error");
flushTrace(tmpDir);
// Read and verify
assert.ok(existsSync(tracesDir));
const files = readdirSync(tracesDir).filter(
(f) => f.startsWith("trace-") && f.endsWith(".json"),
);
assert.ok(files.length > 0);
const trace = JSON.parse(
readFileSync(join(tracesDir, files[0]), "utf-8"),
) as Record<string, unknown>;
const root = trace.rootSpan as Record<string, unknown>;
assert.equal(root.kind, "session", "Root span must be session");
assert.equal(
root.attributes?.command,
"auto",
"Root must record auto command",
);
assert.equal(
root.attributes?.sessionId,
"no-plan-session",
"Root must record sessionId",
);
assert.equal(
root.children.length,
1,
"Root must have exactly one child unit span",
);
const unit = root.children[0] as Record<string, unknown>;
assert.equal(unit.kind, "unit", "Child must be unit span");
assert.equal(unit.attributes?.unitId, "M003", "Unit span must have unitId");
assert.equal(unit.status, "error", "Failed unit must have error status");
// Error event must be present
assert.ok(Array.isArray(unit.events), "Unit must have events array");
const errorEvent = (unit.events as Array<Record<string, unknown>>).find(
(e) => e.name === "error",
);
assert.ok(errorEvent, "Unit must have an error event");
assert.ok(
String(errorEvent!.attributes?.message).includes("no plan found"),
"Error message must describe the failure",
);
rmSync(tmpDir, { recursive: true, force: true });
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});
// ---------------------------------------------------------------------------
// Tests: SF_TRACE_ENABLED=0 means zero overhead
// ---------------------------------------------------------------------------
test("SF_TRACE_ENABLED=0: initTraceCollector returns null, no state set", async () => {
const { initTraceCollector, getActiveTrace, isTraceEnabled } = await import(
"../../src/resources/extensions/sf/trace-collector.js"
);
const orig = process.env.SF_TRACE_ENABLED;
process.env.SF_TRACE_ENABLED = "0";
try {
if (isTraceEnabled()) return;
const trace = initTraceCollector(projectRoot, "sess", "auto");
assert.equal(trace, null);
assert.equal(getActiveTrace(), null);
} finally {
if (orig === undefined) {
delete process.env.SF_TRACE_ENABLED;
} else {
process.env.SF_TRACE_ENABLED = orig;
}
}
});