singularity-forge/src/traces.ts
Mikael Hugo a611cd5792 feat: introduce repo-vcs skill and add JSDoc annotations across core modules
- Add repository-vcs-context.ts to detect and inject VCS context (Git/Jujutsu)
  into the agent system prompt; wire in repo-vcs bundled skill trigger
- Add src/resources/skills/repo-vcs/ skill for commit, push, and safe-push workflows
- Add JSDoc Purpose/Consumer annotations to app-paths, bundled-extension-paths,
  errors, extension-discovery, extension-registry, headless-types, headless, and traces
- Add justfile and just to flake.nix devShell
- Fill out new-user-onboarding.md spec (Draft) and core-beliefs.md (Status: Accepted)
- Add notification-event-model.md design doc and notification-source-hygiene.md spec

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 21:36:32 +02:00

383 lines
11 KiB
TypeScript

/**
* traces.ts — Structured trace data model and export utilities for auto-mode execution.
*
* Purpose: provide a lightweight, hierarchical span model that captures the
* full lifecycle of an auto-mode session (session → units → tools) so that
* post-hoc analysis, debugging, and cost attribution can be done from a
* single JSON artifact instead of piecing together scattered logs.
*
* Consumer: headless.ts (creates and finalizes traces), trace-collector.ts
* (appends spans and events), and any external tool that reads .sf/traces/.
*/
import { randomUUID } from "node:crypto";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Role of a span within the trace hierarchy.
 *
 * Purpose: distinguish session roots ("session"), milestone/slice/task units
 * ("unit"), and individual tool calls ("tool") so that renderers and
 * aggregators can group or filter spans by semantic category.
 *
 * Consumer: trace-collector.ts when creating spans, and trace visualizers
 * that colour-code or collapse spans by kind.
 *
 * Note: createTrace in this file always builds its root span with kind
 * "session".
 */
export type SpanKind = "session" | "unit" | "tool";
/**
 * Lifecycle state of a span.
 *
 * Purpose: capture whether a span finished successfully, failed, was
 * cancelled, timed out, or is still running so that trace consumers can
 * compute success rates and identify hung operations.
 *
 * Consumer: trace-collector.ts on unit/tool end, and trace analysis scripts
 * that aggregate outcomes across sessions.
 */
export type SpanStatus =
// Default status applied by endSpan when no explicit status is passed.
| "ok"
// Set by addError, or passed explicitly to endSpan.
| "error"
// Not assigned by any helper in this file; supplied by callers of endSpan.
| "cancelled"
// Not assigned by any helper in this file; supplied by callers of endSpan.
| "timeout"
// Initial status of every span built by createSpan.
| "in_progress";
/**
 * A discrete, timestamped event attached to a span, such as a checkpoint or
 * decision.
 *
 * Purpose: record semantically meaningful moments (e.g. "planning meeting
 * started", "model switched") inside a span without creating a child span
 * for every micro-step.
 *
 * Consumer: trace-collector.ts when recording model switches, gate results,
 * or other non-span lifecycle events.
 */
export interface TraceEvent {
/** Event label; addError always uses the literal name "error". */
name: string;
/** Moment the event was recorded; addEvent and addError fill this from Date.now(). */
timestamp: number;
/** Optional flat key/value payload; values are limited to primitives or null. */
attributes?: Record<string, string | number | boolean | null>;
}
/**
 * Optional metadata attached to a span.
 *
 * Purpose: carry dimensional data (tokens, cost, model, file paths) that
 * lets downstream tools attribute spend and latency to specific units or
 * tools without parsing free-form log lines.
 *
 * Consumer: trace-collector.ts when enriching spans after LLM responses,
 * and cost-dashboard scripts that sum inputTokens / outputTokens.
 *
 * Every field is optional. Within this file only createTrace writes any of
 * them (sessionId, projectRoot, command, and model on the root span); the
 * remaining fields are populated by code outside this file.
 * NOTE(review): the session/unit/tool grouping below is by convention only;
 * the type does not tie a field group to a particular SpanKind.
 */
export interface SpanAttributes {
// Session-level
projectRoot?: string;
sessionId?: string;
cwd?: string;
command?: string;
model?: string;
inputTokens?: number;
outputTokens?: number;
cacheReadTokens?: number;
cacheWriteTokens?: number;
// NOTE(review): presumably a US-dollar amount, judging by the name; confirm with the producer.
costUsd?: number;
exitCode?: number;
// Unit-level
unitType?: "milestone" | "slice" | "task";
unitId?: string;
unitStatus?: SpanStatus;
unitErrorReason?: string;
// Tool-level
toolName?: string;
toolCallId?: string;
toolStatus?: SpanStatus;
toolError?: string;
// NOTE(review): presumably milliseconds, judging by the name; confirm with the producer.
toolDurationMs?: number;
}
/**
 * A single node in the trace tree.
 *
 * Purpose: represent one scoped operation (session, unit, or tool call) with
 * timing, status, attributes, nested children, and a timeline of events so
 * that the full execution graph can be reconstructed from the trace file.
 *
 * Consumer: trace-collector.ts, headless.ts, and any trace reader/visualizer.
 */
export interface Span {
/** Identifier; createSpan assigns a random UUID. */
id: string;
/** Human-readable label supplied by the creator of the span. */
name: string;
/** Role of this span in the hierarchy. */
kind: SpanKind;
/** Current lifecycle state; createSpan starts every span as "in_progress". */
status: SpanStatus;
/** Creation time; createSpan fills this from Date.now(). */
startTime: number;
/** Completion time from Date.now(); absent until endSpan or addError sets it. */
endTime?: number;
/** Dimensional metadata for this span. */
attributes: SpanAttributes;
/** Nested spans, in the order they were appended via addChildSpan. */
children: Span[];
/** Timeline of events, in the order appended by addEvent / addError. */
events: TraceEvent[];
}
/**
 * The top-level trace container.
 *
 * Purpose: hold the root span and session metadata so that a single file
 * contains everything needed to replay or analyse an auto-mode session.
 *
 * Consumer: headless.ts (creates and finalizes), exportTrace/exportTraceToProject
 * (serializes), and external trace consumers.
 */
export interface Trace {
/** Identifier of the trace; createTrace assigns a random UUID. */
id: string;
/** Schema version of the trace document; createTrace currently writes 1. */
version: number;
/** Project root the trace was created for. */
projectRoot: string;
/** Session identifier, when the creator supplied one. */
sessionId?: string;
/** Creation time as produced by Date.prototype.toISOString(). */
startedAt: string;
/** Completion time as produced by toISOString(); absent until finalizeTrace runs. */
completedAt?: string;
/** Root of the span tree; createTrace builds it with kind "session". */
rootSpan: Span;
}
// ---------------------------------------------------------------------------
// Span helpers
// ---------------------------------------------------------------------------
/**
 * Build a fresh span that is considered running from the moment it is created.
 *
 * Purpose: keep span construction in one place so that every span gets a
 * unique ID, a start timestamp, and empty child/event collections.
 *
 * Consumer: trace-collector.ts when starting a session, unit, or tool span.
 *
 * @param name - Human-readable label stored on the span.
 * @param kind - Role of the span in the trace hierarchy.
 * @param attributes - Optional metadata; stored by reference, not copied.
 *   Defaults to a new empty object.
 * @returns A span with a random UUID, status "in_progress", `startTime`
 *   taken from `Date.now()`, no `endTime`, and empty `children` / `events`.
 */
export function createSpan(
  name: string,
  kind: SpanKind,
  attributes: SpanAttributes = {},
): Span {
  const id = randomUUID();
  const startTime = Date.now();
  // Key order is kept stable because it determines the serialized JSON layout.
  return {
    id,
    name,
    kind,
    status: "in_progress",
    startTime,
    attributes,
    children: [],
    events: [],
  };
}
/**
 * Close a span by stamping its final status and end time.
 *
 * Purpose: make sure a finished span always carries both a status and an end
 * timestamp so that durations and success rates can be computed from it.
 *
 * Consumer: trace-collector.ts when a unit or tool finishes.
 *
 * @param span - Span to close; mutated in place.
 * @param status - Status to record. Defaults to "ok".
 * @returns The same span instance, for chaining.
 *
 * Any previously recorded `status` or `endTime` is overwritten.
 */
export function endSpan(span: Span, status: SpanStatus = "ok"): Span {
  const finishedAt = Date.now();
  return Object.assign(span, { status, endTime: finishedAt });
}
/**
 * Record a named, timestamped event on a span.
 *
 * Purpose: give collectors a way to note checkpoints (model switches, gate
 * completions) on an existing span without touching the span's own fields.
 *
 * Consumer: trace-collector.ts during auto-mode phase transitions.
 *
 * @param span - Span whose `events` list receives the new entry.
 * @param name - Event label.
 * @param attributes - Optional payload; stored by reference. When omitted the
 *   event still carries an `attributes` key whose value is `undefined`.
 */
export function addEvent(
  span: Span,
  name: string,
  attributes?: Record<string, string | number | boolean | null>,
): void {
  const timestamp = Date.now();
  const event = { name, timestamp, attributes };
  span.events.push(event);
}
/**
 * Append an error event to a span and mark the span as failed.
 *
 * Purpose: capture failure details (including stack traces when available)
 * inside the trace so that debugging can be done from the trace file alone
 * without cross-referencing separate log files.
 *
 * Consumer: trace-collector.ts when a tool call or unit throws.
 *
 * @param span - Span to annotate; mutated in place.
 * @param message - Error message stored under the event's `message` attribute.
 * @param stack - Optional stack trace; omitted from the event when empty or
 *   not provided.
 *
 * Side effects: pushes an event named "error", sets `span.status` to
 * "error", and, only if the span has no end time yet, sets `span.endTime`.
 */
export function addError(span: Span, message: string, stack?: string): void {
  // Read the clock once so the event timestamp and the span end time agree.
  const now = Date.now();
  span.events.push({
    name: "error",
    timestamp: now,
    attributes: stack ? { message, stack } : { message },
  });
  span.status = "error";
  // Compare against null/undefined rather than falsiness so that an end time
  // of 0 counts as "already ended" and is not overwritten.
  if (span.endTime == null) span.endTime = now;
}
// ---------------------------------------------------------------------------
// Trace helpers
// ---------------------------------------------------------------------------
/**
 * Build a new trace together with its root session span.
 *
 * Purpose: create the trace container and its root span in a single step so
 * that a trace never exists without a valid root.
 *
 * Consumer: headless.ts at the start of an auto-mode session.
 *
 * @param projectRoot - Project root recorded on both the trace and the root span.
 * @param sessionId - Optional session identifier; the root span is named
 *   `session:<sessionId>`, or `session:unknown` when it is absent.
 * @param command - Optional command recorded on the root span's attributes.
 * @param model - Optional model name recorded on the root span's attributes.
 * @returns A version-1 trace with a random UUID, an ISO `startedAt`
 *   timestamp, and a root span of kind "session".
 */
export function createTrace(
  projectRoot: string,
  sessionId?: string,
  command?: string,
  model?: string,
): Trace {
  const sessionLabel = sessionId ?? "unknown";
  const rootAttributes = { sessionId, projectRoot, command, model };
  const rootSpan = createSpan(`session:${sessionLabel}`, "session", rootAttributes);
  const id = randomUUID();
  const startedAt = new Date().toISOString();
  return { id, version: 1, projectRoot, sessionId, startedAt, rootSpan };
}
/**
 * Close a trace by stamping its completion time.
 *
 * Purpose: signal to readers that the span tree is complete, so session
 * duration and aggregate costs can be computed safely.
 *
 * Consumer: headless.ts in the normal exit path and signal handlers.
 *
 * @param trace - Trace to finalize; mutated in place.
 * @returns The same trace instance with `completedAt` set to the current
 *   time in ISO format. Only `completedAt` is touched; the root span is not
 *   ended by this function.
 */
export function finalizeTrace(trace: Trace): Trace {
  const completedAt = new Date().toISOString();
  return Object.assign(trace, { completedAt });
}
/**
 * Locate a span by ID within a span tree.
 *
 * Purpose: allow collectors to find an existing span (for example to attach
 * a child or end it) without keeping a separate ID-to-span index.
 *
 * Consumer: trace-collector.ts when bridging async tool-call results back
 * to their original span.
 *
 * @param span - Root of the subtree to search; it is itself a candidate.
 * @param id - Span ID to look for.
 * @returns The first matching span in depth-first pre-order, or `undefined`
 *   when no span in the subtree has that ID.
 */
export function findSpan(span: Span, id: string): Span | undefined {
  // Explicit stack instead of recursion; the visiting order is unchanged.
  const pending = [span];
  for (let node = pending.pop(); node !== undefined; node = pending.pop()) {
    if (node.id === id) return node;
    // Push children right-to-left so the leftmost child is popped next.
    for (const child of [...node.children].reverse()) {
      pending.push(child);
    }
  }
  return undefined;
}
/**
 * Attach a span as the last child of a parent span.
 *
 * Purpose: grow the hierarchical tree (session → unit → tool) so that trace
 * readers can collapse, expand, or aggregate by level.
 *
 * Consumer: trace-collector.ts when starting a unit or tool inside an
 * already-running parent span.
 *
 * @param parent - Span whose `children` list is extended; mutated in place.
 * @param child - Span to append; stored by reference.
 */
export function addChildSpan(parent: Span, child: Span): void {
  const { children } = parent;
  children.push(child);
}
/**
 * Lazily visit every span of a subtree, parent before children
 * (depth-first pre-order).
 *
 * Purpose: offer one reusable traversal for aggregators, exporters, and
 * debug printers so that consumers do not each write their own recursion.
 *
 * Consumer: trace analysis scripts, cost aggregators, and test assertions
 * that verify span tree shape.
 *
 * @param span - Root of the subtree; it is the first span yielded.
 * @returns A generator yielding each span exactly once per position in the tree.
 */
export function* walkSpans(span: Span): Generator<Span, void, unknown> {
  // Stack of live array iterators, one per tree level currently being walked.
  const cursors = [[span][Symbol.iterator]()];
  let cursor = cursors[cursors.length - 1];
  while (cursor !== undefined) {
    const step = cursor.next();
    if (step.done) {
      // This level is exhausted; resume the parent level.
      cursors.pop();
    } else {
      yield step.value;
      // Descend only after the consumer resumes, as a recursive walk would.
      cursors.push(step.value.children[Symbol.iterator]());
    }
    cursor = cursors[cursors.length - 1];
  }
}
// ---------------------------------------------------------------------------
// Export
// ---------------------------------------------------------------------------
/**
 * Serialize a trace as pretty-printed JSON and write it to an arbitrary path.
 * Missing parent directories are created first.
 *
 * Purpose: allow trace consumers (tests, CI scripts, manual debugging) to
 * persist a trace anywhere on disk without hard-coding .sf/traces/ logic.
 *
 * Consumer: test suites that write traces to temp directories, and custom
 * integrations that ship traces to external observability platforms.
 *
 * @param trace - Trace to serialize (2-space indented JSON, UTF-8).
 * @param path - Destination file path; an existing file is overwritten.
 * @throws Propagates any error raised by `mkdirSync` or `writeFileSync`.
 */
export function exportTrace(trace: Trace, path: string): void {
  // A recursive mkdir is a no-op when the directory already exists, so no
  // separate existence check is needed.
  mkdirSync(dirname(path), { recursive: true });
  writeFileSync(path, JSON.stringify(trace, null, 2), "utf-8");
}
/**
 * Write a trace as pretty-printed JSON into `<projectRoot>/.sf/traces/`.
 * Filename: trace-<timestamp>.json, where the timestamp is `Date.now()` at
 * the time of the call.
 *
 * Purpose: provide the standard, project-local trace sink so that every
 * auto-mode session leaves a discoverable artifact in a known location.
 *
 * Consumer: headless.ts in the normal exit path and signal handlers.
 *
 * @param trace - Trace to serialize (2-space indented JSON, UTF-8).
 * @param projectRoot - Directory under which `.sf/traces/` is created if missing.
 * @returns The full path of the file that was written.
 */
export function exportTraceToProject(
  trace: Trace,
  projectRoot: string,
): string {
  const tracesDir = join(projectRoot, ".sf", "traces");
  if (!existsSync(tracesDir)) mkdirSync(tracesDir, { recursive: true });

  const target = join(tracesDir, `trace-${Date.now()}.json`);
  const serialized = JSON.stringify(trace, null, 2);
  writeFileSync(target, serialized, "utf-8");
  return target;
}
/**
 * Read a trace file from disk and return it as a typed `Trace`.
 *
 * Purpose: round-trip a trace file back into the typed model so that
 * analysis tools, test assertions, and replay utilities can work with
 * structured data instead of raw JSON.
 *
 * Consumer: trace analysis scripts, test helpers, and any tool that reads
 * .sf/traces/ for post-session inspection.
 *
 * @param path - Path of the JSON trace file (read as UTF-8).
 * @returns The parsed trace.
 * @throws Propagates file-system errors from `readFileSync` and the
 *   `SyntaxError` raised by `JSON.parse` on malformed JSON.
 * @throws Error when the document is not a JSON object or has no object-valued
 *   `rootSpan`. This is a minimal shape check, not a full schema validation;
 *   nested spans and the remaining fields are not inspected.
 */
export function readTrace(path: string): Trace {
  const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Invalid trace file ${path}: expected a JSON object`);
  }
  // Every trace reader walks the span tree, so reject documents without one
  // here instead of letting them fail later with an opaque TypeError.
  const rootSpan = (parsed as { rootSpan?: unknown }).rootSpan;
  if (typeof rootSpan !== "object" || rootSpan === null) {
    throw new Error(`Invalid trace file ${path}: missing rootSpan object`);
  }
  return parsed as Trace;
}