- Add repository-vcs-context.ts to detect and inject VCS context (Git/Jujutsu) into the agent system prompt; wire in repo-vcs bundled skill trigger - Add src/resources/skills/repo-vcs/ skill for commit, push, and safe-push workflows - Add JSDoc Purpose/Consumer annotations to app-paths, bundled-extension-paths, errors, extension-discovery, extension-registry, headless-types, headless, and traces - Add justfile and just to flake.nix devShell - Fill out new-user-onboarding.md spec (Draft) and core-beliefs.md (Status: Accepted) - Add notification-event-model.md design doc and notification-source-hygiene.md spec Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
383 lines
11 KiB
TypeScript
383 lines
11 KiB
TypeScript
/**
 * traces.ts — Structured trace data model and export utilities for auto-mode execution.
 *
 * Purpose: provide a lightweight, hierarchical span model that captures the
 * full lifecycle of an auto-mode session (session → units → tools) so that
 * post-hoc analysis, debugging, and cost attribution can be done from a
 * single JSON artifact instead of piecing together scattered logs.
 *
 * Consumer: headless.ts (creates and finalizes traces), trace-collector.ts
 * (appends spans and events), and any external tool that reads .sf/traces/.
 */
|
|
|
|
import { randomUUID } from "node:crypto";
|
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
|
|
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Role a span plays in the trace hierarchy.
 *
 * Purpose: separate the session root, the milestone/slice/task units, and
 * individual tool calls so that renderers and aggregators can group or
 * filter spans by semantic category.
 *
 * Consumer: trace-collector.ts when creating spans, and trace visualizers
 * that colour-code or collapse spans by kind.
 */
export type SpanKind =
  | "session"
  | "unit"
  | "tool";
|
|
|
|
/**
 * Lifecycle state of a span.
 *
 * Purpose: record whether a span is still running ("in_progress") or how it
 * finished ("ok", "error", "cancelled", "timeout") so that trace consumers
 * can compute success rates and identify hung operations.
 *
 * Consumer: trace-collector.ts on unit/tool end, and trace analysis scripts
 * that aggregate outcomes across sessions.
 */
export type SpanStatus =
  | "ok"
  | "error"
  | "cancelled"
  | "timeout"
  | "in_progress";
|
|
|
|
/**
 * A point-in-time occurrence recorded on a span (checkpoint, decision, error).
 *
 * Purpose: capture semantically meaningful moments (e.g. "planning meeting
 * started", "model switched") inside a span without creating a child span
 * for every micro-step.
 *
 * Consumer: trace-collector.ts when recording model switches, gate results,
 * or other non-span lifecycle events.
 */
export interface TraceEvent {
  /** Event label; addError records its events under the name "error". */
  name: string;
  /** Epoch milliseconds (Date.now()) at which the event was recorded. */
  timestamp: number;
  /** Optional flat bag of JSON-friendly scalar values describing the event. */
  attributes?: Record<string, string | number | boolean | null>;
}
|
|
|
|
/**
 * Optional dimensional metadata carried by a span.
 *
 * Purpose: hold structured data (tokens, cost, model, identifiers) so that
 * downstream tools can attribute spend and latency to specific units or
 * tools without parsing free-form log lines.
 *
 * Consumer: trace-collector.ts when enriching spans after LLM responses,
 * and cost-dashboard scripts that sum inputTokens / outputTokens.
 *
 * Every field is optional; which ones are set depends on the span's kind.
 */
export interface SpanAttributes {
  // Session-level (createTrace fills in sessionId, projectRoot, command, model)
  projectRoot?: string;
  sessionId?: string;
  cwd?: string;
  command?: string;
  model?: string;
  inputTokens?: number;
  outputTokens?: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
  // NOTE(review): currency is implied only by the field name — confirm with the producer.
  costUsd?: number;
  exitCode?: number;

  // Unit-level
  unitType?: "milestone" | "slice" | "task";
  unitId?: string;
  unitStatus?: SpanStatus;
  unitErrorReason?: string;

  // Tool-level
  toolName?: string;
  toolCallId?: string;
  toolStatus?: SpanStatus;
  toolError?: string;
  // NOTE(review): unit (milliseconds) is implied only by the field name — confirm with the producer.
  toolDurationMs?: number;
}
|
|
|
|
/**
 * One node of the trace tree.
 *
 * Purpose: describe a single scoped operation (session, unit, or tool call)
 * with timing, status, attributes, nested children, and a timeline of events
 * so that the full execution graph can be reconstructed from the trace file.
 *
 * Consumer: trace-collector.ts, headless.ts, and any trace reader/visualizer.
 */
export interface Span {
  /** Unique identifier; createSpan assigns a random UUID. */
  id: string;
  /** Human-readable label (createTrace names the root `session:<sessionId>`). */
  name: string;
  /** Position of this span in the session → unit → tool hierarchy. */
  kind: SpanKind;
  /** Current lifecycle state; createSpan starts every span as "in_progress". */
  status: SpanStatus;
  /** Epoch milliseconds (Date.now()) captured when the span was created. */
  startTime: number;
  /** Epoch milliseconds at which the span ended; absent while still running. */
  endTime?: number;
  /** Dimensional metadata attached to this span. */
  attributes: SpanAttributes;
  /** Nested spans, in the order they were added via addChildSpan. */
  children: Span[];
  /** Point-in-time events recorded on this span, in insertion order. */
  events: TraceEvent[];
}
|
|
|
|
/**
 * Top-level container for one recorded session.
 *
 * Purpose: bundle the root span with session metadata so that a single file
 * contains everything needed to replay or analyse an auto-mode session.
 *
 * Consumer: headless.ts (creates and finalizes), exportTrace/exportTraceToProject
 * (serializes), and external trace consumers.
 */
export interface Trace {
  /** Unique identifier of the trace; createTrace assigns a random UUID. */
  id: string;
  /** Format version of the trace document; createTrace writes 1. */
  version: number;
  /** Project root the session ran against, as passed to createTrace. */
  projectRoot: string;
  /** Session identifier, when the caller supplied one. */
  sessionId?: string;
  /** ISO-8601 timestamp (Date#toISOString) taken when the trace was created. */
  startedAt: string;
  /** ISO-8601 timestamp written by finalizeTrace; absent until then. */
  completedAt?: string;
  /** Root of the span tree; createTrace creates it with kind "session". */
  rootSpan: Span;
}
|
|
|
|
// ---------------------------------------------------------------------------
// Span helpers
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Build a fresh, still-running span.
 *
 * Purpose: act as the single construction site for spans so that every span
 * gets a random UUID, an "in_progress" status, and a start time taken from
 * the same clock (Date.now()).
 *
 * Consumer: trace-collector.ts when starting a session, unit, or tool span.
 *
 * @param name - Human-readable label for the span.
 * @param kind - Role of the span in the hierarchy.
 * @param attributes - Metadata to attach. The object is stored by reference,
 *   not copied; when omitted, a new empty object is used.
 * @returns The new span with empty `children` and `events` arrays.
 */
export function createSpan(
  name: string,
  kind: SpanKind,
  attributes: SpanAttributes = {},
): Span {
  // Key order is kept stable because it determines the serialized JSON layout.
  const span: Span = {
    id: randomUUID(),
    name,
    kind,
    status: "in_progress",
    startTime: Date.now(),
    attributes,
    children: [],
    events: [],
  };
  return span;
}
|
|
|
|
/**
 * Close a span: store its final status and stamp the end time.
 *
 * Purpose: make sure every finished span carries both a status and an end
 * timestamp so that duration calculations and success-rate metrics are
 * accurate.
 *
 * Consumer: trace-collector.ts when a unit or tool finishes.
 *
 * @param span - Span to close; it is mutated in place.
 * @param status - Status to record. Defaults to "ok".
 * @returns The same span instance that was passed in.
 */
export function endSpan(span: Span, status: SpanStatus = "ok"): Span {
  // Any previously recorded status/endTime is overwritten unconditionally.
  return Object.assign(span, { status, endTime: Date.now() });
}
|
|
|
|
/**
 * Record a named event on a span, stamped with the current time.
 *
 * Purpose: let collectors note semantically rich checkpoints (model
 * switches, gate completions) inside an existing span; only the span's
 * `events` array is touched.
 *
 * Consumer: trace-collector.ts during auto-mode phase transitions.
 *
 * @param span - Span that receives the event (appended to `span.events`).
 * @param name - Label for the event.
 * @param attributes - Optional scalar key/value details, stored as given.
 */
export function addEvent(
  span: Span,
  name: string,
  attributes?: Record<string, string | number | boolean | null>,
): void {
  const event: TraceEvent = {
    name,
    timestamp: Date.now(),
    attributes,
  };
  span.events.push(event);
}
|
|
|
|
/**
 * Record a failure on a span and mark the span as errored.
 *
 * Purpose: capture failure details (including the stack trace when one is
 * available) inside the trace so that debugging can be done from the trace
 * file alone without cross-referencing separate log files.
 *
 * Consumer: trace-collector.ts when a tool call or unit throws.
 *
 * Effects on `span`:
 * - appends an event named "error" whose attributes hold `message` and,
 *   when non-empty, `stack`;
 * - sets `status` to "error" (overwriting any earlier status);
 * - sets `endTime` only if the span has none yet, so an already-closed span
 *   keeps its original end time.
 *
 * @param span - Span to annotate; it is mutated in place.
 * @param message - Human-readable error message.
 * @param stack - Optional stack trace; omitted from the event when empty.
 */
export function addError(span: Span, message: string, stack?: string): void {
  // Read the clock once so the error event and the span's end time agree.
  const now = Date.now();

  const attributes: Record<string, string | number | boolean | null> = {
    message,
  };
  if (stack) attributes.stack = stack;

  span.events.push({ name: "error", timestamp: now, attributes });
  span.status = "error";

  // Compare against undefined rather than truthiness so a legitimate end
  // time of 0 is not mistaken for "not ended yet".
  if (span.endTime === undefined) span.endTime = now;
}
|
|
|
|
// ---------------------------------------------------------------------------
// Trace helpers
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Start a new trace together with its root session span.
 *
 * Purpose: create the top-level trace container and its root session span
 * in one call so that headless.ts never ends up with a trace lacking a
 * valid root.
 *
 * Consumer: headless.ts at the start of an auto-mode session.
 *
 * @param projectRoot - Project root recorded on both the trace and the root span.
 * @param sessionId - Optional session identifier; the root span is named
 *   `session:<sessionId>`, or `session:unknown` when it is missing.
 * @param command - Optional command string stored on the root span's attributes.
 * @param model - Optional model name stored on the root span's attributes.
 * @returns A version-1 trace with a random UUID and an ISO-8601 `startedAt`.
 */
export function createTrace(
  projectRoot: string,
  sessionId?: string,
  command?: string,
  model?: string,
): Trace {
  const sessionLabel = sessionId ?? "unknown";
  const sessionAttributes: SpanAttributes = {
    sessionId,
    projectRoot,
    command,
    model,
  };
  const rootSpan = createSpan(
    `session:${sessionLabel}`,
    "session",
    sessionAttributes,
  );

  const trace: Trace = {
    id: randomUUID(),
    version: 1,
    projectRoot,
    sessionId,
    startedAt: new Date().toISOString(),
    rootSpan,
  };
  return trace;
}
|
|
|
|
/**
 * Close a trace by stamping `completedAt` with the current time.
 *
 * Purpose: signal to readers that the tree is complete, so session duration
 * and aggregate costs can be computed safely.
 *
 * Consumer: headless.ts in the normal exit path and signal handlers.
 *
 * Only `completedAt` is written; the root span's status and end time are
 * left as they are.
 *
 * @param trace - Trace to finalize; it is mutated in place.
 * @returns The same trace instance that was passed in.
 */
export function finalizeTrace(trace: Trace): Trace {
  const closedAt = new Date();
  trace.completedAt = closedAt.toISOString();
  return trace;
}
|
|
|
|
/**
 * Locate a span by ID within a span tree (pre-order, depth-first search).
 *
 * Purpose: let collectors find an existing span (e.g. to attach a child or
 * end it) without maintaining a separate ID-to-span map.
 *
 * Consumer: trace-collector.ts when bridging async tool-call results back
 * to their original span.
 *
 * @param span - Root of the subtree to search; it is itself a candidate.
 * @param id - Span ID to look for.
 * @returns The first span in pre-order whose `id` matches, or `undefined`
 *   when no span in the subtree matches.
 */
export function findSpan(span: Span, id: string): Span | undefined {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // the first child is popped (visited) first, preserving pre-order.
  const pending: Span[] = [span];
  for (
    let node = pending.pop();
    node !== undefined;
    node = pending.pop()
  ) {
    if (node.id === id) return node;
    for (const child of [...node.children].reverse()) {
      pending.push(child);
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Attach a span as the last child of another span.
 *
 * Purpose: build the hierarchical tree (session → unit → tool) so that
 * trace readers can collapse, expand, or aggregate by level.
 *
 * Consumer: trace-collector.ts when starting a unit or tool inside an
 * already-running parent span.
 *
 * @param parent - Span that receives the child; its `children` array is mutated.
 * @param child - Span to append. It is stored by reference, not copied.
 */
export function addChildSpan(parent: Span, child: Span): void {
  const { children } = parent;
  children.push(child);
}
|
|
|
|
/**
 * Lazily visit every span of a subtree in pre-order (parent before its
 * children, children in array order).
 *
 * Purpose: offer one reusable traversal for aggregators, exporters, and
 * debug printers so that consumers do not each write their own recursive
 * loop.
 *
 * Consumer: trace analysis scripts, cost aggregators, and test assertions
 * that verify span tree shape.
 *
 * @param span - Root of the subtree to walk; it is yielded first.
 * @returns A generator yielding each span exactly once per position in the tree.
 */
export function* walkSpans(span: Span): Generator<Span, void, unknown> {
  // Iterators of the ancestor levels, paused while a deeper level is walked.
  const suspended: Iterator<Span>[] = [];
  // Start with a one-element level holding only the root.
  let cursor: Iterator<Span> | undefined = [span][Symbol.iterator]();

  while (cursor !== undefined) {
    const step = cursor.next();
    if (step.done === true) {
      // Current level exhausted: resume the parent level, if any.
      cursor = suspended.pop();
      continue;
    }
    yield step.value;
    // Descend: the children iterator is opened only after the consumer
    // resumes the generator, just as a nested for...of would.
    suspended.push(cursor);
    cursor = step.value.children[Symbol.iterator]();
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
// Export
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Write a trace as pretty-printed JSON (2-space indent, UTF-8) to `path`,
 * creating the parent directory chain first when it does not exist.
 *
 * Purpose: let trace consumers (tests, CI scripts, manual debugging)
 * persist a trace anywhere on disk without hard-coding .sf/traces/ logic.
 *
 * Consumer: test suites that write traces to temp directories, and custom
 * integrations that ship traces to external observability platforms.
 *
 * @param trace - Trace to serialize.
 * @param path - Destination file path; an existing file is overwritten.
 */
export function exportTrace(trace: Trace, path: string): void {
  // Joining with ".." resolves to the directory that contains `path`.
  const parentDir = join(path, "..");
  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }

  const payload = JSON.stringify(trace, null, 2);
  writeFileSync(path, payload, "utf-8");
}
|
|
|
|
/**
 * Write a trace into `<projectRoot>/.sf/traces/` as pretty-printed JSON.
 * The file is named `trace-<timestamp>.json`, where the timestamp is
 * Date.now() at write time; the directory is created when missing.
 *
 * Purpose: provide the standard, project-local trace sink so that every
 * auto-mode session leaves a discoverable artifact in a known location.
 *
 * Consumer: headless.ts in the normal exit path and signal handlers.
 *
 * @param trace - Trace to serialize.
 * @param projectRoot - Directory under which `.sf/traces/` lives.
 * @returns The path of the file that was written.
 */
export function exportTraceToProject(
  trace: Trace,
  projectRoot: string,
): string {
  const tracesDir = join(projectRoot, ".sf", "traces");
  if (existsSync(tracesDir) === false) {
    mkdirSync(tracesDir, { recursive: true });
  }

  const path = join(tracesDir, `trace-${Date.now()}.json`);
  writeFileSync(path, JSON.stringify(trace, null, 2), "utf-8");
  return path;
}
|
|
|
|
/**
 * Minimal structural check for a parsed trace document.
 *
 * Verifies only what the helpers in this module need to operate: a string
 * `id` and a `rootSpan` object that has a `children` array. It is not a
 * full schema validation.
 */
function isTraceShaped(value: unknown): value is Trace {
  if (typeof value !== "object" || value === null) return false;
  const candidate = value as Record<string, unknown>;
  if (typeof candidate.id !== "string") return false;

  const root = candidate.rootSpan;
  if (typeof root !== "object" || root === null) return false;
  return Array.isArray((root as Record<string, unknown>).children);
}

/**
 * Read a trace file from disk and return it as a typed `Trace`.
 *
 * Purpose: round-trip a trace file back into the typed model so that
 * analysis tools, test assertions, and replay utilities can work with
 * structured data instead of raw JSON.
 *
 * Consumer: trace analysis scripts, test helpers, and any tool that reads
 * .sf/traces/ for post-session inspection.
 *
 * @param path - Path of the JSON trace file (read as UTF-8).
 * @returns The parsed trace.
 * @throws If the file cannot be read (error from `readFileSync`), if it does
 *   not contain valid JSON (`SyntaxError`), or if the parsed value lacks a
 *   string `id` or a `rootSpan` with a `children` array (`Error`).
 */
export function readTrace(path: string): Trace {
  // Parse into `unknown` first: file contents are external input and must be
  // checked before being treated as a Trace.
  const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
  if (!isTraceShaped(parsed)) {
    throw new Error(
      `Invalid trace file (expected an object with "id" and "rootSpan"): ${path}`,
    );
  }
  return parsed;
}
|