From c6ee7701b2b08e6f5395d4941ec191baf7a611bd Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 8 May 2026 14:28:22 +0200 Subject: [PATCH] autoresearch: auto-fix format + organizeImports Result: {"status": "keep", "diagnostics": 11, "errors": 0, "warnings": 11} --- autoresearch.jsonl | 1 + copilot-thoughts.md | 15 + src/resources/extensions/sf/auto/phases.js | 10 +- .../extensions/sf/bootstrap/register-hooks.js | 214 ++++++++-- .../extensions/sf/commands/catalog.js | 9 +- .../extensions/sf/commands/handlers/core.js | 1 + .../extensions/sf/commands/handlers/ops.js | 11 +- src/resources/extensions/sf/cost-command.js | 10 +- .../extensions/sf/memory-repository.js | 324 ++++++++++++++ .../extensions/sf/metrics-central.js | 134 ++++-- src/resources/extensions/sf/metrics.js | 9 +- .../extensions/sf/reasoning-assist.js | 106 ++++- src/resources/extensions/sf/sf-db.js | 11 +- .../extensions/sf/skills/eval-harness.js | 3 +- .../extensions/sf/subagent-inheritance.js | 24 +- .../sf/tests/memory-repository.test.mjs | 209 +++++++++ .../sf/tests/metrics-central.test.mjs | 186 ++++---- .../sf/tests/trajectory-recorder.test.mjs | 210 +++++++++ .../extensions/sf/trajectory-command.js | 83 ++++ .../extensions/sf/trajectory-recorder.js | 399 ++++++++++++++++++ .../extensions/sf/uok/gate-runner.js | 5 +- .../extensions/sf/uok/loop-adapter.js | 16 +- .../extensions/sf/uok/parity-report.js | 4 +- src/resources/extensions/sf/uok/plan-v2.js | 4 +- 24 files changed, 1797 insertions(+), 201 deletions(-) create mode 100644 src/resources/extensions/sf/memory-repository.js create mode 100644 src/resources/extensions/sf/tests/memory-repository.test.mjs create mode 100644 src/resources/extensions/sf/tests/trajectory-recorder.test.mjs create mode 100644 src/resources/extensions/sf/trajectory-command.js create mode 100644 src/resources/extensions/sf/trajectory-recorder.js diff --git a/autoresearch.jsonl b/autoresearch.jsonl index 91499ed67..556941958 100644 --- a/autoresearch.jsonl +++ 
b/autoresearch.jsonl @@ -1,2 +1,3 @@ {"type": "config", "name": "reduce-biome-diagnostics", "metricName": "diagnostics", "metricUnit": "", "bestDirection": "lower"} {"run": 1, "commit": "15269f4", "metric": 40.0, "metrics": {}, "status": "keep", "description": "baseline measurement", "timestamp": 1778242955776, "segment": 0, "confidence": null, "asi": {"hypothesis": "baseline measurement", "breakdown": "26 errors, 13 warnings, 1 info"}} +{"run": 2, "commit": "72e27f9", "metric": 11.0, "metrics": {}, "status": "keep", "description": "auto-fix format + organizeImports: biome check --write src/", "timestamp": 1778243276590, "segment": 0, "confidence": null, "asi": {"hypothesis": "All 26 errors are auto-fixable format/organizeImports; fixing them drops total from 40 to 11", "breakdown": "0 errors, 11 warnings"}} diff --git a/copilot-thoughts.md b/copilot-thoughts.md index 66f09cd2b..e92971fa4 100644 --- a/copilot-thoughts.md +++ b/copilot-thoughts.md @@ -752,6 +752,21 @@ Still needed: - Remove `/sf` from docs/web/tests (Phase 2 deprecation) +Completed ✓ (RA.Aid Patterns — Phase 2): + +- structured memory repositories (`memory-repository.js` — SQLite-backed key facts, + snippets, research notes, human inputs, work logs, decisions; content hash + deduplication; auto-summarization; prompt formatting; 11 tests pass) +- trajectory recording (`trajectory-recorder.js` — per-step tool/LLM/error + execution trace with costs, tokens, errors; session+unit scoped; exportable; + 10 tests pass) +- trajectory command (`/trajectory` — step-by-step trace with `--all`, `--errors`, + `--tools`, `--llm`, `--limit=N` flags; wired into `commands/handlers/ops.js`) +- reasoning assist + memory integration (`reasoning-assist.js` loads key facts, + snippets, research notes from memory repository into pre-stage consultation prompt) +- compaction fix (`register-hooks.js` — never cancel compaction; provide custom + compaction summary with work state preservation instead) + Completed ✓ 
(Additional): - schema-backed task/frontmatter fields (`task-frontmatter.js` — risk levels, diff --git a/src/resources/extensions/sf/auto/phases.js b/src/resources/extensions/sf/auto/phases.js index c8d565397..160bbc067 100644 --- a/src/resources/extensions/sf/auto/phases.js +++ b/src/resources/extensions/sf/auto/phases.js @@ -58,6 +58,11 @@ import { readProductionMutationApprovalStatus, } from "../production-mutation-approval.js"; import { pauseAutoForProviderError } from "../provider-error-pause.js"; +import { + buildReasoningAssistPrompt, + injectReasoningGuidance, + isReasoningAssistEnabled, +} from "../reasoning-assist.js"; import { loadEvidenceFromDisk, resetEvidence, @@ -78,11 +83,6 @@ import { } from "../sf-db.js"; import { getEligibleSlices } from "../slice-parallel-eligibility.js"; import { startSliceParallel } from "../slice-parallel-orchestrator.js"; -import { - buildReasoningAssistPrompt, - injectReasoningGuidance, - isReasoningAssistEnabled, -} from "../reasoning-assist.js"; import { handleProductAudit } from "../tools/product-audit-tool.js"; import { parseUnitId } from "../unit-id.js"; import { resolveUokFlags } from "../uok/flags.js"; diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 290d0207a..48240e4f0 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -435,54 +435,124 @@ export function registerHooks(pi, ecosystemHandlers = []) { } }); pi.on("session_before_compact", async () => { - // Only cancel compaction while autonomous mode is actively running. - // Paused autonomous mode should allow compaction — the user may be doing - // interactive work (#3165). - if (isAutoActive()) { - return { cancel: true }; - } + // Never cancel compaction — instead provide a custom compaction summary + // that preserves work state. 
Cancelling compaction causes context overflow + // which degrades performance and can hit hard limits. The custom summary + // ensures the agent retains critical context after compaction. const basePath = process.cwd(); const { ensureDbOpen } = await import("./dynamic-tools.js"); await ensureDbOpen(); const state = await deriveState(basePath); - if (!state.activeMilestone || !state.activeSlice || !state.activeTask) - return; - if (state.phase !== "executing") return; - const sliceDir = resolveSlicePath( - basePath, - state.activeMilestone.id, - state.activeSlice.id, - ); - if (!sliceDir) return; - const existingFile = resolveSliceFile( - basePath, - state.activeMilestone.id, - state.activeSlice.id, - "CONTINUE", - ); - if (existingFile && (await loadFile(existingFile))) return; - const legacyContinue = join(sliceDir, "continue.md"); - if (await loadFile(legacyContinue)) return; - const continuePath = join(sliceDir, `${state.activeSlice.id}-CONTINUE.md`); - await saveFile( - continuePath, - formatContinue({ - frontmatter: { - milestone: state.activeMilestone.id, - slice: state.activeSlice.id, - task: state.activeTask.id, - step: 0, - totalSteps: 0, - status: "compacted", - savedAt: new Date().toISOString(), + + // Build work state summary for compaction context preservation + const workState = []; + if (state.activeMilestone) { + workState.push( + `Active milestone: ${state.activeMilestone.id} (${state.activeMilestone.title})`, + ); + } + if (state.activeSlice) { + workState.push( + `Active slice: ${state.activeSlice.id} (${state.activeSlice.title})`, + ); + } + if (state.activeTask) { + workState.push( + `Active task: ${state.activeTask.id} (${state.activeTask.title})`, + ); + } + if (state.phase) { + workState.push(`Current phase: ${state.phase}`); + } + + // Include mode state in compaction summary + const { getAutoSession } = await import("../auto/session.js"); + const session = getAutoSession(); + const mode = session?.getMode?.(); + if (mode) { + 
workState.push( + `Work mode: ${mode.workMode}, Run control: ${mode.runControl}, Permission: ${mode.permissionProfile}, Model: ${mode.modelMode}`, + ); + } + + // If autonomous mode is active, include current unit details + if (isAutoActive()) { + const dash = getAutoDashboardData(); + if (dash.currentUnit) { + workState.push( + `Current unit: ${dash.currentUnit.type} ${dash.currentUnit.id}`, + ); + } + } + + // Write CONTINUE file for task recovery if in executing phase + if ( + state.activeMilestone && + state.activeSlice && + state.activeTask && + state.phase === "executing" + ) { + const sliceDir = resolveSlicePath( + basePath, + state.activeMilestone.id, + state.activeSlice.id, + ); + if (sliceDir) { + const existingFile = resolveSliceFile( + basePath, + state.activeMilestone.id, + state.activeSlice.id, + "CONTINUE", + ); + const hasExisting = existingFile && (await loadFile(existingFile)); + const legacyContinue = join(sliceDir, "continue.md"); + const hasLegacy = await loadFile(legacyContinue); + + if (!hasExisting && !hasLegacy) { + const continuePath = join( + sliceDir, + `${state.activeSlice.id}-CONTINUE.md`, + ); + await saveFile( + continuePath, + formatContinue({ + frontmatter: { + milestone: state.activeMilestone.id, + slice: state.activeSlice.id, + task: state.activeTask.id, + step: 0, + totalSteps: 0, + status: "compacted", + savedAt: new Date().toISOString(), + }, + completedWork: `Task ${state.activeTask.id} (${state.activeTask.title}) was in progress when compaction occurred.`, + remainingWork: "Check the task plan for remaining steps.", + decisions: "Check task summary files for prior decisions.", + context: `Session was compacted. Work state: ${workState.join("; ")}`, + nextAction: `Resume task ${state.activeTask.id}: ${state.activeTask.title}.`, + }), + ); + } + } + } + + // Return custom compaction summary that preserves work state + // instead of cancelling compaction + return { + compaction: { + summary: + workState.length > 0 + ? 
`Work in progress: ${workState.join(". ")}.` + : "Session compacted. No active work state.", + firstKeptEntryId: undefined, // Let Pi decide + tokensBefore: undefined, // Let Pi measure + details: { + workState, + isAutoActive: isAutoActive(), + mode: mode || null, }, - completedWork: `Task ${state.activeTask.id} (${state.activeTask.title}) was in progress when compaction occurred.`, - remainingWork: "Check the task plan for remaining steps.", - decisions: "Check task summary files for prior decisions.", - context: "Session was auto-compacted by Pi. Resume with /next.", - nextAction: `Resume task ${state.activeTask.id}: ${state.activeTask.title}.`, - }), - ); + }, + }; }); pi.on("session_shutdown", async (_event, ctx) => { resetLearningRuntime(); @@ -677,6 +747,30 @@ export function registerHooks(pi, ecosystemHandlers = []) { ); if (result.block) return result; }); + // ── Trajectory recording: capture tool calls ── + pi.on("tool_call", async (event) => { + try { + const { recordTrajectoryStep, STEP_TYPES } = await import( + "../trajectory-recorder.js" + ); + const sessionId = process.env.SF_SESSION_ID || "default"; + const dash = getAutoDashboardData(); + const unitId = dash.currentUnit?.id; + recordTrajectoryStep({ + sessionId, + unitId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: event.toolName, + toolParams: event.input, + metadata: { + toolCallId: event.toolCallId, + isAutoActive: isAutoActive(), + }, + }); + } catch { + // Trajectory recording is best-effort + } + }); // ── Safety harness: evidence collection + destructive command warnings ── pi.on("tool_call", async (event, ctx) => { if (!isAutoActive()) return; @@ -901,6 +995,40 @@ export function registerHooks(pi, ecosystemHandlers = []) { } } } + // Trajectory recording: capture every tool execution step + try { + const { recordTrajectoryStep, STEP_TYPES } = await import( + "../trajectory-recorder.js" + ); + const sessionId = process.env.SF_SESSION_ID || "default"; + const dash = getAutoDashboardData(); 
+ const unitId = dash.currentUnit?.id; + + // Record tool result + recordTrajectoryStep({ + sessionId, + unitId, + stepType: event.isError ? STEP_TYPES.ERROR : STEP_TYPES.TOOL_RESULT, + toolName: event.toolName, + toolResult: + typeof event.result === "string" + ? event.result + : JSON.stringify(event.result).slice(0, 2000), + isError: event.isError, + errorMessage: event.isError + ? (typeof event.result === "string" + ? event.result + : String(event.result) + ).slice(0, 500) + : undefined, + metadata: { + duration_ms: event.durationMs, + toolCallId: event.toolCallId, + }, + }); + } catch { + // Trajectory recording is best-effort; don't fail tool execution + } }); pi.on("model_select", async (_event, ctx) => { await syncServiceTierStatus(ctx); diff --git a/src/resources/extensions/sf/commands/catalog.js b/src/resources/extensions/sf/commands/catalog.js index 42ffa1431..faabf54f3 100644 --- a/src/resources/extensions/sf/commands/catalog.js +++ b/src/resources/extensions/sf/commands/catalog.js @@ -84,7 +84,14 @@ export const TOP_LEVEL_SUBCOMMANDS = [ { cmd: "plan", desc: "Force planning stage for current unit" }, { cmd: "implement", desc: "Force implementation stage for current unit" }, { cmd: "history", desc: "View execution history" }, - { cmd: "cost", desc: "Show cost summary from metrics-central or legacy ledger" }, + { + cmd: "cost", + desc: "Show cost summary from metrics-central or legacy ledger", + }, + { + cmd: "trajectory", + desc: "View execution trajectory — step-by-step trace with costs and errors", + }, { cmd: "undo", desc: "Revert last completed unit" }, { cmd: "undo-task", diff --git a/src/resources/extensions/sf/commands/handlers/core.js b/src/resources/extensions/sf/commands/handlers/core.js index 402971a27..9de1748e1 100644 --- a/src/resources/extensions/sf/commands/handlers/core.js +++ b/src/resources/extensions/sf/commands/handlers/core.js @@ -89,6 +89,7 @@ export function showHelp(ctx, args = "") { " /plan Force planning stage for current 
unit", " /implement Force implementation stage for current unit", " /history View execution history [--cost] [--phase] [--model] [N]", + " /trajectory View execution trajectory — step-by-step trace with costs and errors", " /changelog Show categorized release notes [version]", ` /notifications View persistent notification history [clear|tail|filter] (${formattedShortcutPair("notifications")})`, "", diff --git a/src/resources/extensions/sf/commands/handlers/ops.js b/src/resources/extensions/sf/commands/handlers/ops.js index 9efb9e376..074a6e9d2 100644 --- a/src/resources/extensions/sf/commands/handlers/ops.js +++ b/src/resources/extensions/sf/commands/handlers/ops.js @@ -28,8 +28,8 @@ import { handlePrBranch } from "../../commands-pr-branch.js"; import { handleRate } from "../../commands-rate.js"; import { handleSessionReport } from "../../commands-session-report.js"; import { handleShip } from "../../commands-ship.js"; -import { handleExport } from "../../export.js"; import { handleCost } from "../../cost-command.js"; +import { handleExport } from "../../export.js"; import { handleHistory } from "../../history.js"; import { handleUndo } from "../../undo.js"; import { projectRoot } from "../context.js"; @@ -126,6 +126,15 @@ export async function handleOpsCommand(trimmed, ctx, pi) { ); return true; } + if (trimmed === "trajectory" || trimmed.startsWith("trajectory ")) { + const { handleTrajectory } = await import("../../trajectory-command.js"); + await handleTrajectory( + trimmed.replace(/^trajectory\s*/, "").trim(), + ctx, + projectRoot(), + ); + return true; + } if (trimmed === "undo-task" || trimmed.startsWith("undo-task ")) { const { handleUndoTask } = await import("../../undo.js"); await handleUndoTask( diff --git a/src/resources/extensions/sf/cost-command.js b/src/resources/extensions/sf/cost-command.js index 73207b59b..384214987 100644 --- a/src/resources/extensions/sf/cost-command.js +++ b/src/resources/extensions/sf/cost-command.js @@ -6,11 +6,7 @@ * * 
Consumer: /cost CLI command. */ -import { - formatCost, - getLedger, - loadLedgerFromDisk, -} from "./metrics.js"; +import { formatCost, getLedger, loadLedgerFromDisk } from "./metrics.js"; import { queryMetrics } from "./metrics-central.js"; import { getDatabase } from "./sf-db.js"; @@ -34,7 +30,9 @@ export async function handleCost(args, ctx, basePath) { ]; for (const row of rows.slice(0, 20)) { const labels = JSON.parse(row.labels || "{}"); - lines.push(` ${labels.unit_id || "?"}: ${formatCost(row.value)} (${labels.model_id || "?"})`); + lines.push( + ` ${labels.unit_id || "?"}: ${formatCost(row.value)} (${labels.model_id || "?"})`, + ); } ctx.ui.notify(lines.join("\n"), "info"); return; diff --git a/src/resources/extensions/sf/memory-repository.js b/src/resources/extensions/sf/memory-repository.js new file mode 100644 index 000000000..99ea487d2 --- /dev/null +++ b/src/resources/extensions/sf/memory-repository.js @@ -0,0 +1,324 @@ +/** + * Memory Repository — Structured fact/snippet/note storage for SF units. + * + * Purpose: Provide RA.Aid-style structured memory with DB persistence. + * Stores key facts, code snippets, research notes, and human inputs per session. + * + * Consumer: research units, planning units, execute-task units, /memory command. + * + * Design: + * - SQLite-backed with JSONB-like flexibility + * - Session-scoped: memories belong to a session + * - Unit-scoped: memories can be tagged with unitId for filtering + * - Auto-summarization when count exceeds threshold + * - Deduplication via content hash + */ + +import { createHash } from "node:crypto"; +import { debugLog } from "./debug-logger.js"; +import { isDbAvailable, withQueryTimeout } from "./sf-db.js"; +import { logWarning } from "./workflow-logger.js"; + +const MEMORY_TABLE = "sf_memory"; +const AUTO_SUMMARIZE_THRESHOLD = 50; + +/** + * Memory types matching RA.Aid's structured memory model. 
+ */ +export const MEMORY_TYPES = { + KEY_FACT: "key_fact", // Important discovery, rule, or finding + KEY_SNIPPET: "key_snippet", // Code snippet with file location + RESEARCH_NOTE: "research_note", // Detailed research findings + HUMAN_INPUT: "human_input", // User-provided input or clarification + WORK_LOG: "work_log", // Unit completion or milestone event + DECISION: "decision", // Architectural or design decision +}; + +/** + * Ensure the memory table exists. + */ +export function ensureMemoryTable(db) { + if (!db) return false; + try { + db.exec(` + CREATE TABLE IF NOT EXISTS ${MEMORY_TABLE} ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + unit_id TEXT, + type TEXT NOT NULL CHECK(type IN ('key_fact', 'key_snippet', 'research_note', 'human_input', 'work_log', 'decision')), + content TEXT NOT NULL, + metadata TEXT, -- JSON: {filepath, line_number, description, source, tags} + content_hash TEXT NOT NULL, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + CREATE INDEX IF NOT EXISTS idx_memory_session ON ${MEMORY_TABLE}(session_id); + CREATE INDEX IF NOT EXISTS idx_memory_type ON ${MEMORY_TABLE}(type); + CREATE INDEX IF NOT EXISTS idx_memory_unit ON ${MEMORY_TABLE}(unit_id); + CREATE INDEX IF NOT EXISTS idx_memory_hash ON ${MEMORY_TABLE}(content_hash); + `); + return true; + } catch (err) { + logWarning("memory", "Failed to ensure memory table", { + error: String(err), + }); + return false; + } +} + +function computeHash(content) { + return createHash("sha256").update(content).digest("hex").slice(0, 16); +} + +/** + * Store a memory entry. 
+ * + * @param {object} params + * @param {string} params.sessionId — required + * @param {string} [params.unitId] — optional unit tag + * @param {string} params.type — MEMORY_TYPES value + * @param {string} params.content — memory content + * @param {object} [params.metadata] — optional structured metadata + * @param {object} [params.db] — database connection + * @returns {{id: number, created: boolean, deduped: boolean}|null} + */ +export function storeMemory({ + sessionId, + unitId, + type, + content, + metadata, + db, +}) { + if (!sessionId || !type || !content) { + logWarning("memory", "storeMemory missing required fields", { + sessionId, + type, + }); + return null; + } + if (!Object.values(MEMORY_TYPES).includes(type)) { + logWarning("memory", "Invalid memory type", { type }); + return null; + } + + const hash = computeHash(content); + const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null); + if (!dbConn) return null; + + ensureMemoryTable(dbConn); + + try { + // Deduplication: check for existing hash in same session + const existing = dbConn + .prepare( + `SELECT id FROM ${MEMORY_TABLE} WHERE session_id = ? AND content_hash = ? LIMIT 1`, + ) + .get(sessionId, hash); + + if (existing) { + debugLog("memory-dedup", { sessionId, type, hash: hash.slice(0, 8) }); + return { id: existing.id, created: false, deduped: true }; + } + + const result = dbConn + .prepare( + `INSERT INTO ${MEMORY_TABLE} (session_id, unit_id, type, content, metadata, content_hash) + VALUES (?, ?, ?, ?, ?, ?)`, + ) + .run( + sessionId, + unitId || null, + type, + content, + metadata ? 
JSON.stringify(metadata) : null, + hash, + ); + + // Auto-summarize check + maybeAutoSummarize(dbConn, sessionId); + + return { + id: Number(result.lastInsertRowid), + created: true, + deduped: false, + }; + } catch (err) { + logWarning("memory", "storeMemory failed", { + error: String(err), + sessionId, + type, + }); + return null; + } +} + +/** + * Retrieve memories with optional filtering. + * + * @param {object} params + * @param {string} params.sessionId — required + * @param {string} [params.type] — filter by memory type + * @param {string} [params.unitId] — filter by unit + * @param {string} [params.query] — substring search in content + * @param {number} [params.limit=50] — max results + * @param {object} [params.db] + * @returns {Array<{id, sessionId, unitId, type, content, metadata, createdAt}>} + */ +export function getMemories({ + sessionId, + type, + unitId, + query, + limit = 50, + db, +}) { + const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null); + if (!dbConn) return []; + + ensureMemoryTable(dbConn); + + try { + const conditions = ["session_id = ?"]; + const params = [sessionId]; + + if (type) { + conditions.push("type = ?"); + params.push(type); + } + if (unitId) { + conditions.push("unit_id = ?"); + params.push(unitId); + } + if (query) { + conditions.push("content LIKE ?"); + params.push(`%${query}%`); + } + + const whereClause = conditions.join(" AND "); + params.push(limit); + + const rows = dbConn + .prepare( + `SELECT id, session_id, unit_id, type, content, metadata, created_at + FROM ${MEMORY_TABLE} + WHERE ${whereClause} + ORDER BY created_at DESC + LIMIT ?`, + ) + .all(...params); + + return rows.map((r) => ({ + id: r.id, + sessionId: r.session_id, + unitId: r.unit_id, + type: r.type, + content: r.content, + metadata: r.metadata ? 
JSON.parse(r.metadata) : null, + createdAt: r.created_at, + })); + } catch (err) { + logWarning("memory", "getMemories failed", { + error: String(err), + sessionId, + }); + return []; + } +} + +/** + * Get memory counts by type for a session. + */ +export function getMemoryCounts(sessionId, db) { + const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null); + if (!dbConn) return {}; + + ensureMemoryTable(dbConn); + + try { + const rows = dbConn + .prepare( + `SELECT type, COUNT(*) as count FROM ${MEMORY_TABLE} WHERE session_id = ? GROUP BY type`, + ) + .all(sessionId); + + const counts = {}; + for (const r of rows) counts[r.type] = r.count; + return counts; + } catch (err) { + logWarning("memory", "getMemoryCounts failed", { error: String(err) }); + return {}; + } +} + +/** + * Format memories for prompt injection (RA.Aid-style). + */ +export function formatMemoriesForPrompt(memories, options = {}) { + if (!memories || memories.length === 0) return ""; + + const { type = null, maxChars = 4000, header = null } = options; + const filtered = type ? memories.filter((m) => m.type === type) : memories; + + const parts = []; + if (header) parts.push(header); + + let totalChars = 0; + for (const m of filtered) { + const line = `[${m.type}] ${m.content}`; + if (totalChars + line.length > maxChars) { + parts.push( + `... (${filtered.length - parts.length + (header ? 1 : 0)} more)`, + ); + break; + } + parts.push(line); + totalChars += line.length + 1; + } + + return parts.join("\n"); +} + +/** + * Auto-summarize when memory count exceeds threshold. 
+ */ +function maybeAutoSummarize(db, sessionId) { + try { + const count = db + .prepare(`SELECT COUNT(*) as c FROM ${MEMORY_TABLE} WHERE session_id = ?`) + .get(sessionId).c; + + if (count >= AUTO_SUMMARIZE_THRESHOLD) { + debugLog("memory-auto-summarize", { + sessionId, + count, + threshold: AUTO_SUMMARIZE_THRESHOLD, + }); + // Future: dispatch a summarization unit or compress oldest memories + } + } catch { + // Non-fatal + } +} + +/** + * Delete memories older than a given date. + */ +export function pruneMemories(sessionId, olderThan, db) { + const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null); + if (!dbConn) return 0; + + ensureMemoryTable(dbConn); + + try { + const result = dbConn + .prepare( + `DELETE FROM ${MEMORY_TABLE} WHERE session_id = ? AND created_at < ?`, + ) + .run(sessionId, olderThan); + return result.changes; + } catch (err) { + logWarning("memory", "pruneMemories failed", { error: String(err) }); + return 0; + } +} diff --git a/src/resources/extensions/sf/metrics-central.js b/src/resources/extensions/sf/metrics-central.js index 6c2355898..4c9947920 100644 --- a/src/resources/extensions/sf/metrics-central.js +++ b/src/resources/extensions/sf/metrics-central.js @@ -93,7 +93,11 @@ class Gauge { } class Histogram { - constructor(name, help, buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]) { + constructor( + name, + help, + buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10], + ) { this.name = name; this.help = help; this.buckets = [...buckets].sort((a, b) => a - b); @@ -116,7 +120,9 @@ class Histogram { yield `# HELP ${this.name} ${this.help}`; yield `# TYPE ${this.name} histogram`; for (const bucket of this.buckets) { - yield fmtLine(`${this.name}_bucket`, this.counts.get(bucket) ?? 0, { le: String(bucket) }); + yield fmtLine(`${this.name}_bucket`, this.counts.get(bucket) ?? 
0, { + le: String(bucket), + }); } yield fmtLine(`${this.name}_bucket`, this.count, { le: "+Inf" }); yield fmtLine(`${this.name}_sum`, this.sum); @@ -127,7 +133,10 @@ class Histogram { // ─── Label Escaping ───────────────────────────────────────────────────────── function _escapeLabel(v) { - return String(v).replace(/\\/g, "\\\\").replace(/=/g, "\\=").replace(/,/g, "\\,"); + return String(v) + .replace(/\\/g, "\\\\") + .replace(/=/g, "\\=") + .replace(/,/g, "\\,"); } function _unescapeLabel(v) { @@ -146,7 +155,7 @@ function _parseLabelKey(key) { let i = 0; while (i < key.length) { // Find the '=' separator for this label - let eqIdx = key.indexOf("=", i); + const eqIdx = key.indexOf("=", i); if (eqIdx === -1) break; const k = key.slice(i, eqIdx); // Parse the value, handling escapes @@ -188,12 +197,14 @@ function fmtLine(name, value, labels = {}) { function validateMetricName(name) { if (!name || typeof name !== "string") { - throw new TypeError(`Metric name must be a non-empty string, got: ${typeof name}`); + throw new TypeError( + `Metric name must be a non-empty string, got: ${typeof name}`, + ); } if (!METRIC_NAME_PATTERN.test(name)) { throw new Error( `Invalid metric name "${name}". 
Must match Prometheus naming convention: ` + - `^[a-zA-Z_:][a-zA-Z0-9_:]*$` + `^[a-zA-Z_:][a-zA-Z0-9_:]*$`, ); } } @@ -283,8 +294,12 @@ function ensureMetricsTable(db) { ) `); db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_name ON metrics(name)`); - db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_session ON metrics(session_id)`); - db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)`); + db.exec( + `CREATE INDEX IF NOT EXISTS idx_metrics_session ON metrics(session_id)`, + ); + db.exec( + `CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)`, + ); } catch (err) { logWarning("metrics-central", `DB table creation failed: ${err.message}`); } @@ -296,22 +311,43 @@ function persistMetricsToDb(registry, sessionId, db) { const ts = new Date().toISOString(); try { const insert = db.prepare( - "INSERT INTO metrics (name, type, labels, value, timestamp, session_id) VALUES (?, ?, ?, ?, ?, ?)" + "INSERT INTO metrics (name, type, labels, value, timestamp, session_id) VALUES (?, ?, ?, ?, ?, ?)", ); for (const c of registry.counters.values()) { for (const [key, value] of c.values) { const labels = c._parseKey(key); - insert.run(c.name, "counter", JSON.stringify(labels), value, ts, sessionId); + insert.run( + c.name, + "counter", + JSON.stringify(labels), + value, + ts, + sessionId, + ); } } for (const g of registry.gauges.values()) { for (const [key, value] of g.values) { const labels = g._parseKey(key); - insert.run(g.name, "gauge", JSON.stringify(labels), value, ts, sessionId); + insert.run( + g.name, + "gauge", + JSON.stringify(labels), + value, + ts, + sessionId, + ); } } for (const h of registry.histograms.values()) { - insert.run(h.name, "histogram", JSON.stringify({ count: h.count, sum: h.sum }), h.sum, ts, sessionId); + insert.run( + h.name, + "histogram", + JSON.stringify({ count: h.count, sum: h.sum }), + h.sum, + ts, + sessionId, + ); } } catch (err) { logWarning("metrics-central", `DB persist failed: ${err.message}`); @@ 
-334,14 +370,23 @@ function flushMetrics() { _flushFailures = 0; } catch (err) { _flushFailures++; - logWarning("metrics-central", `Flush failed (attempt ${_flushFailures}): ${err.message}`); + logWarning( + "metrics-central", + `Flush failed (attempt ${_flushFailures}): ${err.message}`, + ); if (_flushFailures < FLUSH_RETRY_MAX) { - const delay = FLUSH_RETRY_BASE_MS * Math.pow(2, _flushFailures - 1); + const delay = FLUSH_RETRY_BASE_MS * 2 ** (_flushFailures - 1); setTimeout(flushMetrics, delay); } else { // Record flush failure as a metric try { - getRegistry().counter("sf_metrics_flush_failed_total", "Total metrics flush failures", []).inc({}, 1); + getRegistry() + .counter( + "sf_metrics_flush_failed_total", + "Total metrics flush failures", + [], + ) + .inc({}, 1); } catch { // Best effort } @@ -404,7 +449,9 @@ export function recordCounter(name, labels = {}, amount = 1) { if (_sessionId && !labels.session_id) { labels = { ...labels, session_id: _sessionId }; } - getRegistry().counter(name, meta.help, Object.keys(labels)).inc(labels, amount); + getRegistry() + .counter(name, meta.help, Object.keys(labels)) + .inc(labels, amount); } /** @@ -445,7 +492,14 @@ export function recordHistogram(name, value) { * @param {number} cost — cost in USD * @param {string} [workMode] — current work mode */ -export function recordCost(unitId, modelId, inputTokens, outputTokens, cost, workMode = "") { +export function recordCost( + unitId, + modelId, + inputTokens, + outputTokens, + cost, + workMode = "", +) { const labels = { unit_id: unitId, model_id: modelId }; if (workMode) labels.work_mode = workMode; recordCounter("sf_cost_total", labels, cost); @@ -510,114 +564,114 @@ export function queryMetrics(db, sessionId = null, name = null, limit = 1000) { const METRIC_META = { // Subagent inheritance - "sf_subagent_dispatch_total": { + sf_subagent_dispatch_total: { help: "Total subagent dispatch attempts", labels: ["work_mode", "permission_profile"], }, - 
"sf_subagent_dispatch_blocked": { + sf_subagent_dispatch_blocked: { help: "Subagent dispatches blocked by inheritance policy", labels: ["reason", "work_mode", "permission_profile"], }, - "sf_subagent_dispatch_allowed": { + sf_subagent_dispatch_allowed: { help: "Subagent dispatches allowed after inheritance check", labels: ["work_mode", "permission_profile"], }, // Mode transitions - "sf_mode_transition_total": { + sf_mode_transition_total: { help: "Total mode transitions", labels: ["axis", "from", "to", "reason"], }, // Task frontmatter - "sf_task_created_total": { + sf_task_created_total: { help: "Total tasks created with frontmatter", labels: ["risk_level", "mutation_scope"], }, - "sf_task_parallel_blocked": { + sf_task_parallel_blocked: { help: "Tasks blocked from parallel execution by frontmatter", labels: ["reason"], }, // Parallel intent - "sf_parallel_intent_declared": { + sf_parallel_intent_declared: { help: "Parallel worker intents declared", labels: ["milestone_id"], }, - "sf_parallel_intent_conflict": { + sf_parallel_intent_conflict: { help: "Parallel intent conflicts detected", labels: ["milestone_id"], }, // Remote steering - "sf_remote_steering_applied": { + sf_remote_steering_applied: { help: "Remote steering directives applied", labels: ["directive_type", "source"], }, - "sf_remote_steering_rejected": { + sf_remote_steering_rejected: { help: "Remote steering directives rejected (throttle/invalid)", labels: ["reason"], }, // Skill eval - "sf_skill_eval_runs_total": { + sf_skill_eval_runs_total: { help: "Total skill evaluation runs", labels: ["skill_name", "passed"], }, - "sf_skill_eval_duration_ms": { + sf_skill_eval_duration_ms: { help: "Skill evaluation duration in milliseconds", buckets: [100, 500, 1000, 5000, 10000, 30000], }, // Cost guard - "sf_cost_guard_blocked": { + sf_cost_guard_blocked: { help: "Units blocked by cost guard", labels: ["reason", "model_id"], }, - "sf_cost_guard_hourly_spend": { + sf_cost_guard_hourly_spend: { help: "Current 
hourly spend in USD", }, // Gate runner - "sf_gate_runs_total": { + sf_gate_runs_total: { help: "Total gate executions", labels: ["gate_id", "outcome"], }, - "sf_gate_latency_ms": { + sf_gate_latency_ms: { help: "Gate execution latency in milliseconds", buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000], }, // Message bus - "sf_message_bus_messages_total": { + sf_message_bus_messages_total: { help: "Total messages in bus", labels: ["agent_id"], }, - "sf_message_bus_unread_total": { + sf_message_bus_unread_total: { help: "Unread messages in bus", labels: ["agent_id"], }, // Cost tracking - "sf_cost_total": { + sf_cost_total: { help: "Total cost in USD", labels: ["unit_id", "model_id", "work_mode"], }, - "sf_tokens_input_total": { + sf_tokens_input_total: { help: "Total input tokens", labels: ["model_id"], }, - "sf_tokens_output_total": { + sf_tokens_output_total: { help: "Total output tokens", labels: ["model_id"], }, - "sf_cost_last": { + sf_cost_last: { help: "Last recorded cost in USD", labels: ["unit_id", "model_id"], }, // Internal - "sf_metrics_flush_failed_total": { + sf_metrics_flush_failed_total: { help: "Total metrics flush failures", }, }; diff --git a/src/resources/extensions/sf/metrics.js b/src/resources/extensions/sf/metrics.js index ff6a91afa..953801770 100644 --- a/src/resources/extensions/sf/metrics.js +++ b/src/resources/extensions/sf/metrics.js @@ -258,7 +258,14 @@ export function snapshotUnitMetrics( // Fire-and-forget: don't block the snapshot on metrics-central import("./metrics-central.js") .then(({ recordCost }) => { - recordCost(unitId, model, tokens.input, tokens.output, cost, classifyUnitPhase(unitType)); + recordCost( + unitId, + model, + tokens.input, + tokens.output, + cost, + classifyUnitPhase(unitType), + ); }) .catch(() => { // metrics-central is optional; never block snapshot diff --git a/src/resources/extensions/sf/reasoning-assist.js b/src/resources/extensions/sf/reasoning-assist.js index c90ce6d89..f5a9e2be4 100644 --- 
a/src/resources/extensions/sf/reasoning-assist.js +++ b/src/resources/extensions/sf/reasoning-assist.js @@ -16,7 +16,16 @@ import { getAutoSession } from "./auto/session.js"; import { loadFile } from "./files.js"; -import { resolveMilestoneFile, resolveSliceFile, resolveSfRootFile } from "./paths.js"; +import { + formatMemoriesForPrompt, + getMemories, + MEMORY_TYPES, +} from "./memory-repository.js"; +import { + resolveMilestoneFile, + resolveSfRootFile, + resolveSliceFile, +} from "./paths.js"; import { logWarning } from "./workflow-logger.js"; const REASONING_ASSIST_ENABLED = process.env.SF_REASONING_ASSIST === "1"; @@ -31,12 +40,21 @@ const REASONING_ASSIST_MAX_CHARS = 2000; * @param {object} ctx — dispatch context * @returns {string|null} — reasoning prompt or null if disabled */ -export async function buildReasoningAssistPrompt(unitType, unitId, basePath, ctx) { +export async function buildReasoningAssistPrompt( + unitType, + unitId, + basePath, + ctx, +) { if (!REASONING_ASSIST_ENABLED) return null; const parts = []; - parts.push(`You are a senior engineering advisor. The team is about to run a "${unitType}" unit (${unitId}).`); - parts.push("Review the available context and write 3-5 sentences of strategic guidance:"); + parts.push( + `You are a senior engineering advisor. 
The team is about to run a "${unitType}" unit (${unitId}).`, + ); + parts.push( + "Review the available context and write 3-5 sentences of strategic guidance:", + ); parts.push("- What should the agent focus on?"); parts.push("- What common mistakes should it avoid?"); parts.push("- What tools should it use and in what order?"); @@ -45,7 +63,12 @@ export async function buildReasoningAssistPrompt(unitType, unitId, basePath, ctx parts.push(""); // Load relevant context files - const contextFiles = await loadRelevantContext(unitType, unitId, basePath); + const contextFiles = await loadRelevantContext( + unitType, + unitId, + basePath, + ctx, + ); for (const { label, content } of contextFiles) { if (content) { parts.push(`--- ${label} ---`); @@ -57,7 +80,7 @@ export async function buildReasoningAssistPrompt(unitType, unitId, basePath, ctx return parts.join("\n"); } -async function loadRelevantContext(unitType, unitId, basePath) { +async function loadRelevantContext(unitType, unitId, basePath, ctx) { const results = []; // Parse unit ID @@ -65,6 +88,64 @@ async function loadRelevantContext(unitType, unitId, basePath) { const milestoneId = segments[0]; const sliceId = segments[1]; + // Load structured memory (RA.Aid-style key facts/snippets/notes) + const sessionId = ctx?.sessionManager?.getSessionId?.(); + if (sessionId) { + const { getDatabase } = await import("./sf-db.js"); + const db = getDatabase(); + + // Key facts + const keyFacts = getMemories({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + limit: 20, + db, + }); + if (keyFacts.length > 0) { + const formatted = formatMemoriesForPrompt(keyFacts, { + header: "Key Facts:", + maxChars: 1500, + }); + if (formatted) results.push({ label: "Key Facts", content: formatted }); + } + + // Key snippets for implementation + if (unitType.includes("execute") || unitType.includes("implement")) { + const snippets = getMemories({ + sessionId, + type: MEMORY_TYPES.KEY_SNIPPET, + limit: 10, + db, + }); + if (snippets.length > 0) { + 
const formatted = formatMemoriesForPrompt(snippets, { + header: "Key Snippets:", + maxChars: 2000, + }); + if (formatted) + results.push({ label: "Key Snippets", content: formatted }); + } + } + + // Research notes for research/planning + if (unitType.includes("research") || unitType.includes("plan")) { + const notes = getMemories({ + sessionId, + type: MEMORY_TYPES.RESEARCH_NOTE, + limit: 10, + db, + }); + if (notes.length > 0) { + const formatted = formatMemoriesForPrompt(notes, { + header: "Research Notes:", + maxChars: 2000, + }); + if (formatted) + results.push({ label: "Research Notes", content: formatted }); + } + } + } + // Load decisions const decisionsPath = resolveSfRootFile(basePath, "DECISIONS"); if (decisionsPath) { @@ -84,16 +165,23 @@ async function loadRelevantContext(unitType, unitId, basePath) { const contextPath = resolveMilestoneFile(basePath, milestoneId, "CONTEXT"); if (contextPath) { const content = await loadFile(contextPath); - if (content) results.push({ label: `Milestone ${milestoneId} Context`, content }); + if (content) + results.push({ label: `Milestone ${milestoneId} Context`, content }); } } // Load slice research for planning/execution if (sliceId && (unitType.includes("plan") || unitType.includes("execute"))) { - const researchPath = resolveSliceFile(basePath, milestoneId, sliceId, "RESEARCH"); + const researchPath = resolveSliceFile( + basePath, + milestoneId, + sliceId, + "RESEARCH", + ); if (researchPath) { const content = await loadFile(researchPath); - if (content) results.push({ label: `Slice ${sliceId} Research`, content }); + if (content) + results.push({ label: `Slice ${sliceId} Research`, content }); } } diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js index 0b646ff08..322c77634 100644 --- a/src/resources/extensions/sf/sf-db.js +++ b/src/resources/extensions/sf/sf-db.js @@ -87,12 +87,19 @@ function createAdapter(rawDb) { * Execute a database query with timeout protection. 
* Falls back to empty result if query exceeds timeout. */ -function withQueryTimeout(operation, fallbackValue, timeoutMs = DB_QUERY_TIMEOUT_MS) { +function withQueryTimeout( + operation, + fallbackValue, + timeoutMs = DB_QUERY_TIMEOUT_MS, +) { try { return operation(); } catch (err) { if (err?.message?.includes("timeout") || err?.message?.includes("busy")) { - logWarning("sf-db", `Query timed out after ${timeoutMs}ms, returning fallback`); + logWarning( + "sf-db", + `Query timed out after ${timeoutMs}ms, returning fallback`, + ); return fallbackValue; } throw err; diff --git a/src/resources/extensions/sf/skills/eval-harness.js b/src/resources/extensions/sf/skills/eval-harness.js index d55ade3ff..507ccef29 100644 --- a/src/resources/extensions/sf/skills/eval-harness.js +++ b/src/resources/extensions/sf/skills/eval-harness.js @@ -67,7 +67,8 @@ export async function runGrader(evalDir, _ctx) { grade(workDir), new Promise((_, reject) => setTimeout( - () => reject(new Error(`Grader timed out after ${GRADER_TIMEOUT_MS}ms`)), + () => + reject(new Error(`Grader timed out after ${GRADER_TIMEOUT_MS}ms`)), GRADER_TIMEOUT_MS, ), ), diff --git a/src/resources/extensions/sf/subagent-inheritance.js b/src/resources/extensions/sf/subagent-inheritance.js index b06028613..3f45565ff 100644 --- a/src/resources/extensions/sf/subagent-inheritance.js +++ b/src/resources/extensions/sf/subagent-inheritance.js @@ -8,6 +8,7 @@ */ import { getAutoSession } from "./auto/session.js"; +import { recordCounter } from "./metrics-central.js"; import { resolveModelMode, resolvePermissionProfile, @@ -15,7 +16,6 @@ import { resolveWorkMode, } from "./operating-model.js"; import { isProviderAllowedByLists } from "./preferences-models.js"; -import { recordCounter } from "./metrics-central.js"; import { logWarning } from "./workflow-logger.js"; function providerFromModelId(modelId) { @@ -98,7 +98,10 @@ export function validateSubagentDispatch(envelope, proposal) { envelope.blockedProviders, ) ) { - 
logWarning("subagent-inheritance", `Blocked provider "${provider}" for subagent dispatch`); + logWarning( + "subagent-inheritance", + `Blocked provider "${provider}" for subagent dispatch`, + ); recordCounter("sf_subagent_dispatch_blocked", { reason: "provider", work_mode: envelope.workMode, @@ -111,7 +114,10 @@ export function validateSubagentDispatch(envelope, proposal) { } if (envelope.modelMode === "fast" && isHeavyModelId(modelId)) { - logWarning("subagent-inheritance", `Blocked heavy model "${modelId}" in fast mode`); + logWarning( + "subagent-inheritance", + `Blocked heavy model "${modelId}" in fast mode`, + ); recordCounter("sf_subagent_dispatch_blocked", { reason: "model_mode", work_mode: envelope.workMode, @@ -125,12 +131,20 @@ export function validateSubagentDispatch(envelope, proposal) { if (envelope.permissionProfile === "restricted") { const proposedTools = proposal.tools ?? []; - const RESTRICTED_TOOLS = new Set(["write", "edit", "bash", "mac_launch_app"]); + const RESTRICTED_TOOLS = new Set([ + "write", + "edit", + "bash", + "mac_launch_app", + ]); const blocked = proposedTools.filter((toolName) => RESTRICTED_TOOLS.has(toolName.toLowerCase()), ); if (blocked.length > 0) { - logWarning("subagent-inheritance", `Blocked tools [${blocked.join(", ")}] in restricted mode`); + logWarning( + "subagent-inheritance", + `Blocked tools [${blocked.join(", ")}] in restricted mode`, + ); recordCounter("sf_subagent_dispatch_blocked", { reason: "permission_profile", work_mode: envelope.workMode, diff --git a/src/resources/extensions/sf/tests/memory-repository.test.mjs b/src/resources/extensions/sf/tests/memory-repository.test.mjs new file mode 100644 index 000000000..3a09e7894 --- /dev/null +++ b/src/resources/extensions/sf/tests/memory-repository.test.mjs @@ -0,0 +1,209 @@ +/** + * Memory Repository Tests + * + * Tests for structured memory storage (key facts, snippets, notes). 
+ */ + +import { beforeEach, describe, expect, test } from "vitest"; +import { + ensureMemoryTable, + formatMemoriesForPrompt, + getMemories, + getMemoryCounts, + MEMORY_TYPES, + pruneMemories, + storeMemory, +} from "../memory-repository.js"; +import { closeDatabase, getDatabase, openDatabase } from "../sf-db.js"; + +describe("Memory Repository", () => { + let db; + let sessionId; + + beforeEach(() => { + closeDatabase(); + openDatabase(":memory:"); + db = getDatabase(); + sessionId = `test-session-${Date.now()}`; + ensureMemoryTable(db); + }); + + test("storeMemory_creates_key_fact", () => { + const result = storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "SF uses SQLite for canonical state", + db, + }); + expect(result).not.toBeNull(); + expect(result.created).toBe(true); + expect(result.deduped).toBe(false); + expect(result.id).toBeGreaterThan(0); + }); + + test("storeMemory_dedupes_same_content", () => { + const r1 = storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "Same content", + db, + }); + const r2 = storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "Same content", + db, + }); + expect(r1.id).toBe(r2.id); + expect(r2.deduped).toBe(true); + }); + + test("storeMemory_with_metadata", () => { + const result = storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_SNIPPET, + content: "const x = 1;", + metadata: { + filepath: "src/foo.js", + line_number: 42, + description: "Init", + }, + db, + }); + expect(result.created).toBe(true); + const memories = getMemories({ sessionId, db }); + expect(memories[0].metadata).toEqual({ + filepath: "src/foo.js", + line_number: 42, + description: "Init", + }); + }); + + test("getMemories_filters_by_type", () => { + storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "Fact 1", + db, + }); + storeMemory({ + sessionId, + type: MEMORY_TYPES.RESEARCH_NOTE, + content: "Note 1", + db, + }); + storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + 
content: "Fact 2", + db, + }); + + const facts = getMemories({ sessionId, type: MEMORY_TYPES.KEY_FACT, db }); + expect(facts.length).toBe(2); + expect(facts.every((m) => m.type === "key_fact")).toBe(true); + }); + + test("getMemories_searches_by_query", () => { + storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "SQLite is the canonical store", + db, + }); + storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "Node 26 is the target runtime", + db, + }); + + const results = getMemories({ sessionId, query: "SQLite", db }); + expect(results.length).toBe(1); + expect(results[0].content).toContain("SQLite"); + }); + + test("getMemoryCounts_returns_counts_by_type", () => { + storeMemory({ sessionId, type: MEMORY_TYPES.KEY_FACT, content: "F1", db }); + storeMemory({ sessionId, type: MEMORY_TYPES.KEY_FACT, content: "F2", db }); + storeMemory({ + sessionId, + type: MEMORY_TYPES.RESEARCH_NOTE, + content: "N1", + db, + }); + + const counts = getMemoryCounts(sessionId, db); + expect(counts.key_fact).toBe(2); + expect(counts.research_note).toBe(1); + }); + + test("formatMemoriesForPrompt_formats_with_header", () => { + const memories = [ + { type: "key_fact", content: "Fact A" }, + { type: "key_fact", content: "Fact B" }, + ]; + const formatted = formatMemoriesForPrompt(memories, { + header: "Key Facts:", + }); + expect(formatted).toContain("Key Facts:"); + expect(formatted).toContain("[key_fact] Fact A"); + expect(formatted).toContain("[key_fact] Fact B"); + }); + + test("formatMemoriesForPrompt_respects_maxChars", () => { + const memories = Array.from({ length: 10 }, (_, i) => ({ + type: "key_fact", + content: `This is a very long fact number ${i} with lots of content to test truncation`, + })); + const formatted = formatMemoriesForPrompt(memories, { maxChars: 100 }); + expect(formatted.length).toBeLessThanOrEqual(150); // header + some content + "..." 
+ expect(formatted).toContain("..."); + }); + + test("pruneMemories_deletes_old_entries", () => { + storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "Old fact", + db, + }); + // Manually set created_at to past + db.prepare( + `UPDATE sf_memory SET created_at = '2020-01-01' WHERE session_id = ?`, + ).run(sessionId); + + storeMemory({ + sessionId, + type: MEMORY_TYPES.KEY_FACT, + content: "New fact", + db, + }); + + const deleted = pruneMemories(sessionId, "2025-01-01", db); + expect(deleted).toBe(1); + + const remaining = getMemories({ sessionId, db }); + expect(remaining.length).toBe(1); + expect(remaining[0].content).toBe("New fact"); + }); + + test("storeMemory_rejects_invalid_type", () => { + const result = storeMemory({ + sessionId, + type: "invalid_type", + content: "Should fail", + db, + }); + expect(result).toBeNull(); + }); + + test("storeMemory_requires_sessionId", () => { + const result = storeMemory({ + type: MEMORY_TYPES.KEY_FACT, + content: "Should fail", + db, + }); + expect(result).toBeNull(); + }); +}); diff --git a/src/resources/extensions/sf/tests/metrics-central.test.mjs b/src/resources/extensions/sf/tests/metrics-central.test.mjs index dbbac4bf9..0a9b2aaa1 100644 --- a/src/resources/extensions/sf/tests/metrics-central.test.mjs +++ b/src/resources/extensions/sf/tests/metrics-central.test.mjs @@ -1,96 +1,122 @@ -import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { - initMetricsCentral, - stopMetricsCentral, - recordCounter, - recordGauge, - recordHistogram, - getMetricsText, - registerMetricMeta, - recordCost, - queryMetrics, + getMetricsText, + initMetricsCentral, + queryMetrics, + recordCost, + recordCounter, + recordGauge, + recordHistogram, + registerMetricMeta, + stopMetricsCentral, } from "../metrics-central.js"; describe("metrics-central", () => { - beforeEach(() => { - initMetricsCentral("/tmp/test-project"); - }); + 
beforeEach(() => { + initMetricsCentral("/tmp/test-project"); + }); - afterEach(() => { - stopMetricsCentral(); - }); + afterEach(() => { + stopMetricsCentral(); + }); - it("recordCounter_increments_and_exposes", () => { - recordCounter("sf_test_counter", { label: "a" }, 3); - recordCounter("sf_test_counter", { label: "a" }, 2); - const text = getMetricsText(); - expect(text).toContain('sf_test_counter{label="a"} 5'); - expect(text).toContain("# TYPE sf_test_counter counter"); - }); + it("recordCounter_increments_and_exposes", () => { + recordCounter("sf_test_counter", { label: "a" }, 3); + recordCounter("sf_test_counter", { label: "a" }, 2); + const text = getMetricsText(); + expect(text).toContain('sf_test_counter{label="a"} 5'); + expect(text).toContain("# TYPE sf_test_counter counter"); + }); - it("recordGauge_sets_and_exposes", () => { - recordGauge("sf_test_gauge", 42, { env: "prod" }); - const text = getMetricsText(); - expect(text).toContain('sf_test_gauge{env="prod"} 42'); - expect(text).toContain("# TYPE sf_test_gauge gauge"); - }); + it("recordGauge_sets_and_exposes", () => { + recordGauge("sf_test_gauge", 42, { env: "prod" }); + const text = getMetricsText(); + expect(text).toContain('sf_test_gauge{env="prod"} 42'); + expect(text).toContain("# TYPE sf_test_gauge gauge"); + }); - it("recordHistogram_observes_and_exposes_buckets", () => { - registerMetricMeta("sf_test_hist", "Test histogram", [], [1, 5, 10]); - recordHistogram("sf_test_hist", 3); - recordHistogram("sf_test_hist", 7); - const text = getMetricsText(); - expect(text).toContain('sf_test_hist_bucket{le="1"} 0'); - expect(text).toContain('sf_test_hist_bucket{le="5"} 1'); - expect(text).toContain('sf_test_hist_bucket{le="10"} 2'); - expect(text).toContain("sf_test_hist_count 2"); - expect(text).toContain("sf_test_hist_sum 10"); - }); + it("recordHistogram_observes_and_exposes_buckets", () => { + registerMetricMeta("sf_test_hist", "Test histogram", [], [1, 5, 10]); + 
recordHistogram("sf_test_hist", 3); + recordHistogram("sf_test_hist", 7); + const text = getMetricsText(); + expect(text).toContain('sf_test_hist_bucket{le="1"} 0'); + expect(text).toContain('sf_test_hist_bucket{le="5"} 1'); + expect(text).toContain('sf_test_hist_bucket{le="10"} 2'); + expect(text).toContain("sf_test_hist_count 2"); + expect(text).toContain("sf_test_hist_sum 10"); + }); - it("subagent_metrics_tracked", () => { - recordCounter("sf_subagent_dispatch_total", { work_mode: "build", permission_profile: "trusted" }); - recordCounter("sf_subagent_dispatch_blocked", { reason: "provider", work_mode: "build", permission_profile: "trusted" }); - const text = getMetricsText(); - expect(text).toContain('sf_subagent_dispatch_total{permission_profile="trusted",work_mode="build"} 1'); - expect(text).toContain('sf_subagent_dispatch_blocked{permission_profile="trusted",reason="provider",work_mode="build"} 1'); - }); + it("subagent_metrics_tracked", () => { + recordCounter("sf_subagent_dispatch_total", { + work_mode: "build", + permission_profile: "trusted", + }); + recordCounter("sf_subagent_dispatch_blocked", { + reason: "provider", + work_mode: "build", + permission_profile: "trusted", + }); + const text = getMetricsText(); + expect(text).toContain( + 'sf_subagent_dispatch_total{permission_profile="trusted",work_mode="build"} 1', + ); + expect(text).toContain( + 'sf_subagent_dispatch_blocked{permission_profile="trusted",reason="provider",work_mode="build"} 1', + ); + }); - it("mode_transition_metrics_tracked", () => { - recordCounter("sf_mode_transition_total", { axis: "work_mode", from: "chat", to: "build", reason: "user_command" }); - const text = getMetricsText(); - expect(text).toContain('sf_mode_transition_total{axis="work_mode",from="chat",reason="user_command",to="build"} 1'); - }); + it("mode_transition_metrics_tracked", () => { + recordCounter("sf_mode_transition_total", { + axis: "work_mode", + from: "chat", + to: "build", + reason: "user_command", + }); 
+ const text = getMetricsText(); + expect(text).toContain( + 'sf_mode_transition_total{axis="work_mode",from="chat",reason="user_command",to="build"} 1', + ); + }); - it("session_id_auto_injected", () => { - initMetricsCentral("/tmp/test-project", { sessionId: "sess-abc-123" }); - recordCounter("sf_test_session", { label: "x" }); - const text = getMetricsText(); - expect(text).toContain('session_id="sess-abc-123"'); - }); + it("session_id_auto_injected", () => { + initMetricsCentral("/tmp/test-project", { sessionId: "sess-abc-123" }); + recordCounter("sf_test_session", { label: "x" }); + const text = getMetricsText(); + expect(text).toContain('session_id="sess-abc-123"'); + }); - it("cost_metrics_tracked", () => { - recordCost("unit-42", "claude-sonnet-4", 1500, 800, 0.045, "build"); - const text = getMetricsText(); - expect(text).toContain('sf_cost_total{model_id="claude-sonnet-4",unit_id="unit-42",work_mode="build"} 0.045'); - expect(text).toContain('sf_tokens_input_total{model_id="claude-sonnet-4"} 1500'); - expect(text).toContain('sf_tokens_output_total{model_id="claude-sonnet-4"} 800'); - expect(text).toContain('sf_cost_last{model_id="claude-sonnet-4",unit_id="unit-42"} 0.045'); - }); + it("cost_metrics_tracked", () => { + recordCost("unit-42", "claude-sonnet-4", 1500, 800, 0.045, "build"); + const text = getMetricsText(); + expect(text).toContain( + 'sf_cost_total{model_id="claude-sonnet-4",unit_id="unit-42",work_mode="build"} 0.045', + ); + expect(text).toContain( + 'sf_tokens_input_total{model_id="claude-sonnet-4"} 1500', + ); + expect(text).toContain( + 'sf_tokens_output_total{model_id="claude-sonnet-4"} 800', + ); + expect(text).toContain( + 'sf_cost_last{model_id="claude-sonnet-4",unit_id="unit-42"} 0.045', + ); + }); - it("invalid_metric_name_rejected", () => { - expect(() => recordCounter("bad name with spaces", {})).toThrow(); - expect(() => recordCounter("123_starts_with_number", {})).toThrow(); - expect(() => recordCounter("", {})).toThrow(); - }); 
+ it("invalid_metric_name_rejected", () => { + expect(() => recordCounter("bad name with spaces", {})).toThrow(); + expect(() => recordCounter("123_starts_with_number", {})).toThrow(); + expect(() => recordCounter("", {})).toThrow(); + }); - it("label_escaping_handles_special_chars", () => { - recordCounter("sf_test_escape", { key: "a=b,c" }); - const text = getMetricsText(); - expect(text).toContain('key="a=b,c"'); - }); + it("label_escaping_handles_special_chars", () => { + recordCounter("sf_test_escape", { key: "a=b,c" }); + const text = getMetricsText(); + expect(text).toContain('key="a=b,c"'); + }); - it("queryMetrics_returns_empty_without_db", () => { - const results = queryMetrics(null, "sess-1", "sf_test"); - expect(results).toEqual([]); - }); + it("queryMetrics_returns_empty_without_db", () => { + const results = queryMetrics(null, "sess-1", "sf_test"); + expect(results).toEqual([]); + }); }); diff --git a/src/resources/extensions/sf/tests/trajectory-recorder.test.mjs b/src/resources/extensions/sf/tests/trajectory-recorder.test.mjs new file mode 100644 index 000000000..918e8b41b --- /dev/null +++ b/src/resources/extensions/sf/tests/trajectory-recorder.test.mjs @@ -0,0 +1,210 @@ +/** + * Trajectory Recorder Tests + */ + +import { beforeEach, describe, expect, test } from "vitest"; +import { closeDatabase, getDatabase, openDatabase } from "../sf-db.js"; +import { + clearTrajectory, + ensureTrajectoryTable, + formatTrajectory, + getTrajectory, + getTrajectorySummary, + recordTrajectoryStep, + STEP_TYPES, +} from "../trajectory-recorder.js"; + +describe("Trajectory Recorder", () => { + let db; + let sessionId; + + beforeEach(() => { + closeDatabase(); + openDatabase(":memory:"); + db = getDatabase(); + sessionId = `test-session-${Date.now()}`; + ensureTrajectoryTable(db); + }); + + test("recordTrajectoryStep_creates_tool_call", () => { + const result = recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "readFile", + toolParams: { 
path: "src/foo.js" }, + db, + }); + expect(result).not.toBeNull(); + expect(result.id).toBeGreaterThan(0); + expect(result.stepNumber).toBe(1); + }); + + test("recordTrajectoryStep_increments_step_number", () => { + const r1 = recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "readFile", + db, + }); + const r2 = recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_RESULT, + toolName: "readFile", + toolResult: "content", + db, + }); + expect(r1.stepNumber).toBe(1); + expect(r2.stepNumber).toBe(2); + }); + + test("recordTrajectoryStep_records_error", () => { + const result = recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.ERROR, + isError: true, + errorMessage: "File not found", + errorType: "FileNotFoundError", + errorDetails: "Stack trace here", + db, + }); + expect(result).not.toBeNull(); + + const steps = getTrajectory({ sessionId, db }); + expect(steps[0].isError).toBe(true); + expect(steps[0].errorMessage).toBe("File not found"); + expect(steps[0].errorType).toBe("FileNotFoundError"); + }); + + test("recordTrajectoryStep_records_cost_and_tokens", () => { + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.LLM_CALL, + llmModel: "claude-3-opus", + inputTokens: 1000, + outputTokens: 500, + stepCost: 0.015, + db, + }); + + const steps = getTrajectory({ sessionId, db }); + expect(steps[0].inputTokens).toBe(1000); + expect(steps[0].outputTokens).toBe(500); + expect(steps[0].stepCost).toBe(0.015); + }); + + test("getTrajectory_filters_by_type", () => { + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "a", + db, + }); + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.LLM_CALL, + llmModel: "x", + db, + }); + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "b", + db, + }); + + const toolSteps = getTrajectory({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + db, + }); + expect(toolSteps.length).toBe(2); + 
expect(toolSteps.every((s) => s.stepType === "tool_call")).toBe(true); + }); + + test("getTrajectorySummary_returns_stats", () => { + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "a", + inputTokens: 100, + outputTokens: 50, + stepCost: 0.01, + db, + }); + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "b", + inputTokens: 200, + outputTokens: 100, + stepCost: 0.02, + db, + }); + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.ERROR, + isError: true, + db, + }); + + const summary = getTrajectorySummary(sessionId, null, db); + expect(summary.totalSteps).toBe(3); + expect(summary.errorCount).toBe(1); + expect(summary.totalInputTokens).toBe(300); + expect(summary.totalOutputTokens).toBe(150); + expect(summary.totalCost).toBeCloseTo(0.03, 4); + expect(summary.uniqueTools).toBe(2); + }); + + test("formatTrajectory_formats_steps", () => { + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.TOOL_CALL, + toolName: "readFile", + inputTokens: 100, + stepCost: 0.01, + db, + }); + recordTrajectoryStep({ + sessionId, + stepType: STEP_TYPES.LLM_CALL, + llmModel: "claude", + inputTokens: 1000, + outputTokens: 500, + stepCost: 0.015, + db, + }); + + const steps = getTrajectory({ sessionId, db }); + const formatted = formatTrajectory(steps); + expect(formatted).toContain("Trajectory (2 steps total"); + expect(formatted).toContain("Tool: readFile"); + expect(formatted).toContain("LLM: claude"); + }); + + test("clearTrajectory_deletes_steps", () => { + recordTrajectoryStep({ sessionId, stepType: STEP_TYPES.TOOL_CALL, db }); + const deleted = clearTrajectory(sessionId, db); + expect(deleted).toBe(1); + + const steps = getTrajectory({ sessionId, db }); + expect(steps.length).toBe(0); + }); + + test("recordTrajectoryStep_rejects_invalid_type", () => { + const result = recordTrajectoryStep({ + sessionId, + stepType: "invalid_type", + db, + }); + expect(result).toBeNull(); + }); + + 
test("recordTrajectoryStep_requires_sessionId", () => { + const result = recordTrajectoryStep({ + stepType: STEP_TYPES.TOOL_CALL, + db, + }); + expect(result).toBeNull(); + }); +}); diff --git a/src/resources/extensions/sf/trajectory-command.js b/src/resources/extensions/sf/trajectory-command.js new file mode 100644 index 000000000..836edbae6 --- /dev/null +++ b/src/resources/extensions/sf/trajectory-command.js @@ -0,0 +1,83 @@ +/** + * Trajectory Command — /trajectory handler for SF. + * + * Purpose: Display step-by-step execution trace with costs, errors, + * and tool usage for the current session or a specific unit. + * + * Consumer: ops.js command dispatcher. + */ + +import { getDatabase, isDbAvailable } from "./sf-db.js"; +import { + formatTrajectory, + getTrajectory, + getTrajectorySummary, +} from "./trajectory-recorder.js"; + +/** + * Handle the /trajectory command. + * + * @param {string} args — command arguments + * @param {object} ctx — command context + * @param {string} basePath — project root + */ +export async function handleTrajectory(args, ctx, basePath) { + if (!isDbAvailable()) { + ctx.ui.notify( + "Trajectory recording requires a database connection.", + "warning", + ); + return; + } + + const db = getDatabase(); + const sessionId = ctx.sessionManager?.getSessionId?.() || "default"; + + // Parse flags + const flags = args.split(/\s+/).filter(Boolean); + const showAll = flags.includes("--all"); + const showErrors = flags.includes("--errors"); + const showTools = flags.includes("--tools"); + const showLLM = flags.includes("--llm"); + const limit = flags.find((f) => f.startsWith("--limit=")) + ? parseInt(flags.find((f) => f.startsWith("--limit=")).split("=")[1], 10) + : 50; + + // Get trajectory steps + const filter = {}; + if (showErrors) filter.stepType = "error"; + else if (showTools) filter.stepType = "tool_call"; + else if (showLLM) filter.stepType = "llm_call"; + + const steps = getTrajectory({ + sessionId: showAll ? 
undefined : sessionId, + ...filter, + limit, + db, + }); + + if (steps.length === 0) { + ctx.ui.notify("No trajectory steps recorded yet.", "info"); + return; + } + + // Get summary + const summary = getTrajectorySummary(sessionId, null, db); + + // Build output + const lines = []; + lines.push(`Trajectory for session ${sessionId.slice(0, 8)}`); + + if (summary) { + lines.push( + `Steps: ${summary.totalSteps} | Errors: ${summary.errorCount} | ` + + `Tokens: ${summary.totalInputTokens + summary.totalOutputTokens} | ` + + `Cost: $${summary.totalCost.toFixed(4)} | Tools: ${summary.uniqueTools}`, + ); + } + + lines.push(""); + lines.push(formatTrajectory(steps, { maxSteps: limit })); + + ctx.ui.notify(lines.join("\n"), "info"); +} diff --git a/src/resources/extensions/sf/trajectory-recorder.js b/src/resources/extensions/sf/trajectory-recorder.js new file mode 100644 index 000000000..fefb8e647 --- /dev/null +++ b/src/resources/extensions/sf/trajectory-recorder.js @@ -0,0 +1,399 @@ +/** + * Trajectory Recorder — Step-by-step execution trace for SF units. + * + * Purpose: Provide RA.Aid-style trajectory recording with per-step + * tool execution details, costs, tokens, errors, and results. + * + * Consumer: research units, planning units, execute-task units, /trajectory command. + * + * Design: + * - SQLite-backed with JSONB-like flexibility for tool params/results + * - Session + unit scoped + * - Cost and token tracking per step + * - Error recording with full context + * - Exportable for analysis and debugging + */ + +import { debugLog } from "./debug-logger.js"; +import { isDbAvailable } from "./sf-db.js"; +import { logWarning } from "./workflow-logger.js"; + +const TRAJECTORY_TABLE = "sf_trajectory"; + +/** + * Trajectory step types. 
/**
 * Trajectory step types recorded in the sf_trajectory table.
 * These values are the single source of truth for the table's CHECK
 * constraint (see STEP_TYPE_SQL_LIST below).
 */
export const STEP_TYPES = {
  TOOL_CALL: "tool_call",
  TOOL_RESULT: "tool_result",
  LLM_CALL: "llm_call",
  LLM_RESPONSE: "llm_response",
  USER_INPUT: "user_input",
  STAGE_TRANSITION: "stage_transition",
  ERROR: "error",
  CHECKPOINT: "checkpoint",
};

// SQL fragment for the step_type CHECK constraint, derived from STEP_TYPES so
// the schema can never drift from the JS constants (the original hand-listed
// the same eight strings a second time inside the CREATE TABLE).
const STEP_TYPE_SQL_LIST = Object.values(STEP_TYPES)
  .map((t) => `'${t}'`)
  .join(", ");

/**
 * Ensure the trajectory table and its indexes exist.
 *
 * @param {object} db — database connection exposing exec()
 * @returns {boolean} true when the schema is in place, false when db is
 *   missing or the DDL failed (logged, never thrown)
 */
export function ensureTrajectoryTable(db) {
  if (!db) return false;
  try {
    db.exec(`
      CREATE TABLE IF NOT EXISTS ${TRAJECTORY_TABLE} (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        unit_id TEXT,
        step_type TEXT NOT NULL CHECK(step_type IN (${STEP_TYPE_SQL_LIST})),
        step_number INTEGER NOT NULL,
        tool_name TEXT,
        tool_params TEXT, -- JSON
        tool_result TEXT, -- JSON or text
        llm_model TEXT,
        llm_prompt TEXT,
        llm_response TEXT,
        input_tokens INTEGER,
        output_tokens INTEGER,
        step_cost REAL,
        is_error INTEGER NOT NULL DEFAULT 0,
        error_message TEXT,
        error_type TEXT,
        error_details TEXT,
        metadata TEXT, -- JSON: {duration_ms, retry_count, etc}
        created_at TEXT NOT NULL DEFAULT (datetime('now'))
      );
      CREATE INDEX IF NOT EXISTS idx_trajectory_session ON ${TRAJECTORY_TABLE}(session_id);
      CREATE INDEX IF NOT EXISTS idx_trajectory_unit ON ${TRAJECTORY_TABLE}(unit_id);
      CREATE INDEX IF NOT EXISTS idx_trajectory_type ON ${TRAJECTORY_TABLE}(step_type);
      CREATE INDEX IF NOT EXISTS idx_trajectory_created ON ${TRAJECTORY_TABLE}(created_at DESC);
    `);
    return true;
  } catch (err) {
    logWarning("trajectory", "Failed to ensure trajectory table", {
      error: String(err),
    });
    return false;
  }
}

// Per-session monotonic step numbers.
// NOTE(review): this counter is process-local — restarting the process
// restarts numbering at 1 for an existing session, so step_number uniqueness
// per session is not guaranteed across restarts. Confirm whether the counter
// should be seeded from MAX(step_number) on first use.
const stepCounter = new Map(); // sessionId -> current step number

/** Return the next step number for a session (1-based, monotonic). */
function getNextStepNumber(sessionId) {
  const current = stepCounter.get(sessionId) || 0;
  const next = current + 1;
  stepCounter.set(sessionId, next);
  return next;
}

/** Reset a session's in-memory step counter back to zero. */
function resetStepCounter(sessionId) {
  stepCounter.set(sessionId, 0);
}
/**
 * Record a trajectory step.
 *
 * @param {object} params
 * @param {string} params.sessionId — required
 * @param {string} [params.unitId] — optional unit tag
 * @param {string} params.stepType — STEP_TYPES value
 * @param {string} [params.toolName] — tool name for tool_call/tool_result
 * @param {object} [params.toolParams] — tool parameters (stored as JSON)
 * @param {any} [params.toolResult] — tool result (strings stored verbatim,
 *   anything else JSON-serialized)
 * @param {string} [params.llmModel] — model identifier
 * @param {string} [params.llmPrompt] — prompt text
 * @param {string} [params.llmResponse] — response text
 * @param {number} [params.inputTokens] — input token count (0 is preserved)
 * @param {number} [params.outputTokens] — output token count (0 is preserved)
 * @param {number} [params.stepCost] — cost in USD (0 is preserved)
 * @param {boolean} [params.isError=false] — whether this step errored
 * @param {string} [params.errorMessage] — error message
 * @param {string} [params.errorType] — error type/class
 * @param {string} [params.errorDetails] — detailed error info
 * @param {object} [params.metadata] — additional metadata (stored as JSON)
 * @param {object} [params.db] — database connection
 * @returns {{id: number, stepNumber: number}|null} insert info, or null on
 *   validation failure, missing db, or insert error (logged, never thrown)
 */
export function recordTrajectoryStep({
  sessionId,
  unitId,
  stepType,
  toolName,
  toolParams,
  toolResult,
  llmModel,
  llmPrompt,
  llmResponse,
  inputTokens,
  outputTokens,
  stepCost,
  isError = false,
  errorMessage,
  errorType,
  errorDetails,
  metadata,
  db,
}) {
  if (!sessionId || !stepType) {
    logWarning("trajectory", "recordTrajectoryStep missing required fields", {
      sessionId,
      stepType,
    });
    return null;
  }
  if (!Object.values(STEP_TYPES).includes(stepType)) {
    logWarning("trajectory", "Invalid step type", { stepType });
    return null;
  }

  // FIXME(review): `require` is not defined in ES modules and this file uses
  // `import` syntax — confirm the runtime/bundler provides `require`, or
  // import getDb at the top of the file alongside isDbAvailable.
  const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null);
  if (!dbConn) return null;

  ensureTrajectoryTable(dbConn);

  const stepNumber = getNextStepNumber(sessionId);

  try {
    const result = dbConn
      .prepare(
        `INSERT INTO ${TRAJECTORY_TABLE} (
          session_id, unit_id, step_type, step_number, tool_name,
          tool_params, tool_result, llm_model, llm_prompt, llm_response,
          input_tokens, output_tokens, step_cost, is_error,
          error_message, error_type, error_details, metadata
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      )
      .run(
        sessionId,
        unitId || null,
        stepType,
        stepNumber,
        toolName || null,
        toolParams ? JSON.stringify(toolParams) : null,
        toolResult !== undefined
          ? typeof toolResult === "string"
            ? toolResult
            : JSON.stringify(toolResult)
          : null,
        llmModel || null,
        llmPrompt || null,
        llmResponse || null,
        // `??` (not `||`): a legitimate count/cost of 0 must be stored as 0,
        // not silently converted to NULL as the original `|| null` did.
        inputTokens ?? null,
        outputTokens ?? null,
        stepCost ?? null,
        isError ? 1 : 0,
        errorMessage || null,
        errorType || null,
        errorDetails || null,
        metadata ? JSON.stringify(metadata) : null,
      );

    return { id: Number(result.lastInsertRowid), stepNumber };
  } catch (err) {
    logWarning("trajectory", "recordTrajectoryStep failed", {
      error: String(err),
      sessionId,
      stepType,
    });
    return null;
  }
}
/**
 * Get trajectory steps with optional filtering.
 *
 * @param {object} params
 * @param {string} [params.sessionId] — restrict to one session; omit to query
 *   across every session (the /trajectory --all path passes undefined here)
 * @param {string} [params.unitId] — restrict to one unit
 * @param {string} [params.stepType] — restrict to one STEP_TYPES value
 * @param {number} [params.limit=100] — maximum rows returned
 * @param {object} [params.db] — database connection
 * @returns {Array<object>} decoded rows (camelCase keys), [] on error/no db
 */
export function getTrajectory({ sessionId, unitId, stepType, limit = 100, db }) {
  // FIXME(review): `require` is not defined in ES modules and this file uses
  // `import` syntax — confirm the runtime provides it, or import getDb at the
  // top of the file alongside isDbAvailable.
  const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null);
  if (!dbConn) return [];

  ensureTrajectoryTable(dbConn);

  try {
    // Build WHERE incrementally. The session filter is optional so callers
    // (e.g. /trajectory --all) can query across sessions; the original
    // unconditionally emitted `session_id = ?` and bound `undefined`, which
    // fails for that path.
    const conditions = [];
    const params = [];

    if (sessionId) {
      conditions.push("session_id = ?");
      params.push(sessionId);
    }
    if (unitId) {
      conditions.push("unit_id = ?");
      params.push(unitId);
    }
    if (stepType) {
      conditions.push("step_type = ?");
      params.push(stepType);
    }

    const whereClause =
      conditions.length > 0 ? conditions.join(" AND ") : "1 = 1";
    params.push(limit);

    const rows = dbConn
      .prepare(
        `SELECT * FROM ${TRAJECTORY_TABLE}
         WHERE ${whereClause}
         ORDER BY step_number ASC
         LIMIT ?`,
      )
      .all(...params);

    // Decode JSON columns defensively via tryParseJson: one corrupt row must
    // not discard the whole trajectory (the original's bare JSON.parse on
    // tool_params/metadata would throw and return [] for everything).
    return rows.map((r) => ({
      id: r.id,
      sessionId: r.session_id,
      unitId: r.unit_id,
      stepType: r.step_type,
      stepNumber: r.step_number,
      toolName: r.tool_name,
      toolParams: r.tool_params ? tryParseJson(r.tool_params) : null,
      toolResult: r.tool_result ? tryParseJson(r.tool_result) : null,
      llmModel: r.llm_model,
      llmPrompt: r.llm_prompt,
      llmResponse: r.llm_response,
      inputTokens: r.input_tokens,
      outputTokens: r.output_tokens,
      stepCost: r.step_cost,
      isError: !!r.is_error,
      errorMessage: r.error_message,
      errorType: r.error_type,
      errorDetails: r.error_details,
      metadata: r.metadata ? tryParseJson(r.metadata) : null,
      createdAt: r.created_at,
    }));
  } catch (err) {
    logWarning("trajectory", "getTrajectory failed", {
      error: String(err),
      sessionId,
    });
    return [];
  }
}

/**
 * Parse a string as JSON, returning the raw string unchanged when it is not
 * valid JSON (tool results may be plain text rather than serialized objects).
 */
function tryParseJson(str) {
  try {
    return JSON.parse(str);
  } catch {
    return str;
  }
}
/**
 * Get an aggregate summary for a session, optionally narrowed to one unit.
 *
 * @param {string} sessionId — session to summarize
 * @param {string|null} unitId — optional unit filter
 * @param {object} [db] — database connection
 * @returns {{totalSteps: number, errorCount: number, totalInputTokens: number,
 *   totalOutputTokens: number, totalCost: number, uniqueTools: number,
 *   maxStep: number}|null} summary with all-numeric fields, or null on
 *   error / missing db (logged, never thrown)
 */
export function getTrajectorySummary(sessionId, unitId, db) {
  // FIXME(review): `require` is not defined in ES modules and this file uses
  // `import` syntax — confirm the runtime provides it, or import getDb at the
  // top of the file alongside isDbAvailable.
  const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null);
  if (!dbConn) return null;

  ensureTrajectoryTable(dbConn);

  try {
    const conditions = ["session_id = ?"];
    const params = [sessionId];

    if (unitId) {
      conditions.push("unit_id = ?");
      params.push(unitId);
    }

    const whereClause = conditions.join(" AND ");

    const stats = dbConn
      .prepare(
        `SELECT
          COUNT(*) as total_steps,
          SUM(CASE WHEN is_error = 1 THEN 1 ELSE 0 END) as error_count,
          SUM(input_tokens) as total_input_tokens,
          SUM(output_tokens) as total_output_tokens,
          SUM(step_cost) as total_cost,
          COUNT(DISTINCT tool_name) as unique_tools,
          MAX(step_number) as max_step
        FROM ${TRAJECTORY_TABLE}
        WHERE ${whereClause}`,
      )
      .get(...params);

    // SQLite SUM()/MAX() yield NULL over an empty result set; coalesce every
    // aggregate so callers always receive numbers (the original left
    // errorCount/maxStep uncoalesced and could return null for them).
    return {
      totalSteps: stats.total_steps ?? 0,
      errorCount: stats.error_count ?? 0,
      totalInputTokens: stats.total_input_tokens ?? 0,
      totalOutputTokens: stats.total_output_tokens ?? 0,
      totalCost: stats.total_cost ?? 0,
      uniqueTools: stats.unique_tools ?? 0,
      maxStep: stats.max_step ?? 0,
    };
  } catch (err) {
    logWarning("trajectory", "getTrajectorySummary failed", {
      error: String(err),
    });
    return null;
  }
}
/**
 * Format trajectory steps for display/export.
 *
 * @param {Array<object>} steps — decoded rows from getTrajectory()
 * @param {object} [options]
 * @param {number} [options.maxSteps=50] — show at most the last N steps
 * @param {boolean} [options.includeDetails=true] — include params/error detail
 * @returns {string} human-readable multi-line trace
 */
export function formatTrajectory(steps, options = {}) {
  if (!steps || steps.length === 0) return "No trajectory steps recorded.";

  const { maxSteps = 50, includeDetails = true } = options;
  const displaySteps = steps.slice(-maxSteps);

  const lines = [
    `Trajectory (${steps.length} steps total, showing last ${displaySteps.length}):`,
  ];

  for (const step of displaySteps) {
    const prefix = step.isError ? "❌" : "✓";
    // `!= null` (not truthiness) so legitimate 0-cost and 0-token steps still
    // display — the original hid them.
    const cost = step.stepCost != null ? ` ($${step.stepCost.toFixed(4)})` : "";
    const tokens =
      step.inputTokens != null
        ? ` [${step.inputTokens}/${step.outputTokens} tokens]`
        : "";

    if (step.stepType === "tool_call") {
      lines.push(
        `${prefix} [${step.stepNumber}] Tool: ${step.toolName}${cost}${tokens}`,
      );
      if (includeDetails && step.toolParams) {
        // Truncate long payloads; append "..." only when the full
        // serialization actually exceeds the cap (the original checked the
        // already-sliced string, mislabeling exactly-200-char payloads).
        const full = JSON.stringify(step.toolParams);
        const params = full.slice(0, 200);
        lines.push(`  Params: ${params}${full.length > 200 ? "..." : ""}`);
      }
    } else if (step.stepType === "llm_call") {
      lines.push(
        `${prefix} [${step.stepNumber}] LLM: ${step.llmModel}${cost}${tokens}`,
      );
    } else if (step.stepType === "error") {
      lines.push(`${prefix} [${step.stepNumber}] Error: ${step.errorMessage}`);
      if (includeDetails && step.errorDetails) {
        lines.push(`  Details: ${step.errorDetails.slice(0, 300)}`);
      }
    } else {
      lines.push(
        `${prefix} [${step.stepNumber}] ${step.stepType}${cost}${tokens}`,
      );
    }
  }

  return lines.join("\n");
}

/**
 * Delete all trajectory rows for a session and reset its in-memory step
 * counter so subsequent steps number from 1 again.
 *
 * @param {string} sessionId — session to clear
 * @param {object} [db] — database connection
 * @returns {number} rows deleted (0 on error / missing db; logged, never thrown)
 */
export function clearTrajectory(sessionId, db) {
  // FIXME(review): `require` is not defined in ES modules and this file uses
  // `import` syntax — confirm the runtime provides it, or import getDb at the
  // top of the file alongside isDbAvailable.
  const dbConn = db || (isDbAvailable() ? require("./sf-db.js").getDb() : null);
  if (!dbConn) return 0;

  ensureTrajectoryTable(dbConn);

  try {
    const result = dbConn
      .prepare(`DELETE FROM ${TRAJECTORY_TABLE} WHERE session_id = ?`)
      .run(sessionId);
    resetStepCounter(sessionId);
    return result.changes;
  } catch (err) {
    logWarning("trajectory", "clearTrajectory failed", { error: String(err) });
    return 0;
  }
}
err.message : String(err)}`); + logWarning( + "gate-runner", + `Memory enrichment failed for gate ${gateId}: ${err instanceof Error ? err.message : String(err)}`, + ); } return gateResult; diff --git a/src/resources/extensions/sf/uok/loop-adapter.js b/src/resources/extensions/sf/uok/loop-adapter.js index 774380a00..23fc807b1 100644 --- a/src/resources/extensions/sf/uok/loop-adapter.js +++ b/src/resources/extensions/sf/uok/loop-adapter.js @@ -88,7 +88,9 @@ export function createTurnObserver(options) { permissionProfile: current.permissionProfile, }), }).catch((err) => { - console.error(`[loop-adapter] Git transaction failed: ${err.message}`); + console.error( + `[loop-adapter] Git transaction failed: ${err.message}`, + ); }); } if (options.enableAudit) { @@ -133,7 +135,9 @@ export function createTurnObserver(options) { status: "ok", metadata: nextSequenceMetadata("gitops", "update", { action }), }).catch((err) => { - console.error(`[loop-adapter] Git transaction failed: ${err.message}`); + console.error( + `[loop-adapter] Git transaction failed: ${err.message}`, + ); }); } if (phase === "unit") { @@ -149,7 +153,9 @@ export function createTurnObserver(options) { status: "ok", metadata: nextSequenceMetadata("gitops", "update", { action }), }).catch((err) => { - console.error(`[loop-adapter] Git transaction failed: ${err.message}`); + console.error( + `[loop-adapter] Git transaction failed: ${err.message}`, + ); }); } if (phase === "finalize") { @@ -165,7 +171,9 @@ export function createTurnObserver(options) { status: "ok", metadata: nextSequenceMetadata("gitops", "update", { action }), }).catch((err) => { - console.error(`[loop-adapter] Git transaction failed: ${err.message}`); + console.error( + `[loop-adapter] Git transaction failed: ${err.message}`, + ); }); } }, diff --git a/src/resources/extensions/sf/uok/parity-report.js b/src/resources/extensions/sf/uok/parity-report.js index 191f8d6ec..a8ea38685 100644 --- a/src/resources/extensions/sf/uok/parity-report.js +++ 
b/src/resources/extensions/sf/uok/parity-report.js @@ -94,7 +94,9 @@ export function parseParityEvents(raw) { }) .filter(Boolean); if (malformedCount > 0) { - console.error(`[parity-report] Dropped ${malformedCount} malformed parity event(s)`); + console.error( + `[parity-report] Dropped ${malformedCount} malformed parity event(s)`, + ); } return result; } diff --git a/src/resources/extensions/sf/uok/plan-v2.js b/src/resources/extensions/sf/uok/plan-v2.js index 7f060c032..aeaee7fcc 100644 --- a/src/resources/extensions/sf/uok/plan-v2.js +++ b/src/resources/extensions/sf/uok/plan-v2.js @@ -74,7 +74,9 @@ function detectCycles(nodes) { } } } - const queue = nodes.filter((n) => (inDegree.get(n.id) ?? 0) === 0).map((n) => n.id); + const queue = nodes + .filter((n) => (inDegree.get(n.id) ?? 0) === 0) + .map((n) => n.id); let visited = 0; while (queue.length > 0) { const current = queue.shift();