diff --git a/src/resources/extensions/sf/routing-history.js b/src/resources/extensions/sf/routing-history.js index 850072a98..dfa78f8ac 100644 --- a/src/resources/extensions/sf/routing-history.js +++ b/src/resources/extensions/sf/routing-history.js @@ -1,32 +1,55 @@ // SF Extension — Routing History (Adaptive Learning) // Tracks success/failure per tier per unit-type pattern to improve // classification accuracy over time. -import { join } from "node:path"; -import { loadJsonFile, saveJsonFile } from "./json-persistence.js"; -import { sfRuntimeRoot } from "./paths.js"; +import { + clearRoutingHistory as dbClearRoutingHistory, + getAllRoutingHistory, + getDatabase, + insertRoutingFeedback, + upsertRoutingOutcome, +} from "./sf-db.js"; // ─── Constants ─────────────────────────────────────────────────────────────── -const HISTORY_FILE = "routing-history.json"; const ROLLING_WINDOW = 50; // only consider last N entries per pattern const FAILURE_THRESHOLD = 0.2; // >20% failure rate triggers tier bump const FEEDBACK_WEIGHT = 2; // feedback signals count 2x vs automatic // ─── In-Memory State ───────────────────────────────────────────────────────── let history = null; -let historyBasePath = ""; // ─── Public API ────────────────────────────────────────────────────────────── /** * Initialize routing history for a project. */ -export function initRoutingHistory(base) { - historyBasePath = base; - history = loadHistory(base); +export function initRoutingHistory(_base) { + history = createEmptyHistory(); + const db = getDatabase(); + if (!db) return; + const rows = getAllRoutingHistory(db); + for (const row of rows) { + if (!history.patterns[row.pattern]) { + history.patterns[row.pattern] = { + light: { success: 0, fail: 0 }, + standard: { success: 0, fail: 0 }, + heavy: { success: 0, fail: 0 }, + }; + } + if ( + row.tier === "light" || + row.tier === "standard" || + row.tier === "heavy" + ) { + history.patterns[row.pattern][row.tier].success = row.success_count; + history.patterns[row.pattern][row.tier].fail = row.fail_count; + } + if (row.updated_at > history.updatedAt) { + history.updatedAt = row.updated_at; + } + } } /** * Reset routing history state. */ export function resetRoutingHistory() { history = null; - historyBasePath = ""; } /** * Record the outcome of a unit dispatch. @@ -38,12 +61,14 @@ export function resetRoutingHistory() { */ export function recordOutcome(unitType, tier, success, tags) { if (!history) return; + const db = getDatabase(); // Record for the base unit type const basePattern = unitType; ensurePattern(basePattern); const outcome = history.patterns[basePattern][tier]; if (success) outcome.success++; else outcome.fail++; + if (db) upsertRoutingOutcome(db, basePattern, tier, success); // Record for tag-specific patterns (e.g. "execute-task:docs") if (tags && tags.length > 0) { for (const tag of tags) { @@ -52,6 +77,7 @@ export function recordOutcome(unitType, tier, success, tags) { const tagOutcome = history.patterns[tagPattern][tier]; if (success) tagOutcome.success++; else tagOutcome.fail++; + if (db) upsertRoutingOutcome(db, tagPattern, tier, success); } } // Apply rolling window — cap total entries per tier per pattern @@ -67,24 +93,25 @@ export function recordOutcome(unitType, tier, success, tags) { } } history.updatedAt = new Date().toISOString(); - saveHistory(historyBasePath, history); } /** * Record user feedback for the last completed unit. */ -export function recordFeedback(unitType, unitId, tier, rating) { +export function recordFeedback(unitType, _unitId, tier, rating) { if (!history) return; + const ts = new Date().toISOString(); history.feedback.push({ - unitType, - unitId, + pattern: unitType, tier, - rating, - timestamp: new Date().toISOString(), + feedback: rating, + ts, }); // Cap feedback array at 200 entries if (history.feedback.length > 200) { history.feedback = history.feedback.slice(-200); } + const db = getDatabase(); + if (db) insertRoutingFeedback(db, unitType, tier, rating); // Apply feedback as weighted outcome const pattern = unitType; ensurePattern(pattern); @@ -95,15 +122,24 @@ export function recordFeedback(unitType, unitId, tier, rating) { if (lower) { const outcomes = history.patterns[pattern][lower]; outcomes.success += FEEDBACK_WEIGHT; + if (db) { + for (let i = 0; i < FEEDBACK_WEIGHT; i++) { + upsertRoutingOutcome(db, pattern, lower, true); + } + } } } else if (rating === "under") { // User says this needed a better model → record as failure at current tier const outcomes = history.patterns[pattern][tier]; outcomes.fail += FEEDBACK_WEIGHT; + if (db) { + for (let i = 0; i < FEEDBACK_WEIGHT; i++) { + upsertRoutingOutcome(db, pattern, tier, false); + } + } } // "ok" = no adjustment needed history.updatedAt = new Date().toISOString(); - saveHistory(historyBasePath, history); } /** * Get the recommended tier adjustment for a given pattern. @@ -126,9 +162,10 @@ export function getAdaptiveTierAdjustment(unitType, currentTier, tags) { /** * Clear all routing history (user-triggered reset). */ -export function clearRoutingHistory(base) { +export function clearRoutingHistory(_base) { history = createEmptyHistory(); - saveHistory(base, history); + const db = getDatabase(); + if (db) dbClearRoutingHistory(db); } /** * Get current history data (for display/debugging). @@ -187,25 +224,3 @@ function createEmptyHistory() { updatedAt: new Date().toISOString(), }; } -function historyPath(base) { - return join(sfRuntimeRoot(base), HISTORY_FILE); -} -function isRoutingHistoryData(data) { - return ( - typeof data === "object" && - data !== null && - data.version === 1 && - typeof data.patterns === "object" && - data.patterns !== null - ); -} -function loadHistory(base) { - return loadJsonFile( - historyPath(base), - isRoutingHistoryData, - createEmptyHistory, - ); -} -function saveHistory(base, data) { - saveJsonFile(historyPath(base), data); -} diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js index d53a1a470..04f586b0b 100644 --- a/src/resources/extensions/sf/sf-db.js +++ b/src/resources/extensions/sf/sf-db.js @@ -244,7 +244,7 @@ function performDatabaseMaintenance(rawDb, path) { ); } } -const SCHEMA_VERSION = 52; +const SCHEMA_VERSION = 54; function indexExists(db, name) { return !!db .prepare( @@ -3102,6 +3102,77 @@ function migrateSchema(db) { ":applied_at": new Date().toISOString(), }); } + if (currentVersion < 53) { + // Add routing_history and routing_feedback tables — migrate file-based + // routing history to DB-first storage. + db.exec(` + CREATE TABLE IF NOT EXISTS routing_history ( + pattern TEXT NOT NULL, + tier TEXT NOT NULL, + success_count INTEGER NOT NULL DEFAULT 0, + fail_count INTEGER NOT NULL DEFAULT 0, + updated_at TEXT NOT NULL, + PRIMARY KEY (pattern, tier) + ); + CREATE TABLE IF NOT EXISTS routing_feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + pattern TEXT NOT NULL, + tier TEXT NOT NULL, + feedback TEXT NOT NULL, + recorded_at TEXT NOT NULL + ); + `); + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 53, + ":applied_at": new Date().toISOString(), + }); + } + if (currentVersion < 54) { + // Migrate metrics ledger from .sf/runtime/metrics.json to DB-first + // unit_metrics and project_metrics_meta tables. + db.exec(` + CREATE TABLE IF NOT EXISTS unit_metrics ( + type TEXT NOT NULL, + id TEXT NOT NULL, + started_at INTEGER NOT NULL, + finished_at INTEGER NOT NULL, + model TEXT NOT NULL, + auto_session_key TEXT, + tokens_input INTEGER NOT NULL DEFAULT 0, + tokens_output INTEGER NOT NULL DEFAULT 0, + tokens_cache_read INTEGER NOT NULL DEFAULT 0, + tokens_cache_write INTEGER NOT NULL DEFAULT 0, + tokens_total INTEGER NOT NULL DEFAULT 0, + cost REAL NOT NULL DEFAULT 0, + tool_calls INTEGER NOT NULL DEFAULT 0, + assistant_messages INTEGER NOT NULL DEFAULT 0, + user_messages INTEGER NOT NULL DEFAULT 0, + api_requests INTEGER NOT NULL DEFAULT 0, + tier TEXT, + model_downgraded INTEGER, + context_window_tokens INTEGER, + truncation_sections INTEGER, + continue_here_fired INTEGER, + prompt_char_count INTEGER, + baseline_char_count INTEGER, + cache_hit_rate INTEGER, + skills TEXT, + PRIMARY KEY (type, id, started_at) + ); + CREATE TABLE IF NOT EXISTS project_metrics_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `); + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 54, + ":applied_at": new Date().toISOString(), + }); + } db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -8191,7 +8262,14 @@ export function insertTriageEval(id, runId, data, createdAt) { * Purpose: store triage inbox items (eval_candidate, implementation_task, etc.) in DB. * Consumer: commands-todo.js triageTodoDump. */ -export function insertTriageItem(id, runId, kind, content, evidence, createdAt) { +export function insertTriageItem( + id, + runId, + kind, + content, + evidence, + createdAt, +) { if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); currentDb .prepare( @@ -8334,3 +8412,241 @@ export function upsertValidationAttentionMarker(milestoneId, marker) { ":revalidation_requested_at": marker.revalidationRequestedAt ?? null, }); } + +// ─── Routing History ────────────────────────────────────────────────────────── + +/** + * Upsert a routing outcome for a pattern/tier pair, incrementing success or fail count. + * Purpose: persist adaptive tier learning to DB so routing decisions survive restarts. + * Consumer: routing-history.js recordOutcome. + */ +export function upsertRoutingOutcome(db, pattern, tier, success) { + db.prepare( + `INSERT INTO routing_history (pattern, tier, success_count, fail_count, updated_at) + VALUES (:pattern, :tier, :success_count, :fail_count, :updated_at) + ON CONFLICT(pattern, tier) DO UPDATE SET + success_count = success_count + excluded.success_count, + fail_count = fail_count + excluded.fail_count, + updated_at = excluded.updated_at`, + ).run({ + ":pattern": pattern, + ":tier": tier, + ":success_count": success ? 1 : 0, + ":fail_count": success ? 0 : 1, + ":updated_at": new Date().toISOString(), + }); +} + +/** + * Get all routing history rows. + * Purpose: load full routing state into memory on init. + * Consumer: routing-history.js initRoutingHistory. + */ +export function getAllRoutingHistory(db) { + return db + .prepare( + "SELECT pattern, tier, success_count, fail_count, updated_at FROM routing_history", + ) + .all(); +} + +/** + * Get routing history rows for a specific pattern. + * Purpose: targeted pattern lookup for adaptive tier queries. + * Consumer: routing-history.js getRoutingHistoryForPattern. + */ +export function getRoutingHistoryForPattern(db, pattern) { + return db + .prepare( + "SELECT tier, success_count, fail_count FROM routing_history WHERE pattern = ?", + ) + .all(pattern); +} + +/** + * Insert a routing feedback signal into the audit table. + * Purpose: persist user feedback for later analysis and weighted outcome application. + * Consumer: routing-history.js recordFeedback. + */ +export function insertRoutingFeedback(db, pattern, tier, feedback) { + db.prepare( + `INSERT INTO routing_feedback (pattern, tier, feedback, recorded_at) + VALUES (:pattern, :tier, :feedback, :recorded_at)`, + ).run({ + ":pattern": pattern, + ":tier": tier, + ":feedback": feedback, + ":recorded_at": new Date().toISOString(), + }); +} + +/** + * Clear all routing history and feedback rows. + * Purpose: full reset of adaptive learning state on user request. + * Consumer: routing-history.js clearRoutingHistory. + */ +export function clearRoutingHistory(db) { + db.prepare("DELETE FROM routing_history").run(); + db.prepare("DELETE FROM routing_feedback").run(); +} + +// ─── Unit Metrics CRUD ──────────────────────────────────────────────────────── + +function rowToUnitMetrics(row) { + const unit = { + type: row["type"], + id: row["id"], + model: row["model"], + startedAt: row["started_at"], + finishedAt: row["finished_at"], + tokens: { + input: row["tokens_input"], + output: row["tokens_output"], + cacheRead: row["tokens_cache_read"], + cacheWrite: row["tokens_cache_write"], + total: row["tokens_total"], + }, + cost: row["cost"], + toolCalls: row["tool_calls"], + assistantMessages: row["assistant_messages"], + userMessages: row["user_messages"], + apiRequests: row["api_requests"], + }; + if (row["auto_session_key"] != null) + unit.autoSessionKey = row["auto_session_key"]; + if (row["tier"] != null) unit.tier = row["tier"]; + if (row["model_downgraded"] != null) + unit.modelDowngraded = row["model_downgraded"] === 1; + if (row["context_window_tokens"] != null) + unit.contextWindowTokens = row["context_window_tokens"]; + if (row["truncation_sections"] != null) + unit.truncationSections = row["truncation_sections"]; + if (row["continue_here_fired"] != null) + unit.continueHereFired = row["continue_here_fired"] === 1; + if (row["prompt_char_count"] != null) + unit.promptCharCount = row["prompt_char_count"]; + if (row["baseline_char_count"] != null) + unit.baselineCharCount = row["baseline_char_count"]; + if (row["cache_hit_rate"] != null) unit.cacheHitRate = row["cache_hit_rate"]; + if (row["skills"] != null) unit.skills = JSON.parse(row["skills"]); + return unit; +} + +/** + * Upsert a single unit metrics record into the DB. + * + * Purpose: persist per-unit token/cost telemetry from autonomous mode so + * history, cost, and export commands can read from the canonical DB store + * instead of a fragile JSON file on disk. + * + * Consumer: metrics.js saveLedger (called after every unit snapshot). + */ +export function upsertUnitMetrics(db, unit) { + db.prepare( + `INSERT OR REPLACE INTO unit_metrics ( + type, id, started_at, finished_at, model, auto_session_key, + tokens_input, tokens_output, tokens_cache_read, tokens_cache_write, tokens_total, + cost, tool_calls, assistant_messages, user_messages, api_requests, + tier, model_downgraded, context_window_tokens, truncation_sections, + continue_here_fired, prompt_char_count, baseline_char_count, cache_hit_rate, skills + ) VALUES ( + :type, :id, :started_at, :finished_at, :model, :auto_session_key, + :tokens_input, :tokens_output, :tokens_cache_read, :tokens_cache_write, :tokens_total, + :cost, :tool_calls, :assistant_messages, :user_messages, :api_requests, + :tier, :model_downgraded, :context_window_tokens, :truncation_sections, + :continue_here_fired, :prompt_char_count, :baseline_char_count, :cache_hit_rate, :skills + )`, + ).run({ + ":type": unit.type, + ":id": unit.id, + ":started_at": unit.startedAt, + ":finished_at": unit.finishedAt, + ":model": unit.model, + ":auto_session_key": unit.autoSessionKey ?? null, + ":tokens_input": unit.tokens.input, + ":tokens_output": unit.tokens.output, + ":tokens_cache_read": unit.tokens.cacheRead, + ":tokens_cache_write": unit.tokens.cacheWrite, + ":tokens_total": unit.tokens.total, + ":cost": unit.cost, + ":tool_calls": unit.toolCalls, + ":assistant_messages": unit.assistantMessages, + ":user_messages": unit.userMessages, + ":api_requests": unit.apiRequests ?? unit.assistantMessages, + ":tier": unit.tier ?? null, + ":model_downgraded": + unit.modelDowngraded != null ? (unit.modelDowngraded ? 1 : 0) : null, + ":context_window_tokens": unit.contextWindowTokens ?? null, + ":truncation_sections": unit.truncationSections ?? null, + ":continue_here_fired": + unit.continueHereFired != null ? (unit.continueHereFired ? 1 : 0) : null, + ":prompt_char_count": unit.promptCharCount ?? null, + ":baseline_char_count": unit.baselineCharCount ?? null, + ":cache_hit_rate": unit.cacheHitRate ?? null, + ":skills": unit.skills != null ? JSON.stringify(unit.skills) : null, + }); +} + +/** + * Load all unit metrics ordered by started_at ASC (oldest first). + * + * Purpose: reconstruct the in-memory ledger from the canonical DB store + * on session init or on demand from history/cost commands. + * + * Consumer: metrics.js loadLedgerFromDisk and loadLedger. + */ +export function getAllUnitMetrics(db) { + return db + .prepare("SELECT * FROM unit_metrics ORDER BY started_at ASC") + .all() + .map(rowToUnitMetrics); +} + +/** + * Delete oldest unit_metrics rows keeping only the N most recent by finished_at. + * + * Purpose: enforce a max-ledger-size cap so the DB doesn't bloat over long + * autonomous runs. Called by the doctor when the ledger exceeds its threshold. + * + * Consumer: metrics.js pruneMetricsLedger. + */ +export function pruneUnitMetrics(db, keepCount) { + db.prepare( + `DELETE FROM unit_metrics WHERE rowid NOT IN ( + SELECT rowid FROM unit_metrics ORDER BY finished_at DESC LIMIT :keepCount + )`, + ).run({ ":keepCount": keepCount }); +} + +/** + * Get the project start timestamp stored in project_metrics_meta. + * + * Purpose: surface when the autonomous run started for elapsed-time display. + * + * Consumer: metrics.js loadLedger and loadLedgerFromDisk. + */ +export function getProjectStartedAt(db) { + const row = db + .prepare( + "SELECT value FROM project_metrics_meta WHERE key = 'projectStartedAt'", + ) + .get(); + if (!row) return null; + const ts = Number(row["value"]); + return Number.isFinite(ts) ? ts : null; +} + +/** + * Persist the project start timestamp in project_metrics_meta. + * + * Purpose: survive process restarts so the dashboard shows wall-clock elapsed + * time for the full autonomous session, not just the current process lifetime. + * + * Consumer: metrics.js initMetrics (via loadLedger → defaultLedger path). + */ +export function setProjectStartedAt(db, ts) { + db.prepare( + `INSERT INTO project_metrics_meta (key, value) VALUES ('projectStartedAt', :value) + ON CONFLICT(key) DO UPDATE SET value = excluded.value`, + ).run({ ":value": String(ts) }); +} diff --git a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs index afa0d21ca..a88f5d5d3 100644 --- a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs +++ b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs @@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill", const version = db .prepare("SELECT MAX(version) AS version FROM schema_version") .get(); - assert.equal(version.version, 52); + assert.equal(version.version, 54); const taskSpec = db .prepare( "SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",