feat(db): routing-history + metrics ledger → DB-first (schema v54)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-09 16:02:47 +02:00
parent bd0c612993
commit 9df46d2d88
3 changed files with 374 additions and 43 deletions

View file

@ -1,32 +1,55 @@
// SF Extension — Routing History (Adaptive Learning)
// Tracks success/failure per tier per unit-type pattern to improve
// classification accuracy over time.
import { join } from "node:path";
import { loadJsonFile, saveJsonFile } from "./json-persistence.js";
import { sfRuntimeRoot } from "./paths.js";
import {
clearRoutingHistory as dbClearRoutingHistory,
getAllRoutingHistory,
getDatabase,
insertRoutingFeedback,
upsertRoutingOutcome,
} from "./sf-db.js";
// ─── Constants ───────────────────────────────────────────────────────────────
const HISTORY_FILE = "routing-history.json";
const ROLLING_WINDOW = 50; // only consider last N entries per pattern
const FAILURE_THRESHOLD = 0.2; // >20% failure rate triggers tier bump
const FEEDBACK_WEIGHT = 2; // feedback signals count 2x vs automatic
// ─── In-Memory State ─────────────────────────────────────────────────────────
let history = null;
let historyBasePath = "";
// ─── Public API ──────────────────────────────────────────────────────────────
/**
* Initialize routing history for a project.
*/
export function initRoutingHistory(base) {
historyBasePath = base;
history = loadHistory(base);
/**
 * Initialize in-memory routing history from the database.
 *
 * Rebuilds the per-pattern, per-tier success/fail counters from the
 * routing_history table. The `_base` parameter is retained for interface
 * compatibility with the previous file-based implementation but is unused.
 */
export function initRoutingHistory(_base) {
  history = createEmptyHistory();
  const db = getDatabase();
  // No open database: keep the empty in-memory history (best-effort mode).
  if (!db) return;
  const rows = getAllRoutingHistory(db);
  for (const row of rows) {
    // Lazily create the tier counters the first time a pattern is seen.
    if (!history.patterns[row.pattern]) {
      history.patterns[row.pattern] = {
        light: { success: 0, fail: 0 },
        standard: { success: 0, fail: 0 },
        heavy: { success: 0, fail: 0 },
      };
    }
    // Only known tiers are loaded; rows with any other tier value are skipped.
    if (
      row.tier === "light" ||
      row.tier === "standard" ||
      row.tier === "heavy"
    ) {
      history.patterns[row.pattern][row.tier].success = row.success_count;
      history.patterns[row.pattern][row.tier].fail = row.fail_count;
    }
    // ISO-8601 strings compare correctly as plain strings.
    // NOTE(review): createEmptyHistory() stamps updatedAt with the current
    // time, so past DB timestamps will rarely win this comparison — confirm
    // whether updatedAt is meant to reflect the last DB write or load time.
    if (row.updated_at > history.updatedAt) {
      history.updatedAt = row.updated_at;
    }
  }
}
/**
 * Reset routing history module state.
 *
 * Drops the in-memory history so a later initRoutingHistory starts fresh.
 * Does not touch the database.
 */
export function resetRoutingHistory() {
  historyBasePath = "";
  history = null;
}
/**
* Record the outcome of a unit dispatch.
@ -38,12 +61,14 @@ export function resetRoutingHistory() {
*/
export function recordOutcome(unitType, tier, success, tags) {
if (!history) return;
const db = getDatabase();
// Record for the base unit type
const basePattern = unitType;
ensurePattern(basePattern);
const outcome = history.patterns[basePattern][tier];
if (success) outcome.success++;
else outcome.fail++;
if (db) upsertRoutingOutcome(db, basePattern, tier, success);
// Record for tag-specific patterns (e.g. "execute-task:docs")
if (tags && tags.length > 0) {
for (const tag of tags) {
@ -52,6 +77,7 @@ export function recordOutcome(unitType, tier, success, tags) {
const tagOutcome = history.patterns[tagPattern][tier];
if (success) tagOutcome.success++;
else tagOutcome.fail++;
if (db) upsertRoutingOutcome(db, tagPattern, tier, success);
}
}
// Apply rolling window — cap total entries per tier per pattern
@ -67,24 +93,25 @@ export function recordOutcome(unitType, tier, success, tags) {
}
}
history.updatedAt = new Date().toISOString();
saveHistory(historyBasePath, history);
}
/**
* Record user feedback for the last completed unit.
*/
export function recordFeedback(unitType, unitId, tier, rating) {
export function recordFeedback(unitType, _unitId, tier, rating) {
if (!history) return;
const ts = new Date().toISOString();
history.feedback.push({
unitType,
unitId,
pattern: unitType,
tier,
rating,
timestamp: new Date().toISOString(),
feedback: rating,
ts,
});
// Cap feedback array at 200 entries
if (history.feedback.length > 200) {
history.feedback = history.feedback.slice(-200);
}
const db = getDatabase();
if (db) insertRoutingFeedback(db, unitType, tier, rating);
// Apply feedback as weighted outcome
const pattern = unitType;
ensurePattern(pattern);
@ -95,15 +122,24 @@ export function recordFeedback(unitType, unitId, tier, rating) {
if (lower) {
const outcomes = history.patterns[pattern][lower];
outcomes.success += FEEDBACK_WEIGHT;
if (db) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, lower, true);
}
}
}
} else if (rating === "under") {
// User says this needed a better model → record as failure at current tier
const outcomes = history.patterns[pattern][tier];
outcomes.fail += FEEDBACK_WEIGHT;
if (db) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, tier, false);
}
}
}
// "ok" = no adjustment needed
history.updatedAt = new Date().toISOString();
saveHistory(historyBasePath, history);
}
/**
* Get the recommended tier adjustment for a given pattern.
@ -126,9 +162,10 @@ export function getAdaptiveTierAdjustment(unitType, currentTier, tags) {
/**
* Clear all routing history (user-triggered reset).
*/
export function clearRoutingHistory(base) {
export function clearRoutingHistory(_base) {
history = createEmptyHistory();
saveHistory(base, history);
const db = getDatabase();
if (db) dbClearRoutingHistory(db);
}
/**
* Get current history data (for display/debugging).
@ -187,25 +224,3 @@ function createEmptyHistory() {
updatedAt: new Date().toISOString(),
};
}
function historyPath(base) {
return join(sfRuntimeRoot(base), HISTORY_FILE);
}
function isRoutingHistoryData(data) {
return (
typeof data === "object" &&
data !== null &&
data.version === 1 &&
typeof data.patterns === "object" &&
data.patterns !== null
);
}
function loadHistory(base) {
return loadJsonFile(
historyPath(base),
isRoutingHistoryData,
createEmptyHistory,
);
}
function saveHistory(base, data) {
saveJsonFile(historyPath(base), data);
}

View file

@ -244,7 +244,7 @@ function performDatabaseMaintenance(rawDb, path) {
);
}
}
const SCHEMA_VERSION = 52;
const SCHEMA_VERSION = 54;
function indexExists(db, name) {
return !!db
.prepare(
@ -3102,6 +3102,77 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 53) {
// Add routing_history and routing_feedback tables — migrate file-based
// routing history to DB-first storage.
db.exec(`
CREATE TABLE IF NOT EXISTS routing_history (
pattern TEXT NOT NULL,
tier TEXT NOT NULL,
success_count INTEGER NOT NULL DEFAULT 0,
fail_count INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL,
PRIMARY KEY (pattern, tier)
);
CREATE TABLE IF NOT EXISTS routing_feedback (
id INTEGER PRIMARY KEY AUTOINCREMENT,
pattern TEXT NOT NULL,
tier TEXT NOT NULL,
feedback TEXT NOT NULL,
recorded_at TEXT NOT NULL
);
`);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 53,
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 54) {
// Migrate metrics ledger from .sf/runtime/metrics.json to DB-first
// unit_metrics and project_metrics_meta tables.
db.exec(`
CREATE TABLE IF NOT EXISTS unit_metrics (
type TEXT NOT NULL,
id TEXT NOT NULL,
started_at INTEGER NOT NULL,
finished_at INTEGER NOT NULL,
model TEXT NOT NULL,
auto_session_key TEXT,
tokens_input INTEGER NOT NULL DEFAULT 0,
tokens_output INTEGER NOT NULL DEFAULT 0,
tokens_cache_read INTEGER NOT NULL DEFAULT 0,
tokens_cache_write INTEGER NOT NULL DEFAULT 0,
tokens_total INTEGER NOT NULL DEFAULT 0,
cost REAL NOT NULL DEFAULT 0,
tool_calls INTEGER NOT NULL DEFAULT 0,
assistant_messages INTEGER NOT NULL DEFAULT 0,
user_messages INTEGER NOT NULL DEFAULT 0,
api_requests INTEGER NOT NULL DEFAULT 0,
tier TEXT,
model_downgraded INTEGER,
context_window_tokens INTEGER,
truncation_sections INTEGER,
continue_here_fired INTEGER,
prompt_char_count INTEGER,
baseline_char_count INTEGER,
cache_hit_rate INTEGER,
skills TEXT,
PRIMARY KEY (type, id, started_at)
);
CREATE TABLE IF NOT EXISTS project_metrics_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 54,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -8191,7 +8262,14 @@ export function insertTriageEval(id, runId, data, createdAt) {
* Purpose: store triage inbox items (eval_candidate, implementation_task, etc.) in DB.
* Consumer: commands-todo.js triageTodoDump.
*/
export function insertTriageItem(id, runId, kind, content, evidence, createdAt) {
export function insertTriageItem(
id,
runId,
kind,
content,
evidence,
createdAt,
) {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
currentDb
.prepare(
@ -8334,3 +8412,241 @@ export function upsertValidationAttentionMarker(milestoneId, marker) {
":revalidation_requested_at": marker.revalidationRequestedAt ?? null,
});
}
// ─── Routing History ──────────────────────────────────────────────────────────
/**
 * Upsert a routing outcome for a pattern/tier pair, incrementing success or fail count.
 * Purpose: persist adaptive tier learning to DB so routing decisions survive restarts.
 * Consumer: routing-history.js recordOutcome.
 */
export function upsertRoutingOutcome(db, pattern, tier, success) {
  const now = new Date().toISOString();
  // One outcome per call: a success contributes 1/0, a failure 0/1; the
  // ON CONFLICT clause accumulates the increments onto the existing row.
  const successIncrement = success ? 1 : 0;
  const stmt = db.prepare(
    `INSERT INTO routing_history (pattern, tier, success_count, fail_count, updated_at)
     VALUES (:pattern, :tier, :success_count, :fail_count, :updated_at)
     ON CONFLICT(pattern, tier) DO UPDATE SET
       success_count = success_count + excluded.success_count,
       fail_count = fail_count + excluded.fail_count,
       updated_at = excluded.updated_at`,
  );
  stmt.run({
    ":pattern": pattern,
    ":tier": tier,
    ":success_count": successIncrement,
    ":fail_count": 1 - successIncrement,
    ":updated_at": now,
  });
}
/**
 * Get all routing history rows.
 * Purpose: load full routing state into memory on init.
 * Consumer: routing-history.js initRoutingHistory.
 */
export function getAllRoutingHistory(db) {
  const stmt = db.prepare(
    "SELECT pattern, tier, success_count, fail_count, updated_at FROM routing_history",
  );
  return stmt.all();
}
/**
 * Get routing history rows for a specific pattern.
 * Purpose: targeted pattern lookup for adaptive tier queries.
 * Consumer: routing-history.js getRoutingHistoryForPattern.
 */
export function getRoutingHistoryForPattern(db, pattern) {
  const stmt = db.prepare(
    "SELECT tier, success_count, fail_count FROM routing_history WHERE pattern = ?",
  );
  return stmt.all(pattern);
}
/**
 * Append a routing feedback signal to the audit table.
 * Purpose: persist user feedback for later analysis and weighted outcome application.
 * Consumer: routing-history.js recordFeedback.
 */
export function insertRoutingFeedback(db, pattern, tier, feedback) {
  const recordedAt = new Date().toISOString();
  const stmt = db.prepare(
    `INSERT INTO routing_feedback (pattern, tier, feedback, recorded_at)
     VALUES (:pattern, :tier, :feedback, :recorded_at)`,
  );
  stmt.run({
    ":pattern": pattern,
    ":tier": tier,
    ":feedback": feedback,
    ":recorded_at": recordedAt,
  });
}
/**
 * Clear all routing history and feedback rows.
 * Purpose: full reset of adaptive learning state on user request.
 * Consumer: routing-history.js clearRoutingHistory.
 */
export function clearRoutingHistory(db) {
  // Counters first, then the feedback audit log (same order as before).
  for (const table of ["routing_history", "routing_feedback"]) {
    db.prepare(`DELETE FROM ${table}`).run();
  }
}
// ─── Unit Metrics CRUD ────────────────────────────────────────────────────────
// Convert a unit_metrics DB row into the in-memory unit metrics object.
// Always-present columns map 1:1; nullable columns are attached only when
// non-null, and 0/1 integer flags are converted back to booleans.
function rowToUnitMetrics(row) {
  const intToBool = (v) => v === 1;
  const unit = {
    type: row.type,
    id: row.id,
    model: row.model,
    startedAt: row.started_at,
    finishedAt: row.finished_at,
    tokens: {
      input: row.tokens_input,
      output: row.tokens_output,
      cacheRead: row.tokens_cache_read,
      cacheWrite: row.tokens_cache_write,
      total: row.tokens_total,
    },
    cost: row.cost,
    toolCalls: row.tool_calls,
    assistantMessages: row.assistant_messages,
    userMessages: row.user_messages,
    apiRequests: row.api_requests,
  };
  if (row.auto_session_key != null) unit.autoSessionKey = row.auto_session_key;
  if (row.tier != null) unit.tier = row.tier;
  if (row.model_downgraded != null)
    unit.modelDowngraded = intToBool(row.model_downgraded);
  if (row.context_window_tokens != null)
    unit.contextWindowTokens = row.context_window_tokens;
  if (row.truncation_sections != null)
    unit.truncationSections = row.truncation_sections;
  if (row.continue_here_fired != null)
    unit.continueHereFired = intToBool(row.continue_here_fired);
  if (row.prompt_char_count != null)
    unit.promptCharCount = row.prompt_char_count;
  if (row.baseline_char_count != null)
    unit.baselineCharCount = row.baseline_char_count;
  if (row.cache_hit_rate != null) unit.cacheHitRate = row.cache_hit_rate;
  // skills is stored as a JSON array string.
  if (row.skills != null) unit.skills = JSON.parse(row.skills);
  return unit;
}
/**
 * Upsert a single unit metrics record into the DB.
 *
 * Purpose: persist per-unit token/cost telemetry from autonomous mode so
 * history, cost, and export commands can read from the canonical DB store
 * instead of a fragile JSON file on disk.
 *
 * Consumer: metrics.js saveLedger (called after every unit snapshot).
 */
export function upsertUnitMetrics(db, unit) {
  // Optional booleans are stored as 0/1 integers; absent values become NULL.
  const flag = (v) => (v == null ? null : v ? 1 : 0);
  const params = {
    ":type": unit.type,
    ":id": unit.id,
    ":started_at": unit.startedAt,
    ":finished_at": unit.finishedAt,
    ":model": unit.model,
    ":auto_session_key": unit.autoSessionKey ?? null,
    ":tokens_input": unit.tokens.input,
    ":tokens_output": unit.tokens.output,
    ":tokens_cache_read": unit.tokens.cacheRead,
    ":tokens_cache_write": unit.tokens.cacheWrite,
    ":tokens_total": unit.tokens.total,
    ":cost": unit.cost,
    ":tool_calls": unit.toolCalls,
    ":assistant_messages": unit.assistantMessages,
    ":user_messages": unit.userMessages,
    // apiRequests may be absent on a unit; fall back to assistantMessages.
    ":api_requests": unit.apiRequests ?? unit.assistantMessages,
    ":tier": unit.tier ?? null,
    ":model_downgraded": flag(unit.modelDowngraded),
    ":context_window_tokens": unit.contextWindowTokens ?? null,
    ":truncation_sections": unit.truncationSections ?? null,
    ":continue_here_fired": flag(unit.continueHereFired),
    ":prompt_char_count": unit.promptCharCount ?? null,
    ":baseline_char_count": unit.baselineCharCount ?? null,
    ":cache_hit_rate": unit.cacheHitRate ?? null,
    // skills is serialized as a JSON array string.
    ":skills": unit.skills != null ? JSON.stringify(unit.skills) : null,
  };
  db.prepare(
    `INSERT OR REPLACE INTO unit_metrics (
      type, id, started_at, finished_at, model, auto_session_key,
      tokens_input, tokens_output, tokens_cache_read, tokens_cache_write, tokens_total,
      cost, tool_calls, assistant_messages, user_messages, api_requests,
      tier, model_downgraded, context_window_tokens, truncation_sections,
      continue_here_fired, prompt_char_count, baseline_char_count, cache_hit_rate, skills
    ) VALUES (
      :type, :id, :started_at, :finished_at, :model, :auto_session_key,
      :tokens_input, :tokens_output, :tokens_cache_read, :tokens_cache_write, :tokens_total,
      :cost, :tool_calls, :assistant_messages, :user_messages, :api_requests,
      :tier, :model_downgraded, :context_window_tokens, :truncation_sections,
      :continue_here_fired, :prompt_char_count, :baseline_char_count, :cache_hit_rate, :skills
    )`,
  ).run(params);
}
/**
 * Load all unit metrics ordered by started_at ASC (oldest first).
 *
 * Purpose: reconstruct the in-memory ledger from the canonical DB store
 * on session init or on demand from history/cost commands.
 *
 * Consumer: metrics.js loadLedgerFromDisk and loadLedger.
 */
export function getAllUnitMetrics(db) {
  const rows = db
    .prepare("SELECT * FROM unit_metrics ORDER BY started_at ASC")
    .all();
  return rows.map((row) => rowToUnitMetrics(row));
}
/**
 * Delete oldest unit_metrics rows keeping only the N most recent by finished_at.
 *
 * Purpose: enforce a max-ledger-size cap so the DB doesn't bloat over long
 * autonomous runs. Called by the doctor when the ledger exceeds its threshold.
 *
 * Consumer: metrics.js pruneMetricsLedger.
 */
export function pruneUnitMetrics(db, keepCount) {
  // Keep the newest :keepCount rows by finished_at; everything else goes.
  const stmt = db.prepare(
    `DELETE FROM unit_metrics WHERE rowid NOT IN (
      SELECT rowid FROM unit_metrics ORDER BY finished_at DESC LIMIT :keepCount
    )`,
  );
  stmt.run({ ":keepCount": keepCount });
}
/**
 * Get the project start timestamp stored in project_metrics_meta.
 *
 * Purpose: surface when the autonomous run started for elapsed-time display.
 * Returns null when the key is missing or its value is not a finite number.
 *
 * Consumer: metrics.js loadLedger and loadLedgerFromDisk.
 */
export function getProjectStartedAt(db) {
  const row = db
    .prepare(
      "SELECT value FROM project_metrics_meta WHERE key = 'projectStartedAt'",
    )
    .get();
  if (!row) return null;
  // Value is stored as a string; reject anything that isn't a finite number.
  const parsed = Number(row.value);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
/**
 * Persist the project start timestamp in project_metrics_meta.
 *
 * Purpose: survive process restarts so the dashboard shows wall-clock elapsed
 * time for the full autonomous session, not just the current process lifetime.
 *
 * Consumer: metrics.js initMetrics (via loadLedger defaultLedger path).
 */
export function setProjectStartedAt(db, ts) {
  // Stored as text; getProjectStartedAt converts back with Number().
  const stmt = db.prepare(
    `INSERT INTO project_metrics_meta (key, value) VALUES ('projectStartedAt', :value)
     ON CONFLICT(key) DO UPDATE SET value = excluded.value`,
  );
  stmt.run({ ":value": String(ts) });
}

View file

@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 52);
assert.equal(version.version, 54);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",