feat(db): routing-history + metrics ledger → DB-first (schema v54)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-09 16:02:47 +02:00
parent bd0c612993
commit 9df46d2d88
3 changed files with 374 additions and 43 deletions

View file

@ -1,32 +1,55 @@
// SF Extension — Routing History (Adaptive Learning)
// Tracks success/failure per tier per unit-type pattern to improve
// classification accuracy over time.
import { join } from "node:path";
import { loadJsonFile, saveJsonFile } from "./json-persistence.js";
import { sfRuntimeRoot } from "./paths.js";
import {
clearRoutingHistory as dbClearRoutingHistory,
getAllRoutingHistory,
getDatabase,
insertRoutingFeedback,
upsertRoutingOutcome,
} from "./sf-db.js";
// ─── Constants ───────────────────────────────────────────────────────────────
const HISTORY_FILE = "routing-history.json";
const ROLLING_WINDOW = 50; // only consider last N entries per pattern
const FAILURE_THRESHOLD = 0.2; // >20% failure rate triggers tier bump
const FEEDBACK_WEIGHT = 2; // feedback signals count 2x vs automatic
// ─── In-Memory State ─────────────────────────────────────────────────────────
let history = null;
let historyBasePath = "";
// ─── Public API ──────────────────────────────────────────────────────────────
/**
* Initialize routing history for a project.
*/
export function initRoutingHistory(base) {
historyBasePath = base;
history = loadHistory(base);
/**
 * Initialize in-memory routing history from the database.
 *
 * Rebuilds the per-pattern, per-tier success/fail counters from the
 * routing_history table. The `_base` parameter is retained for interface
 * compatibility with the previous file-based implementation but is unused.
 */
export function initRoutingHistory(_base) {
  history = createEmptyHistory();
  const db = getDatabase();
  // No open database: keep the empty in-memory history (best-effort mode).
  if (!db) return;
  const rows = getAllRoutingHistory(db);
  for (const row of rows) {
    // Lazily create the tier counters the first time a pattern is seen.
    if (!history.patterns[row.pattern]) {
      history.patterns[row.pattern] = {
        light: { success: 0, fail: 0 },
        standard: { success: 0, fail: 0 },
        heavy: { success: 0, fail: 0 },
      };
    }
    // Only known tiers are loaded; rows with any other tier value are skipped.
    if (
      row.tier === "light" ||
      row.tier === "standard" ||
      row.tier === "heavy"
    ) {
      history.patterns[row.pattern][row.tier].success = row.success_count;
      history.patterns[row.pattern][row.tier].fail = row.fail_count;
    }
    // ISO-8601 strings compare correctly as plain strings.
    // NOTE(review): createEmptyHistory() stamps updatedAt with the current
    // time, so past DB timestamps will rarely win this comparison — confirm
    // whether updatedAt is meant to reflect the last DB write or load time.
    if (row.updated_at > history.updatedAt) {
      history.updatedAt = row.updated_at;
    }
  }
}
/**
 * Reset routing history module state.
 *
 * Drops the in-memory history so a later initRoutingHistory starts fresh.
 * Does not touch the database.
 */
export function resetRoutingHistory() {
  historyBasePath = "";
  history = null;
}
/**
* Record the outcome of a unit dispatch.
@ -38,12 +61,14 @@ export function resetRoutingHistory() {
*/
export function recordOutcome(unitType, tier, success, tags) {
if (!history) return;
const db = getDatabase();
// Record for the base unit type
const basePattern = unitType;
ensurePattern(basePattern);
const outcome = history.patterns[basePattern][tier];
if (success) outcome.success++;
else outcome.fail++;
if (db) upsertRoutingOutcome(db, basePattern, tier, success);
// Record for tag-specific patterns (e.g. "execute-task:docs")
if (tags && tags.length > 0) {
for (const tag of tags) {
@ -52,6 +77,7 @@ export function recordOutcome(unitType, tier, success, tags) {
const tagOutcome = history.patterns[tagPattern][tier];
if (success) tagOutcome.success++;
else tagOutcome.fail++;
if (db) upsertRoutingOutcome(db, tagPattern, tier, success);
}
}
// Apply rolling window — cap total entries per tier per pattern
@ -67,24 +93,25 @@ export function recordOutcome(unitType, tier, success, tags) {
}
}
history.updatedAt = new Date().toISOString();
saveHistory(historyBasePath, history);
}
/**
* Record user feedback for the last completed unit.
*/
export function recordFeedback(unitType, unitId, tier, rating) {
export function recordFeedback(unitType, _unitId, tier, rating) {
if (!history) return;
const ts = new Date().toISOString();
history.feedback.push({
unitType,
unitId,
pattern: unitType,
tier,
rating,
timestamp: new Date().toISOString(),
feedback: rating,
ts,
});
// Cap feedback array at 200 entries
if (history.feedback.length > 200) {
history.feedback = history.feedback.slice(-200);
}
const db = getDatabase();
if (db) insertRoutingFeedback(db, unitType, tier, rating);
// Apply feedback as weighted outcome
const pattern = unitType;
ensurePattern(pattern);
@ -95,15 +122,24 @@ export function recordFeedback(unitType, unitId, tier, rating) {
if (lower) {
const outcomes = history.patterns[pattern][lower];
outcomes.success += FEEDBACK_WEIGHT;
if (db) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, lower, true);
}
}
}
} else if (rating === "under") {
// User says this needed a better model → record as failure at current tier
const outcomes = history.patterns[pattern][tier];
outcomes.fail += FEEDBACK_WEIGHT;
if (db) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, tier, false);
}
}
}
// "ok" = no adjustment needed
history.updatedAt = new Date().toISOString();
saveHistory(historyBasePath, history);
}
/**
* Get the recommended tier adjustment for a given pattern.
@ -126,9 +162,10 @@ export function getAdaptiveTierAdjustment(unitType, currentTier, tags) {
/**
* Clear all routing history (user-triggered reset).
*/
export function clearRoutingHistory(base) {
export function clearRoutingHistory(_base) {
history = createEmptyHistory();
saveHistory(base, history);
const db = getDatabase();
if (db) dbClearRoutingHistory(db);
}
/**
* Get current history data (for display/debugging).
@ -187,25 +224,3 @@ function createEmptyHistory() {
updatedAt: new Date().toISOString(),
};
}
function historyPath(base) {
return join(sfRuntimeRoot(base), HISTORY_FILE);
}
function isRoutingHistoryData(data) {
return (
typeof data === "object" &&
data !== null &&
data.version === 1 &&
typeof data.patterns === "object" &&
data.patterns !== null
);
}
function loadHistory(base) {
return loadJsonFile(
historyPath(base),
isRoutingHistoryData,
createEmptyHistory,
);
}
function saveHistory(base, data) {
saveJsonFile(historyPath(base), data);
}

View file

@ -244,7 +244,7 @@ function performDatabaseMaintenance(rawDb, path) {
);
}
}
const SCHEMA_VERSION = 52;
const SCHEMA_VERSION = 54;
function indexExists(db, name) {
return !!db
.prepare(
@ -3102,6 +3102,77 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 53) {
// Add routing_history and routing_feedback tables — migrate file-based
// routing history to DB-first storage.
db.exec(`
CREATE TABLE IF NOT EXISTS routing_history (
pattern TEXT NOT NULL,
tier TEXT NOT NULL,
success_count INTEGER NOT NULL DEFAULT 0,
fail_count INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL,
PRIMARY KEY (pattern, tier)
);
CREATE TABLE IF NOT EXISTS routing_feedback (
id INTEGER PRIMARY KEY AUTOINCREMENT,
pattern TEXT NOT NULL,
tier TEXT NOT NULL,
feedback TEXT NOT NULL,
recorded_at TEXT NOT NULL
);
`);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 53,
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 54) {
// Migrate metrics ledger from .sf/runtime/metrics.json to DB-first
// unit_metrics and project_metrics_meta tables.
db.exec(`
CREATE TABLE IF NOT EXISTS unit_metrics (
type TEXT NOT NULL,
id TEXT NOT NULL,
started_at INTEGER NOT NULL,
finished_at INTEGER NOT NULL,
model TEXT NOT NULL,
auto_session_key TEXT,
tokens_input INTEGER NOT NULL DEFAULT 0,
tokens_output INTEGER NOT NULL DEFAULT 0,
tokens_cache_read INTEGER NOT NULL DEFAULT 0,
tokens_cache_write INTEGER NOT NULL DEFAULT 0,
tokens_total INTEGER NOT NULL DEFAULT 0,
cost REAL NOT NULL DEFAULT 0,
tool_calls INTEGER NOT NULL DEFAULT 0,
assistant_messages INTEGER NOT NULL DEFAULT 0,
user_messages INTEGER NOT NULL DEFAULT 0,
api_requests INTEGER NOT NULL DEFAULT 0,
tier TEXT,
model_downgraded INTEGER,
context_window_tokens INTEGER,
truncation_sections INTEGER,
continue_here_fired INTEGER,
prompt_char_count INTEGER,
baseline_char_count INTEGER,
cache_hit_rate INTEGER,
skills TEXT,
PRIMARY KEY (type, id, started_at)
);
CREATE TABLE IF NOT EXISTS project_metrics_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 54,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -8191,7 +8262,14 @@ export function insertTriageEval(id, runId, data, createdAt) {
* Purpose: store triage inbox items (eval_candidate, implementation_task, etc.) in DB.
* Consumer: commands-todo.js triageTodoDump.
*/
export function insertTriageItem(id, runId, kind, content, evidence, createdAt) {
export function insertTriageItem(
id,
runId,
kind,
content,
evidence,
createdAt,
) {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
currentDb
.prepare(
@ -8334,3 +8412,241 @@ export function upsertValidationAttentionMarker(milestoneId, marker) {
":revalidation_requested_at": marker.revalidationRequestedAt ?? null,
});
}
// ─── Routing History ──────────────────────────────────────────────────────────
/**
 * Upsert a routing outcome for a pattern/tier pair, incrementing success or fail count.
 * Purpose: persist adaptive tier learning to DB so routing decisions survive restarts.
 * Consumer: routing-history.js recordOutcome.
 */
export function upsertRoutingOutcome(db, pattern, tier, success) {
  const now = new Date().toISOString();
  // One outcome per call: a success contributes 1/0, a failure 0/1; the
  // ON CONFLICT clause accumulates the increments onto the existing row.
  const successIncrement = success ? 1 : 0;
  const stmt = db.prepare(
    `INSERT INTO routing_history (pattern, tier, success_count, fail_count, updated_at)
     VALUES (:pattern, :tier, :success_count, :fail_count, :updated_at)
     ON CONFLICT(pattern, tier) DO UPDATE SET
       success_count = success_count + excluded.success_count,
       fail_count = fail_count + excluded.fail_count,
       updated_at = excluded.updated_at`,
  );
  stmt.run({
    ":pattern": pattern,
    ":tier": tier,
    ":success_count": successIncrement,
    ":fail_count": 1 - successIncrement,
    ":updated_at": now,
  });
}
/**
 * Get all routing history rows.
 * Purpose: load full routing state into memory on init.
 * Consumer: routing-history.js initRoutingHistory.
 */
export function getAllRoutingHistory(db) {
  const stmt = db.prepare(
    "SELECT pattern, tier, success_count, fail_count, updated_at FROM routing_history",
  );
  return stmt.all();
}
/**
 * Get routing history rows for a specific pattern.
 * Purpose: targeted pattern lookup for adaptive tier queries.
 * Consumer: routing-history.js getRoutingHistoryForPattern.
 */
export function getRoutingHistoryForPattern(db, pattern) {
  const stmt = db.prepare(
    "SELECT tier, success_count, fail_count FROM routing_history WHERE pattern = ?",
  );
  return stmt.all(pattern);
}
/**
 * Append a routing feedback signal to the audit table.
 * Purpose: persist user feedback for later analysis and weighted outcome application.
 * Consumer: routing-history.js recordFeedback.
 */
export function insertRoutingFeedback(db, pattern, tier, feedback) {
  const recordedAt = new Date().toISOString();
  const stmt = db.prepare(
    `INSERT INTO routing_feedback (pattern, tier, feedback, recorded_at)
     VALUES (:pattern, :tier, :feedback, :recorded_at)`,
  );
  stmt.run({
    ":pattern": pattern,
    ":tier": tier,
    ":feedback": feedback,
    ":recorded_at": recordedAt,
  });
}
/**
 * Clear all routing history and feedback rows.
 * Purpose: full reset of adaptive learning state on user request.
 * Consumer: routing-history.js clearRoutingHistory.
 */
export function clearRoutingHistory(db) {
  // Counters first, then the feedback audit log (same order as before).
  for (const table of ["routing_history", "routing_feedback"]) {
    db.prepare(`DELETE FROM ${table}`).run();
  }
}
// ─── Unit Metrics CRUD ────────────────────────────────────────────────────────
// Convert a unit_metrics DB row into the in-memory unit metrics object.
// Always-present columns map 1:1; nullable columns are attached only when
// non-null, and 0/1 integer flags are converted back to booleans.
function rowToUnitMetrics(row) {
  const intToBool = (v) => v === 1;
  const unit = {
    type: row.type,
    id: row.id,
    model: row.model,
    startedAt: row.started_at,
    finishedAt: row.finished_at,
    tokens: {
      input: row.tokens_input,
      output: row.tokens_output,
      cacheRead: row.tokens_cache_read,
      cacheWrite: row.tokens_cache_write,
      total: row.tokens_total,
    },
    cost: row.cost,
    toolCalls: row.tool_calls,
    assistantMessages: row.assistant_messages,
    userMessages: row.user_messages,
    apiRequests: row.api_requests,
  };
  if (row.auto_session_key != null) unit.autoSessionKey = row.auto_session_key;
  if (row.tier != null) unit.tier = row.tier;
  if (row.model_downgraded != null)
    unit.modelDowngraded = intToBool(row.model_downgraded);
  if (row.context_window_tokens != null)
    unit.contextWindowTokens = row.context_window_tokens;
  if (row.truncation_sections != null)
    unit.truncationSections = row.truncation_sections;
  if (row.continue_here_fired != null)
    unit.continueHereFired = intToBool(row.continue_here_fired);
  if (row.prompt_char_count != null)
    unit.promptCharCount = row.prompt_char_count;
  if (row.baseline_char_count != null)
    unit.baselineCharCount = row.baseline_char_count;
  if (row.cache_hit_rate != null) unit.cacheHitRate = row.cache_hit_rate;
  // skills is stored as a JSON array string.
  if (row.skills != null) unit.skills = JSON.parse(row.skills);
  return unit;
}
/**
 * Upsert a single unit metrics record into the DB.
 *
 * Purpose: persist per-unit token/cost telemetry from autonomous mode so
 * history, cost, and export commands can read from the canonical DB store
 * instead of a fragile JSON file on disk.
 *
 * Consumer: metrics.js saveLedger (called after every unit snapshot).
 */
export function upsertUnitMetrics(db, unit) {
  // Optional booleans are stored as 0/1 integers; absent values become NULL.
  const flag = (v) => (v == null ? null : v ? 1 : 0);
  const params = {
    ":type": unit.type,
    ":id": unit.id,
    ":started_at": unit.startedAt,
    ":finished_at": unit.finishedAt,
    ":model": unit.model,
    ":auto_session_key": unit.autoSessionKey ?? null,
    ":tokens_input": unit.tokens.input,
    ":tokens_output": unit.tokens.output,
    ":tokens_cache_read": unit.tokens.cacheRead,
    ":tokens_cache_write": unit.tokens.cacheWrite,
    ":tokens_total": unit.tokens.total,
    ":cost": unit.cost,
    ":tool_calls": unit.toolCalls,
    ":assistant_messages": unit.assistantMessages,
    ":user_messages": unit.userMessages,
    // apiRequests may be absent on a unit; fall back to assistantMessages.
    ":api_requests": unit.apiRequests ?? unit.assistantMessages,
    ":tier": unit.tier ?? null,
    ":model_downgraded": flag(unit.modelDowngraded),
    ":context_window_tokens": unit.contextWindowTokens ?? null,
    ":truncation_sections": unit.truncationSections ?? null,
    ":continue_here_fired": flag(unit.continueHereFired),
    ":prompt_char_count": unit.promptCharCount ?? null,
    ":baseline_char_count": unit.baselineCharCount ?? null,
    ":cache_hit_rate": unit.cacheHitRate ?? null,
    // skills is serialized as a JSON array string.
    ":skills": unit.skills != null ? JSON.stringify(unit.skills) : null,
  };
  db.prepare(
    `INSERT OR REPLACE INTO unit_metrics (
      type, id, started_at, finished_at, model, auto_session_key,
      tokens_input, tokens_output, tokens_cache_read, tokens_cache_write, tokens_total,
      cost, tool_calls, assistant_messages, user_messages, api_requests,
      tier, model_downgraded, context_window_tokens, truncation_sections,
      continue_here_fired, prompt_char_count, baseline_char_count, cache_hit_rate, skills
    ) VALUES (
      :type, :id, :started_at, :finished_at, :model, :auto_session_key,
      :tokens_input, :tokens_output, :tokens_cache_read, :tokens_cache_write, :tokens_total,
      :cost, :tool_calls, :assistant_messages, :user_messages, :api_requests,
      :tier, :model_downgraded, :context_window_tokens, :truncation_sections,
      :continue_here_fired, :prompt_char_count, :baseline_char_count, :cache_hit_rate, :skills
    )`,
  ).run(params);
}
/**
 * Load all unit metrics ordered by started_at ASC (oldest first).
 *
 * Purpose: reconstruct the in-memory ledger from the canonical DB store
 * on session init or on demand from history/cost commands.
 *
 * Consumer: metrics.js loadLedgerFromDisk and loadLedger.
 */
export function getAllUnitMetrics(db) {
  const rows = db
    .prepare("SELECT * FROM unit_metrics ORDER BY started_at ASC")
    .all();
  return rows.map((row) => rowToUnitMetrics(row));
}
/**
 * Delete oldest unit_metrics rows keeping only the N most recent by finished_at.
 *
 * Purpose: enforce a max-ledger-size cap so the DB doesn't bloat over long
 * autonomous runs. Called by the doctor when the ledger exceeds its threshold.
 *
 * Consumer: metrics.js pruneMetricsLedger.
 */
export function pruneUnitMetrics(db, keepCount) {
  // Keep the newest :keepCount rows by finished_at; everything else goes.
  const stmt = db.prepare(
    `DELETE FROM unit_metrics WHERE rowid NOT IN (
      SELECT rowid FROM unit_metrics ORDER BY finished_at DESC LIMIT :keepCount
    )`,
  );
  stmt.run({ ":keepCount": keepCount });
}
/**
 * Get the project start timestamp stored in project_metrics_meta.
 *
 * Purpose: surface when the autonomous run started for elapsed-time display.
 * Returns null when the key is missing or its value is not a finite number.
 *
 * Consumer: metrics.js loadLedger and loadLedgerFromDisk.
 */
export function getProjectStartedAt(db) {
  const row = db
    .prepare(
      "SELECT value FROM project_metrics_meta WHERE key = 'projectStartedAt'",
    )
    .get();
  if (!row) return null;
  // Value is stored as a string; reject anything that isn't a finite number.
  const parsed = Number(row.value);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
/**
 * Persist the project start timestamp in project_metrics_meta.
 *
 * Purpose: survive process restarts so the dashboard shows wall-clock elapsed
 * time for the full autonomous session, not just the current process lifetime.
 *
 * Consumer: metrics.js initMetrics (via loadLedger defaultLedger path).
 */
export function setProjectStartedAt(db, ts) {
  // Stored as text; getProjectStartedAt converts back with Number().
  const stmt = db.prepare(
    `INSERT INTO project_metrics_meta (key, value) VALUES ('projectStartedAt', :value)
     ON CONFLICT(key) DO UPDATE SET value = excluded.value`,
  );
  stmt.run({ ":value": String(ts) });
}

View file

@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 52);
assert.equal(version.version, 54);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",