feat: store judgment log in db

This commit is contained in:
Mikael Hugo 2026-05-07 06:22:07 +02:00
parent 2178aa8803
commit a2184a0a0e
5 changed files with 242 additions and 22 deletions

View file

@ -2,39 +2,51 @@
* Judgment log records agent decision-making during autonomous mode.
*
* When the agent makes a non-trivial call between alternatives, it logs a
* JudgmentEntry. These accumulate in .sf/runtime/judgment-log.jsonl and are
* surfaced at milestone close (low-confidence entries bubble into the summary).
* JudgmentEntry. These accumulate in the SQLite judgments table (schema v40+)
* with fallback to .sf/runtime/judgment-log.jsonl for legacy environments.
*
* Storage: sfRuntimeRoot(basePath)/judgment-log.jsonl runtime artifact.
* Storage: sf.db judgments table (preferred) or sfRuntimeRoot/judgment-log.jsonl (fallback).
*
* The tool `sf_log_judgment` (registered in dynamic-tools.ts or equivalent)
* calls appendJudgment(). readJudgmentLog() is used by the compounding step.
*/
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { sfRuntimeRoot } from "./paths.js";
import { sfRoot, sfRuntimeRoot } from "./paths.js";
import { getJudgmentsForUnit, insertJudgment, openDatabase } from "./sf-db.js";
const JUDGMENT_LOG_SCHEMA_VERSION = 1;
/**
 * Append a single judgment entry to the judgment log.
 *
 * Prefers the SQLite judgments table and falls back to the JSONL file when the
 * database cannot be opened or the insert reports failure. Every failure is
 * swallowed on purpose — judgment logging must never disrupt the agent loop.
 *
 * @param {string} basePath - Project root used to locate the database and the JSONL file.
 * @param {object} entry - Judgment fields. They are spread over the defaults, so
 *   an explicit `ts` or `schemaVersion` on the entry wins.
 * @returns {void}
 */
export function appendJudgment(basePath, entry) {
  // Built once so the DB row and the JSONL fallback carry the same timestamp.
  const full = {
    schemaVersion: JUDGMENT_LOG_SCHEMA_VERSION,
    ts: new Date().toISOString(),
    ...entry,
  };
  try {
    ensureJudgmentDb(basePath);
    // An explicit `false` from insertJudgment is treated as "row not written"
    // so the JSONL fallback still runs; any other return value counts as success.
    if (insertJudgment(full) !== false) return;
  } catch {
    // Fall through to JSONL backup
  }
  // Fallback: JSONL file
  try {
    const logPath = resolveJudgmentLogPath(basePath);
    mkdirSync(join(logPath, ".."), { recursive: true });
    appendFileSync(logPath, JSON.stringify(full) + "\n", "utf-8");
  } catch {
    // Non-fatal — judgment logging must never break the agent loop
  }
}
/**
* Read judgment log entries, optionally filtered by unitId prefix.
*
@ -43,6 +55,21 @@ export function appendJudgment(basePath, entry) {
* starts with this value are returned (e.g. "M001" returns all M001 entries)
*/
export function readJudgmentLog(basePath, unitId) {
try {
ensureJudgmentDb(basePath);
const rows = getJudgmentsForUnit(unitId ?? "", 1000);
if (rows.length > 0) {
return rows.map((r) => ({
schemaVersion: JUDGMENT_LOG_SCHEMA_VERSION,
...r,
alternatives: r.alternatives,
}));
}
} catch {
// Fall through to JSONL backup
}
// Fallback: JSONL file
const logPath = resolveJudgmentLogPath(basePath);
if (!existsSync(logPath)) return [];
try {
@ -82,3 +109,11 @@ function normalizeJudgmentEntry(entry) {
/**
 * Build the path of the JSONL judgment log file for a project.
 *
 * @param {string} basePath - Project root handed to sfRuntimeRoot().
 * @returns {string} Path to `judgment-log.jsonl` inside the runtime root.
 */
export function resolveJudgmentLogPath(basePath) {
  const runtimeDir = sfRuntimeRoot(basePath);
  return join(runtimeDir, "judgment-log.jsonl");
}
/**
 * Make sure the project's `sf.db` is the open database before a judgment
 * read or write.
 *
 * Creates the directory returned by sfRoot(basePath) if needed, then asks
 * openDatabase() to open `sf.db` inside it.
 *
 * @param {string} basePath - Project root handed to sfRoot().
 * @throws {Error} When openDatabase() returns a falsy value.
 */
function ensureJudgmentDb(basePath) {
  const projectSfDir = sfRoot(basePath);
  mkdirSync(projectSfDir, { recursive: true });
  const dbFile = join(projectSfDir, "sf.db");
  const opened = openDatabase(dbFile);
  if (opened) return;
  throw new Error("SF judgment database unavailable");
}

View file

@ -78,7 +78,7 @@ function openRawDb(path) {
loadProvider();
return new DatabaseSync(path);
}
const SCHEMA_VERSION = 39;
const SCHEMA_VERSION = 40;
function indexExists(db, name) {
return !!db
.prepare(
@ -587,6 +587,20 @@ function initSchema(db, fileBacked) {
"CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(superseded_by, category)",
);
db.exec(`
CREATE TABLE IF NOT EXISTS judgments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
unit_id TEXT NOT NULL,
decision TEXT NOT NULL DEFAULT '',
alternatives_json TEXT NOT NULL DEFAULT '[]',
reasoning TEXT NOT NULL DEFAULT '',
confidence TEXT NOT NULL DEFAULT 'medium',
ts TEXT NOT NULL
)
`);
db.exec(
"CREATE INDEX IF NOT EXISTS idx_judgments_unit_id ON judgments(unit_id, ts DESC)",
);
db.exec(`
CREATE TABLE IF NOT EXISTS milestones (
id TEXT PRIMARY KEY,
title TEXT NOT NULL DEFAULT '',
@ -2070,6 +2084,28 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 40) {
db.exec(`
CREATE TABLE IF NOT EXISTS judgments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
unit_id TEXT NOT NULL,
decision TEXT NOT NULL DEFAULT '',
alternatives_json TEXT NOT NULL DEFAULT '[]',
reasoning TEXT NOT NULL DEFAULT '',
confidence TEXT NOT NULL DEFAULT 'medium',
ts TEXT NOT NULL
)
`);
db.exec(
"CREATE INDEX IF NOT EXISTS idx_judgments_unit_id ON judgments(unit_id, ts DESC)",
);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 40,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -6125,6 +6161,53 @@ export function deleteMemorySourceRow(id) {
.run({ ":id": id });
return (res?.changes ?? 0) > 0;
}
// ─── Judgments ───────────────────────────────────────────────────────────────
/**
 * Insert one judgment row into the currently open database.
 *
 * Best-effort: never throws. Missing fields are stored with defaults
 * (empty strings, an empty alternatives list, "medium" confidence, and the
 * current time for `ts`).
 *
 * @param {object} entry - Judgment fields: `unitId`, `decision`,
 *   `alternatives`, `reasoning`, `confidence`, `ts`.
 * @returns {boolean} `true` when the row was written; `false` when no database
 *   is open or the insert failed, so callers can use another store.
 */
export function insertJudgment(entry) {
  if (!currentDb) return false;
  try {
    currentDb
      .prepare(`INSERT INTO judgments (unit_id, decision, alternatives_json, reasoning, confidence, ts)
VALUES (:unit_id, :decision, :alternatives_json, :reasoning, :confidence, :ts)`)
      .run({
        ":unit_id": entry.unitId ?? "",
        ":decision": entry.decision ?? "",
        ":alternatives_json": JSON.stringify(entry.alternatives ?? []),
        ":reasoning": entry.reasoning ?? "",
        ":confidence": entry.confidence ?? "medium",
        ":ts": entry.ts ?? new Date().toISOString(),
      });
    return true;
  } catch {
    // Judgment logging is best-effort: report the failure instead of throwing.
    return false;
  }
}
/**
 * Read judgments whose unit id starts with the given prefix, newest first.
 *
 * Best-effort: returns an empty array when no database is open or the query
 * fails.
 *
 * @param {string} unitIdPrefix - Literal unit id prefix (e.g. "M001"). A
 *   nullish or empty prefix matches every row.
 * @param {number} [limit=1000] - Maximum number of rows to return.
 * @returns {Array<object>} Rows with `id`, `unitId`, `decision`,
 *   `alternatives`, `reasoning`, `confidence` and `ts`.
 */
export function getJudgmentsForUnit(unitIdPrefix, limit = 1000) {
  if (!currentDb) return [];
  try {
    // `%` and `_` are LIKE wildcards; escape them (and the escape character
    // itself) so the prefix is matched literally.
    const literalPrefix = String(unitIdPrefix ?? "").replace(/[\\%_]/g, "\\$&");
    const rows = currentDb
      .prepare(
        `SELECT id, unit_id AS unitId, decision, alternatives_json AS alternativesJson, reasoning, confidence, ts
FROM judgments
WHERE unit_id LIKE :prefix ESCAPE '\\'
ORDER BY ts DESC, id DESC
LIMIT :limit`,
      )
      .all({
        ":prefix": `${literalPrefix}%`,
        ":limit": limit,
      });
    return rows.map((r) => ({
      id: r.id,
      unitId: r.unitId,
      decision: r.decision,
      alternatives: parseJsonObject(r.alternativesJson, []),
      reasoning: r.reasoning,
      confidence: r.confidence,
      ts: r.ts,
    }));
  } catch {
    // Reads are best-effort; callers treat an empty result as "nothing in DB".
    return [];
  }
}
// ─── Memory Embeddings ───────────────────────────────────────────────────────
export function upsertMemoryEmbedding(args) {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");

View file

@ -24,7 +24,7 @@ import {
} from "../autonomous-solver.js";
import { triageTodoDump } from "../commands-todo.js";
import { emitJournalEvent, queryJournal } from "../journal.js";
import { appendJudgment, readJudgmentLog } from "../judgment-log.js";
import { readJudgmentLog } from "../judgment-log.js";
import { ModelLearner } from "../model-learner.js";
import { createScheduleStore } from "../schedule/schedule-store.js";
import { closeDatabase } from "../sf-db.js";
@ -216,16 +216,9 @@ describe("SF JSONL schema versioning", () => {
assert.equal(legacy.runId, "legacy-run");
});
test("judgment_log_writes_schema_version_and_reads_legacy_rows", () => {
test("judgment_log_reads_legacy_jsonl_rows_as_version_1", () => {
const project = makeProject();
appendJudgment(project, {
unitId: "M001/S01/T01",
confidence: "high",
decision: "keep file-backed projection",
});
const path = join(project, ".sf", "judgment-log.jsonl");
assert.equal(readJsonl(path)[0].schemaVersion, 1);
writeFileSync(
path,

View file

@ -0,0 +1,74 @@
/**
* judgment-log-db.test.mjs — DB-backed judgment log coverage.
*
* Purpose: prove agent judgments are persisted in the project SQLite DB and
* do not leak across projects when the process switches roots.
*/
import assert from "node:assert/strict";
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import { appendJudgment, readJudgmentLog } from "../judgment-log.js";
import { closeDatabase, getJudgmentsForUnit, openDatabase } from "../sf-db.js";
const tmpRoots = [];
// After every test: close the shared DB handle, then remove every temp
// project root that makeProject() registered.
afterEach(() => {
  closeDatabase();
  const doomedRoots = tmpRoots.splice(0);
  doomedRoots.forEach((root) => {
    rmSync(root, { recursive: true, force: true });
  });
});
/**
 * Create a fresh temporary project root and register it in tmpRoots so the
 * afterEach hook can delete it.
 *
 * @returns {string} Path of the new temporary directory.
 */
function makeProject() {
  const prefix = join(tmpdir(), "sf-judgment-db-");
  const projectRoot = mkdtempSync(prefix);
  tmpRoots.push(projectRoot);
  return projectRoot;
}
// A judgment appended for a project can be read back through readJudgmentLog
// with its decision and alternatives intact.
test("appendJudgment_writes_to_project_db", () => {
  const projectRoot = makeProject();
  const judgment = {
    unitId: "M001/S01/T01",
    decision: "Use DB",
    alternatives: ["JSONL"],
    reasoning: "queryable state",
    confidence: "high",
  };
  appendJudgment(projectRoot, judgment);

  const logged = readJudgmentLog(projectRoot, "M001");
  assert.equal(logged.length, 1);
  const [only] = logged;
  assert.equal(only.decision, "Use DB");
  assert.deepEqual(only.alternatives, ["JSONL"]);
});
// Judgments appended for two different project roots must each land in that
// project's own sf.db and not leak into the other one.
test("appendJudgment_when_switching_projects_uses_current_project_db", () => {
  const projects = [
    { root: makeProject(), decision: "first" },
    { root: makeProject(), decision: "second" },
  ];

  for (const { root, decision } of projects) {
    appendJudgment(root, {
      unitId: "M001/S01/T01",
      decision,
      confidence: "high",
    });
  }

  // Re-open each project's database directly and check it holds only its own row.
  for (const { root, decision } of projects) {
    closeDatabase();
    openDatabase(join(root, ".sf", "sf.db"));
    const decisions = getJudgmentsForUnit("M001").map((entry) => entry.decision);
    assert.deepEqual(decisions, [decision]);
  }
});

View file

@ -13,8 +13,10 @@ import { afterEach, test } from "vitest";
import {
closeDatabase,
getDatabase,
getJudgmentsForUnit,
getScheduleEntries,
insertGateRun,
insertJudgment,
insertScheduleEntry,
openDatabase,
} from "../sf-db.js";
@ -201,7 +203,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 39);
assert.equal(version.version, 40);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",
@ -308,3 +310,36 @@ test("openDatabase_memory_indexes_exist", () => {
"should have idx_memory_sources_content_hash",
);
});
// Rows written with insertJudgment come back from getJudgmentsForUnit filtered
// by unit id prefix, newest first, with alternatives decoded from JSON.
test("openDatabase_judgments_table_round_trip", () => {
  const opened = openDatabase(":memory:");
  assert.equal(opened, true);

  const fixtures = [
    {
      unitId: "M001/S01/T01",
      decision: "Use SQLite over JSONL",
      alternatives: ["Keep JSONL", "Use JSONL with schema"],
      reasoning: "SQLite gives us querying and indexing",
      confidence: "high",
      ts: "2026-05-07T00:00:00.000Z",
    },
    {
      unitId: "M001/S01/T02",
      decision: "Add tags column",
      alternatives: ["Separate tags table"],
      reasoning: "Simpler schema",
      confidence: "medium",
      ts: "2026-05-07T00:01:00.000Z",
    },
  ];
  for (const fixture of fixtures) {
    insertJudgment(fixture);
  }

  const byMilestone = getJudgmentsForUnit("M001", 100);
  assert.equal(byMilestone.length, 2);
  const [newest, oldest] = byMilestone;
  assert.equal(newest.unitId, "M001/S01/T02"); // DESC order
  assert.equal(oldest.unitId, "M001/S01/T01");
  assert.deepEqual(oldest.alternatives, [
    "Keep JSONL",
    "Use JSONL with schema",
  ]);

  const byTask = getJudgmentsForUnit("M001/S01/T01", 100);
  assert.equal(byTask.length, 1);
  assert.equal(byTask[0].confidence, "high");
});