feat: store judgment log in db
This commit is contained in:
parent
2178aa8803
commit
a2184a0a0e
5 changed files with 242 additions and 22 deletions
|
|
@ -2,39 +2,51 @@
|
|||
* Judgment log — records agent decision-making during autonomous mode.
|
||||
*
|
||||
* When the agent makes a non-trivial call between alternatives, it logs a
|
||||
* JudgmentEntry. These accumulate in .sf/runtime/judgment-log.jsonl and are
|
||||
* surfaced at milestone close (low-confidence entries bubble into the summary).
|
||||
* JudgmentEntry. These accumulate in the SQLite judgments table (schema v40+)
|
||||
* with fallback to .sf/runtime/judgment-log.jsonl for legacy environments.
|
||||
*
|
||||
* Storage: sfRuntimeRoot(basePath)/judgment-log.jsonl — runtime artifact.
|
||||
* Storage: sf.db judgments table (preferred) or sfRuntimeRoot/judgment-log.jsonl (fallback).
|
||||
*
|
||||
* The tool `sf_log_judgment` (registered in dynamic-tools.ts or equivalent)
|
||||
* calls appendJudgment(). readJudgmentLog() is used by the compounding step.
|
||||
*/
|
||||
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { sfRuntimeRoot } from "./paths.js";
|
||||
import { sfRoot, sfRuntimeRoot } from "./paths.js";
|
||||
import { getJudgmentsForUnit, insertJudgment, openDatabase } from "./sf-db.js";
|
||||
|
||||
const JUDGMENT_LOG_SCHEMA_VERSION = 1;
|
||||
|
||||
/**
 * Append a single judgment entry to the judgment log.
 * Prefers SQLite; falls back to JSONL when DB is unavailable.
 * Failure is non-fatal — silently swallowed so the agent loop is not disrupted.
 *
 * @param {string} basePath - Project root used to locate the database and the JSONL file.
 * @param {object} entry - Judgment fields supplied by the caller. They are spread
 *   after the defaults, so an `entry.ts` or `entry.schemaVersion` overrides them.
 * @returns {void}
 */
export function appendJudgment(basePath, entry) {
  // Built once so the DB row and the JSONL fallback line carry the same timestamp.
  const full = {
    schemaVersion: JUDGMENT_LOG_SCHEMA_VERSION,
    ts: new Date().toISOString(),
    ...entry,
  };

  try {
    ensureJudgmentDb(basePath);
    // An explicit `false` from insertJudgment is treated as "row not stored" and
    // falls through to the JSONL backup. Any other return value (including
    // `undefined`) counts as success, which keeps the previous behaviour.
    if (insertJudgment(full) !== false) return;
  } catch {
    // Fall through to JSONL backup
  }

  // Fallback: JSONL file
  try {
    const logPath = resolveJudgmentLogPath(basePath);
    // join(path, "..") normalises to the parent directory of the log file.
    mkdirSync(join(logPath, ".."), { recursive: true });
    appendFileSync(logPath, JSON.stringify(full) + "\n", "utf-8");
  } catch {
    // Non-fatal — judgment logging must never break the agent loop
  }
}
|
||||
|
||||
/**
|
||||
* Read judgment log entries, optionally filtered by unitId prefix.
|
||||
*
|
||||
|
|
@ -43,6 +55,21 @@ export function appendJudgment(basePath, entry) {
|
|||
* starts with this value are returned (e.g. "M001" returns all M001 entries)
|
||||
*/
|
||||
export function readJudgmentLog(basePath, unitId) {
|
||||
try {
|
||||
ensureJudgmentDb(basePath);
|
||||
const rows = getJudgmentsForUnit(unitId ?? "", 1000);
|
||||
if (rows.length > 0) {
|
||||
return rows.map((r) => ({
|
||||
schemaVersion: JUDGMENT_LOG_SCHEMA_VERSION,
|
||||
...r,
|
||||
alternatives: r.alternatives,
|
||||
}));
|
||||
}
|
||||
} catch {
|
||||
// Fall through to JSONL backup
|
||||
}
|
||||
|
||||
// Fallback: JSONL file
|
||||
const logPath = resolveJudgmentLogPath(basePath);
|
||||
if (!existsSync(logPath)) return [];
|
||||
try {
|
||||
|
|
@ -82,3 +109,11 @@ function normalizeJudgmentEntry(entry) {
|
|||
export function resolveJudgmentLogPath(basePath) {
  // The JSONL log lives directly inside the directory returned by sfRuntimeRoot().
  const runtimeDir = sfRuntimeRoot(basePath);
  const logFile = join(runtimeDir, "judgment-log.jsonl");
  return logFile;
}
|
||||
|
||||
/**
 * Make sure the project's `sf.db` is open before judgments are read or written.
 *
 * Creates the directory returned by sfRoot(basePath) if needed, then asks
 * openDatabase() to open `sf.db` inside it.
 *
 * @param {string} basePath - Project root passed through to sfRoot().
 * @throws {Error} When openDatabase() returns a falsy value.
 */
function ensureJudgmentDb(basePath) {
  const sfDir = sfRoot(basePath);
  mkdirSync(sfDir, { recursive: true });

  const dbPath = join(sfDir, "sf.db");
  const opened = openDatabase(dbPath);
  if (opened) return;

  throw new Error("SF judgment database unavailable");
}
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ function openRawDb(path) {
|
|||
loadProvider();
|
||||
return new DatabaseSync(path);
|
||||
}
|
||||
const SCHEMA_VERSION = 39;
|
||||
const SCHEMA_VERSION = 40;
|
||||
function indexExists(db, name) {
|
||||
return !!db
|
||||
.prepare(
|
||||
|
|
@ -587,6 +587,20 @@ function initSchema(db, fileBacked) {
|
|||
"CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(superseded_by, category)",
|
||||
);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS judgments (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
unit_id TEXT NOT NULL,
|
||||
decision TEXT NOT NULL DEFAULT '',
|
||||
alternatives_json TEXT NOT NULL DEFAULT '[]',
|
||||
reasoning TEXT NOT NULL DEFAULT '',
|
||||
confidence TEXT NOT NULL DEFAULT 'medium',
|
||||
ts TEXT NOT NULL
|
||||
)
|
||||
`);
|
||||
db.exec(
|
||||
"CREATE INDEX IF NOT EXISTS idx_judgments_unit_id ON judgments(unit_id, ts DESC)",
|
||||
);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS milestones (
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT NOT NULL DEFAULT '',
|
||||
|
|
@ -2070,6 +2084,28 @@ function migrateSchema(db) {
|
|||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
if (currentVersion < 40) {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS judgments (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
unit_id TEXT NOT NULL,
|
||||
decision TEXT NOT NULL DEFAULT '',
|
||||
alternatives_json TEXT NOT NULL DEFAULT '[]',
|
||||
reasoning TEXT NOT NULL DEFAULT '',
|
||||
confidence TEXT NOT NULL DEFAULT 'medium',
|
||||
ts TEXT NOT NULL
|
||||
)
|
||||
`);
|
||||
db.exec(
|
||||
"CREATE INDEX IF NOT EXISTS idx_judgments_unit_id ON judgments(unit_id, ts DESC)",
|
||||
);
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 40,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
db.exec("COMMIT");
|
||||
} catch (err) {
|
||||
db.exec("ROLLBACK");
|
||||
|
|
@ -6125,6 +6161,53 @@ export function deleteMemorySourceRow(id) {
|
|||
.run({ ":id": id });
|
||||
return (res?.changes ?? 0) > 0;
|
||||
}
|
||||
// ─── Judgments ───────────────────────────────────────────────────────────────
|
||||
/**
 * Insert one judgment row into the currently open database.
 *
 * Best-effort: this function never throws. Instead it reports the outcome so
 * callers that keep a backup store can react to a failed write.
 *
 * @param {object} entry - Judgment to store. Reads `unitId`, `decision`,
 *   `alternatives`, `reasoning`, `confidence` and `ts`; missing fields fall
 *   back to "", [], "medium" and the current time respectively.
 * @returns {boolean} `true` when the INSERT ran without error; `false` when no
 *   database is open or the statement failed.
 */
export function insertJudgment(entry) {
  if (!currentDb) return false;
  try {
    currentDb
      .prepare(`INSERT INTO judgments (unit_id, decision, alternatives_json, reasoning, confidence, ts)
         VALUES (:unit_id, :decision, :alternatives_json, :reasoning, :confidence, :ts)`)
      .run({
        ":unit_id": entry.unitId ?? "",
        ":decision": entry.decision ?? "",
        // Alternatives are stored as a JSON array string in a TEXT column.
        ":alternatives_json": JSON.stringify(entry.alternatives ?? []),
        ":reasoning": entry.reasoning ?? "",
        ":confidence": entry.confidence ?? "medium",
        ":ts": entry.ts ?? new Date().toISOString(),
      });
    return true;
  } catch {
    // Judgment logging is best-effort: report the failure instead of throwing.
    return false;
  }
}
|
||||
/**
 * Fetch judgments whose unit id starts with `unitIdPrefix`, newest first.
 *
 * The prefix is matched literally: `%`, `_` and `\` in it are escaped so they
 * are not interpreted as LIKE wildcards. An empty, null or undefined prefix
 * matches every row.
 *
 * @param {string} unitIdPrefix - Leading part of the unit id (e.g. "M001").
 * @param {number} [limit=1000] - Maximum number of rows to return.
 * @returns {Array<object>} Rows with `id`, `unitId`, `decision`, `alternatives`
 *   (decoded from the stored JSON), `reasoning`, `confidence` and `ts`.
 *   Returns an empty array when no database is open or the query fails.
 */
export function getJudgmentsForUnit(unitIdPrefix, limit = 1000) {
  if (!currentDb) return [];
  try {
    // Escape LIKE metacharacters so the prefix can only match itself.
    const literalPrefix = String(unitIdPrefix ?? "").replace(/[\\%_]/g, "\\$&");
    const rows = currentDb
      .prepare(
        `SELECT id, unit_id AS unitId, decision, alternatives_json AS alternativesJson, reasoning, confidence, ts
         FROM judgments
         WHERE unit_id LIKE :prefix ESCAPE '\\'
         ORDER BY ts DESC, id DESC
         LIMIT :limit`,
      )
      .all({
        ":prefix": `${literalPrefix}%`,
        ":limit": limit,
      });
    return rows.map((r) => ({
      id: r.id,
      unitId: r.unitId,
      decision: r.decision,
      alternatives: parseJsonObject(r.alternativesJson, []),
      reasoning: r.reasoning,
      confidence: r.confidence,
      ts: r.ts,
    }));
  } catch {
    // Reads are best-effort: a failed query is reported as "no rows".
    return [];
  }
}
|
||||
// ─── Memory Embeddings ───────────────────────────────────────────────────────
|
||||
export function upsertMemoryEmbedding(args) {
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import {
|
|||
} from "../autonomous-solver.js";
|
||||
import { triageTodoDump } from "../commands-todo.js";
|
||||
import { emitJournalEvent, queryJournal } from "../journal.js";
|
||||
import { appendJudgment, readJudgmentLog } from "../judgment-log.js";
|
||||
import { readJudgmentLog } from "../judgment-log.js";
|
||||
import { ModelLearner } from "../model-learner.js";
|
||||
import { createScheduleStore } from "../schedule/schedule-store.js";
|
||||
import { closeDatabase } from "../sf-db.js";
|
||||
|
|
@ -216,16 +216,9 @@ describe("SF JSONL schema versioning", () => {
|
|||
assert.equal(legacy.runId, "legacy-run");
|
||||
});
|
||||
|
||||
test("judgment_log_writes_schema_version_and_reads_legacy_rows", () => {
|
||||
test("judgment_log_reads_legacy_jsonl_rows_as_version_1", () => {
|
||||
const project = makeProject();
|
||||
appendJudgment(project, {
|
||||
unitId: "M001/S01/T01",
|
||||
confidence: "high",
|
||||
decision: "keep file-backed projection",
|
||||
});
|
||||
|
||||
const path = join(project, ".sf", "judgment-log.jsonl");
|
||||
assert.equal(readJsonl(path)[0].schemaVersion, 1);
|
||||
|
||||
writeFileSync(
|
||||
path,
|
||||
|
|
|
|||
74
src/resources/extensions/sf/tests/judgment-log-db.test.mjs
Normal file
74
src/resources/extensions/sf/tests/judgment-log-db.test.mjs
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
/**
|
||||
* judgment-log-db.test.mjs — DB-backed judgment log coverage.
|
||||
*
|
||||
* Purpose: prove agent judgments are persisted in the project SQLite DB and
|
||||
* do not leak across projects when the process switches roots.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
import { appendJudgment, readJudgmentLog } from "../judgment-log.js";
|
||||
import { closeDatabase, getJudgmentsForUnit, openDatabase } from "../sf-db.js";
|
||||
|
||||
const tmpRoots = [];
|
||||
|
||||
// Per-test cleanup: close the shared DB handle first, then delete every
// temporary project root created by makeProject().
afterEach(() => {
  closeDatabase();
  // splice(0) empties tmpRoots in place and hands back the removed paths.
  const staleRoots = tmpRoots.splice(0);
  staleRoots.forEach((staleRoot) => {
    rmSync(staleRoot, { recursive: true, force: true });
  });
});
|
||||
|
||||
/**
 * Create a fresh temporary project root and register it for removal in the
 * afterEach hook.
 *
 * @returns {string} Path of the new temporary directory.
 */
function makeProject() {
  const prefix = join(tmpdir(), "sf-judgment-db-");
  const projectRoot = mkdtempSync(prefix);
  tmpRoots.push(projectRoot);
  return projectRoot;
}
|
||||
|
||||
// Verifies a judgment appended through the public API is readable again and
// is actually stored in the SQLite database rather than only in a fallback.
test("appendJudgment_writes_to_project_db", () => {
  const project = makeProject();

  appendJudgment(project, {
    unitId: "M001/S01/T01",
    decision: "Use DB",
    alternatives: ["JSONL"],
    reasoning: "queryable state",
    confidence: "high",
  });

  const entries = readJudgmentLog(project, "M001");
  assert.equal(entries.length, 1);
  assert.equal(entries[0].decision, "Use DB");
  assert.deepEqual(entries[0].alternatives, ["JSONL"]);

  // readJudgmentLog also reads the JSONL fallback, so query the open database
  // directly to prove the row was persisted in SQLite.
  const dbRows = getJudgmentsForUnit("M001");
  assert.deepEqual(
    dbRows.map((row) => row.decision),
    ["Use DB"],
  );
});
|
||||
|
||||
// Writes the same unit id into two different project roots, then reopens each
// project's sf.db in turn and checks it holds only its own decision.
test("appendJudgment_when_switching_projects_uses_current_project_db", () => {
  const first = makeProject();
  const second = makeProject();
  const unitId = "M001/S01/T01";

  appendJudgment(first, { unitId, decision: "first", confidence: "high" });
  appendJudgment(second, { unitId, decision: "second", confidence: "high" });

  const expectations = [
    [first, "first"],
    [second, "second"],
  ];
  for (const [projectRoot, expectedDecision] of expectations) {
    closeDatabase();
    openDatabase(join(projectRoot, ".sf", "sf.db"));
    const decisions = getJudgmentsForUnit("M001").map(
      ({ decision }) => decision,
    );
    assert.deepEqual(decisions, [expectedDecision]);
  }
});
|
||||
|
|
@ -13,8 +13,10 @@ import { afterEach, test } from "vitest";
|
|||
import {
|
||||
closeDatabase,
|
||||
getDatabase,
|
||||
getJudgmentsForUnit,
|
||||
getScheduleEntries,
|
||||
insertGateRun,
|
||||
insertJudgment,
|
||||
insertScheduleEntry,
|
||||
openDatabase,
|
||||
} from "../sf-db.js";
|
||||
|
|
@ -201,7 +203,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
|
|||
const version = db
|
||||
.prepare("SELECT MAX(version) AS version FROM schema_version")
|
||||
.get();
|
||||
assert.equal(version.version, 39);
|
||||
assert.equal(version.version, 40);
|
||||
const taskSpec = db
|
||||
.prepare(
|
||||
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",
|
||||
|
|
@ -308,3 +310,36 @@ test("openDatabase_memory_indexes_exist", () => {
|
|||
"should have idx_memory_sources_content_hash",
|
||||
);
|
||||
});
|
||||
|
||||
// Round-trips two judgments through an in-memory database and checks prefix
// filtering, newest-first ordering and decoding of the alternatives list.
test("openDatabase_judgments_table_round_trip", () => {
  assert.equal(openDatabase(":memory:"), true);

  const fixtures = [
    {
      unitId: "M001/S01/T01",
      decision: "Use SQLite over JSONL",
      alternatives: ["Keep JSONL", "Use JSONL with schema"],
      reasoning: "SQLite gives us querying and indexing",
      confidence: "high",
      ts: "2026-05-07T00:00:00.000Z",
    },
    {
      unitId: "M001/S01/T02",
      decision: "Add tags column",
      alternatives: ["Separate tags table"],
      reasoning: "Simpler schema",
      confidence: "medium",
      ts: "2026-05-07T00:01:00.000Z",
    },
  ];
  for (const fixture of fixtures) {
    insertJudgment(fixture);
  }

  const byMilestone = getJudgmentsForUnit("M001", 100);
  assert.equal(byMilestone.length, 2);
  const [newest, oldest] = byMilestone;
  assert.equal(newest.unitId, "M001/S01/T02"); // DESC order
  assert.equal(oldest.unitId, "M001/S01/T01");
  assert.deepEqual(oldest.alternatives, [
    "Keep JSONL",
    "Use JSONL with schema",
  ]);

  const byTask = getJudgmentsForUnit("M001/S01/T01", 100);
  assert.equal(byTask.length, 1);
  assert.equal(byTask[0].confidence, "high");
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue