From 3e5b6fc5116ec3baff1f4a0ea11bc2e21e7c5266 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 17 May 2026 15:06:40 +0200 Subject: [PATCH] fix: reconcile iteration completion drift --- .../sf-db/iteration-completion-reconciler.js | 271 ++++++++++++++++ .../extensions/sf/sf-db/sf-db-tasks.js | 114 +++++++ .../tests/iter-completion-reconciler.test.mjs | 288 ++++++++++++++++++ .../update-task-status-revert-safety.test.mjs | 176 +++++++++++ .../sf/uok/gate-registry-bootstrap.js | 2 + 5 files changed, 851 insertions(+) create mode 100644 src/resources/extensions/sf/sf-db/iteration-completion-reconciler.js create mode 100644 src/resources/extensions/sf/tests/iter-completion-reconciler.test.mjs create mode 100644 src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs diff --git a/src/resources/extensions/sf/sf-db/iteration-completion-reconciler.js b/src/resources/extensions/sf/sf-db/iteration-completion-reconciler.js new file mode 100644 index 000000000..d1d8a94be --- /dev/null +++ b/src/resources/extensions/sf/sf-db/iteration-completion-reconciler.js @@ -0,0 +1,271 @@ +/** + * iteration-completion-reconciler.js — heal iter↔DB completion drift. + * + * Purpose: when .sf/runtime/autonomous-solver/iterations.jsonl shows + * outcome=complete for a unit, ensure the DB tasks row matches. If the row + * is status=pending OR completed_at is null OR an older ts, update it to + * match the iter timestamp. + * + * Consumer: UokGateRegistry (type=verification, id=iter-completion-reconciler). + * + * Failure boundary: read-only on iter; write-only on tasks (status, + * completed_at, verification_status). Slice/milestone flip handled by + * existing sweepers once tasks are correct. 
+ */
+import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+
+// ─── Constants ─────────────────────────────────────────────────────────────
+
+const DEFAULT_MAX_ITER_ENTRIES = 500;
+const DEFAULT_MAX_AGE_MS = 3_600_000; // 1 hour — older drift is operator territory
+const ITER_JSONL_SUBPATH = join(
+  "runtime",
+  "autonomous-solver",
+  "iterations.jsonl",
+);
+const SELF_FEEDBACK_SUBPATH = "self-feedback.jsonl"; // single segment, nothing to join
+
+// ─── Helpers ───────────────────────────────────────────────────────────────
+
+function sfRootDir(basePath) {
+  return join(basePath, ".sf");
+}
+
+function iterPath(basePath) {
+  return join(sfRootDir(basePath), ITER_JSONL_SUBPATH);
+}
+
+function selfFeedbackPath(basePath) {
+  return join(sfRootDir(basePath), SELF_FEEDBACK_SUBPATH);
+}
+
+function newId() {
+  const ts = Date.now().toString(36);
+  const rnd = Math.random().toString(36).slice(2, 8);
+  return `sf-${ts}-${rnd}`;
+}
+
+/**
+ * Parse "M048/S02/T01" into { milestoneId, sliceId, taskId }; segments past the third are ignored.
+ * Returns null for non-strings, fewer than three segments, an empty segment, or a third segment not starting with T/t.
+ */
+function parseTaskUnitId(unitId) {
+  if (typeof unitId !== "string") return null;
+  const parts = unitId.split("/");
+  if (parts.length < 3) return null;
+  const [milestoneId, sliceId, taskId] = parts;
+  // Only non-emptiness is enforced here; the M/S prefixes are not validated
+  if (!milestoneId || !sliceId || !taskId) return null;
+  // Skip research-slice units: no T-prefixed part
+  if (!/^T/i.test(taskId)) return null;
+  return { milestoneId, sliceId, taskId };
+}
+
+/**
+ * Read iterations.jsonl, parse lines (most-recent-last), return raw objects.
+ * Silently skips malformed lines.
+ */
+function readIterEntries(basePath) {
+  const path = iterPath(basePath);
+  if (!existsSync(path)) return [];
+  const raw = readFileSync(path, "utf-8");
+  const lines = raw.split("\n").filter((l) => l.trim());
+  const entries = [];
+  for (const line of lines) {
+    try {
+      const parsed = JSON.parse(line);
+      if (parsed && typeof parsed === "object") entries.push(parsed);
+    } catch {
+      /* skip malformed lines */
+    }
+  }
+  return entries;
+}
+
+/**
+ * From a list of iter entries (all outcomes), find the latest entry per unitId
+ * where outcome === "complete". Returns Map<unitId, entry>.
+ *
+ * Only the last maxEntries entries are scanned; entries with a missing/unparseable ts or older than maxAgeMs are ignored.
+ */
+function latestCompleteByUnit(entries, maxEntries, maxAgeMs) {
+  const cutoffTs = Date.now() - maxAgeMs;
+  // slice(-0) is slice(0) and would return every entry, so a non-positive window must mean "none"
+  const tail = maxEntries > 0 ? entries.slice(-maxEntries) : [];
+  const byUnit = new Map();
+  // Iterate in reverse to find latest first
+  for (let i = tail.length - 1; i >= 0; i--) {
+    const entry = tail[i];
+    if (entry.outcome !== "complete") continue;
+    const unitId = entry.unitId;
+    if (typeof unitId !== "string") continue;
+    // Age filter on entry timestamp
+    const entryMs = entry.ts ? new Date(entry.ts).getTime() : 0;
+    if (!entryMs || entryMs < cutoffTs) continue;
+    // Only store the first (latest) complete entry per unit in this backward pass
+    if (!byUnit.has(unitId)) {
+      byUnit.set(unitId, entry);
+    }
+  }
+  return byUnit;
+}
+
+/**
+ * Append a self-feedback entry to .sf/self-feedback.jsonl (non-fatal; swallows errors).
+ */
+function appendSelfFeedback(basePath, unitId, iterTs) {
+  try {
+    const path = selfFeedbackPath(basePath);
+    mkdirSync(dirname(path), { recursive: true }); // no-op when the directory already exists
+    const entry = {
+      schemaVersion: 1,
+      id: newId(),
+      ts: new Date().toISOString(),
+      kind: "iter-vs-db-drift-recovered",
+      severity: "low",
+      blocking: false,
+      summary: `iter-vs-db drift auto-healed for ${unitId}`,
+      evidence: `iterations.jsonl showed outcome=complete at ${iterTs}; DB row was behind`,
+      unitId,
+      iterTs,
+    };
+    appendFileSync(path, `${JSON.stringify(entry)}\n`, "utf-8");
+  } catch {
+    /* non-fatal */
+  }
+}
+
+// ─── Core reconciler ───────────────────────────────────────────────────────
+
+/**
+ * Reconcile iter↔DB completion drift.
+ *
+ * @param {string} basePath — project root (directory containing .sf/)
+ * @param {object} [options]
+ * @param {number} [options.maxIterEntries=500] — tail window size
+ * @param {number} [options.maxAgeMs=3600000] — ignore iter entries older than this
+ * @param {boolean} [options.dryRun=false] — skip writes when true
+ * @param {object} [options.db] — optional pre-opened adapter (tests inject this)
+ * @returns {Promise<{ reconciled: Array<object>, totalChecked: number, durationMs: number }>}
+ */
+export async function reconcileIterCompletions(basePath, options = {}) {
+  const t0 = Date.now();
+  const maxEntries = options.maxIterEntries ?? DEFAULT_MAX_ITER_ENTRIES;
+  const maxAgeMs = options.maxAgeMs ?? DEFAULT_MAX_AGE_MS;
+  const dryRun = options.dryRun === true;
+
+  // Resolve the DB adapter: prefer injected (for tests), fall back to module singleton
+  let db = options.db ?? null;
+  if (!db) {
+    try {
+      const { _getAdapter } = await import("./sf-db-core.js");
+      db = _getAdapter();
+    } catch {
+      /* DB not available */
+    }
+  }
+
+  const reconciled = [];
+
+  // 1. Read iterations
+  const entries = readIterEntries(basePath);
+  const completeByUnit = latestCompleteByUnit(entries, maxEntries, maxAgeMs);
+
+  // 2. For each unit with a complete iter, check the DB row
+  let totalChecked = 0;
+  for (const [unitId, iterEntry] of completeByUnit) {
+    const parsed = parseTaskUnitId(unitId);
+    if (!parsed) continue; // non-task unit — skip
+    totalChecked++;
+
+    const { milestoneId, sliceId, taskId } = parsed;
+    const iterTs = iterEntry.ts ?? null;
+
+    if (!db) continue; // no DB open — nothing to reconcile
+
+    // Query the task row
+    let row;
+    try {
+      row = db
+        .prepare(
+          "SELECT status, completed_at FROM tasks WHERE milestone_id = :m AND slice_id = :s AND id = :t",
+        )
+        .get({ ":m": milestoneId, ":s": sliceId, ":t": taskId });
+    } catch {
+      continue; // table may not exist in tests without full schema — skip
+    }
+
+    if (!row) continue; // no matching row — nothing to fix
+
+    const dbStatus = row.status;
+    const dbCompletedAt = row.completed_at;
+
+    // Determine whether we need to update:
+    // - DB is not "complete", OR
+    // - DB is "complete" but completed_at is null or OLDER than iter ts
+    const needsUpdate = (() => {
+      if (dbStatus !== "complete") return true;
+      if (!dbCompletedAt) return true;
+      if (!iterTs) return false;
+      // DB ahead of iter? Don't touch it.
+      const dbMs = new Date(dbCompletedAt).getTime();
+      const iterMs = new Date(iterTs).getTime();
+      return iterMs > dbMs;
+    })();
+
+    if (!needsUpdate) continue;
+
+    if (!dryRun) {
+      try {
+        db.prepare(
+          `UPDATE tasks SET status = 'complete', completed_at = :ca, verification_status = 'all_pass'
+           WHERE milestone_id = :m AND slice_id = :s AND id = :t`,
+        ).run({
+          ":ca": iterTs ?? new Date().toISOString(),
+          ":m": milestoneId,
+          ":s": sliceId,
+          ":t": taskId,
+        });
+        appendSelfFeedback(basePath, unitId, iterTs);
+      } catch {
+        continue; // non-fatal per-row failure
+      }
+    }
+
+    reconciled.push({
+      unitId,
+      oldStatus: dbStatus,
+      newStatus: "complete",
+      source: "iterations.jsonl",
+    });
+  }
+
+  return {
+    reconciled,
+    totalChecked,
+    durationMs: Date.now() - t0,
+  };
+}
+
+// ─── UOK Gate wrapper ──────────────────────────────────────────────────────
+
+export const iterCompletionReconcilerGate = {
+  id: "iter-completion-reconciler",
+  type: "verification",
+  async execute(ctx) {
+    const result = await reconcileIterCompletions(ctx.basePath, ctx.options);
+    return result.reconciled.length === 0
+      ? {
+          outcome: "pass",
+          failureClass: null,
+          rationale: `iter-vs-db reconciled: 0 of ${result.totalChecked} tasks needed update`,
+        }
+      : {
+          outcome: "manual-attention",
+          failureClass: "verification",
+          rationale: `iter-vs-db reconciled ${result.reconciled.length} of ${result.totalChecked} drift(ed) tasks`,
+          findings: result.reconciled,
+        };
+  },
+};
diff --git a/src/resources/extensions/sf/sf-db/sf-db-tasks.js b/src/resources/extensions/sf/sf-db/sf-db-tasks.js
index 7fd0a71d0..6a0120b4e 100644
--- a/src/resources/extensions/sf/sf-db/sf-db-tasks.js
+++ b/src/resources/extensions/sf/sf-db/sf-db-tasks.js
@@ -1,3 +1,5 @@
+import { appendFileSync, existsSync, readFileSync, statSync } from "node:fs";
+import { join } from "node:path";
 import { SF_STALE_STATE, SFError } from "../errors.js";
 import {
   normalizeSchedulerStatus,
@@ -15,6 +17,81 @@ import {
   transaction,
 } from "./sf-db-core.js";
 
+// ─── Layer B: iter-truth revert guard ─────────────────────────────────────────
+
+const SF_REVERT_BLOCK_WINDOW_MS_DEFAULT = 1800000; // 30 minutes
+
+/**
+ * Scan iterations.jsonl backwards for the most recent entry whose unitId
+ * matches `<milestoneId>/<sliceId>/<taskId>` and return `{outcome, ts}`.
+ * Returns null when the file is absent, unreadable, or no match is found.
+ * Capped at last 2000 lines / 2 MB.
+ */
+function _findLatestIterOutcomeForUnit(basePath, milestoneId, sliceId, taskId) {
+  const iterPath = join(
+    basePath,
+    ".sf",
+    "runtime",
+    "autonomous-solver",
+    "iterations.jsonl",
+  );
+  if (!existsSync(iterPath)) return null;
+  try {
+    const MAX_BYTES = 2 * 1024 * 1024;
+    const stat = statSync(iterPath);
+    // The whole file is still read; only parsing is bounded, to the last MAX_BYTES UTF-16 units (not bytes)
+    let raw = readFileSync(iterPath, "utf-8");
+    if (stat.size > MAX_BYTES) {
+      raw = raw.slice(raw.length - MAX_BYTES);
+    }
+    const unitId = `${milestoneId}/${sliceId}/${taskId}`;
+    const lines = raw.split("\n").filter((l) => l.trim().length > 0);
+    // Scan from the end, cap at last 2000 lines
+    const tail = lines.slice(-2000);
+    for (let i = tail.length - 1; i >= 0; i--) {
+      try {
+        const entry = JSON.parse(tail[i]);
+        if (entry.unitId === unitId && entry.outcome) {
+          return { outcome: entry.outcome, ts: entry.ts };
+        }
+      } catch {
+        // skip malformed lines
+      }
+    }
+  } catch {
+    // best-effort — never block on file read failure
+  }
+  return null;
+}
+
+/**
+ * Append a revert-blocked self-feedback entry directly to
+ * `<basePath>/.sf/self-feedback.jsonl` (best-effort, never throws).
+ */
+function _appendRevertBlockedFeedback(basePath, unitId, iterCompleteTs, attemptedRevert, stackFrame) {
+  try {
+    const ts = Date.now().toString(36);
+    const rnd = Math.random().toString(36).slice(2, 8);
+    const id = `sf-${ts}-${rnd}`;
+    const entry = {
+      schemaVersion: 1,
+      id,
+      ts: new Date().toISOString(),
+      kind: "revert-blocked-by-iter-truth",
+      severity: "medium",
+      summary: `Revert to "${attemptedRevert}" blocked for ${unitId} — iterations.jsonl shows outcome=complete at ${iterCompleteTs}`,
+      evidence: stackFrame ?? "(no stack frame)",
+      unitId,
+      iterCompleteTs,
+      attemptedRevert,
+    };
+    const sfJsonlPath = join(basePath, ".sf", "self-feedback.jsonl");
+    appendFileSync(sfJsonlPath, `${JSON.stringify(entry)}\n`, "utf-8");
+  } catch {
+    // best-effort — never let feedback write block the error path
+  }
+}
+
 export function insertTask(t) {
   const currentDb = _getAdapter();
   if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
@@ -155,6 +232,7 @@ export function updateTaskStatus(
   status,
   completedAt,
   purposeTrace,
+  { basePath = process.cwd() } = {},
 ) {
   const currentDb = _getAdapter();
   if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
@@ -166,6 +244,42 @@ export function updateTaskStatus(
     typeof purposeTrace === "string" && purposeTrace.trim().length > 0
       ? purposeTrace.trim()
       : null;
+  // Layer B: block reverts when iterations.jsonl recently recorded outcome=complete
+  // for this (milestone, slice, task) triple (R072 + T02-clobber fix).
+  const isRevert = status !== "complete" && status !== "done" && status !== "in_progress";
+  if (isRevert) {
+    // NaN (unset or malformed override) falls back to the default instead of
+    // silently disabling the guard; "0" (or any value <= 0) still disables it.
+    const envWindowMs = Number(process.env.SF_REVERT_BLOCK_WINDOW_MS);
+    const windowMs = Number.isNaN(envWindowMs) ? SF_REVERT_BLOCK_WINDOW_MS_DEFAULT : envWindowMs;
+    if (windowMs > 0) {
+      const iterResult = _findLatestIterOutcomeForUnit(
+        basePath,
+        milestoneId,
+        sliceId,
+        taskId,
+      );
+      if (iterResult && iterResult.outcome === "complete") {
+        const iterCompleteTs = iterResult.ts;
+        const iterCompleteMs = iterCompleteTs ? new Date(iterCompleteTs).getTime() : 0;
+        const ageMs = Date.now() - iterCompleteMs;
+        if (ageMs <= windowMs) {
+          const unitId = `${milestoneId}/${sliceId}/${taskId}`;
+          const stackLine = new Error().stack?.split("\n")[2]?.trim() ?? "";
+          _appendRevertBlockedFeedback(
+            basePath,
+            unitId,
+            iterCompleteTs,
+            status,
+            stackLine,
+          );
+          throw new Error(
+            `revert-blocked-by-iter-truth: ${unitId} has outcome=complete in iterations.jsonl at ${iterCompleteTs} (${Math.round(ageMs / 1000)}s ago, window=${windowMs}ms); attempted revert to "${status}"`,
+          );
+        }
+      }
+    }
+  }
   // R072: When reverting a task to a non-complete status (e.g. the safety
   // file-change guard or hook retry path calls updateTaskStatus("pending")),
   // clear verification_status so it cannot drift out of sync with status.
diff --git a/src/resources/extensions/sf/tests/iter-completion-reconciler.test.mjs b/src/resources/extensions/sf/tests/iter-completion-reconciler.test.mjs
new file mode 100644
index 000000000..fd2f97157
--- /dev/null
+++ b/src/resources/extensions/sf/tests/iter-completion-reconciler.test.mjs
@@ -0,0 +1,288 @@
+/**
+ * iter-completion-reconciler.test.mjs — Layer A iter↔DB reconciliation sweeper tests.
+ *
+ * Purpose: verify reconcileIterCompletions and iterCompletionReconcilerGate
+ * behave correctly against tmp fixtures; never touches real .sf/ data.
+ */
+import {
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { DatabaseSync } from "node:sqlite";
+import { afterEach, describe, expect, it } from "vitest";
+import {
+  iterCompletionReconcilerGate,
+  reconcileIterCompletions,
+} from "../sf-db/iteration-completion-reconciler.js";
+
+// ─── Fixtures ───────────────────────────────────────────────────────────────
+
+const tmpRoots = [];
+
+afterEach(() => {
+  for (const root of tmpRoots.splice(0)) {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+function makeProject() {
+  const root = mkdtempSync(join(tmpdir(), "sf-iter-reconciler-"));
+  tmpRoots.push(root);
+  mkdirSync(join(root, ".sf", "runtime", "autonomous-solver"), {
+    recursive: true,
+  });
+  return root;
+}
+
+function writeIterJsonl(root, entries) {
+  const path = join(
+    root,
+    ".sf",
+    "runtime",
+    "autonomous-solver",
+    "iterations.jsonl",
+  );
+  const content = entries.map((e) => JSON.stringify(e)).join("\n") + "\n";
+  writeFileSync(path, content, "utf-8");
+}
+
+/**
+ * Create a minimal in-memory SQLite DB with a tasks table.
+ * Returns the adapter object (prepare/exec/close interface).
+ */
+function makeMemDb() {
+  const raw = new DatabaseSync(":memory:");
+  raw.exec(`
+    CREATE TABLE IF NOT EXISTS tasks (
+      milestone_id TEXT NOT NULL,
+      slice_id TEXT NOT NULL,
+      id TEXT NOT NULL,
+      status TEXT NOT NULL DEFAULT 'pending',
+      completed_at TEXT DEFAULT NULL,
+      verification_status TEXT NOT NULL DEFAULT '',
+      PRIMARY KEY (milestone_id, slice_id, id)
+    )
+  `);
+
+  // Minimal adapter interface matching sf-db-core createAdapter output
+  const stmtCache = new Map();
+  function wrapStmt(raw2) {
+    return {
+      run(...params) {
+        return raw2.run(...params);
+      },
+      get(...params) {
+        const r = raw2.get(...params);
+        return r == null ? undefined : { ...r };
+      },
+      all(...params) {
+        return raw2.all(...params).map((r) => ({ ...r }));
+      },
+    };
+  }
+  return {
+    exec(sql) {
+      raw.exec(sql);
+    },
+    prepare(sql) {
+      let cached = stmtCache.get(sql);
+      if (cached) return cached;
+      cached = wrapStmt(raw.prepare(sql));
+      stmtCache.set(sql, cached);
+      return cached;
+    },
+    close() {
+      stmtCache.clear();
+      raw.close();
+    },
+    // Helper for tests
+    insertTask(milestoneId, sliceId, id, status, completedAt) {
+      raw
+        .prepare(
+          `INSERT INTO tasks (milestone_id, slice_id, id, status, completed_at)
+           VALUES (?, ?, ?, ?, ?)`,
+        )
+        .run(milestoneId, sliceId, id, status, completedAt ?? null);
+    },
+    getTask(milestoneId, sliceId, id) {
+      const r = raw
+        .prepare(
+          "SELECT * FROM tasks WHERE milestone_id = ? AND slice_id = ? AND id = ?",
+        )
+        .get(milestoneId, sliceId, id);
+      return r == null ? undefined : { ...r };
+    },
+  };
+}
+
+const TS_X = new Date(Date.now() - 10 * 60_000).toISOString(); // relative to now: must stay inside the reconciler's default 1h maxAgeMs
+const TS_OLDER = new Date(Date.now() - 30 * 60_000).toISOString(); // earlier than TS_X, still inside the window
+const TS_NEWER = new Date(Date.now() - 60_000).toISOString(); // later than TS_X ("DB ahead of iter")
+
+// ─── Tests ──────────────────────────────────────────────────────────────────
+
+describe("reconcileIterCompletions", () => {
+  it("empty_iter_and_empty_db_returns_no_reconciliations", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+    // No iter file written → empty
+
+    const result = await reconcileIterCompletions(root, { db });
+
+    expect(result.reconciled).toEqual([]);
+    expect(result.totalChecked).toBe(0);
+    expect(typeof result.durationMs).toBe("number");
+    db.close();
+  });
+
+  it("iter_complete_db_pending_reconciler_flips_db_to_complete", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+
+    db.insertTask("M999", "S99", "T99", "pending", null);
+
+    writeIterJsonl(root, [
+      { ts: TS_X, unitId: "M999/S99/T99", outcome: "complete", iteration: 1 },
+    ]);
+
+    const result = await reconcileIterCompletions(root, { db });
+
+    expect(result.reconciled).toHaveLength(1);
+    expect(result.reconciled[0].unitId).toBe("M999/S99/T99");
+    expect(result.reconciled[0].oldStatus).toBe("pending");
+    expect(result.reconciled[0].newStatus).toBe("complete");
+    expect(result.reconciled[0].source).toBe("iterations.jsonl");
+    expect(result.totalChecked).toBe(1);
+
+    const row = db.getTask("M999", "S99", "T99");
+    expect(row.status).toBe("complete");
+    expect(row.completed_at).toBe(TS_X);
+    expect(row.verification_status).toBe("all_pass");
+
+    db.close();
+  });
+
+  it("iter_complete_db_complete_with_newer_completed_at_no_change", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+
+    // DB already has a newer completed_at — DB is ahead of iter, leave it
+    db.insertTask("M999", "S99", "T99", "complete", TS_NEWER);
+
+    writeIterJsonl(root, [
+      { ts: TS_X, unitId: "M999/S99/T99", outcome: "complete", iteration: 1 },
+    ]);
+
+    const result = await reconcileIterCompletions(root, { db });
+
+    expect(result.reconciled).toHaveLength(0);
+    expect(result.totalChecked).toBe(1);
+
+    // DB row should be untouched
+    const row = db.getTask("M999", "S99", "T99");
+    expect(row.completed_at).toBe(TS_NEWER);
+
+    db.close();
+  });
+
+  it("iter_research_slice_unit_no_T_part_is_skipped", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+
+    writeIterJsonl(root, [
+      {
+        ts: TS_X,
+        unitId: "M999/parallel-research",
+        outcome: "complete",
+        iteration: 1,
+      },
+    ]);
+
+    const result = await reconcileIterCompletions(root, { db });
+
+    // totalChecked = 0 because the unit has no T-segment
+    expect(result.reconciled).toHaveLength(0);
+    expect(result.totalChecked).toBe(0);
+
+    db.close();
+  });
+
+  it("dryRun_true_returns_reconciliations_but_no_db_writes", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+
+    db.insertTask("M999", "S99", "T99", "pending", null);
+
+    writeIterJsonl(root, [
+      { ts: TS_X, unitId: "M999/S99/T99", outcome: "complete", iteration: 1 },
+    ]);
+
+    const result = await reconcileIterCompletions(root, { db, dryRun: true });
+
+    // Reports the drift but does not write
+    expect(result.reconciled).toHaveLength(1);
+    expect(result.reconciled[0].unitId).toBe("M999/S99/T99");
+
+    // DB must remain untouched
+    const row = db.getTask("M999", "S99", "T99");
+    expect(row.status).toBe("pending");
+    expect(row.completed_at).toBeNull();
+
+    db.close();
+  });
+});
+
+describe("iterCompletionReconcilerGate", () => {
+  it("exports_adr0075_gate_contract_shape", () => {
+    expect(iterCompletionReconcilerGate.id).toBe("iter-completion-reconciler");
+    expect(iterCompletionReconcilerGate.type).toBe("verification");
+    expect(typeof iterCompletionReconcilerGate.execute).toBe("function");
+  });
+
+  it("execute_returns_pass_when_no_drift", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+
+    // No iter file — nothing to reconcile
+    const result = await iterCompletionReconcilerGate.execute({
+      basePath: root,
+      options: { db },
+    });
+
+    expect(result.outcome).toBe("pass");
+    expect(result.failureClass).toBeNull();
+    expect(typeof result.rationale).toBe("string");
+    expect(result.rationale).toContain("0 of");
+
+    db.close();
+  });
+
+  it("execute_returns_manual_attention_with_findings_when_drift_reconciled", async () => {
+    const root = makeProject();
+    const db = makeMemDb();
+
+    db.insertTask("M999", "S99", "T99", "pending", null);
+    writeIterJsonl(root, [
+      { ts: TS_X, unitId: "M999/S99/T99", outcome: "complete", iteration: 1 },
+    ]);
+
+    const result = await iterCompletionReconcilerGate.execute({
+      basePath: root,
+      options: { db },
+    });
+
+    expect(result.outcome).toBe("manual-attention");
+    expect(result.failureClass).toBe("verification");
+    expect(typeof result.rationale).toBe("string");
+    expect(result.rationale).toContain("1 of");
+    expect(Array.isArray(result.findings)).toBe(true);
+    expect(result.findings[0].unitId).toBe("M999/S99/T99");
+
+    db.close();
+  });
+});
diff --git 
a/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs b/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs
new file mode 100644
index 000000000..622a3340e
--- /dev/null
+++ b/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs
@@ -0,0 +1,176 @@
+/**
+ * update-task-status-revert-safety.test.mjs — Layer B revert guard.
+ *
+ * Purpose: prove that updateTaskStatus refuses to revert a task whose
+ * (milestone, slice, task) triple appears in iterations.jsonl with
+ * outcome=complete within SF_REVERT_BLOCK_WINDOW_MS (default 30 min).
+ *
+ * Consumer: safety harness that prevented T02 from being clobbered by a
+ * "pending" revert after iterations.jsonl already recorded completion.
+ */
+
+import assert from "node:assert/strict";
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, test } from "vitest";
+import {
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertTask,
+  openDatabase,
+  updateTaskStatus,
+} from "../sf-db.js";
+
+const tmpDirs = [];
+
+afterEach(() => {
+  closeDatabase();
+  delete process.env.SF_REVERT_BLOCK_WINDOW_MS;
+  while (tmpDirs.length > 0) {
+    const dir = tmpDirs.pop();
+    if (dir) rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+/**
+ * Create a minimal project with a DB, optional iterations.jsonl, and
+ * a task seeded in "complete" status.
+ */
+function makeProject({ iterLines = null } = {}) {
+  const dir = mkdtempSync(join(tmpdir(), "sf-revert-safety-"));
+  tmpDirs.push(dir);
+
+  // Create required dirs
+  mkdirSync(join(dir, ".sf", "runtime", "autonomous-solver"), {
+    recursive: true,
+  });
+
+  // Write iterations.jsonl if provided
+  if (iterLines !== null) {
+    writeFileSync(
+      join(dir, ".sf", "runtime", "autonomous-solver", "iterations.jsonl"),
+      iterLines,
+      "utf-8",
+    );
+  }
+
+  // Open DB and seed data
+  openDatabase(join(dir, ".sf", "sf.db"));
+  insertMilestone({ id: "M001", title: "Test milestone", status: "active" });
+  insertSlice({ milestoneId: "M001", id: "S01", title: "Test slice", status: "pending" });
+  insertTask({ milestoneId: "M001", sliceId: "S01", id: "T02", title: "Test task", status: "complete" });
+
+  return dir;
+}
+
+function makeIterLine(unitId, outcome, ts) {
+  return JSON.stringify({ schemaVersion: 1, unitId, outcome, ts }) + "\n";
+}
+
+// ── Test 1: Revert to pending with NO iter entry → succeeds ──────────────────
+test("updateTaskStatus_revert_to_pending_with_no_iter_entry_succeeds", () => {
+  const dir = makeProject({ iterLines: null });
+  // Should not throw
+  updateTaskStatus("M001", "S01", "T02", "pending", null, undefined, {
+    basePath: dir,
+  });
+});
+
+// ── Test 2: Revert with iter outcome=complete but older than window → succeeds
+test("updateTaskStatus_revert_with_stale_complete_iter_entry_succeeds", () => {
+  // 1h old — well outside the 30min window
+  const oldTs = new Date(Date.now() - 60 * 60 * 1000).toISOString();
+  const iterLines = makeIterLine("M001/S01/T02", "complete", oldTs);
+  const dir = makeProject({ iterLines });
+  // Should not throw
+  updateTaskStatus("M001", "S01", "T02", "pending", null, undefined, {
+    basePath: dir,
+  });
+});
+
+// ── Test 3: Revert with iter outcome=complete 5min ago → BLOCKED ─────────────
+test("updateTaskStatus_revert_with_recent_complete_iter_entry_is_blocked", () => {
+  const recentTs = new Date(Date.now() - 5 * 60 * 1000).toISOString(); // 5min ago
+  const iterLines = makeIterLine("M001/S01/T02", "complete", recentTs);
+  const dir = makeProject({ iterLines });
+
+  assert.throws(
+    () =>
+      updateTaskStatus("M001", "S01", "T02", "pending", null, undefined, {
+        basePath: dir,
+      }),
+    (err) => {
+      assert.ok(
+        err.message.includes("revert-blocked-by-iter-truth"),
+        `Expected revert-blocked-by-iter-truth in: ${err.message}`,
+      );
+      return true;
+    },
+  );
+
+  // Self-feedback entry must have been appended
+  const sfJsonlPath = join(dir, ".sf", "self-feedback.jsonl");
+  assert.ok(existsSync(sfJsonlPath), "self-feedback.jsonl should be created");
+  const content = readFileSync(sfJsonlPath, "utf-8");
+  const entry = JSON.parse(content.trim().split("\n")[0]);
+  assert.equal(entry.kind, "revert-blocked-by-iter-truth");
+  assert.equal(entry.severity, "medium");
+  assert.equal(entry.unitId, "M001/S01/T02");
+  assert.equal(entry.attemptedRevert, "pending");
+  assert.equal(entry.iterCompleteTs, recentTs);
+});
+
+// ── Test 4: SF_REVERT_BLOCK_WINDOW_MS=0 disables check → succeeds ────────────
+test("updateTaskStatus_revert_with_window_ms_zero_env_succeeds", () => {
+  process.env.SF_REVERT_BLOCK_WINDOW_MS = "0";
+  const recentTs = new Date(Date.now() - 5 * 60 * 1000).toISOString();
+  const iterLines = makeIterLine("M001/S01/T02", "complete", recentTs);
+  const dir = makeProject({ iterLines });
+  // Should not throw (window disabled)
+  updateTaskStatus("M001", "S01", "T02", "pending", null, undefined, {
+    basePath: dir,
+  });
+});
+
+// ── Test 5: Iter outcome=continue (not complete) → revert succeeds ────────────
+test("updateTaskStatus_revert_with_continue_iter_outcome_succeeds", () => {
+  const recentTs = new Date(Date.now() - 5 * 60 * 1000).toISOString();
+  const iterLines = makeIterLine("M001/S01/T02", "continue", recentTs);
+  const dir = makeProject({ iterLines });
+  // Should not throw — outcome is not "complete"
+  updateTaskStatus("M001", "S01", "T02", "pending", null, undefined, {
+    basePath: dir,
+  });
+});
+
+// ── Test 6: newStatus = "in_progress" → never blocked ────────────────────────
+test("updateTaskStatus_in_progress_never_blocked_even_with_recent_complete_iter", () => {
+  const recentTs = new Date(Date.now() - 5 * 60 * 1000).toISOString();
+  const iterLines = makeIterLine("M001/S01/T02", "complete", recentTs);
+  const dir = makeProject({ iterLines });
+  // "in_progress" is explicitly excluded from the revert guard
+  updateTaskStatus("M001", "S01", "T02", "in_progress", null, undefined, {
+    basePath: dir,
+  });
+});
+
+// ── Test 7: newStatus = "complete" → never blocked (forward transition) ───────
+test("updateTaskStatus_complete_forward_transition_never_blocked", () => {
+  const recentTs = new Date(Date.now() - 5 * 60 * 1000).toISOString();
+  const iterLines = makeIterLine("M001/S01/T02", "complete", recentTs);
+  // makeProject seeds T02 as "complete" (not pending), so this re-asserts "complete" on an already-complete row
+  const dir = makeProject({ iterLines });
+  // updateTaskStatus to "complete" should never be blocked
+  updateTaskStatus("M001", "S01", "T02", "complete", new Date().toISOString(), undefined, {
+    basePath: dir,
+  });
+});
diff --git a/src/resources/extensions/sf/uok/gate-registry-bootstrap.js b/src/resources/extensions/sf/uok/gate-registry-bootstrap.js
index db789c71d..9e58ef75d 100644
--- a/src/resources/extensions/sf/uok/gate-registry-bootstrap.js
+++ b/src/resources/extensions/sf/uok/gate-registry-bootstrap.js
@@ -1,4 +1,5 @@
 import { driftDetectionGate } from "./drift-detection-gate.js";
+import { iterCompletionReconcilerGate } from "../sf-db/iteration-completion-reconciler.js";
 import { getGateRegistry } from "./gate-registry.js";
 
 /**
@@ -20,5 +21,6 @@ const registry = getGateRegistry();
 // Registered lazily at call site via registry.has() guard; ctxFactory pattern supplies verdict/rationale/remediationPlan.
 // SKIP planning-flow-gate: execute() closes over persistGate arguments from guided-flow.js.
 registry.register(driftDetectionGate);
+registry.register(iterCompletionReconcilerGate);
 
 export { registry as gateRegistry };