fix(sf-db,autonomous-solver): resolve schema-drift and checkpoint runaway loop

- sf-db-schema.js: per-migration transaction boundaries (runMigrationStep)
  so a late migration failure does not roll back earlier successful ones.
  Post-migration assertion recreates routing_history if missing.
- routing-history.js: catch missing routing_history table at init and latch
  _dbTableAvailable=false so auto-start does not crash.
- autonomous-solver.js: sticky identity guard in appendAutonomousSolverCheckpoint
  pins to orchestrator's unitType/unitId instead of trusting agent's claim.
  Emit journal event on identity mismatch. Record mismatchedIdentity diagnostic.
  Hard cap MAX_CHECKPOINTS_PER_ITERATION=5 in assessAutonomousSolverTurn.
- Tests: add v52 DB smoke test with auto-start path; add sticky identity
  tests (4 cases); add excessive-checkpoint pause test.

Fixes: sf-mp36kfqm-rjrzju, sf-mp37kjmo-1mfuru
This commit is contained in:
Mikael Hugo 2026-05-13 01:47:19 +02:00
parent a49ea1da87
commit 1ed505669b
5 changed files with 1254 additions and 646 deletions

View file

@ -17,6 +17,7 @@ import {
import { dirname, join } from "node:path";
import { atomicWriteSync } from "./atomic-write.js";
import { sfRoot } from "./paths.js";
import { emitJournalEvent } from "./journal.js";
export const AUTONOMOUS_SOLVER_OUTCOMES = [
"continue",
@ -30,6 +31,7 @@ const DEFAULT_SOLVER_MAX_ITERATIONS = 30000;
const MIN_SOLVER_MAX_ITERATIONS = 1;
const MAX_SOLVER_MAX_ITERATIONS = 100000;
const DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS = 4;
const MAX_CHECKPOINTS_PER_ITERATION = 5;
const SOLVER_CHECKPOINT_SCHEMA_VERSION = 1;
const SOLVER_STEERING_SCHEMA_VERSION = 1;
const STALL_THRESHOLD_ITERATIONS = 3;
@ -265,6 +267,8 @@ export function beginAutonomousSolverIteration(
? existing.recentCheckpointSummaries
: []
: [],
// Safety cap: how many checkpoints have been written this iteration
checkpointCountThisIteration: 0,
};
writeState(basePath, state);
return state;
@ -463,11 +467,13 @@ export function buildSolverPassPrompt(
"",
"## Classification Rubric",
"",
"- `executor-refused`: The executor emitted a generic refusal ('I'm sorry', 'I cannot help', 'I don't have the necessary tools'). → checkpoint outcome=`blocked`, blockerReason=`executor-refused`.",
"- `executor-noop`: The executor emitted prose but made zero tool calls, zero file edits, and zero measurable progress. → checkpoint outcome=`blocked` (or `continue` ONLY if the executor explicitly states it is waiting for an external event).",
"- `progress`: The executor made concrete progress (file edits, tests run, tools called). → checkpoint outcome=`continue` with accurate completedItems/remainingItems.",
"- `complete`: The executor finished the unit's required artifact AND called any mandatory completion tool. → checkpoint outcome=`complete`.",
"- `blocker-other`: The executor hit a hard blocker (missing credentials, broken environment). → checkpoint outcome=`blocked` with a precise blockerReason.",
"Apply these in order; emit the FIRST one that matches.",
"",
"1. `executor-refused`: The executor emitted a generic refusal ('I'm sorry', 'I cannot help', 'I don't have the necessary tools', 'outside my capabilities'). → checkpoint outcome=`blocked`, blockerReason=`executor-refused`.",
"2. `executor-noop`: The executor emitted prose but made zero tool calls, zero file edits, and zero measurable progress. → checkpoint outcome=`blocked`, blockerReason=`executor-noop`. There is no `continue` escape hatch for this case — synthesizing forward progress over a no-op iteration is the exact bug ADR-0079 closes. If the executor genuinely needs an external event, that is a `blocker-external-wait` (rule 5), not a continue.",
"3. `progress`: The executor made concrete progress (file edits, tests run, tools called). → checkpoint outcome=`continue` with accurate completedItems/remainingItems.",
"4. `complete`: The executor finished the unit's required artifact AND called any mandatory completion tool. → checkpoint outcome=`complete`.",
"5. `blocker-other`: The executor hit a hard blocker (missing credentials, broken environment, external wait). → checkpoint outcome=`blocked` with a precise blockerReason naming the cause.",
"",
"## Executor Transcript",
"",
@ -500,6 +506,29 @@ export function buildSolverPassPrompt(
* must not satisfy the repair gate.
*
* Consumer: assessAutonomousSolverTurn to reject no-op continues.
*
* Implementation: structural inspection only. We look for evidence that the
* executor actually invoked tools, in either of the two message shapes used
* across SF's provider runtimes:
*
* 1. Anthropic-style: `msg.content` is an array of blocks; tool activity
* shows as `{ type: "tool_use", name: ... }` (assistant) or
* `{ type: "tool_result", ... }` (user/tool role). This is the shape
* Claude messages take when stored in pi's agent_end events (see
* undo.js:431-447 which uses the same pattern to extract tool_result
* content).
* 2. OpenAI-style: `msg.tool_calls` array on the assistant message and
* `msg.role === "tool"` (or "tool_result") with `msg.name` on the
* reply. Used by OpenAI-compatible providers.
*
* A `checkpoint` tool call by itself doesn't count as work — that's the
* protocol step, not the unit deliverable. Any other named tool counts.
*
* We deliberately do NOT grep prose ("File edited", "```diff", ). Prose
* patterns are runtime-specific and produce false negatives that mark real
* work as no-op, which would synthesize a blocker over completed iterations.
* If a transcript has zero structural tool activity, it really is a no-op
* even if its prose is plausible.
*/
export function isNoOpExecutorTranscript(messages) {
if (!Array.isArray(messages) || messages.length === 0) return true;
@ -507,38 +536,57 @@ export function isNoOpExecutorTranscript(messages) {
// Refusal is always a no-op
if (classifyExecutorRefusal(messages)) return true;
const isWorkToolName = (name) => {
if (!name || typeof name !== "string") return false;
// `checkpoint` is the protocol; the executor calling it is not unit work.
// (Per ADR-0079 the executor isn't even supposed to call it.) Anything
// else — reads, writes, bash, complete_task, save_summary — counts.
return name !== "checkpoint";
};
for (const msg of messages) {
if (!msg || typeof msg !== "object") continue;
// Assistant requested non-checkpoint tool calls
if (Array.isArray(msg.tool_calls)) {
for (const tc of msg.tool_calls) {
const name = tc?.function?.name ?? tc?.name ?? "";
if (name && name !== "checkpoint") {
// ── Anthropic-style: content is an array of typed blocks ──
if (Array.isArray(msg.content)) {
for (const block of msg.content) {
if (!block || typeof block !== "object") continue;
if (block.type === "tool_use" && isWorkToolName(block.name)) {
return false;
}
if (block.type === "tool_result") {
// tool_result has no name on the block itself; presence of a
// non-checkpoint tool_result implies a non-checkpoint tool_use
// preceded it. The pair-match would require backward scan; for
// robustness, treat ANY tool_result as evidence of work unless
// it's a checkpoint result (which would have been emitted by
// the assistant's checkpoint tool_use earlier in this same
// transcript — but that's protocol, not work). Without the
// block name we can't distinguish, so be conservative: a
// tool_result is non-no-op work UNLESS the entire transcript's
// only tool_use was `checkpoint`. We carry that check via the
// tool_use scan above — if a non-checkpoint tool_use exists,
// we've already returned false. If only `checkpoint` was used,
// the tool_result here is the checkpoint reply and we should
// keep scanning.
// Simpler approach: ignore tool_result blocks for the
// classification; the tool_use scan is authoritative.
continue;
}
}
}
// Tool results from non-checkpoint tools
if (msg.role === "tool" || msg.role === "tool_result") {
const name = msg.name ?? "";
if (name && name !== "checkpoint") {
return false;
// ── OpenAI-style: msg.tool_calls on assistant ──
if (Array.isArray(msg.tool_calls)) {
for (const tc of msg.tool_calls) {
const name = tc?.function?.name ?? tc?.name ?? "";
if (isWorkToolName(name)) return false;
}
}
// Content that shows concrete work was done
const content = typeof msg.content === "string" ? msg.content : "";
if (
content.includes("File edited") ||
content.includes("File written") ||
content.includes("File created") ||
content.includes("```diff") ||
content.includes("--- a/") ||
content.includes("+++ b/")
) {
return false;
// ── OpenAI-style: tool reply rows ──
if (msg.role === "tool" || msg.role === "tool_result") {
if (isWorkToolName(msg.name)) return false;
}
}
@ -554,17 +602,61 @@ export function isNoOpExecutorTranscript(messages) {
* Consumer: checkpoint tool.
*/
export function appendAutonomousSolverCheckpoint(basePath, params) {
const persisted = readJson(statePath(basePath));
const state =
readJson(statePath(basePath)) ??
persisted ??
beginAutonomousSolverIteration(basePath, params.unitType, params.unitId);
// ── Sticky identity guard ──
// The orchestrator owns the active unit identity (it called
// beginAutonomousSolverIteration with the canonical unitType/unitId).
// If the agent's checkpoint call passes a *different* unitType/unitId
// because it guessed wrong (real-world: minimax/M2.1 stuck at
// 2026-05-13 calling checkpoint with `parallel-research` /
// `1-ci-build-pipeline/parallel-research` / `research-slice 1-...`
// — three different strings — none matching the orchestrator's
// active identity), the previous implementation silently overwrote
// state.unitType/unitId with the wrong claim. assessAutonomousSolverTurn
// then failed sameUnit() against the orchestrator's identity, fired
// missing-checkpoint-retry, the agent re-checkpointed with another
// wrong guess, and the loop ran indefinitely (60+ wasted calls).
//
// Fix: when there is an active running/paused state, pin the
// checkpoint to the active state's identity instead of trusting the
// agent's claim. Surface the mismatch on the checkpoint payload so it
// is visible in traces.
const hasActiveIdentity =
persisted &&
persisted.unitType &&
persisted.unitId &&
persisted.status !== "complete";
const isMismatch =
hasActiveIdentity &&
!sameUnit(persisted, params.unitType, params.unitId);
if (isMismatch) {
emitJournalEvent(basePath, {
flowId: `${state.unitType}-${state.unitId}-${Date.now()}`,
seq: 1,
ts: nowIso(),
eventType: "checkpoint-identity-mismatch",
data: {
claimedUnitType: params.unitType,
claimedUnitId: params.unitId,
pinnedToUnitType: state.unitType,
pinnedToUnitId: state.unitId,
},
});
}
const effectiveUnitType = isMismatch ? state.unitType : params.unitType;
const effectiveUnitId = isMismatch ? state.unitId : params.unitId;
const checkpoint = {
schemaVersion: SOLVER_CHECKPOINT_SCHEMA_VERSION,
ts: nowIso(),
unitType: params.unitType,
unitId: params.unitId,
iteration: sameUnit(state, params.unitType, params.unitId)
? state.iteration
: 1,
unitType: effectiveUnitType,
unitId: effectiveUnitId,
// Iteration must match the orchestrator's current iteration so
// assessAutonomousSolverTurn's hasCurrentCheckpoint check passes
// and the outcome (especially `complete`) is honored.
iteration: state.iteration,
outcome: params.outcome,
summary: String(params.summary ?? "").trim(),
completedItems: sanitizeList(params.completedItems),
@ -586,11 +678,22 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
invariants: String(params.pdd?.invariants ?? "").trim(),
assumptions: String(params.pdd?.assumptions ?? "").trim(),
},
// Diagnostic: when the agent's claim differs from the active unit,
// record both so trace consumers can flag the model's confusion.
...(isMismatch
? {
mismatchedIdentity: {
claimedUnitType: String(params.unitType ?? ""),
claimedUnitId: String(params.unitId ?? ""),
pinnedToActive: { unitType: state.unitType, unitId: state.unitId },
},
}
: {}),
};
const nextState = {
...state,
unitType: params.unitType,
unitId: params.unitId,
unitType: effectiveUnitType,
unitId: effectiveUnitId,
status:
params.outcome === "complete"
? "complete"
@ -630,6 +733,9 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
: []),
checkpoint.summary,
].slice(-ROLLING_SUMMARY_WINDOW),
// Increment checkpoint count for this iteration (safety cap)
checkpointCountThisIteration:
(state.checkpointCountThisIteration || 0) + 1,
};
mkdirSync(dirname(historyPath(basePath)), { recursive: true });
writeFileSync(historyPath(basePath), `${JSON.stringify(checkpoint)}\n`, {
@ -985,6 +1091,20 @@ export function assessAutonomousSolverTurn(
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
};
}
// Hard cap on excessive checkpoints within a single iteration
if (
(state.checkpointCountThisIteration || 0) >=
MAX_CHECKPOINTS_PER_ITERATION
) {
return {
action: "pause",
reason: "solver-excessive-checkpoints",
state,
checkpoint,
checkpointCount: state.checkpointCountThisIteration,
maxCheckpointCount: MAX_CHECKPOINTS_PER_ITERATION,
};
}
if (
state.iteration >= state.maxIterations &&
checkpoint.outcome !== "complete"

View file

@ -15,15 +15,45 @@ const FAILURE_THRESHOLD = 0.2; // >20% failure rate triggers tier bump
const FEEDBACK_WEIGHT = 2; // feedback signals count 2x vs automatic
// ─── In-Memory State ─────────────────────────────────────────────────────────
let history = null;
// Latches to false when the `routing_history` table is observed missing at
// init time. Subsequent DB writes from recordOutcome/recordFeedback are then
// skipped so a stale-schema project doesn't repeatedly throw on every
// dispatch. The next openDatabase that successfully runs the v53 migration
// will let a later initRoutingHistory call flip this back to true.
let _dbTableAvailable = true;
// ─── Public API ──────────────────────────────────────────────────────────────
/**
* Initialize routing history for a project.
*
* Resilient to a missing `routing_history` table: a project DB whose schema
* version predates the routing_history migration (v53) will throw `no such
* table: routing_history` on the underlying SELECT. We swallow that one
* specific case so auto-start is not blocked by a stale schema; the
* in-memory history simply starts empty and accumulates from scratch.
* Anything else (corrupt DB, permission errors) re-throws so it remains
* visible.
*/
export function initRoutingHistory(_base) {
history = createEmptyHistory();
const db = getDatabase();
if (!db) return;
const rows = getAllRoutingHistory(db);
let rows;
try {
rows = getAllRoutingHistory(db);
_dbTableAvailable = true;
} catch (err) {
const message = err?.message ? String(err.message) : "";
if (/no such table:\s*routing_history/i.test(message)) {
// Schema lags the code — fresh project, or a project whose DB never
// migrated past v52. Start with empty in-memory state and latch the
// flag so recordOutcome/recordFeedback skip their DB writes for the
// remainder of the session instead of crashing on every dispatch.
rows = [];
_dbTableAvailable = false;
} else {
throw err;
}
}
for (const row of rows) {
if (!history.patterns[row.pattern]) {
history.patterns[row.pattern] = {
@ -62,13 +92,14 @@ export function resetRoutingHistory() {
export function recordOutcome(unitType, tier, success, tags) {
if (!history) return;
const db = getDatabase();
const canWriteDb = db && _dbTableAvailable;
// Record for the base unit type
const basePattern = unitType;
ensurePattern(basePattern);
const outcome = history.patterns[basePattern][tier];
if (success) outcome.success++;
else outcome.fail++;
if (db) upsertRoutingOutcome(db, basePattern, tier, success);
if (canWriteDb) upsertRoutingOutcome(db, basePattern, tier, success);
// Record for tag-specific patterns (e.g. "execute-task:docs")
if (tags && tags.length > 0) {
for (const tag of tags) {
@ -77,7 +108,7 @@ export function recordOutcome(unitType, tier, success, tags) {
const tagOutcome = history.patterns[tagPattern][tier];
if (success) tagOutcome.success++;
else tagOutcome.fail++;
if (db) upsertRoutingOutcome(db, tagPattern, tier, success);
if (canWriteDb) upsertRoutingOutcome(db, tagPattern, tier, success);
}
}
// Apply rolling window — cap total entries per tier per pattern
@ -111,7 +142,8 @@ export function recordFeedback(unitType, _unitId, tier, rating) {
history.feedback = history.feedback.slice(-200);
}
const db = getDatabase();
if (db) insertRoutingFeedback(db, unitType, tier, rating);
const canWriteDb = db && _dbTableAvailable;
if (canWriteDb) insertRoutingFeedback(db, unitType, tier, rating);
// Apply feedback as weighted outcome
const pattern = unitType;
ensurePattern(pattern);
@ -122,7 +154,7 @@ export function recordFeedback(unitType, _unitId, tier, rating) {
if (lower) {
const outcomes = history.patterns[pattern][lower];
outcomes.success += FEEDBACK_WEIGHT;
if (db) {
if (canWriteDb) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, lower, true);
}
@ -132,7 +164,7 @@ export function recordFeedback(unitType, _unitId, tier, rating) {
// User says this needed a better model → record as failure at current tier
const outcomes = history.patterns[pattern][tier];
outcomes.fail += FEEDBACK_WEIGHT;
if (db) {
if (canWriteDb) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, tier, false);
}
@ -165,7 +197,7 @@ export function getAdaptiveTierAdjustment(unitType, currentTier, tags) {
/**
 * Drop all routing history, both in-memory and persisted.
 *
 * The in-memory accumulator is always reset. The DB table is cleared only
 * when a database handle exists AND the `routing_history` table was observed
 * at init time (the `_dbTableAvailable` latch), so a stale-schema project
 * does not throw here.
 */
export function clearRoutingHistory(_base) {
  history = createEmptyHistory();
  const db = getDatabase();
  if (!db) return;
  if (!_dbTableAvailable) return;
  dbClearRoutingHistory(db);
}
/**
* Get current history data (for display/debugging).

File diff suppressed because it is too large Load diff

View file

@ -317,6 +317,36 @@ describe("autonomous solver", () => {
expect(result.reason).toBe("solver-max-iterations");
});
test("assessAutonomousSolverTurn_excessive_checkpoints_pauses_after_cap", () => {
  // Guard against the runaway loop from sf-mp37kjmo-1mfuru: 60+ checkpoint
  // calls in a single iteration with no other tool progress. After the hard
  // cap (5) the assessment must pause instead of letting the loop continue.
  const proj = makeProject();
  beginAutonomousSolverIteration(proj, "execute-task", "M001/S01/T01");
  for (let attempt = 1; attempt <= 5; attempt++) {
    appendAutonomousSolverCheckpoint(proj, {
      unitType: "execute-task",
      unitId: "M001/S01/T01",
      outcome: "continue",
      summary: `Checkpoint ${attempt} — still stuck.`,
      completedItems: [],
      remainingItems: ["need help"],
      verificationEvidence: [],
      pdd: pdd(),
    });
  }
  const verdict = assessAutonomousSolverTurn(
    proj,
    "execute-task",
    "M001/S01/T01",
  );
  expect(verdict.action).toBe("pause");
  expect(verdict.reason).toBe("solver-excessive-checkpoints");
  expect(verdict.checkpointCount).toBe(5);
});
test("steering_append_consume_is_idempotent", () => {
const project = makeProject();
appendAutonomousSolverSteering(project, "Prefer runtime enforcement.");
@ -934,3 +964,114 @@ describe("assessAutonomousSolverTurn no-op detection", () => {
expect(result.reason).toBe("solver-noop-continue");
});
});
describe("appendAutonomousSolverCheckpoint sticky identity", () => {
  test("pins to orchestrator unit identity when agent passes a different unitId", () => {
    // Regression for the 2026-05-13 incident: the agent repeatedly guessed
    // a wrong unitId ("parallel-research", etc.), each call silently
    // overwriting state.unitId. sameUnit() then failed against the
    // orchestrator's identity and the repair loop re-fired forever. The
    // active state's identity must win over the agent's claim.
    const proj = makeProject();
    beginAutonomousSolverIteration(proj, "execute-task", "M001/S04/T02");
    appendAutonomousSolverCheckpoint(proj, {
      unitType: "execute-task",
      unitId: "parallel-research", // <-- agent guesses wrong
      outcome: "complete",
      summary: "Done.",
      completedItems: ["work"],
      remainingItems: [],
      verificationEvidence: ["ls -la"],
      pdd: pdd(),
    });
    const saved = readAutonomousSolverState(proj);
    // State identity must NOT shift to the agent's wrong claim.
    expect(saved.unitType).toBe("execute-task");
    expect(saved.unitId).toBe("M001/S04/T02");
    // The checkpoint payload itself is pinned to the orchestrator's identity.
    expect(saved.latestCheckpoint.unitType).toBe("execute-task");
    expect(saved.latestCheckpoint.unitId).toBe("M001/S04/T02");
    // The mismatch is still surfaced diagnostically for trace consumers.
    expect(saved.latestCheckpoint.mismatchedIdentity).toEqual({
      claimedUnitType: "execute-task",
      claimedUnitId: "parallel-research",
      pinnedToActive: {
        unitType: "execute-task",
        unitId: "M001/S04/T02",
      },
    });
  });

  test("assessAutonomousSolverTurn honors complete after sticky-pin rescue", () => {
    // End-to-end: the agent passes a wrong unitId, the checkpoint sticks to
    // the orchestrator's identity, and assess sees outcome=complete and
    // returns action=complete (NOT missing-checkpoint-retry).
    const proj = makeProject();
    beginAutonomousSolverIteration(proj, "execute-task", "M001/S04/T02");
    appendAutonomousSolverCheckpoint(proj, {
      unitType: "execute-task",
      unitId: "wrong-guess",
      outcome: "complete",
      summary: "Done.",
      completedItems: ["work"],
      remainingItems: [],
      verificationEvidence: ["ls -la"],
      pdd: pdd(),
    });
    const verdict = assessAutonomousSolverTurn(
      proj,
      "execute-task",
      "M001/S04/T02",
    );
    expect(verdict.action).toBe("complete");
  });

  test("matching unitId does not flag mismatch", () => {
    const proj = makeProject();
    beginAutonomousSolverIteration(proj, "execute-task", "M001/S04/T02");
    appendAutonomousSolverCheckpoint(proj, {
      unitType: "execute-task",
      unitId: "M001/S04/T02",
      outcome: "continue",
      summary: "Progress",
      completedItems: ["read files"],
      remainingItems: ["edit code"],
      verificationEvidence: ["grep -n"],
      pdd: pdd(),
    });
    const saved = readAutonomousSolverState(proj);
    expect(saved.latestCheckpoint.mismatchedIdentity).toBeUndefined();
  });

  test("fresh project with no active state accepts agent-provided identity", () => {
    // Bootstrap path: no state exists on the first call, so the agent's
    // claim initializes it — identical to pre-sticky-fix behavior.
    const proj = makeProject();
    appendAutonomousSolverCheckpoint(proj, {
      unitType: "execute-task",
      unitId: "M001/S01/T01",
      outcome: "continue",
      summary: "First iteration",
      completedItems: [],
      remainingItems: ["plan"],
      verificationEvidence: [],
      pdd: pdd(),
    });
    const saved = readAutonomousSolverState(proj);
    expect(saved.unitId).toBe("M001/S01/T01");
    expect(saved.latestCheckpoint.mismatchedIdentity).toBeUndefined();
  });
});

View file

@ -29,6 +29,7 @@ import {
openDatabase,
reconcileWorktreeDb,
} from "../sf-db.js";
import { initRoutingHistory } from "../routing-history.js";
const tmpDirs = [];
@ -149,6 +150,56 @@ function makeLegacyV27Db() {
return dbPath;
}
/**
 * Create a throwaway project DB frozen at schema v52 — one migration before
 * `routing_history` (v53) exists — so tests can exercise openDatabase's
 * migration / auto-heal path against a genuinely stale schema.
 *
 * The temp directory is pushed onto `tmpDirs` for suite-level cleanup.
 *
 * @returns {string} absolute path to the created `.sf/sf.db` file
 */
function makeLegacyV52Db() {
  const tmpDir = mkdtempSync(join(tmpdir(), "sf-legacy-v52-"));
  tmpDirs.push(tmpDir);
  const stateDir = join(tmpDir, ".sf");
  mkdirSync(stateDir, { recursive: true });
  const databasePath = join(stateDir, "sf.db");
  const handle = new DatabaseSync(databasePath);
  handle.exec(`
CREATE TABLE schema_version (
version INTEGER NOT NULL,
applied_at TEXT NOT NULL
);
INSERT INTO schema_version (version, applied_at)
VALUES (52, '2026-05-06T00:00:00.000Z');
CREATE TABLE milestones (
id TEXT PRIMARY KEY,
title TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'active',
depends_on TEXT NOT NULL DEFAULT '[]',
created_at TEXT NOT NULL DEFAULT '',
completed_at TEXT DEFAULT NULL
);
CREATE TABLE slices (
milestone_id TEXT NOT NULL,
id TEXT NOT NULL,
title TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'pending',
risk TEXT NOT NULL DEFAULT 'medium',
depends TEXT NOT NULL DEFAULT '[]',
demo TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT '',
completed_at TEXT DEFAULT NULL,
PRIMARY KEY (milestone_id, id)
);
CREATE TABLE tasks (
milestone_id TEXT NOT NULL,
slice_id TEXT NOT NULL,
id TEXT NOT NULL,
title TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'pending',
PRIMARY KEY (milestone_id, slice_id, id)
);
`);
  handle.close();
  return databasePath;
}
function makeLegacyV35GateRunsDb() {
const dir = mkdtempSync(join(tmpdir(), "sf-legacy-v35-gates-"));
tmpDirs.push(dir);
@ -262,6 +313,35 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
assert.deepEqual(schedulerRow, { status: "queued" });
});
test("openDatabase_v52_db_heals_routing_history_and_auto_start_path_works", () => {
  const dbPath = makeLegacyV52Db();
  assert.equal(openDatabase(dbPath), true);
  const db = getDatabase();
  // After migration, ensurePostBootstrapTables must have created the
  // routing_history table on the previously-v52 schema.
  const tableRow = db
    .prepare(
      "SELECT name FROM sqlite_master WHERE type='table' AND name='routing_history'",
    )
    .get();
  assert.ok(
    tableRow,
    "routing_history table should exist after ensurePostBootstrapTables",
  );
  // The auto-start path (initRoutingHistory) must not crash on a v52 DB.
  assert.doesNotThrow(() => {
    initRoutingHistory(dbPath);
  }, "initRoutingHistory should not throw on a v52 DB");
  // And the schema must have been carried all the way forward to v62.
  const versionRow = db
    .prepare("SELECT MAX(version) AS version FROM schema_version")
    .get();
  assert.equal(versionRow.version, 62);
});
test("openDatabase_when_fresh_db_supports_schedule_entries", () => {
assert.equal(openDatabase(":memory:"), true);