fix(sf-db,autonomous-solver): resolve schema-drift and checkpoint runaway loop
- sf-db-schema.js: per-migration transaction boundaries (runMigrationStep) so a late migration failure does not roll back earlier successful ones. Post-migration assertion recreates routing_history if missing.
- routing-history.js: catch missing routing_history table at init and latch _dbTableAvailable=false so auto-start does not crash.
- autonomous-solver.js: sticky identity guard in appendAutonomousSolverCheckpoint pins to orchestrator's unitType/unitId instead of trusting the agent's claim. Emit journal event on identity mismatch. Record mismatchedIdentity diagnostic. Hard cap MAX_CHECKPOINTS_PER_ITERATION=5 in assessAutonomousSolverTurn.
- Tests: add v52 DB smoke test with auto-start path; add sticky identity tests (4 cases); add excessive-checkpoint pause test.

Fixes: sf-mp36kfqm-rjrzju, sf-mp37kjmo-1mfuru
parent a49ea1da87
commit 1ed505669b
5 changed files with 1254 additions and 646 deletions
@@ -17,6 +17,7 @@ import {
import { dirname, join } from "node:path";
import { atomicWriteSync } from "./atomic-write.js";
import { sfRoot } from "./paths.js";
import { emitJournalEvent } from "./journal.js";

export const AUTONOMOUS_SOLVER_OUTCOMES = [
"continue",
@@ -30,6 +31,7 @@ const DEFAULT_SOLVER_MAX_ITERATIONS = 30000;
const MIN_SOLVER_MAX_ITERATIONS = 1;
const MAX_SOLVER_MAX_ITERATIONS = 100000;
const DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS = 4;
const MAX_CHECKPOINTS_PER_ITERATION = 5;
const SOLVER_CHECKPOINT_SCHEMA_VERSION = 1;
const SOLVER_STEERING_SCHEMA_VERSION = 1;
const STALL_THRESHOLD_ITERATIONS = 3;
@@ -265,6 +267,8 @@ export function beginAutonomousSolverIteration(
? existing.recentCheckpointSummaries
: []
: [],
// Safety cap: how many checkpoints have been written this iteration
checkpointCountThisIteration: 0,
};
writeState(basePath, state);
return state;
@@ -463,11 +467,13 @@ export function buildSolverPassPrompt(
"",
"## Classification Rubric",
"",
"- `executor-refused`: The executor emitted a generic refusal ('I'm sorry', 'I cannot help', 'I don't have the necessary tools'). → checkpoint outcome=`blocked`, blockerReason=`executor-refused`.",
"- `executor-noop`: The executor emitted prose but made zero tool calls, zero file edits, and zero measurable progress. → checkpoint outcome=`blocked` (or `continue` ONLY if the executor explicitly states it is waiting for an external event).",
"- `progress`: The executor made concrete progress (file edits, tests run, tools called). → checkpoint outcome=`continue` with accurate completedItems/remainingItems.",
"- `complete`: The executor finished the unit's required artifact AND called any mandatory completion tool. → checkpoint outcome=`complete`.",
"- `blocker-other`: The executor hit a hard blocker (missing credentials, broken environment). → checkpoint outcome=`blocked` with a precise blockerReason.",
"Apply these in order; emit the FIRST one that matches.",
"",
"1. `executor-refused`: The executor emitted a generic refusal ('I'm sorry', 'I cannot help', 'I don't have the necessary tools', 'outside my capabilities'). → checkpoint outcome=`blocked`, blockerReason=`executor-refused`.",
"2. `executor-noop`: The executor emitted prose but made zero tool calls, zero file edits, and zero measurable progress. → checkpoint outcome=`blocked`, blockerReason=`executor-noop`. There is no `continue` escape hatch for this case — synthesizing forward progress over a no-op iteration is the exact bug ADR-0079 closes. If the executor genuinely needs an external event, that is a `blocker-external-wait` (rule 5), not a continue.",
"3. `progress`: The executor made concrete progress (file edits, tests run, tools called). → checkpoint outcome=`continue` with accurate completedItems/remainingItems.",
"4. `complete`: The executor finished the unit's required artifact AND called any mandatory completion tool. → checkpoint outcome=`complete`.",
"5. `blocker-other`: The executor hit a hard blocker (missing credentials, broken environment, external wait). → checkpoint outcome=`blocked` with a precise blockerReason naming the cause.",
"",
"## Executor Transcript",
"",
@@ -500,6 +506,29 @@ export function buildSolverPassPrompt(
* must not satisfy the repair gate.
*
* Consumer: assessAutonomousSolverTurn to reject no-op continues.
*
* Implementation: structural inspection only. We look for evidence that the
* executor actually invoked tools, in either of the two message shapes used
* across SF's provider runtimes:
*
* 1. Anthropic-style: `msg.content` is an array of blocks; tool activity
*    shows as `{ type: "tool_use", name: ... }` (assistant) or
*    `{ type: "tool_result", ... }` (user/tool role). This is the shape
*    Claude messages take when stored in pi's agent_end events (see
*    undo.js:431-447 which uses the same pattern to extract tool_result
*    content).
* 2. OpenAI-style: `msg.tool_calls` array on the assistant message and
*    `msg.role === "tool"` (or "tool_result") with `msg.name` on the
*    reply. Used by OpenAI-compatible providers.
*
* A `checkpoint` tool call by itself doesn't count as work — that's the
* protocol step, not the unit deliverable. Any other named tool counts.
*
* We deliberately do NOT grep prose ("File edited", "```diff", …). Prose
* patterns are runtime-specific and produce false negatives that mark real
* work as no-op, which would synthesize a blocker over completed iterations.
* If a transcript has zero structural tool activity, it really is a no-op
* even if its prose is plausible.
*/
export function isNoOpExecutorTranscript(messages) {
if (!Array.isArray(messages) || messages.length === 0) return true;
@@ -507,38 +536,57 @@ export function isNoOpExecutorTranscript(messages) {
// Refusal is always a no-op
if (classifyExecutorRefusal(messages)) return true;

const isWorkToolName = (name) => {
if (!name || typeof name !== "string") return false;
// `checkpoint` is the protocol; the executor calling it is not unit work.
// (Per ADR-0079 the executor isn't even supposed to call it.) Anything
// else — reads, writes, bash, complete_task, save_summary — counts.
return name !== "checkpoint";
};

for (const msg of messages) {
if (!msg || typeof msg !== "object") continue;

// Assistant requested non-checkpoint tool calls
if (Array.isArray(msg.tool_calls)) {
for (const tc of msg.tool_calls) {
const name = tc?.function?.name ?? tc?.name ?? "";
if (name && name !== "checkpoint") {
// ── Anthropic-style: content is an array of typed blocks ──
if (Array.isArray(msg.content)) {
for (const block of msg.content) {
if (!block || typeof block !== "object") continue;
if (block.type === "tool_use" && isWorkToolName(block.name)) {
return false;
}
if (block.type === "tool_result") {
// tool_result has no name on the block itself; presence of a
// non-checkpoint tool_result implies a non-checkpoint tool_use
// preceded it. The pair-match would require backward scan; for
// robustness, treat ANY tool_result as evidence of work unless
// it's a checkpoint result (which would have been emitted by
// the assistant's checkpoint tool_use earlier in this same
// transcript — but that's protocol, not work). Without the
// block name we can't distinguish, so be conservative: a
// tool_result is non-no-op work UNLESS the entire transcript's
// only tool_use was `checkpoint`. We carry that check via the
// tool_use scan above — if a non-checkpoint tool_use exists,
// we've already returned false. If only `checkpoint` was used,
// the tool_result here is the checkpoint reply and we should
// keep scanning.
// Simpler approach: ignore tool_result blocks for the
// classification; the tool_use scan is authoritative.
continue;
}
}
}

// Tool results from non-checkpoint tools
if (msg.role === "tool" || msg.role === "tool_result") {
const name = msg.name ?? "";
if (name && name !== "checkpoint") {
return false;
// ── OpenAI-style: msg.tool_calls on assistant ──
if (Array.isArray(msg.tool_calls)) {
for (const tc of msg.tool_calls) {
const name = tc?.function?.name ?? tc?.name ?? "";
if (isWorkToolName(name)) return false;
}
}

// Content that shows concrete work was done
const content = typeof msg.content === "string" ? msg.content : "";
if (
content.includes("File edited") ||
content.includes("File written") ||
content.includes("File created") ||
content.includes("```diff") ||
content.includes("--- a/") ||
content.includes("+++ b/")
) {
return false;
// ── OpenAI-style: tool reply rows ──
if (msg.role === "tool" || msg.role === "tool_result") {
if (isWorkToolName(msg.name)) return false;
}
}
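A quick usage sketch of the detector above, illustrating the two transcript shapes its doc comment describes. The message fixtures and the relative import path are assumptions for illustration, not code from this commit.

import { isNoOpExecutorTranscript } from "./autonomous-solver.js";

// Anthropic-style: a non-checkpoint tool_use block counts as real work.
const anthropicTranscript = [
  {
    role: "assistant",
    content: [
      { type: "text", text: "Editing the config now." },
      { type: "tool_use", name: "write_file", input: { path: "config.json" } },
    ],
  },
];

// OpenAI-style: the only tool call is `checkpoint`, which is protocol, not work.
const checkpointOnlyTranscript = [
  {
    role: "assistant",
    content: "Checkpointing.",
    tool_calls: [{ function: { name: "checkpoint" } }],
  },
];

isNoOpExecutorTranscript(anthropicTranscript); // expected false — structural tool work found
isNoOpExecutorTranscript(checkpointOnlyTranscript); // expected true — zero non-checkpoint tool activity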
@@ -554,17 +602,61 @@ export function isNoOpExecutorTranscript(messages) {
* Consumer: checkpoint tool.
*/
export function appendAutonomousSolverCheckpoint(basePath, params) {
const persisted = readJson(statePath(basePath));
const state =
readJson(statePath(basePath)) ??
persisted ??
beginAutonomousSolverIteration(basePath, params.unitType, params.unitId);
// ── Sticky identity guard ──
// The orchestrator owns the active unit identity (it called
// beginAutonomousSolverIteration with the canonical unitType/unitId).
// If the agent's checkpoint call passes a *different* unitType/unitId
// because it guessed wrong (real-world: minimax/M2.1 stuck at
// 2026-05-13 calling checkpoint with `parallel-research` /
// `1-ci-build-pipeline/parallel-research` / `research-slice 1-...`
// — three different strings — none matching the orchestrator's
// active identity), the previous implementation silently overwrote
// state.unitType/unitId with the wrong claim. assessAutonomousSolverTurn
// then failed sameUnit() against the orchestrator's identity, fired
// missing-checkpoint-retry, the agent re-checkpointed with another
// wrong guess, and the loop ran indefinitely (60+ wasted calls).
//
// Fix: when there is an active running/paused state, pin the
// checkpoint to the active state's identity instead of trusting the
// agent's claim. Surface the mismatch on the checkpoint payload so it
// is visible in traces.
const hasActiveIdentity =
persisted &&
persisted.unitType &&
persisted.unitId &&
persisted.status !== "complete";
const isMismatch =
hasActiveIdentity &&
!sameUnit(persisted, params.unitType, params.unitId);
if (isMismatch) {
emitJournalEvent(basePath, {
flowId: `${state.unitType}-${state.unitId}-${Date.now()}`,
seq: 1,
ts: nowIso(),
eventType: "checkpoint-identity-mismatch",
data: {
claimedUnitType: params.unitType,
claimedUnitId: params.unitId,
pinnedToUnitType: state.unitType,
pinnedToUnitId: state.unitId,
},
});
}
const effectiveUnitType = isMismatch ? state.unitType : params.unitType;
const effectiveUnitId = isMismatch ? state.unitId : params.unitId;
const checkpoint = {
schemaVersion: SOLVER_CHECKPOINT_SCHEMA_VERSION,
ts: nowIso(),
unitType: params.unitType,
unitId: params.unitId,
iteration: sameUnit(state, params.unitType, params.unitId)
? state.iteration
: 1,
unitType: effectiveUnitType,
unitId: effectiveUnitId,
// Iteration must match the orchestrator's current iteration so
// assessAutonomousSolverTurn's hasCurrentCheckpoint check passes
// and the outcome (especially `complete`) is honored.
iteration: state.iteration,
outcome: params.outcome,
summary: String(params.summary ?? "").trim(),
completedItems: sanitizeList(params.completedItems),
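`sameUnit` is referenced by the guard above but is not part of this diff. A minimal sketch of what it presumably checks, included only so the hunk reads on its own; the real definition lives elsewhere in autonomous-solver.js and may differ.

// Assumed shape, not the actual source: a strict identity comparison.
function sameUnit(state, unitType, unitId) {
  return Boolean(state) && state.unitType === unitType && state.unitId === unitId;
}

Under that reading, the guard pins the checkpoint whenever the persisted state still carries an active (non-complete) identity and the agent's claimed unitType/unitId differs from it.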
@@ -586,11 +678,22 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
invariants: String(params.pdd?.invariants ?? "").trim(),
assumptions: String(params.pdd?.assumptions ?? "").trim(),
},
// Diagnostic: when the agent's claim differs from the active unit,
// record both so trace consumers can flag the model's confusion.
...(isMismatch
? {
mismatchedIdentity: {
claimedUnitType: String(params.unitType ?? ""),
claimedUnitId: String(params.unitId ?? ""),
pinnedToActive: { unitType: state.unitType, unitId: state.unitId },
},
}
: {}),
};
const nextState = {
...state,
unitType: params.unitType,
unitId: params.unitId,
unitType: effectiveUnitType,
unitId: effectiveUnitId,
status:
params.outcome === "complete"
? "complete"
@@ -630,6 +733,9 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
: []),
checkpoint.summary,
].slice(-ROLLING_SUMMARY_WINDOW),
// Increment checkpoint count for this iteration (safety cap)
checkpointCountThisIteration:
(state.checkpointCountThisIteration || 0) + 1,
};
mkdirSync(dirname(historyPath(basePath)), { recursive: true });
writeFileSync(historyPath(basePath), `${JSON.stringify(checkpoint)}\n`, {
@@ -985,6 +1091,20 @@ export function assessAutonomousSolverTurn(
maxRepairAttempts: DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
};
}
// Hard cap on excessive checkpoints within a single iteration
if (
(state.checkpointCountThisIteration || 0) >=
MAX_CHECKPOINTS_PER_ITERATION
) {
return {
action: "pause",
reason: "solver-excessive-checkpoints",
state,
checkpoint,
checkpointCount: state.checkpointCountThisIteration,
maxCheckpointCount: MAX_CHECKPOINTS_PER_ITERATION,
};
}
if (
state.iteration >= state.maxIterations &&
checkpoint.outcome !== "complete"
@@ -15,15 +15,45 @@ const FAILURE_THRESHOLD = 0.2; // >20% failure rate triggers tier bump
const FEEDBACK_WEIGHT = 2; // feedback signals count 2x vs automatic
// ─── In-Memory State ─────────────────────────────────────────────────────────
let history = null;
// Latches to false when the `routing_history` table is observed missing at
// init time. Subsequent DB writes from recordOutcome/recordFeedback are then
// skipped so a stale-schema project doesn't repeatedly throw on every
// dispatch. The next openDatabase that successfully runs the v53 migration
// will let a later initRoutingHistory call flip this back to true.
let _dbTableAvailable = true;
// ─── Public API ──────────────────────────────────────────────────────────────
/**
* Initialize routing history for a project.
*
* Resilient to a missing `routing_history` table: a project DB whose schema
* version predates the routing_history migration (v53) will throw `no such
* table: routing_history` on the underlying SELECT. We swallow that one
* specific case so auto-start is not blocked by a stale schema; the
* in-memory history simply starts empty and accumulates from scratch.
* Anything else (corrupt DB, permission errors) re-throws so it remains
* visible.
*/
export function initRoutingHistory(_base) {
history = createEmptyHistory();
const db = getDatabase();
if (!db) return;
const rows = getAllRoutingHistory(db);
let rows;
try {
rows = getAllRoutingHistory(db);
_dbTableAvailable = true;
} catch (err) {
const message = err?.message ? String(err.message) : "";
if (/no such table:\s*routing_history/i.test(message)) {
// Schema lags the code — fresh project, or a project whose DB never
// migrated past v52. Start with empty in-memory state and latch the
// flag so recordOutcome/recordFeedback skip their DB writes for the
// remainder of the session instead of crashing on every dispatch.
rows = [];
_dbTableAvailable = false;
} else {
throw err;
}
}
for (const row of rows) {
if (!history.patterns[row.pattern]) {
history.patterns[row.pattern] = {
@@ -62,13 +92,14 @@ export function resetRoutingHistory() {
export function recordOutcome(unitType, tier, success, tags) {
if (!history) return;
const db = getDatabase();
const canWriteDb = db && _dbTableAvailable;
// Record for the base unit type
const basePattern = unitType;
ensurePattern(basePattern);
const outcome = history.patterns[basePattern][tier];
if (success) outcome.success++;
else outcome.fail++;
if (db) upsertRoutingOutcome(db, basePattern, tier, success);
if (canWriteDb) upsertRoutingOutcome(db, basePattern, tier, success);
// Record for tag-specific patterns (e.g. "execute-task:docs")
if (tags && tags.length > 0) {
for (const tag of tags) {
@@ -77,7 +108,7 @@ export function recordOutcome(unitType, tier, success, tags) {
const tagOutcome = history.patterns[tagPattern][tier];
if (success) tagOutcome.success++;
else tagOutcome.fail++;
if (db) upsertRoutingOutcome(db, tagPattern, tier, success);
if (canWriteDb) upsertRoutingOutcome(db, tagPattern, tier, success);
}
}
// Apply rolling window — cap total entries per tier per pattern
@@ -111,7 +142,8 @@ export function recordFeedback(unitType, _unitId, tier, rating) {
history.feedback = history.feedback.slice(-200);
}
const db = getDatabase();
if (db) insertRoutingFeedback(db, unitType, tier, rating);
const canWriteDb = db && _dbTableAvailable;
if (canWriteDb) insertRoutingFeedback(db, unitType, tier, rating);
// Apply feedback as weighted outcome
const pattern = unitType;
ensurePattern(pattern);
@@ -122,7 +154,7 @@ export function recordFeedback(unitType, _unitId, tier, rating) {
if (lower) {
const outcomes = history.patterns[pattern][lower];
outcomes.success += FEEDBACK_WEIGHT;
if (db) {
if (canWriteDb) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, lower, true);
}
@@ -132,7 +164,7 @@ export function recordFeedback(unitType, _unitId, tier, rating) {
// User says this needed a better model → record as failure at current tier
const outcomes = history.patterns[pattern][tier];
outcomes.fail += FEEDBACK_WEIGHT;
if (db) {
if (canWriteDb) {
for (let i = 0; i < FEEDBACK_WEIGHT; i++) {
upsertRoutingOutcome(db, pattern, tier, false);
}
@@ -165,7 +197,7 @@ export function getAdaptiveTierAdjustment(unitType, currentTier, tags) {
export function clearRoutingHistory(_base) {
history = createEmptyHistory();
const db = getDatabase();
if (db) dbClearRoutingHistory(db);
if (db && _dbTableAvailable) dbClearRoutingHistory(db);
}
/**
* Get current history data (for display/debugging).
File diff suppressed because it is too large
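The suppressed diff is presumably sf-db-schema.js (the one changed file not shown elsewhere), which the commit message describes as adding per-migration transaction boundaries (runMigrationStep) plus a post-migration assertion that recreates routing_history if missing. A minimal sketch of that shape, assuming the node:sqlite DatabaseSync API used elsewhere in this commit; helper names other than runMigrationStep are hypothetical and the real code may differ.

// Each migration commits on its own, so a failure in a late step does not
// roll back earlier steps that already succeeded.
function runMigrationStep(db, step) {
  db.exec("BEGIN");
  try {
    step.up(db); // the migration body
    db.prepare(
      "INSERT INTO schema_version (version, applied_at) VALUES (?, ?)",
    ).run(step.version, new Date().toISOString());
    db.exec("COMMIT");
  } catch (err) {
    db.exec("ROLLBACK");
    throw err; // stop here; already-committed steps are kept
  }
}

// Post-migration assertion: recreate routing_history on a DB that stalled
// before the v53 migration. createRoutingHistorySql would be the same
// CREATE TABLE statement the v53 migration uses (defined in the suppressed file).
function assertRoutingHistoryTable(db, createRoutingHistorySql) {
  const row = db
    .prepare(
      "SELECT name FROM sqlite_master WHERE type='table' AND name='routing_history'",
    )
    .get();
  if (!row) db.exec(createRoutingHistorySql);
}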
@@ -317,6 +317,36 @@ describe("autonomous solver", () => {
expect(result.reason).toBe("solver-max-iterations");
});

test("assessAutonomousSolverTurn_excessive_checkpoints_pauses_after_cap", () => {
// Fail-fast when the agent calls checkpoint 5+ times within a single
// iteration without making other tool progress. Prevents the 60+
// no-op checkpoint loop from sf-mp37kjmo-1mfuru.
const project = makeProject();
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");

for (let i = 0; i < 5; i++) {
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S01/T01",
outcome: "continue",
summary: `Checkpoint ${i + 1} — still stuck.`,
completedItems: [],
remainingItems: ["need help"],
verificationEvidence: [],
pdd: pdd(),
});
}

const result = assessAutonomousSolverTurn(
project,
"execute-task",
"M001/S01/T01",
);
expect(result.action).toBe("pause");
expect(result.reason).toBe("solver-excessive-checkpoints");
expect(result.checkpointCount).toBe(5);
});

test("steering_append_consume_is_idempotent", () => {
const project = makeProject();
appendAutonomousSolverSteering(project, "Prefer runtime enforcement.");
@@ -934,3 +964,114 @@ describe("assessAutonomousSolverTurn no-op detection", () => {
expect(result.reason).toBe("solver-noop-continue");
});
});

describe("appendAutonomousSolverCheckpoint sticky identity", () => {
test("pins to orchestrator unit identity when agent passes a different unitId", () => {
// Real-world bug (2026-05-13): minimax/M2.1 stuck in 60+ checkpoint
// loop because each call passed a guessed unitId
// ("parallel-research" / "research-slice 1-ci-build-pipeline/..."),
// silently overwriting state.unitId. assessAutonomousSolverTurn then
// failed sameUnit() against the orchestrator's identity and re-fired
// repair forever. The active state's identity must be sticky.
const project = makeProject();
beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S04/T02",
);
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "parallel-research", // <-- agent guesses wrong
outcome: "complete",
summary: "Done.",
completedItems: ["work"],
remainingItems: [],
verificationEvidence: ["ls -la"],
pdd: pdd(),
});
const state = readAutonomousSolverState(project);
// State identity must NOT shift to the agent's wrong claim.
expect(state.unitType).toBe("execute-task");
expect(state.unitId).toBe("M001/S04/T02");
// Checkpoint payload itself is pinned to the orchestrator's identity.
expect(state.latestCheckpoint.unitType).toBe("execute-task");
expect(state.latestCheckpoint.unitId).toBe("M001/S04/T02");
// Mismatch is surfaced diagnostically.
expect(state.latestCheckpoint.mismatchedIdentity).toEqual({
claimedUnitType: "execute-task",
claimedUnitId: "parallel-research",
pinnedToActive: {
unitType: "execute-task",
unitId: "M001/S04/T02",
},
});
});

test("assessAutonomousSolverTurn honors complete after sticky-pin rescue", () => {
// End-to-end: agent passes wrong unitId, checkpoint sticks to the
// orchestrator's identity, assess sees outcome=complete and returns
// action=complete (NOT missing-checkpoint-retry).
const project = makeProject();
beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S04/T02",
);
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "wrong-guess",
outcome: "complete",
summary: "Done.",
completedItems: ["work"],
remainingItems: [],
verificationEvidence: ["ls -la"],
pdd: pdd(),
});
const assessment = assessAutonomousSolverTurn(
project,
"execute-task",
"M001/S04/T02",
);
expect(assessment.action).toBe("complete");
});

test("matching unitId does not flag mismatch", () => {
const project = makeProject();
beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S04/T02",
);
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S04/T02",
outcome: "continue",
summary: "Progress",
completedItems: ["read files"],
remainingItems: ["edit code"],
verificationEvidence: ["grep -n"],
pdd: pdd(),
});
const state = readAutonomousSolverState(project);
expect(state.latestCheckpoint.mismatchedIdentity).toBeUndefined();
});

test("fresh project with no active state accepts agent-provided identity", () => {
// Bootstrap case: state is null on first call; the agent's claim
// initializes the state. (Same behavior as before the sticky fix.)
const project = makeProject();
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S01/T01",
outcome: "continue",
summary: "First iteration",
completedItems: [],
remainingItems: ["plan"],
verificationEvidence: [],
pdd: pdd(),
});
const state = readAutonomousSolverState(project);
expect(state.unitId).toBe("M001/S01/T01");
expect(state.latestCheckpoint.mismatchedIdentity).toBeUndefined();
});
});
@@ -29,6 +29,7 @@ import {
openDatabase,
reconcileWorktreeDb,
} from "../sf-db.js";
import { initRoutingHistory } from "../routing-history.js";

const tmpDirs = [];
@@ -149,6 +150,56 @@ function makeLegacyV27Db() {
return dbPath;
}

function makeLegacyV52Db() {
const dir = mkdtempSync(join(tmpdir(), "sf-legacy-v52-"));
tmpDirs.push(dir);
const sfDir = join(dir, ".sf");
mkdirSync(sfDir, { recursive: true });
const dbPath = join(sfDir, "sf.db");
const db = new DatabaseSync(dbPath);
db.exec(`
CREATE TABLE schema_version (
version INTEGER NOT NULL,
applied_at TEXT NOT NULL
);
INSERT INTO schema_version (version, applied_at)
VALUES (52, '2026-05-06T00:00:00.000Z');

CREATE TABLE milestones (
id TEXT PRIMARY KEY,
title TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'active',
depends_on TEXT NOT NULL DEFAULT '[]',
created_at TEXT NOT NULL DEFAULT '',
completed_at TEXT DEFAULT NULL
);

CREATE TABLE slices (
milestone_id TEXT NOT NULL,
id TEXT NOT NULL,
title TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'pending',
risk TEXT NOT NULL DEFAULT 'medium',
depends TEXT NOT NULL DEFAULT '[]',
demo TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT '',
completed_at TEXT DEFAULT NULL,
PRIMARY KEY (milestone_id, id)
);

CREATE TABLE tasks (
milestone_id TEXT NOT NULL,
slice_id TEXT NOT NULL,
id TEXT NOT NULL,
title TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'pending',
PRIMARY KEY (milestone_id, slice_id, id)
);
`);
db.close();
return dbPath;
}

function makeLegacyV35GateRunsDb() {
const dir = mkdtempSync(join(tmpdir(), "sf-legacy-v35-gates-"));
tmpDirs.push(dir);
@@ -262,6 +313,35 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
assert.deepEqual(schedulerRow, { status: "queued" });
});

test("openDatabase_v52_db_heals_routing_history_and_auto_start_path_works", () => {
const dbPath = makeLegacyV52Db();

assert.equal(openDatabase(dbPath), true);
const db = getDatabase();

// ensurePostBootstrapTables should have created routing_history
const routingTable = db
.prepare(
"SELECT name FROM sqlite_master WHERE type='table' AND name='routing_history'",
)
.get();
assert.ok(
routingTable,
"routing_history table should exist after ensurePostBootstrapTables",
);

// initRoutingHistory (auto-start path) must not crash on a v52 DB
assert.doesNotThrow(() => {
initRoutingHistory(dbPath);
}, "initRoutingHistory should not throw on a v52 DB");

// Schema should have migrated to v62
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 62);
});

test("openDatabase_when_fresh_db_supports_schedule_entries", () => {
assert.equal(openDatabase(":memory:"), true);