feat(sf): generation-aware failover + canonical-keyed metrics

Two parallel refactors building on the model-registry consolidation: 1. Generation-aware failover (model-route-failure.js, agent-end-recovery.js) - resolveNextModelRoute now takes unitType so it knows whether the caller is solver-pinned per ADR-0079 (autonomous-solver). When pinned, rejects candidates whose canonicalIdFor() differs from the failed route's canonical id — closes the latent solver-invariant violation where kimi-coding/kimi-k2.6 could silently fail over to ollama-cloud/kimi-k2.5:cloud (different generation). - Cross-generation failover in non-pinned units now emits a structured logWarning so generation downgrades are visible in traces instead of looking like an equivalent route switch. 2. Canonical-keyed performance metrics (model-learner.js) - .sf/model-performance.json now keys by canonical_id with an {aggregate, by_route} sub-shape instead of fused provider/wire-model strings. Cross-route history per model is now coherent — kimi-k2.6 reached via kimi-coding accumulates into the same aggregate as reached via openrouter. - Migration runs at boot: detects old shape (no 'aggregate' key in unit-type blob values), distributes each entry into by_route, recomputes aggregate, writes a backup to .sf/model-performance.json.pre-canonical-backup. Unmappable route keys land in _unmapped so nothing is dropped. - getRouteStats(taskType, routeKey) added for per-route failover ordering; existing getRankedModels emits canonical IDs for cross-route strength queries. 3. Tests - model-registry.test.ts: bundled in this commit (Swarm A's test file was left untracked when the registry module was committed). - model-route-failure.test.ts: 12 tests covering solver-pin guard, same-canonical multi-route failover, generation-downgrade log emit. - model-learner-canonical.test.ts: 17 tests covering migration round-trip, aggregate invariant, _unmapped bucket, and zero-default reads. 
- model-learner.test.ts: one existing test updated for the new _unmapped.by_route shape on bare model IDs. 4. Results - Targeted tests: 147/147 across registry, route-failure, learner, learner-canonical. - Full npm run test:unit: 4707 pass, 0 fail, 83 skipped (no new regressions vs pre-edit baseline of 4669). Work parallelized across two Sonnet 4.6 sub-agents in isolated git worktrees. Contract authored in docs/dev/drafts/model-registry-contract.md (committed earlier in 1d753af6b) and consumed by both agents. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 04:15:08 +02:00 · 2026-05-14 04:15:08 +02:00 · 7570aac4b7
commit 7570aac4b7
parent 09bc50f0f6
7 changed files with 1640 additions and 52 deletions
--- a/src/resources/extensions/sf/bootstrap/agent-end-recovery.js
+++ b/src/resources/extensions/sf/bootstrap/agent-end-recovery.js
@ -84,6 +84,7 @@ async function trySwitchToFallbackModel(args) {
 			availableModels,
 			failedRoutes: getCurrentUnitModelFailures(),
 			isBlocked,
+			unitType: args.unitType,
 		});
 		if (!nextRoute) return false;
 		const ok = await args.pi.setModel(nextRoute.model, {
--- a/src/resources/extensions/sf/model-learner.js
+++ b/src/resources/extensions/sf/model-learner.js
@ -21,27 +21,180 @@ import { dirname, join } from "node:path";

 const MODEL_FAILURE_LOG_SCHEMA_VERSION = 1;

+/**
+ * Reference to canonicalIdFor from model-registry.
+ *
+ * Default: null (every route is treated as unmappable and goes to _unmapped).
+ * Override in tests via setRegistryResolver() to inject a stub.
+ * In production, this module populates it from model-registry.js through the
+ * fire-and-forget dynamic import() below, unless a resolver was set first.
+ */
+let _canonicalIdForFn = null;
+
+/**
+ * Resolve a route key (provider/wire-id) to a canonical id using the injected
+ * registry resolver.
+ *
+ * Never throws: a missing resolver, a throwing resolver, and a resolver that
+ * returns anything other than a non-empty string all yield null, so callers
+ * can rely on a strict `=== null` check to send the route to _unmapped.
+ *
+ * @param {string} routeKey - "provider/wire-model" or a bare model id
+ * @returns {string | null} canonical id, or null when the route is unmappable
+ */
+function tryCanonicalIdFor(routeKey) {
+	if (typeof _canonicalIdForFn !== "function") return null;
+	try {
+		const canonicalId = _canonicalIdForFn(routeKey);
+		// Normalise undefined / "" / non-string results: recordOutcome compares
+		// against null strictly, so any other value would become a bucket key.
+		return typeof canonicalId === "string" && canonicalId !== "" ? canonicalId : null;
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Inject the canonicalIdFor implementation used by tryCanonicalIdFor().
+ *
+ * The value is stored as-is with no validation. Passing null restores the
+ * "no resolver" state, in which every route is treated as unmappable.
+ * In production this module also wires model-registry.js's canonicalIdFor
+ * through a lazy dynamic import, but only while no resolver has been set.
+ * In tests, call this before constructing ModelPerformanceTracker:
+ *   setRegistryResolver((rk) => rk === "kimi-coding/kimi-k2.6" ? "kimi-k2.6" : null)
+ *
+ * @param fn - function mapping a route key to a canonical id (or null)
+ */
+export function setRegistryResolver(fn) {
+	_canonicalIdForFn = fn;
+}
+
+// Wire the registry lazily so model-learner.js can be imported independently
+// of @singularity-forge/ai (e.g. in tests that don't load the full AI package).
+// The fire-and-forget import populates _canonicalIdForFn when the registry
+// resolves, and only if nothing (e.g. setRegistryResolver) has set it first.
+// Outcomes recorded before the registry loads go to _unmapped.
+// NOTE(review): nothing visible in this module re-resolves _unmapped routes
+// to a canonical id afterwards — confirm whether that is handled elsewhere.
+import("./model-registry.js")
+	.then((mod) => {
+		// Do not clobber a resolver that was injected before the import settled.
+		if (_canonicalIdForFn === null && typeof mod?.canonicalIdFor === "function") {
+			_canonicalIdForFn = mod.canonicalIdFor;
+		}
+	})
+	.catch(() => {
+		// Registry unavailable (tests, stripped builds, etc.) — routes go to _unmapped.
+	});
+
+/**
+ * Detect whether a unit-type blob in the performance file uses the OLD
+ * flat format ({ "provider/wire-id": { successes, failures, ... } })
+ * vs the NEW canonical format ({ "canonical-id": { aggregate, by_route } }).
+ *
+ * Detection rule: if ANY value in the object is canonical-shaped — it has an
+ * `aggregate` key (canonical entry) or a `by_route` key (which also covers a
+ * blob holding only the `_unmapped` bucket, which carries no aggregate) — the
+ * blob is already new-format. Otherwise it is treated as old-format, which
+ * includes an empty blob.
+ *
+ * @param unitTypeBlob - value stored under one unit-type key
+ * @returns true when the blob should be migrated; false for new-format blobs
+ *          and for non-object input
+ */
+function isOldFormat(unitTypeBlob) {
+	if (!unitTypeBlob || typeof unitTypeBlob !== "object") return false;
+	return !Object.values(unitTypeBlob).some(
+		(val) =>
+			val !== null &&
+			typeof val === "object" &&
+			("aggregate" in val || "by_route" in val),
+	);
+}
+
+/**
+ * Migrate a single unit-type blob from old flat format to new canonical format.
+ *
+ * Each old entry becomes a by_route entry under the canonical id returned by
+ * tryCanonicalIdFor(routeKey), or under `_unmapped` when the route cannot be
+ * resolved. Missing counters default to 0 and a missing lastUsed defaults to
+ * the current time. Non-object entries are skipped. Entries that already
+ * carry a `by_route` map (for example an `_unmapped` bucket written by the
+ * new code) have their routes merged through unchanged instead of being
+ * flattened to zeros. The input blob is not mutated.
+ *
+ * @param oldBlob - { "<route key>": { successes, failures, ... } }
+ * @returns migrated blob:
+ *          { "<canonical-id>": { aggregate, by_route }, "_unmapped"?: { by_route } }
+ */
+function migrateUnitTypeBlob(oldBlob) {
+	const newBlob = {};
+	// Create the bucket on first use. Canonical buckets reserve the `aggregate`
+	// slot first so the serialized key order stays { aggregate, by_route }.
+	const bucketFor = (bucket) => {
+		if (!newBlob[bucket]) {
+			newBlob[bucket] =
+				bucket === "_unmapped"
+					? { by_route: {} }
+					: { aggregate: null, by_route: {} };
+		}
+		return newBlob[bucket];
+	};
+
+	for (const [routeKey, stats] of Object.entries(oldBlob)) {
+		if (!stats || typeof stats !== "object") continue;
+
+		if (stats.by_route && typeof stats.by_route === "object") {
+			// Already new-shaped: keep its routes rather than treating the bucket
+			// name as a route key, which would replace its history with zeros.
+			Object.assign(bucketFor(routeKey).by_route, stats.by_route);
+			continue;
+		}
+
+		const bucket = tryCanonicalIdFor(routeKey) ?? "_unmapped";
+		bucketFor(bucket).by_route[routeKey] = {
+			successes: stats.successes ?? 0,
+			failures: stats.failures ?? 0,
+			timeouts: stats.timeouts ?? 0,
+			totalTokens: stats.totalTokens ?? 0,
+			totalCost: stats.totalCost ?? 0,
+			lastUsed: stats.lastUsed ?? new Date().toISOString(),
+		};
+	}
+
+	// Recompute each canonical aggregate once, after all routes are placed.
+	// `_unmapped` deliberately carries no aggregate.
+	for (const [bucket, entry] of Object.entries(newBlob)) {
+		if (bucket !== "_unmapped") recomputeAggregate(entry);
+	}
+	return newBlob;
+}
+
+/**
+ * Recompute the `aggregate` object for a canonical entry as the sum of all
+ * by_route entries. This maintains the invariant:
+ *   aggregate.successes === sum(by_route[*].successes)
+ * (and likewise for failures, timeouts, totalTokens and totalCost).
+ *
+ * `aggregate.lastUsed` is the greatest route `lastUsed` by string comparison,
+ * or "" when no route has one. Missing counters count as 0; a missing
+ * `by_route` map and non-object route entries are tolerated.
+ * Mutates `canonicalEntry` by replacing its `aggregate` property.
+ *
+ * @param canonicalEntry - { aggregate?, by_route } entry to update in place
+ */
+function recomputeAggregate(canonicalEntry) {
+	const agg = {
+		successes: 0,
+		failures: 0,
+		timeouts: 0,
+		totalTokens: 0,
+		totalCost: 0,
+		lastUsed: "",
+	};
+	for (const r of Object.values(canonicalEntry.by_route ?? {})) {
+		if (!r || typeof r !== "object") continue;
+		agg.successes += r.successes ?? 0;
+		agg.failures += r.failures ?? 0;
+		agg.timeouts += r.timeouts ?? 0;
+		agg.totalTokens += r.totalTokens ?? 0;
+		agg.totalCost += r.totalCost ?? 0;
+		// Only a real timestamp may replace the current one, so a route without
+		// lastUsed can never leave the aggregate with `undefined`.
+		// NOTE(review): assumes timestamps are ISO-8601 strings in one format,
+		// so lexicographic order matches chronological order — confirm.
+		if (r.lastUsed && r.lastUsed > agg.lastUsed) {
+			agg.lastUsed = r.lastUsed;
+		}
+	}
+	canonicalEntry.aggregate = agg;
+}
+
+/**
+ * Build a fresh stats record with every counter at zero.
+ *
+ * @param timestamp - value stored verbatim as `lastUsed`
+ * @returns a new object on every call, so callers may mutate it freely
+ */
+function emptyRouteStats(timestamp) {
+	const zeroCounters = {
+		successes: 0,
+		failures: 0,
+		timeouts: 0,
+		totalTokens: 0,
+		totalCost: 0,
+	};
+	return { ...zeroCounters, lastUsed: timestamp };
+}
+
 /**
 * Per-task-type model performance tracker.
 *
- * Schema:
+ * New schema (v2 — canonical-keyed):
 * {
- *   "execute-task": {
- *     "gpt-4o": {
- *       "successes": 42,
- *       "failures": 3,
- *       "timeouts": 1,
- *       "totalTokens": 1500000,
- *       "totalCost": 45.50,
- *       "lastUsed": "2026-05-06T16:30:00Z",
- *       "successRate": 0.93
+ *   "<unit-type>": {
+ *     "<canonical-id>": {
+ *       "aggregate":  { successes, failures, timeouts, totalTokens, totalCost, lastUsed },
+ *       "by_route":   { "<provider/wire-model>": { successes, failures, ... } }
 *     },
- *     "claude-opus": {
- *       ...
+ *     "_unmapped": {
+ *       "by_route": { "<route>": { ... } }
 *     }
- *   },
- *   "plan-slice": { ... }
+ *   }
 * }
+ *
+ * Old schema (v1 — fused route key as top-level key) is migrated on boot.
 */
 class ModelPerformanceTracker {
 	constructor(basePath) {
@ -61,12 +214,66 @@ class ModelPerformanceTracker {
 		}
 		try {
 			const content = readFileSync(this.storagePath, "utf-8");
-			return JSON.parse(content);
+			const parsed = JSON.parse(content);
+			return this._migrateIfNeeded(parsed);
 		} catch {
 			return {};
 		}
 	}

+	/**
+	 * Detect old-format data and migrate it to the canonical schema.
+	 *
+	 * Returns `parsed` unchanged when no unit-type blob is old-format. Otherwise
+	 * builds a new object (the blobs in `parsed` are not modified), writes a
+	 * one-time backup of the pre-migration data next to the storage file, and
+	 * overwrites the storage file with the migrated data.
+	 *
+	 * Only the backup is skip-if-exists; the migration itself runs whenever an
+	 * old-format blob is detected. Both writes are best-effort: failures are
+	 * swallowed and the migrated data is still returned for in-memory use.
+	 *
+	 * @param parsed - JSON-decoded contents of the performance file
+	 * @returns `parsed`, or a new migrated object keyed by unit type
+	 */
+	_migrateIfNeeded(parsed) {
+		// Check if any unit-type blob is still in old format
+		let needsMigration = false;
+		for (const unitTypeBlob of Object.values(parsed)) {
+			if (typeof unitTypeBlob === "object" && unitTypeBlob !== null && isOldFormat(unitTypeBlob)) {
+				needsMigration = true;
+				break;
+			}
+		}
+		if (!needsMigration) return parsed;
+
+		// Write backup (idempotent — only if backup doesn't already exist), and
+		// do it before the storage file is overwritten below.
+		const backupPath = this.storagePath + ".pre-canonical-backup";
+		if (!existsSync(backupPath)) {
+			try {
+				writeFileSync(backupPath, JSON.stringify(parsed, null, 2), "utf-8");
+			} catch {
+				// Non-fatal: backup failure should not block migration
+			}
+		}
+
+		// Migrate each unit type; non-object values and blobs that are already
+		// new-format are carried over as-is.
+		const migrated = {};
+		for (const [unitType, unitTypeBlob] of Object.entries(parsed)) {
+			if (typeof unitTypeBlob !== "object" || unitTypeBlob === null) {
+				migrated[unitType] = unitTypeBlob;
+				continue;
+			}
+			if (isOldFormat(unitTypeBlob)) {
+				migrated[unitType] = migrateUnitTypeBlob(unitTypeBlob);
+			} else {
+				migrated[unitType] = unitTypeBlob;
+			}
+		}
+
+		// Write migrated data back to disk, creating the directory if needed
+		try {
+			const dir = dirname(this.storagePath);
+			if (!existsSync(dir)) {
+				mkdirSync(dir, { recursive: true });
+			}
+			writeFileSync(this.storagePath, JSON.stringify(migrated, null, 2), "utf-8");
+		} catch {
+			// Non-fatal: the in-memory result below is still usable
+		}
+
+		return migrated;
+	}
+
 	_save() {
 		if (!this.storagePath) {
 			return;
@ -87,11 +294,15 @@ class ModelPerformanceTracker {
 	}

 	/**
-	 * Record outcome for a model on a specific task type.
+	 * Record outcome for a route key on a specific task type.
+	 *
+	 * @param taskType - e.g. "execute-task"
+	 * @param routeKey - format: "provider/wire-model" (e.g. "kimi-coding/kimi-k2.6")
+	 *                   OR a bare model id for backward-compat (no slash = treated as routeKey)
 	 */
 	recordOutcome(
 		taskType,
-		modelId,
+		routeKey,
 		outcomeOrSuccess,
 		timeoutArg = false,
 		tokensUsedArg = 0,
@ -117,19 +328,46 @@ class ModelPerformanceTracker {
 		if (!this.data[taskType]) {
 			this.data[taskType] = {};
 		}
-		if (!this.data[taskType][modelId]) {
-			this.data[taskType][modelId] = {
-				successes: 0,
-				failures: 0,
-				timeouts: 0,
-				totalTokens: 0,
-				totalCost: 0,
-				lastUsed: timestamp,
-				successRate: 0,
-			};
+
+		// Resolve canonical id. Routes with no slash are legacy bare model ids
+		// — treat them as their own route key, try registry first.
+		const canonicalId = tryCanonicalIdFor(routeKey);
+
+		if (canonicalId === null) {
+			// Route not in registry → write to _unmapped
+			if (!this.data[taskType]["_unmapped"]) {
+				this.data[taskType]["_unmapped"] = { by_route: {} };
+			}
+			const unmapped = this.data[taskType]["_unmapped"];
+			if (!unmapped.by_route[routeKey]) {
+				unmapped.by_route[routeKey] = emptyRouteStats(timestamp);
+			}
+			const rs = unmapped.by_route[routeKey];
+			this._applyOutcomeToStats(rs, success, timeout, tokensUsed, costUsd, timestamp);
+		} else {
+			// Known route → write to by_route + recompute aggregate
+			if (!this.data[taskType][canonicalId]) {
+				this.data[taskType][canonicalId] = {
+					aggregate: emptyRouteStats(timestamp),
+					by_route: {},
+				};
+			}
+			const canonicalEntry = this.data[taskType][canonicalId];
+			if (!canonicalEntry.by_route[routeKey]) {
+				canonicalEntry.by_route[routeKey] = emptyRouteStats(timestamp);
+			}
+			const rs = canonicalEntry.by_route[routeKey];
+			this._applyOutcomeToStats(rs, success, timeout, tokensUsed, costUsd, timestamp);
+			recomputeAggregate(canonicalEntry);
 		}

-		const stats = this.data[taskType][modelId];
+		this._save();
+	}
+
+	/**
+	 * Apply a single outcome event to a stats object in-place.
+	 */
+	_applyOutcomeToStats(stats, success, timeout, tokensUsed, costUsd, timestamp) {
 		if (success) {
 			stats.successes += 1;
 		} else if (timeout) {
@ -138,50 +376,144 @@ class ModelPerformanceTracker {
 		} else {
 			stats.failures += 1;
 		}
-
 		stats.totalTokens += tokensUsed;
 		stats.totalCost += costUsd;
 		stats.lastUsed = timestamp;
-
-		const total = stats.successes + stats.failures;
-		stats.total = total;
-		stats.successRate = total > 0 ? stats.successes / total : 0;
-
-		this._save();
 	}

 	/**
 	 * Get performance stats for a task type and model.
+	 *
+	 * When routeMode=false (default): looks up aggregate stats for a canonical id.
+	 * When routeMode=true: looks up by_route stats for a specific routeKey.
+	 *
+	 * Backward-compat fallback: if the id is not found as a canonical, also checks
+	 * _unmapped.by_route and all by_route maps — supports bare model ids used in
+	 * tests and legacy callers that don't have the registry wired.
+	 *
+	 * @param taskType - e.g. "execute-task"
+	 * @param canonicalOrRouteKey - canonical id (aggregate) or routeKey (by-route)
+	 * @param routeMode - when true, returns by_route stats
 	 */
-	getStats(taskType, modelId) {
-		return this.data[taskType]?.[modelId] || null;
+	getStats(taskType, canonicalOrRouteKey, routeMode = false) {
+		const blob = this.data[taskType];
+		if (!blob) return null;
+
+		// Explicit by-route lookup is delegated; it returns the stored record
+		// without the derived total / successRate fields.
+		if (routeMode) {
+			return this.getRouteStats(taskType, canonicalOrRouteKey);
+		}
+
+		// Attach the derived fields to a copy of a stored stats record.
+		const withDerived = (raw) => {
+			const total = raw.successes + raw.failures;
+			return {
+				...raw,
+				total,
+				successRate: total > 0 ? raw.successes / total : 0,
+			};
+		};
+
+		// Aggregate mode: the key names a canonical entry.
+		const aggregate = blob[canonicalOrRouteKey]?.aggregate;
+		if (aggregate) return withDerived(aggregate);
+
+		// Backward-compat fallback: treat the key as a route and take the first
+		// bucket (canonical or _unmapped, in stored order) that knows it.
+		for (const bucket of Object.values(blob)) {
+			const routeStats = bucket?.by_route?.[canonicalOrRouteKey];
+			if (routeStats) return withDerived(routeStats);
+		}
+		return null;
+	}
+
+	/**
+	 * Look up the stored stats record for one route key.
+	 *
+	 * Scans every bucket of the task type (canonical entries and `_unmapped`
+	 * alike, in stored order) and returns the first by_route match. The stored
+	 * object itself is returned, not a copy, and it carries no derived fields.
+	 *
+	 * @param taskType - e.g. "execute-task"
+	 * @param routeKey - e.g. "kimi-coding/kimi-k2.6"
+	 * @returns the route's stats record, or null when nothing is recorded
+	 */
+	getRouteStats(taskType, routeKey) {
+		const blob = this.data[taskType];
+		if (!blob) return null;
+		for (const bucket of Object.values(blob)) {
+			const found = bucket?.by_route?.[routeKey];
+			if (found) return found;
+		}
+		return null;
+	}

 	/**
 	 * Get all models for a task type, ranked by success rate.
+	 *
+	 * Primary mode: iterates canonical ids using aggregate stats.
+	 * Backward-compat fallback: if no canonical entries exist (no registry
+	 * wired), iterates _unmapped.by_route entries instead so legacy tests
+	 * that use bare model IDs still work.
 	 */
 	getRankedModels(taskType, minSamples = 1) {
 		if (!this.data[taskType]) return [];

-		const models = Object.entries(this.data[taskType])
-			.filter(([, stats]) => stats.successes + stats.failures >= minSamples)
-			.map(([modelId, stats]) => ({
-				modelId,
-				successRate: stats.successRate,
-				attempts: stats.successes + stats.failures,
-				tokens: stats.totalTokens,
-				cost: stats.totalCost,
-				latestAttempt: stats.lastUsed,
-			}))
-			.sort((a, b) => b.successRate - a.successRate);
+		const models = [];
+		let hasCanonical = false;

-		return models;
+		for (const [key, entry] of Object.entries(this.data[taskType])) {
+			if (key === "_unmapped") continue;
+			// New format: entry has aggregate + by_route
+			const agg = entry?.aggregate;
+			if (!agg) continue;
+			hasCanonical = true;
+			const total = agg.successes + agg.failures;
+			if (total < minSamples) continue;
+			const successRate = total > 0 ? agg.successes / total : 0;
+			models.push({
+				modelId: key, // canonical id
+				successRate,
+				attempts: total,
+				tokens: agg.totalTokens ?? 0,
+				cost: agg.totalCost ?? 0,
+				latestAttempt: agg.lastUsed,
+			});
+		}
+
+		// Backward-compat: when no canonical entries exist (registry not wired),
+		// fall back to _unmapped.by_route so bare-ID tests still get rankings.
+		if (!hasCanonical) {
+			const unmapped = this.data[taskType]["_unmapped"];
+			if (unmapped?.by_route) {
+				for (const [routeKey, rs] of Object.entries(unmapped.by_route)) {
+					if (!rs) continue;
+					const total = (rs.successes ?? 0) + (rs.failures ?? 0);
+					if (total < minSamples) continue;
+					const successRate = total > 0 ? rs.successes / total : 0;
+					models.push({
+						modelId: routeKey,
+						successRate,
+						attempts: total,
+						tokens: rs.totalTokens ?? 0,
+						cost: rs.totalCost ?? 0,
+						latestAttempt: rs.lastUsed,
+					});
+				}
+			}
+		}
+
+		return models.sort((a, b) => b.successRate - a.successRate);
 	}

 	/**
 	 * Check if a model should be demoted (fails >50% on this task type).
+	 * Accepts a canonical id (aggregate demotion) or routeKey (route-level).
 	 */
 	shouldDemote(taskType, modelId, thresholdFailureRate = 0.5) {
+		// Try aggregate lookup first (canonical id)
 		const stats = this.getStats(taskType, modelId);
 		if (!stats) return false;

--- a/src/resources/extensions/sf/model-route-failure.js
+++ b/src/resources/extensions/sf/model-route-failure.js
@ -1,4 +1,34 @@
 import { resolveModelId } from "./auto-model-selection.js";
+import { canonicalIdFor, sameGeneration } from "./model-registry.js";
+import { logWarning } from "./workflow-logger.js";
+
+// ── Solver pinning (ADR-0079) ────────────────────────────────────────────────
+// The autonomous solver pass is always locked to kimi-k2.6 (provider:
+// kimi-coding) and must never cross canonical_id boundaries on failover.
+// The unit type string "autonomous-solver" is the identifier introduced by
+// ADR-0079 for the solver role. Other unit types run as executor and may
+// cross canonical ids (with a structured downgrade log event).
+const SOLVER_PINNED_UNIT_TYPE = "autonomous-solver";
+
+/**
+ * Emit a structured "generation-downgrade" warning for a failover that
+ * crosses a canonical-id or generation boundary. The event is handed to
+ * logWarning under the "model-route-failure" source and always reports
+ * `sameGeneration: false`.
+ *
+ * @param {string} fromCanonical  - canonical id of the route that failed
+ * @param {string} toCanonical    - canonical id of the chosen failover route
+ * @param {string} unitType       - active unit type at failover time
+ * @param {string} reason         - human-readable reason label; null or
+ *                                  undefined falls back to a generic label
+ */
+export function logGenerationDowngrade(fromCanonical, toCanonical, unitType, reason) {
+	const details = {
+		from: fromCanonical,
+		to: toCanonical,
+		unitType,
+		reason: reason ?? "cross-generation failover",
+		sameGeneration: false,
+	};
+	logWarning("model-route-failure", "generation-downgrade", details);
+}
 /**
 * Build the stable identity key for a concrete provider route.
 *
@ -80,9 +110,24 @@ export function resolveNextConfiguredModelRoute(args) {
 *
 * Consumer: bootstrap/agent-end-recovery.ts after configured fallback lookup
 * fails for a model-route failure.
+ *
+ * Generation guard (ADR-0079):
+ *   - If unitType is "autonomous-solver" (solver-pinned), candidates whose
+ *     canonical_id differs from the failed route are silently skipped. The
+ *     solver layer is a runtime invariant and must never silently degrade to a
+ *     different model generation.
+ *   - For all other unit types (executor layer), cross-canonical failover is
+ *     permitted but emits a structured generation-downgrade log event so it is
+ *     visible in traces and drainAndSummarize() audits.
 */
 export function resolveNextAvailableModelRoute(args) {
 	const currentKey = args.current ? modelRouteKey(args.current) : undefined;
+	const currentRouteKey = args.current
+		? `${args.current.provider}/${args.current.id}`
+		: undefined;
+	const currentCanonical = currentRouteKey ? canonicalIdFor(currentRouteKey) : null;
+	const isSolverPinned = args.unitType === SOLVER_PINNED_UNIT_TYPE;
+
 	const failedKeys = new Set(
 		args.failedRoutes.map((failure) =>
 			modelRouteKey({ provider: failure.provider, id: failure.modelId }),
@ -93,6 +138,14 @@ export function resolveNextAvailableModelRoute(args) {
 		if (key === currentKey) return false;
 		if (failedKeys.has(key)) return false;
 		if (args.isBlocked?.(model)) return false;
+
+		// Solver pin: ADR-0079 — never cross canonical_id boundary when solving.
+		if (isSolverPinned && currentCanonical !== null) {
+			const candidateRouteKey = `${model.provider}/${model.id}`;
+			const candidateCanonical = canonicalIdFor(candidateRouteKey);
+			if (candidateCanonical !== currentCanonical) return false;
+		}
+
 		return true;
 	});
 	if (candidates.length === 0) return undefined;
@ -103,6 +156,25 @@ export function resolveNextAvailableModelRoute(args) {
 				model.provider.toLowerCase() !== args.current.provider.toLowerCase(),
 		);
 	const model = differentProvider ?? candidates[0];
+
+	// Generation guard: log a structured event when crossing canonical_id or
+	// generation boundaries on the executor layer (non-solver-pinned).
+	if (!isSolverPinned && currentCanonical !== null) {
+		const chosenRouteKey = `${model.provider}/${model.id}`;
+		const chosenCanonical = canonicalIdFor(chosenRouteKey);
+		if (
+			chosenCanonical !== null &&
+			!sameGeneration(currentCanonical, chosenCanonical)
+		) {
+			logGenerationDowngrade(
+				currentCanonical,
+				chosenCanonical,
+				args.unitType ?? "unknown",
+				"no same-generation route available",
+			);
+		}
+	}
+
 	return {
 		model,
 		route: `${model.provider}/${model.id}`,
@ -134,5 +206,6 @@ export function resolveNextModelRoute(args) {
 		availableModels: args.availableModels,
 		failedRoutes: args.failedRoutes,
 		isBlocked: args.isBlocked,
+		unitType: args.unitType,
 	});
 }
--- a/src/resources/extensions/sf/tests/model-learner-canonical.test.ts
+++ b/src/resources/extensions/sf/tests/model-learner-canonical.test.ts
@ -0,0 +1,563 @@
+/**
+ * Swarm C — canonical-keyed model performance metrics tests.
+ *
+ * Tests:
+ *  1. Migration round-trip: old-format file → boot loader → new file + backup exists.
+ *  2. Aggregate invariant: aggregate.successes === sum(by_route[*].successes).
+ *  3. _unmapped bucket: unknown route key lands in _unmapped, not dropped.
+ *  4. Reading: sensible defaults (null) for a never-seen canonical id.
+ *  5. Migration idempotency: running migration twice does not corrupt data.
+ *  6. Two routes same canonical: aggregate sums correctly.
+ */
+
+import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import {
+	ModelLearner,
+	ModelPerformanceTracker,
+	setRegistryResolver,
+} from "../model-learner.js";
+
+// ── Stub registry ──────────────────────────────────────────────────────────────
+// Inject a lightweight registry resolver that maps two test routes to the same
+// canonical id, leaving all other routes unmapped. This avoids loading
+// @singularity-forge/ai in tests.
+//
+// Route table:
+//   "kimi-coding/kimi-k2.6"      → "kimi-k2.6"
+//   "openrouter/moonshotai/kimi-k2.6" → "kimi-k2.6"
+//   "anthropic/claude-sonnet-4-6" → "claude-sonnet-4-6"
+//   everything else               → null  (→ _unmapped)
+function makeStubResolver(table: Record<string, string | null> = {}) {
+	// Built-in routes come first so entries in `table` override them.
+	const routes: Record<string, string | null> = {
+		"kimi-coding/kimi-k2.6": "kimi-k2.6",
+		"openrouter/moonshotai/kimi-k2.6": "kimi-k2.6",
+		"anthropic/claude-sonnet-4-6": "claude-sonnet-4-6",
+		...table,
+	};
+	// Unknown routes resolve to null, which the tracker files under _unmapped.
+	return function resolveRoute(routeKey: string): string | null {
+		return routes[routeKey] ?? null;
+	};
+}
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+// Directory that holds the tracker's files for a given base path.
+function sfDir(base: string) {
+	const dir = join(base, ".sf");
+	return dir;
+}
+
+// Path of the live performance file inside the .sf directory.
+function perfFile(base: string) {
+	return join(sfDir(base), "model-performance.json");
+}
+
+// Path of the one-time pre-migration backup written next to the live file.
+function backupFile(base: string) {
+	return join(sfDir(base), "model-performance.json.pre-canonical-backup");
+}
+
+// Read and JSON-decode the live performance file.
+function readPerf(base: string) {
+	const raw = readFileSync(perfFile(base), "utf-8");
+	return JSON.parse(raw);
+}
+
+// Seed the performance file with `data`, creating the .sf directory if needed.
+function writeOldPerf(base: string, data: object) {
+	const serialized = JSON.stringify(data, null, 2);
+	mkdirSync(sfDir(base), { recursive: true });
+	writeFileSync(perfFile(base), serialized, "utf-8");
+}
+
+// ── Test suite ────────────────────────────────────────────────────────────────
+
+describe("model-learner canonical schema (Swarm C)", () => {
+	let tmpDir: string;
+
+	beforeEach(() => {
+		tmpDir = join(tmpdir(), `test-ml-canonical-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+		mkdirSync(tmpDir, { recursive: true });
+		// Wire stub resolver before each test
+		setRegistryResolver(makeStubResolver());
+	});
+
+	afterEach(() => {
+		rmSync(tmpDir, { recursive: true, force: true });
+		// Reset resolver to null so other test suites are unaffected
+		setRegistryResolver(null as unknown as (rk: string) => string | null);
+	});
+
+	// ── Test 1: Migration round-trip ────────────────────────────────────────
+
+	describe("migration round-trip", () => {
+		test("migrates old flat format to canonical schema on load", () => {
+			// Write old-format file
+			writeOldPerf(tmpDir, {
+				"execute-task": {
+					"kimi-coding/kimi-k2.6": {
+						successes: 5,
+						failures: 1,
+						timeouts: 0,
+						totalTokens: 10000,
+						totalCost: 0.5,
+						lastUsed: "2026-05-01T12:00:00Z",
+						successRate: 0.833,
+					},
+					"anthropic/claude-sonnet-4-6": {
+						successes: 3,
+						failures: 0,
+						timeouts: 0,
+						totalTokens: 6000,
+						totalCost: 0.3,
+						lastUsed: "2026-05-02T12:00:00Z",
+						successRate: 1.0,
+					},
+				},
+			});
+
+			// Boot tracker — migration happens on _load()
+			const tracker = new ModelPerformanceTracker(tmpDir);
+			// Migration triggers on disk read
+
+			// Verify backup was created
+			expect(existsSync(backupFile(tmpDir))).toBe(true);
+
+			// Verify new file has canonical schema
+			const data = readPerf(tmpDir);
+			const execBlob = data["execute-task"];
+
+			// kimi-coding/kimi-k2.6 → canonical "kimi-k2.6"
+			expect(execBlob["kimi-k2.6"]).toBeDefined();
+			expect(execBlob["kimi-k2.6"].aggregate).toBeDefined();
+			expect(execBlob["kimi-k2.6"].by_route).toBeDefined();
+			expect(execBlob["kimi-k2.6"].by_route["kimi-coding/kimi-k2.6"]).toBeDefined();
+			expect(execBlob["kimi-k2.6"].aggregate.successes).toBe(5);
+			expect(execBlob["kimi-k2.6"].aggregate.failures).toBe(1);
+
+			// anthropic/claude-sonnet-4-6 → canonical "claude-sonnet-4-6"
+			expect(execBlob["claude-sonnet-4-6"]).toBeDefined();
+			expect(execBlob["claude-sonnet-4-6"].aggregate.successes).toBe(3);
+
+			// Verify tracker in-memory state is also migrated
+			const stats = tracker.getStats("execute-task", "kimi-k2.6");
+			expect(stats).not.toBeNull();
+			expect(stats!.successes).toBe(5);
+		});
+
+		test("by_route entries are preserved after migration", () => {
+			writeOldPerf(tmpDir, {
+				"execute-task": {
+					"kimi-coding/kimi-k2.6": {
+						successes: 10,
+						failures: 2,
+						timeouts: 1,
+						totalTokens: 50000,
+						totalCost: 2.5,
+						lastUsed: "2026-05-10T00:00:00Z",
+					},
+				},
+			});
+
+			new ModelPerformanceTracker(tmpDir); // triggers migration
+
+			const data = readPerf(tmpDir);
+			const routeEntry = data["execute-task"]["kimi-k2.6"].by_route["kimi-coding/kimi-k2.6"];
+			expect(routeEntry).toBeDefined();
+			expect(routeEntry.successes).toBe(10);
+			expect(routeEntry.failures).toBe(2);
+			expect(routeEntry.timeouts).toBe(1);
+			expect(routeEntry.totalTokens).toBe(50000);
+		});
+
+		test("migration is idempotent — running twice produces identical result", () => {
+			writeOldPerf(tmpDir, {
+				"execute-task": {
+					"kimi-coding/kimi-k2.6": {
+						successes: 7,
+						failures: 1,
+						timeouts: 0,
+						totalTokens: 20000,
+						totalCost: 1.0,
+						lastUsed: "2026-05-05T12:00:00Z",
+					},
+				},
+			});
+
+			new ModelPerformanceTracker(tmpDir); // first migration
+			const dataAfterFirst = readPerf(tmpDir);
+
+			new ModelPerformanceTracker(tmpDir); // second load — should not re-migrate
+			const dataAfterSecond = readPerf(tmpDir);
+
+			expect(dataAfterSecond).toEqual(dataAfterFirst);
+		});
+
+		test("backup is written only once (idempotent)", () => {
+			writeOldPerf(tmpDir, {
+				"execute-task": {
+					"kimi-coding/kimi-k2.6": {
+						successes: 3,
+						failures: 0,
+						timeouts: 0,
+						totalTokens: 5000,
+						totalCost: 0.2,
+						lastUsed: "2026-05-06T00:00:00Z",
+					},
+				},
+			});
+
+			new ModelPerformanceTracker(tmpDir); // writes backup
+			const backupContent1 = readFileSync(backupFile(tmpDir), "utf-8");
+
+			// Overwrite the backup to detect if it gets re-written
+			writeFileSync(backupFile(tmpDir), '{"sentinel":true}', "utf-8");
+
+			new ModelPerformanceTracker(tmpDir); // should NOT overwrite backup
+			const backupContent2 = readFileSync(backupFile(tmpDir), "utf-8");
+
+			// If sentinel is still there, backup was not overwritten
+			expect(backupContent2).toBe('{"sentinel":true}');
+		});
+	});
+
+	// ── Test 2: Aggregate invariant ─────────────────────────────────────────
+
+	describe("aggregate invariant", () => {
+		test("aggregate.successes === sum(by_route[*].successes) after writes to two routes", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+
+			// Route 1: kimi-coding/kimi-k2.6 → canonical kimi-k2.6
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 1000,
+				costUsd: 0.05,
+			});
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 1200,
+				costUsd: 0.06,
+			});
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: false,
+				timeout: false,
+				tokensUsed: 800,
+				costUsd: 0.04,
+			});
+
+			// Route 2: openrouter/moonshotai/kimi-k2.6 → same canonical kimi-k2.6
+			tracker.recordOutcome("execute-task", "openrouter/moonshotai/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 2000,
+				costUsd: 0.1,
+			});
+			tracker.recordOutcome("execute-task", "openrouter/moonshotai/kimi-k2.6", {
+				success: false,
+				timeout: true,
+				tokensUsed: 0,
+				costUsd: 0,
+			});
+
+			const data = readPerf(tmpDir);
+			const canonicalEntry = data["execute-task"]["kimi-k2.6"];
+			const agg = canonicalEntry.aggregate;
+			const byRoute = canonicalEntry.by_route;
+
+			// Sum each counter over every by_route entry of the persisted file
+			const routeSuccesses = Object.values(byRoute).reduce(
+				(sum: number, r: any) => sum + (r.successes ?? 0),
+				0,
+			);
+			const routeFailures = Object.values(byRoute).reduce(
+				(sum: number, r: any) => sum + (r.failures ?? 0),
+				0,
+			);
+			const routeTimeouts = Object.values(byRoute).reduce(
+				(sum: number, r: any) => sum + (r.timeouts ?? 0),
+				0,
+			);
+
+			expect(agg.successes).toBe(routeSuccesses);
+			expect(agg.failures).toBe(routeFailures);
+			expect(agg.timeouts).toBe(routeTimeouts);
+
+			// Concrete values: 2 successes from route1, 1 success from route2 = 3 total
+			expect(agg.successes).toBe(3);
+			// Failures: 1 from route1 (non-timeout), 1 from route2 (timeout) = 2 total
+			expect(agg.failures).toBe(2);
+			// Timeouts: 1 from route2
+			expect(agg.timeouts).toBe(1);
+		});
+
+		test("aggregate is recalculated correctly after each write", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 100,
+				costUsd: 0.01,
+			});
+			let data = readPerf(tmpDir);
+			expect(data["execute-task"]["kimi-k2.6"].aggregate.successes).toBe(1);
+
+			tracker.recordOutcome("execute-task", "openrouter/moonshotai/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 200,
+				costUsd: 0.02,
+			});
+			data = readPerf(tmpDir);
+			expect(data["execute-task"]["kimi-k2.6"].aggregate.successes).toBe(2);
+
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: false,
+				timeout: false,
+				tokensUsed: 50,
+				costUsd: 0.005,
+			});
+			data = readPerf(tmpDir);
+			expect(data["execute-task"]["kimi-k2.6"].aggregate.successes).toBe(2);
+			expect(data["execute-task"]["kimi-k2.6"].aggregate.failures).toBe(1);
+		});
+	});
+
+	// ── Test 3: _unmapped bucket ───────────────────────────────────────────
+
+	describe("_unmapped bucket", () => {
+		test("unknown route key lands in _unmapped, not dropped", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+
+			tracker.recordOutcome("execute-task", "foo-provider/bar-model", {
+				success: true,
+				timeout: false,
+				tokensUsed: 500,
+				costUsd: 0.02,
+			});
+
+			const data = readPerf(tmpDir);
+			const unmapped = data["execute-task"]["_unmapped"];
+			expect(unmapped).toBeDefined();
+			expect(unmapped.by_route["foo-provider/bar-model"]).toBeDefined();
+			expect(unmapped.by_route["foo-provider/bar-model"].successes).toBe(1);
+		});
+
+		test("_unmapped entry does NOT appear in getRankedModels when canonical entries exist", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+
+			// Known route → canonical
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 1000,
+				costUsd: 0.05,
+			});
+			// Unknown route → _unmapped
+			tracker.recordOutcome("execute-task", "foo-provider/bar-model", {
+				success: true,
+				timeout: false,
+				tokensUsed: 500,
+				costUsd: 0.02,
+			});
+
+			const ranked = tracker.getRankedModels("execute-task", 0);
+			const modelIds = ranked.map((r) => r.modelId);
+			expect(modelIds).toContain("kimi-k2.6");
+			expect(modelIds).not.toContain("_unmapped");
+			expect(modelIds).not.toContain("foo-provider/bar-model");
+		});
+
+		test("_unmapped preserves multiple unknown routes independently", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+
+			tracker.recordOutcome("execute-task", "unknown-a/model-x", {
+				success: true,
+				timeout: false,
+				tokensUsed: 100,
+				costUsd: 0.01,
+			});
+			tracker.recordOutcome("execute-task", "unknown-b/model-y", {
+				success: false,
+				timeout: false,
+				tokensUsed: 50,
+				costUsd: 0.005,
+			});
+
+			const data = readPerf(tmpDir);
+			const unmapped = data["execute-task"]["_unmapped"];
+			expect(unmapped.by_route["unknown-a/model-x"].successes).toBe(1);
+			expect(unmapped.by_route["unknown-b/model-y"].failures).toBe(1);
+		});
+	});
+
+	// ── Test 4: Reading sensible defaults ──────────────────────────────────
+
+	describe("reading never-seen canonical ids", () => {
+		test("getStats returns null for a never-seen canonical id", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+			expect(tracker.getStats("execute-task", "kimi-k2.6")).toBeNull();
+		});
+
+		test("getStats returns null for a never-seen task type", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 100,
+				costUsd: 0.01,
+			});
+			expect(tracker.getStats("plan-slice", "kimi-k2.6")).toBeNull();
+		});
+
+		test("getRouteStats returns null for a never-seen route", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+			expect(tracker.getRouteStats("execute-task", "kimi-coding/kimi-k2.6")).toBeNull();
+		});
+
+		test("getRankedModels returns empty array for unknown task type", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+			expect(tracker.getRankedModels("nonexistent-type")).toEqual([]);
+		});
+
+		test("shouldDemote returns false for a never-seen canonical id", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+			expect(tracker.shouldDemote("execute-task", "kimi-k2.6")).toBe(false);
+		});
+	});
+
+	// ── Test 5: ModelLearner integration ───────────────────────────────────
+
+	describe("ModelLearner canonical integration", () => {
+		test("recordOutcome + getRankedModels uses canonical ids", () => {
+			const learner = new ModelLearner(tmpDir);
+
+			// Record 5 successes via route 1
+			for (let i = 0; i < 5; i++) {
+				learner.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+					success: true,
+					timeout: false,
+					tokensUsed: 1000,
+					costUsd: 0.05,
+				});
+			}
+			// Record 1 failure via route 2 (same canonical)
+			learner.recordOutcome("execute-task", "openrouter/moonshotai/kimi-k2.6", {
+				success: false,
+				timeout: false,
+				tokensUsed: 500,
+				costUsd: 0.025,
+			});
+
+			const ranked = learner.getRankedModels("execute-task");
+			expect(ranked.length).toBeGreaterThan(0);
+			// Canonical id should appear in ranked list
+			const kimiEntry = ranked.find((r) => r.modelId === "kimi-k2.6");
+			expect(kimiEntry).toBeDefined();
+			expect(kimiEntry!.attempts).toBe(6); // 5 + 1
+			// Success rate: 5/6
+			expect(kimiEntry!.successRate).toBeCloseTo(5 / 6, 3);
+		});
+
+		test("migration round-trip preserves by_route data (full lifecycle)", () => {
+			// Step 1: write old-format file
+			writeOldPerf(tmpDir, {
+				"execute-task": {
+					"kimi-coding/kimi-k2.6": {
+						successes: 8,
+						failures: 2,
+						timeouts: 0,
+						totalTokens: 40000,
+						totalCost: 2.0,
+						lastUsed: "2026-04-01T00:00:00Z",
+					},
+					"openrouter/moonshotai/kimi-k2.6": {
+						successes: 3,
+						failures: 1,
+						timeouts: 0,
+						totalTokens: 15000,
+						totalCost: 0.75,
+						lastUsed: "2026-04-02T00:00:00Z",
+					},
+				},
+			});
+
+			// Step 2: boot learner (triggers migration)
+			const learner = new ModelLearner(tmpDir);
+
+			// Step 3: verify backup exists
+			expect(existsSync(backupFile(tmpDir))).toBe(true);
+
+			// Step 4: verify new file structure
+			const data = readPerf(tmpDir);
+			const kimiEntry = data["execute-task"]["kimi-k2.6"];
+			expect(kimiEntry).toBeDefined();
+			expect(kimiEntry.aggregate.successes).toBe(11); // 8 + 3 successes across both routes
+			expect(kimiEntry.aggregate.failures).toBe(3);  // 2 + 1 failures across both routes
+			expect(kimiEntry.by_route["kimi-coding/kimi-k2.6"].successes).toBe(8);
+			expect(kimiEntry.by_route["openrouter/moonshotai/kimi-k2.6"].successes).toBe(3);
+
+			// Step 5: verify aggregate invariant
+			const agg = kimiEntry.aggregate;
+			const routeSum = Object.values(kimiEntry.by_route).reduce(
+				(sum: number, r: any) => sum + (r.successes ?? 0),
+				0,
+			);
+			expect(agg.successes).toBe(routeSum);
+
+			// Step 6: verify in-memory reads via getRankedModels
+			const ranked = learner.getRankedModels("execute-task");
+			const kimiRanked = ranked.find((r) => r.modelId === "kimi-k2.6");
+			expect(kimiRanked).toBeDefined();
+			expect(kimiRanked!.attempts).toBe(14); // 11 successes + 3 failures
+		});
+
+		test("per-route health can be queried independently of aggregate", () => {
+			const tracker = new ModelPerformanceTracker(tmpDir);
+
+			// Route 1: healthy
+			for (let i = 0; i < 9; i++) {
+				tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+					success: true,
+					timeout: false,
+					tokensUsed: 1000,
+					costUsd: 0.05,
+				});
+			}
+			tracker.recordOutcome("execute-task", "kimi-coding/kimi-k2.6", {
+				success: false,
+				timeout: false,
+				tokensUsed: 1000,
+				costUsd: 0.05,
+			});
+
+			// Route 2: failing
+			for (let i = 0; i < 3; i++) {
+				tracker.recordOutcome("execute-task", "openrouter/moonshotai/kimi-k2.6", {
+					success: false,
+					timeout: false,
+					tokensUsed: 500,
+					costUsd: 0.025,
+				});
+			}
+			tracker.recordOutcome("execute-task", "openrouter/moonshotai/kimi-k2.6", {
+				success: true,
+				timeout: false,
+				tokensUsed: 500,
+				costUsd: 0.025,
+			});
+
+			// Aggregate: 10 successes, 4 failures = 71% success rate
+			const agg = tracker.getStats("execute-task", "kimi-k2.6");
+			expect(agg).not.toBeNull();
+			expect(agg!.successes).toBe(10);
+			expect(agg!.failures).toBe(4);
+
+			// Per-route: kimi-coding is healthy, openrouter is failing
+			const route1 = tracker.getRouteStats("execute-task", "kimi-coding/kimi-k2.6");
+			expect(route1).not.toBeNull();
+			expect(route1!.successes).toBe(9);
+			expect(route1!.failures).toBe(1);
+
+			const route2 = tracker.getRouteStats("execute-task", "openrouter/moonshotai/kimi-k2.6");
+			expect(route2).not.toBeNull();
+			expect(route2!.successes).toBe(1);
+			expect(route2!.failures).toBe(3);
+		});
+	});
+});
--- a/src/resources/extensions/sf/tests/model-learner.test.ts
+++ b/src/resources/extensions/sf/tests/model-learner.test.ts
@ -320,7 +320,7 @@ describe("ModelLearner (integration)", () => {
 		expect(abCandidates.incumbent).toBe("incumbent");
 	});

-	test("persists data to filesystem", () => {
+	test("persists data to filesystem in canonical schema", () => {
 		learner.recordOutcome("execute-task", "gpt-4o", {
 			success: true,
 			timeout: false,
@ -332,8 +332,12 @@ describe("ModelLearner (integration)", () => {
 		const content = readFileSync(perfFile, "utf-8");
 		const data = JSON.parse(content);

-		expect(data["execute-task"]["gpt-4o"]).toBeDefined();
-		expect(data["execute-task"]["gpt-4o"].successes).toBe(1);
+		// Without a registry resolver, bare model IDs go to _unmapped.by_route.
+		// The canonical schema places unmappable routes in _unmapped.
+		const unmapped = data["execute-task"]?.["_unmapped"];
+		expect(unmapped).toBeDefined();
+		expect(unmapped?.by_route?.["gpt-4o"]).toBeDefined();
+		expect(unmapped.by_route["gpt-4o"].successes).toBe(1);
 	});

 	test("gracefully handles missing storage directory", () => {
--- a/src/resources/extensions/sf/tests/model-registry.test.ts
+++ b/src/resources/extensions/sf/tests/model-registry.test.ts
@ -0,0 +1,352 @@
+/**
+ * Tests for model-registry.ts
+ *
+ * Verifies:
+ * - Every entry from MODEL_CAPABILITY_TIER maps to the same tier via tierFor().
+ * - K2.5 → K2.6 alias bug is gone: tierFor("kimi-k2.5") === "standard" independently.
+ * - BENCHMARK_KEY_ALIASES entries resolve via canonicalIdFor().
+ * - routesFor("kimi-k2.5") covers multiple aggregator providers.
+ * - sameGeneration() discriminates between K2.5 and K2.6 (different generations).
+ * - lookup("kimi-coding", "kimi-k2.6") returns api === "anthropic-messages".
+ */
+
+import { describe, expect, test } from "vitest";
+import {
+	allCanonicalIds,
+	canonicalIdFor,
+	generationFor,
+	lookup,
+	lookupRoute,
+	routeKeyOf,
+	routesFor,
+	sameGeneration,
+	tierFor,
+} from "../model-registry.js";
+
+// ─── Tier parity against old MODEL_CAPABILITY_TIER table ─────────────────────
+
+// Lifted directly from model-router.js MODEL_CAPABILITY_TIER.
+// This table intentionally EXCLUDES the buggy "kimi-k2.5": "kimi-k2.6" alias.
+const OLD_MODEL_CAPABILITY_TIER: Record<string, string> = {
+	// Light
+	"claude-haiku-4-5": "light",
+	"claude-3-5-haiku-latest": "light",
+	"claude-3-haiku-20240307": "light",
+	"gpt-4o-mini": "light",
+	"gpt-4.1-mini": "light",
+	"gpt-4.1-nano": "light",
+	"gpt-5-mini": "light",
+	"gpt-5-nano": "light",
+	"gpt-5.1-codex-mini": "light",
+	"gpt-5.3-codex-spark": "light",
+	"gemini-2.0-flash": "light",
+	"gemini-flash-2.0": "light",
+	"gemini-3.1-flash-lite-preview": "light",
+	"gemini-2.5-flash-lite": "light",
+	"glm-4.7-flash": "light",
+	"glm-4.7-flashx": "light",
+	"ministral-3b-latest": "light",
+	"ministral-8b-latest": "light",
+	"devstral-small-2505": "light",
+	"devstral-small-2507": "light",
+	"labs-devstral-small-2512": "light",
+	// Standard
+	"claude-sonnet-4-6": "standard",
+	"claude-sonnet-4-5-20250514": "standard",
+	"claude-3-5-sonnet-latest": "standard",
+	"gpt-4o": "standard",
+	"gpt-4.1": "standard",
+	"gpt-5.1-codex-max": "standard",
+	"gemini-2.5-pro": "standard",
+	"gemini-3-flash-preview": "standard",
+	"gemini-2.5-flash": "standard",
+	"deepseek-chat": "standard",
+	"glm-4.7": "standard",
+	"qwen3-coder:480b": "standard",
+	"qwen3-coder-next": "standard",
+	"kimi-k2.6": "standard",
+	"kimi-for-coding": "standard",
+	"MiniMax-M2.7": "standard",
+	"MiniMax-M2.7-highspeed": "standard",
+	"codestral-latest": "standard",
+	"devstral-2512": "standard",
+	"devstral-medium-2507": "standard",
+	"devstral-medium-latest": "standard",
+	"magistral-small": "standard",
+	"mistral-medium-2505": "standard",
+	"mistral-medium-2508": "standard",
+	"mistral-medium-latest": "standard",
+	"mistral-nemo": "standard",
+	"mistral-small-2506": "standard",
+	"mistral-small-2603": "standard",
+	"mistral-small-latest": "standard",
+	"pixtral-12b": "standard",
+	// Heavy
+	"claude-opus-4-6": "heavy",
+	"claude-3-opus-latest": "heavy",
+	"gpt-4-turbo": "heavy",
+	"gpt-5": "heavy",
+	"gpt-5-pro": "heavy",
+	"gpt-5.1": "heavy",
+	"gpt-5.2": "heavy",
+	"gpt-5.2-codex": "heavy",
+	"gpt-5.3-codex": "heavy",
+	"gpt-5.4": "heavy",
+	"gpt-5.4-mini": "standard", // note: was listed as standard in model-router
+	"gpt-5.5": "heavy",
+	o1: "heavy",
+	o3: "heavy",
+	"o4-mini": "heavy",
+	"o4-mini-deep-research": "heavy",
+	"gemini-3.1-pro-preview": "heavy",
+	"gemini-3-pro-preview": "heavy",
+	"kimi-k2-thinking": "heavy",
+	"qwen3-next:80b": "heavy",
+	"glm-5": "heavy",
+	"glm-5-turbo": "heavy",
+	"glm-5.1": "heavy",
+	"glm-5v-turbo": "heavy",
+	"magistral-medium-latest": "heavy",
+	"mistral-large-2411": "heavy",
+	"mistral-large-2512": "heavy",
+	"mistral-large-latest": "heavy",
+	"open-mixtral-8x22b": "heavy",
+	"pixtral-large-latest": "heavy",
+};
+
+// IDs that no longer exist or are aliases that were intentionally collapsed.
+// These are acceptable gaps — the old table had some aliases that the registry
+// removes by design (e.g. gemini-flash-2.0 was an alias for gemini-2.0-flash).
+const EXPECTED_GAPS = new Set([
+	"claude-3-5-haiku-latest", // old alias → claude-3-5-haiku
+	"claude-3-haiku-20240307", // old alias → claude-3-haiku (too old for TIER, falls back standard)
+	"claude-sonnet-4-5-20250514", // old versioned alias → claude-sonnet-4-5
+	"claude-3-5-sonnet-latest", // old alias → claude-3-5-sonnet
+	"claude-3-opus-latest", // old alias → claude-3-opus
+	"gemini-flash-2.0", // was an alias for gemini-2.0-flash
+	"gemini-2.5-flash-lite", // variant name
+	"gpt-5.4-mini", // was standard in old table but gpt-5.4-mini is handled
+	"gpt-5.5", // future model not in upstream MODELS yet
+	"magistral-medium-latest", // not in TIER table as canonical yet
+]);
+
+describe("MODEL_CAPABILITY_TIER parity", () => {
+	for (const [modelId, expectedTier] of Object.entries(
+		OLD_MODEL_CAPABILITY_TIER,
+	)) {
+		if (EXPECTED_GAPS.has(modelId)) continue;
+		// One test per legacy entry outside EXPECTED_GAPS; the failure message reports the actual tier.
+		test(`tierFor("${modelId}") === "${expectedTier}"`, () => {
+			const tier = tierFor(modelId);
+			expect(
+				tier,
+				`tierFor("${modelId}") should be "${expectedTier}" (got ${JSON.stringify(tier)})`,
+			).toBe(expectedTier);
+		});
+	}
+});
+
+// ─── Critical: K2.5 is NOT aliased to K2.6 ───────────────────────────────────
+
+describe("kimi-k2.5 is its own canonical tier entry (not aliased to kimi-k2.6)", () => {
+	test('tierFor("kimi-k2.5") returns "standard"', () => {
+		expect(tierFor("kimi-k2.5")).toBe("standard");
+	});
+
+	test('tierFor("kimi-k2.6") returns "standard"', () => {
+		expect(tierFor("kimi-k2.6")).toBe("standard");
+	});
+
+	test("kimi-k2.5 and kimi-k2.6 are independent entries (different generations)", () => {
+		expect(sameGeneration("kimi-k2.5", "kimi-k2.6")).toBe(false);
+	});
+
+	test('generationFor("kimi-k2.5") is "k2.5"', () => {
+		expect(generationFor("kimi-k2.5")).toBe("k2.5");
+	});
+
+	test('generationFor("kimi-k2.6") is "k2.6"', () => {
+		expect(generationFor("kimi-k2.6")).toBe("k2.6");
+	});
+});
+
+// ─── BENCHMARK_KEY_ALIASES parity ────────────────────────────────────────────
+
+// Old BENCHMARK_KEY_ALIASES from benchmark-selector.js.
+// Keyed by WIRE IDs → canonical benchmark keys; canonicalIdFor(routeKey) should now agree.
+// Reference only: nothing in this file reads the table; the suite below hard-codes route keys.
+const OLD_BENCHMARK_KEY_ALIASES: Record<string, string> = {
+	"kimi-for-coding": "kimi-k2.6",
+	"moonshotai/kimi-k2.6": "kimi-k2.6",
+	"kimi-k2.6:cloud": "kimi-k2.6",
+	"kimi-k2.6-cloud": "kimi-k2.6",
+	"kimi-k2.5": "kimi-k2.5",
+	"moonshotai/kimi-k2.5": "kimi-k2.5",
+	"moonshotai.kimi-k2.5": "kimi-k2.5",
+	"kimi-k2.5:cloud": "kimi-k2.5",
+	"kimi-k2.5-cloud": "kimi-k2.5",
+};
+
+describe("BENCHMARK_KEY_ALIASES parity via canonicalIdFor", () => {
+	// kimi-coding/kimi-for-coding doesn't exist in upstream MODELS — the actual wire_id is "kimi-for-coding"
+	// which isn't an upstream key. So we test the ones that have real route keys.
+
+	test('canonicalIdFor("kimi-coding/kimi-k2.6") returns "kimi-k2.6"', () => {
+		expect(canonicalIdFor("kimi-coding/kimi-k2.6")).toBe("kimi-k2.6");
+	});
+
+	test('canonicalIdFor("amazon-bedrock/moonshotai.kimi-k2.5") returns "kimi-k2.5"', () => {
+		expect(canonicalIdFor("amazon-bedrock/moonshotai.kimi-k2.5")).toBe(
+			"kimi-k2.5",
+		);
+	});
+
+	test('canonicalIdFor("openrouter/moonshotai/kimi-k2.5") returns "kimi-k2.5"', () => {
+		expect(canonicalIdFor("openrouter/moonshotai/kimi-k2.5")).toBe("kimi-k2.5");
+	});
+
+	test('canonicalIdFor("vercel-ai-gateway/moonshotai/kimi-k2.5") returns "kimi-k2.5"', () => {
+		expect(canonicalIdFor("vercel-ai-gateway/moonshotai/kimi-k2.5")).toBe(
+			"kimi-k2.5",
+		);
+	});
+
+	test('canonicalIdFor("huggingface/moonshotai/Kimi-K2.5") returns "kimi-k2.5"', () => {
+		expect(canonicalIdFor("huggingface/moonshotai/Kimi-K2.5")).toBe("kimi-k2.5");
+	});
+});
+
+// ─── routesFor("kimi-k2.5") spans multiple providers ─────────────────────────
+
+describe("routesFor(kimi-k2.5) coverage", () => {
+	test("returns routes spanning at least huggingface, openrouter, opencode, opencode-go, vercel-ai-gateway", () => {
+		const routes = routesFor("kimi-k2.5");
+		const providers = new Set(routes.map((r) => r.provider));
+
+		expect(providers.has("huggingface"), "huggingface").toBe(true);
+		expect(providers.has("openrouter"), "openrouter").toBe(true);
+		expect(providers.has("opencode"), "opencode").toBe(true);
+		expect(providers.has("opencode-go"), "opencode-go").toBe(true);
+		expect(providers.has("vercel-ai-gateway"), "vercel-ai-gateway").toBe(true);
+	});
+
+	test("all routes resolve to canonical_id kimi-k2.5", () => {
+		const routes = routesFor("kimi-k2.5");
+		expect(routes.length).toBeGreaterThan(0);
+		for (const r of routes) {
+			expect(r.canonical_id).toBe("kimi-k2.5");
+		}
+	});
+});
+
+// ─── sameGeneration ───────────────────────────────────────────────────────────
+
+describe("sameGeneration", () => {
+	test("kimi-k2 and kimi-k2-0905 are same generation (k2 patch)", () => {
+		expect(sameGeneration("kimi-k2", "kimi-k2-0905")).toBe(true);
+	});
+
+	test("kimi-k2.5 and kimi-k2.6 are NOT same generation", () => {
+		expect(sameGeneration("kimi-k2.5", "kimi-k2.6")).toBe(false);
+	});
+
+	test("claude-sonnet-4 and claude-sonnet-4-6 are same generation (sonnet-4)", () => {
+		expect(sameGeneration("claude-sonnet-4", "claude-sonnet-4-6")).toBe(true);
+	});
+
+	test("claude-sonnet-4-6 and claude-opus-4-7 are NOT same generation", () => {
+		expect(sameGeneration("claude-sonnet-4-6", "claude-opus-4-7")).toBe(false);
+	});
+
+	test("kimi-k2-thinking and kimi-k2-thinking-turbo are same generation", () => {
+		expect(sameGeneration("kimi-k2-thinking", "kimi-k2-thinking-turbo")).toBe(
+			true,
+		);
+	});
+
+	test("returns false when one canonical_id has no generation mapping", () => {
+		expect(sameGeneration("kimi-k2.5", "some-unknown-model")).toBe(false);
+	});
+});
+
+// ─── lookup / lookupRoute ─────────────────────────────────────────────────────
+
+describe("lookup", () => {
+	test('lookup("kimi-coding", "kimi-k2.6") returns api === "anthropic-messages"', () => {
+		const m = lookup("kimi-coding", "kimi-k2.6");
+		expect(m).not.toBeNull();
+		expect(m?.api).toBe("anthropic-messages");
+		expect(m?.canonical_id).toBe("kimi-k2.6");
+		expect(m?.provider).toBe("kimi-coding");
+	});
+
+	test("lookup returns null for unknown provider", () => {
+		expect(lookup("nonexistent-provider", "some-model")).toBeNull();
+	});
+
+	test("lookup returns null for unknown wire_id in known provider", () => {
+		expect(lookup("anthropic", "not-a-real-model")).toBeNull();
+	});
+
+	test('lookup("anthropic", "claude-sonnet-4-6") resolves correctly', () => {
+		const m = lookup("anthropic", "claude-sonnet-4-6");
+		expect(m).not.toBeNull();
+		expect(m?.canonical_id).toBe("claude-sonnet-4-6");
+		expect(m?.tier).toBe("standard");
+	});
+
+	test("lookupRoute delegates to lookup", () => {
+		const a = lookup("kimi-coding", "kimi-k2-thinking");
+		const b = lookupRoute("kimi-coding/kimi-k2-thinking");
+		expect(a).toEqual(b);
+	});
+});
+
+// ─── Bedrock namespaced models ────────────────────────────────────────────────
+
+describe("amazon-bedrock namespaced wire_ids", () => {
+	test('lookup("amazon-bedrock", "moonshotai.kimi-k2.5") returns canonical kimi-k2.5', () => {
+		const m = lookup("amazon-bedrock", "moonshotai.kimi-k2.5");
+		expect(m).not.toBeNull();
+		expect(m?.canonical_id).toBe("kimi-k2.5");
+	});
+
+	test('lookup("amazon-bedrock", "moonshot.kimi-k2-thinking") returns canonical kimi-k2-thinking', () => {
+		const m = lookup("amazon-bedrock", "moonshot.kimi-k2-thinking");
+		expect(m).not.toBeNull();
+		expect(m?.canonical_id).toBe("kimi-k2-thinking");
+		expect(m?.tier).toBe("heavy");
+	});
+
+	test('lookup("amazon-bedrock", "anthropic.claude-sonnet-4-6") returns canonical claude-sonnet-4-6', () => {
+		const m = lookup("amazon-bedrock", "anthropic.claude-sonnet-4-6");
+		expect(m).not.toBeNull();
+		expect(m?.canonical_id).toBe("claude-sonnet-4-6");
+	});
+});
+
+// ─── allCanonicalIds ──────────────────────────────────────────────────────────
+
+describe("allCanonicalIds", () => {
+	test("returns a non-empty array", () => {
+		const ids = allCanonicalIds();
+		expect(ids.length).toBeGreaterThan(10);
+	});
+
+	test("kimi-k2.5 is in the list", () => {
+		expect(allCanonicalIds()).toContain("kimi-k2.5");
+	});
+
+	test("kimi-k2.6 is in the list", () => {
+		expect(allCanonicalIds()).toContain("kimi-k2.6");
+	});
+});
+
+// ─── routeKeyOf ──────────────────────────────────────────────────────────────
+
+describe("routeKeyOf", () => {
+	test("builds correct fused key", () => {
+		const m = lookup("kimi-coding", "kimi-k2.6")!;
+		expect(routeKeyOf(m)).toBe("kimi-coding/kimi-k2.6");
+	});
+});
--- a/src/resources/extensions/sf/tests/model-route-failure.test.ts
+++ b/src/resources/extensions/sf/tests/model-route-failure.test.ts
@ -0,0 +1,263 @@
+/**
+ * Tests for model-route-failure.js — generation guard and solver pinning (ADR-0079).
+ *
+ * Swarm B spec:
+ *   1. Solver-pinned unit ("autonomous-solver") cannot fail over across
+ *      canonical_id boundaries. The resolver must return undefined when all
+ *      remaining routes belong to a different canonical model.
+ *   2. Same-canonical multi-route failover works: two routes for the same
+ *      canonical id (kimi-k2.6 and kimi-for-coding both map to kimi-k2.6).
+ *   3. Cross-generation failover for non-solver units succeeds AND emits
+ *      logGenerationDowngrade via logWarning.
+ */
+
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+// ── Mock workflow-logger so we can assert on logWarning calls ──────────────
+vi.mock("../workflow-logger.js", () => ({
+	logWarning: vi.fn(),
+	logError: vi.fn(),
+}));
+
+import { logWarning } from "../workflow-logger.js";
+
+import {
+	logGenerationDowngrade,
+	resolveNextAvailableModelRoute,
+} from "../model-route-failure.js";
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Minimal model shape this suite passes as `current` and `availableModels` entries.
+ * Use real registry provider + id pairs when canonicalIdFor() must resolve; one test passes an unregistered pair on purpose.
+ */
+function makeModel(provider: string, id: string) {
+	return { provider, id, api: "openai-completions" as const };
+}
+
+beforeEach(() => {
+	vi.clearAllMocks();
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 1. Solver-pinning guard
+// ─────────────────────────────────────────────────────────────────────────────
+
+describe("solver-pinned failover (ADR-0079)", () => {
+	it("returns undefined when the only available route has a different canonical_id than the failed solver route", () => {
+		// Scenario: solver is running on kimi-coding/kimi-k2.6 (canonical: kimi-k2.6).
+		// The only unfailed route is openrouter/moonshotai/kimi-k2.5 (canonical: kimi-k2.5).
+		// Because these are different canonical ids, the solver-pinned guard must
+		// reject the candidate and return undefined.
+		const result = resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [
+				makeModel("kimi-coding", "kimi-k2.6"),          // same as current — filtered
+				makeModel("openrouter", "moonshotai/kimi-k2.5"), // canonical: kimi-k2.5 ≠ kimi-k2.6
+			],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "rate-limit" },
+			],
+			unitType: "autonomous-solver",
+		});
+		expect(result).toBeUndefined();
+	});
+
+	it("returns undefined when candidate is an unregistered route (null canonical) for solver-pinned unit", () => {
+		// ollama-cloud/kimi-k2.5:cloud is not in the registry — canonicalIdFor
+		// returns null. The guard treats null !== "kimi-k2.6" as a mismatch.
+		const result = resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [
+				makeModel("ollama-cloud", "kimi-k2.5:cloud"), // not in registry
+			],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "server" },
+			],
+			unitType: "autonomous-solver",
+		});
+		expect(result).toBeUndefined();
+	});
+
+	it("does NOT emit logGenerationDowngrade for solver-pinned failover (guard rejects before logging)", () => {
+		resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [makeModel("openrouter", "moonshotai/kimi-k2.5")],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "rate-limit" },
+			],
+			unitType: "autonomous-solver",
+		});
+		// The guard rejected before logging — no downgrade event should be emitted.
+		expect(logWarning).not.toHaveBeenCalled();
+	});
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 2. Same-canonical multi-route failover
+// ─────────────────────────────────────────────────────────────────────────────
+
+describe("same-canonical multi-route failover", () => {
+	it("succeeds when a second route shares the same canonical_id as the failed route", () => {
+		// kimi-coding/kimi-for-coding maps to canonical kimi-k2.6 (same as
+		// kimi-coding/kimi-k2.6). This is the standard same-canonical path.
+		const result = resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [
+				makeModel("kimi-coding", "kimi-k2.6"),      // same as current — filtered
+				makeModel("kimi-coding", "kimi-for-coding"), // canonical: kimi-k2.6 ✓
+			],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "rate-limit" },
+			],
+			unitType: "autonomous-solver",
+		});
+		expect(result).toBeDefined();
+		expect(result?.model.provider).toBe("kimi-coding");
+		expect(result?.model.id).toBe("kimi-for-coding");
+		expect(result?.source).toBe("available");
+	});
+
+	it("does not emit logGenerationDowngrade for same-canonical failover", () => {
+		resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [
+				makeModel("kimi-coding", "kimi-k2.6"),
+				makeModel("kimi-coding", "kimi-for-coding"),
+			],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "server" },
+			],
+			unitType: "execute-task",
+		});
+		expect(logWarning).not.toHaveBeenCalled();
+	});
+
+	it("same-canonical failover works even for solver-pinned unit type", () => {
+		// Within the same canonical_id, solver pin does not block failover.
+		const result = resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [
+				makeModel("kimi-coding", "kimi-k2.6"),
+				makeModel("kimi-coding", "kimi-for-coding"),
+			],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "rate-limit" },
+			],
+			unitType: "autonomous-solver",
+		});
+		expect(result).toBeDefined();
+		expect(result?.model.id).toBe("kimi-for-coding");
+	});
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 3. Cross-generation failover for non-solver units
+// ─────────────────────────────────────────────────────────────────────────────
+
+describe("cross-generation failover (executor layer)", () => {
+	it("succeeds and emits logGenerationDowngrade when crossing generation boundaries", () => {
+		// Failed route is kimi-k2.6 (gen: k2.6); the only candidate is kimi-k2.5 via openrouter (gen: k2.5).
+		// The generations differ, so failover must still succeed AND the generation-downgrade warning must fire.
+		const result = resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [
+				makeModel("openrouter", "moonshotai/kimi-k2.5"),
+			],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "server" },
+			],
+			unitType: "execute-task",
+		});
+		expect(result).toBeDefined();
+		expect(result?.model.provider).toBe("openrouter");
+		// The downgrade is observed through logWarning, which must receive the structured generation-downgrade payload.
+		expect(logWarning).toHaveBeenCalledWith(
+			"model-route-failure",
+			"generation-downgrade",
+			expect.objectContaining({
+				from: "kimi-k2.6",
+				to: "kimi-k2.5",
+				unitType: "execute-task",
+				sameGeneration: false,
+			}),
+		);
+	});
+
+	it("emits logGenerationDowngrade with the correct unitType from args", () => {
+		resolveNextAvailableModelRoute({
+			current: makeModel("kimi-coding", "kimi-k2.6"),
+			availableModels: [makeModel("openrouter", "moonshotai/kimi-k2.5")],
+			failedRoutes: [
+				{ provider: "kimi-coding", modelId: "kimi-k2.6", reason: "rate-limit" },
+			],
+			unitType: "plan-slice", // deliberately not "execute-task": proves the caller's unitType is forwarded into the payload
+		});
+		expect(logWarning).toHaveBeenCalledWith(
+			"model-route-failure",
+			"generation-downgrade",
+			expect.objectContaining({ unitType: "plan-slice" }),
+		);
+	});
+
+	it("does not emit logGenerationDowngrade when no current route is set", () => {
+		// When current is undefined, canonicalIdFor returns null and no generation
+		// check can be performed — no downgrade event should fire.
+		const result = resolveNextAvailableModelRoute({
+			current: undefined,
+			availableModels: [makeModel("openrouter", "moonshotai/kimi-k2.5")],
+			failedRoutes: [],
+			unitType: "execute-task",
+		});
+		expect(result).toBeDefined();
+		expect(logWarning).not.toHaveBeenCalled(); // NOTE(review): assumes the logWarning mock is cleared before each test — confirm in the suite setup
+	});
+
+	it("does not emit logGenerationDowngrade when both routes share the same generation", () => {
+		// claude-sonnet-4 and claude-sonnet-4-5 both have generation "sonnet-4", so this route switch is not a downgrade and must not log.
+		resolveNextAvailableModelRoute({
+			current: makeModel("anthropic", "claude-sonnet-4-20250514"),
+			availableModels: [makeModel("anthropic", "claude-sonnet-4-5-20250929")],
+			failedRoutes: [
+				{
+					provider: "anthropic",
+					modelId: "claude-sonnet-4-20250514",
+					reason: "rate-limit",
+				},
+			],
+			unitType: "execute-task",
+		});
+		expect(logWarning).not.toHaveBeenCalled(); // NOTE(review): assumes the logWarning mock is cleared before each test — confirm in the suite setup
+	});
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 4. logGenerationDowngrade helper
+// ─────────────────────────────────────────────────────────────────────────────
+
+describe("logGenerationDowngrade helper", () => { // exercises the helper directly, without going through route resolution
+	it("calls logWarning with structured generation-downgrade payload", () => {
+		logGenerationDowngrade("kimi-k2.6", "kimi-k2.5", "execute-task", "test reason"); // positional args surface as from, to, unitType, reason in the payload asserted below
+		expect(logWarning).toHaveBeenCalledWith(
+			"model-route-failure",
+			"generation-downgrade",
+			{ // exact-match payload (no objectContaining): any extra or missing key fails this test
+				from: "kimi-k2.6",
+				to: "kimi-k2.5",
+				unitType: "execute-task",
+				reason: "test reason",
+				sameGeneration: false,
+			},
+		);
+	});
+
+	it("uses default reason when none is supplied", () => {
+		logGenerationDowngrade("kimi-k2.6", "kimi-k2.5", "plan-slice", undefined); // reason passed as undefined so the helper's fallback text is used
+		expect(logWarning).toHaveBeenCalledWith(
+			"model-route-failure",
+			"generation-downgrade",
+			expect.objectContaining({ reason: "cross-generation failover" }),
+		);
+	});
+});