From dd03d170897b2950389c876b83e5fd3780a0f58c Mon Sep 17 00:00:00 2001
From: Mikael Hugo <mikkihugo@users.noreply.github.com>
Date: Sun, 17 May 2026 20:33:12 +0200
Subject: [PATCH] chore: auto-commit after challenge

SF-Unit: M048/S04/challenge
---
 .../extensions/sf/auto-model-selection.js     | 44 +++++++++++++++----
 .../extensions/sf/benchmark-selector.js       | 10 +++++
 src/resources/extensions/sf/model-router.js   | 17 ++++---
 .../extensions/sf/preferences-models.js       | 11 ++---
 4 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js
index c4aa7e9a9..c5f60848d 100644
--- a/src/resources/extensions/sf/auto-model-selection.js
+++ b/src/resources/extensions/sf/auto-model-selection.js
@@ -105,6 +105,19 @@ export function allowsFreeTierAutoRoute(unitType) {
 	);
 }
 
+/**
+ * Return true when autonomous dispatch must pick a scored, reliable route.
+ *
+ * Purpose: keep units that are not free-tier eligible from silently inheriting
+ * the session default when benchmark/model routing produced no candidate.
+ * Consumer: selectAndApplyModel final fallback handling.
+ * @param {string} unitType - Unit type id; forwarded to allowsFreeTierAutoRoute.
+ * @returns {boolean} true exactly when allowsFreeTierAutoRoute(unitType) is falsy.
+ */
+function requiresScoredAutonomousRoute(unitType) {
+	return !allowsFreeTierAutoRoute(unitType);
+}
+
 /**
  * Remove free-tier routes from autonomous auto-selection unless the operator
  * explicitly opts in.
@@ -1006,18 +1019,28 @@ export async function selectAndApplyModel(
 					`or unset enabledModels. Set SF_BYPASS_ENABLED_MODELS=1 to disable the check.`,
 			);
 		}
-		if (
-			uokFlags.modelPolicy &&
-			policyAllowedModelKeys &&
-			!attemptedPolicyEligible
+			if (
+				uokFlags.modelPolicy &&
+				policyAllowedModelKeys &&
+				!attemptedPolicyEligible
 		) {
 			throw new ModelPolicyDispatchBlockedError(
 				unitType,
 				unitId,
-				policyDenyReasons,
-			);
-		}
-		// ── Advisor-check fallback to session model ─────────────────────────────────
+					policyDenyReasons,
+				);
+			}
+			if (
+				appliedModel === null &&
+				isAutoMode &&
+				requiresScoredAutonomousRoute(unitType)
+			) {
+				throw new Error(
+					`No scored autonomous model route was available for ${unitType}/${unitId}. ` +
+						`Refusing to fall back to the session/default model for a main autonomous unit.`,
+				);
+			}
+			// ── Advisor-check fallback to session model ─────────────────────────────────
 		// When all configured models were filtered by the advisor check and no
 		// autoModeStartModel was provided, fall back to ctx.model (the active session
 		// model) so the subagent can still run on an allowed provider.
@@ -1039,7 +1062,10 @@ export async function selectAndApplyModel(
 				reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
 			}
 		}
-	} else if (autoModeStartModel) {
+		} else if (
+			autoModeStartModel &&
+			(!isAutoMode || !requiresScoredAutonomousRoute(unitType))
+		) {
 		// No model preference for this unit type — re-apply the model captured
 		// at autonomous mode start to prevent bleed from shared global settings.json (#650).
 		const startEnabledModels = readEnabledModels();
diff --git a/src/resources/extensions/sf/benchmark-selector.js b/src/resources/extensions/sf/benchmark-selector.js
index 655e3bffa..f438b69ef 100644
--- a/src/resources/extensions/sf/benchmark-selector.js
+++ b/src/resources/extensions/sf/benchmark-selector.js
@@ -269,6 +269,16 @@ const PROFILES = {
 		weights: { hle: 0.3, gpqa: 0.25, mmlu_pro: 0.25, swe_bench: 0.2 },
 		label: "validation",
 	},
+	challenge: {
+		weights: {
+			hle: 0.25,
+			gpqa: 0.2,
+			swe_bench: 0.25,
+			instruction_following: 0.15,
+			live_code_bench: 0.15,
+		},
+		label: "adversarial-review",
+	},
 	subagent: {
 		weights: {
 			swe_bench: 0.3,
diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js
index ca61e7b8b..65c89e414 100644
--- a/src/resources/extensions/sf/model-router.js
+++ b/src/resources/extensions/sf/model-router.js
@@ -1092,6 +1092,12 @@ export const BASE_REQUIREMENTS = {
 		reasoning: 0.8,
 		agentic: 0.9,
 	},
+	challenge: {
+		debugging: 0.7,
+		reasoning: 0.85,
+		instruction: 0.65,
+		agentic: 0.9,
+	},
 };
 // ─── Public API ──────────────────────────────────────────────────────────────
 /**
@@ -1374,11 +1380,12 @@ export function resolveModelForComplexity(
 			// the winner, prefer it. Stops within-slice routing thrash where
 			// T01 → gemini-flash and T02 → codestral on the same slice.
 			const STICKY_WINDOW_POINTS = 8;
-			const stickyId = (() => {
-				if (!stickyHint?.id) return null;
-				const stickyKey = stickyHint.provider
-					? `${stickyHint.provider}/${stickyHint.id}`
-					: stickyHint.id;
+				const stickyId = (() => {
+					if (routingConfig.sticky_routing !== true) return null;
+					if (!stickyHint?.id) return null;
+					const stickyKey = stickyHint.provider
+						? `${stickyHint.provider}/${stickyHint.id}`
+						: stickyHint.id;
 				// Match either "provider/model" or bare model id in the eligible list.
 				const found = scored.find(
 					(s) =>
diff --git a/src/resources/extensions/sf/preferences-models.js b/src/resources/extensions/sf/preferences-models.js
index 20efd6b54..6f374f644 100644
--- a/src/resources/extensions/sf/preferences-models.js
+++ b/src/resources/extensions/sf/preferences-models.js
@@ -403,11 +403,12 @@ export function resolveModelWithFallbacksForUnit(unitType, options = {}) {
 		case "run-uat":
 			phaseConfig = m.completion;
 			break;
-		case "reassess-roadmap":
-		case "gate-evaluate":
-		case "validate-milestone":
-			phaseConfig = m.validation ?? m.planning;
-			break;
+			case "reassess-roadmap":
+			case "gate-evaluate":
+			case "validate-milestone":
+			case "challenge":
+				phaseConfig = m.validation ?? m.planning;
+				break;
 		case "rewrite-docs":
 			phaseConfig = m.validation ?? m.execution ?? m.planning;
 			break;