From dd03d170897b2950389c876b83e5fd3780a0f58c Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 17 May 2026 20:33:12 +0200 Subject: [PATCH] chore: auto-commit after challenge SF-Unit: M048/S04/challenge --- .../extensions/sf/auto-model-selection.js | 44 +++++++++++++++---- .../extensions/sf/benchmark-selector.js | 10 +++++ src/resources/extensions/sf/model-router.js | 17 ++++--- .../extensions/sf/preferences-models.js | 11 ++--- 4 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js index c4aa7e9a9..c5f60848d 100644 --- a/src/resources/extensions/sf/auto-model-selection.js +++ b/src/resources/extensions/sf/auto-model-selection.js @@ -105,6 +105,19 @@ export function allowsFreeTierAutoRoute(unitType) { ); } +/** + * Return true when autonomous dispatch must pick a scored, reliable route. + * + * Purpose: prevent main repo-changing or verification units from silently + * inheriting the session default when benchmark/model routing did not produce a + * candidate. + * + * Consumer: selectAndApplyModel final fallback handling. + */ +function requiresScoredAutonomousRoute(unitType) { + return !allowsFreeTierAutoRoute(unitType); +} + /** * Remove free-tier routes from autonomous auto-selection unless the operator * explicitly opts in. @@ -1006,18 +1019,28 @@ export async function selectAndApplyModel( `or unset enabledModels. Set SF_BYPASS_ENABLED_MODELS=1 to disable the check.`, ); } - if ( - uokFlags.modelPolicy && - policyAllowedModelKeys && - !attemptedPolicyEligible + if ( + uokFlags.modelPolicy && + policyAllowedModelKeys && + !attemptedPolicyEligible ) { throw new ModelPolicyDispatchBlockedError( unitType, unitId, - policyDenyReasons, - ); - } - // ── Advisor-check fallback to session model ───────────────────────────────── + policyDenyReasons, + ); + } + if ( + appliedModel === null && + isAutoMode && + requiresScoredAutonomousRoute(unitType) + ) { + throw new Error( + `No scored autonomous model route was available for ${unitType}/${unitId}. ` + + `Refusing to fall back to the session/default model for a main autonomous unit.`, + ); + } + // ── Advisor-check fallback to session model ───────────────────────────────── // When all configured models were filtered by the advisor check and no // autoModeStartModel was provided, fall back to ctx.model (the active session // model) so the subagent can still run on an allowed provider. @@ -1039,7 +1062,10 @@ export async function selectAndApplyModel( reapplyThinkingLevel(pi, autoModeStartThinkingLevel); } } - } else if (autoModeStartModel) { + } else if ( + autoModeStartModel && + (!isAutoMode || !requiresScoredAutonomousRoute(unitType)) + ) { // No model preference for this unit type — re-apply the model captured // at autonomous mode start to prevent bleed from shared global settings.json (#650). const startEnabledModels = readEnabledModels(); diff --git a/src/resources/extensions/sf/benchmark-selector.js b/src/resources/extensions/sf/benchmark-selector.js index 655e3bffa..f438b69ef 100644 --- a/src/resources/extensions/sf/benchmark-selector.js +++ b/src/resources/extensions/sf/benchmark-selector.js @@ -269,6 +269,16 @@ const PROFILES = { weights: { hle: 0.3, gpqa: 0.25, mmlu_pro: 0.25, swe_bench: 0.2 }, label: "validation", }, + challenge: { + weights: { + hle: 0.25, + gpqa: 0.2, + swe_bench: 0.25, + instruction_following: 0.15, + live_code_bench: 0.15, + }, + label: "adversarial-review", + }, subagent: { weights: { swe_bench: 0.3, diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js index ca61e7b8b..65c89e414 100644 --- a/src/resources/extensions/sf/model-router.js +++ b/src/resources/extensions/sf/model-router.js @@ -1092,6 +1092,12 @@ export const BASE_REQUIREMENTS = { reasoning: 0.8, agentic: 0.9, }, + challenge: { + debugging: 0.7, + reasoning: 0.85, + instruction: 0.65, + agentic: 0.9, + }, }; // ─── Public API ────────────────────────────────────────────────────────────── /** @@ -1374,11 +1380,12 @@ export function resolveModelForComplexity( // the winner, prefer it. Stops within-slice routing thrash where // T01 → gemini-flash and T02 → codestral on the same slice. const STICKY_WINDOW_POINTS = 8; - const stickyId = (() => { - if (!stickyHint?.id) return null; - const stickyKey = stickyHint.provider - ? `${stickyHint.provider}/${stickyHint.id}` - : stickyHint.id; + const stickyId = (() => { + if (routingConfig.sticky_routing !== true) return null; + if (!stickyHint?.id) return null; + const stickyKey = stickyHint.provider + ? `${stickyHint.provider}/${stickyHint.id}` + : stickyHint.id; // Match either "provider/model" or bare model id in the eligible list. const found = scored.find( (s) => diff --git a/src/resources/extensions/sf/preferences-models.js b/src/resources/extensions/sf/preferences-models.js index 20efd6b54..6f374f644 100644 --- a/src/resources/extensions/sf/preferences-models.js +++ b/src/resources/extensions/sf/preferences-models.js @@ -403,11 +403,12 @@ export function resolveModelWithFallbacksForUnit(unitType, options = {}) { case "run-uat": phaseConfig = m.completion; break; - case "reassess-roadmap": - case "gate-evaluate": - case "validate-milestone": - phaseConfig = m.validation ?? m.planning; - break; + case "reassess-roadmap": + case "gate-evaluate": + case "validate-milestone": + case "challenge": + phaseConfig = m.validation ?? m.planning; + break; case "rewrite-docs": phaseConfig = m.validation ?? m.execution ?? m.planning; break;