chore: auto-commit after challenge

SF-Unit: M048/S04/challenge
This commit is contained in:
Mikael Hugo 2026-05-17 20:33:12 +02:00
parent d8fd70e57f
commit dd03d17089
4 changed files with 63 additions and 19 deletions

View file

@ -105,6 +105,19 @@ export function allowsFreeTierAutoRoute(unitType) {
);
}
/**
 * Tell whether a unit type needs a scored, reliable route when dispatched
 * autonomously.
 *
 * Purpose: main repo-changing or verification units must not silently inherit
 * the session default when benchmark/model routing produced no candidate.
 * The answer is the exact negation of allowsFreeTierAutoRoute for the same
 * unit type.
 *
 * Consumer: selectAndApplyModel final fallback handling.
 *
 * @param {string} unitType - Unit type identifier, forwarded unchanged to
 *   allowsFreeTierAutoRoute.
 * @returns {boolean} false when allowsFreeTierAutoRoute returns a truthy
 *   value for the unit type, true otherwise.
 */
function requiresScoredAutonomousRoute(unitType) {
  const freeTierAllowed = allowsFreeTierAutoRoute(unitType);
  return !freeTierAllowed;
}
/**
* Remove free-tier routes from autonomous auto-selection unless the operator
* explicitly opts in.
@ -1006,18 +1019,28 @@ export async function selectAndApplyModel(
`or unset enabledModels. Set SF_BYPASS_ENABLED_MODELS=1 to disable the check.`,
);
}
if (
uokFlags.modelPolicy &&
policyAllowedModelKeys &&
!attemptedPolicyEligible
if (
uokFlags.modelPolicy &&
policyAllowedModelKeys &&
!attemptedPolicyEligible
) {
throw new ModelPolicyDispatchBlockedError(
unitType,
unitId,
policyDenyReasons,
);
}
// ── Advisor-check fallback to session model ─────────────────────────────────
policyDenyReasons,
);
}
if (
appliedModel === null &&
isAutoMode &&
requiresScoredAutonomousRoute(unitType)
) {
throw new Error(
`No scored autonomous model route was available for ${unitType}/${unitId}. ` +
`Refusing to fall back to the session/default model for a main autonomous unit.`,
);
}
// ── Advisor-check fallback to session model ─────────────────────────────────
// When all configured models were filtered by the advisor check and no
// autoModeStartModel was provided, fall back to ctx.model (the active session
// model) so the subagent can still run on an allowed provider.
@ -1039,7 +1062,10 @@ export async function selectAndApplyModel(
reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
}
}
} else if (autoModeStartModel) {
} else if (
autoModeStartModel &&
(!isAutoMode || !requiresScoredAutonomousRoute(unitType))
) {
// No model preference for this unit type — re-apply the model captured
// at autonomous mode start to prevent bleed from shared global settings.json (#650).
const startEnabledModels = readEnabledModels();

View file

@ -269,6 +269,16 @@ const PROFILES = {
weights: { hle: 0.3, gpqa: 0.25, mmlu_pro: 0.25, swe_bench: 0.2 },
label: "validation",
},
challenge: {
weights: {
hle: 0.25,
gpqa: 0.2,
swe_bench: 0.25,
instruction_following: 0.15,
live_code_bench: 0.15,
},
label: "adversarial-review",
},
subagent: {
weights: {
swe_bench: 0.3,

View file

@ -1092,6 +1092,12 @@ export const BASE_REQUIREMENTS = {
reasoning: 0.8,
agentic: 0.9,
},
challenge: {
debugging: 0.7,
reasoning: 0.85,
instruction: 0.65,
agentic: 0.9,
},
};
// ─── Public API ──────────────────────────────────────────────────────────────
/**
@ -1374,11 +1380,12 @@ export function resolveModelForComplexity(
// the winner, prefer it. Stops within-slice routing thrash where
// T01 → gemini-flash and T02 → codestral on the same slice.
const STICKY_WINDOW_POINTS = 8;
const stickyId = (() => {
if (!stickyHint?.id) return null;
const stickyKey = stickyHint.provider
? `${stickyHint.provider}/${stickyHint.id}`
: stickyHint.id;
const stickyId = (() => {
if (routingConfig.sticky_routing !== true) return null;
if (!stickyHint?.id) return null;
const stickyKey = stickyHint.provider
? `${stickyHint.provider}/${stickyHint.id}`
: stickyHint.id;
// Match either "provider/model" or bare model id in the eligible list.
const found = scored.find(
(s) =>

View file

@ -403,11 +403,12 @@ export function resolveModelWithFallbacksForUnit(unitType, options = {}) {
case "run-uat":
phaseConfig = m.completion;
break;
case "reassess-roadmap":
case "gate-evaluate":
case "validate-milestone":
phaseConfig = m.validation ?? m.planning;
break;
case "reassess-roadmap":
case "gate-evaluate":
case "validate-milestone":
case "challenge":
phaseConfig = m.validation ?? m.planning;
break;
case "rewrite-docs":
phaseConfig = m.validation ?? m.execution ?? m.planning;
break;