From 0694803df38d2bb681184234cf6ba9deec2af206 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 06:28:06 +0200 Subject: [PATCH] feat(model-router): explicit agentic score for every capability profile Sweep MODEL_CAPABILITY_PROFILES so all 82 entries declare an explicit agentic score; the agentic=50 fallback in scoreModel was silently giving untouched profiles a generous default and letting weak agentic models slip through execute-task routing. Anchors per the entry's suggestedFix: coding-only ~25-40, very small/older ~30-40, older generations ~55-70, frontier agentic ~85-95. Adds an invariant test that asserts no profile relies on the default. Closes sf-mp37p9u2-80f2gz. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/resources/extensions/sf/model-router.js | 45 +++++++++++++++++++ .../sf/tests/model-router-agentic.test.mjs | 15 +++++++ 2 files changed, 60 insertions(+) diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js index 980a18d03..6ed7a720c 100644 --- a/src/resources/extensions/sf/model-router.js +++ b/src/resources/extensions/sf/model-router.js @@ -174,6 +174,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 40, instruction: 65, + agentic: 35, }, "claude-3-opus-latest": { agentic: 88, @@ -194,6 +195,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 65, longContext: 70, instruction: 80, + agentic: 65, }, "gpt-4o-mini": { coding: 55, @@ -203,6 +205,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 90, longContext: 45, instruction: 70, + agentic: 50, }, "gpt-4-turbo": { coding: 78, @@ -212,6 +215,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 50, longContext: 65, instruction: 78, + agentic: 60, }, "gpt-4.1": { coding: 82, @@ -221,6 +225,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 62, longContext: 72, instruction: 82, + agentic: 70, }, "gpt-4.1-mini": { coding: 58, @@ -230,6 +235,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 88, longContext: 48, instruction: 72, + agentic: 55, }, "gpt-4.1-nano": { coding: 40, @@ -239,6 +245,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 30, instruction: 60, + agentic: 35, }, "gpt-5": { coding: 92, @@ -259,6 +266,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 88, longContext: 52, instruction: 74, + agentic: 75, }, "gpt-5-nano": { coding: 42, @@ -268,6 +276,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 32, instruction: 62, + agentic: 60, }, "gpt-5-pro": { coding: 94, @@ -393,6 +402,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 20, longContext: 65, instruction: 82, + agentic: 65, }, o3: { coding: 80, @@ -402,6 +412,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 25, longContext: 70, instruction: 85, + agentic: 72, }, "o4-mini": { coding: 75, @@ -411,6 +422,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 60, longContext: 65, instruction: 80, + agentic: 70, }, "o4-mini-deep-research": { coding: 75, @@ -420,6 +432,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 30, longContext: 80, instruction: 80, + agentic: 65, }, // ── Google ───────────────────────────────────────────────────────────────── "gemini-2.5-pro": { @@ -430,6 +443,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 55, longContext: 90, instruction: 75, + agentic: 70, }, "gemini-3.1-pro-preview": { coding: 82, @@ -472,6 +486,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 96, longContext: 85, instruction: 68, + agentic: 60, }, "gemini-2.5-flash": { coding: 60, @@ -481,6 +496,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 92, longContext: 85, instruction: 70, + agentic: 60, }, "gemini-2.5-flash-lite": { coding: 52, @@ -490,6 +506,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 97, longContext: 78, instruction: 65, + agentic: 50, }, "gemini-2.0-flash": { coding: 50, @@ -499,6 +516,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 60, instruction: 65, + agentic: 55, }, "gemini-flash-2.0": { coding: 50, @@ -508,6 +526,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 60, instruction: 65, + agentic: 55, }, // ── DeepSeek ─────────────────────────────────────────────────────────────── "deepseek-chat": { @@ -518,6 +537,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 70, longContext: 55, instruction: 65, + agentic: 55, }, // ── Mistral AI ───────────────────────────────────────────────────────────── "mistral-large-latest": { @@ -528,6 +548,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 50, longContext: 75, instruction: 85, + agentic: 60, }, "mistral-large-2411": { coding: 85, @@ -537,6 +558,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 50, longContext: 75, instruction: 85, + agentic: 55, }, "mistral-large-2512": { coding: 88, @@ -546,6 +568,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 52, longContext: 78, instruction: 88, + agentic: 65, }, "pixtral-large-latest": { coding: 85, @@ -555,6 +578,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 45, longContext: 80, instruction: 85, + agentic: 50, }, "mistral-medium-latest": { coding: 75, @@ -564,6 +588,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 60, longContext: 65, instruction: 75, + agentic: 55, }, "mistral-medium-2505": { coding: 75, @@ -573,6 +598,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 60, longContext: 65, instruction: 75, + agentic: 50, }, "mistral-medium-2508": { coding: 78, @@ -582,6 +608,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 62, longContext: 68, instruction: 78, + agentic: 55, }, "mistral-small-latest": { coding: 65, @@ -591,6 +618,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 80, longContext: 55, instruction: 70, + agentic: 40, }, "mistral-small-2506": { coding: 65, @@ -600,6 +628,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 80, longContext: 55, instruction: 70, + agentic: 40, }, "mistral-small-2603": { coding: 68, @@ -609,6 +638,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 82, longContext: 58, instruction: 72, + agentic: 40, }, "codestral-latest": { coding: 85, @@ -631,6 +661,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 90, longContext: 45, instruction: 70, + agentic: 30, }, "ministral-3b-latest": { coding: 45, @@ -640,6 +671,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 35, instruction: 60, + agentic: 25, }, "open-mixtral-8x22b": { coding: 75, @@ -649,6 +681,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 40, longContext: 70, instruction: 75, + agentic: 40, }, "pixtral-12b": { coding: 60, @@ -658,6 +691,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 75, longContext: 60, instruction: 65, + agentic: 35, }, "mistral-nemo": { coding: 60, @@ -667,6 +701,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 85, longContext: 60, instruction: 65, + agentic: 35, }, "magistral-medium-latest": { coding: 80, @@ -676,6 +711,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 55, longContext: 75, instruction: 80, + agentic: 65, }, "magistral-small": { coding: 70, @@ -685,6 +721,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 75, longContext: 65, instruction: 70, + agentic: 50, }, "devstral-2512": { coding: 82, @@ -757,6 +794,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 35, longContext: 80, instruction: 88, + agentic: 75, }, "glm-5-turbo": { coding: 85, @@ -766,6 +804,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 65, longContext: 75, instruction: 85, + agentic: 70, }, "glm-5.1": { coding: 92, @@ -775,6 +814,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 38, longContext: 82, instruction: 89, + agentic: 75, }, "glm-5v-turbo": { coding: 82, @@ -784,6 +824,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 60, longContext: 75, instruction: 82, + agentic: 65, }, "glm-4.7": { coding: 80, @@ -793,6 +834,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 60, longContext: 70, instruction: 80, + agentic: 55, }, "glm-4.7-flash": { coding: 50, @@ -802,6 +844,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 95, longContext: 50, instruction: 65, + agentic: 50, }, "glm-4.7-flashx": { coding: 45, @@ -811,6 +854,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 98, longContext: 45, instruction: 60, + agentic: 50, }, // ── Qwen / Ollama Cloud compatible tags ────────────────────────────────── "qwen3-coder:480b": { @@ -842,6 +886,7 @@ export const MODEL_CAPABILITY_PROFILES = { speed: 62, longContext: 86, instruction: 74, + agentic: 55, }, // ── Moonshot / Kimi ─────────────────────────────────────────────────────── "kimi-k2.6": { diff --git a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs index e3e0d98c7..77a87420f 100644 --- a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs +++ b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs @@ -115,6 +115,21 @@ describe("agentic capability axis (ADR-0079)", () => { expect(newScore).toBeGreaterThan(oldScore); }); + test("every profile has an explicit agentic score (no defaulting to 50)", () => { + // sf-mp37p9u2-80f2gz: the agentic=50 fallback in scoreModel was + // silently letting untouched profiles escape penalization for poor + // tool-use reliability. Every profile must declare a deliberate + // score so the router can differentiate the full table. + const offenders = []; + for (const [id, profile] of Object.entries(MODEL_CAPABILITY_PROFILES)) { + if (typeof profile.agentic !== "number") offenders.push(id); + } + expect( + offenders, + `profiles missing explicit agentic: ${offenders.join(", ")}`, + ).toEqual([]); + }); + test("known agentic-frontier models all have agentic >= 85", () => { const agenticFrontier = [ "claude-opus-4-6",