feat(model-router): explicit agentic score for every capability profile
Sweep MODEL_CAPABILITY_PROFILES so that all 82 entries declare an explicit agentic score. Previously, the agentic=50 fallback in scoreModel silently gave untouched profiles a generous default, which let models with weak agentic ability slip through execute-task routing. Score anchors follow the tracked entry's suggestedFix: coding-only models ~25-40, very small or older models ~30-40, older generations ~55-70, frontier agentic models ~85-95. Adds an invariant test asserting that no profile relies on the default. Closes sf-mp37p9u2-80f2gz. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
48e793c003
commit
0694803df3
2 changed files with 60 additions and 0 deletions
|
|
@ -174,6 +174,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 40,
|
||||
instruction: 65,
|
||||
agentic: 35,
|
||||
},
|
||||
"claude-3-opus-latest": {
|
||||
agentic: 88,
|
||||
|
|
@ -194,6 +195,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 65,
|
||||
longContext: 70,
|
||||
instruction: 80,
|
||||
agentic: 65,
|
||||
},
|
||||
"gpt-4o-mini": {
|
||||
coding: 55,
|
||||
|
|
@ -203,6 +205,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 90,
|
||||
longContext: 45,
|
||||
instruction: 70,
|
||||
agentic: 50,
|
||||
},
|
||||
"gpt-4-turbo": {
|
||||
coding: 78,
|
||||
|
|
@ -212,6 +215,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 50,
|
||||
longContext: 65,
|
||||
instruction: 78,
|
||||
agentic: 60,
|
||||
},
|
||||
"gpt-4.1": {
|
||||
coding: 82,
|
||||
|
|
@ -221,6 +225,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 62,
|
||||
longContext: 72,
|
||||
instruction: 82,
|
||||
agentic: 70,
|
||||
},
|
||||
"gpt-4.1-mini": {
|
||||
coding: 58,
|
||||
|
|
@ -230,6 +235,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 88,
|
||||
longContext: 48,
|
||||
instruction: 72,
|
||||
agentic: 55,
|
||||
},
|
||||
"gpt-4.1-nano": {
|
||||
coding: 40,
|
||||
|
|
@ -239,6 +245,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 30,
|
||||
instruction: 60,
|
||||
agentic: 35,
|
||||
},
|
||||
"gpt-5": {
|
||||
coding: 92,
|
||||
|
|
@ -259,6 +266,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 88,
|
||||
longContext: 52,
|
||||
instruction: 74,
|
||||
agentic: 75,
|
||||
},
|
||||
"gpt-5-nano": {
|
||||
coding: 42,
|
||||
|
|
@ -268,6 +276,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 32,
|
||||
instruction: 62,
|
||||
agentic: 60,
|
||||
},
|
||||
"gpt-5-pro": {
|
||||
coding: 94,
|
||||
|
|
@ -393,6 +402,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 20,
|
||||
longContext: 65,
|
||||
instruction: 82,
|
||||
agentic: 65,
|
||||
},
|
||||
o3: {
|
||||
coding: 80,
|
||||
|
|
@ -402,6 +412,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 25,
|
||||
longContext: 70,
|
||||
instruction: 85,
|
||||
agentic: 72,
|
||||
},
|
||||
"o4-mini": {
|
||||
coding: 75,
|
||||
|
|
@ -411,6 +422,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 60,
|
||||
longContext: 65,
|
||||
instruction: 80,
|
||||
agentic: 70,
|
||||
},
|
||||
"o4-mini-deep-research": {
|
||||
coding: 75,
|
||||
|
|
@ -420,6 +432,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 30,
|
||||
longContext: 80,
|
||||
instruction: 80,
|
||||
agentic: 65,
|
||||
},
|
||||
// ── Google ─────────────────────────────────────────────────────────────────
|
||||
"gemini-2.5-pro": {
|
||||
|
|
@ -430,6 +443,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 55,
|
||||
longContext: 90,
|
||||
instruction: 75,
|
||||
agentic: 70,
|
||||
},
|
||||
"gemini-3.1-pro-preview": {
|
||||
coding: 82,
|
||||
|
|
@ -472,6 +486,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 96,
|
||||
longContext: 85,
|
||||
instruction: 68,
|
||||
agentic: 60,
|
||||
},
|
||||
"gemini-2.5-flash": {
|
||||
coding: 60,
|
||||
|
|
@ -481,6 +496,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 92,
|
||||
longContext: 85,
|
||||
instruction: 70,
|
||||
agentic: 60,
|
||||
},
|
||||
"gemini-2.5-flash-lite": {
|
||||
coding: 52,
|
||||
|
|
@ -490,6 +506,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 97,
|
||||
longContext: 78,
|
||||
instruction: 65,
|
||||
agentic: 50,
|
||||
},
|
||||
"gemini-2.0-flash": {
|
||||
coding: 50,
|
||||
|
|
@ -499,6 +516,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 60,
|
||||
instruction: 65,
|
||||
agentic: 55,
|
||||
},
|
||||
"gemini-flash-2.0": {
|
||||
coding: 50,
|
||||
|
|
@ -508,6 +526,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 60,
|
||||
instruction: 65,
|
||||
agentic: 55,
|
||||
},
|
||||
// ── DeepSeek ───────────────────────────────────────────────────────────────
|
||||
"deepseek-chat": {
|
||||
|
|
@ -518,6 +537,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 70,
|
||||
longContext: 55,
|
||||
instruction: 65,
|
||||
agentic: 55,
|
||||
},
|
||||
// ── Mistral AI ─────────────────────────────────────────────────────────────
|
||||
"mistral-large-latest": {
|
||||
|
|
@ -528,6 +548,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 50,
|
||||
longContext: 75,
|
||||
instruction: 85,
|
||||
agentic: 60,
|
||||
},
|
||||
"mistral-large-2411": {
|
||||
coding: 85,
|
||||
|
|
@ -537,6 +558,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 50,
|
||||
longContext: 75,
|
||||
instruction: 85,
|
||||
agentic: 55,
|
||||
},
|
||||
"mistral-large-2512": {
|
||||
coding: 88,
|
||||
|
|
@ -546,6 +568,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 52,
|
||||
longContext: 78,
|
||||
instruction: 88,
|
||||
agentic: 65,
|
||||
},
|
||||
"pixtral-large-latest": {
|
||||
coding: 85,
|
||||
|
|
@ -555,6 +578,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 45,
|
||||
longContext: 80,
|
||||
instruction: 85,
|
||||
agentic: 50,
|
||||
},
|
||||
"mistral-medium-latest": {
|
||||
coding: 75,
|
||||
|
|
@ -564,6 +588,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 60,
|
||||
longContext: 65,
|
||||
instruction: 75,
|
||||
agentic: 55,
|
||||
},
|
||||
"mistral-medium-2505": {
|
||||
coding: 75,
|
||||
|
|
@ -573,6 +598,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 60,
|
||||
longContext: 65,
|
||||
instruction: 75,
|
||||
agentic: 50,
|
||||
},
|
||||
"mistral-medium-2508": {
|
||||
coding: 78,
|
||||
|
|
@ -582,6 +608,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 62,
|
||||
longContext: 68,
|
||||
instruction: 78,
|
||||
agentic: 55,
|
||||
},
|
||||
"mistral-small-latest": {
|
||||
coding: 65,
|
||||
|
|
@ -591,6 +618,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 80,
|
||||
longContext: 55,
|
||||
instruction: 70,
|
||||
agentic: 40,
|
||||
},
|
||||
"mistral-small-2506": {
|
||||
coding: 65,
|
||||
|
|
@ -600,6 +628,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 80,
|
||||
longContext: 55,
|
||||
instruction: 70,
|
||||
agentic: 40,
|
||||
},
|
||||
"mistral-small-2603": {
|
||||
coding: 68,
|
||||
|
|
@ -609,6 +638,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 82,
|
||||
longContext: 58,
|
||||
instruction: 72,
|
||||
agentic: 40,
|
||||
},
|
||||
"codestral-latest": {
|
||||
coding: 85,
|
||||
|
|
@ -631,6 +661,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 90,
|
||||
longContext: 45,
|
||||
instruction: 70,
|
||||
agentic: 30,
|
||||
},
|
||||
"ministral-3b-latest": {
|
||||
coding: 45,
|
||||
|
|
@ -640,6 +671,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 35,
|
||||
instruction: 60,
|
||||
agentic: 25,
|
||||
},
|
||||
"open-mixtral-8x22b": {
|
||||
coding: 75,
|
||||
|
|
@ -649,6 +681,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 40,
|
||||
longContext: 70,
|
||||
instruction: 75,
|
||||
agentic: 40,
|
||||
},
|
||||
"pixtral-12b": {
|
||||
coding: 60,
|
||||
|
|
@ -658,6 +691,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 75,
|
||||
longContext: 60,
|
||||
instruction: 65,
|
||||
agentic: 35,
|
||||
},
|
||||
"mistral-nemo": {
|
||||
coding: 60,
|
||||
|
|
@ -667,6 +701,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 85,
|
||||
longContext: 60,
|
||||
instruction: 65,
|
||||
agentic: 35,
|
||||
},
|
||||
"magistral-medium-latest": {
|
||||
coding: 80,
|
||||
|
|
@ -676,6 +711,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 55,
|
||||
longContext: 75,
|
||||
instruction: 80,
|
||||
agentic: 65,
|
||||
},
|
||||
"magistral-small": {
|
||||
coding: 70,
|
||||
|
|
@ -685,6 +721,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 75,
|
||||
longContext: 65,
|
||||
instruction: 70,
|
||||
agentic: 50,
|
||||
},
|
||||
"devstral-2512": {
|
||||
coding: 82,
|
||||
|
|
@ -757,6 +794,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 35,
|
||||
longContext: 80,
|
||||
instruction: 88,
|
||||
agentic: 75,
|
||||
},
|
||||
"glm-5-turbo": {
|
||||
coding: 85,
|
||||
|
|
@ -766,6 +804,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 65,
|
||||
longContext: 75,
|
||||
instruction: 85,
|
||||
agentic: 70,
|
||||
},
|
||||
"glm-5.1": {
|
||||
coding: 92,
|
||||
|
|
@ -775,6 +814,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 38,
|
||||
longContext: 82,
|
||||
instruction: 89,
|
||||
agentic: 75,
|
||||
},
|
||||
"glm-5v-turbo": {
|
||||
coding: 82,
|
||||
|
|
@ -784,6 +824,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 60,
|
||||
longContext: 75,
|
||||
instruction: 82,
|
||||
agentic: 65,
|
||||
},
|
||||
"glm-4.7": {
|
||||
coding: 80,
|
||||
|
|
@ -793,6 +834,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 60,
|
||||
longContext: 70,
|
||||
instruction: 80,
|
||||
agentic: 55,
|
||||
},
|
||||
"glm-4.7-flash": {
|
||||
coding: 50,
|
||||
|
|
@ -802,6 +844,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 95,
|
||||
longContext: 50,
|
||||
instruction: 65,
|
||||
agentic: 50,
|
||||
},
|
||||
"glm-4.7-flashx": {
|
||||
coding: 45,
|
||||
|
|
@ -811,6 +854,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 98,
|
||||
longContext: 45,
|
||||
instruction: 60,
|
||||
agentic: 50,
|
||||
},
|
||||
// ── Qwen / Ollama Cloud compatible tags ──────────────────────────────────
|
||||
"qwen3-coder:480b": {
|
||||
|
|
@ -842,6 +886,7 @@ export const MODEL_CAPABILITY_PROFILES = {
|
|||
speed: 62,
|
||||
longContext: 86,
|
||||
instruction: 74,
|
||||
agentic: 55,
|
||||
},
|
||||
// ── Moonshot / Kimi ───────────────────────────────────────────────────────
|
||||
"kimi-k2.6": {
|
||||
|
|
|
|||
|
|
@ -115,6 +115,21 @@ describe("agentic capability axis (ADR-0079)", () => {
|
|||
expect(newScore).toBeGreaterThan(oldScore);
|
||||
});
|
||||
|
||||
test("every profile has an explicit agentic score (no defaulting to 50)", () => {
  // sf-mp37p9u2-80f2gz: the agentic=50 fallback in scoreModel was silently
  // letting untouched profiles escape penalization for poor tool-use
  // reliability. Every profile must declare a deliberate score so the
  // router can differentiate the full table.
  //
  // Number.isFinite (rather than a bare typeof check) also rejects NaN and
  // ±Infinity: those are typeof "number" but are not usable scores, so a
  // profile carrying one is treated the same as a profile with no score.
  const offenders = Object.entries(MODEL_CAPABILITY_PROFILES)
    .filter(([, profile]) => !Number.isFinite(profile.agentic))
    .map(([id]) => id);

  // The second argument is the failure message, listing every offending
  // profile id so a failing run points straight at the entries to fix.
  expect(
    offenders,
    `profiles missing explicit agentic: ${offenders.join(", ")}`,
  ).toEqual([]);
});
|
||||
|
||||
test("known agentic-frontier models all have agentic >= 85", () => {
|
||||
const agenticFrontier = [
|
||||
"claude-opus-4-6",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue