feat(model-router): explicit agentic score for every capability profile

Sweep MODEL_CAPABILITY_PROFILES so all 82 entries declare an explicit agentic score; the agentic=50 fallback in scoreModel was silently giving untouched profiles a generous default and letting weak agentic models slip through execute-task routing. Anchors per the entry's suggestedFix: coding-only ~25-40, very small/older ~30-40, older generations ~55-70, frontier agentic ~85-95. Adds an invariant test that asserts no profile relies on the default. Closes sf-mp37p9u2-80f2gz. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 06:28:06 +02:00 · 2026-05-14 06:28:06 +02:00 · 0694803df3
commit 0694803df3
parent 48e793c003
2 changed files with 60 additions and 0 deletions
--- a/src/resources/extensions/sf/model-router.js
+++ b/src/resources/extensions/sf/model-router.js
@ -174,6 +174,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 40,
 		instruction: 65,
+		agentic: 35,
 	},
 	"claude-3-opus-latest": {
 		agentic: 88,
@ -194,6 +195,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 65,
 		longContext: 70,
 		instruction: 80,
+		agentic: 65,
 	},
 	"gpt-4o-mini": {
 		coding: 55,
@ -203,6 +205,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 45,
 		instruction: 70,
+		agentic: 50,
 	},
 	"gpt-4-turbo": {
 		coding: 78,
@ -212,6 +215,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 65,
 		instruction: 78,
+		agentic: 60,
 	},
 	"gpt-4.1": {
 		coding: 82,
@ -221,6 +225,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 72,
 		instruction: 82,
+		agentic: 70,
 	},
 	"gpt-4.1-mini": {
 		coding: 58,
@ -230,6 +235,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 48,
 		instruction: 72,
+		agentic: 55,
 	},
 	"gpt-4.1-nano": {
 		coding: 40,
@ -239,6 +245,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 30,
 		instruction: 60,
+		agentic: 35,
 	},
 	"gpt-5": {
 		coding: 92,
@ -259,6 +266,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 52,
 		instruction: 74,
+		agentic: 75,
 	},
 	"gpt-5-nano": {
 		coding: 42,
@ -268,6 +276,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 32,
 		instruction: 62,
+		agentic: 60,
 	},
 	"gpt-5-pro": {
 		coding: 94,
@ -393,6 +402,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 20,
 		longContext: 65,
 		instruction: 82,
+		agentic: 65,
 	},
 	o3: {
 		coding: 80,
@ -402,6 +412,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 25,
 		longContext: 70,
 		instruction: 85,
+		agentic: 72,
 	},
 	"o4-mini": {
 		coding: 75,
@ -411,6 +422,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 65,
 		instruction: 80,
+		agentic: 70,
 	},
 	"o4-mini-deep-research": {
 		coding: 75,
@ -420,6 +432,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 30,
 		longContext: 80,
 		instruction: 80,
+		agentic: 65,
 	},
 	// ── Google ─────────────────────────────────────────────────────────────────
 	"gemini-2.5-pro": {
@ -430,6 +443,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 90,
 		instruction: 75,
+		agentic: 70,
 	},
 	"gemini-3.1-pro-preview": {
 		coding: 82,
@ -472,6 +486,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 96,
 		longContext: 85,
 		instruction: 68,
+		agentic: 60,
 	},
 	"gemini-2.5-flash": {
 		coding: 60,
@ -481,6 +496,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 92,
 		longContext: 85,
 		instruction: 70,
+		agentic: 60,
 	},
 	"gemini-2.5-flash-lite": {
 		coding: 52,
@ -490,6 +506,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 97,
 		longContext: 78,
 		instruction: 65,
+		agentic: 50,
 	},
 	"gemini-2.0-flash": {
 		coding: 50,
@ -499,6 +516,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 60,
 		instruction: 65,
+		agentic: 55,
 	},
 	"gemini-flash-2.0": {
 		coding: 50,
@ -508,6 +526,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 60,
 		instruction: 65,
+		agentic: 55,
 	},
 	// ── DeepSeek ───────────────────────────────────────────────────────────────
 	"deepseek-chat": {
@ -518,6 +537,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 70,
 		longContext: 55,
 		instruction: 65,
+		agentic: 55,
 	},
 	// ── Mistral AI ─────────────────────────────────────────────────────────────
 	"mistral-large-latest": {
@ -528,6 +548,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 75,
 		instruction: 85,
+		agentic: 60,
 	},
 	"mistral-large-2411": {
 		coding: 85,
@ -537,6 +558,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 75,
 		instruction: 85,
+		agentic: 55,
 	},
 	"mistral-large-2512": {
 		coding: 88,
@ -546,6 +568,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 52,
 		longContext: 78,
 		instruction: 88,
+		agentic: 65,
 	},
 	"pixtral-large-latest": {
 		coding: 85,
@ -555,6 +578,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 45,
 		longContext: 80,
 		instruction: 85,
+		agentic: 50,
 	},
 	"mistral-medium-latest": {
 		coding: 75,
@ -564,6 +588,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 65,
 		instruction: 75,
+		agentic: 55,
 	},
 	"mistral-medium-2505": {
 		coding: 75,
@ -573,6 +598,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 65,
 		instruction: 75,
+		agentic: 50,
 	},
 	"mistral-medium-2508": {
 		coding: 78,
@ -582,6 +608,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 68,
 		instruction: 78,
+		agentic: 55,
 	},
 	"mistral-small-latest": {
 		coding: 65,
@ -591,6 +618,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 80,
 		longContext: 55,
 		instruction: 70,
+		agentic: 40,
 	},
 	"mistral-small-2506": {
 		coding: 65,
@ -600,6 +628,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 80,
 		longContext: 55,
 		instruction: 70,
+		agentic: 40,
 	},
 	"mistral-small-2603": {
 		coding: 68,
@ -609,6 +638,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 82,
 		longContext: 58,
 		instruction: 72,
+		agentic: 40,
 	},
 	"codestral-latest": {
 		coding: 85,
@ -631,6 +661,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 45,
 		instruction: 70,
+		agentic: 30,
 	},
 	"ministral-3b-latest": {
 		coding: 45,
@ -640,6 +671,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 35,
 		instruction: 60,
+		agentic: 25,
 	},
 	"open-mixtral-8x22b": {
 		coding: 75,
@ -649,6 +681,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 40,
 		longContext: 70,
 		instruction: 75,
+		agentic: 40,
 	},
 	"pixtral-12b": {
 		coding: 60,
@ -658,6 +691,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 75,
 		longContext: 60,
 		instruction: 65,
+		agentic: 35,
 	},
 	"mistral-nemo": {
 		coding: 60,
@ -667,6 +701,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 85,
 		longContext: 60,
 		instruction: 65,
+		agentic: 35,
 	},
 	"magistral-medium-latest": {
 		coding: 80,
@ -676,6 +711,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 75,
 		instruction: 80,
+		agentic: 65,
 	},
 	"magistral-small": {
 		coding: 70,
@ -685,6 +721,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 75,
 		longContext: 65,
 		instruction: 70,
+		agentic: 50,
 	},
 	"devstral-2512": {
 		coding: 82,
@ -757,6 +794,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 35,
 		longContext: 80,
 		instruction: 88,
+		agentic: 75,
 	},
 	"glm-5-turbo": {
 		coding: 85,
@ -766,6 +804,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 65,
 		longContext: 75,
 		instruction: 85,
+		agentic: 70,
 	},
 	"glm-5.1": {
 		coding: 92,
@ -775,6 +814,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 38,
 		longContext: 82,
 		instruction: 89,
+		agentic: 75,
 	},
 	"glm-5v-turbo": {
 		coding: 82,
@ -784,6 +824,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 75,
 		instruction: 82,
+		agentic: 65,
 	},
 	"glm-4.7": {
 		coding: 80,
@ -793,6 +834,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 70,
 		instruction: 80,
+		agentic: 55,
 	},
 	"glm-4.7-flash": {
 		coding: 50,
@ -802,6 +844,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 50,
 		instruction: 65,
+		agentic: 50,
 	},
 	"glm-4.7-flashx": {
 		coding: 45,
@ -811,6 +854,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 98,
 		longContext: 45,
 		instruction: 60,
+		agentic: 50,
 	},
 	// ── Qwen / Ollama Cloud compatible tags ──────────────────────────────────
 	"qwen3-coder:480b": {
@ -842,6 +886,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 86,
 		instruction: 74,
+		agentic: 55,
 	},
 	// ── Moonshot / Kimi ───────────────────────────────────────────────────────
 	"kimi-k2.6": {
--- a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
+++ b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
@ -115,6 +115,21 @@ describe("agentic capability axis (ADR-0079)", () => {
 		expect(newScore).toBeGreaterThan(oldScore);
 	});

+	test("every profile has an explicit agentic score (no defaulting to 50)", () => {
+		// sf-mp37p9u2-80f2gz: scoreModel's agentic=50 fallback let untouched
+		// profiles dodge the penalty for poor tool-use reliability. Require a
+		// finite number on every profile: a bare typeof check would also
+		// accept NaN/Infinity, which are no more usable than a missing score.
+		const offenders = [];
+		for (const [id, profile] of Object.entries(MODEL_CAPABILITY_PROFILES)) {
+			if (!Number.isFinite(profile.agentic)) offenders.push(id);
+		}
+		expect(
+			offenders,
+			`profiles missing explicit agentic: ${offenders.join(", ")}`,
+		).toEqual([]);
+	});
+
 	test("known agentic-frontier models all have agentic >= 85", () => {
 		const agenticFrontier = [
 			"claude-opus-4-6",