From 0694803df38d2bb681184234cf6ba9deec2af206 Mon Sep 17 00:00:00 2001
From: Mikael Hugo <mikkihugo@users.noreply.github.com>
Date: Thu, 14 May 2026 06:28:06 +0200
Subject: [PATCH] feat(model-router): explicit agentic score for every
 capability profile

Sweep MODEL_CAPABILITY_PROFILES so all 82 entries declare an explicit
agentic score. The agentic=50 fallback in scoreModel was silently
giving untouched profiles a generous default, letting models with weak
agentic ability slip through execute-task routing. Score anchors follow
the entry's suggestedFix: coding-only ~25-40, very small/older ~30-40,
older generations ~55-70, frontier agentic ~85-95.

Adds an invariant test that asserts no profile relies on the default.

Closes sf-mp37p9u2-80f2gz.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/resources/extensions/sf/model-router.js   | 45 +++++++++++++++++++
 .../sf/tests/model-router-agentic.test.mjs    | 15 +++++++
 2 files changed, 60 insertions(+)

diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js
index 980a18d03..6ed7a720c 100644
--- a/src/resources/extensions/sf/model-router.js
+++ b/src/resources/extensions/sf/model-router.js
@@ -174,6 +174,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 40,
 		instruction: 65,
+		agentic: 35,
 	},
 	"claude-3-opus-latest": {
 		agentic: 88,
@@ -194,6 +195,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 65,
 		longContext: 70,
 		instruction: 80,
+		agentic: 65,
 	},
 	"gpt-4o-mini": {
 		coding: 55,
@@ -203,6 +205,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 45,
 		instruction: 70,
+		agentic: 50,
 	},
 	"gpt-4-turbo": {
 		coding: 78,
@@ -212,6 +215,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 65,
 		instruction: 78,
+		agentic: 60,
 	},
 	"gpt-4.1": {
 		coding: 82,
@@ -221,6 +225,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 72,
 		instruction: 82,
+		agentic: 70,
 	},
 	"gpt-4.1-mini": {
 		coding: 58,
@@ -230,6 +235,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 48,
 		instruction: 72,
+		agentic: 55,
 	},
 	"gpt-4.1-nano": {
 		coding: 40,
@@ -239,6 +245,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 30,
 		instruction: 60,
+		agentic: 35,
 	},
 	"gpt-5": {
 		coding: 92,
@@ -259,6 +266,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 88,
 		longContext: 52,
 		instruction: 74,
+		agentic: 75,
 	},
 	"gpt-5-nano": {
 		coding: 42,
@@ -268,6 +276,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 32,
 		instruction: 62,
+		agentic: 60,
 	},
 	"gpt-5-pro": {
 		coding: 94,
@@ -393,6 +402,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 20,
 		longContext: 65,
 		instruction: 82,
+		agentic: 65,
 	},
 	o3: {
 		coding: 80,
@@ -402,6 +412,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 25,
 		longContext: 70,
 		instruction: 85,
+		agentic: 72,
 	},
 	"o4-mini": {
 		coding: 75,
@@ -411,6 +422,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 65,
 		instruction: 80,
+		agentic: 70,
 	},
 	"o4-mini-deep-research": {
 		coding: 75,
@@ -420,6 +432,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 30,
 		longContext: 80,
 		instruction: 80,
+		agentic: 65,
 	},
 	// ── Google ─────────────────────────────────────────────────────────────────
 	"gemini-2.5-pro": {
@@ -430,6 +443,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 90,
 		instruction: 75,
+		agentic: 70,
 	},
 	"gemini-3.1-pro-preview": {
 		coding: 82,
@@ -472,6 +486,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 96,
 		longContext: 85,
 		instruction: 68,
+		agentic: 60,
 	},
 	"gemini-2.5-flash": {
 		coding: 60,
@@ -481,6 +496,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 92,
 		longContext: 85,
 		instruction: 70,
+		agentic: 60,
 	},
 	"gemini-2.5-flash-lite": {
 		coding: 52,
@@ -490,6 +506,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 97,
 		longContext: 78,
 		instruction: 65,
+		agentic: 50,
 	},
 	"gemini-2.0-flash": {
 		coding: 50,
@@ -499,6 +516,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 60,
 		instruction: 65,
+		agentic: 55,
 	},
 	"gemini-flash-2.0": {
 		coding: 50,
@@ -508,6 +526,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 60,
 		instruction: 65,
+		agentic: 55,
 	},
 	// ── DeepSeek ───────────────────────────────────────────────────────────────
 	"deepseek-chat": {
@@ -518,6 +537,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 70,
 		longContext: 55,
 		instruction: 65,
+		agentic: 55,
 	},
 	// ── Mistral AI ─────────────────────────────────────────────────────────────
 	"mistral-large-latest": {
@@ -528,6 +548,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 75,
 		instruction: 85,
+		agentic: 60,
 	},
 	"mistral-large-2411": {
 		coding: 85,
@@ -537,6 +558,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 50,
 		longContext: 75,
 		instruction: 85,
+		agentic: 55,
 	},
 	"mistral-large-2512": {
 		coding: 88,
@@ -546,6 +568,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 52,
 		longContext: 78,
 		instruction: 88,
+		agentic: 65,
 	},
 	"pixtral-large-latest": {
 		coding: 85,
@@ -555,6 +578,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 45,
 		longContext: 80,
 		instruction: 85,
+		agentic: 50,
 	},
 	"mistral-medium-latest": {
 		coding: 75,
@@ -564,6 +588,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 65,
 		instruction: 75,
+		agentic: 55,
 	},
 	"mistral-medium-2505": {
 		coding: 75,
@@ -573,6 +598,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 65,
 		instruction: 75,
+		agentic: 50,
 	},
 	"mistral-medium-2508": {
 		coding: 78,
@@ -582,6 +608,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 68,
 		instruction: 78,
+		agentic: 55,
 	},
 	"mistral-small-latest": {
 		coding: 65,
@@ -591,6 +618,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 80,
 		longContext: 55,
 		instruction: 70,
+		agentic: 40,
 	},
 	"mistral-small-2506": {
 		coding: 65,
@@ -600,6 +628,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 80,
 		longContext: 55,
 		instruction: 70,
+		agentic: 40,
 	},
 	"mistral-small-2603": {
 		coding: 68,
@@ -609,6 +638,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 82,
 		longContext: 58,
 		instruction: 72,
+		agentic: 40,
 	},
 	"codestral-latest": {
 		coding: 85,
@@ -631,6 +661,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 90,
 		longContext: 45,
 		instruction: 70,
+		agentic: 30,
 	},
 	"ministral-3b-latest": {
 		coding: 45,
@@ -640,6 +671,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 35,
 		instruction: 60,
+		agentic: 25,
 	},
 	"open-mixtral-8x22b": {
 		coding: 75,
@@ -649,6 +681,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 40,
 		longContext: 70,
 		instruction: 75,
+		agentic: 40,
 	},
 	"pixtral-12b": {
 		coding: 60,
@@ -658,6 +691,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 75,
 		longContext: 60,
 		instruction: 65,
+		agentic: 35,
 	},
 	"mistral-nemo": {
 		coding: 60,
@@ -667,6 +701,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 85,
 		longContext: 60,
 		instruction: 65,
+		agentic: 35,
 	},
 	"magistral-medium-latest": {
 		coding: 80,
@@ -676,6 +711,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 55,
 		longContext: 75,
 		instruction: 80,
+		agentic: 65,
 	},
 	"magistral-small": {
 		coding: 70,
@@ -685,6 +721,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 75,
 		longContext: 65,
 		instruction: 70,
+		agentic: 50,
 	},
 	"devstral-2512": {
 		coding: 82,
@@ -757,6 +794,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 35,
 		longContext: 80,
 		instruction: 88,
+		agentic: 75,
 	},
 	"glm-5-turbo": {
 		coding: 85,
@@ -766,6 +804,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 65,
 		longContext: 75,
 		instruction: 85,
+		agentic: 70,
 	},
 	"glm-5.1": {
 		coding: 92,
@@ -775,6 +814,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 38,
 		longContext: 82,
 		instruction: 89,
+		agentic: 75,
 	},
 	"glm-5v-turbo": {
 		coding: 82,
@@ -784,6 +824,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 75,
 		instruction: 82,
+		agentic: 65,
 	},
 	"glm-4.7": {
 		coding: 80,
@@ -793,6 +834,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 60,
 		longContext: 70,
 		instruction: 80,
+		agentic: 55,
 	},
 	"glm-4.7-flash": {
 		coding: 50,
@@ -802,6 +844,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 95,
 		longContext: 50,
 		instruction: 65,
+		agentic: 50,
 	},
 	"glm-4.7-flashx": {
 		coding: 45,
@@ -811,6 +854,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 98,
 		longContext: 45,
 		instruction: 60,
+		agentic: 50,
 	},
 	// ── Qwen / Ollama Cloud compatible tags ──────────────────────────────────
 	"qwen3-coder:480b": {
@@ -842,6 +886,7 @@ export const MODEL_CAPABILITY_PROFILES = {
 		speed: 62,
 		longContext: 86,
 		instruction: 74,
+		agentic: 55,
 	},
 	// ── Moonshot / Kimi ───────────────────────────────────────────────────────
 	"kimi-k2.6": {
diff --git a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
index e3e0d98c7..77a87420f 100644
--- a/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
+++ b/src/resources/extensions/sf/tests/model-router-agentic.test.mjs
@@ -115,6 +115,21 @@ describe("agentic capability axis (ADR-0079)", () => {
 		expect(newScore).toBeGreaterThan(oldScore);
 	});
 
+	test("every profile has an explicit agentic score (no defaulting to 50)", () => {
+		// sf-mp37p9u2-80f2gz: the agentic=50 fallback in scoreModel silently
+		// gave untouched profiles a generous default. Require a finite number:
+		// `typeof NaN === "number"`, so a typeof check alone would accept
+		// NaN (or Infinity) as if it were a deliberate score.
+		const offenders = [];
+		for (const [id, profile] of Object.entries(MODEL_CAPABILITY_PROFILES)) {
+			if (!Number.isFinite(profile.agentic)) offenders.push(id);
+		}
+		expect(
+			offenders,
+			`profiles missing explicit agentic: ${offenders.join(", ")}`,
+		).toEqual([]);
+	});
+
 	test("known agentic-frontier models all have agentic >= 85", () => {
 		const agenticFrontier = [
 			"claude-opus-4-6",