diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts index 7dc1593f8..a8f0d5db4 100644 --- a/src/resources/extensions/gsd/auto-model-selection.ts +++ b/src/resources/extensions/gsd/auto-model-selection.ts @@ -132,9 +132,7 @@ export async function selectAndApplyModel( } // Load user capability overrides from preferences (D-17: deep-merged with built-in profiles) - const capabilityOverrides = loadCapabilityOverrides( - (prefs as { modelOverrides?: Record<string, { [K in keyof ModelCapabilities]?: number }> } | undefined) ?? {}, - ); + const capabilityOverrides = loadCapabilityOverrides(prefs ?? {}); // Fire before_model_select hook (ADR-004, D-03) // Hook can override model selection entirely by returning { modelId } diff --git a/src/resources/extensions/gsd/model-router.ts b/src/resources/extensions/gsd/model-router.ts index 0efbbf9b6..17ff1c70a 100644 --- a/src/resources/extensions/gsd/model-router.ts +++ b/src/resources/extensions/gsd/model-router.ts @@ -58,7 +58,7 @@ export interface ModelCapabilities { // Maps known model IDs to their capability tier. Used when tier_models is not // explicitly configured to pick the best available model for each tier. -const MODEL_CAPABILITY_TIER: Record<string, string> = { +export const MODEL_CAPABILITY_TIER: Record<string, string> = { // Light-tier models (cheapest) "claude-haiku-4-5": "light", "claude-3-5-haiku-latest": "light", @@ -139,15 +139,49 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = { // model selection within an eligible tier set. 
export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = { - "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 }, - "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 }, - "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 }, - "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 }, - "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 }, - "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 }, - "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, - "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 }, - "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 }, + // ── Anthropic ────────────────────────────────────────────────────────────── + "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 }, + "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 }, + "claude-sonnet-4-5-20250514": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 }, + "claude-3-5-sonnet-latest": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 70, instruction: 82 }, + "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 }, + "claude-3-5-haiku-latest": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, 
longContext: 50, instruction: 75 }, + "claude-3-haiku-20240307": { coding: 50, debugging: 40, research: 35, reasoning: 40, speed: 95, longContext: 40, instruction: 65 }, + "claude-3-opus-latest": { coding: 90, debugging: 85, research: 82, reasoning: 90, speed: 35, longContext: 75, instruction: 88 }, + + // ── OpenAI GPT ───────────────────────────────────────────────────────────── + "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 }, + "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 }, + "gpt-4-turbo": { coding: 78, debugging: 72, research: 68, reasoning: 72, speed: 50, longContext: 65, instruction: 78 }, + "gpt-4.1": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 72, instruction: 82 }, + "gpt-4.1-mini": { coding: 58, debugging: 48, research: 42, reasoning: 48, speed: 88, longContext: 48, instruction: 72 }, + "gpt-4.1-nano": { coding: 40, debugging: 30, research: 25, reasoning: 30, speed: 95, longContext: 30, instruction: 60 }, + "gpt-5": { coding: 92, debugging: 88, research: 85, reasoning: 92, speed: 40, longContext: 85, instruction: 90 }, + "gpt-5-mini": { coding: 62, debugging: 52, research: 48, reasoning: 52, speed: 88, longContext: 52, instruction: 74 }, + "gpt-5-nano": { coding: 42, debugging: 32, research: 28, reasoning: 32, speed: 95, longContext: 32, instruction: 62 }, + "gpt-5-pro": { coding: 94, debugging: 90, research: 88, reasoning: 94, speed: 35, longContext: 88, instruction: 92 }, + "gpt-5.1": { coding: 93, debugging: 89, research: 86, reasoning: 93, speed: 42, longContext: 86, instruction: 91 }, + "gpt-5.1-codex-max": { coding: 90, debugging: 85, research: 70, reasoning: 85, speed: 55, longContext: 75, instruction: 85 }, + "gpt-5.1-codex-mini": { coding: 65, debugging: 55, research: 40, reasoning: 50, speed: 88, longContext: 48, instruction: 72 }, + "gpt-5.2": { coding: 93, debugging: 
90, research: 87, reasoning: 93, speed: 42, longContext: 87, instruction: 91 }, + "gpt-5.2-codex": { coding: 93, debugging: 90, research: 72, reasoning: 88, speed: 50, longContext: 78, instruction: 88 }, + "gpt-5.3-codex": { coding: 94, debugging: 91, research: 74, reasoning: 89, speed: 50, longContext: 80, instruction: 89 }, + "gpt-5.3-codex-spark": { coding: 68, debugging: 58, research: 42, reasoning: 52, speed: 90, longContext: 50, instruction: 74 }, + "gpt-5.4": { coding: 95, debugging: 92, research: 88, reasoning: 94, speed: 42, longContext: 88, instruction: 92 }, + + // ── OpenAI o-series (reasoning-first) ────────────────────────────────────── + "o1": { coding: 78, debugging: 82, research: 78, reasoning: 90, speed: 20, longContext: 65, instruction: 82 }, + "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 }, + "o4-mini": { coding: 75, debugging: 80, research: 72, reasoning: 88, speed: 60, longContext: 65, instruction: 80 }, + "o4-mini-deep-research": { coding: 75, debugging: 80, research: 85, reasoning: 88, speed: 30, longContext: 80, instruction: 80 }, + + // ── Google ───────────────────────────────────────────────────────────────── + "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 }, + "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, + "gemini-flash-2.0": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, + + // ── DeepSeek ─────────────────────────────────────────────────────────────── + "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 }, }; // ─── Base Task Requirements Data Table ─────────────────────────────────────── diff --git a/src/resources/extensions/gsd/preferences-types.ts 
b/src/resources/extensions/gsd/preferences-types.ts index 58b847cc9..47ed0c12b 100644 --- a/src/resources/extensions/gsd/preferences-types.ts +++ b/src/resources/extensions/gsd/preferences-types.ts @@ -20,7 +20,7 @@ import type { ReactiveExecutionConfig, GateEvaluationConfig, } from "./types.js"; -import type { DynamicRoutingConfig } from "./model-router.js"; +import type { DynamicRoutingConfig, ModelCapabilities } from "./model-router.js"; export interface ContextManagementConfig { observation_masking?: boolean; // default: true @@ -255,6 +255,8 @@ export interface GSDPreferences { post_unit_hooks?: PostUnitHookConfig[]; pre_dispatch_hooks?: PreDispatchHookConfig[]; dynamic_routing?: DynamicRoutingConfig; + /** Per-model capability overrides. Deep-merged with built-in profiles for capability-aware routing (ADR-004). */ + modelOverrides?: Record<string, { [K in keyof ModelCapabilities]?: number }>; context_management?: ContextManagementConfig; token_profile?: TokenProfile; phases?: PhaseSkipPreferences; diff --git a/src/resources/extensions/gsd/tests/capability-router.test.ts b/src/resources/extensions/gsd/tests/capability-router.test.ts index 751fc6e11..8e185b508 100644 --- a/src/resources/extensions/gsd/tests/capability-router.test.ts +++ b/src/resources/extensions/gsd/tests/capability-router.test.ts @@ -11,6 +11,7 @@ import { getEligibleModels, resolveModelForComplexity, MODEL_CAPABILITY_PROFILES, + MODEL_CAPABILITY_TIER, BASE_REQUIREMENTS, defaultRoutingConfig, } from "../model-router.js"; @@ -125,13 +126,9 @@ describe("computeTaskRequirements", () => { // ─── MODEL_CAPABILITY_PROFILES ─────────────────────────────────────────────── describe("MODEL_CAPABILITY_PROFILES", () => { - test("contains all 9 required models", () => { - const required = [ - "claude-opus-4-6", "claude-sonnet-4-6", "claude-haiku-4-5", - "gpt-4o", "gpt-4o-mini", "gemini-2.5-pro", "gemini-2.0-flash", - "deepseek-chat", "o3", - ]; - for (const model of required) { + test("contains profiles for all tier-mapped models", () => { + const 
tierModels = Object.keys(MODEL_CAPABILITY_TIER); + for (const model of tierModels) { assert.ok(MODEL_CAPABILITY_PROFILES[model], `Missing profile for ${model}`); } }); @@ -345,3 +342,30 @@ describe("RoutingDecision.selectionMethod", () => { assert.equal(result.selectionMethod, "tier-only"); }); }); + +// ─── ADR-004: Profile Completeness Lint ───────────────────────────────────── +// Every model in MODEL_CAPABILITY_TIER must have an entry in +// MODEL_CAPABILITY_PROFILES. This prevents profile staleness as new models +// are added to the tier map without corresponding capability data. + +describe("profile completeness (ADR-004 lint)", () => { + test("every model in MODEL_CAPABILITY_TIER has a MODEL_CAPABILITY_PROFILES entry", () => { + const tierModels = Object.keys(MODEL_CAPABILITY_TIER); + const missing = tierModels.filter(id => !MODEL_CAPABILITY_PROFILES[id]); + assert.equal( + missing.length, + 0, + `Models in MODEL_CAPABILITY_TIER but missing from MODEL_CAPABILITY_PROFILES:\n ${missing.join("\n ")}\n\nAdd capability profiles for these models in model-router.ts.`, + ); + }); + + test("MODEL_CAPABILITY_PROFILES does not contain models absent from MODEL_CAPABILITY_TIER", () => { + const profileModels = Object.keys(MODEL_CAPABILITY_PROFILES); + const orphaned = profileModels.filter(id => !MODEL_CAPABILITY_TIER[id]); + assert.equal( + orphaned.length, + 0, + `Models in MODEL_CAPABILITY_PROFILES but not in MODEL_CAPABILITY_TIER:\n ${orphaned.join("\n ")}\n\nEither add these to MODEL_CAPABILITY_TIER or remove stale profiles.`, + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/model-router.test.ts b/src/resources/extensions/gsd/tests/model-router.test.ts index c81242215..d12a71df9 100644 --- a/src/resources/extensions/gsd/tests/model-router.test.ts +++ b/src/resources/extensions/gsd/tests/model-router.test.ts @@ -287,9 +287,9 @@ test("resolveModelForComplexity falls back to tier-only when capability_routing assert.ok(!result.selectionMethod || 
result.selectionMethod === "tier-only"); }); -test("MODEL_CAPABILITY_PROFILES has entries for core models", () => { +test("MODEL_CAPABILITY_PROFILES has entries for all tier-mapped models", () => { const profiledModels = Object.keys(MODEL_CAPABILITY_PROFILES); - assert.ok(profiledModels.length >= 9, `Expected ≥9 profiles, got ${profiledModels.length}`); + assert.ok(profiledModels.length >= 30, `Expected ≥30 profiles, got ${profiledModels.length}`); assert.ok(MODEL_CAPABILITY_PROFILES["claude-opus-4-6"]); assert.ok(MODEL_CAPABILITY_PROFILES["claude-haiku-4-5"]); });