feat(gsd): complete ADR-004 capability-aware model routing implementation

Close three remaining gaps from ADR-004:

1. Add modelOverrides to GSDPreferences type — removes unsafe type cast
   in auto-model-selection.ts, enables TypeScript validation for user
   capability override config.

2. Add profile completeness lint test — two tests in capability-router
   that fail if MODEL_CAPABILITY_TIER and MODEL_CAPABILITY_PROFILES
   drift out of sync (catches stale profiles on new model additions).

3. Add capability profiles for all 24 missing tier-mapped models — goes
   from 9 to 33 profiles, organized by provider. Values reflect each
   model family's known strengths (o-series high reasoning, nano/spark
   high speed, codex variants high coding).

Closes #2659
This commit is contained in:
Jeremy 2026-04-10 12:10:29 -05:00
parent 750c5b7aeb
commit f96bc91014
5 changed files with 81 additions and 23 deletions

View file

@@ -132,9 +132,7 @@ export async function selectAndApplyModel(
}
// Load user capability overrides from preferences (D-17: deep-merged with built-in profiles)
const capabilityOverrides = loadCapabilityOverrides(
(prefs as { modelOverrides?: Record<string, { capabilities?: Record<string, number> }> } | undefined) ?? {},
);
const capabilityOverrides = loadCapabilityOverrides(prefs ?? {});
// Fire before_model_select hook (ADR-004, D-03)
// Hook can override model selection entirely by returning { modelId }

View file

@@ -58,7 +58,7 @@ export interface ModelCapabilities {
// Maps known model IDs to their capability tier. Used when tier_models is not
// explicitly configured to pick the best available model for each tier.
const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
// Light-tier models (cheapest)
"claude-haiku-4-5": "light",
"claude-3-5-haiku-latest": "light",
@@ -139,15 +139,49 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
// model selection within an eligible tier set.
export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
"claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
"claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
"claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
"gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
"gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
"gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
"gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
"deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
"o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
// ── Anthropic ──────────────────────────────────────────────────────────────
"claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
"claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
"claude-sonnet-4-5-20250514": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
"claude-3-5-sonnet-latest": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 70, instruction: 82 },
"claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
"claude-3-5-haiku-latest": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
"claude-3-haiku-20240307": { coding: 50, debugging: 40, research: 35, reasoning: 40, speed: 95, longContext: 40, instruction: 65 },
"claude-3-opus-latest": { coding: 90, debugging: 85, research: 82, reasoning: 90, speed: 35, longContext: 75, instruction: 88 },
// ── OpenAI GPT ─────────────────────────────────────────────────────────────
"gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
"gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
"gpt-4-turbo": { coding: 78, debugging: 72, research: 68, reasoning: 72, speed: 50, longContext: 65, instruction: 78 },
"gpt-4.1": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 72, instruction: 82 },
"gpt-4.1-mini": { coding: 58, debugging: 48, research: 42, reasoning: 48, speed: 88, longContext: 48, instruction: 72 },
"gpt-4.1-nano": { coding: 40, debugging: 30, research: 25, reasoning: 30, speed: 95, longContext: 30, instruction: 60 },
"gpt-5": { coding: 92, debugging: 88, research: 85, reasoning: 92, speed: 40, longContext: 85, instruction: 90 },
"gpt-5-mini": { coding: 62, debugging: 52, research: 48, reasoning: 52, speed: 88, longContext: 52, instruction: 74 },
"gpt-5-nano": { coding: 42, debugging: 32, research: 28, reasoning: 32, speed: 95, longContext: 32, instruction: 62 },
"gpt-5-pro": { coding: 94, debugging: 90, research: 88, reasoning: 94, speed: 35, longContext: 88, instruction: 92 },
"gpt-5.1": { coding: 93, debugging: 89, research: 86, reasoning: 93, speed: 42, longContext: 86, instruction: 91 },
"gpt-5.1-codex-max": { coding: 90, debugging: 85, research: 70, reasoning: 85, speed: 55, longContext: 75, instruction: 85 },
"gpt-5.1-codex-mini": { coding: 65, debugging: 55, research: 40, reasoning: 50, speed: 88, longContext: 48, instruction: 72 },
"gpt-5.2": { coding: 93, debugging: 90, research: 87, reasoning: 93, speed: 42, longContext: 87, instruction: 91 },
"gpt-5.2-codex": { coding: 93, debugging: 90, research: 72, reasoning: 88, speed: 50, longContext: 78, instruction: 88 },
"gpt-5.3-codex": { coding: 94, debugging: 91, research: 74, reasoning: 89, speed: 50, longContext: 80, instruction: 89 },
"gpt-5.3-codex-spark": { coding: 68, debugging: 58, research: 42, reasoning: 52, speed: 90, longContext: 50, instruction: 74 },
"gpt-5.4": { coding: 95, debugging: 92, research: 88, reasoning: 94, speed: 42, longContext: 88, instruction: 92 },
// ── OpenAI o-series (reasoning-first) ──────────────────────────────────────
"o1": { coding: 78, debugging: 82, research: 78, reasoning: 90, speed: 20, longContext: 65, instruction: 82 },
"o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
"o4-mini": { coding: 75, debugging: 80, research: 72, reasoning: 88, speed: 60, longContext: 65, instruction: 80 },
"o4-mini-deep-research": { coding: 75, debugging: 80, research: 85, reasoning: 88, speed: 30, longContext: 80, instruction: 80 },
// ── Google ─────────────────────────────────────────────────────────────────
"gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
"gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
"gemini-flash-2.0": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
// ── DeepSeek ───────────────────────────────────────────────────────────────
"deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
};
// ─── Base Task Requirements Data Table ───────────────────────────────────────

View file

@@ -20,7 +20,7 @@ import type {
ReactiveExecutionConfig,
GateEvaluationConfig,
} from "./types.js";
import type { DynamicRoutingConfig } from "./model-router.js";
import type { DynamicRoutingConfig, ModelCapabilities } from "./model-router.js";
export interface ContextManagementConfig {
observation_masking?: boolean; // default: true
@@ -255,6 +255,8 @@ export interface GSDPreferences {
post_unit_hooks?: PostUnitHookConfig[];
pre_dispatch_hooks?: PreDispatchHookConfig[];
dynamic_routing?: DynamicRoutingConfig;
/** Per-model capability overrides. Deep-merged with built-in profiles for capability-aware routing (ADR-004). */
modelOverrides?: Record<string, { capabilities?: Partial<ModelCapabilities> }>;
context_management?: ContextManagementConfig;
token_profile?: TokenProfile;
phases?: PhaseSkipPreferences;

View file

@@ -11,6 +11,7 @@ import {
getEligibleModels,
resolveModelForComplexity,
MODEL_CAPABILITY_PROFILES,
MODEL_CAPABILITY_TIER,
BASE_REQUIREMENTS,
defaultRoutingConfig,
} from "../model-router.js";
@@ -125,13 +126,9 @@ describe("computeTaskRequirements", () => {
// ─── MODEL_CAPABILITY_PROFILES ───────────────────────────────────────────────
describe("MODEL_CAPABILITY_PROFILES", () => {
test("contains all 9 required models", () => {
const required = [
"claude-opus-4-6", "claude-sonnet-4-6", "claude-haiku-4-5",
"gpt-4o", "gpt-4o-mini", "gemini-2.5-pro", "gemini-2.0-flash",
"deepseek-chat", "o3",
];
for (const model of required) {
test("contains profiles for all tier-mapped models", () => {
const tierModels = Object.keys(MODEL_CAPABILITY_TIER);
for (const model of tierModels) {
assert.ok(MODEL_CAPABILITY_PROFILES[model], `Missing profile for ${model}`);
}
});
@@ -345,3 +342,30 @@ describe("RoutingDecision.selectionMethod", () => {
assert.equal(result.selectionMethod, "tier-only");
});
});
// ─── ADR-004: Profile Completeness Lint ─────────────────────────────────────
// Every model in MODEL_CAPABILITY_TIER must have an entry in
// MODEL_CAPABILITY_PROFILES. This prevents profile staleness as new models
// are added to the tier map without corresponding capability data.
describe("profile completeness (ADR-004 lint)", () => {
test("every model in MODEL_CAPABILITY_TIER has a MODEL_CAPABILITY_PROFILES entry", () => {
const tierModels = Object.keys(MODEL_CAPABILITY_TIER);
const missing = tierModels.filter(id => !MODEL_CAPABILITY_PROFILES[id]);
assert.equal(
missing.length,
0,
`Models in MODEL_CAPABILITY_TIER but missing from MODEL_CAPABILITY_PROFILES:\n ${missing.join("\n ")}\n\nAdd capability profiles for these models in model-router.ts.`,
);
});
test("MODEL_CAPABILITY_PROFILES does not contain models absent from MODEL_CAPABILITY_TIER", () => {
const profileModels = Object.keys(MODEL_CAPABILITY_PROFILES);
const orphaned = profileModels.filter(id => !MODEL_CAPABILITY_TIER[id]);
assert.equal(
orphaned.length,
0,
`Models in MODEL_CAPABILITY_PROFILES but not in MODEL_CAPABILITY_TIER:\n ${orphaned.join("\n ")}\n\nEither add these to MODEL_CAPABILITY_TIER or remove stale profiles.`,
);
});
});

View file

@@ -287,9 +287,9 @@ test("resolveModelForComplexity falls back to tier-only when capability_routing
assert.ok(!result.selectionMethod || result.selectionMethod === "tier-only");
});
test("MODEL_CAPABILITY_PROFILES has entries for core models", () => {
test("MODEL_CAPABILITY_PROFILES has entries for all tier-mapped models", () => {
const profiledModels = Object.keys(MODEL_CAPABILITY_PROFILES);
assert.ok(profiledModels.length >= 9, `Expected ≥9 profiles, got ${profiledModels.length}`);
assert.ok(profiledModels.length >= 30, `Expected ≥30 profiles, got ${profiledModels.length}`);
assert.ok(MODEL_CAPABILITY_PROFILES["claude-opus-4-6"]);
assert.ok(MODEL_CAPABILITY_PROFILES["claude-haiku-4-5"]);
});