test(01-05): add capability-aware routing integration tests

- Full pipeline with capability_routing: true returns capability-scored decision - capability_routing: false falls back to tier-only with no capabilityScores - Single eligible model (pinned) skips scoring and uses tier-only - Unknown model gets uniform score of 50 and competes in scoring - capabilityOverrides change scoring outcome in scoreEligibleModels - capabilityOverrides pass through resolveModelForComplexity to STEP 2 - Regression guards: routing disabled, unknown model, no-downgrade-needed all pass - All 51 tests pass (42 existing + 9 new integration)
2026-03-26 17:28:50 -05:00 · 2026-03-26 17:28:50 -05:00 · 6dc7c0ec1d
commit 6dc7c0ec1d
parent 1645be072c
1 changed files with 158 additions and 0 deletions
--- a/src/resources/extensions/gsd/tests/model-router.test.ts
+++ b/src/resources/extensions/gsd/tests/model-router.test.ts
@ -565,6 +565,164 @@ describe("getEligibleModels", () => {
  });
 });

+// ─── capability-aware routing integration ────────────────────────────────────
+
+describe("capability-aware routing integration", () => {
+  // All standard-tier models available alongside heavy (opus)
+  const MULTI_MODEL_AVAILABLE = [
+    "claude-opus-4-6",
+    "claude-sonnet-4-6",
+    "gpt-4o",
+    "gemini-2.5-pro",
+    "claude-haiku-4-5",
+    "gpt-4o-mini",
+  ];
+
+  // 1. Full pipeline with capability scoring active
+  test("full pipeline with capability_routing: true returns capability-scored decision", () => {
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
+    // Configured primary is opus (heavy) — standard tier should trigger capability scoring
+    const result = resolveModelForComplexity(
+      { tier: "standard", reason: "test", downgraded: false },
+      { primary: "claude-opus-4-6", fallbacks: [] },
+      config,
+      MULTI_MODEL_AVAILABLE,
+      "execute-task",
+      { tags: [], complexityKeywords: [], fileCount: 3, estimatedLines: 100, codeBlockCount: 0 },
+    );
+    assert.equal(result.selectionMethod, "capability-scored", "should use capability scoring when enabled with multiple eligible models");
+    assert.ok(result.capabilityScores !== undefined, "capabilityScores should be populated");
+    assert.ok(Object.keys(result.capabilityScores!).length > 1, "should have scores for multiple models");
+    assert.equal(result.wasDowngraded, true, "should be downgraded from opus");
+  });
+
+  // 2. capability_routing: false falls back to tier-only
+  test("capability_routing: false skips scoring and uses tier-only", () => {
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: false };
+    const result = resolveModelForComplexity(
+      { tier: "standard", reason: "test", downgraded: false },
+      { primary: "claude-opus-4-6", fallbacks: [] },
+      config,
+      MULTI_MODEL_AVAILABLE,
+      "execute-task",
+      undefined,
+    );
+    assert.equal(result.selectionMethod, "tier-only", "capability_routing: false should use tier-only");
+    assert.equal(result.capabilityScores, undefined, "capabilityScores should be undefined for tier-only");
+  });
+
+  // 3. Single eligible model skips scoring
+  test("single eligible model skips capability scoring and uses tier-only", () => {
+    const config: DynamicRoutingConfig = {
+      ...defaultRoutingConfig(),
+      enabled: true,
+      capability_routing: true,
+      tier_models: { standard: "claude-sonnet-4-6" },
+    };
+    // Pin to single standard model — eligible.length === 1 → skips STEP 2
+    const result = resolveModelForComplexity(
+      { tier: "standard", reason: "test", downgraded: false },
+      { primary: "claude-opus-4-6", fallbacks: [] },
+      config,
+      MULTI_MODEL_AVAILABLE,
+      "execute-task",
+      undefined,
+    );
+    // Single pinned model → tier-only (no scoring needed)
+    assert.equal(result.selectionMethod, "tier-only", "single eligible model should use tier-only");
+    assert.equal(result.modelId, "claude-sonnet-4-6", "should use the pinned model");
+  });
+
+  // 4. Unknown model with no profile gets uniform 50s and competes
+  test("unknown model with no profile gets uniform score of 50 and can compete", () => {
+    const unknownModel = "unknown-future-model-xyz";
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
+    // Add unknown model to available list at standard tier (unknown → standard per D-15)
+    // scoring should still work with score=50 for the unknown model
+    const requirements = { coding: 0.9, instruction: 0.7, speed: 0.3 };
+    const scored = scoreEligibleModels([unknownModel, "claude-sonnet-4-6"], requirements);
+    const unknownEntry = scored.find(s => s.modelId === unknownModel);
+    assert.ok(unknownEntry !== undefined, "unknown model should be in scored results");
+    // Unknown model gets uniform 50s: (0.9*50 + 0.7*50 + 0.3*50) / (0.9+0.7+0.3) ≈ 50
+    assert.ok(Math.abs(unknownEntry!.score - 50) < 0.01, `expected score ~50, got ${unknownEntry!.score}`);
+  });
+
+  // 5. Capability overrides change scoring outcome
+  test("capabilityOverrides boost a model above another for same task", () => {
+    // sonnet: coding=85, gpt-4o: coding=80. Override gpt-4o coding to 99 → gpt-4o should win.
+    const requirements = { coding: 1.0 };
+    const overrides = { "gpt-4o": { coding: 99 } };
+    const scored = scoreEligibleModels(["claude-sonnet-4-6", "gpt-4o"], requirements, overrides);
+    assert.equal(scored[0].modelId, "gpt-4o", "overridden model should win for coding-heavy task");
+    assert.ok(scored[0].score > 90, `expected score > 90 after override, got ${scored[0].score}`);
+  });
+
+  // 5b. Capability overrides pass through resolveModelForComplexity to scoreEligibleModels
+  test("resolveModelForComplexity passes capabilityOverrides to scoring step", () => {
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
+    // sonnet coding=85, gpt-4o coding=80. Override gpt-4o coding to 99 → gpt-4o should win.
+    const overrides: Record<string, Partial<ModelCapabilities>> = { "gpt-4o": { coding: 99 } };
+    const result = resolveModelForComplexity(
+      { tier: "standard", reason: "test", downgraded: false },
+      { primary: "claude-opus-4-6", fallbacks: [] },
+      config,
+      ["claude-opus-4-6", "claude-sonnet-4-6", "gpt-4o"],
+      "execute-task",
+      undefined,
+      overrides,
+    );
+    assert.equal(result.selectionMethod, "capability-scored");
+    assert.equal(result.modelId, "gpt-4o", "gpt-4o should win with coding override");
+  });
+
+  // 6. Regression: existing routing guards unchanged
+  test("regression: routing-disabled passthrough still returns tier-only", () => {
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: false };
+    const result = resolveModelForComplexity(
+      { tier: "light", reason: "test", downgraded: false },
+      { primary: "claude-opus-4-6", fallbacks: [] },
+      config,
+      MULTI_MODEL_AVAILABLE,
+      "execute-task",
+      undefined,
+    );
+    assert.equal(result.selectionMethod, "tier-only");
+    assert.equal(result.wasDowngraded, false);
+    assert.equal(result.modelId, "claude-opus-4-6");
+  });
+
+  test("regression: unknown-model bypass returns tier-only and does not downgrade", () => {
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true };
+    const result = resolveModelForComplexity(
+      { tier: "light", reason: "test", downgraded: false },
+      { primary: "totally-unknown-custom-model", fallbacks: [] },
+      config,
+      ["totally-unknown-custom-model", ...MULTI_MODEL_AVAILABLE],
+      "execute-task",
+      undefined,
+    );
+    assert.equal(result.selectionMethod, "tier-only");
+    assert.equal(result.wasDowngraded, false);
+    assert.equal(result.modelId, "totally-unknown-custom-model");
+  });
+
+  test("regression: no-downgrade-needed path returns tier-only", () => {
+    const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
+    // Configured model is sonnet (standard), requesting standard → no downgrade needed
+    const result = resolveModelForComplexity(
+      { tier: "standard", reason: "test", downgraded: false },
+      { primary: "claude-sonnet-4-6", fallbacks: [] },
+      config,
+      MULTI_MODEL_AVAILABLE,
+      "execute-task",
+      undefined,
+    );
+    assert.equal(result.selectionMethod, "tier-only");
+    assert.equal(result.wasDowngraded, false);
+    assert.equal(result.modelId, "claude-sonnet-4-6");
+  });
+});
+
 // ─── getModelTier unknown default ────────────────────────────────────────────

 describe("getModelTier unknown default", () => {