From 6dc7c0ec1da76c731894127d3040acb0befeebe6 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Thu, 26 Mar 2026 17:28:50 -0500 Subject: [PATCH] test(01-05): add capability-aware routing integration tests - Full pipeline with capability_routing: true returns capability-scored decision - capability_routing: false falls back to tier-only with no capabilityScores - Single eligible model (pinned) skips scoring and uses tier-only - Unknown model gets uniform score of 50 and competes in scoring - capabilityOverrides change scoring outcome in scoreEligibleModels - capabilityOverrides pass through resolveModelForComplexity to STEP 2 - Regression guards: routing disabled, unknown model, no-downgrade-needed all pass - All 51 tests pass (42 existing + 9 new integration) --- .../extensions/gsd/tests/model-router.test.ts | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/src/resources/extensions/gsd/tests/model-router.test.ts b/src/resources/extensions/gsd/tests/model-router.test.ts index 329145e7d..c26171e3f 100644 --- a/src/resources/extensions/gsd/tests/model-router.test.ts +++ b/src/resources/extensions/gsd/tests/model-router.test.ts @@ -565,6 +565,164 @@ describe("getEligibleModels", () => { }); }); +// ─── capability-aware routing integration ──────────────────────────────────── + +describe("capability-aware routing integration", () => { + // All standard-tier models available alongside heavy (opus) + const MULTI_MODEL_AVAILABLE = [ + "claude-opus-4-6", + "claude-sonnet-4-6", + "gpt-4o", + "gemini-2.5-pro", + "claude-haiku-4-5", + "gpt-4o-mini", + ]; + + // 1. Full pipeline with capability scoring active + test("full pipeline with capability_routing: true returns capability-scored decision", () => { + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true }; + // Configured primary is opus (heavy) — standard tier should trigger capability scoring + const result = resolveModelForComplexity( + { tier: "standard", reason: "test", downgraded: false }, + { primary: "claude-opus-4-6", fallbacks: [] }, + config, + MULTI_MODEL_AVAILABLE, + "execute-task", + { tags: [], complexityKeywords: [], fileCount: 3, estimatedLines: 100, codeBlockCount: 0 }, + ); + assert.equal(result.selectionMethod, "capability-scored", "should use capability scoring when enabled with multiple eligible models"); + assert.ok(result.capabilityScores !== undefined, "capabilityScores should be populated"); + assert.ok(Object.keys(result.capabilityScores!).length > 1, "should have scores for multiple models"); + assert.equal(result.wasDowngraded, true, "should be downgraded from opus"); + }); + + // 2. capability_routing: false falls back to tier-only + test("capability_routing: false skips scoring and uses tier-only", () => { + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: false }; + const result = resolveModelForComplexity( + { tier: "standard", reason: "test", downgraded: false }, + { primary: "claude-opus-4-6", fallbacks: [] }, + config, + MULTI_MODEL_AVAILABLE, + "execute-task", + undefined, + ); + assert.equal(result.selectionMethod, "tier-only", "capability_routing: false should use tier-only"); + assert.equal(result.capabilityScores, undefined, "capabilityScores should be undefined for tier-only"); + }); + + // 3. Single eligible model skips scoring + test("single eligible model skips capability scoring and uses tier-only", () => { + const config: DynamicRoutingConfig = { + ...defaultRoutingConfig(), + enabled: true, + capability_routing: true, + tier_models: { standard: "claude-sonnet-4-6" }, + }; + // Pin to single standard model — eligible.length === 1 → skips STEP 2 + const result = resolveModelForComplexity( + { tier: "standard", reason: "test", downgraded: false }, + { primary: "claude-opus-4-6", fallbacks: [] }, + config, + MULTI_MODEL_AVAILABLE, + "execute-task", + undefined, + ); + // Single pinned model → tier-only (no scoring needed) + assert.equal(result.selectionMethod, "tier-only", "single eligible model should use tier-only"); + assert.equal(result.modelId, "claude-sonnet-4-6", "should use the pinned model"); + }); + + // 4. Unknown model with no profile gets uniform 50s and competes + test("unknown model with no profile gets uniform score of 50 and can compete", () => { + const unknownModel = "unknown-future-model-xyz"; + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true }; + // Add unknown model to available list at standard tier (unknown → standard per D-15) + // scoring should still work with score=50 for the unknown model + const requirements = { coding: 0.9, instruction: 0.7, speed: 0.3 }; + const scored = scoreEligibleModels([unknownModel, "claude-sonnet-4-6"], requirements); + const unknownEntry = scored.find(s => s.modelId === unknownModel); + assert.ok(unknownEntry !== undefined, "unknown model should be in scored results"); + // Unknown model gets uniform 50s: (0.9*50 + 0.7*50 + 0.3*50) / (0.9+0.7+0.3) ≈ 50 + assert.ok(Math.abs(unknownEntry!.score - 50) < 0.01, `expected score ~50, got ${unknownEntry!.score}`); + }); + + // 5. Capability overrides change scoring outcome + test("capabilityOverrides boost a model above another for same task", () => { + // sonnet: coding=85, gpt-4o: coding=80. Override gpt-4o coding to 99 → gpt-4o should win. + const requirements = { coding: 1.0 }; + const overrides = { "gpt-4o": { coding: 99 } }; + const scored = scoreEligibleModels(["claude-sonnet-4-6", "gpt-4o"], requirements, overrides); + assert.equal(scored[0].modelId, "gpt-4o", "overridden model should win for coding-heavy task"); + assert.ok(scored[0].score > 90, `expected score > 90 after override, got ${scored[0].score}`); + }); + + // 5b. Capability overrides pass through resolveModelForComplexity to scoreEligibleModels + test("resolveModelForComplexity passes capabilityOverrides to scoring step", () => { + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true }; + // sonnet coding=85, gpt-4o coding=80. Override gpt-4o coding to 99 → gpt-4o should win. + const overrides: Record> = { "gpt-4o": { coding: 99 } }; + const result = resolveModelForComplexity( + { tier: "standard", reason: "test", downgraded: false }, + { primary: "claude-opus-4-6", fallbacks: [] }, + config, + ["claude-opus-4-6", "claude-sonnet-4-6", "gpt-4o"], + "execute-task", + undefined, + overrides, + ); + assert.equal(result.selectionMethod, "capability-scored"); + assert.equal(result.modelId, "gpt-4o", "gpt-4o should win with coding override"); + }); + + // 6. Regression: existing routing guards unchanged + test("regression: routing-disabled passthrough still returns tier-only", () => { + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: false }; + const result = resolveModelForComplexity( + { tier: "light", reason: "test", downgraded: false }, + { primary: "claude-opus-4-6", fallbacks: [] }, + config, + MULTI_MODEL_AVAILABLE, + "execute-task", + undefined, + ); + assert.equal(result.selectionMethod, "tier-only"); + assert.equal(result.wasDowngraded, false); + assert.equal(result.modelId, "claude-opus-4-6"); + }); + + test("regression: unknown-model bypass returns tier-only and does not downgrade", () => { + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true }; + const result = resolveModelForComplexity( + { tier: "light", reason: "test", downgraded: false }, + { primary: "totally-unknown-custom-model", fallbacks: [] }, + config, + ["totally-unknown-custom-model", ...MULTI_MODEL_AVAILABLE], + "execute-task", + undefined, + ); + assert.equal(result.selectionMethod, "tier-only"); + assert.equal(result.wasDowngraded, false); + assert.equal(result.modelId, "totally-unknown-custom-model"); + }); + + test("regression: no-downgrade-needed path returns tier-only", () => { + const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true }; + // Configured model is sonnet (standard), requesting standard → no downgrade needed + const result = resolveModelForComplexity( + { tier: "standard", reason: "test", downgraded: false }, + { primary: "claude-sonnet-4-6", fallbacks: [] }, + config, + MULTI_MODEL_AVAILABLE, + "execute-task", + undefined, + ); + assert.equal(result.selectionMethod, "tier-only"); + assert.equal(result.wasDowngraded, false); + assert.equal(result.modelId, "claude-sonnet-4-6"); + }); +}); + // ─── getModelTier unknown default ──────────────────────────────────────────── describe("getModelTier unknown default", () => {