feat(01-01): add capability types, data tables, and scoring functions to model-router
- Import TaskMetadata from complexity-classifier
- Add capability_routing?: boolean to DynamicRoutingConfig
- Add capabilityScores, taskRequirements, selectionMethod fields to RoutingDecision
- Add ModelCapabilities interface (7 dimensions: coding, debugging, research, reasoning, speed, longContext, instruction)
- Add MODEL_CAPABILITY_PROFILES data table with 9 model profiles
- Add BASE_REQUIREMENTS data table with 11 unit type vectors
- Add exported scoreModel() pure function (weighted average, 0-100 range)
- Add exported computeTaskRequirements() with metadata-driven vector refinement
- Add exported scoreEligibleModels() with cost-preferring tie-break sorting
- Add exported getEligibleModels() extracted from findModelForTier() logic
- Add selectionMethod: "tier-only" to all 5 return sites in resolveModelForComplexity()
- Change getModelTier() unknown default from "heavy" to "standard" (per D-15)
- Add capability_routing: true to defaultRoutingConfig()
This commit is contained in:
parent
e89bf7d18e
commit
0ccd3fd8a4
1 changed file with 122 additions and 49 deletions
|
|
@ -2,7 +2,7 @@
|
|||
// Maps complexity tiers to models, enforcing downgrade-only semantics.
|
||||
// The user's configured model is always the ceiling.
|
||||
|
||||
import type { ComplexityTier, ClassificationResult } from "./complexity-classifier.js";
|
||||
import type { ComplexityTier, ClassificationResult, TaskMetadata } from "./complexity-classifier.js";
|
||||
import { tierOrdinal } from "./complexity-classifier.js";
|
||||
import type { ResolvedModelConfig } from "./preferences.js";
|
||||
|
||||
|
|
@ -33,14 +33,27 @@ export interface RoutingDecision {
|
|||
wasDowngraded: boolean;
|
||||
/** Human-readable reason for this decision */
|
||||
reason: string;
|
||||
/** How the model was selected. */
|
||||
selectionMethod?: "tier-only" | "capability-scored";
|
||||
/** Capability scores per model (when capability-scored). */
|
||||
/** How the model was selected */
|
||||
selectionMethod: "tier-only" | "capability-scored";
|
||||
/** Capability scores per eligible model (capability-scored path only) */
|
||||
capabilityScores?: Record<string, number>;
|
||||
/** Task requirement vector (when capability-scored). */
|
||||
/** Task requirement vector used for scoring */
|
||||
taskRequirements?: Partial<Record<string, number>>;
|
||||
}
|
||||
|
||||
// ─── Capability Profiles ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Seven-dimension capability profile for a model. All values in 0–100 range.
 *
 * One profile per model ID is stored in MODEL_CAPABILITY_PROFILES. scoreModel()
 * looks fields up by dimension name and combines them into a weighted average,
 * so every dimension is expected to share the same scale.
 */
|
||||
export interface ModelCapabilities {
|
||||
coding: number;
|
||||
debugging: number;
|
||||
research: number;
|
||||
reasoning: number;
|
||||
speed: number;
|
||||
longContext: number;
|
||||
instruction: number;
|
||||
}
|
||||
|
||||
// ─── Known Model Tiers ───────────────────────────────────────────────────────
|
||||
// Maps known model IDs to their capability tier. Used when tier_models is not
|
||||
// explicitly configured to pick the best available model for each tier.
|
||||
|
|
@ -121,33 +134,27 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
|
|||
"deepseek-chat": 0.00014,
|
||||
};
|
||||
|
||||
// ─── Capability Profiles (ADR-004 Phase 2) ──────────────────────────────────
|
||||
// 7-dimension profiles, 0–100 normalized. Models without a profile
|
||||
// score 50 uniformly — capability scoring is a no-op for them.
|
||||
|
||||
export interface ModelCapabilities {
|
||||
coding: number;
|
||||
debugging: number;
|
||||
research: number;
|
||||
reasoning: number;
|
||||
speed: number;
|
||||
longContext: number;
|
||||
instruction: number;
|
||||
}
|
||||
// ─── Capability Profiles Data Table ──────────────────────────────────────────
// Per-model capability profiles (0–100 scale). Used for capability-aware
// model selection within an eligible tier set.
//
// Each model ID must appear exactly once: TypeScript rejects duplicate property
// names in an object literal, and at runtime a later duplicate would silently
// replace the earlier row.

export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
  "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
  "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
  "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
  "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
  "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
  "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
  "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
  "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
  "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
};
|
||||
|
||||
const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = {
|
||||
// ─── Base Task Requirements Data Table ───────────────────────────────────────
|
||||
// Per-unit-type base requirement vectors. Weights indicate how important each
|
||||
// capability dimension is for this unit type.
|
||||
|
||||
export const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = {
|
||||
"execute-task": { coding: 0.9, instruction: 0.7, speed: 0.3 },
|
||||
"research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
|
||||
"research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
|
||||
|
|
@ -161,15 +168,36 @@ const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities,
|
|||
"complete-milestone": { instruction: 0.8, reasoning: 0.5 },
|
||||
};
|
||||
|
||||
// ─── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Score a model's suitability for a task given a requirement vector.
 *
 * The result is the weighted average of the model's capability values over
 * the dimensions named in `requirements`, so for a 0–100 profile the score is
 * also in 0–100.
 *
 * Entries whose weight is missing, non-finite, or not positive are skipped.
 * Without that guard a single `undefined`/`NaN` weight turns both running sums
 * into `NaN`, which discards every valid dimension and falls back to 50.
 *
 * @param model - Capability profile of the model being scored.
 * @param requirements - Per-dimension importance weights for the task.
 * @returns The weighted average, or 50 (neutral) when no usable weight exists.
 */
export function scoreModel(
  model: ModelCapabilities,
  requirements: Partial<Record<keyof ModelCapabilities, number>>,
): number {
  let weightedSum = 0;
  let weightSum = 0;
  for (const [dim, weight] of Object.entries(requirements)) {
    if (typeof weight !== "number" || !Number.isFinite(weight) || weight <= 0) continue;
    // A dimension absent from the profile counts as neutral.
    const capability = model[dim as keyof ModelCapabilities] ?? 50;
    weightedSum += weight * capability;
    weightSum += weight;
  }
  return weightSum > 0 ? weightedSum / weightSum : 50;
}
|
||||
|
||||
/**
|
||||
* Compute dynamic task requirements from unit type and optional task metadata.
|
||||
* Returns a requirement vector refined by task-specific signals.
|
||||
*/
|
||||
export function computeTaskRequirements(
|
||||
unitType: string,
|
||||
metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
|
||||
metadata?: TaskMetadata,
|
||||
): Partial<Record<keyof ModelCapabilities, number>> {
|
||||
const base = { ...(BASE_REQUIREMENTS[unitType] ?? { reasoning: 0.5 }) };
|
||||
|
||||
const base = BASE_REQUIREMENTS[unitType] ?? { reasoning: 0.5 };
|
||||
if (unitType === "execute-task" && metadata) {
|
||||
if (metadata.tags?.some(t => /^(docs?|readme|comment|config|typo|rename)$/i.test(t))) {
|
||||
return { ...base, instruction: 0.9, coding: 0.3, speed: 0.7 };
|
||||
|
|
@ -184,29 +212,71 @@ export function computeTaskRequirements(
|
|||
return { ...base, coding: 0.9, reasoning: 0.7 };
|
||||
}
|
||||
}
|
||||
|
||||
return base;
|
||||
}
|
||||
|
||||
/**
 * Score all eligible models against a requirement vector and return them
 * sorted by score descending. Within 2 points: prefer cheaper; equal cost:
 * lexicographic tie-break by model ID.
 *
 * Each model's profile is built in layers: a neutral all-50 profile, then the
 * built-in entry from MODEL_CAPABILITY_PROFILES (if any), then the caller's
 * override (if any). Overrides therefore apply even to models that have no
 * built-in profile, instead of being silently dropped.
 *
 * @param eligibleModelIds - Model IDs to rank; looked up verbatim in the
 *   profile and cost tables.
 * @param requirements - Per-dimension importance weights passed to scoreModel().
 * @param capabilityOverrides - Optional per-model partial profiles that take
 *   precedence over the built-in values.
 * @returns A new array of `{ modelId, score }`, best candidate first.
 */
export function scoreEligibleModels(
  eligibleModelIds: string[],
  requirements: Partial<Record<keyof ModelCapabilities, number>>,
  capabilityOverrides?: Record<string, Partial<ModelCapabilities>>,
): Array<{ modelId: string; score: number }> {
  const neutral: ModelCapabilities = {
    coding: 50, debugging: 50, research: 50, reasoning: 50, speed: 50, longContext: 50, instruction: 50,
  };

  const scored = eligibleModelIds.map(modelId => {
    // Spreading `undefined` is a no-op, so missing layers simply fall through.
    const profile: ModelCapabilities = {
      ...neutral,
      ...MODEL_CAPABILITY_PROFILES[modelId],
      ...capabilityOverrides?.[modelId],
    };
    return { modelId, score: scoreModel(profile, requirements) };
  });

  // NOTE(review): the 2-point band makes this comparator non-transitive
  // (A≈B and B≈C do not imply A≈C), so with three or more closely-spaced
  // scores the resulting order depends on the engine's sort algorithm.
  scored.sort((a, b) => {
    const scoreDiff = b.score - a.score;
    if (Math.abs(scoreDiff) > 2) return scoreDiff;
    // Models without a known cost sort after every priced model.
    const costA = MODEL_COST_PER_1K_INPUT[a.modelId] ?? Infinity;
    const costB = MODEL_COST_PER_1K_INPUT[b.modelId] ?? Infinity;
    if (costA !== costB) return costA - costB;
    return a.modelId.localeCompare(b.modelId);
  });
  return scored;
}
|
||||
|
||||
// ─── Public API ──────────────────────────────────────────────────────────────
|
||||
/**
 * Return the models eligible for a given tier.
 *
 * Resolution order:
 * 1. If `routingConfig.tier_models[tier]` is set and present in
 *    `availableModelIds` — either verbatim or after stripping everything up to
 *    the last "/" from both IDs — return just that available ID.
 * 2. Otherwise (nothing configured, or the configured model is unavailable)
 *    return every available model whose getModelTier() result equals `tier`,
 *    sorted by getModelCost() ascending.
 *
 * @param tier - Complexity tier to find candidates for.
 * @param availableModelIds - IDs of the models that can currently be used;
 *   not mutated.
 * @param routingConfig - Routing configuration; only `tier_models` is read.
 * @returns A new array of model IDs; empty when nothing matches.
 */
export function getEligibleModels(
  tier: ComplexityTier,
  availableModelIds: string[],
  routingConfig: DynamicRoutingConfig,
): string[] {
  // 1. Check explicit tier_models config
  const explicitModel = routingConfig.tier_models?.[tier];
  if (explicitModel) {
    // Exact match
    if (availableModelIds.includes(explicitModel)) return [explicitModel];

    // Provider-prefix-stripped match. lastIndexOf returns -1 when there is no
    // "/", so slice(0) yields the ID unchanged.
    const stripProvider = (id: string): string => id.slice(id.lastIndexOf("/") + 1);
    const bareExplicit = stripProvider(explicitModel);
    const match = availableModelIds.find(id => stripProvider(id) === bareExplicit);
    if (match) return [match];
  }

  // 2. Auto-detect: filter by tier, sort cheapest first. filter() returns a
  // fresh array, so the in-place sort never touches the caller's list.
  return availableModelIds
    .filter(id => getModelTier(id) === tier)
    .sort((a, b) => getModelCost(a) - getModelCost(b));
}
|
||||
|
||||
/**
|
||||
* Resolve the model to use for a given complexity tier.
|
||||
|
|
@ -235,6 +305,7 @@ export function resolveModelForComplexity(
|
|||
tier: classification.tier,
|
||||
wasDowngraded: false,
|
||||
reason: "dynamic routing disabled or no phase config",
|
||||
selectionMethod: "tier-only",
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -254,6 +325,7 @@ export function resolveModelForComplexity(
|
|||
tier: requestedTier,
|
||||
wasDowngraded: false,
|
||||
reason: `configured model "${configuredPrimary}" is not in the known tier map — honoring explicit config`,
|
||||
selectionMethod: "tier-only",
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -265,6 +337,7 @@ export function resolveModelForComplexity(
|
|||
tier: requestedTier,
|
||||
wasDowngraded: false,
|
||||
reason: `tier ${requestedTier} >= configured ${configuredTier}`,
|
||||
selectionMethod: "tier-only",
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -360,8 +433,8 @@ function getModelTier(modelId: string): ComplexityTier {
|
|||
if (bareId.includes(knownId) || knownId.includes(bareId)) return tier;
|
||||
}
|
||||
|
||||
// Unknown models are assumed heavy (safest assumption)
|
||||
return "heavy";
|
||||
// Unknown models are assumed standard (per D-15: avoids silently ignoring user config)
|
||||
return "standard";
|
||||
}
|
||||
|
||||
/** Check if a model ID has a known capability tier mapping. (#2192) */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue