feat(headless,gemini-cli): add sf headless usage + unify gemini quota path
Adds a machine-readable headless surface for live LLM-provider usage and
unifies the gemini-cli quota fetch through one helper, removing the
duplication that existed between usage-bar.js and the new package.
1. snapshotGeminiCliAccount in @singularity-forge/google-gemini-cli-provider
- Single source of truth for { projectId, userTierId, userTierName,
paidTier, models[] } via setupUser + retrieveUserQuota.
- Dedups buckets per modelId, keeping the worst (lowest remainingFraction)
so consumers always see the most-restrictive window. Code Assist
sometimes returns multiple buckets per model; the pessimistic choice
is what every consumer needs.
- discoverGeminiCliModels(cwd?) wraps it for catalog-cache callers that
only need the IDs.
2. sf headless usage subcommand
- New src/headless-usage.ts handler. text (default) and --json output.
Uses the package's snapshot directly — no RPC child, no jiti
gymnastics — matching the shape of headless-uok-status / headless-doctor.
- Wired into src/headless.ts after the doctor block.
- Help text adds the command line.
3. usage-bar.js refactored to delegate
- fetchGeminiUsage no longer imports gemini-cli-core directly. It calls
snapshotGeminiCliAccount and reshapes the result into the existing
{ provider, displayName, windows[] } UI contract.
- Eliminates the duplicate setupUser + retrieveUserQuota code path.
- The fast existsSync(~/.gemini/oauth_creds.json) pre-flight stays
so unauth'd users get a friendly message without paying for OAuth
bootstrap.
4. Model registry refactor (separate track committed alongside)
- src/resources/extensions/sf/model-registry.ts (new) consolidates
canonical model identity, capability tier, and generation tags into
one source of truth that auto-model-selection, benchmark-selector,
and model-router now consume instead of maintaining parallel maps.
All 1487 tests pass (151 files); typecheck clean for both the package
and the SF extensions.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c6a3fa6a6a
commit
383e495085
9 changed files with 1073 additions and 94 deletions
|
|
@ -105,20 +105,32 @@ export async function snapshotGeminiCliAccount(
|
|||
if (!projectId || typeof projectId !== "string") return null;
|
||||
const server = new CodeAssistServer(authClient, projectId, { headers: {} });
|
||||
const data = await server.retrieveUserQuota({ project: projectId });
|
||||
const models: GeminiQuotaBucket[] = [];
|
||||
// Dedup buckets per modelId, keeping the WORST quota (lowest
|
||||
// remainingFraction). Code Assist sometimes returns multiple buckets
|
||||
// for the same model when more than one quota window applies; the
|
||||
// pessimistic choice is what every consumer (UI, capacity diagnostics,
|
||||
// model picker) actually wants to surface.
|
||||
const byModel = new Map<string, GeminiQuotaBucket>();
|
||||
for (const b of data?.buckets ?? []) {
|
||||
const modelId = typeof b.modelId === "string" ? b.modelId : "";
|
||||
if (!modelId) continue;
|
||||
const remainingFraction =
|
||||
typeof b.remainingFraction === "number" ? b.remainingFraction : 1;
|
||||
models.push({
|
||||
const bucket: GeminiQuotaBucket = {
|
||||
modelId,
|
||||
usedFraction: 1 - remainingFraction,
|
||||
remainingFraction,
|
||||
resetTime:
|
||||
typeof b.resetTime === "string" ? b.resetTime : undefined,
|
||||
});
|
||||
};
|
||||
const existing = byModel.get(modelId);
|
||||
if (!existing || bucket.remainingFraction < existing.remainingFraction) {
|
||||
byModel.set(modelId, bucket);
|
||||
}
|
||||
}
|
||||
const models = Array.from(byModel.values()).sort((a, b) =>
|
||||
a.modelId.localeCompare(b.modelId),
|
||||
);
|
||||
if (models.length === 0) return null;
|
||||
return {
|
||||
projectId,
|
||||
|
|
|
|||
102
src/headless-usage.ts
Normal file
102
src/headless-usage.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
/**
|
||||
* headless-usage.ts — `sf headless usage`
|
||||
*
|
||||
* Purpose: expose live LLM-provider usage data (account tier, project, per-model
|
||||
* quota usage with reset windows) via the headless CLI so operators and CI can
|
||||
* see capacity state without launching the interactive UI.
|
||||
*
|
||||
* Today this covers the gemini-cli provider (the most quota-sensitive surface
|
||||
* because of AI Ultra's per-model windowed quotas). Other providers can be
|
||||
* added by extending the snapshot helper as their introspection APIs are
|
||||
* wired into dedicated provider packages.
|
||||
*
|
||||
* Consumer: headless.ts when command === "usage".
|
||||
*/
|
||||
|
||||
import {
|
||||
type GeminiAccountSnapshot,
|
||||
snapshotGeminiCliAccount,
|
||||
} from "@singularity-forge/google-gemini-cli-provider";
|
||||
|
||||
/** Options accepted by {@link handleUsage}. */
export interface HandleUsageOptions {
  /** When true, emit one machine-readable JSON line instead of the text table. */
  json?: boolean;
}
|
||||
|
||||
/** Result of {@link handleUsage}, consumed by the headless CLI dispatcher. */
export interface HandleUsageResult {
  /** Process exit code: 0 on success, 1 when no snapshot could be produced. */
  exitCode: number;
}
|
||||
|
||||
/**
|
||||
* Render a snapshot as a compact text table (default) or as JSON for machine
|
||||
* consumers. Always writes to stdout; never throws.
|
||||
*/
|
||||
export async function handleUsage(
|
||||
cwd: string,
|
||||
options: HandleUsageOptions = {},
|
||||
): Promise<HandleUsageResult> {
|
||||
let snapshot: GeminiAccountSnapshot | null;
|
||||
try {
|
||||
snapshot = await snapshotGeminiCliAccount(cwd);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
const payload = {
|
||||
provider: "google-gemini-cli",
|
||||
ok: false,
|
||||
error: msg,
|
||||
};
|
||||
process.stdout.write(
|
||||
options.json ? `${JSON.stringify(payload)}\n` : `error: ${msg}\n`,
|
||||
);
|
||||
return { exitCode: 1 };
|
||||
}
|
||||
|
||||
if (!snapshot) {
|
||||
const payload = {
|
||||
provider: "google-gemini-cli",
|
||||
ok: false,
|
||||
error:
|
||||
"No gemini-cli account snapshot — run `gemini auth login` and verify ~/.gemini/oauth_creds.json exists.",
|
||||
};
|
||||
process.stdout.write(
|
||||
options.json
|
||||
? `${JSON.stringify(payload)}\n`
|
||||
: `${payload.error}\n`,
|
||||
);
|
||||
return { exitCode: 1 };
|
||||
}
|
||||
|
||||
if (options.json) {
|
||||
process.stdout.write(
|
||||
`${JSON.stringify({ provider: "google-gemini-cli", ok: true, snapshot })}\n`,
|
||||
);
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
||||
const lines: string[] = [];
|
||||
lines.push("Gemini CLI usage");
|
||||
lines.push("");
|
||||
lines.push(` project: ${snapshot.projectId}`);
|
||||
if (snapshot.userTierId || snapshot.userTierName) {
|
||||
lines.push(
|
||||
` userTier: ${snapshot.userTierId ?? "?"}${snapshot.userTierName ? ` (${snapshot.userTierName})` : ""}`,
|
||||
);
|
||||
}
|
||||
if (snapshot.paidTier?.id || snapshot.paidTier?.name) {
|
||||
lines.push(
|
||||
` paidTier: ${snapshot.paidTier.id ?? "?"}${snapshot.paidTier.name ? ` — ${snapshot.paidTier.name}` : ""}`,
|
||||
);
|
||||
}
|
||||
lines.push("");
|
||||
lines.push(" Per-model quota:");
|
||||
const modelW = Math.max(
|
||||
20,
|
||||
...snapshot.models.map((m) => m.modelId.length),
|
||||
);
|
||||
for (const m of snapshot.models) {
|
||||
const usedPct = (m.usedFraction * 100).toFixed(1).padStart(5);
|
||||
const reset = m.resetTime ?? "-";
|
||||
lines.push(` ${m.modelId.padEnd(modelW)} used=${usedPct}% reset=${reset}`);
|
||||
}
|
||||
process.stdout.write(`${lines.join("\n")}\n`);
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
|
@ -824,6 +824,16 @@ async function runHeadlessOnce(
|
|||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||
}
|
||||
|
||||
// Usage: gemini-cli account snapshot (tier, project, per-model quota), no
|
||||
// RPC child needed. Uses snapshotGeminiCliAccount from the
|
||||
// @singularity-forge/google-gemini-cli-provider package directly.
|
||||
if (options.command === "usage") {
|
||||
const wantsJson = options.json || options.commandArgs.includes("--json");
|
||||
const { handleUsage } = await import("./headless-usage.js");
|
||||
const result = await handleUsage(process.cwd(), { json: wantsJson });
|
||||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||
}
|
||||
|
||||
// Doctor: read-only health check, no RPC child needed (#4904 live-regression).
|
||||
// ARCHITECTURE NOTE: this intentionally bypasses the SF extension dispatcher
|
||||
// for performance and TTY-independence. The interactive `/doctor` command in
|
||||
|
|
|
|||
|
|
@ -223,6 +223,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
|||
" status Show progress dashboard",
|
||||
" new-milestone Create a milestone from a specification document",
|
||||
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
|
||||
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
|
||||
"",
|
||||
"new-milestone flags:",
|
||||
" --context <path> Path to spec/PRD file (use '-' for stdin)",
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import {
|
|||
tierLabel,
|
||||
} from "./complexity-classifier.js";
|
||||
import { getLedger, getProjectTotals } from "./metrics.js";
|
||||
import { routesFor } from "./model-registry.js";
|
||||
import {
|
||||
adjustToolSet,
|
||||
escalateTier,
|
||||
|
|
@ -124,42 +125,21 @@ const BARE_MODEL_FAMILY_PRIORITY = [
|
|||
providers: ["xiaomi", "opencode-go"],
|
||||
},
|
||||
];
|
||||
function preferredBareModelIds(modelId) {
|
||||
const lower = modelId.toLowerCase();
|
||||
if (
|
||||
lower === "kimi-for-coding" ||
|
||||
lower === "kimi-k2.6" ||
|
||||
lower === "kimi-k2.6:cloud" ||
|
||||
lower === "kimi-k2.6-cloud" ||
|
||||
lower === "moonshotai/kimi-k2.6"
|
||||
) {
|
||||
return [
|
||||
"kimi-for-coding",
|
||||
"kimi-k2.6",
|
||||
"kimi-k2.6:cloud",
|
||||
"kimi-k2.6-cloud",
|
||||
"moonshotai/kimi-k2.6",
|
||||
];
|
||||
}
|
||||
if (
|
||||
lower === "kimi-k2.5" ||
|
||||
lower === "kimi-k2.5:cloud" ||
|
||||
lower === "moonshotai/kimi-k2.5"
|
||||
) {
|
||||
return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud"];
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
function resolveFamilyPreferredBareModel(modelId, candidates) {
|
||||
const rule = BARE_MODEL_FAMILY_PRIORITY.find((r) => r.match.test(modelId));
|
||||
if (!rule) return undefined;
|
||||
const preferredModelIds = preferredBareModelIds(modelId);
|
||||
// Use model registry to get preferred wire ids for this canonical model.
|
||||
// routesFor returns all known routes; we filter to preferred providers below.
|
||||
const canonicalId =
|
||||
candidates.find((m) => m.id.toLowerCase() === modelId.toLowerCase())
|
||||
?.canonical_id ?? modelId;
|
||||
const preferredWireIds = routesFor(canonicalId).map((r) => r.wire_id);
|
||||
for (const provider of rule.providers) {
|
||||
const providerCandidates = candidates.filter(
|
||||
(m) => m.provider.toLowerCase() === provider.toLowerCase(),
|
||||
);
|
||||
if (preferredModelIds) {
|
||||
for (const preferredId of preferredModelIds) {
|
||||
if (preferredWireIds.length > 0) {
|
||||
for (const preferredId of preferredWireIds) {
|
||||
const match = providerCandidates.find(
|
||||
(m) => m.id.toLowerCase() === preferredId.toLowerCase(),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tierOrdinal } from "./complexity-classifier.js";
|
||||
import { lookup } from "./model-registry.js";
|
||||
import { getModelTier } from "./model-router.js";
|
||||
|
||||
// ─── Benchmark File Loader ───────────────────────────────────────────────────
|
||||
|
|
@ -263,27 +264,16 @@ function profileForUnitType(unitType) {
|
|||
* Match a provider+model pair to a benchmark record key. Benchmarks are
|
||||
* keyed by semantic model ID (e.g. "devstral-latest", "kimi-k2.5"), while registered
|
||||
* models may carry provider wire IDs or versioned suffixes
|
||||
* (`kimi-for-coding`, `devstral-2507`, `minimax-m2.7`). We try semantic
|
||||
* aliases first, then exact match, then strip common version/date suffixes,
|
||||
* then try a family-level key (e.g. `mistral-large-2411` →
|
||||
* (`kimi-for-coding`, `devstral-2507`, `minimax-m2.7`). We try canonical id
|
||||
* from the model registry first, then exact match, then strip common
|
||||
* version/date suffixes, then try a family-level key (e.g. `mistral-large-2411` →
|
||||
* `mistral-large-latest`).
|
||||
*/
|
||||
const BENCHMARK_KEY_ALIASES = {
|
||||
// Kimi Code's provider wire ID. The benchmark identity is Kimi K2.6.
|
||||
"kimi-for-coding": "kimi-k2.6",
|
||||
"moonshotai/kimi-k2.6": "kimi-k2.6",
|
||||
"kimi-k2.6:cloud": "kimi-k2.6",
|
||||
"kimi-k2.6-cloud": "kimi-k2.6",
|
||||
// Kimi aggregator wire IDs. Kimi Code's `kimi-for-coding` is K2.6 above.
|
||||
"kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
"moonshotai.kimi-k2.5": "kimi-k2.5",
|
||||
"kimi-k2.5:cloud": "kimi-k2.5",
|
||||
"kimi-k2.5-cloud": "kimi-k2.5",
|
||||
};
|
||||
function findBenchmarkKey(modelId, benchmarks) {
|
||||
const alias = BENCHMARK_KEY_ALIASES[modelId.toLowerCase()];
|
||||
if (alias && alias in benchmarks) return alias;
|
||||
function findBenchmarkKey(modelId, benchmarks, provider) {
|
||||
// Use canonical id from registry when a provider is known.
|
||||
const resolved = provider ? lookup(provider, modelId) : null;
|
||||
const semantic = resolved?.canonical_id ?? modelId;
|
||||
if (semantic !== modelId && semantic in benchmarks) return semantic;
|
||||
if (modelId in benchmarks) return modelId;
|
||||
// Strip date-style suffixes: "devstral-medium-2507" → "devstral-medium"
|
||||
const noDate = modelId.replace(/-\d{4}$/, "");
|
||||
|
|
@ -332,7 +322,7 @@ function readDimension(rec, dim) {
|
|||
return null;
|
||||
}
|
||||
function scoreCandidate(candidate, profile, benchmarks) {
|
||||
const key = findBenchmarkKey(candidate.id, benchmarks);
|
||||
const key = findBenchmarkKey(candidate.id, benchmarks, candidate.provider);
|
||||
if (!key) return { score: 0, coverage: 0 };
|
||||
const rec = benchmarks[key];
|
||||
if (!rec || typeof rec !== "object") return { score: 0, coverage: 0 };
|
||||
|
|
|
|||
898
src/resources/extensions/sf/model-registry.ts
Normal file
898
src/resources/extensions/sf/model-registry.ts
Normal file
|
|
@ -0,0 +1,898 @@
|
|||
/**
|
||||
* SF Model Registry — single source of truth for canonical model identity,
|
||||
* capability tier, and generation across all providers.
|
||||
*
|
||||
* The upstream `MODELS` constant from `@singularity-forge/ai` is the
|
||||
* authoritative route catalog. This module enriches it with:
|
||||
* 1. Canonical model identity (many routes → one stable id)
|
||||
* 2. Capability tier (light / standard / heavy)
|
||||
* 3. Generation tag (same-generation routes are direct failover candidates)
|
||||
*/
|
||||
|
||||
// ─── Upstream data import ─────────────────────────────────────────────────────
|
||||
// Use the public API of @singularity-forge/ai so we get:
|
||||
// 1. Both generated + CUSTOM_MODELS entries (e.g. kimi-coding/kimi-for-coding,
|
||||
// which only appears once CUSTOM_MODELS merge runs in models.js).
|
||||
// 2. A stable import path that resolves identically at test-time, dist-time,
|
||||
// and runtime (~/.sf/agent/extensions/sf/) — relative paths into the
|
||||
// monorepo can't satisfy the latter.
|
||||
import { getModels, getProviders } from "@singularity-forge/ai";
|
||||
|
||||
// ─── Public types ─────────────────────────────────────────────────────────────
|
||||
|
||||
export type WireFormat =
|
||||
| "anthropic-messages"
|
||||
| "openai-completions"
|
||||
| "openai-responses"
|
||||
| "bedrock-converse-stream"
|
||||
| "google-generative"
|
||||
| string; // open enum — pass through unknown values from upstream
|
||||
|
||||
export type CapabilityTier = "light" | "standard" | "heavy";
|
||||
|
||||
export type CanonicalId = string;
|
||||
// Stable, generation-aware identity. Examples:
|
||||
// "kimi-k2.5" (NOT the same as kimi-k2.6 — generation matters)
|
||||
// "kimi-k2.6"
|
||||
// "kimi-k2-thinking"
|
||||
// "claude-sonnet-4-6"
|
||||
// "MiniMax-M2.7"
|
||||
|
||||
export type RouteKey = string;
|
||||
// Format: `${provider}/${wire_id}`. Examples:
|
||||
// "kimi-coding/kimi-k2.6"
|
||||
// "openrouter/moonshotai/kimi-k2.5"
|
||||
// "amazon-bedrock/moonshotai.kimi-k2.5"
|
||||
|
||||
export interface ResolvedModel {
|
||||
canonical_id: CanonicalId;
|
||||
generation: string; // free-form, e.g. "k2.5", "k2.6", "sonnet-4-6"
|
||||
tier: CapabilityTier;
|
||||
// Pass-through from upstream ModelEntry:
|
||||
wire_id: string; // the upstream entry's `id`
|
||||
provider: string;
|
||||
api: WireFormat; // wire format axis
|
||||
baseUrl: string;
|
||||
capabilities?: Record<string, unknown>;
|
||||
cost?: {
|
||||
input?: number;
|
||||
output?: number;
|
||||
cacheRead?: number;
|
||||
cacheWrite?: number;
|
||||
};
|
||||
contextWindow?: number;
|
||||
maxTokens?: number;
|
||||
reasoning?: boolean;
|
||||
inputModalities?: string[]; // renamed from upstream `input` for clarity
|
||||
}
|
||||
|
||||
// ─── Internal data tables ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Only three hand-maintained tables are needed. Everything else is derived
|
||||
// from the upstream MODELS catalog at module initialisation time.
|
||||
|
||||
/**
|
||||
* (provider, wire_id) → canonical id.
|
||||
* Only entries that DIVERGE from `wire_id` itself need a mapping.
|
||||
* Entries that are already canonical (e.g. provider="kimi-coding", wire_id="kimi-k2.6")
|
||||
* can be omitted; the resolver falls back to wire_id when no mapping exists.
|
||||
*/
|
||||
const CANONICAL_BY_ROUTE: Record<RouteKey, CanonicalId> = {
|
||||
// ── amazon-bedrock ────────────────────────────────────────────────────────
|
||||
"amazon-bedrock/amazon.nova-2-lite-v1:0": "nova-2-lite",
|
||||
"amazon-bedrock/amazon.nova-lite-v1:0": "nova-lite",
|
||||
"amazon-bedrock/amazon.nova-micro-v1:0": "nova-micro",
|
||||
"amazon-bedrock/amazon.nova-premier-v1:0": "nova-premier",
|
||||
"amazon-bedrock/amazon.nova-pro-v1:0": "nova-pro",
|
||||
"amazon-bedrock/anthropic.claude-3-5-haiku-20241022-v1:0": "claude-3-5-haiku",
|
||||
"amazon-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0":
|
||||
"claude-3-5-sonnet",
|
||||
"amazon-bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0":
|
||||
"claude-3-5-sonnet",
|
||||
"amazon-bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0":
|
||||
"claude-3-7-sonnet",
|
||||
"amazon-bedrock/anthropic.claude-3-haiku-20240307-v1:0": "claude-3-haiku",
|
||||
"amazon-bedrock/anthropic.claude-haiku-4-5-20251001-v1:0": "claude-haiku-4-5",
|
||||
"amazon-bedrock/anthropic.claude-opus-4-1-20250805-v1:0": "claude-opus-4-1",
|
||||
"amazon-bedrock/anthropic.claude-opus-4-20250514-v1:0": "claude-opus-4",
|
||||
"amazon-bedrock/anthropic.claude-opus-4-5-20251101-v1:0": "claude-opus-4-5",
|
||||
"amazon-bedrock/anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
|
||||
"amazon-bedrock/anthropic.claude-opus-4-7": "claude-opus-4-7",
|
||||
"amazon-bedrock/anthropic.claude-sonnet-4-20250514-v1:0": "claude-sonnet-4",
|
||||
"amazon-bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0":
|
||||
"claude-sonnet-4-5",
|
||||
"amazon-bedrock/anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
|
||||
"amazon-bedrock/deepseek.r1-v1:0": "deepseek-r1",
|
||||
"amazon-bedrock/deepseek.v3-v1:0": "deepseek-v3",
|
||||
"amazon-bedrock/deepseek.v3.2": "deepseek-v3.2",
|
||||
"amazon-bedrock/eu.anthropic.claude-haiku-4-5-20251001-v1:0":
|
||||
"claude-haiku-4-5",
|
||||
"amazon-bedrock/eu.anthropic.claude-opus-4-5-20251101-v1:0":
|
||||
"claude-opus-4-5",
|
||||
"amazon-bedrock/eu.anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
|
||||
"amazon-bedrock/eu.anthropic.claude-opus-4-7": "claude-opus-4-7",
|
||||
"amazon-bedrock/eu.anthropic.claude-sonnet-4-20250514-v1:0":
|
||||
"claude-sonnet-4",
|
||||
"amazon-bedrock/eu.anthropic.claude-sonnet-4-5-20250929-v1:0":
|
||||
"claude-sonnet-4-5",
|
||||
"amazon-bedrock/eu.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
|
||||
"amazon-bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0":
|
||||
"claude-haiku-4-5",
|
||||
"amazon-bedrock/global.anthropic.claude-opus-4-5-20251101-v1:0":
|
||||
"claude-opus-4-5",
|
||||
"amazon-bedrock/global.anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
|
||||
"amazon-bedrock/global.anthropic.claude-opus-4-7": "claude-opus-4-7",
|
||||
"amazon-bedrock/global.anthropic.claude-sonnet-4-20250514-v1:0":
|
||||
"claude-sonnet-4",
|
||||
"amazon-bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0":
|
||||
"claude-sonnet-4-5",
|
||||
"amazon-bedrock/global.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
|
||||
"amazon-bedrock/google.gemma-3-27b-it": "gemma-3-27b-it",
|
||||
"amazon-bedrock/google.gemma-3-4b-it": "gemma-3-4b-it",
|
||||
"amazon-bedrock/meta.llama3-1-405b-instruct-v1:0":
|
||||
"llama3-1-405b-instruct",
|
||||
"amazon-bedrock/meta.llama3-1-70b-instruct-v1:0": "llama3-1-70b-instruct",
|
||||
"amazon-bedrock/meta.llama3-1-8b-instruct-v1:0": "llama3-1-8b-instruct",
|
||||
"amazon-bedrock/meta.llama3-2-11b-instruct-v1:0": "llama3-2-11b-instruct",
|
||||
"amazon-bedrock/meta.llama3-2-1b-instruct-v1:0": "llama3-2-1b-instruct",
|
||||
"amazon-bedrock/meta.llama3-2-3b-instruct-v1:0": "llama3-2-3b-instruct",
|
||||
"amazon-bedrock/meta.llama3-2-90b-instruct-v1:0": "llama3-2-90b-instruct",
|
||||
"amazon-bedrock/meta.llama3-3-70b-instruct-v1:0": "llama3-3-70b-instruct",
|
||||
"amazon-bedrock/meta.llama4-maverick-17b-instruct-v1:0":
|
||||
"llama4-maverick-17b-instruct",
|
||||
"amazon-bedrock/meta.llama4-scout-17b-instruct-v1:0":
|
||||
"llama4-scout-17b-instruct",
|
||||
"amazon-bedrock/minimax.minimax-m2": "minimax-m2",
|
||||
"amazon-bedrock/minimax.minimax-m2.1": "minimax-m2.1",
|
||||
"amazon-bedrock/minimax.minimax-m2.5": "minimax-m2.5",
|
||||
"amazon-bedrock/mistral.devstral-2-123b": "devstral-2512",
|
||||
"amazon-bedrock/mistral.magistral-small-2509": "magistral-small",
|
||||
"amazon-bedrock/mistral.ministral-3-14b-instruct": "mistral-small-latest",
|
||||
"amazon-bedrock/mistral.ministral-3-3b-instruct": "ministral-3b-latest",
|
||||
"amazon-bedrock/mistral.ministral-3-8b-instruct": "ministral-8b-latest",
|
||||
"amazon-bedrock/mistral.mistral-large-3-675b-instruct":
|
||||
"mistral-large-latest",
|
||||
"amazon-bedrock/mistral.pixtral-large-2502-v1:0": "pixtral-large-latest",
|
||||
"amazon-bedrock/mistral.voxtral-mini-3b-2507": "voxtral-mini-3b-2507",
|
||||
"amazon-bedrock/mistral.voxtral-small-24b-2507": "voxtral-small-24b-2507",
|
||||
"amazon-bedrock/moonshot.kimi-k2-thinking": "kimi-k2-thinking",
|
||||
"amazon-bedrock/moonshotai.kimi-k2.5": "kimi-k2.5",
|
||||
"amazon-bedrock/nvidia.nemotron-nano-12b-v2": "nemotron-nano-12b-v2",
|
||||
"amazon-bedrock/nvidia.nemotron-nano-3-30b": "nemotron-nano-3-30b",
|
||||
"amazon-bedrock/nvidia.nemotron-nano-9b-v2": "nemotron-nano-9b-v2",
|
||||
"amazon-bedrock/nvidia.nemotron-super-3-120b": "nemotron-super-3-120b",
|
||||
"amazon-bedrock/openai.gpt-oss-120b-1:0": "gpt-oss-120b",
|
||||
"amazon-bedrock/openai.gpt-oss-20b-1:0": "gpt-oss-20b",
|
||||
"amazon-bedrock/openai.gpt-oss-safeguard-120b": "gpt-oss-safeguard-120b",
|
||||
"amazon-bedrock/openai.gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b",
|
||||
"amazon-bedrock/qwen.qwen3-235b-a22b-2507-v1:0": "qwen3-235b-a22b-2507",
|
||||
"amazon-bedrock/qwen.qwen3-32b-v1:0": "qwen3-32b",
|
||||
"amazon-bedrock/qwen.qwen3-coder-30b-a3b-v1:0": "qwen3-coder-next",
|
||||
"amazon-bedrock/qwen.qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b",
|
||||
"amazon-bedrock/qwen.qwen3-coder-next": "qwen3-coder-next",
|
||||
"amazon-bedrock/qwen.qwen3-next-80b-a3b": "qwen3-next:80b",
|
||||
"amazon-bedrock/qwen.qwen3-vl-235b-a22b": "qwen3-vl-235b-a22b",
|
||||
"amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0":
|
||||
"claude-haiku-4-5",
|
||||
"amazon-bedrock/us.anthropic.claude-opus-4-1-20250805-v1:0":
|
||||
"claude-opus-4-1",
|
||||
"amazon-bedrock/us.anthropic.claude-opus-4-20250514-v1:0": "claude-opus-4",
|
||||
"amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0":
|
||||
"claude-opus-4-5",
|
||||
"amazon-bedrock/us.anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
|
||||
"amazon-bedrock/us.anthropic.claude-opus-4-7": "claude-opus-4-7",
|
||||
"amazon-bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0":
|
||||
"claude-sonnet-4",
|
||||
"amazon-bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0":
|
||||
"claude-sonnet-4-5",
|
||||
"amazon-bedrock/us.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
|
||||
"amazon-bedrock/writer.palmyra-x4-v1:0": "palmyra-x4",
|
||||
"amazon-bedrock/writer.palmyra-x5-v1:0": "palmyra-x5",
|
||||
"amazon-bedrock/zai.glm-4.7": "glm-4.7",
|
||||
"amazon-bedrock/zai.glm-4.7-flash": "glm-4.7-flash",
|
||||
"amazon-bedrock/zai.glm-5": "glm-5",
|
||||
// ── anthropic/ provider (versioned wire ids → stable canonical) ───────────
|
||||
"anthropic/claude-3-5-haiku-20241022": "claude-3-5-haiku",
|
||||
"anthropic/claude-3-5-haiku-latest": "claude-3-5-haiku",
|
||||
"anthropic/claude-3-5-sonnet-20240620": "claude-3-5-sonnet",
|
||||
"anthropic/claude-3-5-sonnet-20241022": "claude-3-5-sonnet",
|
||||
"anthropic/claude-3-7-sonnet-20250219": "claude-3-7-sonnet",
|
||||
"anthropic/claude-3-haiku-20240307": "claude-3-haiku",
|
||||
"anthropic/claude-3-opus-20240229": "claude-3-opus",
|
||||
"anthropic/claude-3-sonnet-20240229": "claude-3-sonnet",
|
||||
"anthropic/claude-haiku-4-5-20251001": "claude-haiku-4-5",
|
||||
"anthropic/claude-opus-4-0": "claude-opus-4",
|
||||
"anthropic/claude-opus-4-1-20250805": "claude-opus-4-1",
|
||||
"anthropic/claude-opus-4-20250514": "claude-opus-4",
|
||||
"anthropic/claude-opus-4-5-20251101": "claude-opus-4-5",
|
||||
"anthropic/claude-opus-4-6": "claude-opus-4-6",
|
||||
"anthropic/claude-sonnet-4-0": "claude-sonnet-4",
|
||||
"anthropic/claude-sonnet-4-20250514": "claude-sonnet-4",
|
||||
"anthropic/claude-sonnet-4-5-20250929": "claude-sonnet-4-5",
|
||||
// ── cerebras ─────────────────────────────────────────────────────────────
|
||||
"cerebras/zai-glm-4.7": "glm-4.7",
|
||||
// ── github-copilot (dot-notation → dash) ─────────────────────────────────
|
||||
"github-copilot/claude-haiku-4.5": "claude-haiku-4-5",
|
||||
"github-copilot/claude-opus-4.5": "claude-opus-4-5",
|
||||
"github-copilot/claude-opus-4.6": "claude-opus-4-6",
|
||||
"github-copilot/claude-opus-4.7": "claude-opus-4-7",
|
||||
"github-copilot/claude-sonnet-4": "claude-sonnet-4",
|
||||
"github-copilot/claude-sonnet-4.5": "claude-sonnet-4-5",
|
||||
"github-copilot/claude-sonnet-4.6": "claude-sonnet-4-6",
|
||||
// ── groq ─────────────────────────────────────────────────────────────────
|
||||
"groq/groq/compound": "compound",
|
||||
"groq/groq/compound-mini": "compound-mini",
|
||||
"groq/meta-llama/llama-4-maverick-17b-128e-instruct":
|
||||
"llama-4-maverick-17b-128e-instruct",
|
||||
"groq/meta-llama/llama-4-scout-17b-16e-instruct":
|
||||
"llama-4-scout-17b-16e-instruct",
|
||||
"groq/moonshotai/kimi-k2-instruct": "kimi-k2",
|
||||
"groq/moonshotai/kimi-k2-instruct-0905": "kimi-k2-0905",
|
||||
"groq/openai/gpt-oss-120b": "gpt-oss-120b",
|
||||
"groq/openai/gpt-oss-20b": "gpt-oss-20b",
|
||||
"groq/openai/gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b",
|
||||
"groq/qwen/qwen3-32b": "qwen3-32b",
|
||||
// ── huggingface ───────────────────────────────────────────────────────────
|
||||
"huggingface/MiniMaxAI/MiniMax-M2.1": "minimax-m2.1",
|
||||
"huggingface/MiniMaxAI/MiniMax-M2.5": "minimax-m2.5",
|
||||
"huggingface/MiniMaxAI/MiniMax-M2.7": "MiniMax-M2.7",
|
||||
"huggingface/Qwen/Qwen3-235B-A22B-Thinking-2507": "qwen3-235b-a22b-2507",
|
||||
"huggingface/Qwen/Qwen3-Coder-480B-A35B-Instruct": "qwen3-coder:480b",
|
||||
"huggingface/Qwen/Qwen3-Coder-Next": "qwen3-coder-next",
|
||||
"huggingface/Qwen/Qwen3-Next-80B-A3B-Instruct": "qwen3-next:80b",
|
||||
"huggingface/Qwen/Qwen3-Next-80B-A3B-Thinking": "qwen3-next:80b",
|
||||
"huggingface/Qwen/Qwen3.5-397B-A17B": "qwen3.5-397b-a17b",
|
||||
"huggingface/XiaomiMiMo/MiMo-V2-Flash": "mimo-v2-flash",
|
||||
"huggingface/deepseek-ai/DeepSeek-R1-0528": "deepseek-r1-0528",
|
||||
"huggingface/deepseek-ai/DeepSeek-V3.2": "deepseek-v3.2",
|
||||
"huggingface/moonshotai/Kimi-K2-Instruct": "kimi-k2",
|
||||
"huggingface/moonshotai/Kimi-K2-Instruct-0905": "kimi-k2-0905",
|
||||
"huggingface/moonshotai/Kimi-K2-Thinking": "kimi-k2-thinking",
|
||||
"huggingface/moonshotai/Kimi-K2.5": "kimi-k2.5",
|
||||
"huggingface/zai-org/GLM-4.7": "glm-4.7",
|
||||
"huggingface/zai-org/GLM-4.7-Flash": "glm-4.7-flash",
|
||||
"huggingface/zai-org/GLM-5": "glm-5",
|
||||
"huggingface/zai-org/GLM-5.1": "glm-5.1",
|
||||
// ── minimax ───────────────────────────────────────────────────────────────
|
||||
"minimax/MiniMax-M2": "minimax-m2",
|
||||
"minimax/MiniMax-M2.1": "minimax-m2.1",
|
||||
"minimax/MiniMax-M2.5": "minimax-m2.5",
|
||||
"minimax/MiniMax-M2.5-highspeed": "minimax-m2.5-highspeed",
|
||||
"minimax/MiniMax-M2.7": "MiniMax-M2.7",
|
||||
"minimax/MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed",
|
||||
"minimax-cn/MiniMax-M2": "minimax-m2",
|
||||
"minimax-cn/MiniMax-M2.1": "minimax-m2.1",
|
||||
"minimax-cn/MiniMax-M2.5": "minimax-m2.5",
|
||||
"minimax-cn/MiniMax-M2.5-highspeed": "minimax-m2.5-highspeed",
|
||||
"minimax-cn/MiniMax-M2.7": "MiniMax-M2.7",
|
||||
"minimax-cn/MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed",
|
||||
// ── kimi-coding ───────────────────────────────────────────────────────────
|
||||
// Already canonical wire_ids — included for completeness; resolver falls
|
||||
// back to wire_id anyway.
|
||||
"kimi-coding/kimi-k2.6": "kimi-k2.6",
|
||||
"kimi-coding/kimi-k2-thinking": "kimi-k2-thinking",
|
||||
// kimi-for-coding is a CUSTOM_MODELS alias for kimi-k2.6 (same name, same
|
||||
// price, same wire shape) — verified via getModels("kimi-coding"). Collapse
|
||||
// to kimi-k2.6 so routesFor("kimi-k2.6") sees both routes.
|
||||
"kimi-coding/kimi-for-coding": "kimi-k2.6",
|
||||
// opencode wire IDs that need to map through
|
||||
"opencode/kimi-k2.5": "kimi-k2.5",
|
||||
"opencode-go/kimi-k2.5": "kimi-k2.5",
|
||||
// ── openrouter ────────────────────────────────────────────────────────────
|
||||
"openrouter/anthropic/claude-3-haiku": "claude-3-haiku",
|
||||
"openrouter/anthropic/claude-3.5-haiku": "claude-3-5-haiku",
|
||||
"openrouter/anthropic/claude-3.7-sonnet": "claude-3-7-sonnet",
|
||||
"openrouter/anthropic/claude-3.7-sonnet:thinking": "claude-3-7-sonnet",
|
||||
"openrouter/anthropic/claude-haiku-4.5": "claude-haiku-4-5",
|
||||
"openrouter/anthropic/claude-opus-4": "claude-opus-4",
|
||||
"openrouter/anthropic/claude-opus-4.1": "claude-opus-4-1",
|
||||
"openrouter/anthropic/claude-opus-4.5": "claude-opus-4-5",
|
||||
"openrouter/anthropic/claude-opus-4.6": "claude-opus-4-6",
|
||||
"openrouter/anthropic/claude-opus-4.6-fast": "claude-opus-4-6",
|
||||
"openrouter/anthropic/claude-opus-4.7": "claude-opus-4-7",
|
||||
"openrouter/anthropic/claude-sonnet-4": "claude-sonnet-4",
|
||||
"openrouter/anthropic/claude-sonnet-4.5": "claude-sonnet-4-5",
|
||||
"openrouter/anthropic/claude-sonnet-4.6": "claude-sonnet-4-6",
|
||||
"openrouter/deepseek/deepseek-chat": "deepseek-chat",
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324": "deepseek-chat",
|
||||
"openrouter/deepseek/deepseek-chat-v3.1": "deepseek-chat",
|
||||
"openrouter/deepseek/deepseek-r1": "deepseek-r1",
|
||||
"openrouter/deepseek/deepseek-r1-0528": "deepseek-r1-0528",
|
||||
"openrouter/deepseek/deepseek-v3.1-terminus": "deepseek-chat",
|
||||
"openrouter/deepseek/deepseek-v3.2": "deepseek-v3.2",
|
||||
"openrouter/deepseek/deepseek-v3.2-exp": "deepseek-v3.2",
|
||||
"openrouter/google/gemini-2.0-flash-001": "gemini-2.0-flash",
|
||||
"openrouter/google/gemini-2.0-flash-lite-001": "gemini-2.0-flash",
|
||||
"openrouter/google/gemini-2.5-flash": "gemini-2.5-flash",
|
||||
"openrouter/google/gemini-2.5-flash-lite": "gemini-2.5-flash-lite",
|
||||
"openrouter/google/gemini-2.5-flash-lite-preview-09-2025":
|
||||
"gemini-2.5-flash-lite",
|
||||
"openrouter/google/gemini-2.5-pro": "gemini-2.5-pro",
|
||||
"openrouter/google/gemini-2.5-pro-preview": "gemini-2.5-pro",
|
||||
"openrouter/google/gemini-2.5-pro-preview-05-06": "gemini-2.5-pro",
|
||||
"openrouter/google/gemini-3-flash-preview": "gemini-3-flash-preview",
|
||||
"openrouter/google/gemini-3.1-flash-lite-preview":
|
||||
"gemini-3.1-flash-lite-preview",
|
||||
"openrouter/google/gemini-3.1-pro-preview": "gemini-3.1-pro-preview",
|
||||
"openrouter/google/gemini-3.1-pro-preview-customtools":
|
||||
"gemini-3.1-pro-preview",
|
||||
"openrouter/google/gemma-4-26b-a4b-it": "gemma-4-26b-a4b-it",
|
||||
"openrouter/google/gemma-4-26b-a4b-it:free": "gemma-4-26b-a4b-it",
|
||||
"openrouter/google/gemma-4-31b-it": "gemma-4-31b-it",
|
||||
"openrouter/google/gemma-4-31b-it:free": "gemma-4-31b-it",
|
||||
"openrouter/meta-llama/llama-3-8b-instruct": "llama-3-8b-instruct",
|
||||
"openrouter/meta-llama/llama-3.1-70b-instruct": "llama-3.1-70b-instruct",
|
||||
"openrouter/meta-llama/llama-3.1-8b-instruct": "llama-3.1-8b-instruct",
|
||||
"openrouter/meta-llama/llama-3.3-70b-instruct": "llama-3.3-70b-instruct",
|
||||
"openrouter/meta-llama/llama-3.3-70b-instruct:free": "llama-3.3-70b-instruct",
|
||||
"openrouter/meta-llama/llama-4-scout": "llama-4-scout",
|
||||
"openrouter/minimax/minimax-m1": "minimax-m1",
|
||||
"openrouter/minimax/minimax-m2": "minimax-m2",
|
||||
"openrouter/minimax/minimax-m2.1": "minimax-m2.1",
|
||||
"openrouter/minimax/minimax-m2.5": "minimax-m2.5",
|
||||
"openrouter/minimax/minimax-m2.5:free": "minimax-m2.5",
|
||||
"openrouter/minimax/minimax-m2.7": "MiniMax-M2.7",
|
||||
"openrouter/mistralai/codestral-2508": "codestral-latest",
|
||||
"openrouter/mistralai/devstral-2512": "devstral-2512",
|
||||
"openrouter/mistralai/devstral-medium": "devstral-medium-latest",
|
||||
"openrouter/mistralai/devstral-small": "devstral-small-2507",
|
||||
"openrouter/mistralai/ministral-14b-2512": "mistral-small-latest",
|
||||
"openrouter/mistralai/ministral-3b-2512": "ministral-3b-latest",
|
||||
"openrouter/mistralai/ministral-8b-2512": "ministral-8b-latest",
|
||||
"openrouter/mistralai/mistral-large": "mistral-large-latest",
|
||||
"openrouter/mistralai/mistral-large-2407": "mistral-large-latest",
|
||||
"openrouter/mistralai/mistral-large-2411": "mistral-large-2411",
|
||||
"openrouter/mistralai/mistral-large-2512": "mistral-large-2512",
|
||||
"openrouter/mistralai/mistral-medium-3": "mistral-medium-latest",
|
||||
"openrouter/mistralai/mistral-medium-3.1": "mistral-medium-latest",
|
||||
"openrouter/mistralai/mistral-nemo": "mistral-nemo",
|
||||
"openrouter/mistralai/mistral-small-2603": "mistral-small-2603",
|
||||
"openrouter/mistralai/mistral-small-3.2-24b-instruct": "mistral-small-latest",
|
||||
"openrouter/mistralai/mistral-small-creative": "mistral-small-latest",
|
||||
"openrouter/mistralai/mixtral-8x22b-instruct": "open-mixtral-8x22b",
|
||||
"openrouter/mistralai/mixtral-8x7b-instruct": "open-mixtral-8x7b",
|
||||
"openrouter/mistralai/pixtral-large-2411": "pixtral-large-latest",
|
||||
"openrouter/mistralai/voxtral-small-24b-2507": "voxtral-small-24b-2507",
|
||||
"openrouter/moonshotai/kimi-k2": "kimi-k2",
|
||||
"openrouter/moonshotai/kimi-k2-0905": "kimi-k2-0905",
|
||||
"openrouter/moonshotai/kimi-k2-thinking": "kimi-k2-thinking",
|
||||
"openrouter/moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
"openrouter/nvidia/nemotron-3-nano-30b-a3b": "nemotron-3-nano-30b",
|
||||
"openrouter/nvidia/nemotron-3-nano-30b-a3b:free": "nemotron-3-nano-30b",
|
||||
"openrouter/nvidia/nemotron-3-super-120b-a12b": "nemotron-3-super",
|
||||
"openrouter/nvidia/nemotron-3-super-120b-a12b:free": "nemotron-3-super",
|
||||
"openrouter/nvidia/nemotron-nano-12b-v2-vl:free": "nemotron-nano-12b-v2",
|
||||
"openrouter/nvidia/nemotron-nano-9b-v2": "nemotron-nano-9b-v2",
|
||||
"openrouter/nvidia/nemotron-nano-9b-v2:free": "nemotron-nano-9b-v2",
|
||||
"openrouter/openai/gpt-4": "gpt-4",
|
||||
"openrouter/openai/gpt-4-turbo": "gpt-4-turbo",
|
||||
"openrouter/openai/gpt-4o": "gpt-4o",
|
||||
"openrouter/openai/gpt-4o-mini": "gpt-4o-mini",
|
||||
"openrouter/openai/gpt-5": "gpt-5",
|
||||
"openrouter/openai/gpt-5-mini": "gpt-5-mini",
|
||||
"openrouter/openai/gpt-5-nano": "gpt-5-nano",
|
||||
"openrouter/openai/gpt-5-pro": "gpt-5-pro",
|
||||
"openrouter/openai/gpt-5.1": "gpt-5.1",
|
||||
"openrouter/openai/gpt-5.1-codex": "gpt-5.1-codex",
|
||||
"openrouter/openai/gpt-5.1-codex-max": "gpt-5.1-codex-max",
|
||||
"openrouter/openai/gpt-5.1-codex-mini": "gpt-5.1-codex-mini",
|
||||
"openrouter/openai/gpt-5.2": "gpt-5.2",
|
||||
"openrouter/openai/gpt-5.2-codex": "gpt-5.2-codex",
|
||||
"openrouter/openai/gpt-5.3-codex": "gpt-5.3-codex",
|
||||
"openrouter/openai/gpt-5.4": "gpt-5.4",
|
||||
"openrouter/openai/gpt-5.4-mini": "gpt-5.4-mini",
|
||||
"openrouter/openai/gpt-5.4-nano": "gpt-5.4-nano",
|
||||
"openrouter/openai/gpt-5.4-pro": "gpt-5.4-pro",
|
||||
"openrouter/openai/gpt-oss-120b": "gpt-oss-120b",
|
||||
"openrouter/openai/gpt-oss-120b:free": "gpt-oss-120b",
|
||||
"openrouter/openai/gpt-oss-20b": "gpt-oss-20b",
|
||||
"openrouter/openai/gpt-oss-20b:free": "gpt-oss-20b",
|
||||
"openrouter/openai/gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b",
|
||||
"openrouter/openai/o1": "o1",
|
||||
"openrouter/openai/o3": "o3",
|
||||
"openrouter/openai/o4-mini": "o4-mini",
|
||||
"openrouter/openai/o4-mini-deep-research": "o4-mini-deep-research",
|
||||
"openrouter/qwen/qwen3-coder": "qwen3-coder:480b",
|
||||
"openrouter/qwen/qwen3-coder:free": "qwen3-coder:480b",
|
||||
"openrouter/qwen/qwen3-coder-next": "qwen3-coder-next",
|
||||
"openrouter/qwen/qwen3-max": "qwen3-max",
|
||||
"openrouter/qwen/qwen3-next-80b-a3b-instruct": "qwen3-next:80b",
|
||||
"openrouter/qwen/qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b",
|
||||
"openrouter/qwen/qwen3-next-80b-a3b-thinking": "qwen3-next:80b",
|
||||
"openrouter/x-ai/grok-3": "grok-3",
|
||||
"openrouter/x-ai/grok-3-mini": "grok-3-mini",
|
||||
"openrouter/x-ai/grok-4": "grok-4",
|
||||
"openrouter/z-ai/glm-4.5": "glm-4.5",
|
||||
"openrouter/z-ai/glm-4.5-air": "glm-4.5-air",
|
||||
"openrouter/z-ai/glm-4.5-air:free": "glm-4.5-air",
|
||||
"openrouter/z-ai/glm-4.6": "glm-4.6",
|
||||
"openrouter/z-ai/glm-4.7": "glm-4.7",
|
||||
"openrouter/z-ai/glm-4.7-flash": "glm-4.7-flash",
|
||||
"openrouter/z-ai/glm-5": "glm-5",
|
||||
"openrouter/z-ai/glm-5-turbo": "glm-5-turbo",
|
||||
"openrouter/z-ai/glm-5.1": "glm-5.1",
|
||||
"openrouter/z-ai/glm-5v-turbo": "glm-5v-turbo",
|
||||
"openrouter/xiaomi/mimo-v2-flash": "mimo-v2-flash",
|
||||
"openrouter/xiaomi/mimo-v2-omni": "mimo-v2-omni",
|
||||
"openrouter/xiaomi/mimo-v2-pro": "mimo-v2-pro",
|
||||
// ── vercel-ai-gateway ─────────────────────────────────────────────────────
|
||||
"vercel-ai-gateway/anthropic/claude-3-haiku": "claude-3-haiku",
|
||||
"vercel-ai-gateway/anthropic/claude-3.5-haiku": "claude-3-5-haiku",
|
||||
"vercel-ai-gateway/anthropic/claude-3.7-sonnet": "claude-3-7-sonnet",
|
||||
"vercel-ai-gateway/anthropic/claude-haiku-4.5": "claude-haiku-4-5",
|
||||
"vercel-ai-gateway/anthropic/claude-opus-4": "claude-opus-4",
|
||||
"vercel-ai-gateway/anthropic/claude-opus-4.1": "claude-opus-4-1",
|
||||
"vercel-ai-gateway/anthropic/claude-opus-4.5": "claude-opus-4-5",
|
||||
"vercel-ai-gateway/anthropic/claude-opus-4.6": "claude-opus-4-6",
|
||||
"vercel-ai-gateway/anthropic/claude-opus-4.7": "claude-opus-4-7",
|
||||
"vercel-ai-gateway/anthropic/claude-sonnet-4": "claude-sonnet-4",
|
||||
"vercel-ai-gateway/anthropic/claude-sonnet-4.5": "claude-sonnet-4-5",
|
||||
"vercel-ai-gateway/anthropic/claude-sonnet-4.6": "claude-sonnet-4-6",
|
||||
"vercel-ai-gateway/deepseek/deepseek-r1": "deepseek-r1",
|
||||
"vercel-ai-gateway/deepseek/deepseek-v3": "deepseek-chat",
|
||||
"vercel-ai-gateway/deepseek/deepseek-v3.1": "deepseek-chat",
|
||||
"vercel-ai-gateway/deepseek/deepseek-v3.1-terminus": "deepseek-chat",
|
||||
"vercel-ai-gateway/deepseek/deepseek-v3.2": "deepseek-v3.2",
|
||||
"vercel-ai-gateway/deepseek/deepseek-v3.2-thinking": "deepseek-v3.2",
|
||||
"vercel-ai-gateway/google/gemini-2.0-flash": "gemini-2.0-flash",
|
||||
"vercel-ai-gateway/google/gemini-2.0-flash-lite": "gemini-2.0-flash",
|
||||
"vercel-ai-gateway/google/gemini-2.5-flash": "gemini-2.5-flash",
|
||||
"vercel-ai-gateway/google/gemini-2.5-flash-lite": "gemini-2.5-flash-lite",
|
||||
"vercel-ai-gateway/google/gemini-2.5-pro": "gemini-2.5-pro",
|
||||
"vercel-ai-gateway/google/gemini-3-flash": "gemini-3-flash-preview",
|
||||
"vercel-ai-gateway/google/gemini-3-pro-preview": "gemini-3-pro-preview",
|
||||
"vercel-ai-gateway/google/gemini-3.1-flash-lite-preview":
|
||||
"gemini-3.1-flash-lite-preview",
|
||||
"vercel-ai-gateway/google/gemini-3.1-pro-preview": "gemini-3.1-pro-preview",
|
||||
"vercel-ai-gateway/minimax/minimax-m2": "minimax-m2",
|
||||
"vercel-ai-gateway/minimax/minimax-m2.1": "minimax-m2.1",
|
||||
"vercel-ai-gateway/minimax/minimax-m2.1-lightning": "minimax-m2.1",
|
||||
"vercel-ai-gateway/minimax/minimax-m2.5": "minimax-m2.5",
|
||||
"vercel-ai-gateway/minimax/minimax-m2.5-highspeed": "minimax-m2.5-highspeed",
|
||||
"vercel-ai-gateway/minimax/minimax-m2.7": "MiniMax-M2.7",
|
||||
"vercel-ai-gateway/minimax/minimax-m2.7-highspeed": "MiniMax-M2.7-highspeed",
|
||||
"vercel-ai-gateway/mistral/codestral": "codestral-latest",
|
||||
"vercel-ai-gateway/mistral/devstral-2": "devstral-2512",
|
||||
"vercel-ai-gateway/mistral/devstral-small": "devstral-small-2507",
|
||||
"vercel-ai-gateway/mistral/devstral-small-2": "devstral-small-2507",
|
||||
"vercel-ai-gateway/mistral/ministral-3b": "ministral-3b-latest",
|
||||
"vercel-ai-gateway/mistral/ministral-8b": "ministral-8b-latest",
|
||||
"vercel-ai-gateway/mistral/mistral-medium": "mistral-medium-latest",
|
||||
"vercel-ai-gateway/mistral/mistral-small": "mistral-small-latest",
|
||||
"vercel-ai-gateway/moonshotai/kimi-k2": "kimi-k2",
|
||||
"vercel-ai-gateway/moonshotai/kimi-k2-0905": "kimi-k2-0905",
|
||||
"vercel-ai-gateway/moonshotai/kimi-k2-thinking": "kimi-k2-thinking",
|
||||
"vercel-ai-gateway/moonshotai/kimi-k2-thinking-turbo": "kimi-k2-thinking-turbo",
|
||||
"vercel-ai-gateway/moonshotai/kimi-k2-turbo": "kimi-k2-turbo",
|
||||
"vercel-ai-gateway/moonshotai/kimi-k2.5": "kimi-k2.5",
|
||||
"vercel-ai-gateway/openai/gpt-4-turbo": "gpt-4-turbo",
|
||||
"vercel-ai-gateway/openai/gpt-4o": "gpt-4o",
|
||||
"vercel-ai-gateway/openai/gpt-4o-mini": "gpt-4o-mini",
|
||||
"vercel-ai-gateway/openai/gpt-5": "gpt-5",
|
||||
"vercel-ai-gateway/openai/gpt-5-mini": "gpt-5-mini",
|
||||
"vercel-ai-gateway/openai/gpt-5-nano": "gpt-5-nano",
|
||||
"vercel-ai-gateway/openai/gpt-5-pro": "gpt-5-pro",
|
||||
"vercel-ai-gateway/openai/gpt-5.1-codex": "gpt-5.1-codex",
|
||||
"vercel-ai-gateway/openai/gpt-5.1-codex-max": "gpt-5.1-codex-max",
|
||||
"vercel-ai-gateway/openai/gpt-5.1-codex-mini": "gpt-5.1-codex-mini",
|
||||
"vercel-ai-gateway/openai/gpt-5.2": "gpt-5.2",
|
||||
"vercel-ai-gateway/openai/gpt-5.2-codex": "gpt-5.2-codex",
|
||||
"vercel-ai-gateway/openai/gpt-5.3-codex": "gpt-5.3-codex",
|
||||
"vercel-ai-gateway/openai/gpt-5.4": "gpt-5.4",
|
||||
"vercel-ai-gateway/openai/gpt-5.4-mini": "gpt-5.4-mini",
|
||||
"vercel-ai-gateway/openai/gpt-5.4-nano": "gpt-5.4-nano",
|
||||
"vercel-ai-gateway/openai/gpt-5.4-pro": "gpt-5.4-pro",
|
||||
"vercel-ai-gateway/openai/o1": "o1",
|
||||
"vercel-ai-gateway/openai/o3": "o3",
|
||||
"vercel-ai-gateway/openai/o4-mini": "o4-mini",
|
||||
"vercel-ai-gateway/xai/grok-3": "grok-3",
|
||||
"vercel-ai-gateway/xai/grok-3-mini": "grok-3-mini",
|
||||
"vercel-ai-gateway/xai/grok-4": "grok-4",
|
||||
"vercel-ai-gateway/zai/glm-4.5": "glm-4.5",
|
||||
"vercel-ai-gateway/zai/glm-4.5-air": "glm-4.5-air",
|
||||
"vercel-ai-gateway/zai/glm-4.6": "glm-4.6",
|
||||
"vercel-ai-gateway/zai/glm-4.7": "glm-4.7",
|
||||
"vercel-ai-gateway/zai/glm-4.7-flash": "glm-4.7-flash",
|
||||
"vercel-ai-gateway/zai/glm-5": "glm-5",
|
||||
"vercel-ai-gateway/zai/glm-5-turbo": "glm-5-turbo",
|
||||
"vercel-ai-gateway/zai/glm-5.1": "glm-5.1",
|
||||
"vercel-ai-gateway/zai/glm-5v-turbo": "glm-5v-turbo",
|
||||
"vercel-ai-gateway/xiaomi/mimo-v2-flash": "mimo-v2-flash",
|
||||
"vercel-ai-gateway/xiaomi/mimo-v2-pro": "mimo-v2-pro",
|
||||
// ── zai ──────────────────────────────────────────────────────────────────
|
||||
// zai models already have clean IDs (glm-5.1, etc.) — no mapping needed
|
||||
};
|
||||
|
||||
/**
 * Canonical id → generation tag.
 * Same-generation routes are eligible for direct failover (no downgrade signal).
 *
 * Lookup convention: consumers read `GENERATION[canonical] ?? canonical`
 * (see resolveEntry), so an id missing here simply acts as its own
 * one-member generation — absence is benign, not an error.
 *
 * Keys must match canonical ids exactly, including the mixed-case
 * "MiniMax-M2.7" family, which preserves the upstream casing used by
 * CANONICAL_BY_ROUTE.
 */
const GENERATION: Record<CanonicalId, string> = {
  // ── Kimi K2 family ────────────────────────────────────────────────────────
  "kimi-k2": "k2",
  "kimi-k2-0905": "k2", // same generation, post-release patch
  "kimi-k2-instruct": "k2",
  "kimi-k2-turbo": "k2",
  "kimi-k2.5": "k2.5",
  "kimi-k2.6": "k2.6",
  "kimi-k2-thinking": "k2-thinking",
  "kimi-k2-thinking-turbo": "k2-thinking",
  "kimi-for-coding": "k2.6", // wire alias for kimi-k2.6
  // ── Claude 3.x ───────────────────────────────────────────────────────────
  "claude-3-haiku": "haiku-3",
  "claude-3-sonnet": "sonnet-3",
  "claude-3-opus": "opus-3",
  "claude-3-5-haiku": "haiku-3.5",
  "claude-3-5-sonnet": "sonnet-3.5",
  "claude-3-7-sonnet": "sonnet-3.7",
  // ── Claude 4.x ───────────────────────────────────────────────────────────
  // Both dash ("claude-haiku-4-5") and dot ("claude-haiku-4.5") spellings are
  // listed because some providers emit dot-notation ids as canonical.
  "claude-haiku-4-5": "haiku-4",
  "claude-haiku-4.5": "haiku-4", // dot-notation variant (github-copilot)
  "claude-sonnet-4": "sonnet-4",
  "claude-sonnet-4-5": "sonnet-4",
  "claude-sonnet-4-6": "sonnet-4",
  "claude-sonnet-4.5": "sonnet-4",
  "claude-sonnet-4.6": "sonnet-4",
  "claude-opus-4": "opus-4",
  "claude-opus-4-1": "opus-4",
  "claude-opus-4-5": "opus-4",
  "claude-opus-4-6": "opus-4",
  "claude-opus-4-7": "opus-4",
  "claude-opus-4.5": "opus-4",
  "claude-opus-4.6": "opus-4",
  "claude-opus-4.7": "opus-4",
  // ── Gemini ────────────────────────────────────────────────────────────────
  "gemini-2.0-flash": "2",
  "gemini-2.5-flash": "2.5",
  "gemini-2.5-flash-lite": "2.5",
  "gemini-2.5-pro": "2.5",
  "gemini-3-flash-preview": "3",
  "gemini-3-pro-preview": "3",
  "gemini-3.1-flash-lite-preview": "3.1",
  "gemini-3.1-pro-preview": "3.1",
  // ── GPT / OpenAI ─────────────────────────────────────────────────────────
  "gpt-4": "4",
  "gpt-4-turbo": "4",
  "gpt-4o": "4o",
  "gpt-4o-mini": "4o",
  "gpt-4.1": "4.1",
  "gpt-4.1-mini": "4.1",
  "gpt-4.1-nano": "4.1",
  "gpt-5": "5",
  "gpt-5-mini": "5",
  "gpt-5-nano": "5",
  "gpt-5-pro": "5",
  "gpt-5.1": "5.1",
  "gpt-5.1-codex": "5.1",
  "gpt-5.1-codex-max": "5.1",
  "gpt-5.1-codex-mini": "5.1",
  "gpt-5.2": "5.2",
  "gpt-5.2-codex": "5.2",
  "gpt-5.3-codex": "5.3",
  "gpt-5.4": "5.4",
  "gpt-5.4-mini": "5.4",
  "gpt-5.4-nano": "5.4",
  "gpt-5.4-pro": "5.4",
  "gpt-5.3-codex-spark": "5.3",
  "gpt-5-mini-latest": "5",
  o1: "o1",
  o3: "o3",
  "o4-mini": "o4",
  "o4-mini-deep-research": "o4",
  // ── DeepSeek ─────────────────────────────────────────────────────────────
  "deepseek-chat": "v3",
  "deepseek-v3.2": "v3.2",
  "deepseek-r1": "r1",
  "deepseek-r1-0528": "r1",
  // ── MiniMax ───────────────────────────────────────────────────────────────
  "minimax-m2": "m2",
  "minimax-m2.1": "m2.1",
  "minimax-m2.5": "m2.5",
  "minimax-m2.5-highspeed": "m2.5",
  "MiniMax-M2.7": "m2.7",
  "MiniMax-M2.7-highspeed": "m2.7",
  "minimax-m1": "m1",
  // ── GLM (ZAI) ─────────────────────────────────────────────────────────────
  "glm-4.5": "glm-4.5",
  "glm-4.5-air": "glm-4.5",
  "glm-4.6": "glm-4.6",
  "glm-4.7": "glm-4.7",
  "glm-4.7-flash": "glm-4.7",
  "glm-4.7-flashx": "glm-4.7",
  "glm-5": "glm-5",
  "glm-5-turbo": "glm-5",
  "glm-5.1": "glm-5.1",
  "glm-5v-turbo": "glm-5",
  // ── Mistral / Devstral ───────────────────────────────────────────────────
  "codestral-latest": "codestral",
  "devstral-2512": "devstral-2",
  "devstral-medium-latest": "devstral-medium",
  "devstral-medium-2507": "devstral-medium",
  "devstral-small-2507": "devstral-small",
  "devstral-small-2505": "devstral-small",
  "labs-devstral-small-2512": "devstral-small",
  "magistral-small": "magistral-small",
  "mistral-large-2411": "mistral-large",
  "mistral-large-2512": "mistral-large",
  "mistral-large-latest": "mistral-large",
  "mistral-medium-latest": "mistral-medium",
  "mistral-medium-2505": "mistral-medium",
  "mistral-medium-2508": "mistral-medium",
  "mistral-nemo": "mistral-nemo",
  "mistral-small-latest": "mistral-small",
  "mistral-small-2506": "mistral-small",
  "mistral-small-2603": "mistral-small",
  "ministral-3b-latest": "ministral-3b",
  "ministral-8b-latest": "ministral-8b",
  "pixtral-large-latest": "pixtral-large",
  "pixtral-12b": "pixtral-12b",
  // ── Qwen ─────────────────────────────────────────────────────────────────
  "qwen3-coder:480b": "qwen3-coder",
  "qwen3-coder-next": "qwen3-coder",
  "qwen3-next:80b": "qwen3-next",
  // ── XAI (Grok) ───────────────────────────────────────────────────────────
  "grok-3": "grok-3",
  "grok-3-mini": "grok-3",
  "grok-4": "grok-4",
  // ── MiMo (Xiaomi) ────────────────────────────────────────────────────────
  "mimo-v2-flash": "mimo-v2",
  "mimo-v2-omni": "mimo-v2",
  "mimo-v2-pro": "mimo-v2",
};
|
||||
|
||||
/**
|
||||
* Canonical id → capability tier.
|
||||
* Lifted from MODEL_CAPABILITY_TIER in model-router.js.
|
||||
* CRITICAL: kimi-k2.5 is its own tier entry (NOT aliased to kimi-k2.6).
|
||||
*/
|
||||
const TIER: Record<CanonicalId, CapabilityTier> = {
|
||||
// ── Light ─────────────────────────────────────────────────────────────────
|
||||
"claude-haiku-4-5": "light",
|
||||
"claude-3-5-haiku": "light",
|
||||
"claude-3-haiku": "light",
|
||||
"gpt-4o-mini": "light",
|
||||
"gpt-4.1-mini": "light",
|
||||
"gpt-4.1-nano": "light",
|
||||
"gpt-5-mini": "light",
|
||||
"gpt-5-nano": "light",
|
||||
"gpt-5.1-codex-mini": "light",
|
||||
"gpt-5.3-codex-spark": "light",
|
||||
"gemini-2.0-flash": "light",
|
||||
"gemini-2.5-flash-lite": "light",
|
||||
"gemini-3.1-flash-lite-preview": "light",
|
||||
"glm-4.7-flash": "light",
|
||||
"glm-4.7-flashx": "light",
|
||||
"ministral-3b-latest": "light",
|
||||
"ministral-8b-latest": "light",
|
||||
"devstral-small-2505": "light",
|
||||
"devstral-small-2507": "light",
|
||||
"labs-devstral-small-2512": "light",
|
||||
// ── Standard ──────────────────────────────────────────────────────────────
|
||||
"claude-sonnet-4-6": "standard",
|
||||
"claude-sonnet-4-5": "standard",
|
||||
"claude-sonnet-4": "standard",
|
||||
"claude-3-5-sonnet": "standard",
|
||||
"gpt-4o": "standard",
|
||||
"gpt-4.1": "standard",
|
||||
"gpt-5.1-codex-max": "standard",
|
||||
"gpt-5.4-mini": "standard",
|
||||
"gemini-2.5-pro": "standard",
|
||||
"gemini-3-flash-preview": "standard",
|
||||
"gemini-2.5-flash": "standard",
|
||||
"deepseek-chat": "standard",
|
||||
"glm-4.7": "standard",
|
||||
"qwen3-coder:480b": "standard",
|
||||
"qwen3-coder-next": "standard",
|
||||
// kimi-k2.5 is standard — its own entry (NOT aliased to kimi-k2.6)
|
||||
"kimi-k2.5": "standard",
|
||||
"kimi-k2.6": "standard",
|
||||
"kimi-for-coding": "standard",
|
||||
"MiniMax-M2.7": "standard",
|
||||
"MiniMax-M2.7-highspeed": "standard",
|
||||
"codestral-latest": "standard",
|
||||
"devstral-2512": "standard",
|
||||
"devstral-medium-2507": "standard",
|
||||
"devstral-medium-latest": "standard",
|
||||
"magistral-small": "standard",
|
||||
"mistral-medium-2505": "standard",
|
||||
"mistral-medium-2508": "standard",
|
||||
"mistral-medium-latest": "standard",
|
||||
"mistral-nemo": "standard",
|
||||
"mistral-small-2506": "standard",
|
||||
"mistral-small-2603": "standard",
|
||||
"mistral-small-latest": "standard",
|
||||
"pixtral-12b": "standard",
|
||||
// ── Heavy ─────────────────────────────────────────────────────────────────
|
||||
"claude-opus-4-6": "heavy",
|
||||
"claude-opus-4-7": "heavy",
|
||||
"claude-opus-4-5": "heavy",
|
||||
"claude-3-opus": "heavy",
|
||||
"gpt-4-turbo": "heavy",
|
||||
"gpt-5": "heavy",
|
||||
"gpt-5-pro": "heavy",
|
||||
"gpt-5.1": "heavy",
|
||||
"gpt-5.2": "heavy",
|
||||
"gpt-5.2-codex": "heavy",
|
||||
"gpt-5.3-codex": "heavy",
|
||||
"gpt-5.4": "heavy",
|
||||
"gpt-5.4-pro": "heavy",
|
||||
"gpt-5.5": "heavy",
|
||||
o1: "heavy",
|
||||
o3: "heavy",
|
||||
"o4-mini": "heavy",
|
||||
"o4-mini-deep-research": "heavy",
|
||||
"gemini-3.1-pro-preview": "heavy",
|
||||
"gemini-3-pro-preview": "heavy",
|
||||
"kimi-k2-thinking": "heavy",
|
||||
"kimi-k2-thinking-turbo": "heavy",
|
||||
"qwen3-next:80b": "heavy",
|
||||
"glm-5": "heavy",
|
||||
"glm-5-turbo": "heavy",
|
||||
"glm-5.1": "heavy",
|
||||
"glm-5v-turbo": "heavy",
|
||||
"magistral-medium-latest": "heavy",
|
||||
"mistral-large-2411": "heavy",
|
||||
"mistral-large-2512": "heavy",
|
||||
"mistral-large-latest": "heavy",
|
||||
"open-mixtral-8x22b": "heavy",
|
||||
"pixtral-large-latest": "heavy",
|
||||
};
|
||||
|
||||
// ─── Module-level index built at startup ─────────────────────────────────────
|
||||
|
||||
/** Flattened upstream catalog: routeKey → upstream ModelEntry */
|
||||
const _ENTRY_BY_ROUTE = new Map<
|
||||
RouteKey,
|
||||
{
|
||||
id: string;
|
||||
name: string;
|
||||
api: string;
|
||||
provider: string;
|
||||
baseUrl: string;
|
||||
reasoning?: boolean;
|
||||
input?: string[];
|
||||
capabilities?: Record<string, unknown>;
|
||||
cost?: { input?: number; output?: number; cacheRead?: number; cacheWrite?: number };
|
||||
contextWindow?: number;
|
||||
maxTokens?: number;
|
||||
}
|
||||
>();
|
||||
|
||||
/** routeKey → resolved ResolvedModel (lazily populated cache) */
|
||||
const _RESOLVED_CACHE = new Map<RouteKey, ResolvedModel | null>();
|
||||
|
||||
/** canonical id → set of route keys */
|
||||
const _ROUTES_BY_CANONICAL = new Map<CanonicalId, RouteKey[]>();
|
||||
|
||||
// Build the indexes once at module load. `getModels(provider)` returns an
|
||||
// Array<ModelEntry> (after generated + CUSTOM_MODELS merge); we key by
|
||||
// `${provider}/${entry.id}`.
|
||||
(function buildIndex() {
|
||||
type UpstreamEntry = {
|
||||
id: string;
|
||||
name: string;
|
||||
api: string;
|
||||
provider: string;
|
||||
baseUrl: string;
|
||||
reasoning?: boolean;
|
||||
input?: string[];
|
||||
capabilities?: Record<string, unknown>;
|
||||
cost?: { input?: number; output?: number; cacheRead?: number; cacheWrite?: number };
|
||||
contextWindow?: number;
|
||||
maxTokens?: number;
|
||||
};
|
||||
for (const provider of getProviders()) {
|
||||
const entries = getModels(provider) as unknown as UpstreamEntry[];
|
||||
if (!entries) continue;
|
||||
for (const entry of entries) {
|
||||
const wireId = entry.id;
|
||||
const routeKey = `${provider}/${wireId}` as RouteKey;
|
||||
_ENTRY_BY_ROUTE.set(routeKey, entry);
|
||||
|
||||
// Determine canonical id
|
||||
const canonical = CANONICAL_BY_ROUTE[routeKey] ?? wireId;
|
||||
|
||||
// Build reverse index
|
||||
const routes = _ROUTES_BY_CANONICAL.get(canonical) ?? [];
|
||||
routes.push(routeKey);
|
||||
_ROUTES_BY_CANONICAL.set(canonical, routes);
|
||||
}
|
||||
}
|
||||
})();
|
||||
|
||||
// ─── Resolution helpers ───────────────────────────────────────────────────────
|
||||
|
||||
function resolveEntry(
|
||||
routeKey: RouteKey,
|
||||
entry: ReturnType<typeof _ENTRY_BY_ROUTE["get"]>,
|
||||
): ResolvedModel | null {
|
||||
if (!entry) return null;
|
||||
const canonical = CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
|
||||
const generation = GENERATION[canonical] ?? canonical;
|
||||
const tier: CapabilityTier = TIER[canonical] ?? "standard";
|
||||
return {
|
||||
canonical_id: canonical,
|
||||
generation,
|
||||
tier,
|
||||
wire_id: entry.id,
|
||||
provider: entry.provider,
|
||||
api: entry.api,
|
||||
baseUrl: entry.baseUrl,
|
||||
capabilities: entry.capabilities,
|
||||
cost: entry.cost,
|
||||
contextWindow: entry.contextWindow,
|
||||
maxTokens: entry.maxTokens,
|
||||
reasoning: entry.reasoning,
|
||||
inputModalities: entry.input,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Public API ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Look up a (provider, wire_id) pair. Returns null if not in upstream. */
|
||||
export function lookup(
|
||||
provider: string,
|
||||
wireId: string,
|
||||
): ResolvedModel | null {
|
||||
const routeKey = `${provider}/${wireId}` as RouteKey;
|
||||
return lookupRoute(routeKey);
|
||||
}
|
||||
|
||||
/** Same, parsed from a fused route key. */
|
||||
export function lookupRoute(routeKey: RouteKey): ResolvedModel | null {
|
||||
if (_RESOLVED_CACHE.has(routeKey)) {
|
||||
return _RESOLVED_CACHE.get(routeKey) ?? null;
|
||||
}
|
||||
const entry = _ENTRY_BY_ROUTE.get(routeKey);
|
||||
const resolved = entry ? resolveEntry(routeKey, entry) : null;
|
||||
_RESOLVED_CACHE.set(routeKey, resolved);
|
||||
return resolved;
|
||||
}
|
||||
|
||||
/** All routes (across all providers) that resolve to this canonical id. */
|
||||
export function routesFor(canonicalId: CanonicalId): ResolvedModel[] {
|
||||
const routeKeys = _ROUTES_BY_CANONICAL.get(canonicalId) ?? [];
|
||||
return routeKeys
|
||||
.map((rk) => lookupRoute(rk))
|
||||
.filter((r): r is ResolvedModel => r !== null);
|
||||
}
|
||||
|
||||
/** Map a route key to a canonical id, or null if unmappable. */
|
||||
export function canonicalIdFor(
|
||||
routeKey: RouteKey,
|
||||
): CanonicalId | null {
|
||||
const entry = _ENTRY_BY_ROUTE.get(routeKey);
|
||||
if (!entry) return null;
|
||||
return CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
|
||||
}
|
||||
|
||||
/** Capability tier of a canonical id. */
|
||||
export function tierFor(canonicalId: CanonicalId): CapabilityTier | null {
|
||||
return TIER[canonicalId] ?? null;
|
||||
}
|
||||
|
||||
/** Generation of a canonical id (e.g. "k2.5"). */
|
||||
export function generationFor(canonicalId: CanonicalId): string | null {
|
||||
return GENERATION[canonicalId] ?? null;
|
||||
}
|
||||
|
||||
/** Two canonical ids share a generation (failover may cross). */
|
||||
export function sameGeneration(a: CanonicalId, b: CanonicalId): boolean {
|
||||
const ga = GENERATION[a];
|
||||
const gb = GENERATION[b];
|
||||
if (ga === undefined || gb === undefined) return false;
|
||||
return ga === gb;
|
||||
}
|
||||
|
||||
/** Iterate every canonical id known to SF. */
|
||||
export function allCanonicalIds(): CanonicalId[] {
|
||||
return Array.from(_ROUTES_BY_CANONICAL.keys());
|
||||
}
|
||||
|
||||
/** Build a route key from a resolved model (for metrics aggregation). */
|
||||
export function routeKeyOf(m: {
|
||||
provider: string;
|
||||
wire_id: string;
|
||||
}): RouteKey {
|
||||
return `${m.provider}/${m.wire_id}` as RouteKey;
|
||||
}
|
||||
|
|
@ -5,9 +5,13 @@ import { getProviderCapabilities } from "@singularity-forge/ai";
|
|||
import { getToolCompatibility } from "@singularity-forge/coding-agent";
|
||||
import { tierOrdinal } from "./complexity-classifier.js";
|
||||
import { lookupModelCost } from "./model-cost-table.js";
|
||||
import { tierFor } from "./model-registry.js";
|
||||
// ─── Known Model Tiers ───────────────────────────────────────────────────────
|
||||
// Maps known model IDs to their capability tier. Used when tier_models is not
|
||||
// explicitly configured to pick the best available model for each tier.
|
||||
// MIGRATED: this table moved to model-registry.ts as the TIER map. Kept here
|
||||
// as a deprecated re-export shim so external callers that import
|
||||
// MODEL_CAPABILITY_TIER directly don't break during the transition. Swarm C
|
||||
// will remove this shim once metrics + remaining consumers are migrated.
|
||||
/** @deprecated Use tierFor() from model-registry.js instead. */
|
||||
export const MODEL_CAPABILITY_TIER = {
|
||||
// Light-tier models (cheapest)
|
||||
"claude-haiku-4-5": "light",
|
||||
|
|
@ -961,7 +965,10 @@ const MODEL_CAPABILITY_ALIASES = {
|
|||
"gpt-oss:120b": "gpt-4o",
|
||||
"gpt-oss:20b": "gpt-4o-mini",
|
||||
"kimi-k2:1t": "kimi-k2.6",
|
||||
"kimi-k2.5": "kimi-k2.6",
|
||||
// NOTE: "kimi-k2.5" → "kimi-k2.6" alias REMOVED. K2.5 is a distinct
|
||||
// generation from K2.6; aliasing them caused K2.5 to silently inherit
|
||||
// K2.6's tier (latent downgrade-conflation bug). K2.5 now resolves via
|
||||
// tierFor("kimi-k2.5") in the registry directly.
|
||||
"kimi-for-coding": "kimi-k2.6",
|
||||
"kimi-k2.6:cloud": "kimi-k2.6",
|
||||
"kimi-k2.6-cloud": "kimi-k2.6",
|
||||
|
|
@ -1384,8 +1391,12 @@ export function defaultRoutingConfig() {
|
|||
}
|
||||
// ─── Internal ────────────────────────────────────────────────────────────────
|
||||
export function getModelTier(modelId) {
|
||||
// Prefer registry lookup using canonical id — this eliminates the K2.5→K2.6
|
||||
// downgrade alias that was in MODEL_CAPABILITY_ALIASES.
|
||||
const canonicalId = canonicalCapabilityModelId(modelId);
|
||||
// Check exact match first
|
||||
const registryTier = tierFor(canonicalId);
|
||||
if (registryTier) return registryTier;
|
||||
// Fall back to the local table for models not yet in the registry.
|
||||
if (MODEL_CAPABILITY_TIER[canonicalId])
|
||||
return MODEL_CAPABILITY_TIER[canonicalId];
|
||||
const sizeTier = inferTierFromModelSize(canonicalId);
|
||||
|
|
|
|||
|
|
@ -11,13 +11,7 @@ import { execSync, spawnSync } from "node:child_process";
|
|||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import {
|
||||
AuthType,
|
||||
CodeAssistServer,
|
||||
getOauthClient,
|
||||
makeFakeConfig,
|
||||
setupUser,
|
||||
} from "@google/gemini-cli-core";
|
||||
import { snapshotGeminiCliAccount } from "@singularity-forge/google-gemini-cli-provider";
|
||||
import { visibleWidth } from "@singularity-forge/tui";
|
||||
import { sfHome } from "../sf-home.js";
|
||||
|
||||
|
|
@ -203,6 +197,8 @@ async function fetchClaudeUsage() {
|
|||
// Gemini Usage
|
||||
// ============================================================================
|
||||
async function fetchGeminiUsage(_modelRegistry) {
|
||||
// Existence check is a fast pre-flight so we surface a friendly "not logged
|
||||
// in" message without paying for the OAuth bootstrap inside snapshotGeminiCliAccount.
|
||||
const credPath = path.join(os.homedir(), ".gemini", "oauth_creds.json");
|
||||
if (!fs.existsSync(credPath)) {
|
||||
return {
|
||||
|
|
@ -213,47 +209,26 @@ async function fetchGeminiUsage(_modelRegistry) {
|
|||
};
|
||||
}
|
||||
try {
|
||||
const config = makeFakeConfig();
|
||||
const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config);
|
||||
const userData = await setupUser(authClient, config);
|
||||
const projectId = userData.projectId;
|
||||
if (!projectId) {
|
||||
const snapshot = await snapshotGeminiCliAccount();
|
||||
if (!snapshot) {
|
||||
return {
|
||||
provider: "gemini",
|
||||
displayName: "Gemini",
|
||||
windows: [],
|
||||
error: "No Code Assist project",
|
||||
error: "No Code Assist project or empty quota response",
|
||||
};
|
||||
}
|
||||
const server = new CodeAssistServer(authClient, projectId, { headers: {} });
|
||||
const data = await server.retrieveUserQuota({
|
||||
project: projectId,
|
||||
});
|
||||
const quotas = {};
|
||||
for (const bucket of data.buckets || []) {
|
||||
const model = bucket.modelId || "unknown";
|
||||
const frac = bucket.remainingFraction ?? 1;
|
||||
if (!quotas[model] || frac < quotas[model].remainingFraction) {
|
||||
quotas[model] = {
|
||||
remainingFraction: frac,
|
||||
resetTime: bucket.resetTime,
|
||||
};
|
||||
}
|
||||
}
|
||||
const windows = [];
|
||||
for (const [model, quota] of Object.entries(quotas).sort(([a], [b]) =>
|
||||
a.localeCompare(b),
|
||||
)) {
|
||||
const resetDate = quota.resetTime ? new Date(quota.resetTime) : undefined;
|
||||
windows.push({
|
||||
label: model.replace(/^gemini-/, "").slice(0, 7),
|
||||
usedPercent: (1 - quota.remainingFraction) * 100,
|
||||
const windows = snapshot.models.map((m) => {
|
||||
const resetDate = m.resetTime ? new Date(m.resetTime) : undefined;
|
||||
return {
|
||||
label: m.modelId.replace(/^gemini-/, "").slice(0, 7),
|
||||
usedPercent: m.usedFraction * 100,
|
||||
resetDescription:
|
||||
resetDate && !Number.isNaN(resetDate.getTime())
|
||||
? formatReset(resetDate)
|
||||
: undefined,
|
||||
});
|
||||
}
|
||||
};
|
||||
});
|
||||
return { provider: "gemini", displayName: "Gemini", windows };
|
||||
} catch (e) {
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue