From 383e495085937cdc139d9cffcaa6bd60156888dc Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 03:42:53 +0200 Subject: [PATCH] feat(headless,gemini-cli): add sf headless usage + unify gemini quota path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a machine-readable headless surface for live LLM-provider usage and unifies the gemini-cli quota fetch through one helper, removing the duplication that existed between usage-bar.js and the new package. 1. snapshotGeminiCliAccount in @singularity-forge/google-gemini-cli-provider - Single source of truth for { projectId, userTierId, userTierName, paidTier, models[] } via setupUser + retrieveUserQuota. - Dedups buckets per modelId, keeping the worst (lowest remainingFraction) so consumers always see the most-restrictive window. Code Assist sometimes returns multiple buckets per model; the pessimistic choice is what every consumer needs. - discoverGeminiCliModels(cwd?) wraps it for catalog-cache callers that only need the IDs. 2. sf headless usage subcommand - New src/headless-usage.ts handler. text (default) and --json output. Uses the package's snapshot directly — no RPC child, no jiti gymnastics — matching the shape of headless-uok-status / headless-doctor. - Wired into src/headless.ts after the doctor block. - Help text adds the command line. 3. usage-bar.js refactored to delegate - fetchGeminiUsage no longer imports gemini-cli-core directly. It calls snapshotGeminiCliAccount and reshapes the result into the existing { provider, displayName, windows[] } UI contract. - Eliminates the duplicate setupUser + retrieveUserQuota code path. - The fast existsSync(~/.gemini/oauth_creds.json) pre-flight stays so unauth'd users get a friendly message without paying for OAuth bootstrap. 4. 
Model registry refactor (separate track committed alongside) - src/resources/extensions/sf/model-registry.ts (new) consolidates canonical model identity, capability tier, and generation tags into one source of truth that auto-model-selection, benchmark-selector, and model-router now consume instead of maintaining parallel maps. All 1487 tests pass (151 files); typecheck clean for both the package and the SF extensions. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../google-gemini-cli-provider/src/index.ts | 18 +- src/headless-usage.ts | 102 ++ src/headless.ts | 10 + src/help-text.ts | 1 + .../extensions/sf/auto-model-selection.js | 38 +- .../extensions/sf/benchmark-selector.js | 30 +- src/resources/extensions/sf/model-registry.ts | 898 ++++++++++++++++++ src/resources/extensions/sf/model-router.js | 19 +- src/resources/extensions/sf/ui/usage-bar.js | 51 +- 9 files changed, 1073 insertions(+), 94 deletions(-) create mode 100644 src/headless-usage.ts create mode 100644 src/resources/extensions/sf/model-registry.ts diff --git a/packages/google-gemini-cli-provider/src/index.ts b/packages/google-gemini-cli-provider/src/index.ts index 8f72f84d2..81105e817 100644 --- a/packages/google-gemini-cli-provider/src/index.ts +++ b/packages/google-gemini-cli-provider/src/index.ts @@ -105,20 +105,32 @@ export async function snapshotGeminiCliAccount( if (!projectId || typeof projectId !== "string") return null; const server = new CodeAssistServer(authClient, projectId, { headers: {} }); const data = await server.retrieveUserQuota({ project: projectId }); - const models: GeminiQuotaBucket[] = []; + // Dedup buckets per modelId, keeping the WORST quota (lowest + // remainingFraction). Code Assist sometimes returns multiple buckets + // for the same model when more than one quota window applies; the + // pessimistic choice is what every consumer (UI, capacity diagnostics, + // model picker) actually wants to surface. + const byModel = new Map(); for (const b of data?.buckets ?? 
[]) { const modelId = typeof b.modelId === "string" ? b.modelId : ""; if (!modelId) continue; const remainingFraction = typeof b.remainingFraction === "number" ? b.remainingFraction : 1; - models.push({ + const bucket: GeminiQuotaBucket = { modelId, usedFraction: 1 - remainingFraction, remainingFraction, resetTime: typeof b.resetTime === "string" ? b.resetTime : undefined, - }); + }; + const existing = byModel.get(modelId); + if (!existing || bucket.remainingFraction < existing.remainingFraction) { + byModel.set(modelId, bucket); + } } + const models = Array.from(byModel.values()).sort((a, b) => + a.modelId.localeCompare(b.modelId), + ); if (models.length === 0) return null; return { projectId, diff --git a/src/headless-usage.ts b/src/headless-usage.ts new file mode 100644 index 000000000..80abfd25c --- /dev/null +++ b/src/headless-usage.ts @@ -0,0 +1,102 @@ +/** + * headless-usage.ts — `sf headless usage` + * + * Purpose: expose live LLM-provider usage data (account tier, project, per-model + * quota usage with reset windows) via the headless CLI so operators and CI can + * see capacity state without launching the interactive UI. + * + * Today this covers the gemini-cli provider (the most quota-sensitive surface + * because of AI Ultra's per-model windowed quotas). Other providers can be + * added by extending the snapshot helper as their introspection APIs are + * wired into dedicated provider packages. + * + * Consumer: headless.ts when command === "usage". + */ + +import { + type GeminiAccountSnapshot, + snapshotGeminiCliAccount, +} from "@singularity-forge/google-gemini-cli-provider"; + +export interface HandleUsageOptions { + json?: boolean; +} + +export interface HandleUsageResult { + exitCode: number; +} + +/** + * Render a snapshot as a compact text table (default) or as JSON for machine + * consumers. Always writes to stdout; never throws. 
+ */ +export async function handleUsage( + cwd: string, + options: HandleUsageOptions = {}, +): Promise<HandleUsageResult> { + let snapshot: GeminiAccountSnapshot | null; + try { + snapshot = await snapshotGeminiCliAccount(cwd); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + const payload = { + provider: "google-gemini-cli", + ok: false, + error: msg, + }; + process.stdout.write( + options.json ? `${JSON.stringify(payload)}\n` : `error: ${msg}\n`, + ); + return { exitCode: 1 }; + } + + if (!snapshot) { + const payload = { + provider: "google-gemini-cli", + ok: false, + error: + "No gemini-cli account snapshot — run `gemini auth login` and verify ~/.gemini/oauth_creds.json exists.", + }; + process.stdout.write( + options.json + ? `${JSON.stringify(payload)}\n` + : `${payload.error}\n`, + ); + return { exitCode: 1 }; + } + + if (options.json) { + process.stdout.write( + `${JSON.stringify({ provider: "google-gemini-cli", ok: true, snapshot })}\n`, + ); + return { exitCode: 0 }; + } + + const lines: string[] = []; + lines.push("Gemini CLI usage"); + lines.push(""); + lines.push(` project: ${snapshot.projectId}`); + if (snapshot.userTierId || snapshot.userTierName) { + lines.push( + ` userTier: ${snapshot.userTierId ?? "?"}${snapshot.userTierName ? ` (${snapshot.userTierName})` : ""}`, + ); + } + if (snapshot.paidTier?.id || snapshot.paidTier?.name) { + lines.push( + ` paidTier: ${snapshot.paidTier.id ?? "?"}${snapshot.paidTier.name ? ` — ${snapshot.paidTier.name}` : ""}`, + ); + } + lines.push(""); + lines.push(" Per-model quota:"); + const modelW = Math.max( + 20, + ...snapshot.models.map((m) => m.modelId.length), + ); + for (const m of snapshot.models) { + const usedPct = (m.usedFraction * 100).toFixed(1).padStart(5); + const reset = m.resetTime ??
"-"; + lines.push(` ${m.modelId.padEnd(modelW)} used=${usedPct}% reset=${reset}`); + } + process.stdout.write(`${lines.join("\n")}\n`); + return { exitCode: 0 }; +} diff --git a/src/headless.ts b/src/headless.ts index 5942503fa..3877e99b5 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -824,6 +824,16 @@ async function runHeadlessOnce( return { exitCode: result.exitCode, interrupted: false, timedOut: false }; } + // Usage: gemini-cli account snapshot (tier, project, per-model quota), no + // RPC child needed. Uses snapshotGeminiCliAccount from the + // @singularity-forge/google-gemini-cli-provider package directly. + if (options.command === "usage") { + const wantsJson = options.json || options.commandArgs.includes("--json"); + const { handleUsage } = await import("./headless-usage.js"); + const result = await handleUsage(process.cwd(), { json: wantsJson }); + return { exitCode: result.exitCode, interrupted: false, timedOut: false }; + } + // Doctor: read-only health check, no RPC child needed (#4904 live-regression). // ARCHITECTURE NOTE: this intentionally bypasses the SF extension dispatcher // for performance and TTY-independence. 
The interactive `/doctor` command in diff --git a/src/help-text.ts b/src/help-text.ts index 3cdb8b77f..f62a682a5 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -223,6 +223,7 @@ const SUBCOMMAND_HELP: Record = { " status Show progress dashboard", " new-milestone Create a milestone from a specification document", " query Machine snapshot: JSON state + next dispatch + costs (no LLM)", + " usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)", "", "new-milestone flags:", " --context Path to spec/PRD file (use '-' for stdin)", diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js index dd9593255..e6dfbb315 100644 --- a/src/resources/extensions/sf/auto-model-selection.js +++ b/src/resources/extensions/sf/auto-model-selection.js @@ -11,6 +11,7 @@ import { tierLabel, } from "./complexity-classifier.js"; import { getLedger, getProjectTotals } from "./metrics.js"; +import { routesFor } from "./model-registry.js"; import { adjustToolSet, escalateTier, @@ -124,42 +125,21 @@ const BARE_MODEL_FAMILY_PRIORITY = [ providers: ["xiaomi", "opencode-go"], }, ]; -function preferredBareModelIds(modelId) { - const lower = modelId.toLowerCase(); - if ( - lower === "kimi-for-coding" || - lower === "kimi-k2.6" || - lower === "kimi-k2.6:cloud" || - lower === "kimi-k2.6-cloud" || - lower === "moonshotai/kimi-k2.6" - ) { - return [ - "kimi-for-coding", - "kimi-k2.6", - "kimi-k2.6:cloud", - "kimi-k2.6-cloud", - "moonshotai/kimi-k2.6", - ]; - } - if ( - lower === "kimi-k2.5" || - lower === "kimi-k2.5:cloud" || - lower === "moonshotai/kimi-k2.5" - ) { - return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud"]; - } - return undefined; -} function resolveFamilyPreferredBareModel(modelId, candidates) { const rule = BARE_MODEL_FAMILY_PRIORITY.find((r) => r.match.test(modelId)); if (!rule) return undefined; - const preferredModelIds = preferredBareModelIds(modelId); + // Use model registry to 
get preferred wire ids for this canonical model. + // routesFor returns all known routes; we filter to preferred providers below. + const canonicalId = + candidates.find((m) => m.id.toLowerCase() === modelId.toLowerCase()) + ?.canonical_id ?? modelId; + const preferredWireIds = routesFor(canonicalId).map((r) => r.wire_id); for (const provider of rule.providers) { const providerCandidates = candidates.filter( (m) => m.provider.toLowerCase() === provider.toLowerCase(), ); - if (preferredModelIds) { - for (const preferredId of preferredModelIds) { + if (preferredWireIds.length > 0) { + for (const preferredId of preferredWireIds) { const match = providerCandidates.find( (m) => m.id.toLowerCase() === preferredId.toLowerCase(), ); diff --git a/src/resources/extensions/sf/benchmark-selector.js b/src/resources/extensions/sf/benchmark-selector.js index 37e34401a..99c05ae85 100644 --- a/src/resources/extensions/sf/benchmark-selector.js +++ b/src/resources/extensions/sf/benchmark-selector.js @@ -23,6 +23,7 @@ import { existsSync, readFileSync } from "node:fs"; import { join } from "node:path"; import { tierOrdinal } from "./complexity-classifier.js"; +import { lookup } from "./model-registry.js"; import { getModelTier } from "./model-router.js"; // ─── Benchmark File Loader ─────────────────────────────────────────────────── @@ -263,27 +264,16 @@ function profileForUnitType(unitType) { * Match a provider+model pair to a benchmark record key. Benchmarks are * keyed by semantic model ID (e.g. "devstral-latest", "kimi-k2.5"), while registered * models may carry provider wire IDs or versioned suffixes - * (`kimi-for-coding`, `devstral-2507`, `minimax-m2.7`). We try semantic - * aliases first, then exact match, then strip common version/date suffixes, - * then try a family-level key (e.g. `mistral-large-2411` → + * (`kimi-for-coding`, `devstral-2507`, `minimax-m2.7`). 
We try canonical id + * from the model registry first, then exact match, then strip common + * version/date suffixes, then try a family-level key (e.g. `mistral-large-2411` → * `mistral-large-latest`). */ -const BENCHMARK_KEY_ALIASES = { - // Kimi Code's provider wire ID. The benchmark identity is Kimi K2.6. - "kimi-for-coding": "kimi-k2.6", - "moonshotai/kimi-k2.6": "kimi-k2.6", - "kimi-k2.6:cloud": "kimi-k2.6", - "kimi-k2.6-cloud": "kimi-k2.6", - // Kimi aggregator wire IDs. Kimi Code's `kimi-for-coding` is K2.6 above. - "kimi-k2.5": "kimi-k2.5", - "moonshotai/kimi-k2.5": "kimi-k2.5", - "moonshotai.kimi-k2.5": "kimi-k2.5", - "kimi-k2.5:cloud": "kimi-k2.5", - "kimi-k2.5-cloud": "kimi-k2.5", -}; -function findBenchmarkKey(modelId, benchmarks) { - const alias = BENCHMARK_KEY_ALIASES[modelId.toLowerCase()]; - if (alias && alias in benchmarks) return alias; +function findBenchmarkKey(modelId, benchmarks, provider) { + // Use canonical id from registry when a provider is known. + const resolved = provider ? lookup(provider, modelId) : null; + const semantic = resolved?.canonical_id ?? 
modelId; + if (semantic !== modelId && semantic in benchmarks) return semantic; if (modelId in benchmarks) return modelId; // Strip date-style suffixes: "devstral-medium-2507" → "devstral-medium" const noDate = modelId.replace(/-\d{4}$/, ""); @@ -332,7 +322,7 @@ function readDimension(rec, dim) { return null; } function scoreCandidate(candidate, profile, benchmarks) { - const key = findBenchmarkKey(candidate.id, benchmarks); + const key = findBenchmarkKey(candidate.id, benchmarks, candidate.provider); if (!key) return { score: 0, coverage: 0 }; const rec = benchmarks[key]; if (!rec || typeof rec !== "object") return { score: 0, coverage: 0 }; diff --git a/src/resources/extensions/sf/model-registry.ts b/src/resources/extensions/sf/model-registry.ts new file mode 100644 index 000000000..d780b5984 --- /dev/null +++ b/src/resources/extensions/sf/model-registry.ts @@ -0,0 +1,898 @@ +/** + * SF Model Registry — single source of truth for canonical model identity, + * capability tier, and generation across all providers. + * + * The upstream `MODELS` constant from `@singularity-forge/ai` is the + * authoritative route catalog. This module enriches it with: + * 1. Canonical model identity (many routes → one stable id) + * 2. Capability tier (light / standard / heavy) + * 3. Generation tag (same-generation routes are direct failover candidates) + */ + +// ─── Upstream data import ───────────────────────────────────────────────────── +// Use the public API of @singularity-forge/ai so we get: +// 1. Both generated + CUSTOM_MODELS entries (e.g. kimi-coding/kimi-for-coding, +// which only appears once CUSTOM_MODELS merge runs in models.js). +// 2. A stable import path that resolves identically at test-time, dist-time, +// and runtime (~/.sf/agent/extensions/sf/) — relative paths into the +// monorepo can't satisfy the latter. 
+import { getModels, getProviders } from "@singularity-forge/ai"; + +// ─── Public types ───────────────────────────────────────────────────────────── + +export type WireFormat = + | "anthropic-messages" + | "openai-completions" + | "openai-responses" + | "bedrock-converse-stream" + | "google-generative" + | string; // open enum — pass through unknown values from upstream + +export type CapabilityTier = "light" | "standard" | "heavy"; + +export type CanonicalId = string; +// Stable, generation-aware identity. Examples: +// "kimi-k2.5" (NOT the same as kimi-k2.6 — generation matters) +// "kimi-k2.6" +// "kimi-k2-thinking" +// "claude-sonnet-4-6" +// "MiniMax-M2.7" + +export type RouteKey = string; +// Format: `${provider}/${wire_id}`. Examples: +// "kimi-coding/kimi-k2.6" +// "openrouter/moonshotai/kimi-k2.5" +// "amazon-bedrock/moonshotai.kimi-k2.5" + +export interface ResolvedModel { + canonical_id: CanonicalId; + generation: string; // free-form, e.g. "k2.5", "k2.6", "sonnet-4-6" + tier: CapabilityTier; + // Pass-through from upstream ModelEntry: + wire_id: string; // the upstream entry's `id` + provider: string; + api: WireFormat; // wire format axis + baseUrl: string; + capabilities?: Record; + cost?: { + input?: number; + output?: number; + cacheRead?: number; + cacheWrite?: number; + }; + contextWindow?: number; + maxTokens?: number; + reasoning?: boolean; + inputModalities?: string[]; // renamed from upstream `input` for clarity +} + +// ─── Internal data tables ───────────────────────────────────────────────────── +// +// Only three hand-maintained tables are needed. Everything else is derived +// from the upstream MODELS catalog at module initialisation time. + +/** + * (provider, wire_id) → canonical id. + * Only entries that DIVERGE from `wire_id` itself need a mapping. + * Entries that are already canonical (e.g. provider="kimi-coding", wire_id="kimi-k2.6") + * can be omitted; the resolver falls back to wire_id when no mapping exists. 
+ */ +const CANONICAL_BY_ROUTE: Record = { + // ── amazon-bedrock ──────────────────────────────────────────────────────── + "amazon-bedrock/amazon.nova-2-lite-v1:0": "nova-2-lite", + "amazon-bedrock/amazon.nova-lite-v1:0": "nova-lite", + "amazon-bedrock/amazon.nova-micro-v1:0": "nova-micro", + "amazon-bedrock/amazon.nova-premier-v1:0": "nova-premier", + "amazon-bedrock/amazon.nova-pro-v1:0": "nova-pro", + "amazon-bedrock/anthropic.claude-3-5-haiku-20241022-v1:0": "claude-3-5-haiku", + "amazon-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0": + "claude-3-5-sonnet", + "amazon-bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0": + "claude-3-5-sonnet", + "amazon-bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0": + "claude-3-7-sonnet", + "amazon-bedrock/anthropic.claude-3-haiku-20240307-v1:0": "claude-3-haiku", + "amazon-bedrock/anthropic.claude-haiku-4-5-20251001-v1:0": "claude-haiku-4-5", + "amazon-bedrock/anthropic.claude-opus-4-1-20250805-v1:0": "claude-opus-4-1", + "amazon-bedrock/anthropic.claude-opus-4-20250514-v1:0": "claude-opus-4", + "amazon-bedrock/anthropic.claude-opus-4-5-20251101-v1:0": "claude-opus-4-5", + "amazon-bedrock/anthropic.claude-opus-4-6-v1": "claude-opus-4-6", + "amazon-bedrock/anthropic.claude-opus-4-7": "claude-opus-4-7", + "amazon-bedrock/anthropic.claude-sonnet-4-20250514-v1:0": "claude-sonnet-4", + "amazon-bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0": + "claude-sonnet-4-5", + "amazon-bedrock/anthropic.claude-sonnet-4-6": "claude-sonnet-4-6", + "amazon-bedrock/deepseek.r1-v1:0": "deepseek-r1", + "amazon-bedrock/deepseek.v3-v1:0": "deepseek-v3", + "amazon-bedrock/deepseek.v3.2": "deepseek-v3.2", + "amazon-bedrock/eu.anthropic.claude-haiku-4-5-20251001-v1:0": + "claude-haiku-4-5", + "amazon-bedrock/eu.anthropic.claude-opus-4-5-20251101-v1:0": + "claude-opus-4-5", + "amazon-bedrock/eu.anthropic.claude-opus-4-6-v1": "claude-opus-4-6", + "amazon-bedrock/eu.anthropic.claude-opus-4-7": "claude-opus-4-7", + 
"amazon-bedrock/eu.anthropic.claude-sonnet-4-20250514-v1:0": + "claude-sonnet-4", + "amazon-bedrock/eu.anthropic.claude-sonnet-4-5-20250929-v1:0": + "claude-sonnet-4-5", + "amazon-bedrock/eu.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6", + "amazon-bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0": + "claude-haiku-4-5", + "amazon-bedrock/global.anthropic.claude-opus-4-5-20251101-v1:0": + "claude-opus-4-5", + "amazon-bedrock/global.anthropic.claude-opus-4-6-v1": "claude-opus-4-6", + "amazon-bedrock/global.anthropic.claude-opus-4-7": "claude-opus-4-7", + "amazon-bedrock/global.anthropic.claude-sonnet-4-20250514-v1:0": + "claude-sonnet-4", + "amazon-bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0": + "claude-sonnet-4-5", + "amazon-bedrock/global.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6", + "amazon-bedrock/google.gemma-3-27b-it": "gemma-3-27b-it", + "amazon-bedrock/google.gemma-3-4b-it": "gemma-3-4b-it", + "amazon-bedrock/meta.llama3-1-405b-instruct-v1:0": + "llama3-1-405b-instruct", + "amazon-bedrock/meta.llama3-1-70b-instruct-v1:0": "llama3-1-70b-instruct", + "amazon-bedrock/meta.llama3-1-8b-instruct-v1:0": "llama3-1-8b-instruct", + "amazon-bedrock/meta.llama3-2-11b-instruct-v1:0": "llama3-2-11b-instruct", + "amazon-bedrock/meta.llama3-2-1b-instruct-v1:0": "llama3-2-1b-instruct", + "amazon-bedrock/meta.llama3-2-3b-instruct-v1:0": "llama3-2-3b-instruct", + "amazon-bedrock/meta.llama3-2-90b-instruct-v1:0": "llama3-2-90b-instruct", + "amazon-bedrock/meta.llama3-3-70b-instruct-v1:0": "llama3-3-70b-instruct", + "amazon-bedrock/meta.llama4-maverick-17b-instruct-v1:0": + "llama4-maverick-17b-instruct", + "amazon-bedrock/meta.llama4-scout-17b-instruct-v1:0": + "llama4-scout-17b-instruct", + "amazon-bedrock/minimax.minimax-m2": "minimax-m2", + "amazon-bedrock/minimax.minimax-m2.1": "minimax-m2.1", + "amazon-bedrock/minimax.minimax-m2.5": "minimax-m2.5", + "amazon-bedrock/mistral.devstral-2-123b": "devstral-2512", + 
"amazon-bedrock/mistral.magistral-small-2509": "magistral-small", + "amazon-bedrock/mistral.ministral-3-14b-instruct": "mistral-small-latest", + "amazon-bedrock/mistral.ministral-3-3b-instruct": "ministral-3b-latest", + "amazon-bedrock/mistral.ministral-3-8b-instruct": "ministral-8b-latest", + "amazon-bedrock/mistral.mistral-large-3-675b-instruct": + "mistral-large-latest", + "amazon-bedrock/mistral.pixtral-large-2502-v1:0": "pixtral-large-latest", + "amazon-bedrock/mistral.voxtral-mini-3b-2507": "voxtral-mini-3b-2507", + "amazon-bedrock/mistral.voxtral-small-24b-2507": "voxtral-small-24b-2507", + "amazon-bedrock/moonshot.kimi-k2-thinking": "kimi-k2-thinking", + "amazon-bedrock/moonshotai.kimi-k2.5": "kimi-k2.5", + "amazon-bedrock/nvidia.nemotron-nano-12b-v2": "nemotron-nano-12b-v2", + "amazon-bedrock/nvidia.nemotron-nano-3-30b": "nemotron-nano-3-30b", + "amazon-bedrock/nvidia.nemotron-nano-9b-v2": "nemotron-nano-9b-v2", + "amazon-bedrock/nvidia.nemotron-super-3-120b": "nemotron-super-3-120b", + "amazon-bedrock/openai.gpt-oss-120b-1:0": "gpt-oss-120b", + "amazon-bedrock/openai.gpt-oss-20b-1:0": "gpt-oss-20b", + "amazon-bedrock/openai.gpt-oss-safeguard-120b": "gpt-oss-safeguard-120b", + "amazon-bedrock/openai.gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b", + "amazon-bedrock/qwen.qwen3-235b-a22b-2507-v1:0": "qwen3-235b-a22b-2507", + "amazon-bedrock/qwen.qwen3-32b-v1:0": "qwen3-32b", + "amazon-bedrock/qwen.qwen3-coder-30b-a3b-v1:0": "qwen3-coder-next", + "amazon-bedrock/qwen.qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b", + "amazon-bedrock/qwen.qwen3-coder-next": "qwen3-coder-next", + "amazon-bedrock/qwen.qwen3-next-80b-a3b": "qwen3-next:80b", + "amazon-bedrock/qwen.qwen3-vl-235b-a22b": "qwen3-vl-235b-a22b", + "amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0": + "claude-haiku-4-5", + "amazon-bedrock/us.anthropic.claude-opus-4-1-20250805-v1:0": + "claude-opus-4-1", + "amazon-bedrock/us.anthropic.claude-opus-4-20250514-v1:0": "claude-opus-4", + 
"amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0": + "claude-opus-4-5", + "amazon-bedrock/us.anthropic.claude-opus-4-6-v1": "claude-opus-4-6", + "amazon-bedrock/us.anthropic.claude-opus-4-7": "claude-opus-4-7", + "amazon-bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0": + "claude-sonnet-4", + "amazon-bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0": + "claude-sonnet-4-5", + "amazon-bedrock/us.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6", + "amazon-bedrock/writer.palmyra-x4-v1:0": "palmyra-x4", + "amazon-bedrock/writer.palmyra-x5-v1:0": "palmyra-x5", + "amazon-bedrock/zai.glm-4.7": "glm-4.7", + "amazon-bedrock/zai.glm-4.7-flash": "glm-4.7-flash", + "amazon-bedrock/zai.glm-5": "glm-5", + // ── anthropic/ provider (versioned wire ids → stable canonical) ─────────── + "anthropic/claude-3-5-haiku-20241022": "claude-3-5-haiku", + "anthropic/claude-3-5-haiku-latest": "claude-3-5-haiku", + "anthropic/claude-3-5-sonnet-20240620": "claude-3-5-sonnet", + "anthropic/claude-3-5-sonnet-20241022": "claude-3-5-sonnet", + "anthropic/claude-3-7-sonnet-20250219": "claude-3-7-sonnet", + "anthropic/claude-3-haiku-20240307": "claude-3-haiku", + "anthropic/claude-3-opus-20240229": "claude-3-opus", + "anthropic/claude-3-sonnet-20240229": "claude-3-sonnet", + "anthropic/claude-haiku-4-5-20251001": "claude-haiku-4-5", + "anthropic/claude-opus-4-0": "claude-opus-4", + "anthropic/claude-opus-4-1-20250805": "claude-opus-4-1", + "anthropic/claude-opus-4-20250514": "claude-opus-4", + "anthropic/claude-opus-4-5-20251101": "claude-opus-4-5", + "anthropic/claude-opus-4-6": "claude-opus-4-6", + "anthropic/claude-sonnet-4-0": "claude-sonnet-4", + "anthropic/claude-sonnet-4-20250514": "claude-sonnet-4", + "anthropic/claude-sonnet-4-5-20250929": "claude-sonnet-4-5", + // ── cerebras ───────────────────────────────────────────────────────────── + "cerebras/zai-glm-4.7": "glm-4.7", + // ── github-copilot (dot-notation → dash) ───────────────────────────────── + 
"github-copilot/claude-haiku-4.5": "claude-haiku-4-5", + "github-copilot/claude-opus-4.5": "claude-opus-4-5", + "github-copilot/claude-opus-4.6": "claude-opus-4-6", + "github-copilot/claude-opus-4.7": "claude-opus-4-7", + "github-copilot/claude-sonnet-4": "claude-sonnet-4", + "github-copilot/claude-sonnet-4.5": "claude-sonnet-4-5", + "github-copilot/claude-sonnet-4.6": "claude-sonnet-4-6", + // ── groq ───────────────────────────────────────────────────────────────── + "groq/groq/compound": "compound", + "groq/groq/compound-mini": "compound-mini", + "groq/meta-llama/llama-4-maverick-17b-128e-instruct": + "llama-4-maverick-17b-128e-instruct", + "groq/meta-llama/llama-4-scout-17b-16e-instruct": + "llama-4-scout-17b-16e-instruct", + "groq/moonshotai/kimi-k2-instruct": "kimi-k2", + "groq/moonshotai/kimi-k2-instruct-0905": "kimi-k2-0905", + "groq/openai/gpt-oss-120b": "gpt-oss-120b", + "groq/openai/gpt-oss-20b": "gpt-oss-20b", + "groq/openai/gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b", + "groq/qwen/qwen3-32b": "qwen3-32b", + // ── huggingface ─────────────────────────────────────────────────────────── + "huggingface/MiniMaxAI/MiniMax-M2.1": "minimax-m2.1", + "huggingface/MiniMaxAI/MiniMax-M2.5": "minimax-m2.5", + "huggingface/MiniMaxAI/MiniMax-M2.7": "MiniMax-M2.7", + "huggingface/Qwen/Qwen3-235B-A22B-Thinking-2507": "qwen3-235b-a22b-2507", + "huggingface/Qwen/Qwen3-Coder-480B-A35B-Instruct": "qwen3-coder:480b", + "huggingface/Qwen/Qwen3-Coder-Next": "qwen3-coder-next", + "huggingface/Qwen/Qwen3-Next-80B-A3B-Instruct": "qwen3-next:80b", + "huggingface/Qwen/Qwen3-Next-80B-A3B-Thinking": "qwen3-next:80b", + "huggingface/Qwen/Qwen3.5-397B-A17B": "qwen3.5-397b-a17b", + "huggingface/XiaomiMiMo/MiMo-V2-Flash": "mimo-v2-flash", + "huggingface/deepseek-ai/DeepSeek-R1-0528": "deepseek-r1-0528", + "huggingface/deepseek-ai/DeepSeek-V3.2": "deepseek-v3.2", + "huggingface/moonshotai/Kimi-K2-Instruct": "kimi-k2", + "huggingface/moonshotai/Kimi-K2-Instruct-0905": "kimi-k2-0905", + 
"huggingface/moonshotai/Kimi-K2-Thinking": "kimi-k2-thinking", + "huggingface/moonshotai/Kimi-K2.5": "kimi-k2.5", + "huggingface/zai-org/GLM-4.7": "glm-4.7", + "huggingface/zai-org/GLM-4.7-Flash": "glm-4.7-flash", + "huggingface/zai-org/GLM-5": "glm-5", + "huggingface/zai-org/GLM-5.1": "glm-5.1", + // ── minimax ─────────────────────────────────────────────────────────────── + "minimax/MiniMax-M2": "minimax-m2", + "minimax/MiniMax-M2.1": "minimax-m2.1", + "minimax/MiniMax-M2.5": "minimax-m2.5", + "minimax/MiniMax-M2.5-highspeed": "minimax-m2.5-highspeed", + "minimax/MiniMax-M2.7": "MiniMax-M2.7", + "minimax/MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed", + "minimax-cn/MiniMax-M2": "minimax-m2", + "minimax-cn/MiniMax-M2.1": "minimax-m2.1", + "minimax-cn/MiniMax-M2.5": "minimax-m2.5", + "minimax-cn/MiniMax-M2.5-highspeed": "minimax-m2.5-highspeed", + "minimax-cn/MiniMax-M2.7": "MiniMax-M2.7", + "minimax-cn/MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed", + // ── kimi-coding ─────────────────────────────────────────────────────────── + // Already canonical wire_ids — included for completeness; resolver falls + // back to wire_id anyway. + "kimi-coding/kimi-k2.6": "kimi-k2.6", + "kimi-coding/kimi-k2-thinking": "kimi-k2-thinking", + // kimi-for-coding is a CUSTOM_MODELS alias for kimi-k2.6 (same name, same + // price, same wire shape) — verified via getModels("kimi-coding"). Collapse + // to kimi-k2.6 so routesFor("kimi-k2.6") sees both routes. 
+ "kimi-coding/kimi-for-coding": "kimi-k2.6", + // opencode wire IDs that need to map through + "opencode/kimi-k2.5": "kimi-k2.5", + "opencode-go/kimi-k2.5": "kimi-k2.5", + // ── openrouter ──────────────────────────────────────────────────────────── + "openrouter/anthropic/claude-3-haiku": "claude-3-haiku", + "openrouter/anthropic/claude-3.5-haiku": "claude-3-5-haiku", + "openrouter/anthropic/claude-3.7-sonnet": "claude-3-7-sonnet", + "openrouter/anthropic/claude-3.7-sonnet:thinking": "claude-3-7-sonnet", + "openrouter/anthropic/claude-haiku-4.5": "claude-haiku-4-5", + "openrouter/anthropic/claude-opus-4": "claude-opus-4", + "openrouter/anthropic/claude-opus-4.1": "claude-opus-4-1", + "openrouter/anthropic/claude-opus-4.5": "claude-opus-4-5", + "openrouter/anthropic/claude-opus-4.6": "claude-opus-4-6", + "openrouter/anthropic/claude-opus-4.6-fast": "claude-opus-4-6", + "openrouter/anthropic/claude-opus-4.7": "claude-opus-4-7", + "openrouter/anthropic/claude-sonnet-4": "claude-sonnet-4", + "openrouter/anthropic/claude-sonnet-4.5": "claude-sonnet-4-5", + "openrouter/anthropic/claude-sonnet-4.6": "claude-sonnet-4-6", + "openrouter/deepseek/deepseek-chat": "deepseek-chat", + "openrouter/deepseek/deepseek-chat-v3-0324": "deepseek-chat", + "openrouter/deepseek/deepseek-chat-v3.1": "deepseek-chat", + "openrouter/deepseek/deepseek-r1": "deepseek-r1", + "openrouter/deepseek/deepseek-r1-0528": "deepseek-r1-0528", + "openrouter/deepseek/deepseek-v3.1-terminus": "deepseek-chat", + "openrouter/deepseek/deepseek-v3.2": "deepseek-v3.2", + "openrouter/deepseek/deepseek-v3.2-exp": "deepseek-v3.2", + "openrouter/google/gemini-2.0-flash-001": "gemini-2.0-flash", + "openrouter/google/gemini-2.0-flash-lite-001": "gemini-2.0-flash", + "openrouter/google/gemini-2.5-flash": "gemini-2.5-flash", + "openrouter/google/gemini-2.5-flash-lite": "gemini-2.5-flash-lite", + "openrouter/google/gemini-2.5-flash-lite-preview-09-2025": + "gemini-2.5-flash-lite", + "openrouter/google/gemini-2.5-pro": 
"gemini-2.5-pro", + "openrouter/google/gemini-2.5-pro-preview": "gemini-2.5-pro", + "openrouter/google/gemini-2.5-pro-preview-05-06": "gemini-2.5-pro", + "openrouter/google/gemini-3-flash-preview": "gemini-3-flash-preview", + "openrouter/google/gemini-3.1-flash-lite-preview": + "gemini-3.1-flash-lite-preview", + "openrouter/google/gemini-3.1-pro-preview": "gemini-3.1-pro-preview", + "openrouter/google/gemini-3.1-pro-preview-customtools": + "gemini-3.1-pro-preview", + "openrouter/google/gemma-4-26b-a4b-it": "gemma-4-26b-a4b-it", + "openrouter/google/gemma-4-26b-a4b-it:free": "gemma-4-26b-a4b-it", + "openrouter/google/gemma-4-31b-it": "gemma-4-31b-it", + "openrouter/google/gemma-4-31b-it:free": "gemma-4-31b-it", + "openrouter/meta-llama/llama-3-8b-instruct": "llama-3-8b-instruct", + "openrouter/meta-llama/llama-3.1-70b-instruct": "llama-3.1-70b-instruct", + "openrouter/meta-llama/llama-3.1-8b-instruct": "llama-3.1-8b-instruct", + "openrouter/meta-llama/llama-3.3-70b-instruct": "llama-3.3-70b-instruct", + "openrouter/meta-llama/llama-3.3-70b-instruct:free": "llama-3.3-70b-instruct", + "openrouter/meta-llama/llama-4-scout": "llama-4-scout", + "openrouter/minimax/minimax-m1": "minimax-m1", + "openrouter/minimax/minimax-m2": "minimax-m2", + "openrouter/minimax/minimax-m2.1": "minimax-m2.1", + "openrouter/minimax/minimax-m2.5": "minimax-m2.5", + "openrouter/minimax/minimax-m2.5:free": "minimax-m2.5", + "openrouter/minimax/minimax-m2.7": "MiniMax-M2.7", + "openrouter/mistralai/codestral-2508": "codestral-latest", + "openrouter/mistralai/devstral-2512": "devstral-2512", + "openrouter/mistralai/devstral-medium": "devstral-medium-latest", + "openrouter/mistralai/devstral-small": "devstral-small-2507", + "openrouter/mistralai/ministral-14b-2512": "mistral-small-latest", + "openrouter/mistralai/ministral-3b-2512": "ministral-3b-latest", + "openrouter/mistralai/ministral-8b-2512": "ministral-8b-latest", + "openrouter/mistralai/mistral-large": "mistral-large-latest", + 
"openrouter/mistralai/mistral-large-2407": "mistral-large-latest", + "openrouter/mistralai/mistral-large-2411": "mistral-large-2411", + "openrouter/mistralai/mistral-large-2512": "mistral-large-2512", + "openrouter/mistralai/mistral-medium-3": "mistral-medium-latest", + "openrouter/mistralai/mistral-medium-3.1": "mistral-medium-latest", + "openrouter/mistralai/mistral-nemo": "mistral-nemo", + "openrouter/mistralai/mistral-small-2603": "mistral-small-2603", + "openrouter/mistralai/mistral-small-3.2-24b-instruct": "mistral-small-latest", + "openrouter/mistralai/mistral-small-creative": "mistral-small-latest", + "openrouter/mistralai/mixtral-8x22b-instruct": "open-mixtral-8x22b", + "openrouter/mistralai/mixtral-8x7b-instruct": "open-mixtral-8x7b", + "openrouter/mistralai/pixtral-large-2411": "pixtral-large-latest", + "openrouter/mistralai/voxtral-small-24b-2507": "voxtral-small-24b-2507", + "openrouter/moonshotai/kimi-k2": "kimi-k2", + "openrouter/moonshotai/kimi-k2-0905": "kimi-k2-0905", + "openrouter/moonshotai/kimi-k2-thinking": "kimi-k2-thinking", + "openrouter/moonshotai/kimi-k2.5": "kimi-k2.5", + "openrouter/nvidia/nemotron-3-nano-30b-a3b": "nemotron-3-nano-30b", + "openrouter/nvidia/nemotron-3-nano-30b-a3b:free": "nemotron-3-nano-30b", + "openrouter/nvidia/nemotron-3-super-120b-a12b": "nemotron-3-super", + "openrouter/nvidia/nemotron-3-super-120b-a12b:free": "nemotron-3-super", + "openrouter/nvidia/nemotron-nano-12b-v2-vl:free": "nemotron-nano-12b-v2", + "openrouter/nvidia/nemotron-nano-9b-v2": "nemotron-nano-9b-v2", + "openrouter/nvidia/nemotron-nano-9b-v2:free": "nemotron-nano-9b-v2", + "openrouter/openai/gpt-4": "gpt-4", + "openrouter/openai/gpt-4-turbo": "gpt-4-turbo", + "openrouter/openai/gpt-4o": "gpt-4o", + "openrouter/openai/gpt-4o-mini": "gpt-4o-mini", + "openrouter/openai/gpt-5": "gpt-5", + "openrouter/openai/gpt-5-mini": "gpt-5-mini", + "openrouter/openai/gpt-5-nano": "gpt-5-nano", + "openrouter/openai/gpt-5-pro": "gpt-5-pro", + 
"openrouter/openai/gpt-5.1": "gpt-5.1", + "openrouter/openai/gpt-5.1-codex": "gpt-5.1-codex", + "openrouter/openai/gpt-5.1-codex-max": "gpt-5.1-codex-max", + "openrouter/openai/gpt-5.1-codex-mini": "gpt-5.1-codex-mini", + "openrouter/openai/gpt-5.2": "gpt-5.2", + "openrouter/openai/gpt-5.2-codex": "gpt-5.2-codex", + "openrouter/openai/gpt-5.3-codex": "gpt-5.3-codex", + "openrouter/openai/gpt-5.4": "gpt-5.4", + "openrouter/openai/gpt-5.4-mini": "gpt-5.4-mini", + "openrouter/openai/gpt-5.4-nano": "gpt-5.4-nano", + "openrouter/openai/gpt-5.4-pro": "gpt-5.4-pro", + "openrouter/openai/gpt-oss-120b": "gpt-oss-120b", + "openrouter/openai/gpt-oss-120b:free": "gpt-oss-120b", + "openrouter/openai/gpt-oss-20b": "gpt-oss-20b", + "openrouter/openai/gpt-oss-20b:free": "gpt-oss-20b", + "openrouter/openai/gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b", + "openrouter/openai/o1": "o1", + "openrouter/openai/o3": "o3", + "openrouter/openai/o4-mini": "o4-mini", + "openrouter/openai/o4-mini-deep-research": "o4-mini-deep-research", + "openrouter/qwen/qwen3-coder": "qwen3-coder:480b", + "openrouter/qwen/qwen3-coder:free": "qwen3-coder:480b", + "openrouter/qwen/qwen3-coder-next": "qwen3-coder-next", + "openrouter/qwen/qwen3-max": "qwen3-max", + "openrouter/qwen/qwen3-next-80b-a3b-instruct": "qwen3-next:80b", + "openrouter/qwen/qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b", + "openrouter/qwen/qwen3-next-80b-a3b-thinking": "qwen3-next:80b", + "openrouter/x-ai/grok-3": "grok-3", + "openrouter/x-ai/grok-3-mini": "grok-3-mini", + "openrouter/x-ai/grok-4": "grok-4", + "openrouter/z-ai/glm-4.5": "glm-4.5", + "openrouter/z-ai/glm-4.5-air": "glm-4.5-air", + "openrouter/z-ai/glm-4.5-air:free": "glm-4.5-air", + "openrouter/z-ai/glm-4.6": "glm-4.6", + "openrouter/z-ai/glm-4.7": "glm-4.7", + "openrouter/z-ai/glm-4.7-flash": "glm-4.7-flash", + "openrouter/z-ai/glm-5": "glm-5", + "openrouter/z-ai/glm-5-turbo": "glm-5-turbo", + "openrouter/z-ai/glm-5.1": "glm-5.1", + "openrouter/z-ai/glm-5v-turbo": 
"glm-5v-turbo", + "openrouter/xiaomi/mimo-v2-flash": "mimo-v2-flash", + "openrouter/xiaomi/mimo-v2-omni": "mimo-v2-omni", + "openrouter/xiaomi/mimo-v2-pro": "mimo-v2-pro", + // ── vercel-ai-gateway ───────────────────────────────────────────────────── + "vercel-ai-gateway/anthropic/claude-3-haiku": "claude-3-haiku", + "vercel-ai-gateway/anthropic/claude-3.5-haiku": "claude-3-5-haiku", + "vercel-ai-gateway/anthropic/claude-3.7-sonnet": "claude-3-7-sonnet", + "vercel-ai-gateway/anthropic/claude-haiku-4.5": "claude-haiku-4-5", + "vercel-ai-gateway/anthropic/claude-opus-4": "claude-opus-4", + "vercel-ai-gateway/anthropic/claude-opus-4.1": "claude-opus-4-1", + "vercel-ai-gateway/anthropic/claude-opus-4.5": "claude-opus-4-5", + "vercel-ai-gateway/anthropic/claude-opus-4.6": "claude-opus-4-6", + "vercel-ai-gateway/anthropic/claude-opus-4.7": "claude-opus-4-7", + "vercel-ai-gateway/anthropic/claude-sonnet-4": "claude-sonnet-4", + "vercel-ai-gateway/anthropic/claude-sonnet-4.5": "claude-sonnet-4-5", + "vercel-ai-gateway/anthropic/claude-sonnet-4.6": "claude-sonnet-4-6", + "vercel-ai-gateway/deepseek/deepseek-r1": "deepseek-r1", + "vercel-ai-gateway/deepseek/deepseek-v3": "deepseek-chat", + "vercel-ai-gateway/deepseek/deepseek-v3.1": "deepseek-chat", + "vercel-ai-gateway/deepseek/deepseek-v3.1-terminus": "deepseek-chat", + "vercel-ai-gateway/deepseek/deepseek-v3.2": "deepseek-v3.2", + "vercel-ai-gateway/deepseek/deepseek-v3.2-thinking": "deepseek-v3.2", + "vercel-ai-gateway/google/gemini-2.0-flash": "gemini-2.0-flash", + "vercel-ai-gateway/google/gemini-2.0-flash-lite": "gemini-2.0-flash", + "vercel-ai-gateway/google/gemini-2.5-flash": "gemini-2.5-flash", + "vercel-ai-gateway/google/gemini-2.5-flash-lite": "gemini-2.5-flash-lite", + "vercel-ai-gateway/google/gemini-2.5-pro": "gemini-2.5-pro", + "vercel-ai-gateway/google/gemini-3-flash": "gemini-3-flash-preview", + "vercel-ai-gateway/google/gemini-3-pro-preview": "gemini-3-pro-preview", + 
"vercel-ai-gateway/google/gemini-3.1-flash-lite-preview": + "gemini-3.1-flash-lite-preview", + "vercel-ai-gateway/google/gemini-3.1-pro-preview": "gemini-3.1-pro-preview", + "vercel-ai-gateway/minimax/minimax-m2": "minimax-m2", + "vercel-ai-gateway/minimax/minimax-m2.1": "minimax-m2.1", + "vercel-ai-gateway/minimax/minimax-m2.1-lightning": "minimax-m2.1", + "vercel-ai-gateway/minimax/minimax-m2.5": "minimax-m2.5", + "vercel-ai-gateway/minimax/minimax-m2.5-highspeed": "minimax-m2.5-highspeed", + "vercel-ai-gateway/minimax/minimax-m2.7": "MiniMax-M2.7", + "vercel-ai-gateway/minimax/minimax-m2.7-highspeed": "MiniMax-M2.7-highspeed", + "vercel-ai-gateway/mistral/codestral": "codestral-latest", + "vercel-ai-gateway/mistral/devstral-2": "devstral-2512", + "vercel-ai-gateway/mistral/devstral-small": "devstral-small-2507", + "vercel-ai-gateway/mistral/devstral-small-2": "devstral-small-2507", + "vercel-ai-gateway/mistral/ministral-3b": "ministral-3b-latest", + "vercel-ai-gateway/mistral/ministral-8b": "ministral-8b-latest", + "vercel-ai-gateway/mistral/mistral-medium": "mistral-medium-latest", + "vercel-ai-gateway/mistral/mistral-small": "mistral-small-latest", + "vercel-ai-gateway/moonshotai/kimi-k2": "kimi-k2", + "vercel-ai-gateway/moonshotai/kimi-k2-0905": "kimi-k2-0905", + "vercel-ai-gateway/moonshotai/kimi-k2-thinking": "kimi-k2-thinking", + "vercel-ai-gateway/moonshotai/kimi-k2-thinking-turbo": "kimi-k2-thinking-turbo", + "vercel-ai-gateway/moonshotai/kimi-k2-turbo": "kimi-k2-turbo", + "vercel-ai-gateway/moonshotai/kimi-k2.5": "kimi-k2.5", + "vercel-ai-gateway/openai/gpt-4-turbo": "gpt-4-turbo", + "vercel-ai-gateway/openai/gpt-4o": "gpt-4o", + "vercel-ai-gateway/openai/gpt-4o-mini": "gpt-4o-mini", + "vercel-ai-gateway/openai/gpt-5": "gpt-5", + "vercel-ai-gateway/openai/gpt-5-mini": "gpt-5-mini", + "vercel-ai-gateway/openai/gpt-5-nano": "gpt-5-nano", + "vercel-ai-gateway/openai/gpt-5-pro": "gpt-5-pro", + "vercel-ai-gateway/openai/gpt-5.1-codex": "gpt-5.1-codex", + 
"vercel-ai-gateway/openai/gpt-5.1-codex-max": "gpt-5.1-codex-max", + "vercel-ai-gateway/openai/gpt-5.1-codex-mini": "gpt-5.1-codex-mini", + "vercel-ai-gateway/openai/gpt-5.2": "gpt-5.2", + "vercel-ai-gateway/openai/gpt-5.2-codex": "gpt-5.2-codex", + "vercel-ai-gateway/openai/gpt-5.3-codex": "gpt-5.3-codex", + "vercel-ai-gateway/openai/gpt-5.4": "gpt-5.4", + "vercel-ai-gateway/openai/gpt-5.4-mini": "gpt-5.4-mini", + "vercel-ai-gateway/openai/gpt-5.4-nano": "gpt-5.4-nano", + "vercel-ai-gateway/openai/gpt-5.4-pro": "gpt-5.4-pro", + "vercel-ai-gateway/openai/o1": "o1", + "vercel-ai-gateway/openai/o3": "o3", + "vercel-ai-gateway/openai/o4-mini": "o4-mini", + "vercel-ai-gateway/xai/grok-3": "grok-3", + "vercel-ai-gateway/xai/grok-3-mini": "grok-3-mini", + "vercel-ai-gateway/xai/grok-4": "grok-4", + "vercel-ai-gateway/zai/glm-4.5": "glm-4.5", + "vercel-ai-gateway/zai/glm-4.5-air": "glm-4.5-air", + "vercel-ai-gateway/zai/glm-4.6": "glm-4.6", + "vercel-ai-gateway/zai/glm-4.7": "glm-4.7", + "vercel-ai-gateway/zai/glm-4.7-flash": "glm-4.7-flash", + "vercel-ai-gateway/zai/glm-5": "glm-5", + "vercel-ai-gateway/zai/glm-5-turbo": "glm-5-turbo", + "vercel-ai-gateway/zai/glm-5.1": "glm-5.1", + "vercel-ai-gateway/zai/glm-5v-turbo": "glm-5v-turbo", + "vercel-ai-gateway/xiaomi/mimo-v2-flash": "mimo-v2-flash", + "vercel-ai-gateway/xiaomi/mimo-v2-pro": "mimo-v2-pro", + // ── zai ────────────────────────────────────────────────────────────────── + // zai models already have clean IDs (glm-5.1, etc.) — no mapping needed +}; + +/** + * Canonical id → generation tag. + * Same-generation routes are eligible for direct failover (no downgrade signal). 
+ */
+const GENERATION: Record<CanonicalId, string> = {
+	// Tags are compared with plain string equality by sameGeneration(), and the
+	// Gemini / GPT tags are bare version numbers ("2.5", "5.1"). A new tag must
+	// therefore not collide with a tag already used by another model family.
+	// ── Kimi K2 family ────────────────────────────────────────────────────────
+	"kimi-k2": "k2",
+	"kimi-k2-0905": "k2", // same generation, post-release patch
+	"kimi-k2-instruct": "k2",
+	"kimi-k2-turbo": "k2",
+	"kimi-k2.5": "k2.5",
+	"kimi-k2.6": "k2.6",
+	"kimi-k2-thinking": "k2-thinking",
+	"kimi-k2-thinking-turbo": "k2-thinking",
+	"kimi-for-coding": "k2.6", // wire alias for kimi-k2.6
+	// ── Claude 3.x ───────────────────────────────────────────────────────────
+	"claude-3-haiku": "haiku-3",
+	"claude-3-sonnet": "sonnet-3",
+	"claude-3-opus": "opus-3",
+	"claude-3-5-haiku": "haiku-3.5",
+	"claude-3-5-sonnet": "sonnet-3.5",
+	"claude-3-7-sonnet": "sonnet-3.7",
+	// ── Claude 4.x ───────────────────────────────────────────────────────────
+	"claude-haiku-4-5": "haiku-4",
+	"claude-haiku-4.5": "haiku-4", // dot-notation variant (github-copilot)
+	"claude-sonnet-4": "sonnet-4",
+	"claude-sonnet-4-5": "sonnet-4",
+	"claude-sonnet-4-6": "sonnet-4",
+	"claude-sonnet-4.5": "sonnet-4",
+	"claude-sonnet-4.6": "sonnet-4",
+	"claude-opus-4": "opus-4",
+	"claude-opus-4-1": "opus-4",
+	"claude-opus-4-5": "opus-4",
+	"claude-opus-4-6": "opus-4",
+	"claude-opus-4-7": "opus-4",
+	"claude-opus-4.5": "opus-4",
+	"claude-opus-4.6": "opus-4",
+	"claude-opus-4.7": "opus-4",
+	// ── Gemini ────────────────────────────────────────────────────────────────
+	"gemini-2.0-flash": "2",
+	"gemini-2.5-flash": "2.5",
+	"gemini-2.5-flash-lite": "2.5",
+	"gemini-2.5-pro": "2.5",
+	"gemini-3-flash-preview": "3",
+	"gemini-3-pro-preview": "3",
+	"gemini-3.1-flash-lite-preview": "3.1",
+	"gemini-3.1-pro-preview": "3.1",
+	// ── GPT / OpenAI ─────────────────────────────────────────────────────────
+	"gpt-4": "4",
+	"gpt-4-turbo": "4",
+	"gpt-4o": "4o",
+	"gpt-4o-mini": "4o",
+	"gpt-4.1": "4.1",
+	"gpt-4.1-mini": "4.1",
+	"gpt-4.1-nano": "4.1",
+	"gpt-5": "5",
+	"gpt-5-mini": "5",
+	"gpt-5-nano": "5",
+	"gpt-5-pro": "5",
+	"gpt-5.1": "5.1",
+	"gpt-5.1-codex": "5.1",
+	"gpt-5.1-codex-max": "5.1",
+	"gpt-5.1-codex-mini": "5.1",
+	"gpt-5.2": "5.2",
+	"gpt-5.2-codex": "5.2",
+	"gpt-5.3-codex": "5.3",
+	"gpt-5.4": "5.4",
+	"gpt-5.4-mini": "5.4",
+	"gpt-5.4-nano": "5.4",
+	"gpt-5.4-pro": "5.4",
+	"gpt-5.3-codex-spark": "5.3",
+	"gpt-5-mini-latest": "5",
+	o1: "o1",
+	o3: "o3",
+	"o4-mini": "o4",
+	"o4-mini-deep-research": "o4",
+	// ── DeepSeek ─────────────────────────────────────────────────────────────
+	"deepseek-chat": "v3",
+	"deepseek-v3.2": "v3.2",
+	"deepseek-r1": "r1",
+	"deepseek-r1-0528": "r1",
+	// ── MiniMax ───────────────────────────────────────────────────────────────
+	"minimax-m2": "m2",
+	"minimax-m2.1": "m2.1",
+	"minimax-m2.5": "m2.5",
+	"minimax-m2.5-highspeed": "m2.5",
+	"MiniMax-M2.7": "m2.7",
+	"MiniMax-M2.7-highspeed": "m2.7",
+	"minimax-m1": "m1",
+	// ── GLM (ZAI) ─────────────────────────────────────────────────────────────
+	"glm-4.5": "glm-4.5",
+	"glm-4.5-air": "glm-4.5",
+	"glm-4.6": "glm-4.6",
+	"glm-4.7": "glm-4.7",
+	"glm-4.7-flash": "glm-4.7",
+	"glm-4.7-flashx": "glm-4.7",
+	"glm-5": "glm-5",
+	"glm-5-turbo": "glm-5",
+	"glm-5.1": "glm-5.1",
+	"glm-5v-turbo": "glm-5",
+	// ── Mistral / Devstral ───────────────────────────────────────────────────
+	"codestral-latest": "codestral",
+	"devstral-2512": "devstral-2",
+	"devstral-medium-latest": "devstral-medium",
+	"devstral-medium-2507": "devstral-medium",
+	"devstral-small-2507": "devstral-small",
+	"devstral-small-2505": "devstral-small",
+	"labs-devstral-small-2512": "devstral-small",
+	"magistral-small": "magistral-small",
+	"mistral-large-2411": "mistral-large",
+	"mistral-large-2512": "mistral-large",
+	"mistral-large-latest": "mistral-large",
+	"mistral-medium-latest": "mistral-medium",
+	"mistral-medium-2505": "mistral-medium",
+	"mistral-medium-2508": "mistral-medium",
+	"mistral-nemo": "mistral-nemo",
+	"mistral-small-latest": "mistral-small",
+	"mistral-small-2506": "mistral-small",
+	"mistral-small-2603": "mistral-small",
+	"ministral-3b-latest": "ministral-3b",
+	"ministral-8b-latest": "ministral-8b",
+	"pixtral-large-latest": "pixtral-large",
+	"pixtral-12b": "pixtral-12b",
+	// ── Qwen ─────────────────────────────────────────────────────────────────
+	"qwen3-coder:480b": "qwen3-coder",
+	"qwen3-coder-next": "qwen3-coder",
+	"qwen3-next:80b": "qwen3-next",
+	// ── XAI (Grok) ───────────────────────────────────────────────────────────
+	"grok-3": "grok-3",
+	"grok-3-mini": "grok-3",
+	"grok-4": "grok-4",
+	// ── MiMo (Xiaomi) ────────────────────────────────────────────────────────
+	"mimo-v2-flash": "mimo-v2",
+	"mimo-v2-omni": "mimo-v2",
+	"mimo-v2-pro": "mimo-v2",
+};
+
+/**
+ * Canonical id → capability tier.
+ * Lifted from MODEL_CAPABILITY_TIER in model-router.js.
+ * CRITICAL: kimi-k2.5 is its own tier entry (NOT aliased to kimi-k2.6).
+ *
+ * The table is intentionally partial: for an id that is not listed here,
+ * tierFor() returns null and resolveEntry() falls back to "standard".
+ *
+ * NOTE(review): "gpt-5.5", "magistral-medium-latest" and "open-mixtral-8x22b"
+ * have a tier here but no GENERATION entry — confirm whether that is intended.
+ * NOTE(review): "claude-opus-4" and "claude-opus-4-1" have a GENERATION entry
+ * but no tier, so lookup() reports them as "standard" while the other Opus 4.x
+ * ids are "heavy" — confirm.
+ */
+const TIER: Record<CanonicalId, CapabilityTier> = {
+	// ── Light ─────────────────────────────────────────────────────────────────
+	"claude-haiku-4-5": "light",
+	"claude-3-5-haiku": "light",
+	"claude-3-haiku": "light",
+	"gpt-4o-mini": "light",
+	"gpt-4.1-mini": "light",
+	"gpt-4.1-nano": "light",
+	"gpt-5-mini": "light",
+	"gpt-5-nano": "light",
+	"gpt-5.1-codex-mini": "light",
+	"gpt-5.3-codex-spark": "light",
+	"gemini-2.0-flash": "light",
+	"gemini-2.5-flash-lite": "light",
+	"gemini-3.1-flash-lite-preview": "light",
+	"glm-4.7-flash": "light",
+	"glm-4.7-flashx": "light",
+	"ministral-3b-latest": "light",
+	"ministral-8b-latest": "light",
+	"devstral-small-2505": "light",
+	"devstral-small-2507": "light",
+	"labs-devstral-small-2512": "light",
+	// ── Standard ──────────────────────────────────────────────────────────────
+	"claude-sonnet-4-6": "standard",
+	"claude-sonnet-4-5": "standard",
+	"claude-sonnet-4": "standard",
+	"claude-3-5-sonnet": "standard",
+	"gpt-4o": "standard",
+	"gpt-4.1": "standard",
+	"gpt-5.1-codex-max": "standard",
+	"gpt-5.4-mini": "standard",
+	"gemini-2.5-pro": "standard",
+	"gemini-3-flash-preview": "standard",
+	"gemini-2.5-flash": "standard",
+	"deepseek-chat": "standard",
+	"glm-4.7": "standard",
+	"qwen3-coder:480b": "standard",
+	"qwen3-coder-next": "standard",
+	// kimi-k2.5 is standard — its own entry (NOT aliased to kimi-k2.6)
+	"kimi-k2.5": "standard",
+	"kimi-k2.6": "standard",
+	"kimi-for-coding": "standard",
+	"MiniMax-M2.7": "standard",
+	"MiniMax-M2.7-highspeed": "standard",
+	"codestral-latest": "standard",
+	"devstral-2512": "standard",
+	"devstral-medium-2507": "standard",
+	"devstral-medium-latest": "standard",
+	"magistral-small": "standard",
+	"mistral-medium-2505": "standard",
+	"mistral-medium-2508": "standard",
+	"mistral-medium-latest": "standard",
+	"mistral-nemo": "standard",
+	"mistral-small-2506": "standard",
+	"mistral-small-2603": "standard",
+	"mistral-small-latest": "standard",
+	"pixtral-12b": "standard",
+	// ── Heavy ─────────────────────────────────────────────────────────────────
+	"claude-opus-4-6": "heavy",
+	"claude-opus-4-7": "heavy",
+	"claude-opus-4-5": "heavy",
+	"claude-3-opus": "heavy",
+	"gpt-4-turbo": "heavy",
+	"gpt-5": "heavy",
+	"gpt-5-pro": "heavy",
+	"gpt-5.1": "heavy",
+	"gpt-5.2": "heavy",
+	"gpt-5.2-codex": "heavy",
+	"gpt-5.3-codex": "heavy",
+	"gpt-5.4": "heavy",
+	"gpt-5.4-pro": "heavy",
+	"gpt-5.5": "heavy",
+	o1: "heavy",
+	o3: "heavy",
+	"o4-mini": "heavy",
+	"o4-mini-deep-research": "heavy",
+	"gemini-3.1-pro-preview": "heavy",
+	"gemini-3-pro-preview": "heavy",
+	"kimi-k2-thinking": "heavy",
+	"kimi-k2-thinking-turbo": "heavy",
+	"qwen3-next:80b": "heavy",
+	"glm-5": "heavy",
+	"glm-5-turbo": "heavy",
+	"glm-5.1": "heavy",
+	"glm-5v-turbo": "heavy",
+	"magistral-medium-latest": "heavy",
+	"mistral-large-2411": "heavy",
+	"mistral-large-2512": "heavy",
+	"mistral-large-latest": "heavy",
+	"open-mixtral-8x22b": "heavy",
+	"pixtral-large-latest": "heavy",
+};
+
+// ─── Module-level index built at startup ─────────────────────────────────────
+
+/** Flattened upstream catalog: routeKey → upstream ModelEntry */
+const _ENTRY_BY_ROUTE = new Map<RouteKey, UpstreamEntry>();
+
+/**
+ * The fields of one upstream catalog entry that the registry reads.
+ * Declared once and shared by the index, buildIndex() and resolveEntry().
+ */
+type UpstreamEntry = {
+	id: string;
+	name: string;
+	api: string;
+	provider: string;
+	baseUrl: string;
+	reasoning?: boolean;
+	input?: string[];
+	// Derived from ResolvedModel so the value copied in resolveEntry() is
+	// assignable by construction.
+	capabilities?: ResolvedModel["capabilities"];
+	cost?: { input?: number; output?: number; cacheRead?: number; cacheWrite?: number };
+	contextWindow?: number;
+	maxTokens?: number;
+};
+
+/** routeKey → resolved ResolvedModel (lazily populated cache, hits only) */
+const _RESOLVED_CACHE = new Map<RouteKey, ResolvedModel>();
+
+/** canonical id → set of route keys */
+const _ROUTES_BY_CANONICAL = new Map<CanonicalId, RouteKey[]>();
+
+// Build the indexes once at module load. `getModels(provider)` returns an
+// array (after generated + CUSTOM_MODELS merge); we key by
+// `${provider}/${entry.id}`.
+(function buildIndex() {
+	for (const provider of getProviders()) {
+		// Double cast: presumably the declared return type of getModels() does
+		// not line up structurally with UpstreamEntry.
+		const entries = getModels(provider) as unknown as UpstreamEntry[];
+		if (!entries) continue;
+		for (const entry of entries) {
+			const routeKey = `${provider}/${entry.id}` as RouteKey;
+			_ENTRY_BY_ROUTE.set(routeKey, entry);
+
+			// A route without an explicit mapping is its own canonical id.
+			const canonical = CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
+
+			// Reverse index: canonical id → every route that serves it.
+			const routes = _ROUTES_BY_CANONICAL.get(canonical);
+			if (routes) routes.push(routeKey);
+			else _ROUTES_BY_CANONICAL.set(canonical, [routeKey]);
+		}
+	}
+})();
+
+// ─── Resolution helpers ───────────────────────────────────────────────────────
+
+/**
+ * Combine an upstream entry with the registry tables into a ResolvedModel.
+ *
+ * @param routeKey - `${provider}/${wireId}` key the entry was indexed under.
+ * @param entry - Upstream entry for that route, or undefined when unknown.
+ * @returns The resolved model, or null when `entry` is missing.
+ */
+function resolveEntry(
+	routeKey: RouteKey,
+	entry: UpstreamEntry | undefined,
+): ResolvedModel | null {
+	if (!entry) return null;
+	const canonical = CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
+	// Ids missing from GENERATION form a generation of their own.
+	const generation = GENERATION[canonical] ?? canonical;
+	// Ids missing from TIER default to the middle tier.
+	const tier: CapabilityTier = TIER[canonical] ?? "standard";
+	return {
+		canonical_id: canonical,
+		generation,
+		tier,
+		wire_id: entry.id,
+		provider: entry.provider,
+		api: entry.api,
+		baseUrl: entry.baseUrl,
+		capabilities: entry.capabilities,
+		cost: entry.cost,
+		contextWindow: entry.contextWindow,
+		maxTokens: entry.maxTokens,
+		reasoning: entry.reasoning,
+		inputModalities: entry.input,
+	};
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+/** Look up a (provider, wire_id) pair. Returns null if not in upstream. */
+export function lookup(
+	provider: string,
+	wireId: string,
+): ResolvedModel | null {
+	return lookupRoute(`${provider}/${wireId}` as RouteKey);
+}
+
+/** Same, parsed from a fused route key. */
+export function lookupRoute(routeKey: RouteKey): ResolvedModel | null {
+	const cached = _RESOLVED_CACHE.get(routeKey);
+	if (cached) return cached;
+	const resolved = resolveEntry(routeKey, _ENTRY_BY_ROUTE.get(routeKey));
+	// Only hits are cached. A miss costs one Map probe either way, and caching
+	// misses would let arbitrary unknown keys grow the cache without bound.
+	if (resolved) _RESOLVED_CACHE.set(routeKey, resolved);
+	return resolved;
+}
+
+/** All routes (across all providers) that resolve to this canonical id. */
+export function routesFor(canonicalId: CanonicalId): ResolvedModel[] {
+	const resolved: ResolvedModel[] = [];
+	for (const routeKey of _ROUTES_BY_CANONICAL.get(canonicalId) ?? []) {
+		const model = lookupRoute(routeKey);
+		if (model) resolved.push(model);
+	}
+	return resolved;
+}
+
+/**
+ * Map a route key to a canonical id, or null if unmappable.
+ * A route is unmappable when it is absent from the upstream catalog, even if
+ * CANONICAL_BY_ROUTE has an entry for it.
+ */
+export function canonicalIdFor(
+	routeKey: RouteKey,
+): CanonicalId | null {
+	const entry = _ENTRY_BY_ROUTE.get(routeKey);
+	if (!entry) return null;
+	return CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
+}
+
+/** Capability tier of a canonical id, or null when the id has no TIER entry. */
+export function tierFor(canonicalId: CanonicalId): CapabilityTier | null {
+	return TIER[canonicalId] ?? null;
+}
+
+/**
+ * Generation of a canonical id (e.g. "k2.5"), or null when the id has no
+ * GENERATION entry. Unlike ResolvedModel.generation, no fallback is applied.
+ */
+export function generationFor(canonicalId: CanonicalId): string | null {
+	return GENERATION[canonicalId] ?? null;
+}
+
+/**
+ * Two canonical ids share a generation (failover may cross).
+ *
+ * An id always shares a generation with itself, including ids that have no
+ * GENERATION entry; this matches resolveEntry(), which gives such ids a
+ * generation equal to their canonical id. Two different ids match only when
+ * both are listed and carry the same tag.
+ */
+export function sameGeneration(a: CanonicalId, b: CanonicalId): boolean {
+	if (a === b) return true;
+	const ga = GENERATION[a];
+	const gb = GENERATION[b];
+	if (ga === undefined || gb === undefined) return false;
+	return ga === gb;
+}
+
+/** Iterate every canonical id known to SF. */
+export function allCanonicalIds(): CanonicalId[] {
+	return Array.from(_ROUTES_BY_CANONICAL.keys());
+}
+
+/** Build a route key from a resolved model (for metrics aggregation). */
+export function routeKeyOf(m: {
+	provider: string;
+	wire_id: string;
+}): RouteKey {
+	return `${m.provider}/${m.wire_id}` as RouteKey;
+}
diff --git a/src/resources/extensions/sf/model-router.js b/src/resources/extensions/sf/model-router.js
index 060f122d4..980a18d03 100644
--- a/src/resources/extensions/sf/model-router.js
+++ b/src/resources/extensions/sf/model-router.js
@@ -5,9 +5,13 @@ import { getProviderCapabilities } from "@singularity-forge/ai";
 import { getToolCompatibility } from "@singularity-forge/coding-agent";
 import { tierOrdinal } from "./complexity-classifier.js";
 import { lookupModelCost } from "./model-cost-table.js";
+import { tierFor } from "./model-registry.js";
 // ─── Known Model Tiers ───────────────────────────────────────────────────────
-// Maps known model IDs to their capability tier. Used when tier_models is not
-// explicitly configured to pick the best available model for each tier.
+// MIGRATED: this table moved to model-registry.ts as the TIER map. Kept here
+// as a deprecated re-export shim so external callers that import
+// MODEL_CAPABILITY_TIER directly don't break during the transition. Swarm C
+// will remove this shim once metrics + remaining consumers are migrated.
+/** @deprecated Use tierFor() from model-registry.js instead.
*/ export const MODEL_CAPABILITY_TIER = { // Light-tier models (cheapest) "claude-haiku-4-5": "light", @@ -961,7 +965,10 @@ const MODEL_CAPABILITY_ALIASES = { "gpt-oss:120b": "gpt-4o", "gpt-oss:20b": "gpt-4o-mini", "kimi-k2:1t": "kimi-k2.6", - "kimi-k2.5": "kimi-k2.6", + // NOTE: "kimi-k2.5" → "kimi-k2.6" alias REMOVED. K2.5 is a distinct + // generation from K2.6; aliasing them caused K2.5 to silently inherit + // K2.6's tier (latent downgrade-conflation bug). K2.5 now resolves via + // tierFor("kimi-k2.5") in the registry directly. "kimi-for-coding": "kimi-k2.6", "kimi-k2.6:cloud": "kimi-k2.6", "kimi-k2.6-cloud": "kimi-k2.6", @@ -1384,8 +1391,12 @@ export function defaultRoutingConfig() { } // ─── Internal ──────────────────────────────────────────────────────────────── export function getModelTier(modelId) { + // Prefer registry lookup using canonical id — this eliminates the K2.5→K2.6 + // downgrade alias that was in MODEL_CAPABILITY_ALIASES. const canonicalId = canonicalCapabilityModelId(modelId); - // Check exact match first + const registryTier = tierFor(canonicalId); + if (registryTier) return registryTier; + // Fall back to the local table for models not yet in the registry. 
if (MODEL_CAPABILITY_TIER[canonicalId]) return MODEL_CAPABILITY_TIER[canonicalId]; const sizeTier = inferTierFromModelSize(canonicalId); diff --git a/src/resources/extensions/sf/ui/usage-bar.js b/src/resources/extensions/sf/ui/usage-bar.js index 7862c15b7..29292ff3a 100644 --- a/src/resources/extensions/sf/ui/usage-bar.js +++ b/src/resources/extensions/sf/ui/usage-bar.js @@ -11,13 +11,7 @@ import { execSync, spawnSync } from "node:child_process"; import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import { - AuthType, - CodeAssistServer, - getOauthClient, - makeFakeConfig, - setupUser, -} from "@google/gemini-cli-core"; +import { snapshotGeminiCliAccount } from "@singularity-forge/google-gemini-cli-provider"; import { visibleWidth } from "@singularity-forge/tui"; import { sfHome } from "../sf-home.js"; @@ -203,6 +197,8 @@ async function fetchClaudeUsage() { // Gemini Usage // ============================================================================ async function fetchGeminiUsage(_modelRegistry) { + // Existence check is a fast pre-flight so we surface a friendly "not logged + // in" message without paying for the OAuth bootstrap inside snapshotGeminiCliAccount. 
const credPath = path.join(os.homedir(), ".gemini", "oauth_creds.json"); if (!fs.existsSync(credPath)) { return { @@ -213,47 +209,26 @@ async function fetchGeminiUsage(_modelRegistry) { }; } try { - const config = makeFakeConfig(); - const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config); - const userData = await setupUser(authClient, config); - const projectId = userData.projectId; - if (!projectId) { + const snapshot = await snapshotGeminiCliAccount(); + if (!snapshot) { return { provider: "gemini", displayName: "Gemini", windows: [], - error: "No Code Assist project", + error: "No Code Assist project or empty quota response", }; } - const server = new CodeAssistServer(authClient, projectId, { headers: {} }); - const data = await server.retrieveUserQuota({ - project: projectId, - }); - const quotas = {}; - for (const bucket of data.buckets || []) { - const model = bucket.modelId || "unknown"; - const frac = bucket.remainingFraction ?? 1; - if (!quotas[model] || frac < quotas[model].remainingFraction) { - quotas[model] = { - remainingFraction: frac, - resetTime: bucket.resetTime, - }; - } - } - const windows = []; - for (const [model, quota] of Object.entries(quotas).sort(([a], [b]) => - a.localeCompare(b), - )) { - const resetDate = quota.resetTime ? new Date(quota.resetTime) : undefined; - windows.push({ - label: model.replace(/^gemini-/, "").slice(0, 7), - usedPercent: (1 - quota.remainingFraction) * 100, + const windows = snapshot.models.map((m) => { + const resetDate = m.resetTime ? new Date(m.resetTime) : undefined; + return { + label: m.modelId.replace(/^gemini-/, "").slice(0, 7), + usedPercent: m.usedFraction * 100, resetDescription: resetDate && !Number.isNaN(resetDate.getTime()) ? formatReset(resetDate) : undefined, - }); - } + }; + }); return { provider: "gemini", displayName: "Gemini", windows }; } catch (e) { return {