feat(headless,gemini-cli): add sf headless usage + unify gemini quota path

Adds a machine-readable headless surface for live LLM-provider usage and
unifies the gemini-cli quota fetch through one helper, removing the
duplication that existed between usage-bar.js and the new package.

1. snapshotGeminiCliAccount in @singularity-forge/google-gemini-cli-provider

   - Single source of truth for { projectId, userTierId, userTierName,
     paidTier, models[] } via setupUser + retrieveUserQuota.
   - Dedupes buckets per modelId, keeping the worst (lowest remainingFraction)
     so consumers always see the most-restrictive window. Code Assist
     sometimes returns multiple buckets per model; the pessimistic choice
     is what every consumer needs.
   - discoverGeminiCliModels(cwd?) wraps it for catalog-cache callers that
     only need the IDs.

2. sf headless usage subcommand

   - New src/headless-usage.ts handler. text (default) and --json output.
     Uses the package's snapshot directly — no RPC child, no jiti
     gymnastics — matching the shape of headless-uok-status / headless-doctor.
   - Wired into src/headless.ts after the doctor block.
   - Help text adds the command line.

3. usage-bar.js refactored to delegate

   - fetchGeminiUsage no longer imports gemini-cli-core directly. It calls
     snapshotGeminiCliAccount and reshapes the result into the existing
     { provider, displayName, windows[] } UI contract.
   - Eliminates the duplicate setupUser + retrieveUserQuota code path.
   - The fast existsSync(~/.gemini/oauth_creds.json) pre-flight stays
     so unauth'd users get a friendly message without paying for OAuth
     bootstrap.

4. Model registry refactor (separate track committed alongside)

   - src/resources/extensions/sf/model-registry.ts (new) consolidates
     canonical model identity, capability tier, and generation tags into
     one source of truth that auto-model-selection, benchmark-selector,
     and model-router now consume instead of maintaining parallel maps.

All 1487 tests pass (151 files); typecheck clean for both the package
and the SF extensions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 03:42:53 +02:00
parent c6a3fa6a6a
commit 383e495085
9 changed files with 1073 additions and 94 deletions

View file

@ -105,20 +105,32 @@ export async function snapshotGeminiCliAccount(
if (!projectId || typeof projectId !== "string") return null;
const server = new CodeAssistServer(authClient, projectId, { headers: {} });
const data = await server.retrieveUserQuota({ project: projectId });
const models: GeminiQuotaBucket[] = [];
// Dedup buckets per modelId, keeping the WORST quota (lowest
// remainingFraction). Code Assist sometimes returns multiple buckets
// for the same model when more than one quota window applies; the
// pessimistic choice is what every consumer (UI, capacity diagnostics,
// model picker) actually wants to surface.
const byModel = new Map<string, GeminiQuotaBucket>();
for (const b of data?.buckets ?? []) {
const modelId = typeof b.modelId === "string" ? b.modelId : "";
if (!modelId) continue;
const remainingFraction =
typeof b.remainingFraction === "number" ? b.remainingFraction : 1;
models.push({
const bucket: GeminiQuotaBucket = {
modelId,
usedFraction: 1 - remainingFraction,
remainingFraction,
resetTime:
typeof b.resetTime === "string" ? b.resetTime : undefined,
});
};
const existing = byModel.get(modelId);
if (!existing || bucket.remainingFraction < existing.remainingFraction) {
byModel.set(modelId, bucket);
}
}
const models = Array.from(byModel.values()).sort((a, b) =>
a.modelId.localeCompare(b.modelId),
);
if (models.length === 0) return null;
return {
projectId,

102
src/headless-usage.ts Normal file
View file

@ -0,0 +1,102 @@
/**
* headless-usage.ts — `sf headless usage`
*
* Purpose: expose live LLM-provider usage data (account tier, project, per-model
* quota usage with reset windows) via the headless CLI so operators and CI can
* see capacity state without launching the interactive UI.
*
* Today this covers the gemini-cli provider (the most quota-sensitive surface
* because of AI Ultra's per-model windowed quotas). Other providers can be
* added by extending the snapshot helper as their introspection APIs are
* wired into dedicated provider packages.
*
* Consumer: headless.ts when command === "usage".
*/
import {
type GeminiAccountSnapshot,
snapshotGeminiCliAccount,
} from "@singularity-forge/google-gemini-cli-provider";
/** Options accepted by {@link handleUsage}. */
export interface HandleUsageOptions {
  /** When true, emit a single machine-readable JSON line instead of the text table. */
  json?: boolean;
}
/** Result returned by {@link handleUsage}. */
export interface HandleUsageResult {
  /** Process exit code: 0 on success, 1 when the snapshot is unavailable or errored. */
  exitCode: number;
}
/**
 * Structural view of the snapshot fields the text renderer reads. Kept as a
 * loose subset of GeminiAccountSnapshot (null-tolerant, extra fields allowed)
 * so the renderer is independently testable and resilient to
 * null-vs-undefined differences upstream.
 */
interface UsageSnapshotView {
  projectId: string;
  userTierId?: string | null;
  userTierName?: string | null;
  paidTier?: { id?: string | null; name?: string | null } | null;
  models: readonly {
    modelId: string;
    usedFraction: number;
    resetTime?: string | null;
  }[];
}

/**
 * Build the human-readable usage report for a snapshot.
 *
 * @param snapshot account snapshot (project, tiers, per-model quota buckets)
 * @returns multi-line report WITHOUT a trailing newline (caller appends it)
 */
function renderUsageText(snapshot: UsageSnapshotView): string {
  const lines: string[] = [];
  lines.push("Gemini CLI usage");
  lines.push("");
  lines.push(` project: ${snapshot.projectId}`);
  if (snapshot.userTierId || snapshot.userTierName) {
    lines.push(
      ` userTier: ${snapshot.userTierId ?? "?"}${snapshot.userTierName ? ` (${snapshot.userTierName})` : ""}`,
    );
  }
  if (snapshot.paidTier?.id || snapshot.paidTier?.name) {
    // FIX: the name was previously concatenated directly after the id
    // ("paidTier: ultraAI Ultra"); wrap it in " (…)" to match the userTier
    // line above.
    lines.push(
      ` paidTier: ${snapshot.paidTier.id ?? "?"}${snapshot.paidTier.name ? ` (${snapshot.paidTier.name})` : ""}`,
    );
  }
  lines.push("");
  lines.push(" Per-model quota:");
  // Column width: at least 20 chars so short ids still align. Math.max over
  // an empty models list safely falls back to the 20 floor.
  const modelW = Math.max(20, ...snapshot.models.map((m) => m.modelId.length));
  for (const m of snapshot.models) {
    const usedPct = (m.usedFraction * 100).toFixed(1).padStart(5);
    const reset = m.resetTime ?? "-";
    lines.push(` ${m.modelId.padEnd(modelW)} used=${usedPct}% reset=${reset}`);
  }
  return lines.join("\n");
}

/**
 * Render a snapshot as a compact text table (default) or as JSON for machine
 * consumers. Always writes to stdout; never throws.
 *
 * @param cwd working directory forwarded to the provider snapshot helper
 * @param options rendering options; `json` switches to machine output
 * @returns exitCode 0 on success, 1 when the snapshot is unavailable/errored
 */
export async function handleUsage(
  cwd: string,
  options: HandleUsageOptions = {},
): Promise<HandleUsageResult> {
  let snapshot: GeminiAccountSnapshot | null;
  try {
    snapshot = await snapshotGeminiCliAccount(cwd);
  } catch (err) {
    // Surface provider/OAuth failures as a structured error rather than
    // letting the CLI crash with a stack trace.
    const msg = err instanceof Error ? err.message : String(err);
    const payload = {
      provider: "google-gemini-cli",
      ok: false,
      error: msg,
    };
    process.stdout.write(
      options.json ? `${JSON.stringify(payload)}\n` : `error: ${msg}\n`,
    );
    return { exitCode: 1 };
  }
  if (!snapshot) {
    // A null snapshot means "no usable account" (e.g. missing
    // ~/.gemini/oauth_creds.json), not an exception — point the operator at
    // the login flow.
    const payload = {
      provider: "google-gemini-cli",
      ok: false,
      error:
        "No gemini-cli account snapshot — run `gemini auth login` and verify ~/.gemini/oauth_creds.json exists.",
    };
    process.stdout.write(
      options.json
        ? `${JSON.stringify(payload)}\n`
        : `${payload.error}\n`,
    );
    return { exitCode: 1 };
  }
  if (options.json) {
    process.stdout.write(
      `${JSON.stringify({ provider: "google-gemini-cli", ok: true, snapshot })}\n`,
    );
    return { exitCode: 0 };
  }
  process.stdout.write(`${renderUsageText(snapshot)}\n`);
  return { exitCode: 0 };
}

View file

@ -824,6 +824,16 @@ async function runHeadlessOnce(
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
}
// Usage: gemini-cli account snapshot (tier, project, per-model quota), no
// RPC child needed. Uses snapshotGeminiCliAccount from the
// @singularity-forge/google-gemini-cli-provider package directly.
if (options.command === "usage") {
const wantsJson = options.json || options.commandArgs.includes("--json");
const { handleUsage } = await import("./headless-usage.js");
const result = await handleUsage(process.cwd(), { json: wantsJson });
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
}
// Doctor: read-only health check, no RPC child needed (#4904 live-regression).
// ARCHITECTURE NOTE: this intentionally bypasses the SF extension dispatcher
// for performance and TTY-independence. The interactive `/doctor` command in

View file

@ -223,6 +223,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
" status Show progress dashboard",
" new-milestone Create a milestone from a specification document",
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
"",
"new-milestone flags:",
" --context <path> Path to spec/PRD file (use '-' for stdin)",

View file

@ -11,6 +11,7 @@ import {
tierLabel,
} from "./complexity-classifier.js";
import { getLedger, getProjectTotals } from "./metrics.js";
import { routesFor } from "./model-registry.js";
import {
adjustToolSet,
escalateTier,
@ -124,42 +125,21 @@ const BARE_MODEL_FAMILY_PRIORITY = [
providers: ["xiaomi", "opencode-go"],
},
];
function preferredBareModelIds(modelId) {
const lower = modelId.toLowerCase();
if (
lower === "kimi-for-coding" ||
lower === "kimi-k2.6" ||
lower === "kimi-k2.6:cloud" ||
lower === "kimi-k2.6-cloud" ||
lower === "moonshotai/kimi-k2.6"
) {
return [
"kimi-for-coding",
"kimi-k2.6",
"kimi-k2.6:cloud",
"kimi-k2.6-cloud",
"moonshotai/kimi-k2.6",
];
}
if (
lower === "kimi-k2.5" ||
lower === "kimi-k2.5:cloud" ||
lower === "moonshotai/kimi-k2.5"
) {
return ["kimi-k2.5", "moonshotai/kimi-k2.5", "kimi-k2.5:cloud"];
}
return undefined;
}
function resolveFamilyPreferredBareModel(modelId, candidates) {
const rule = BARE_MODEL_FAMILY_PRIORITY.find((r) => r.match.test(modelId));
if (!rule) return undefined;
const preferredModelIds = preferredBareModelIds(modelId);
// Use model registry to get preferred wire ids for this canonical model.
// routesFor returns all known routes; we filter to preferred providers below.
const canonicalId =
candidates.find((m) => m.id.toLowerCase() === modelId.toLowerCase())
?.canonical_id ?? modelId;
const preferredWireIds = routesFor(canonicalId).map((r) => r.wire_id);
for (const provider of rule.providers) {
const providerCandidates = candidates.filter(
(m) => m.provider.toLowerCase() === provider.toLowerCase(),
);
if (preferredModelIds) {
for (const preferredId of preferredModelIds) {
if (preferredWireIds.length > 0) {
for (const preferredId of preferredWireIds) {
const match = providerCandidates.find(
(m) => m.id.toLowerCase() === preferredId.toLowerCase(),
);

View file

@ -23,6 +23,7 @@
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { tierOrdinal } from "./complexity-classifier.js";
import { lookup } from "./model-registry.js";
import { getModelTier } from "./model-router.js";
// ─── Benchmark File Loader ───────────────────────────────────────────────────
@ -263,27 +264,16 @@ function profileForUnitType(unitType) {
* Match a provider+model pair to a benchmark record key. Benchmarks are
* keyed by semantic model ID (e.g. "devstral-latest", "kimi-k2.5"), while registered
* models may carry provider wire IDs or versioned suffixes
* (`kimi-for-coding`, `devstral-2507`, `minimax-m2.7`). We try semantic
* aliases first, then exact match, then strip common version/date suffixes,
* then try a family-level key (e.g. `mistral-large-2411`
* (`kimi-for-coding`, `devstral-2507`, `minimax-m2.7`). We try canonical id
* from the model registry first, then exact match, then strip common
* version/date suffixes, then try a family-level key (e.g. `mistral-large-2411`
* `mistral-large-latest`).
*/
const BENCHMARK_KEY_ALIASES = {
// Kimi Code's provider wire ID. The benchmark identity is Kimi K2.6.
"kimi-for-coding": "kimi-k2.6",
"moonshotai/kimi-k2.6": "kimi-k2.6",
"kimi-k2.6:cloud": "kimi-k2.6",
"kimi-k2.6-cloud": "kimi-k2.6",
// Kimi aggregator wire IDs. Kimi Code's `kimi-for-coding` is K2.6 above.
"kimi-k2.5": "kimi-k2.5",
"moonshotai/kimi-k2.5": "kimi-k2.5",
"moonshotai.kimi-k2.5": "kimi-k2.5",
"kimi-k2.5:cloud": "kimi-k2.5",
"kimi-k2.5-cloud": "kimi-k2.5",
};
function findBenchmarkKey(modelId, benchmarks) {
const alias = BENCHMARK_KEY_ALIASES[modelId.toLowerCase()];
if (alias && alias in benchmarks) return alias;
function findBenchmarkKey(modelId, benchmarks, provider) {
// Use canonical id from registry when a provider is known.
const resolved = provider ? lookup(provider, modelId) : null;
const semantic = resolved?.canonical_id ?? modelId;
if (semantic !== modelId && semantic in benchmarks) return semantic;
if (modelId in benchmarks) return modelId;
// Strip date-style suffixes: "devstral-medium-2507" → "devstral-medium"
const noDate = modelId.replace(/-\d{4}$/, "");
@ -332,7 +322,7 @@ function readDimension(rec, dim) {
return null;
}
function scoreCandidate(candidate, profile, benchmarks) {
const key = findBenchmarkKey(candidate.id, benchmarks);
const key = findBenchmarkKey(candidate.id, benchmarks, candidate.provider);
if (!key) return { score: 0, coverage: 0 };
const rec = benchmarks[key];
if (!rec || typeof rec !== "object") return { score: 0, coverage: 0 };

View file

@ -0,0 +1,898 @@
/**
 * SF Model Registry — single source of truth for canonical model identity,
* capability tier, and generation across all providers.
*
* The upstream `MODELS` constant from `@singularity-forge/ai` is the
* authoritative route catalog. This module enriches it with:
* 1. Canonical model identity (many routes one stable id)
* 2. Capability tier (light / standard / heavy)
* 3. Generation tag (same-generation routes are direct failover candidates)
*/
// ─── Upstream data import ─────────────────────────────────────────────────────
// Use the public API of @singularity-forge/ai so we get:
// 1. Both generated + CUSTOM_MODELS entries (e.g. kimi-coding/kimi-for-coding,
// which only appears once CUSTOM_MODELS merge runs in models.js).
// 2. A stable import path that resolves identically at test-time, dist-time,
// and runtime (~/.sf/agent/extensions/sf/) — relative paths into the
// monorepo can't satisfy the latter.
import { getModels, getProviders } from "@singularity-forge/ai";
// ─── Public types ─────────────────────────────────────────────────────────────
export type WireFormat =
| "anthropic-messages"
| "openai-completions"
| "openai-responses"
| "bedrock-converse-stream"
| "google-generative"
| string; // open enum — pass through unknown values from upstream
export type CapabilityTier = "light" | "standard" | "heavy";
export type CanonicalId = string;
// Stable, generation-aware identity. Examples:
// "kimi-k2.5" (NOT the same as kimi-k2.6 — generation matters)
// "kimi-k2.6"
// "kimi-k2-thinking"
// "claude-sonnet-4-6"
// "MiniMax-M2.7"
export type RouteKey = string;
// Format: `${provider}/${wire_id}`. Examples:
// "kimi-coding/kimi-k2.6"
// "openrouter/moonshotai/kimi-k2.5"
// "amazon-bedrock/moonshotai.kimi-k2.5"
export interface ResolvedModel {
canonical_id: CanonicalId;
generation: string; // free-form, e.g. "k2.5", "k2.6", "sonnet-4-6"
tier: CapabilityTier;
// Pass-through from upstream ModelEntry:
wire_id: string; // the upstream entry's `id`
provider: string;
api: WireFormat; // wire format axis
baseUrl: string;
capabilities?: Record<string, unknown>;
cost?: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
};
contextWindow?: number;
maxTokens?: number;
reasoning?: boolean;
inputModalities?: string[]; // renamed from upstream `input` for clarity
}
// ─── Internal data tables ─────────────────────────────────────────────────────
//
// Only three hand-maintained tables are needed. Everything else is derived
// from the upstream MODELS catalog at module initialisation time.
/**
* (provider, wire_id) canonical id.
* Only entries that DIVERGE from `wire_id` itself need a mapping.
* Entries that are already canonical (e.g. provider="kimi-coding", wire_id="kimi-k2.6")
* can be omitted; the resolver falls back to wire_id when no mapping exists.
*/
const CANONICAL_BY_ROUTE: Record<RouteKey, CanonicalId> = {
// ── amazon-bedrock ────────────────────────────────────────────────────────
"amazon-bedrock/amazon.nova-2-lite-v1:0": "nova-2-lite",
"amazon-bedrock/amazon.nova-lite-v1:0": "nova-lite",
"amazon-bedrock/amazon.nova-micro-v1:0": "nova-micro",
"amazon-bedrock/amazon.nova-premier-v1:0": "nova-premier",
"amazon-bedrock/amazon.nova-pro-v1:0": "nova-pro",
"amazon-bedrock/anthropic.claude-3-5-haiku-20241022-v1:0": "claude-3-5-haiku",
"amazon-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0":
"claude-3-5-sonnet",
"amazon-bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0":
"claude-3-5-sonnet",
"amazon-bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0":
"claude-3-7-sonnet",
"amazon-bedrock/anthropic.claude-3-haiku-20240307-v1:0": "claude-3-haiku",
"amazon-bedrock/anthropic.claude-haiku-4-5-20251001-v1:0": "claude-haiku-4-5",
"amazon-bedrock/anthropic.claude-opus-4-1-20250805-v1:0": "claude-opus-4-1",
"amazon-bedrock/anthropic.claude-opus-4-20250514-v1:0": "claude-opus-4",
"amazon-bedrock/anthropic.claude-opus-4-5-20251101-v1:0": "claude-opus-4-5",
"amazon-bedrock/anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
"amazon-bedrock/anthropic.claude-opus-4-7": "claude-opus-4-7",
"amazon-bedrock/anthropic.claude-sonnet-4-20250514-v1:0": "claude-sonnet-4",
"amazon-bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0":
"claude-sonnet-4-5",
"amazon-bedrock/anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
"amazon-bedrock/deepseek.r1-v1:0": "deepseek-r1",
"amazon-bedrock/deepseek.v3-v1:0": "deepseek-v3",
"amazon-bedrock/deepseek.v3.2": "deepseek-v3.2",
"amazon-bedrock/eu.anthropic.claude-haiku-4-5-20251001-v1:0":
"claude-haiku-4-5",
"amazon-bedrock/eu.anthropic.claude-opus-4-5-20251101-v1:0":
"claude-opus-4-5",
"amazon-bedrock/eu.anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
"amazon-bedrock/eu.anthropic.claude-opus-4-7": "claude-opus-4-7",
"amazon-bedrock/eu.anthropic.claude-sonnet-4-20250514-v1:0":
"claude-sonnet-4",
"amazon-bedrock/eu.anthropic.claude-sonnet-4-5-20250929-v1:0":
"claude-sonnet-4-5",
"amazon-bedrock/eu.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
"amazon-bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0":
"claude-haiku-4-5",
"amazon-bedrock/global.anthropic.claude-opus-4-5-20251101-v1:0":
"claude-opus-4-5",
"amazon-bedrock/global.anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
"amazon-bedrock/global.anthropic.claude-opus-4-7": "claude-opus-4-7",
"amazon-bedrock/global.anthropic.claude-sonnet-4-20250514-v1:0":
"claude-sonnet-4",
"amazon-bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0":
"claude-sonnet-4-5",
"amazon-bedrock/global.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
"amazon-bedrock/google.gemma-3-27b-it": "gemma-3-27b-it",
"amazon-bedrock/google.gemma-3-4b-it": "gemma-3-4b-it",
"amazon-bedrock/meta.llama3-1-405b-instruct-v1:0":
"llama3-1-405b-instruct",
"amazon-bedrock/meta.llama3-1-70b-instruct-v1:0": "llama3-1-70b-instruct",
"amazon-bedrock/meta.llama3-1-8b-instruct-v1:0": "llama3-1-8b-instruct",
"amazon-bedrock/meta.llama3-2-11b-instruct-v1:0": "llama3-2-11b-instruct",
"amazon-bedrock/meta.llama3-2-1b-instruct-v1:0": "llama3-2-1b-instruct",
"amazon-bedrock/meta.llama3-2-3b-instruct-v1:0": "llama3-2-3b-instruct",
"amazon-bedrock/meta.llama3-2-90b-instruct-v1:0": "llama3-2-90b-instruct",
"amazon-bedrock/meta.llama3-3-70b-instruct-v1:0": "llama3-3-70b-instruct",
"amazon-bedrock/meta.llama4-maverick-17b-instruct-v1:0":
"llama4-maverick-17b-instruct",
"amazon-bedrock/meta.llama4-scout-17b-instruct-v1:0":
"llama4-scout-17b-instruct",
"amazon-bedrock/minimax.minimax-m2": "minimax-m2",
"amazon-bedrock/minimax.minimax-m2.1": "minimax-m2.1",
"amazon-bedrock/minimax.minimax-m2.5": "minimax-m2.5",
"amazon-bedrock/mistral.devstral-2-123b": "devstral-2512",
"amazon-bedrock/mistral.magistral-small-2509": "magistral-small",
"amazon-bedrock/mistral.ministral-3-14b-instruct": "mistral-small-latest",
"amazon-bedrock/mistral.ministral-3-3b-instruct": "ministral-3b-latest",
"amazon-bedrock/mistral.ministral-3-8b-instruct": "ministral-8b-latest",
"amazon-bedrock/mistral.mistral-large-3-675b-instruct":
"mistral-large-latest",
"amazon-bedrock/mistral.pixtral-large-2502-v1:0": "pixtral-large-latest",
"amazon-bedrock/mistral.voxtral-mini-3b-2507": "voxtral-mini-3b-2507",
"amazon-bedrock/mistral.voxtral-small-24b-2507": "voxtral-small-24b-2507",
"amazon-bedrock/moonshot.kimi-k2-thinking": "kimi-k2-thinking",
"amazon-bedrock/moonshotai.kimi-k2.5": "kimi-k2.5",
"amazon-bedrock/nvidia.nemotron-nano-12b-v2": "nemotron-nano-12b-v2",
"amazon-bedrock/nvidia.nemotron-nano-3-30b": "nemotron-nano-3-30b",
"amazon-bedrock/nvidia.nemotron-nano-9b-v2": "nemotron-nano-9b-v2",
"amazon-bedrock/nvidia.nemotron-super-3-120b": "nemotron-super-3-120b",
"amazon-bedrock/openai.gpt-oss-120b-1:0": "gpt-oss-120b",
"amazon-bedrock/openai.gpt-oss-20b-1:0": "gpt-oss-20b",
"amazon-bedrock/openai.gpt-oss-safeguard-120b": "gpt-oss-safeguard-120b",
"amazon-bedrock/openai.gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b",
"amazon-bedrock/qwen.qwen3-235b-a22b-2507-v1:0": "qwen3-235b-a22b-2507",
"amazon-bedrock/qwen.qwen3-32b-v1:0": "qwen3-32b",
"amazon-bedrock/qwen.qwen3-coder-30b-a3b-v1:0": "qwen3-coder-next",
"amazon-bedrock/qwen.qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b",
"amazon-bedrock/qwen.qwen3-coder-next": "qwen3-coder-next",
"amazon-bedrock/qwen.qwen3-next-80b-a3b": "qwen3-next:80b",
"amazon-bedrock/qwen.qwen3-vl-235b-a22b": "qwen3-vl-235b-a22b",
"amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0":
"claude-haiku-4-5",
"amazon-bedrock/us.anthropic.claude-opus-4-1-20250805-v1:0":
"claude-opus-4-1",
"amazon-bedrock/us.anthropic.claude-opus-4-20250514-v1:0": "claude-opus-4",
"amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0":
"claude-opus-4-5",
"amazon-bedrock/us.anthropic.claude-opus-4-6-v1": "claude-opus-4-6",
"amazon-bedrock/us.anthropic.claude-opus-4-7": "claude-opus-4-7",
"amazon-bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0":
"claude-sonnet-4",
"amazon-bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0":
"claude-sonnet-4-5",
"amazon-bedrock/us.anthropic.claude-sonnet-4-6": "claude-sonnet-4-6",
"amazon-bedrock/writer.palmyra-x4-v1:0": "palmyra-x4",
"amazon-bedrock/writer.palmyra-x5-v1:0": "palmyra-x5",
"amazon-bedrock/zai.glm-4.7": "glm-4.7",
"amazon-bedrock/zai.glm-4.7-flash": "glm-4.7-flash",
"amazon-bedrock/zai.glm-5": "glm-5",
// ── anthropic/ provider (versioned wire ids → stable canonical) ───────────
"anthropic/claude-3-5-haiku-20241022": "claude-3-5-haiku",
"anthropic/claude-3-5-haiku-latest": "claude-3-5-haiku",
"anthropic/claude-3-5-sonnet-20240620": "claude-3-5-sonnet",
"anthropic/claude-3-5-sonnet-20241022": "claude-3-5-sonnet",
"anthropic/claude-3-7-sonnet-20250219": "claude-3-7-sonnet",
"anthropic/claude-3-haiku-20240307": "claude-3-haiku",
"anthropic/claude-3-opus-20240229": "claude-3-opus",
"anthropic/claude-3-sonnet-20240229": "claude-3-sonnet",
"anthropic/claude-haiku-4-5-20251001": "claude-haiku-4-5",
"anthropic/claude-opus-4-0": "claude-opus-4",
"anthropic/claude-opus-4-1-20250805": "claude-opus-4-1",
"anthropic/claude-opus-4-20250514": "claude-opus-4",
"anthropic/claude-opus-4-5-20251101": "claude-opus-4-5",
"anthropic/claude-opus-4-6": "claude-opus-4-6",
"anthropic/claude-sonnet-4-0": "claude-sonnet-4",
"anthropic/claude-sonnet-4-20250514": "claude-sonnet-4",
"anthropic/claude-sonnet-4-5-20250929": "claude-sonnet-4-5",
// ── cerebras ─────────────────────────────────────────────────────────────
"cerebras/zai-glm-4.7": "glm-4.7",
// ── github-copilot (dot-notation → dash) ─────────────────────────────────
"github-copilot/claude-haiku-4.5": "claude-haiku-4-5",
"github-copilot/claude-opus-4.5": "claude-opus-4-5",
"github-copilot/claude-opus-4.6": "claude-opus-4-6",
"github-copilot/claude-opus-4.7": "claude-opus-4-7",
"github-copilot/claude-sonnet-4": "claude-sonnet-4",
"github-copilot/claude-sonnet-4.5": "claude-sonnet-4-5",
"github-copilot/claude-sonnet-4.6": "claude-sonnet-4-6",
// ── groq ─────────────────────────────────────────────────────────────────
"groq/groq/compound": "compound",
"groq/groq/compound-mini": "compound-mini",
"groq/meta-llama/llama-4-maverick-17b-128e-instruct":
"llama-4-maverick-17b-128e-instruct",
"groq/meta-llama/llama-4-scout-17b-16e-instruct":
"llama-4-scout-17b-16e-instruct",
"groq/moonshotai/kimi-k2-instruct": "kimi-k2",
"groq/moonshotai/kimi-k2-instruct-0905": "kimi-k2-0905",
"groq/openai/gpt-oss-120b": "gpt-oss-120b",
"groq/openai/gpt-oss-20b": "gpt-oss-20b",
"groq/openai/gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b",
"groq/qwen/qwen3-32b": "qwen3-32b",
// ── huggingface ───────────────────────────────────────────────────────────
"huggingface/MiniMaxAI/MiniMax-M2.1": "minimax-m2.1",
"huggingface/MiniMaxAI/MiniMax-M2.5": "minimax-m2.5",
"huggingface/MiniMaxAI/MiniMax-M2.7": "MiniMax-M2.7",
"huggingface/Qwen/Qwen3-235B-A22B-Thinking-2507": "qwen3-235b-a22b-2507",
"huggingface/Qwen/Qwen3-Coder-480B-A35B-Instruct": "qwen3-coder:480b",
"huggingface/Qwen/Qwen3-Coder-Next": "qwen3-coder-next",
"huggingface/Qwen/Qwen3-Next-80B-A3B-Instruct": "qwen3-next:80b",
"huggingface/Qwen/Qwen3-Next-80B-A3B-Thinking": "qwen3-next:80b",
"huggingface/Qwen/Qwen3.5-397B-A17B": "qwen3.5-397b-a17b",
"huggingface/XiaomiMiMo/MiMo-V2-Flash": "mimo-v2-flash",
"huggingface/deepseek-ai/DeepSeek-R1-0528": "deepseek-r1-0528",
"huggingface/deepseek-ai/DeepSeek-V3.2": "deepseek-v3.2",
"huggingface/moonshotai/Kimi-K2-Instruct": "kimi-k2",
"huggingface/moonshotai/Kimi-K2-Instruct-0905": "kimi-k2-0905",
"huggingface/moonshotai/Kimi-K2-Thinking": "kimi-k2-thinking",
"huggingface/moonshotai/Kimi-K2.5": "kimi-k2.5",
"huggingface/zai-org/GLM-4.7": "glm-4.7",
"huggingface/zai-org/GLM-4.7-Flash": "glm-4.7-flash",
"huggingface/zai-org/GLM-5": "glm-5",
"huggingface/zai-org/GLM-5.1": "glm-5.1",
// ── minimax ───────────────────────────────────────────────────────────────
"minimax/MiniMax-M2": "minimax-m2",
"minimax/MiniMax-M2.1": "minimax-m2.1",
"minimax/MiniMax-M2.5": "minimax-m2.5",
"minimax/MiniMax-M2.5-highspeed": "minimax-m2.5-highspeed",
"minimax/MiniMax-M2.7": "MiniMax-M2.7",
"minimax/MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed",
"minimax-cn/MiniMax-M2": "minimax-m2",
"minimax-cn/MiniMax-M2.1": "minimax-m2.1",
"minimax-cn/MiniMax-M2.5": "minimax-m2.5",
"minimax-cn/MiniMax-M2.5-highspeed": "minimax-m2.5-highspeed",
"minimax-cn/MiniMax-M2.7": "MiniMax-M2.7",
"minimax-cn/MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed",
// ── kimi-coding ───────────────────────────────────────────────────────────
// Already canonical wire_ids — included for completeness; resolver falls
// back to wire_id anyway.
"kimi-coding/kimi-k2.6": "kimi-k2.6",
"kimi-coding/kimi-k2-thinking": "kimi-k2-thinking",
// kimi-for-coding is a CUSTOM_MODELS alias for kimi-k2.6 (same name, same
// price, same wire shape) — verified via getModels("kimi-coding"). Collapse
// to kimi-k2.6 so routesFor("kimi-k2.6") sees both routes.
"kimi-coding/kimi-for-coding": "kimi-k2.6",
// opencode wire IDs that need to map through
"opencode/kimi-k2.5": "kimi-k2.5",
"opencode-go/kimi-k2.5": "kimi-k2.5",
// ── openrouter ────────────────────────────────────────────────────────────
"openrouter/anthropic/claude-3-haiku": "claude-3-haiku",
"openrouter/anthropic/claude-3.5-haiku": "claude-3-5-haiku",
"openrouter/anthropic/claude-3.7-sonnet": "claude-3-7-sonnet",
"openrouter/anthropic/claude-3.7-sonnet:thinking": "claude-3-7-sonnet",
"openrouter/anthropic/claude-haiku-4.5": "claude-haiku-4-5",
"openrouter/anthropic/claude-opus-4": "claude-opus-4",
"openrouter/anthropic/claude-opus-4.1": "claude-opus-4-1",
"openrouter/anthropic/claude-opus-4.5": "claude-opus-4-5",
"openrouter/anthropic/claude-opus-4.6": "claude-opus-4-6",
"openrouter/anthropic/claude-opus-4.6-fast": "claude-opus-4-6",
"openrouter/anthropic/claude-opus-4.7": "claude-opus-4-7",
"openrouter/anthropic/claude-sonnet-4": "claude-sonnet-4",
"openrouter/anthropic/claude-sonnet-4.5": "claude-sonnet-4-5",
"openrouter/anthropic/claude-sonnet-4.6": "claude-sonnet-4-6",
"openrouter/deepseek/deepseek-chat": "deepseek-chat",
"openrouter/deepseek/deepseek-chat-v3-0324": "deepseek-chat",
"openrouter/deepseek/deepseek-chat-v3.1": "deepseek-chat",
"openrouter/deepseek/deepseek-r1": "deepseek-r1",
"openrouter/deepseek/deepseek-r1-0528": "deepseek-r1-0528",
"openrouter/deepseek/deepseek-v3.1-terminus": "deepseek-chat",
"openrouter/deepseek/deepseek-v3.2": "deepseek-v3.2",
"openrouter/deepseek/deepseek-v3.2-exp": "deepseek-v3.2",
"openrouter/google/gemini-2.0-flash-001": "gemini-2.0-flash",
"openrouter/google/gemini-2.0-flash-lite-001": "gemini-2.0-flash",
"openrouter/google/gemini-2.5-flash": "gemini-2.5-flash",
"openrouter/google/gemini-2.5-flash-lite": "gemini-2.5-flash-lite",
"openrouter/google/gemini-2.5-flash-lite-preview-09-2025":
"gemini-2.5-flash-lite",
"openrouter/google/gemini-2.5-pro": "gemini-2.5-pro",
"openrouter/google/gemini-2.5-pro-preview": "gemini-2.5-pro",
"openrouter/google/gemini-2.5-pro-preview-05-06": "gemini-2.5-pro",
"openrouter/google/gemini-3-flash-preview": "gemini-3-flash-preview",
"openrouter/google/gemini-3.1-flash-lite-preview":
"gemini-3.1-flash-lite-preview",
"openrouter/google/gemini-3.1-pro-preview": "gemini-3.1-pro-preview",
"openrouter/google/gemini-3.1-pro-preview-customtools":
"gemini-3.1-pro-preview",
"openrouter/google/gemma-4-26b-a4b-it": "gemma-4-26b-a4b-it",
"openrouter/google/gemma-4-26b-a4b-it:free": "gemma-4-26b-a4b-it",
"openrouter/google/gemma-4-31b-it": "gemma-4-31b-it",
"openrouter/google/gemma-4-31b-it:free": "gemma-4-31b-it",
"openrouter/meta-llama/llama-3-8b-instruct": "llama-3-8b-instruct",
"openrouter/meta-llama/llama-3.1-70b-instruct": "llama-3.1-70b-instruct",
"openrouter/meta-llama/llama-3.1-8b-instruct": "llama-3.1-8b-instruct",
"openrouter/meta-llama/llama-3.3-70b-instruct": "llama-3.3-70b-instruct",
"openrouter/meta-llama/llama-3.3-70b-instruct:free": "llama-3.3-70b-instruct",
"openrouter/meta-llama/llama-4-scout": "llama-4-scout",
"openrouter/minimax/minimax-m1": "minimax-m1",
"openrouter/minimax/minimax-m2": "minimax-m2",
"openrouter/minimax/minimax-m2.1": "minimax-m2.1",
"openrouter/minimax/minimax-m2.5": "minimax-m2.5",
"openrouter/minimax/minimax-m2.5:free": "minimax-m2.5",
"openrouter/minimax/minimax-m2.7": "MiniMax-M2.7",
"openrouter/mistralai/codestral-2508": "codestral-latest",
"openrouter/mistralai/devstral-2512": "devstral-2512",
"openrouter/mistralai/devstral-medium": "devstral-medium-latest",
"openrouter/mistralai/devstral-small": "devstral-small-2507",
"openrouter/mistralai/ministral-14b-2512": "mistral-small-latest",
"openrouter/mistralai/ministral-3b-2512": "ministral-3b-latest",
"openrouter/mistralai/ministral-8b-2512": "ministral-8b-latest",
"openrouter/mistralai/mistral-large": "mistral-large-latest",
"openrouter/mistralai/mistral-large-2407": "mistral-large-latest",
"openrouter/mistralai/mistral-large-2411": "mistral-large-2411",
"openrouter/mistralai/mistral-large-2512": "mistral-large-2512",
"openrouter/mistralai/mistral-medium-3": "mistral-medium-latest",
"openrouter/mistralai/mistral-medium-3.1": "mistral-medium-latest",
"openrouter/mistralai/mistral-nemo": "mistral-nemo",
"openrouter/mistralai/mistral-small-2603": "mistral-small-2603",
"openrouter/mistralai/mistral-small-3.2-24b-instruct": "mistral-small-latest",
"openrouter/mistralai/mistral-small-creative": "mistral-small-latest",
"openrouter/mistralai/mixtral-8x22b-instruct": "open-mixtral-8x22b",
"openrouter/mistralai/mixtral-8x7b-instruct": "open-mixtral-8x7b",
"openrouter/mistralai/pixtral-large-2411": "pixtral-large-latest",
"openrouter/mistralai/voxtral-small-24b-2507": "voxtral-small-24b-2507",
"openrouter/moonshotai/kimi-k2": "kimi-k2",
"openrouter/moonshotai/kimi-k2-0905": "kimi-k2-0905",
"openrouter/moonshotai/kimi-k2-thinking": "kimi-k2-thinking",
"openrouter/moonshotai/kimi-k2.5": "kimi-k2.5",
"openrouter/nvidia/nemotron-3-nano-30b-a3b": "nemotron-3-nano-30b",
"openrouter/nvidia/nemotron-3-nano-30b-a3b:free": "nemotron-3-nano-30b",
"openrouter/nvidia/nemotron-3-super-120b-a12b": "nemotron-3-super",
"openrouter/nvidia/nemotron-3-super-120b-a12b:free": "nemotron-3-super",
"openrouter/nvidia/nemotron-nano-12b-v2-vl:free": "nemotron-nano-12b-v2",
"openrouter/nvidia/nemotron-nano-9b-v2": "nemotron-nano-9b-v2",
"openrouter/nvidia/nemotron-nano-9b-v2:free": "nemotron-nano-9b-v2",
"openrouter/openai/gpt-4": "gpt-4",
"openrouter/openai/gpt-4-turbo": "gpt-4-turbo",
"openrouter/openai/gpt-4o": "gpt-4o",
"openrouter/openai/gpt-4o-mini": "gpt-4o-mini",
"openrouter/openai/gpt-5": "gpt-5",
"openrouter/openai/gpt-5-mini": "gpt-5-mini",
"openrouter/openai/gpt-5-nano": "gpt-5-nano",
"openrouter/openai/gpt-5-pro": "gpt-5-pro",
"openrouter/openai/gpt-5.1": "gpt-5.1",
"openrouter/openai/gpt-5.1-codex": "gpt-5.1-codex",
"openrouter/openai/gpt-5.1-codex-max": "gpt-5.1-codex-max",
"openrouter/openai/gpt-5.1-codex-mini": "gpt-5.1-codex-mini",
"openrouter/openai/gpt-5.2": "gpt-5.2",
"openrouter/openai/gpt-5.2-codex": "gpt-5.2-codex",
"openrouter/openai/gpt-5.3-codex": "gpt-5.3-codex",
"openrouter/openai/gpt-5.4": "gpt-5.4",
"openrouter/openai/gpt-5.4-mini": "gpt-5.4-mini",
"openrouter/openai/gpt-5.4-nano": "gpt-5.4-nano",
"openrouter/openai/gpt-5.4-pro": "gpt-5.4-pro",
"openrouter/openai/gpt-oss-120b": "gpt-oss-120b",
"openrouter/openai/gpt-oss-120b:free": "gpt-oss-120b",
"openrouter/openai/gpt-oss-20b": "gpt-oss-20b",
"openrouter/openai/gpt-oss-20b:free": "gpt-oss-20b",
"openrouter/openai/gpt-oss-safeguard-20b": "gpt-oss-safeguard-20b",
"openrouter/openai/o1": "o1",
"openrouter/openai/o3": "o3",
"openrouter/openai/o4-mini": "o4-mini",
"openrouter/openai/o4-mini-deep-research": "o4-mini-deep-research",
"openrouter/qwen/qwen3-coder": "qwen3-coder:480b",
"openrouter/qwen/qwen3-coder:free": "qwen3-coder:480b",
"openrouter/qwen/qwen3-coder-next": "qwen3-coder-next",
"openrouter/qwen/qwen3-max": "qwen3-max",
"openrouter/qwen/qwen3-next-80b-a3b-instruct": "qwen3-next:80b",
"openrouter/qwen/qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b",
"openrouter/qwen/qwen3-next-80b-a3b-thinking": "qwen3-next:80b",
"openrouter/x-ai/grok-3": "grok-3",
"openrouter/x-ai/grok-3-mini": "grok-3-mini",
"openrouter/x-ai/grok-4": "grok-4",
"openrouter/z-ai/glm-4.5": "glm-4.5",
"openrouter/z-ai/glm-4.5-air": "glm-4.5-air",
"openrouter/z-ai/glm-4.5-air:free": "glm-4.5-air",
"openrouter/z-ai/glm-4.6": "glm-4.6",
"openrouter/z-ai/glm-4.7": "glm-4.7",
"openrouter/z-ai/glm-4.7-flash": "glm-4.7-flash",
"openrouter/z-ai/glm-5": "glm-5",
"openrouter/z-ai/glm-5-turbo": "glm-5-turbo",
"openrouter/z-ai/glm-5.1": "glm-5.1",
"openrouter/z-ai/glm-5v-turbo": "glm-5v-turbo",
"openrouter/xiaomi/mimo-v2-flash": "mimo-v2-flash",
"openrouter/xiaomi/mimo-v2-omni": "mimo-v2-omni",
"openrouter/xiaomi/mimo-v2-pro": "mimo-v2-pro",
// ── vercel-ai-gateway ─────────────────────────────────────────────────────
"vercel-ai-gateway/anthropic/claude-3-haiku": "claude-3-haiku",
"vercel-ai-gateway/anthropic/claude-3.5-haiku": "claude-3-5-haiku",
"vercel-ai-gateway/anthropic/claude-3.7-sonnet": "claude-3-7-sonnet",
"vercel-ai-gateway/anthropic/claude-haiku-4.5": "claude-haiku-4-5",
"vercel-ai-gateway/anthropic/claude-opus-4": "claude-opus-4",
"vercel-ai-gateway/anthropic/claude-opus-4.1": "claude-opus-4-1",
"vercel-ai-gateway/anthropic/claude-opus-4.5": "claude-opus-4-5",
"vercel-ai-gateway/anthropic/claude-opus-4.6": "claude-opus-4-6",
"vercel-ai-gateway/anthropic/claude-opus-4.7": "claude-opus-4-7",
"vercel-ai-gateway/anthropic/claude-sonnet-4": "claude-sonnet-4",
"vercel-ai-gateway/anthropic/claude-sonnet-4.5": "claude-sonnet-4-5",
"vercel-ai-gateway/anthropic/claude-sonnet-4.6": "claude-sonnet-4-6",
"vercel-ai-gateway/deepseek/deepseek-r1": "deepseek-r1",
"vercel-ai-gateway/deepseek/deepseek-v3": "deepseek-chat",
"vercel-ai-gateway/deepseek/deepseek-v3.1": "deepseek-chat",
"vercel-ai-gateway/deepseek/deepseek-v3.1-terminus": "deepseek-chat",
"vercel-ai-gateway/deepseek/deepseek-v3.2": "deepseek-v3.2",
"vercel-ai-gateway/deepseek/deepseek-v3.2-thinking": "deepseek-v3.2",
"vercel-ai-gateway/google/gemini-2.0-flash": "gemini-2.0-flash",
"vercel-ai-gateway/google/gemini-2.0-flash-lite": "gemini-2.0-flash",
"vercel-ai-gateway/google/gemini-2.5-flash": "gemini-2.5-flash",
"vercel-ai-gateway/google/gemini-2.5-flash-lite": "gemini-2.5-flash-lite",
"vercel-ai-gateway/google/gemini-2.5-pro": "gemini-2.5-pro",
"vercel-ai-gateway/google/gemini-3-flash": "gemini-3-flash-preview",
"vercel-ai-gateway/google/gemini-3-pro-preview": "gemini-3-pro-preview",
"vercel-ai-gateway/google/gemini-3.1-flash-lite-preview":
"gemini-3.1-flash-lite-preview",
"vercel-ai-gateway/google/gemini-3.1-pro-preview": "gemini-3.1-pro-preview",
"vercel-ai-gateway/minimax/minimax-m2": "minimax-m2",
"vercel-ai-gateway/minimax/minimax-m2.1": "minimax-m2.1",
"vercel-ai-gateway/minimax/minimax-m2.1-lightning": "minimax-m2.1",
"vercel-ai-gateway/minimax/minimax-m2.5": "minimax-m2.5",
"vercel-ai-gateway/minimax/minimax-m2.5-highspeed": "minimax-m2.5-highspeed",
"vercel-ai-gateway/minimax/minimax-m2.7": "MiniMax-M2.7",
"vercel-ai-gateway/minimax/minimax-m2.7-highspeed": "MiniMax-M2.7-highspeed",
"vercel-ai-gateway/mistral/codestral": "codestral-latest",
"vercel-ai-gateway/mistral/devstral-2": "devstral-2512",
"vercel-ai-gateway/mistral/devstral-small": "devstral-small-2507",
"vercel-ai-gateway/mistral/devstral-small-2": "devstral-small-2507",
"vercel-ai-gateway/mistral/ministral-3b": "ministral-3b-latest",
"vercel-ai-gateway/mistral/ministral-8b": "ministral-8b-latest",
"vercel-ai-gateway/mistral/mistral-medium": "mistral-medium-latest",
"vercel-ai-gateway/mistral/mistral-small": "mistral-small-latest",
"vercel-ai-gateway/moonshotai/kimi-k2": "kimi-k2",
"vercel-ai-gateway/moonshotai/kimi-k2-0905": "kimi-k2-0905",
"vercel-ai-gateway/moonshotai/kimi-k2-thinking": "kimi-k2-thinking",
"vercel-ai-gateway/moonshotai/kimi-k2-thinking-turbo": "kimi-k2-thinking-turbo",
"vercel-ai-gateway/moonshotai/kimi-k2-turbo": "kimi-k2-turbo",
"vercel-ai-gateway/moonshotai/kimi-k2.5": "kimi-k2.5",
"vercel-ai-gateway/openai/gpt-4-turbo": "gpt-4-turbo",
"vercel-ai-gateway/openai/gpt-4o": "gpt-4o",
"vercel-ai-gateway/openai/gpt-4o-mini": "gpt-4o-mini",
"vercel-ai-gateway/openai/gpt-5": "gpt-5",
"vercel-ai-gateway/openai/gpt-5-mini": "gpt-5-mini",
"vercel-ai-gateway/openai/gpt-5-nano": "gpt-5-nano",
"vercel-ai-gateway/openai/gpt-5-pro": "gpt-5-pro",
"vercel-ai-gateway/openai/gpt-5.1-codex": "gpt-5.1-codex",
"vercel-ai-gateway/openai/gpt-5.1-codex-max": "gpt-5.1-codex-max",
"vercel-ai-gateway/openai/gpt-5.1-codex-mini": "gpt-5.1-codex-mini",
"vercel-ai-gateway/openai/gpt-5.2": "gpt-5.2",
"vercel-ai-gateway/openai/gpt-5.2-codex": "gpt-5.2-codex",
"vercel-ai-gateway/openai/gpt-5.3-codex": "gpt-5.3-codex",
"vercel-ai-gateway/openai/gpt-5.4": "gpt-5.4",
"vercel-ai-gateway/openai/gpt-5.4-mini": "gpt-5.4-mini",
"vercel-ai-gateway/openai/gpt-5.4-nano": "gpt-5.4-nano",
"vercel-ai-gateway/openai/gpt-5.4-pro": "gpt-5.4-pro",
"vercel-ai-gateway/openai/o1": "o1",
"vercel-ai-gateway/openai/o3": "o3",
"vercel-ai-gateway/openai/o4-mini": "o4-mini",
"vercel-ai-gateway/xai/grok-3": "grok-3",
"vercel-ai-gateway/xai/grok-3-mini": "grok-3-mini",
"vercel-ai-gateway/xai/grok-4": "grok-4",
"vercel-ai-gateway/zai/glm-4.5": "glm-4.5",
"vercel-ai-gateway/zai/glm-4.5-air": "glm-4.5-air",
"vercel-ai-gateway/zai/glm-4.6": "glm-4.6",
"vercel-ai-gateway/zai/glm-4.7": "glm-4.7",
"vercel-ai-gateway/zai/glm-4.7-flash": "glm-4.7-flash",
"vercel-ai-gateway/zai/glm-5": "glm-5",
"vercel-ai-gateway/zai/glm-5-turbo": "glm-5-turbo",
"vercel-ai-gateway/zai/glm-5.1": "glm-5.1",
"vercel-ai-gateway/zai/glm-5v-turbo": "glm-5v-turbo",
"vercel-ai-gateway/xiaomi/mimo-v2-flash": "mimo-v2-flash",
"vercel-ai-gateway/xiaomi/mimo-v2-pro": "mimo-v2-pro",
// ── zai ──────────────────────────────────────────────────────────────────
// zai models already have clean IDs (glm-5.1, etc.) — no mapping needed
};
/**
 * Canonical id → generation tag.
 * Same-generation routes are eligible for direct failover (no downgrade
 * signal). Any canonical id missing from this map falls back to using the
 * id itself as its generation (see resolveEntry), making it a generation of
 * one — so every id that has a TIER entry should also be listed here.
 */
const GENERATION: Record<CanonicalId, string> = {
  // ── Kimi K2 family ────────────────────────────────────────────────────────
  "kimi-k2": "k2",
  "kimi-k2-0905": "k2", // same generation, post-release patch
  "kimi-k2-instruct": "k2",
  "kimi-k2-turbo": "k2",
  "kimi-k2.5": "k2.5",
  "kimi-k2.6": "k2.6",
  "kimi-k2-thinking": "k2-thinking",
  "kimi-k2-thinking-turbo": "k2-thinking",
  "kimi-for-coding": "k2.6", // wire alias for kimi-k2.6
  // ── Claude 3.x ───────────────────────────────────────────────────────────
  "claude-3-haiku": "haiku-3",
  "claude-3-sonnet": "sonnet-3",
  "claude-3-opus": "opus-3",
  "claude-3-5-haiku": "haiku-3.5",
  "claude-3-5-sonnet": "sonnet-3.5",
  "claude-3-7-sonnet": "sonnet-3.7",
  // ── Claude 4.x ───────────────────────────────────────────────────────────
  "claude-haiku-4-5": "haiku-4",
  "claude-haiku-4.5": "haiku-4", // dot-notation variant (github-copilot)
  "claude-sonnet-4": "sonnet-4",
  "claude-sonnet-4-5": "sonnet-4",
  "claude-sonnet-4-6": "sonnet-4",
  "claude-sonnet-4.5": "sonnet-4",
  "claude-sonnet-4.6": "sonnet-4",
  "claude-opus-4": "opus-4",
  "claude-opus-4-1": "opus-4",
  "claude-opus-4-5": "opus-4",
  "claude-opus-4-6": "opus-4",
  "claude-opus-4-7": "opus-4",
  "claude-opus-4.5": "opus-4",
  "claude-opus-4.6": "opus-4",
  "claude-opus-4.7": "opus-4",
  // ── Gemini ────────────────────────────────────────────────────────────────
  "gemini-2.0-flash": "2",
  "gemini-2.5-flash": "2.5",
  "gemini-2.5-flash-lite": "2.5",
  "gemini-2.5-pro": "2.5",
  "gemini-3-flash-preview": "3",
  "gemini-3-pro-preview": "3",
  "gemini-3.1-flash-lite-preview": "3.1",
  "gemini-3.1-pro-preview": "3.1",
  // ── GPT / OpenAI ─────────────────────────────────────────────────────────
  "gpt-4": "4",
  "gpt-4-turbo": "4",
  "gpt-4o": "4o",
  "gpt-4o-mini": "4o",
  "gpt-4.1": "4.1",
  "gpt-4.1-mini": "4.1",
  "gpt-4.1-nano": "4.1",
  "gpt-5": "5",
  "gpt-5-mini": "5",
  "gpt-5-nano": "5",
  "gpt-5-pro": "5",
  "gpt-5.1": "5.1",
  "gpt-5.1-codex": "5.1",
  "gpt-5.1-codex-max": "5.1",
  "gpt-5.1-codex-mini": "5.1",
  "gpt-5.2": "5.2",
  "gpt-5.2-codex": "5.2",
  "gpt-5.3-codex": "5.3",
  "gpt-5.4": "5.4",
  "gpt-5.4-mini": "5.4",
  "gpt-5.4-nano": "5.4",
  "gpt-5.4-pro": "5.4",
  "gpt-5.5": "5.5", // added: has a TIER entry (heavy) but was missing here
  "gpt-5.3-codex-spark": "5.3",
  "gpt-5-mini-latest": "5",
  o1: "o1",
  o3: "o3",
  "o4-mini": "o4",
  "o4-mini-deep-research": "o4",
  // ── DeepSeek ─────────────────────────────────────────────────────────────
  "deepseek-chat": "v3",
  "deepseek-v3.2": "v3.2",
  "deepseek-r1": "r1",
  "deepseek-r1-0528": "r1",
  // ── MiniMax ───────────────────────────────────────────────────────────────
  "minimax-m2": "m2",
  "minimax-m2.1": "m2.1",
  "minimax-m2.5": "m2.5",
  "minimax-m2.5-highspeed": "m2.5",
  "MiniMax-M2.7": "m2.7",
  "MiniMax-M2.7-highspeed": "m2.7",
  "minimax-m1": "m1",
  // ── GLM (ZAI) ─────────────────────────────────────────────────────────────
  "glm-4.5": "glm-4.5",
  "glm-4.5-air": "glm-4.5",
  "glm-4.6": "glm-4.6",
  "glm-4.7": "glm-4.7",
  "glm-4.7-flash": "glm-4.7",
  "glm-4.7-flashx": "glm-4.7",
  "glm-5": "glm-5",
  "glm-5-turbo": "glm-5",
  "glm-5.1": "glm-5.1",
  "glm-5v-turbo": "glm-5",
  // ── Mistral / Devstral ───────────────────────────────────────────────────
  "codestral-latest": "codestral",
  "devstral-2512": "devstral-2",
  "devstral-medium-latest": "devstral-medium",
  "devstral-medium-2507": "devstral-medium",
  "devstral-small-2507": "devstral-small",
  "devstral-small-2505": "devstral-small",
  "labs-devstral-small-2512": "devstral-small",
  "magistral-small": "magistral-small",
  "magistral-medium-latest": "magistral-medium", // added: has a TIER entry but was missing here
  "mistral-large-2411": "mistral-large",
  "mistral-large-2512": "mistral-large",
  "mistral-large-latest": "mistral-large",
  "mistral-medium-latest": "mistral-medium",
  "mistral-medium-2505": "mistral-medium",
  "mistral-medium-2508": "mistral-medium",
  "mistral-nemo": "mistral-nemo",
  "mistral-small-latest": "mistral-small",
  "mistral-small-2506": "mistral-small",
  "mistral-small-2603": "mistral-small",
  "ministral-3b-latest": "ministral-3b",
  "ministral-8b-latest": "ministral-8b",
  "open-mixtral-8x22b": "mixtral-8x22b", // added: has a TIER entry but was missing here
  "pixtral-large-latest": "pixtral-large",
  "pixtral-12b": "pixtral-12b",
  // ── Qwen ─────────────────────────────────────────────────────────────────
  "qwen3-coder:480b": "qwen3-coder",
  "qwen3-coder-next": "qwen3-coder",
  "qwen3-next:80b": "qwen3-next",
  // ── XAI (Grok) ───────────────────────────────────────────────────────────
  "grok-3": "grok-3",
  "grok-3-mini": "grok-3",
  "grok-4": "grok-4",
  // ── MiMo (Xiaomi) ────────────────────────────────────────────────────────
  "mimo-v2-flash": "mimo-v2",
  "mimo-v2-omni": "mimo-v2",
  "mimo-v2-pro": "mimo-v2",
};
/**
 * Canonical id capability tier.
 * Lifted from MODEL_CAPABILITY_TIER in model-router.js.
 * CRITICAL: kimi-k2.5 is its own tier entry (NOT aliased to kimi-k2.6).
 * Dot-notation Claude ids (the github-copilot spellings already present in
 * GENERATION) are listed alongside their dash-notation twins so tierFor()
 * resolves both spellings instead of returning null for the dotted ones.
 */
const TIER: Record<CanonicalId, CapabilityTier> = {
  // ── Light ─────────────────────────────────────────────────────────────────
  "claude-haiku-4-5": "light",
  "claude-haiku-4.5": "light", // added: dot-notation variant (github-copilot)
  "claude-3-5-haiku": "light",
  "claude-3-haiku": "light",
  "gpt-4o-mini": "light",
  "gpt-4.1-mini": "light",
  "gpt-4.1-nano": "light",
  "gpt-5-mini": "light",
  "gpt-5-nano": "light",
  "gpt-5.1-codex-mini": "light",
  "gpt-5.3-codex-spark": "light",
  "gemini-2.0-flash": "light",
  "gemini-2.5-flash-lite": "light",
  "gemini-3.1-flash-lite-preview": "light",
  "glm-4.7-flash": "light",
  "glm-4.7-flashx": "light",
  "ministral-3b-latest": "light",
  "ministral-8b-latest": "light",
  "devstral-small-2505": "light",
  "devstral-small-2507": "light",
  "labs-devstral-small-2512": "light",
  // ── Standard ──────────────────────────────────────────────────────────────
  "claude-sonnet-4-6": "standard",
  "claude-sonnet-4-5": "standard",
  "claude-sonnet-4": "standard",
  "claude-sonnet-4.5": "standard", // added: dot-notation variant
  "claude-sonnet-4.6": "standard", // added: dot-notation variant
  "claude-3-5-sonnet": "standard",
  "gpt-4o": "standard",
  "gpt-4.1": "standard",
  "gpt-5.1-codex-max": "standard",
  "gpt-5.4-mini": "standard",
  "gemini-2.5-pro": "standard",
  "gemini-3-flash-preview": "standard",
  "gemini-2.5-flash": "standard",
  "deepseek-chat": "standard",
  "glm-4.7": "standard",
  "qwen3-coder:480b": "standard",
  "qwen3-coder-next": "standard",
  // kimi-k2.5 is standard — its own entry (NOT aliased to kimi-k2.6)
  "kimi-k2.5": "standard",
  "kimi-k2.6": "standard",
  "kimi-for-coding": "standard",
  "MiniMax-M2.7": "standard",
  "MiniMax-M2.7-highspeed": "standard",
  "codestral-latest": "standard",
  "devstral-2512": "standard",
  "devstral-medium-2507": "standard",
  "devstral-medium-latest": "standard",
  "magistral-small": "standard",
  "mistral-medium-2505": "standard",
  "mistral-medium-2508": "standard",
  "mistral-medium-latest": "standard",
  "mistral-nemo": "standard",
  "mistral-small-2506": "standard",
  "mistral-small-2603": "standard",
  "mistral-small-latest": "standard",
  "pixtral-12b": "standard",
  // ── Heavy ─────────────────────────────────────────────────────────────────
  "claude-opus-4": "heavy", // added: base opus-4 (in GENERATION, was missing here)
  "claude-opus-4-1": "heavy", // added: in GENERATION, was missing here
  "claude-opus-4-6": "heavy",
  "claude-opus-4-7": "heavy",
  "claude-opus-4-5": "heavy",
  "claude-opus-4.5": "heavy", // added: dot-notation variant
  "claude-opus-4.6": "heavy", // added: dot-notation variant
  "claude-opus-4.7": "heavy", // added: dot-notation variant
  "claude-3-opus": "heavy",
  "gpt-4-turbo": "heavy",
  "gpt-5": "heavy",
  "gpt-5-pro": "heavy",
  "gpt-5.1": "heavy",
  "gpt-5.2": "heavy",
  "gpt-5.2-codex": "heavy",
  "gpt-5.3-codex": "heavy",
  "gpt-5.4": "heavy",
  "gpt-5.4-pro": "heavy",
  "gpt-5.5": "heavy",
  o1: "heavy",
  o3: "heavy",
  "o4-mini": "heavy",
  "o4-mini-deep-research": "heavy",
  "gemini-3.1-pro-preview": "heavy",
  "gemini-3-pro-preview": "heavy",
  "kimi-k2-thinking": "heavy",
  "kimi-k2-thinking-turbo": "heavy",
  "qwen3-next:80b": "heavy",
  "glm-5": "heavy",
  "glm-5-turbo": "heavy",
  "glm-5.1": "heavy",
  "glm-5v-turbo": "heavy",
  "magistral-medium-latest": "heavy",
  "mistral-large-2411": "heavy",
  "mistral-large-2512": "heavy",
  "mistral-large-latest": "heavy",
  "open-mixtral-8x22b": "heavy",
  "pixtral-large-latest": "heavy",
};
// ─── Module-level index built at startup ─────────────────────────────────────
// Three module-level maps form the registry's in-memory index. They are
// populated once by the buildIndex IIFE below; only _RESOLVED_CACHE is
// written to afterwards (lazy memoization in lookupRoute).
/**
 * Flattened upstream catalog: routeKey (`${provider}/${wireId}`) → the raw
 * upstream ModelEntry as returned by getModels(provider).
 */
const _ENTRY_BY_ROUTE = new Map<
RouteKey,
{
id: string;
name: string;
api: string;
provider: string;
baseUrl: string;
reasoning?: boolean;
input?: string[];
capabilities?: Record<string, unknown>;
cost?: { input?: number; output?: number; cacheRead?: number; cacheWrite?: number };
contextWindow?: number;
maxTokens?: number;
}
>();
/**
 * routeKey → resolved ResolvedModel (lazily populated cache).
 * A stored `null` is a remembered miss; lookupRoute() distinguishes it from
 * "never looked up" via Map.has().
 */
const _RESOLVED_CACHE = new Map<RouteKey, ResolvedModel | null>();
/** canonical id → list of route keys that serve it (insertion order). */
const _ROUTES_BY_CANONICAL = new Map<CanonicalId, RouteKey[]>();
// Build the indexes once at module load. `getModels(provider)` returns an
// Array<ModelEntry> (after generated + CUSTOM_MODELS merge); we key by
// `${provider}/${entry.id}`.
(function buildIndex() {
  // Derive the entry shape from the index map's value type instead of
  // redeclaring the identical structural type a second time (the previous
  // duplicate could silently drift out of sync with _ENTRY_BY_ROUTE).
  type UpstreamEntry = NonNullable<ReturnType<(typeof _ENTRY_BY_ROUTE)["get"]>>;
  for (const provider of getProviders()) {
    const entries = getModels(provider) as unknown as UpstreamEntry[];
    if (!entries) continue;
    for (const entry of entries) {
      const wireId = entry.id;
      const routeKey = `${provider}/${wireId}` as RouteKey;
      _ENTRY_BY_ROUTE.set(routeKey, entry);
      // Canonical id: explicit route mapping wins; otherwise the wire id
      // is its own canonical id.
      const canonical = CANONICAL_BY_ROUTE[routeKey] ?? wireId;
      // Reverse index: canonical id → every route that serves it.
      const routes = _ROUTES_BY_CANONICAL.get(canonical) ?? [];
      routes.push(routeKey);
      _ROUTES_BY_CANONICAL.set(canonical, routes);
    }
  }
})();
// ─── Resolution helpers ───────────────────────────────────────────────────────
function resolveEntry(
routeKey: RouteKey,
entry: ReturnType<typeof _ENTRY_BY_ROUTE["get"]>,
): ResolvedModel | null {
if (!entry) return null;
const canonical = CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
const generation = GENERATION[canonical] ?? canonical;
const tier: CapabilityTier = TIER[canonical] ?? "standard";
return {
canonical_id: canonical,
generation,
tier,
wire_id: entry.id,
provider: entry.provider,
api: entry.api,
baseUrl: entry.baseUrl,
capabilities: entry.capabilities,
cost: entry.cost,
contextWindow: entry.contextWindow,
maxTokens: entry.maxTokens,
reasoning: entry.reasoning,
inputModalities: entry.input,
};
}
// ─── Public API ───────────────────────────────────────────────────────────────
/** Resolve a (provider, wire_id) pair; null when the upstream catalog has no such route. */
export function lookup(
  provider: string,
  wireId: string,
): ResolvedModel | null {
  return lookupRoute(`${provider}/${wireId}` as RouteKey);
}
/** Resolve a fused route key; null when unknown. Results (including misses) are memoized. */
export function lookupRoute(routeKey: RouteKey): ResolvedModel | null {
  // A cached `null` is a remembered miss — Map.has() tells it apart from
  // "never looked up".
  if (_RESOLVED_CACHE.has(routeKey)) {
    return _RESOLVED_CACHE.get(routeKey) ?? null;
  }
  const entry = _ENTRY_BY_ROUTE.get(routeKey);
  const resolved = entry === undefined ? null : resolveEntry(routeKey, entry);
  _RESOLVED_CACHE.set(routeKey, resolved);
  return resolved;
}
/** Every resolvable route (across all providers) for this canonical id. */
export function routesFor(canonicalId: CanonicalId): ResolvedModel[] {
  const resolved: ResolvedModel[] = [];
  for (const routeKey of _ROUTES_BY_CANONICAL.get(canonicalId) ?? []) {
    const model = lookupRoute(routeKey);
    if (model !== null) resolved.push(model);
  }
  return resolved;
}
/** Canonical id for a route key, or null when the route is not in the catalog. */
export function canonicalIdFor(
  routeKey: RouteKey,
): CanonicalId | null {
  const entry = _ENTRY_BY_ROUTE.get(routeKey);
  if (entry === undefined) return null;
  // Explicit mapping wins; the wire id is canonical otherwise.
  return CANONICAL_BY_ROUTE[routeKey] ?? entry.id;
}
/** Capability tier of a canonical id. */
export function tierFor(canonicalId: CanonicalId): CapabilityTier | null {
return TIER[canonicalId] ?? null;
}
/** Generation of a canonical id (e.g. "k2.5"). */
export function generationFor(canonicalId: CanonicalId): string | null {
return GENERATION[canonicalId] ?? null;
}
/** True when both canonical ids carry the same known generation tag (failover may cross). */
export function sameGeneration(a: CanonicalId, b: CanonicalId): boolean {
  const genA = GENERATION[a];
  // An unknown generation never matches — not even against itself.
  return genA !== undefined && genA === GENERATION[b];
}
/** Every canonical id known to SF (one per reverse-index key). */
export function allCanonicalIds(): CanonicalId[] {
  return [..._ROUTES_BY_CANONICAL.keys()];
}
/** Fuse a resolved model's provider and wire id into a route key (for metrics aggregation). */
export function routeKeyOf(m: {
  provider: string;
  wire_id: string;
}): RouteKey {
  const { provider, wire_id } = m;
  return `${provider}/${wire_id}` as RouteKey;
}

View file

@ -5,9 +5,13 @@ import { getProviderCapabilities } from "@singularity-forge/ai";
import { getToolCompatibility } from "@singularity-forge/coding-agent";
import { tierOrdinal } from "./complexity-classifier.js";
import { lookupModelCost } from "./model-cost-table.js";
import { tierFor } from "./model-registry.js";
// ─── Known Model Tiers ───────────────────────────────────────────────────────
// Maps known model IDs to their capability tier. Used when tier_models is not
// explicitly configured to pick the best available model for each tier.
// MIGRATED: this table moved to model-registry.ts as the TIER map. Kept here
// as a deprecated re-export shim so external callers that import
// MODEL_CAPABILITY_TIER directly don't break during the transition. Swarm C
// will remove this shim once metrics + remaining consumers are migrated.
/** @deprecated Use tierFor() from model-registry.js instead. */
export const MODEL_CAPABILITY_TIER = {
// Light-tier models (cheapest)
"claude-haiku-4-5": "light",
@ -961,7 +965,10 @@ const MODEL_CAPABILITY_ALIASES = {
"gpt-oss:120b": "gpt-4o",
"gpt-oss:20b": "gpt-4o-mini",
"kimi-k2:1t": "kimi-k2.6",
"kimi-k2.5": "kimi-k2.6",
// NOTE: "kimi-k2.5" → "kimi-k2.6" alias REMOVED. K2.5 is a distinct
// generation from K2.6; aliasing them caused K2.5 to silently inherit
// K2.6's tier (latent downgrade-conflation bug). K2.5 now resolves via
// tierFor("kimi-k2.5") in the registry directly.
"kimi-for-coding": "kimi-k2.6",
"kimi-k2.6:cloud": "kimi-k2.6",
"kimi-k2.6-cloud": "kimi-k2.6",
@ -1384,8 +1391,12 @@ export function defaultRoutingConfig() {
}
// ─── Internal ────────────────────────────────────────────────────────────────
export function getModelTier(modelId) {
// Prefer registry lookup using canonical id — this eliminates the K2.5→K2.6
// downgrade alias that was in MODEL_CAPABILITY_ALIASES.
const canonicalId = canonicalCapabilityModelId(modelId);
// Check exact match first
const registryTier = tierFor(canonicalId);
if (registryTier) return registryTier;
// Fall back to the local table for models not yet in the registry.
if (MODEL_CAPABILITY_TIER[canonicalId])
return MODEL_CAPABILITY_TIER[canonicalId];
const sizeTier = inferTierFromModelSize(canonicalId);

View file

@ -11,13 +11,7 @@ import { execSync, spawnSync } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import {
AuthType,
CodeAssistServer,
getOauthClient,
makeFakeConfig,
setupUser,
} from "@google/gemini-cli-core";
import { snapshotGeminiCliAccount } from "@singularity-forge/google-gemini-cli-provider";
import { visibleWidth } from "@singularity-forge/tui";
import { sfHome } from "../sf-home.js";
@ -203,6 +197,8 @@ async function fetchClaudeUsage() {
// Gemini Usage
// ============================================================================
async function fetchGeminiUsage(_modelRegistry) {
// Existence check is a fast pre-flight so we surface a friendly "not logged
// in" message without paying for the OAuth bootstrap inside snapshotGeminiCliAccount.
const credPath = path.join(os.homedir(), ".gemini", "oauth_creds.json");
if (!fs.existsSync(credPath)) {
return {
@ -213,47 +209,26 @@ async function fetchGeminiUsage(_modelRegistry) {
};
}
try {
const config = makeFakeConfig();
const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config);
const userData = await setupUser(authClient, config);
const projectId = userData.projectId;
if (!projectId) {
const snapshot = await snapshotGeminiCliAccount();
if (!snapshot) {
return {
provider: "gemini",
displayName: "Gemini",
windows: [],
error: "No Code Assist project",
error: "No Code Assist project or empty quota response",
};
}
const server = new CodeAssistServer(authClient, projectId, { headers: {} });
const data = await server.retrieveUserQuota({
project: projectId,
});
const quotas = {};
for (const bucket of data.buckets || []) {
const model = bucket.modelId || "unknown";
const frac = bucket.remainingFraction ?? 1;
if (!quotas[model] || frac < quotas[model].remainingFraction) {
quotas[model] = {
remainingFraction: frac,
resetTime: bucket.resetTime,
};
}
}
const windows = [];
for (const [model, quota] of Object.entries(quotas).sort(([a], [b]) =>
a.localeCompare(b),
)) {
const resetDate = quota.resetTime ? new Date(quota.resetTime) : undefined;
windows.push({
label: model.replace(/^gemini-/, "").slice(0, 7),
usedPercent: (1 - quota.remainingFraction) * 100,
const windows = snapshot.models.map((m) => {
const resetDate = m.resetTime ? new Date(m.resetTime) : undefined;
return {
label: m.modelId.replace(/^gemini-/, "").slice(0, 7),
usedPercent: m.usedFraction * 100,
resetDescription:
resetDate && !Number.isNaN(resetDate.getTime())
? formatReset(resetDate)
: undefined,
});
}
};
});
return { provider: "gemini", displayName: "Gemini", windows };
} catch (e) {
return {