pi-ai: source google-gemini-cli model list from cli-core's VALID_GEMINI_MODELS

generate-models.ts now imports @google/gemini-cli-core's
VALID_GEMINI_MODELS set and iterates it to produce SF's google-gemini-cli
provider entries. Single source of truth: when Google ships a new Gemini
model, it lands in cli-core first, then flows into SF on
`npm update @google/gemini-cli-core` + `generate-models.ts` re-run —
no more hand-editing the generate script.

Before:  6 hardcoded entries (gemini-2.0/2.5/3 flash + pro preview, etc.)
After:   7 entries sourced dynamically, filtered to drop `-customtools`
         variants, which require a different tool protocol:

  gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite,
  gemini-3-pro-preview, gemini-3-flash-preview,
  gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview

Capability tagging uses cli-core's isProModel / isPreviewModel so
reasoning=true for pro + 3.x preview variants (excluding flash-lite).
Context-window / max-output-tokens kept in an SF-local override table
since cli-core doesn't publish those per-model.

Pre-existing 4 test failures (zai glm-5.1 x3, anthropic resolveBaseUrl
#4140) unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-19 11:44:28 +02:00
parent d83a59fb14
commit 8abfc98fdc
2 changed files with 357 additions and 312 deletions

View file

@ -1060,83 +1060,58 @@ async function generateModels() {
});
}
// Google Cloud Code Assist models (Gemini CLI)
// Uses production endpoint, standard Gemini models only
// Google Cloud Code Assist models (Gemini CLI) — sourced from
// @google/gemini-cli-core's VALID_GEMINI_MODELS so new models ship
// automatically on `npm update @google/gemini-cli-core`. cli-core is
// the authoritative list (what the real `gemini` CLI binary supports).
//
// We filter out `*-customtools` preview variants — they require a
// specific tool protocol that SF's generic adapter doesn't speak.
const { VALID_GEMINI_MODELS, isProModel: cliCoreIsProModel, isPreviewModel: cliCoreIsPreview } = await import("@google/gemini-cli-core");
const CLOUD_CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com";
const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [
{
id: "gemini-2.5-pro",
name: "Gemini 2.5 Pro (Cloud Code Assist)",
// Map every Gemini model tier to its (context_window, max_output_tokens)
// pair. cli-core doesn't publish these numbers — they live with SF.
const GEMINI_CONTEXT_OVERRIDES: Record<string, { contextWindow: number; maxTokens: number }> = {
"gemini-3.1-pro-preview": { contextWindow: 2097152, maxTokens: 65536 },
"gemini-3-pro-preview": { contextWindow: 2097152, maxTokens: 65536 },
"gemini-3-flash-preview": { contextWindow: 1048576, maxTokens: 65536 },
"gemini-3.1-flash-lite-preview": { contextWindow: 1048576, maxTokens: 32768 },
"gemini-2.5-pro": { contextWindow: 2097152, maxTokens: 65535 },
"gemini-2.5-flash": { contextWindow: 1048576, maxTokens: 65535 },
"gemini-2.5-flash-lite": { contextWindow: 1048576, maxTokens: 8192 },
};
const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [];
for (const modelId of VALID_GEMINI_MODELS) {
if (modelId.endsWith("-customtools")) continue; // custom tool protocol — not yet supported
const override = GEMINI_CONTEXT_OVERRIDES[modelId] ?? { contextWindow: 1048576, maxTokens: 65535 };
const isPro = cliCoreIsProModel(modelId);
const isPreview = cliCoreIsPreview(modelId);
// Pro models (and 3.x preview pros) support thinking; flash-lite does not.
const reasoning = isPro || (isPreview && !modelId.includes("flash-lite"));
// Human-readable tier label for the name
const tier = modelId.includes("pro") ? "Pro"
: modelId.includes("flash-lite") ? "Flash Lite"
: modelId.includes("flash") ? "Flash"
: "";
const version = modelId.startsWith("gemini-3.1") ? "3.1"
: modelId.startsWith("gemini-3-") ? "3"
: modelId.startsWith("gemini-2.5") ? "2.5"
: "";
const previewSuffix = isPreview ? " Preview" : "";
const displayName = `Gemini ${version} ${tier}${previewSuffix} (Cloud Code Assist)`.replace(/\s+/g, " ").trim();
cloudCodeAssistModels.push({
id: modelId,
name: displayName,
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
reasoning: true,
reasoning,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65535,
},
{
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65535,
},
{
id: "gemini-2.0-flash",
name: "Gemini 2.0 Flash (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
reasoning: false,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 8192,
},
{
id: "gemini-3-pro-preview",
name: "Gemini 3 Pro Preview (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65535,
},
{
id: "gemini-3-flash-preview",
name: "Gemini 3 Flash Preview (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65535,
},
{
id: "gemini-3.1-pro-preview",
name: "Gemini 3.1 Pro Preview (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65535,
},
];
contextWindow: override.contextWindow,
maxTokens: override.maxTokens,
});
}
allModels.push(...cloudCodeAssistModels);

View file

@ -243,9 +243,9 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"bedrock-converse-stream">,
"anthropic.claude-opus-4-7": {
id: "anthropic.claude-opus-4-7",
name: "Claude Opus 4.7",
"anthropic.claude-opus-4-6-v1": {
id: "anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -260,9 +260,9 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"bedrock-converse-stream">,
"anthropic.claude-opus-4-6-v1": {
id: "anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6",
"anthropic.claude-opus-4-7": {
id: "anthropic.claude-opus-4-7",
name: "Claude Opus 4.7",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -413,9 +413,9 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"bedrock-converse-stream">,
"eu.anthropic.claude-opus-4-7": {
id: "eu.anthropic.claude-opus-4-7",
name: "Claude Opus 4.7 (EU)",
"eu.anthropic.claude-opus-4-6-v1": {
id: "eu.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (EU)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -430,9 +430,9 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"bedrock-converse-stream">,
"eu.anthropic.claude-opus-4-6-v1": {
id: "eu.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (EU)",
"eu.anthropic.claude-opus-4-7": {
id: "eu.anthropic.claude-opus-4-7",
name: "Claude Opus 4.7 (EU)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -532,9 +532,9 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"bedrock-converse-stream">,
"global.anthropic.claude-opus-4-7": {
id: "global.anthropic.claude-opus-4-7",
name: "Claude Opus 4.7 (Global)",
"global.anthropic.claude-opus-4-6-v1": {
id: "global.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (Global)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -549,9 +549,9 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"bedrock-converse-stream">,
"global.anthropic.claude-opus-4-6-v1": {
id: "global.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (Global)",
"global.anthropic.claude-opus-4-7": {
id: "global.anthropic.claude-opus-4-7",
name: "Claude Opus 4.7 (Global)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -1382,9 +1382,9 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"bedrock-converse-stream">,
"us.anthropic.claude-opus-4-7": {
id: "us.anthropic.claude-opus-4-7",
name: "Claude Opus 4.7 (US)",
"us.anthropic.claude-opus-4-6-v1": {
id: "us.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (US)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -1399,9 +1399,9 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"bedrock-converse-stream">,
"us.anthropic.claude-opus-4-6-v1": {
id: "us.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (US)",
"us.anthropic.claude-opus-4-7": {
id: "us.anthropic.claude-opus-4-7",
name: "Claude Opus 4.7 (US)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
@ -1843,6 +1843,23 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-messages">,
"claude-opus-4-7": {
id: "claude-opus-4-7",
name: "Claude Opus 4.7",
api: "anthropic-messages",
provider: "anthropic",
baseUrl: "https://api.anthropic.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-messages">,
"claude-sonnet-4-0": {
id: "claude-sonnet-4-0",
name: "Claude Sonnet 4 (latest)",
@ -1930,23 +1947,6 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
},
"azure-openai-responses": {
"codex-mini-latest": {
id: "codex-mini-latest",
name: "Codex Mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text"],
cost: {
input: 1.5,
output: 6,
cacheRead: 0.375,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"gpt-4": {
id: "gpt-4",
name: "GPT-4",
@ -2753,6 +2753,24 @@ export const MODELS = {
contextWindow: 144000,
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
"claude-opus-4.7": {
id: "claude-opus-4.7",
name: "Claude Opus 4.7",
api: "anthropic-messages",
provider: "github-copilot",
baseUrl: "https://api.individual.githubcopilot.com",
headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"},
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 144000,
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
"claude-sonnet-4": {
id: "claude-sonnet-4",
name: "Claude Sonnet 4",
@ -3601,9 +3619,26 @@ export const MODELS = {
} satisfies Model<"google-generative-ai">,
},
"google-gemini-cli": {
"gemini-2.0-flash": {
id: "gemini-2.0-flash",
name: "Gemini 2.0 Flash (Cloud Code Assist)",
"gemini-2.5-flash": {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65535,
} satisfies Model<"google-gemini-cli">,
"gemini-2.5-flash-lite": {
id: "gemini-2.5-flash-lite",
name: "Gemini 2.5 Flash Lite (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
@ -3618,23 +3653,6 @@ export const MODELS = {
contextWindow: 1048576,
maxTokens: 8192,
} satisfies Model<"google-gemini-cli">,
"gemini-2.5-flash": {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65535,
} satisfies Model<"google-gemini-cli">,
"gemini-2.5-pro": {
id: "gemini-2.5-pro",
name: "Gemini 2.5 Pro (Cloud Code Assist)",
@ -3649,7 +3667,7 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
contextWindow: 2097152,
maxTokens: 65535,
} satisfies Model<"google-gemini-cli">,
"gemini-3-flash-preview": {
@ -3667,7 +3685,7 @@ export const MODELS = {
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65535,
maxTokens: 65536,
} satisfies Model<"google-gemini-cli">,
"gemini-3-pro-preview": {
id: "gemini-3-pro-preview",
@ -3683,8 +3701,25 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 2097152,
maxTokens: 65536,
} satisfies Model<"google-gemini-cli">,
"gemini-3.1-flash-lite-preview": {
id: "gemini-3.1-flash-lite-preview",
name: "Gemini 3.1 Flash Lite Preview (Cloud Code Assist)",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65535,
maxTokens: 32768,
} satisfies Model<"google-gemini-cli">,
"gemini-3.1-pro-preview": {
id: "gemini-3.1-pro-preview",
@ -3700,8 +3735,8 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65535,
contextWindow: 2097152,
maxTokens: 65536,
} satisfies Model<"google-gemini-cli">,
},
"google-vertex": {
@ -4255,6 +4290,24 @@ export const MODELS = {
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"MiniMaxAI/MiniMax-M2.7": {
id: "MiniMaxAI/MiniMax-M2.7",
name: "MiniMax-M2.7",
api: "openai-completions",
provider: "huggingface",
baseUrl: "https://router.huggingface.co/v1",
compat: {"supportsDeveloperRole":false},
reasoning: true,
input: ["text"],
cost: {
input: 0.3,
output: 1.2,
cacheRead: 0.06,
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"Qwen/Qwen3-235B-A22B-Thinking-2507": {
id: "Qwen/Qwen3-235B-A22B-Thinking-2507",
name: "Qwen3-235B-A22B-Thinking-2507",
@ -5251,23 +5304,6 @@ export const MODELS = {
} satisfies Model<"mistral-conversations">,
},
"openai": {
"codex-mini-latest": {
id: "codex-mini-latest",
name: "Codex Mini",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text"],
cost: {
input: 1.5,
output: 6,
cacheRead: 0.375,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"openai-responses">,
"gpt-4": {
id: "gpt-4",
name: "GPT-4",
@ -6190,6 +6226,23 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-messages">,
"claude-opus-4-7": {
id: "claude-opus-4-7",
name: "Claude Opus 4.7",
api: "anthropic-messages",
provider: "opencode",
baseUrl: "https://opencode.ai/zen",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-messages">,
"claude-sonnet-4": {
id: "claude-sonnet-4",
name: "Claude Sonnet 4",
@ -6615,6 +6668,40 @@ export const MODELS = {
contextWindow: 204800,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"qwen3.5-plus": {
id: "qwen3.5-plus",
name: "Qwen3.5 Plus",
api: "openai-completions",
provider: "opencode",
baseUrl: "https://opencode.ai/zen/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.2,
output: 1.2,
cacheRead: 0.02,
cacheWrite: 0.25,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"qwen3.6-plus": {
id: "qwen3.6-plus",
name: "Qwen3.6 Plus",
api: "openai-completions",
provider: "opencode",
baseUrl: "https://opencode.ai/zen/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.5,
output: 3,
cacheRead: 0.05,
cacheWrite: 0.625,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
},
"opencode-go": {
"glm-5": {
@ -6705,9 +6792,9 @@ export const MODELS = {
"minimax-m2.5": {
id: "minimax-m2.5",
name: "MiniMax M2.5",
api: "anthropic-messages",
api: "openai-completions",
provider: "opencode-go",
baseUrl: "https://opencode.ai/zen/go",
baseUrl: "https://opencode.ai/zen/go/v1",
reasoning: true,
input: ["text"],
cost: {
@ -6717,8 +6804,8 @@ export const MODELS = {
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"anthropic-messages">,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"minimax-m2.7": {
id: "minimax-m2.7",
name: "MiniMax M2.7",
@ -6736,6 +6823,40 @@ export const MODELS = {
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"anthropic-messages">,
"qwen3.5-plus": {
id: "qwen3.5-plus",
name: "Qwen3.5 Plus",
api: "openai-completions",
provider: "opencode-go",
baseUrl: "https://opencode.ai/zen/go/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.2,
output: 1.2,
cacheRead: 0.02,
cacheWrite: 0.25,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"qwen3.6-plus": {
id: "qwen3.6-plus",
name: "Qwen3.6 Plus",
api: "openai-completions",
provider: "opencode-go",
baseUrl: "https://opencode.ai/zen/go/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.5,
output: 3,
cacheRead: 0.05,
cacheWrite: 0.625,
},
contextWindow: 262144,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
},
"openrouter": {
"ai21/jamba-large-1.7": {
@ -6959,23 +7080,6 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"anthropic/claude-opus-4.7": {
id: "anthropic/claude-opus-4.7",
name: "Anthropic: Claude Opus 4.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"anthropic/claude-opus-4": {
id: "anthropic/claude-opus-4",
name: "Anthropic: Claude Opus 4",
@ -7061,6 +7165,23 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"anthropic/claude-opus-4.7": {
id: "anthropic/claude-opus-4.7",
name: "Anthropic: Claude Opus 4.7",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"anthropic/claude-sonnet-4": {
id: "anthropic/claude-sonnet-4",
name: "Anthropic: Claude Sonnet 4",
@ -7140,7 +7261,7 @@ export const MODELS = {
cost: {
input: 0.22,
output: 0.85,
cacheRead: 0,
cacheRead: 0.06,
cacheWrite: 0,
},
contextWindow: 262144,
@ -7356,7 +7477,7 @@ export const MODELS = {
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
@ -7444,13 +7565,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.26,
output: 0.38,
cacheRead: 0.13,
input: 0.25899999999999995,
output: 0.42,
cacheRead: 0.135,
cacheWrite: 0,
},
contextWindow: 163840,
maxTokens: 4096,
maxTokens: 163840,
} satisfies Model<"openai-completions">,
"deepseek/deepseek-v3.2-exp": {
id: "deepseek/deepseek-v3.2-exp",
@ -7699,13 +7820,13 @@ export const MODELS = {
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.12,
output: 0.39999999999999997,
cacheRead: 0,
input: 0.08,
output: 0.35,
cacheRead: 0.01,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"google/gemma-4-26b-a4b-it:free": {
id: "google/gemma-4-26b-a4b-it:free",
@ -7733,13 +7854,13 @@ export const MODELS = {
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.14,
output: 0.39999999999999997,
cacheRead: 0,
input: 0.13,
output: 0.38,
cacheRead: 0.019999999499999997,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"google/gemma-4-31b-it:free": {
id: "google/gemma-4-31b-it:free",
@ -7758,23 +7879,6 @@ export const MODELS = {
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"inception/mercury": {
id: "inception/mercury",
name: "Inception: Mercury",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.25,
output: 0.75,
cacheRead: 0.024999999999999998,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 32000,
} satisfies Model<"openai-completions">,
"inception/mercury-2": {
id: "inception/mercury-2",
name: "Inception: Mercury 2",
@ -7792,23 +7896,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 50000,
} satisfies Model<"openai-completions">,
"inception/mercury-coder": {
id: "inception/mercury-coder",
name: "Inception: Mercury Coder",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.25,
output: 0.75,
cacheRead: 0.024999999999999998,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 32000,
} satisfies Model<"openai-completions">,
"kwaipilot/kat-coder-pro-v2": {
id: "kwaipilot/kat-coder-pro-v2",
name: "Kwaipilot: KAT-Coder-Pro V2",
@ -7826,23 +7913,6 @@ export const MODELS = {
contextWindow: 256000,
maxTokens: 80000,
} satisfies Model<"openai-completions">,
"meituan/longcat-flash-chat": {
id: "meituan/longcat-flash-chat",
name: "Meituan: LongCat Flash Chat",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.7999999999999999,
cacheRead: 0.19999999999999998,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-8b-instruct": {
id: "meta-llama/llama-3-8b-instruct",
name: "Meta: Llama 3 8B Instruct",
@ -7903,13 +7973,13 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.32,
input: 0.12,
output: 0.38,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.3-70b-instruct:free": {
id: "meta-llama/llama-3.3-70b-instruct:free",
@ -7928,23 +7998,6 @@ export const MODELS = {
contextWindow: 65536,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-maverick": {
id: "meta-llama/llama-4-maverick",
name: "Meta: Llama 4 Maverick",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-scout": {
id: "meta-llama/llama-4-scout",
name: "Meta: Llama 4 Scout",
@ -8453,7 +8506,7 @@ export const MODELS = {
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"moonshotai/kimi-k2-0905": {
id: "moonshotai/kimi-k2-0905",
@ -8483,11 +8536,11 @@ export const MODELS = {
cost: {
input: 0.6,
output: 2.5,
cacheRead: 0,
cacheRead: 0.15,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"moonshotai/kimi-k2.5": {
id: "moonshotai/kimi-k2.5",
@ -8600,9 +8653,9 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.5,
cacheRead: 0.09999999999999999,
input: 0.09,
output: 0.44999999999999996,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
@ -8982,23 +9035,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o:extended": {
id: "openai/gpt-4o:extended",
name: "OpenAI: GPT-4o (extended)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 6,
output: 18,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"openai/gpt-5": {
id: "openai/gpt-5",
name: "OpenAI: GPT-5",
@ -9662,6 +9698,23 @@ export const MODELS = {
contextWindow: 2000000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openrouter/elephant-alpha": {
id: "openrouter/elephant-alpha",
name: "Elephant",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"openrouter/free": {
id: "openrouter/free",
name: "Free Models Router",
@ -9759,7 +9812,7 @@ export const MODELS = {
input: 0.26,
output: 0.78,
cacheRead: 0.052000000000000005,
cacheWrite: 0,
cacheWrite: 0.325,
},
contextWindow: 1000000,
maxTokens: 32768,
@ -9776,7 +9829,7 @@ export const MODELS = {
input: 0.26,
output: 0.78,
cacheRead: 0,
cacheWrite: 0,
cacheWrite: 0.325,
},
contextWindow: 1000000,
maxTokens: 32768,
@ -9793,7 +9846,7 @@ export const MODELS = {
input: 0.26,
output: 0.78,
cacheRead: 0,
cacheWrite: 0,
cacheWrite: 0.325,
},
contextWindow: 1000000,
maxTokens: 32768,
@ -9872,7 +9925,7 @@ export const MODELS = {
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
reasoning: false,
input: ["text"],
cost: {
input: 0.071,
@ -9892,13 +9945,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.14950000000000002,
output: 1.495,
input: 0.13,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"qwen/qwen3-30b-a3b": {
id: "qwen/qwen3-30b-a3b",
@ -10031,7 +10084,7 @@ export const MODELS = {
input: 0.195,
output: 0.975,
cacheRead: 0.039,
cacheWrite: 0,
cacheWrite: 0.24375,
},
contextWindow: 1000000,
maxTokens: 65536,
@ -10065,7 +10118,7 @@ export const MODELS = {
input: 0.65,
output: 3.25,
cacheRead: 0.13,
cacheWrite: 0,
cacheWrite: 0.8125,
},
contextWindow: 1000000,
maxTokens: 65536,
@ -10099,7 +10152,7 @@ export const MODELS = {
input: 0.78,
output: 3.9,
cacheRead: 0.156,
cacheWrite: 0,
cacheWrite: 0.975,
},
contextWindow: 262144,
maxTokens: 32768,
@ -10353,7 +10406,7 @@ export const MODELS = {
cost: {
input: 0.39,
output: 2.34,
cacheRead: 0,
cacheRead: 0.195,
cacheWrite: 0,
},
contextWindow: 262144,
@ -10368,13 +10421,13 @@ export const MODELS = {
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.049999999999999996,
input: 0.09999999999999999,
output: 0.15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 256000,
maxTokens: 32768,
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"qwen/qwen3.5-flash-02-23": {
id: "qwen/qwen3.5-flash-02-23",
@ -10388,7 +10441,7 @@ export const MODELS = {
input: 0.065,
output: 0.26,
cacheRead: 0,
cacheWrite: 0,
cacheWrite: 0.08125,
},
contextWindow: 1000000,
maxTokens: 65536,
@ -10405,7 +10458,7 @@ export const MODELS = {
input: 0.26,
output: 1.56,
cacheRead: 0,
cacheWrite: 0,
cacheWrite: 0.325,
},
contextWindow: 1000000,
maxTokens: 65536,
@ -10422,7 +10475,7 @@ export const MODELS = {
input: 0.325,
output: 1.95,
cacheRead: 0,
cacheWrite: 0,
cacheWrite: 0.40625,
},
contextWindow: 1000000,
maxTokens: 65536,
@ -11432,6 +11485,23 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-messages">,
"anthropic/claude-opus-4.7": {
id: "anthropic/claude-opus-4.7",
name: "Claude Opus 4.7",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-messages">,
"anthropic/claude-sonnet-4": {
id: "anthropic/claude-sonnet-4",
name: "Claude Sonnet 4",
@ -12913,7 +12983,7 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
"openai/gpt-oss-20b": {
id: "openai/gpt-oss-20b",
name: "gpt-oss-20b",
name: "GPT OSS 120B",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
@ -12930,7 +13000,7 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
"openai/gpt-oss-safeguard-20b": {
id: "openai/gpt-oss-safeguard-20b",
name: "gpt-oss-safeguard-20b",
name: "GPT OSS Safeguard 20B",
api: "anthropic-messages",
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
@ -13190,7 +13260,7 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.19999999999999998,
output: 0.5,
@ -13207,7 +13277,7 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.19999999999999998,
output: 0.5,
@ -13224,7 +13294,7 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.19999999999999998,
output: 0.5,
@ -13241,7 +13311,7 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.19999999999999998,
output: 0.5,
@ -13258,7 +13328,7 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
input: ["text", "image"],
cost: {
input: 2,
output: 6,
@ -13275,7 +13345,7 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
input: ["text", "image"],
cost: {
input: 2,
output: 6,
@ -13598,15 +13668,15 @@ export const MODELS = {
provider: "vercel-ai-gateway",
baseUrl: "https://ai-gateway.vercel.sh",
reasoning: true,
input: ["text"],
input: ["text", "image"],
cost: {
input: 1.4,
output: 4.4,
cacheRead: 0.26,
cacheWrite: 0,
},
contextWindow: 202800,
maxTokens: 64000,
contextWindow: 202752,
maxTokens: 202752,
} satisfies Model<"anthropic-messages">,
"zai/glm-5v-turbo": {
id: "zai/glm-5v-turbo",