From 8abfc98fdc912e204a8219493958565765bde6f5 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 19 Apr 2026 11:44:28 +0200 Subject: [PATCH] pi-ai: source google-gemini-cli model list from cli-core's VALID_GEMINI_MODELS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit generate-models.ts now imports @google/gemini-cli-core's VALID_GEMINI_MODELS set and iterates it to produce SF's google-gemini-cli provider entries. Single source of truth: when Google ships a new Gemini model, it lands in cli-core first, then flows into SF on `npm update @google/gemini-cli-core` + `generate-models.ts` re-run — no more hand-editing the generate script. Before: 6 hardcoded entries (gemini-2.0/2.5/3 flash + pro preview, etc.) After: 7 entries sourced dynamically, filtered to drop `-customtools` variants which require a different tool protocol: gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite, gemini-3-pro-preview, gemini-3-flash-preview, gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview Capability tagging uses cli-core's isProModel / isPreviewModel so reasoning=true for pro + 3.x preview variants (excluding flash-lite). NOTE(review): under this rule gemini-2.5-flash flips reasoning true→false (it is neither pro nor preview) and gemini-2.0-flash is dropped entirely (absent from cli-core's valid set) — confirm both behavior changes are intended. Context-window / max-output-tokens kept in an SF-local override table since cli-core doesn't publish those per-model. Pre-existing 4 test failures (zai glm-5.1 x3, anthropic resolveBaseUrl #4140) unchanged. NOTE(review): the regenerated models.generated.ts also carries unrelated upstream catalog refreshes (Claude Opus 4.7 entries, codex-mini removals, OpenRouter price/limit updates) picked up by re-running the full generator. 
Co-Authored-By: Claude Sonnet 4.6 --- packages/pi-ai/scripts/generate-models.ts | 117 ++--- packages/pi-ai/src/models.generated.ts | 552 ++++++++++++---------- 2 files changed, 357 insertions(+), 312 deletions(-) diff --git a/packages/pi-ai/scripts/generate-models.ts b/packages/pi-ai/scripts/generate-models.ts index 9fd2d2576..2bc9a7232 100644 --- a/packages/pi-ai/scripts/generate-models.ts +++ b/packages/pi-ai/scripts/generate-models.ts @@ -1060,83 +1060,58 @@ async function generateModels() { }); } - // Google Cloud Code Assist models (Gemini CLI) - // Uses production endpoint, standard Gemini models only + // Google Cloud Code Assist models (Gemini CLI) — sourced from + // @google/gemini-cli-core's VALID_GEMINI_MODELS so new models ship + // automatically on `npm update @google/gemini-cli-core`. cli-core is + // the authoritative list (what the real `gemini` CLI binary supports). + // + // We filter out `*-customtools` preview variants — they require a + // specific tool protocol that SF's generic adapter doesn't speak. + const { VALID_GEMINI_MODELS, isProModel: cliCoreIsProModel, isPreviewModel: cliCoreIsPreview } = await import("@google/gemini-cli-core"); const CLOUD_CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"; - const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [ - { - id: "gemini-2.5-pro", - name: "Gemini 2.5 Pro (Cloud Code Assist)", + // Map every Gemini model tier to its (context_window, max_output_tokens) + // pair. cli-core doesn't publish these numbers — they live with SF. 
+ const GEMINI_CONTEXT_OVERRIDES: Record<string, { contextWindow: number; maxTokens: number }> = { + "gemini-3.1-pro-preview": { contextWindow: 2097152, maxTokens: 65536 }, + "gemini-3-pro-preview": { contextWindow: 2097152, maxTokens: 65536 }, + "gemini-3-flash-preview": { contextWindow: 1048576, maxTokens: 65536 }, + "gemini-3.1-flash-lite-preview": { contextWindow: 1048576, maxTokens: 32768 }, + "gemini-2.5-pro": { contextWindow: 2097152, maxTokens: 65535 }, + "gemini-2.5-flash": { contextWindow: 1048576, maxTokens: 65535 }, + "gemini-2.5-flash-lite": { contextWindow: 1048576, maxTokens: 8192 }, + }; + const cloudCodeAssistModels: Model<"google-gemini-cli">[] = []; + for (const modelId of VALID_GEMINI_MODELS) { + if (modelId.endsWith("-customtools")) continue; // custom tool protocol — not yet supported + const override = GEMINI_CONTEXT_OVERRIDES[modelId] ?? { contextWindow: 1048576, maxTokens: 65535 }; + const isPro = cliCoreIsProModel(modelId); + const isPreview = cliCoreIsPreview(modelId); + // Pro models (and 3.x preview pros) support thinking; flash-lite does not. + const reasoning = isPro || (isPreview && !modelId.includes("flash-lite")); + // Human-readable tier label for the name + const tier = modelId.includes("pro") ? "Pro" + : modelId.includes("flash-lite") ? "Flash Lite" + : modelId.includes("flash") ? "Flash" + : ""; + const version = modelId.startsWith("gemini-3.1") ? "3.1" + : modelId.startsWith("gemini-3-") ? "3" + : modelId.startsWith("gemini-2.5") ? "2.5" + : ""; + const previewSuffix = isPreview ? 
" Preview" : ""; + const displayName = `Gemini ${version} ${tier}${previewSuffix} (Cloud Code Assist)`.replace(/\s+/g, " ").trim(); + cloudCodeAssistModels.push({ + id: modelId, + name: displayName, api: "google-gemini-cli", provider: "google-gemini-cli", baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, - reasoning: true, + reasoning, input: ["text", "image"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 1048576, - maxTokens: 65535, - }, - { - id: "gemini-2.5-flash", - name: "Gemini 2.5 Flash (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, - reasoning: true, - input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 1048576, - maxTokens: 65535, - }, - { - id: "gemini-2.0-flash", - name: "Gemini 2.0 Flash (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, - reasoning: false, - input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 1048576, - maxTokens: 8192, - }, - { - id: "gemini-3-pro-preview", - name: "Gemini 3 Pro Preview (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, - reasoning: true, - input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 1048576, - maxTokens: 65535, - }, - { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash Preview (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, - reasoning: true, - input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 1048576, - maxTokens: 65535, - }, - { - id: "gemini-3.1-pro-preview", - name: "Gemini 3.1 Pro Preview (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: 
CLOUD_CODE_ASSIST_ENDPOINT, - reasoning: true, - input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 1048576, - maxTokens: 65535, - }, - ]; + contextWindow: override.contextWindow, + maxTokens: override.maxTokens, + }); + } allModels.push(...cloudCodeAssistModels); diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index ffaab0880..793237587 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -243,9 +243,9 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-opus-4-7": { - id: "anthropic.claude-opus-4-7", - name: "Claude Opus 4.7", + "anthropic.claude-opus-4-6-v1": { + id: "anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -260,9 +260,9 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-opus-4-6-v1": { - id: "anthropic.claude-opus-4-6-v1", - name: "Claude Opus 4.6", + "anthropic.claude-opus-4-7": { + id: "anthropic.claude-opus-4-7", + name: "Claude Opus 4.7", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -413,9 +413,9 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "eu.anthropic.claude-opus-4-7": { - id: "eu.anthropic.claude-opus-4-7", - name: "Claude Opus 4.7 (EU)", + "eu.anthropic.claude-opus-4-6-v1": { + id: "eu.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6 (EU)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -430,9 +430,9 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies 
Model<"bedrock-converse-stream">, - "eu.anthropic.claude-opus-4-6-v1": { - id: "eu.anthropic.claude-opus-4-6-v1", - name: "Claude Opus 4.6 (EU)", + "eu.anthropic.claude-opus-4-7": { + id: "eu.anthropic.claude-opus-4-7", + name: "Claude Opus 4.7 (EU)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -532,9 +532,9 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "global.anthropic.claude-opus-4-7": { - id: "global.anthropic.claude-opus-4-7", - name: "Claude Opus 4.7 (Global)", + "global.anthropic.claude-opus-4-6-v1": { + id: "global.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6 (Global)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -549,9 +549,9 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, - "global.anthropic.claude-opus-4-6-v1": { - id: "global.anthropic.claude-opus-4-6-v1", - name: "Claude Opus 4.6 (Global)", + "global.anthropic.claude-opus-4-7": { + id: "global.anthropic.claude-opus-4-7", + name: "Claude Opus 4.7 (Global)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -1382,9 +1382,9 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "us.anthropic.claude-opus-4-7": { - id: "us.anthropic.claude-opus-4-7", - name: "Claude Opus 4.7 (US)", + "us.anthropic.claude-opus-4-6-v1": { + id: "us.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6 (US)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -1399,9 +1399,9 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, - 
"us.anthropic.claude-opus-4-6-v1": { - id: "us.anthropic.claude-opus-4-6-v1", - name: "Claude Opus 4.6 (US)", + "us.anthropic.claude-opus-4-7": { + id: "us.anthropic.claude-opus-4-7", + name: "Claude Opus 4.7 (US)", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", @@ -1843,6 +1843,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, + "claude-opus-4-7": { + id: "claude-opus-4-7", + name: "Claude Opus 4.7", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, "claude-sonnet-4-0": { id: "claude-sonnet-4-0", name: "Claude Sonnet 4 (latest)", @@ -1930,23 +1947,6 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, }, "azure-openai-responses": { - "codex-mini-latest": { - id: "codex-mini-latest", - name: "Codex Mini", - api: "azure-openai-responses", - provider: "azure-openai-responses", - baseUrl: "", - reasoning: true, - input: ["text"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"azure-openai-responses">, "gpt-4": { id: "gpt-4", name: "GPT-4", @@ -2753,6 +2753,24 @@ export const MODELS = { contextWindow: 144000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "claude-opus-4.7": { + id: "claude-opus-4.7", + name: "Claude Opus 4.7", + api: "anthropic-messages", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], 
+ cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 144000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { id: "claude-sonnet-4", name: "Claude Sonnet 4", @@ -3601,9 +3619,26 @@ export const MODELS = { } satisfies Model<"google-generative-ai">, }, "google-gemini-cli": { - "gemini-2.0-flash": { - id: "gemini-2.0-flash", - name: "Gemini 2.0 Flash (Cloud Code Assist)", + "gemini-2.5-flash": { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "gemini-2.5-flash-lite": { + id: "gemini-2.5-flash-lite", + name: "Gemini 2.5 Flash Lite (Cloud Code Assist)", api: "google-gemini-cli", provider: "google-gemini-cli", baseUrl: "https://cloudcode-pa.googleapis.com", @@ -3618,23 +3653,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 8192, } satisfies Model<"google-gemini-cli">, - "gemini-2.5-flash": { - id: "gemini-2.5-flash", - name: "Gemini 2.5 Flash (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: "https://cloudcode-pa.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, "gemini-2.5-pro": { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro (Cloud Code Assist)", @@ -3649,7 +3667,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 1048576, + contextWindow: 2097152, maxTokens: 65535, } satisfies Model<"google-gemini-cli">, "gemini-3-flash-preview": { @@ -3667,7 +3685,7 @@ export const MODELS = { cacheWrite: 0, }, 
contextWindow: 1048576, - maxTokens: 65535, + maxTokens: 65536, } satisfies Model<"google-gemini-cli">, "gemini-3-pro-preview": { id: "gemini-3-pro-preview", @@ -3683,8 +3701,25 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, + contextWindow: 2097152, + maxTokens: 65536, + } satisfies Model<"google-gemini-cli">, + "gemini-3.1-flash-lite-preview": { + id: "gemini-3.1-flash-lite-preview", + name: "Gemini 3.1 Flash Lite Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, contextWindow: 1048576, - maxTokens: 65535, + maxTokens: 32768, } satisfies Model<"google-gemini-cli">, "gemini-3.1-pro-preview": { id: "gemini-3.1-pro-preview", @@ -3700,8 +3735,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 1048576, - maxTokens: 65535, + contextWindow: 2097152, + maxTokens: 65536, } satisfies Model<"google-gemini-cli">, }, "google-vertex": { @@ -4255,6 +4290,24 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, + "MiniMaxAI/MiniMax-M2.7": { + id: "MiniMaxAI/MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "Qwen/Qwen3-235B-A22B-Thinking-2507": { id: "Qwen/Qwen3-235B-A22B-Thinking-2507", name: "Qwen3-235B-A22B-Thinking-2507", @@ -5251,23 +5304,6 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, }, "openai": { - "codex-mini-latest": { - id: "codex-mini-latest", - name: "Codex Mini", - api: "openai-responses", - provider: 
"openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, "gpt-4": { id: "gpt-4", name: "GPT-4", @@ -6190,6 +6226,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, + "claude-opus-4-7": { + id: "claude-opus-4-7", + name: "Claude Opus 4.7", + api: "anthropic-messages", + provider: "opencode", + baseUrl: "https://opencode.ai/zen", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { id: "claude-sonnet-4", name: "Claude Sonnet 4", @@ -6615,6 +6668,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 128000, } satisfies Model<"openai-completions">, + "qwen3.5-plus": { + id: "qwen3.5-plus", + name: "Qwen3.5 Plus", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.2, + cacheRead: 0.02, + cacheWrite: 0.25, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "qwen3.6-plus": { + id: "qwen3.6-plus", + name: "Qwen3.6 Plus", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 3, + cacheRead: 0.05, + cacheWrite: 0.625, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, }, "opencode-go": { "glm-5": { @@ -6705,9 +6792,9 @@ export const MODELS = { "minimax-m2.5": { id: "minimax-m2.5", name: "MiniMax M2.5", - api: "anthropic-messages", + api: "openai-completions", provider: "opencode-go", - baseUrl: 
"https://opencode.ai/zen/go", + baseUrl: "https://opencode.ai/zen/go/v1", reasoning: true, input: ["text"], cost: { @@ -6717,8 +6804,8 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"anthropic-messages">, + maxTokens: 65536, + } satisfies Model<"openai-completions">, "minimax-m2.7": { id: "minimax-m2.7", name: "MiniMax M2.7", @@ -6736,6 +6823,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "qwen3.5-plus": { + id: "qwen3.5-plus", + name: "Qwen3.5 Plus", + api: "openai-completions", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.2, + cacheRead: 0.02, + cacheWrite: 0.25, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "qwen3.6-plus": { + id: "qwen3.6-plus", + name: "Qwen3.6 Plus", + api: "openai-completions", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 3, + cacheRead: 0.05, + cacheWrite: 0.625, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, }, "openrouter": { "ai21/jamba-large-1.7": { @@ -6959,23 +7080,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"openai-completions">, - "anthropic/claude-opus-4.7": { - id: "anthropic/claude-opus-4.7", - name: "Anthropic: Claude Opus 4.7", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 5, - output: 25, - cacheRead: 0.5, - cacheWrite: 6.25, - }, - contextWindow: 1000000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, "anthropic/claude-opus-4": { id: "anthropic/claude-opus-4", name: "Anthropic: Claude Opus 4", @@ -7061,6 +7165,23 @@ 
export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "anthropic/claude-opus-4.7": { + id: "anthropic/claude-opus-4.7", + name: "Anthropic: Claude Opus 4.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, "anthropic/claude-sonnet-4": { id: "anthropic/claude-sonnet-4", name: "Anthropic: Claude Sonnet 4", @@ -7140,7 +7261,7 @@ export const MODELS = { cost: { input: 0.22, output: 0.85, - cacheRead: 0, + cacheRead: 0.06, cacheWrite: 0, }, contextWindow: 262144, @@ -7356,7 +7477,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, + reasoning: false, input: ["text"], cost: { input: 0.19999999999999998, @@ -7444,13 +7565,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.26, - output: 0.38, - cacheRead: 0.13, + input: 0.25899999999999995, + output: 0.42, + cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 4096, + maxTokens: 163840, } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2-exp": { id: "deepseek/deepseek-v3.2-exp", @@ -7699,13 +7820,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.12, - output: 0.39999999999999997, - cacheRead: 0, + input: 0.08, + output: 0.35, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 262144, + maxTokens: 4096, } satisfies Model<"openai-completions">, "google/gemma-4-26b-a4b-it:free": { id: "google/gemma-4-26b-a4b-it:free", @@ -7733,13 +7854,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.14, - output: 0.39999999999999997, - cacheRead: 0, + input: 0.13, + 
output: 0.38, + cacheRead: 0.019999999499999997, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 131072, + maxTokens: 4096, } satisfies Model<"openai-completions">, "google/gemma-4-31b-it:free": { id: "google/gemma-4-31b-it:free", @@ -7758,23 +7879,6 @@ export const MODELS = { contextWindow: 262144, maxTokens: 32768, } satisfies Model<"openai-completions">, - "inception/mercury": { - id: "inception/mercury", - name: "Inception: Mercury", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 0.75, - cacheRead: 0.024999999999999998, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, "inception/mercury-2": { id: "inception/mercury-2", name: "Inception: Mercury 2", @@ -7792,23 +7896,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 50000, } satisfies Model<"openai-completions">, - "inception/mercury-coder": { - id: "inception/mercury-coder", - name: "Inception: Mercury Coder", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 0.75, - cacheRead: 0.024999999999999998, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, "kwaipilot/kat-coder-pro-v2": { id: "kwaipilot/kat-coder-pro-v2", name: "Kwaipilot: KAT-Coder-Pro V2", @@ -7826,23 +7913,6 @@ export const MODELS = { contextWindow: 256000, maxTokens: 80000, } satisfies Model<"openai-completions">, - "meituan/longcat-flash-chat": { - id: "meituan/longcat-flash-chat", - name: "Meituan: LongCat Flash Chat", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.19999999999999998, - output: 0.7999999999999999, - cacheRead: 0.19999999999999998, - 
cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "meta-llama/llama-3-8b-instruct": { id: "meta-llama/llama-3-8b-instruct", name: "Meta: Llama 3 8B Instruct", @@ -7903,13 +7973,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.32, + input: 0.12, + output: 0.38, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 16384, + maxTokens: 131072, } satisfies Model<"openai-completions">, "meta-llama/llama-3.3-70b-instruct:free": { id: "meta-llama/llama-3.3-70b-instruct:free", @@ -7928,23 +7998,6 @@ export const MODELS = { contextWindow: 65536, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-4-maverick": { - id: "meta-llama/llama-4-maverick", - name: "Meta: Llama 4 Maverick", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-4-scout": { id: "meta-llama/llama-4-scout", name: "Meta: Llama 4 Scout", @@ -8453,7 +8506,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131072, + maxTokens: 32768, } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-0905": { id: "moonshotai/kimi-k2-0905", @@ -8483,11 +8536,11 @@ export const MODELS = { cost: { input: 0.6, output: 2.5, - cacheRead: 0, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 4096, + maxTokens: 262144, } satisfies Model<"openai-completions">, "moonshotai/kimi-k2.5": { id: "moonshotai/kimi-k2.5", @@ -8600,9 +8653,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.5, - cacheRead: 0.09999999999999999, + input: 0.09, + output: 0.44999999999999996, + cacheRead: 0, 
cacheWrite: 0, }, contextWindow: 262144, @@ -8982,23 +9035,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-completions">, - "openai/gpt-4o:extended": { - id: "openai/gpt-4o:extended", - name: "OpenAI: GPT-4o (extended)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 6, - output: 18, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, "openai/gpt-5": { id: "openai/gpt-5", name: "OpenAI: GPT-5", @@ -9662,6 +9698,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "openrouter/elephant-alpha": { + id: "openrouter/elephant-alpha", + name: "Elephant", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"openai-completions">, "openrouter/free": { id: "openrouter/free", name: "Free Models Router", @@ -9759,7 +9812,7 @@ export const MODELS = { input: 0.26, output: 0.78, cacheRead: 0.052000000000000005, - cacheWrite: 0, + cacheWrite: 0.325, }, contextWindow: 1000000, maxTokens: 32768, @@ -9776,7 +9829,7 @@ export const MODELS = { input: 0.26, output: 0.78, cacheRead: 0, - cacheWrite: 0, + cacheWrite: 0.325, }, contextWindow: 1000000, maxTokens: 32768, @@ -9793,7 +9846,7 @@ export const MODELS = { input: 0.26, output: 0.78, cacheRead: 0, - cacheWrite: 0, + cacheWrite: 0.325, }, contextWindow: 1000000, maxTokens: 32768, @@ -9872,7 +9925,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, + reasoning: false, input: ["text"], cost: { input: 0.071, @@ -9892,13 +9945,13 @@ 
export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.14950000000000002, - output: 1.495, + input: 0.13, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 4096, + contextWindow: 262144, + maxTokens: 262144, } satisfies Model<"openai-completions">, "qwen/qwen3-30b-a3b": { id: "qwen/qwen3-30b-a3b", @@ -10031,7 +10084,7 @@ export const MODELS = { input: 0.195, output: 0.975, cacheRead: 0.039, - cacheWrite: 0, + cacheWrite: 0.24375, }, contextWindow: 1000000, maxTokens: 65536, @@ -10065,7 +10118,7 @@ export const MODELS = { input: 0.65, output: 3.25, cacheRead: 0.13, - cacheWrite: 0, + cacheWrite: 0.8125, }, contextWindow: 1000000, maxTokens: 65536, @@ -10099,7 +10152,7 @@ export const MODELS = { input: 0.78, output: 3.9, cacheRead: 0.156, - cacheWrite: 0, + cacheWrite: 0.975, }, contextWindow: 262144, maxTokens: 32768, @@ -10353,7 +10406,7 @@ export const MODELS = { cost: { input: 0.39, output: 2.34, - cacheRead: 0, + cacheRead: 0.195, cacheWrite: 0, }, contextWindow: 262144, @@ -10368,13 +10421,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.049999999999999996, + input: 0.09999999999999999, output: 0.15, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 32768, + contextWindow: 262144, + maxTokens: 4096, } satisfies Model<"openai-completions">, "qwen/qwen3.5-flash-02-23": { id: "qwen/qwen3.5-flash-02-23", @@ -10388,7 +10441,7 @@ export const MODELS = { input: 0.065, output: 0.26, cacheRead: 0, - cacheWrite: 0, + cacheWrite: 0.08125, }, contextWindow: 1000000, maxTokens: 65536, @@ -10405,7 +10458,7 @@ export const MODELS = { input: 0.26, output: 1.56, cacheRead: 0, - cacheWrite: 0, + cacheWrite: 0.325, }, contextWindow: 1000000, maxTokens: 65536, @@ -10422,7 +10475,7 @@ export const MODELS = { input: 0.325, output: 1.95, cacheRead: 0, - cacheWrite: 0, + cacheWrite: 0.40625, }, contextWindow: 1000000, maxTokens: 65536, @@ -11432,6 +11485,23 @@ export 
const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, + "anthropic/claude-opus-4.7": { + id: "anthropic/claude-opus-4.7", + name: "Claude Opus 4.7", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, "anthropic/claude-sonnet-4": { id: "anthropic/claude-sonnet-4", name: "Claude Sonnet 4", @@ -12913,7 +12983,7 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "openai/gpt-oss-20b": { id: "openai/gpt-oss-20b", - name: "gpt-oss-20b", + name: "GPT OSS 120B", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", @@ -12930,7 +13000,7 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "openai/gpt-oss-safeguard-20b": { id: "openai/gpt-oss-safeguard-20b", - name: "gpt-oss-safeguard-20b", + name: "GPT OSS Safeguard 20B", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", @@ -13190,7 +13260,7 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: false, - input: ["text"], + input: ["text", "image"], cost: { input: 0.19999999999999998, output: 0.5, @@ -13207,7 +13277,7 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.19999999999999998, output: 0.5, @@ -13224,7 +13294,7 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: false, - input: ["text"], + input: ["text", "image"], cost: { input: 0.19999999999999998, output: 0.5, @@ -13241,7 +13311,7 @@ export const MODELS = { provider: "vercel-ai-gateway", 
baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.19999999999999998, output: 0.5, @@ -13258,7 +13328,7 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 2, output: 6, @@ -13275,7 +13345,7 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 2, output: 6, @@ -13598,15 +13668,15 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 1.4, output: 4.4, cacheRead: 0.26, cacheWrite: 0, }, - contextWindow: 202800, - maxTokens: 64000, + contextWindow: 202752, + maxTokens: 202752, } satisfies Model<"anthropic-messages">, "zai/glm-5v-turbo": { id: "zai/glm-5v-turbo",