diff --git a/packages/pi-ai/scripts/generate-models.ts b/packages/pi-ai/scripts/generate-models.ts
new file mode 100644
index 000000000..839428bcb
--- /dev/null
+++ b/packages/pi-ai/scripts/generate-models.ts
@@ -0,0 +1,1543 @@
+#!/usr/bin/env tsx
+
+import { writeFileSync } from "fs";
+import { join, dirname } from "path";
+import { fileURLToPath } from "url";
+import { Api, KnownProvider, Model } from "../src/types.js";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const packageRoot = join(__dirname, "..");
+
+interface ModelsDevModel {
+	id: string;
+	name: string;
+	tool_call?: boolean;
+	reasoning?: boolean;
+	limit?: {
+		context?: number;
+		output?: number;
+	};
+	cost?: {
+		input?: number;
+		output?: number;
+		cache_read?: number;
+		cache_write?: number;
+	};
+	modalities?: {
+		input?: string[];
+	};
+	provider?: {
+		npm?: string;
+	};
+}
+
+interface AiGatewayModel {
+	id: string;
+	name?: string;
+	context_window?: number;
+	max_tokens?: number;
+	tags?: string[];
+	pricing?: {
+		input?: string | number;
+		output?: string | number;
+		input_cache_read?: string | number;
+		input_cache_write?: string | number;
+	};
+}
+
+const COPILOT_STATIC_HEADERS = {
+	"User-Agent": "GitHubCopilotChat/0.35.0",
+	"Editor-Version": "vscode/1.107.0",
+	"Editor-Plugin-Version": "copilot-chat/0.35.0",
+	"Copilot-Integration-Id": "vscode-chat",
+} as const;
+
+const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1";
+const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh";
+
+async function fetchOpenRouterModels(): Promise<Model[]> {
+	try {
+		console.log("Fetching models from OpenRouter API...");
+		const response = await fetch("https://openrouter.ai/api/v1/models");
+		const data = await response.json();
+
+		const models: Model[] = [];
+
+		for (const model of data.data) {
+			// Only include models that support tools
+			if (!model.supported_parameters?.includes("tools")) continue;
+
+			// OpenRouter model IDs embed the upstream provider, so keep the full ID
+			const provider: KnownProvider = "openrouter";
+			const modelKey = model.id;
+
+			// Parse input modalities
+			const input: ("text" | "image")[] = ["text"];
+			if (model.architecture?.modality?.includes("image")) {
+				input.push("image");
+			}
+
+			// Convert pricing from $/token to $/million tokens
+			const inputCost = parseFloat(model.pricing?.prompt || "0") * 1_000_000;
+			const outputCost = parseFloat(model.pricing?.completion || "0") * 1_000_000;
+			const cacheReadCost = parseFloat(model.pricing?.input_cache_read || "0") * 1_000_000;
+			const cacheWriteCost = parseFloat(model.pricing?.input_cache_write || "0") * 1_000_000;
+
+			const normalizedModel: Model = {
+				id: modelKey,
+				name: model.name,
+				api: "openai-completions",
+				baseUrl: "https://openrouter.ai/api/v1",
+				provider,
+				reasoning: model.supported_parameters?.includes("reasoning") || false,
+				input,
+				cost: {
+					input: inputCost,
+					output: outputCost,
+					cacheRead: cacheReadCost,
+					cacheWrite: cacheWriteCost,
+				},
+				contextWindow: model.context_length || 4096,
+				maxTokens: model.top_provider?.max_completion_tokens || 4096,
+			};
+			models.push(normalizedModel);
+		}
+
+		console.log(`Fetched ${models.length} tool-capable models from OpenRouter`);
+		return models;
+	} catch (error) {
+		console.error("Failed to fetch OpenRouter models:", error);
+		return [];
+	}
+}
+
+async function fetchAiGatewayModels(): Promise<Model[]> {
+	try {
+		console.log("Fetching models from Vercel AI Gateway API...");
+		const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`);
+		const data = await response.json();
+		const models: Model[] = [];
+
+		const toNumber = (value: string | number | undefined): number => {
+			if (typeof value === "number") {
+				return Number.isFinite(value) ? value : 0;
+			}
+			const parsed = parseFloat(value ?? "0");
+			return Number.isFinite(parsed) ? parsed : 0;
+		};
+
+		const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : [];
+		for (const model of items) {
+			const tags = Array.isArray(model.tags) ? model.tags : [];
+			// Only include models that support tools
+			if (!tags.includes("tool-use")) continue;
+
+			const input: ("text" | "image")[] = ["text"];
+			if (tags.includes("vision")) {
+				input.push("image");
+			}
+
+			const inputCost = toNumber(model.pricing?.input) * 1_000_000;
+			const outputCost = toNumber(model.pricing?.output) * 1_000_000;
+			const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000;
+			const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000;
+
+			models.push({
+				id: model.id,
+				name: model.name || model.id,
+				api: "anthropic-messages",
+				baseUrl: AI_GATEWAY_BASE_URL,
+				provider: "vercel-ai-gateway",
+				reasoning: tags.includes("reasoning"),
+				input,
+				cost: {
+					input: inputCost,
+					output: outputCost,
+					cacheRead: cacheReadCost,
+					cacheWrite: cacheWriteCost,
+				},
+				contextWindow: model.context_window || 4096,
+				maxTokens: model.max_tokens || 4096,
+			});
+		}
+
+		console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`);
+		return models;
+	} catch (error) {
+		console.error("Failed to fetch Vercel AI Gateway models:", error);
+		return [];
+	}
+}
+
+async function loadModelsDevData(): Promise<Model[]> {
+	try {
+		console.log("Fetching models from models.dev API...");
+		const response = await fetch("https://models.dev/api.json");
+		const data = await response.json();
+
+		const models: Model[] = [];
+
+		// Process Amazon Bedrock models
+		if (data["amazon-bedrock"]?.models) {
+			for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+
+				let id = modelId;
+
+				if (id.startsWith("ai21.jamba")) {
+					// These models don't support tool use in streaming mode
+					continue;
+				}
+
+				if (id.startsWith("mistral.mistral-7b-instruct-v0")) {
+					// These models don't support system messages
+					continue;
+				}
+
+				models.push({
+					id,
+					name: m.name || id,
+					api: "bedrock-converse-stream" as const,
+					provider: "amazon-bedrock" as const,
+					baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+					reasoning: m.reasoning === true,
+					input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+
+		// Process Anthropic models
+		if (data.anthropic?.models) {
+			for (const [modelId, model] of Object.entries(data.anthropic.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "anthropic-messages",
+					provider: "anthropic",
+					baseUrl: "https://api.anthropic.com",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ?
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Google models + if (data.google?.models) { + for (const [modelId, model] of Object.entries(data.google.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process OpenAI models + if (data.openai?.models) { + for (const [modelId, model] of Object.entries(data.openai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Groq models + if (data.groq?.models) { + for (const [modelId, model] of Object.entries(data.groq.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Cerebras models + if (data.cerebras?.models) { + for (const [modelId, model] of Object.entries(data.cerebras.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "cerebras", + baseUrl: "https://api.cerebras.ai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process xAi models + if (data.xai?.models) { + for (const [modelId, model] of Object.entries(data.xai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process zAi models + if (data.zai?.models) { + for (const [modelId, model] of Object.entries(data.zai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + const supportsImage = m.modalities?.input?.includes("image") + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + reasoning: m.reasoning === true, + input: supportsImage ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + thinkingFormat: "zai", + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Mistral models + if (data.mistral?.models) { + for (const [modelId, model] of Object.entries(data.mistral.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "mistral-conversations", + provider: "mistral", + baseUrl: "https://api.mistral.ai", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Hugging Face models + if (data.huggingface?.models) { + for (const [modelId, model] of Object.entries(data.huggingface.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process OpenCode models (Zen and Go) + // API mapping based on provider.npm field: + // - @ai-sdk/openai → openai-responses + // - @ai-sdk/anthropic → anthropic-messages + // - @ai-sdk/google → google-generative-ai + // - null/undefined/@ai-sdk/openai-compatible → openai-completions + const opencodeVariants = [ + { key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" }, + { key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" }, + ] as const; + + for (const variant of opencodeVariants) { + if (!data[variant.key]?.models) continue; + + for (const [modelId, model] of Object.entries(data[variant.key].models)) { + const m = model as ModelsDevModel & { status?: string }; + if (m.tool_call !== true) continue; + if (m.status === "deprecated") continue; + + const npm = m.provider?.npm; + let api: Api; + let baseUrl: string; + + if (npm === "@ai-sdk/openai") { + api = "openai-responses"; + baseUrl = `${variant.basePath}/v1`; + } else if (npm === "@ai-sdk/anthropic") { + api = "anthropic-messages"; + // Anthropic SDK appends /v1/messages to baseURL + baseUrl = variant.basePath; + } else if (npm === "@ai-sdk/google") { + api = "google-generative-ai"; + baseUrl = `${variant.basePath}/v1`; + } else { + // null, undefined, or @ai-sdk/openai-compatible + api = "openai-completions"; + baseUrl = `${variant.basePath}/v1`; + } + + models.push({ + id: modelId, + name: m.name || modelId, + api, + provider: variant.provider, + baseUrl, + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process GitHub Copilot models + if (data["github-copilot"]?.models) { + for (const [modelId, model] of Object.entries(data["github-copilot"].models)) { + const m = model as ModelsDevModel & { status?: string }; + if (m.tool_call !== true) continue; + if (m.status === "deprecated") continue; + + // Claude 4.x models route to Anthropic Messages API + const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId); + // gpt-5 models require responses API, others use completions + const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe"); + + const api: Api = isCopilotClaude4 + ? "anthropic-messages" + : needsResponsesApi + ? "openai-responses" + : "openai-completions"; + + const copilotModel: Model = { + id: modelId, + name: m.name || modelId, + api, + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 128000, + maxTokens: m.limit?.output || 8192, + headers: { ...COPILOT_STATIC_HEADERS }, + // compat only applies to openai-completions + ...(api === "openai-completions" ? { + compat: { + supportsStore: false, + supportsDeveloperRole: false, + supportsReasoningEffort: false, + }, + } : {}), + }; + + models.push(copilotModel); + } + } + + // Process MiniMax models + const minimaxVariants = [ + { key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" }, + { key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" }, + ] as const; + + for (const { key, provider, baseUrl } of minimaxVariants) { + if (data[key]?.models) { + for (const [modelId, model] of Object.entries(data[key].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider, + // MiniMax's Anthropic-compatible API - SDK appends /v1/messages + baseUrl, + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + } + + // Process Kimi For Coding models + if (data["kimi-for-coding"]?.models) { + for (const [modelId, model] of Object.entries(data["kimi-for-coding"].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider: "kimi-coding", + // Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages + baseUrl: "https://api.kimi.com/coding", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + console.log(`Loaded ${models.length} tool-capable models from models.dev`); + return models; + } catch (error) { + console.error("Failed to load models.dev data:", error); + return []; + } +} + +async function generateModels() { + // Fetch models from both sources + // models.dev: Anthropic, Google, OpenAI, Groq, Cerebras + // OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI) + // AI Gateway: OpenAI-compatible catalog with tool-capable models + const modelsDevModels = await loadModelsDevData(); + const openRouterModels = await fetchOpenRouterModels(); + const aiGatewayModels = await fetchAiGatewayModels(); + + // Combine models (models.dev has priority) + const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter( + (model) => + !((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"), + ); + + // Fix incorrect cache pricing for Claude Opus 4.5 from models.dev + // models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25) + const opus45 = allModels.find(m => m.provider === "anthropic" && m.id === "claude-opus-4-5"); + if (opus45) { + opus45.cost.cacheRead = 0.5; + opus45.cost.cacheWrite = 6.25; + } + + // Temporary overrides until upstream model metadata is corrected. + for (const candidate of allModels) { + if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) { + candidate.cost.cacheRead = 0.5; + candidate.cost.cacheWrite = 6.25; + candidate.contextWindow = 1000000; + } + if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-sonnet-4-6")) { + candidate.contextWindow = 1000000; + } + if ( + (candidate.provider === "anthropic" || + candidate.provider === "opencode" || + candidate.provider === "opencode-go") && + (candidate.id === "claude-opus-4-6" || + candidate.id === "claude-sonnet-4-6" || + candidate.id === "claude-opus-4.6" || + candidate.id === "claude-sonnet-4.6") + ) { + candidate.contextWindow = 1000000; + } + if ( + candidate.provider === "google-antigravity" && + (candidate.id === "claude-opus-4-6-thinking" || candidate.id === "claude-sonnet-4-6") + ) { + candidate.contextWindow = 1000000; + } + // OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K + if ( + (candidate.provider === "opencode" || candidate.provider === "opencode-go") && + (candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4") + ) { + candidate.contextWindow = 200000; + } + if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") { + candidate.contextWindow = 272000; + candidate.maxTokens = 128000; + } + if (candidate.provider === "openai" && candidate.id === "gpt-5.4") { + candidate.contextWindow = 272000; + candidate.maxTokens = 128000; + } + // Keep selected OpenRouter model metadata stable until upstream settles. 
+ if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") { + candidate.cost.input = 0.41; + candidate.cost.output = 2.06; + candidate.cost.cacheRead = 0.07; + candidate.maxTokens = 4096; + } + if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") { + candidate.cost.input = 0.6; + candidate.cost.output = 1.9; + candidate.cost.cacheRead = 0.119; + } + } + + + // Add missing EU Opus 4.6 profile + if (!allModels.some((m) => m.provider === "amazon-bedrock" && m.id === "eu.anthropic.claude-opus-4-6-v1")) { + allModels.push({ + id: "eu.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6 (EU)", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + }); + } + + // Add missing Claude Opus 4.6 + if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-6")) { + allModels.push({ + id: "claude-opus-4-6", + name: "Claude Opus 4.6", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + provider: "anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + }); + } + + // Add missing Claude Sonnet 4.6 + if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-sonnet-4-6")) { + allModels.push({ + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + provider: "anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 1000000, + maxTokens: 64000, + }); + } + + // Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it. 
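
All of these fallback additions, including the Gemini block just below, repeat the same `some`-then-`push` guard. A small helper in the style below would make each addition a single call; `ensureModel` is a hypothetical sketch, not something this patch defines:

    // Hypothetical sketch: add a fallback profile only when the
    // (provider, id) pair is absent from the merged list.
    function ensureModel(models: Model[], fallback: Model): void {
        if (!models.some((m) => m.provider === fallback.provider && m.id === fallback.id)) {
            models.push(fallback);
        }
    }

Each `if (!allModels.some(...)) allModels.push({...})` block would then read `ensureModel(allModels, {...})`.
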
+ if (!allModels.some((m) => m.provider === "google" && m.id === "gemini-3.1-flash-lite-preview")) { + allModels.push({ + id: "gemini-3.1-flash-lite-preview", + name: "Gemini 3.1 Flash Lite Preview", + api: "google-generative-ai", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + provider: "google", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + }); + } + + // Add missing gpt models + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) { + allModels.push({ + id: "gpt-5-chat-latest", + name: "GPT-5 Chat Latest", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) { + allModels.push({ + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 5, + cacheRead: 0.125, + cacheWrite: 1.25, + }, + contextWindow: 400000, + maxTokens: 128000, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) { + allModels.push({ + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.3-codex-spark")) { + allModels.push({ + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + }); + } + + // Add missing GitHub Copilot GPT-5.3 models until models.dev includes them. + const copilotBaseModel = allModels.find( + (m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex", + ); + if (copilotBaseModel) { + if (!allModels.some((m) => m.provider === "github-copilot" && m.id === "gpt-5.3-codex")) { + allModels.push({ + ...copilotBaseModel, + id: "gpt-5.3-codex", + name: "GPT-5.3 Codex", + }); + } + } + + if (!allModels.some((m) => m.provider === "openai" && m.id === "gpt-5.4")) { + allModels.push({ + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2.5, + output: 15, + cacheRead: 0.25, + cacheWrite: 0, + }, + contextWindow: 272000, + maxTokens: 128000, + }); + } + + // OpenAI Codex (ChatGPT OAuth) models + // NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases. + // Context window is based on observed server limits (400s above ~272k), not marketing numbers. 
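
The eight Codex entries that follow differ only in id, name, pricing, and (for Spark) input modalities and context window. A factory in the style below could stamp out the common shape; `codexModel` is a hypothetical sketch assuming the same `Model` type, not what the patch does:

    // Hypothetical sketch: shared defaults for the ChatGPT-backend Codex entries.
    const codexModel = (id: string, name: string, cost: Model["cost"]): Model<"openai-codex-responses"> => ({
        id,
        name,
        api: "openai-codex-responses",
        provider: "openai-codex",
        baseUrl: "https://chatgpt.com/backend-api",
        reasoning: true,
        input: ["text", "image"],
        cost,
        contextWindow: 272000,
        maxTokens: 128000,
    });

    // e.g. codexModel("gpt-5.2", "GPT-5.2", { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 })
    // Spark would still need its own literal (text-only input, 128000-token context).
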
+ const CODEX_BASE_URL = "https://chatgpt.com/backend-api"; + const CODEX_CONTEXT = 272000; + const CODEX_MAX_TOKENS = 128000; + const codexModels: Model<"openai-codex-responses">[] = [ + { + id: "gpt-5.1", + name: "GPT-5.1", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.1-codex-mini", + name: "GPT-5.1 Codex Mini", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.2", + name: "GPT-5.2", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.2-codex", + name: "GPT-5.2 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.3-codex", + name: "GPT-5.3 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: CODEX_MAX_TOKENS, + }, + ]; + allModels.push(...codexModels); + + // Add missing Grok models + if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) { + allModels.push({ + id: "grok-code-fast-1", + name: "Grok Code Fast 1", + api: "openai-completions", + baseUrl: "https://api.x.ai/v1", + provider: "xai", + reasoning: false, + input: ["text"], + cost: { + input: 0.2, + output: 1.5, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 8192, + }); + } + + // Add "auto" alias for openrouter/auto + if (!allModels.some(m => m.provider === "openrouter" && m.id === "auto")) { + allModels.push({ + id: "auto", + name: "Auto", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + 
cost: {
+				// We don't know the costs up front: OpenRouter auto-routes each request
+				// to a different model and charges for the underlying model it used.
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 2000000,
+			maxTokens: 30000,
+		});
+	}
+
+	// Google Cloud Code Assist models (Gemini CLI)
+	// Uses production endpoint, standard Gemini models only
+	const CLOUD_CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com";
+	const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [
+		{
+			id: "gemini-2.5-pro",
+			name: "Gemini 2.5 Pro (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-2.5-flash",
+			name: "Gemini 2.5 Flash (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-2.0-flash",
+			name: "Gemini 2.0 Flash (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 8192,
+		},
+		{
+			id: "gemini-3-pro-preview",
+			name: "Gemini 3 Pro Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3-flash-preview",
+			name: "Gemini 3 Flash Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3.1-pro-preview",
+			name: "Gemini 3.1 Pro Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+	];
+	allModels.push(...cloudCodeAssistModels);
+
+	// Antigravity models (Gemini 3, Claude, GPT-OSS via Google Cloud)
+	// Uses sandbox endpoint and different OAuth credentials for access to additional models
+	const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
+	const antigravityModels: Model<"google-gemini-cli">[] = [
+		{
+			id: "gemini-3.1-pro-high",
+			name: "Gemini 3.1 Pro High (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			// the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input
+			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3.1-pro-low",
+			name: "Gemini 3.1 Pro Low (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl:
ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + // the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3-flash", + name: "Gemini 3 Flash (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.5, output: 3, cacheRead: 0.5, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "claude-sonnet-4-5", + name: "Claude Sonnet 4.5 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: false, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-sonnet-4-5-thinking", + name: "Claude Sonnet 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-opus-4-5-thinking", + name: "Claude Opus 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-opus-4-6-thinking", + name: "Claude Opus 4.6 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + contextWindow: 200000, + maxTokens: 128000, + }, + { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "gpt-oss-120b-medium", + name: "GPT-OSS 120B Medium (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: false, + input: ["text"], + cost: { input: 0.09, output: 0.36, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 32768, + }, + ]; + allModels.push(...antigravityModels); + + const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com"; + const vertexModels: Model<"google-vertex">[] = [ + { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 64000, + }, + { + id: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-3-flash-preview", + name: "Gemini 3 
Flash Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 8192, + }, + { + id: "gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash-lite-preview-09-2025", + name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash-lite", + name: "Gemini 2.5 Flash Lite (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + { + id: "gemini-1.5-flash", + name: "Gemini 1.5 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + { + id: "gemini-1.5-flash-8b", + name: "Gemini 1.5 Flash-8B (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + ]; + allModels.push(...vertexModels); + + // Kimi For Coding models (Moonshot AI's Anthropic-compatible coding API) + // Static fallback in case models.dev doesn't have them yet + const KIMI_CODING_BASE_URL = "https://api.kimi.com/coding"; + const kimiCodingModels: Model<"anthropic-messages">[] = [ + { + id: "kimi-k2-thinking", + name: "Kimi K2 Thinking", + api: 
"anthropic-messages", + provider: "kimi-coding", + baseUrl: KIMI_CODING_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262144, + maxTokens: 32768, + }, + { + id: "k2p5", + name: "Kimi K2.5", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: KIMI_CODING_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262144, + maxTokens: 32768, + }, + ]; + // Only add if not already present from models.dev + for (const model of kimiCodingModels) { + if (!allModels.some(m => m.provider === "kimi-coding" && m.id === model.id)) { + allModels.push(model); + } + } + + const azureOpenAiModels: Model[] = allModels + .filter((model) => model.provider === "openai" && model.api === "openai-responses") + .map((model) => ({ + ...model, + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + })); + allModels.push(...azureOpenAiModels); + + // Group by provider and deduplicate by model ID + const providers: Record>> = {}; + for (const model of allModels) { + if (!providers[model.provider]) { + providers[model.provider] = {}; + } + // Use model ID as key to automatically deduplicate + // Only add if not already present (models.dev takes priority over OpenRouter) + if (!providers[model.provider][model.id]) { + providers[model.provider][model.id] = model; + } + } + + // Generate TypeScript file + let output = `// This file is auto-generated by scripts/generate-models.ts +// Do not edit manually - run 'npm run generate-models' to update + +import type { Model } from "./types.js"; + +export const MODELS = { +`; + + // Generate provider sections (sorted for deterministic output) + const sortedProviderIds = Object.keys(providers).sort(); + for (const providerId of sortedProviderIds) { + const models = providers[providerId]; + output += `\t${JSON.stringify(providerId)}: {\n`; + + const sortedModelIds = Object.keys(models).sort(); + for (const modelId of sortedModelIds) { + const model = models[modelId]; + output += `\t\t"${model.id}": {\n`; + output += `\t\t\tid: "${model.id}",\n`; + output += `\t\t\tname: "${model.name}",\n`; + output += `\t\t\tapi: "${model.api}",\n`; + output += `\t\t\tprovider: "${model.provider}",\n`; + if (model.baseUrl !== undefined) { + output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`; + } + if (model.headers) { + output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`; + } + if (model.compat) { + output += ` compat: ${JSON.stringify(model.compat)}, +`; + } + output += `\t\t\treasoning: ${model.reasoning},\n`; + output += `\t\t\tinput: [${model.input.map(i => `"${i}"`).join(", ")}],\n`; + output += `\t\t\tcost: {\n`; + output += `\t\t\t\tinput: ${model.cost.input},\n`; + output += `\t\t\t\toutput: ${model.cost.output},\n`; + output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`; + output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`; + output += `\t\t\t},\n`; + output += `\t\t\tcontextWindow: ${model.contextWindow},\n`; + output += `\t\t\tmaxTokens: ${model.maxTokens},\n`; + output += `\t\t} satisfies Model<"${model.api}">,\n`; + } + + output += `\t},\n`; + } + + output += `} as const; +`; + + // Write file + writeFileSync(join(packageRoot, "src/models.generated.ts"), output); + console.log("Generated src/models.generated.ts"); + + // Print statistics + const totalModels = allModels.length; + const reasoningModels = allModels.filter(m => m.reasoning).length; + + console.log(`\nModel 
Statistics:`); + console.log(` Total tool-capable models: ${totalModels}`); + console.log(` Reasoning-capable models: ${reasoningModels}`); + + for (const [provider, models] of Object.entries(providers)) { + console.log(` ${provider}: ${Object.keys(models).length} models`); + } +} + +// Run the generator +generateModels().catch(console.error); diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index ac56d2069..e62965533 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -90,40 +90,6 @@ export const MODELS = { contextWindow: 300000, maxTokens: 8192, } satisfies Model<"bedrock-converse-stream">, - "amazon.titan-text-express-v1": { - id: "amazon.titan-text-express-v1", - name: "Titan Text G1 - Express", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "amazon.titan-text-express-v1:0:8k": { - id: "amazon.titan-text-express-v1:0:8k", - name: "Titan Text G1 - Express", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-3-5-haiku-20241022-v1:0": { id: "anthropic.claude-3-5-haiku-20241022-v1:0", name: "Claude Haiku 3.5", @@ -209,40 +175,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-3-opus-20240229-v1:0": { - id: "anthropic.claude-3-opus-20240229-v1:0", - name: "Claude Opus 3", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-3-sonnet-20240229-v1:0": { - id: "anthropic.claude-3-sonnet-20240229-v1:0", - name: "Claude Sonnet 3", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-haiku-4-5-20251001-v1:0": { id: "anthropic.claude-haiku-4-5-20251001-v1:0", name: "Claude Haiku 4.5", @@ -325,7 +257,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-sonnet-4-20250514-v1:0": { @@ -376,43 +308,9 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "cohere.command-r-plus-v1:0": { - id: "cohere.command-r-plus-v1:0", - name: "Command R+", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: 
false, - input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "cohere.command-r-v1:0": { - id: "cohere.command-r-v1:0", - name: "Command R", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "deepseek.r1-v1:0": { id: "deepseek.r1-v1:0", name: "DeepSeek-R1", @@ -447,8 +345,8 @@ export const MODELS = { contextWindow: 163840, maxTokens: 81920, } satisfies Model<"bedrock-converse-stream">, - "deepseek.v3.2-v1:0": { - id: "deepseek.v3.2-v1:0", + "deepseek.v3.2": { + id: "deepseek.v3.2", name: "DeepSeek-V3.2", api: "bedrock-converse-stream", provider: "amazon-bedrock", @@ -512,7 +410,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "eu.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -563,7 +461,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "global.anthropic.claude-haiku-4-5-20251001-v1:0": { @@ -614,7 +512,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "global.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -665,7 +563,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "google.gemma-3-27b-it": { @@ -702,6 +600,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "meta.llama3-1-405b-instruct-v1:0": { + id: "meta.llama3-1-405b-instruct-v1:0", + name: "Llama 3.1 405B Instruct", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text"], + cost: { + input: 2.4, + output: 2.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"bedrock-converse-stream">, "meta.llama3-1-70b-instruct-v1:0": { id: "meta.llama3-1-70b-instruct-v1:0", name: "Llama 3.1 70B Instruct", @@ -889,6 +804,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"bedrock-converse-stream">, + "mistral.devstral-2-123b": { + id: "mistral.devstral-2-123b", + name: "Devstral 2 123B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, + "mistral.magistral-small-2509": { + id: "mistral.magistral-small-2509", + name: "Magistral Small 1.2", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + 
contextWindow: 128000, + maxTokens: 40000, + } satisfies Model<"bedrock-converse-stream">, "mistral.ministral-3-14b-instruct": { id: "mistral.ministral-3-14b-instruct", name: "Ministral 14B 3.0", @@ -906,6 +855,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "mistral.ministral-3-3b-instruct": { + id: "mistral.ministral-3-3b-instruct", + name: "Ministral 3 3B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, "mistral.ministral-3-8b-instruct": { id: "mistral.ministral-3-8b-instruct", name: "Ministral 3 8B", @@ -923,22 +889,39 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, - "mistral.mistral-large-2402-v1:0": { - id: "mistral.mistral-large-2402-v1:0", - name: "Mistral Large (24.02)", + "mistral.mistral-large-3-675b-instruct": { + id: "mistral.mistral-large-3-675b-instruct", + name: "Mistral Large 3", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", reasoning: false, - input: ["text"], + input: ["text", "image"], cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0, }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, + "mistral.pixtral-large-2502-v1:0": { + id: "mistral.pixtral-large-2502-v1:0", + name: "Pixtral Large (25.02)", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, contextWindow: 128000, - maxTokens: 4096, + maxTokens: 8192, } satisfies Model<"bedrock-converse-stream">, "mistral.voxtral-mini-3b-2507": { id: "mistral.voxtral-mini-3b-2507", @@ -1025,6 +1008,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "nvidia.nemotron-nano-3-30b": { + id: "nvidia.nemotron-nano-3-30b", + name: "NVIDIA Nemotron Nano 3 30B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 0.06, + output: 0.24, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"bedrock-converse-stream">, "nvidia.nemotron-nano-9b-v2": { id: "nvidia.nemotron-nano-9b-v2", name: "NVIDIA Nemotron Nano 9B v2", @@ -1294,7 +1294,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "us.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -1345,7 +1345,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "writer.palmyra-x4-v1:0": { @@ -1721,23 +1721,6 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, - maxTokens: 128000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-6[1m]": { - id: "claude-opus-4-6[1m]", - name: "Claude Opus 4.6 (1M)", - api: 
"anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 5, - output: 25, - cacheRead: 0.5, - cacheWrite: 6.25, - }, contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, @@ -1823,182 +1806,10 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, }, - "anthropic-vertex": { - "claude-opus-4-6": { - id: "claude-opus-4-6", - name: "Claude Opus 4.6 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 128000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4-6[1m]": { - id: "claude-opus-4-6[1m]", - name: "Claude Opus 4.6 1M (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 1000000, - maxTokens: 128000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-6": { - id: "claude-sonnet-4-6", - name: "Claude Sonnet 4.6 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-6[1m]": { - id: "claude-sonnet-4-6[1m]", - name: "Claude Sonnet 4.6 1M (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 1000000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-5@20250929": { - id: "claude-sonnet-4-5@20250929", - name: "Claude Sonnet 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4@20250514": { - id: "claude-sonnet-4@20250514", - name: "Claude Sonnet 4 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4-5@20251101": { - id: "claude-opus-4-5@20251101", - name: "Claude Opus 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - 
"claude-opus-4-1@20250805": { - id: "claude-opus-4-1@20250805", - name: "Claude Opus 4.1 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4@20250514": { - id: "claude-opus-4@20250514", - name: "Claude Opus 4 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-haiku-4-5@20251001": { - id: "claude-haiku-4-5@20251001", - name: "Claude Haiku 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.8, - output: 4, - cacheRead: 0.08, - cacheWrite: 1, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-vertex">, - }, "azure-openai-responses": { "codex-mini-latest": { id: "codex-mini-latest", @@ -2493,6 +2304,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"azure-openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 nano", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -2733,7 +2578,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-opus-4.5": { @@ -2751,7 +2596,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 160000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-opus-4.6": { @@ -2769,7 +2614,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { @@ -2787,7 +2632,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 216000, maxTokens: 16000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4.5": { @@ -2805,7 +2650,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4.6": { @@ -2823,7 +2668,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 200000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, 
"gemini-2.5-pro": { @@ -2918,7 +2763,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 64000, + contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-completions">, "gpt-4o": { @@ -2937,8 +2782,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 64000, - maxTokens: 16384, + contextWindow: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "gpt-5": { id: "gpt-5", @@ -2973,7 +2818,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.1": { @@ -2991,7 +2836,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.1-codex": { @@ -3009,7 +2854,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.1-codex-max": { @@ -3027,7 +2872,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.1-codex-mini": { @@ -3045,7 +2890,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.2": { @@ -3063,7 +2908,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.2-codex": { @@ -3081,7 +2926,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.3-codex": { @@ -3120,6 +2965,24 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "grok-code-fast-1": { id: "grok-code-fast-1", name: "Grok Code Fast 1", @@ -3439,10 +3302,10 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, + input: 0.25, + output: 1.5, + cacheRead: 0.025, + cacheWrite: 1, }, contextWindow: 1048576, maxTokens: 65536, @@ -4703,6 +4566,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7": { + id: "MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "anthropic-messages", + provider: "minimax", + baseUrl: "https://api.minimax.io/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7-highspeed": { + id: "MiniMax-M2.7-highspeed", + name: "MiniMax-M2.7-highspeed", + api: "anthropic-messages", + provider: 
"minimax", + baseUrl: "https://api.minimax.io/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "minimax-cn": { "MiniMax-M2": { @@ -4773,11 +4670,45 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7": { + id: "MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "anthropic-messages", + provider: "minimax-cn", + baseUrl: "https://api.minimaxi.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7-highspeed": { + id: "MiniMax-M2.7-highspeed", + name: "MiniMax-M2.7-highspeed", + api: "anthropic-messages", + provider: "minimax-cn", + baseUrl: "https://api.minimaxi.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "mistral": { "codestral-latest": { id: "codestral-latest", - name: "Codestral", + name: "Codestral (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4828,7 +4759,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "devstral-medium-latest": { id: "devstral-medium-latest", - name: "Devstral 2", + name: "Devstral 2 (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4896,7 +4827,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "magistral-medium-latest": { id: "magistral-medium-latest", - name: "Magistral Medium", + name: "Magistral Medium (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4930,7 +4861,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "ministral-3b-latest": { id: "ministral-3b-latest", - name: "Ministral 3B", + name: "Ministral 3B (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4947,7 +4878,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "ministral-8b-latest": { id: "ministral-8b-latest", - name: "Ministral 8B", + name: "Ministral 8B (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4998,7 +4929,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-large-latest": { id: "mistral-large-latest", - name: "Mistral Large", + name: "Mistral Large (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5049,7 +4980,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-medium-latest": { id: "mistral-medium-latest", - name: "Mistral Medium", + name: "Mistral Medium (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5100,7 +5031,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-small-latest": { id: "mistral-small-latest", - name: "Mistral Small", + name: "Mistral Small (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5185,7 +5116,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, 
"pixtral-large-latest": { id: "pixtral-large-latest", - name: "Pixtral Large", + name: "Pixtral Large (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5695,6 +5626,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 nano", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -6087,7 +6052,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { @@ -6158,23 +6123,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "gemini-3-pro": { - id: "gemini-3-pro", - name: "Gemini 3 Pro", - api: "google-generative-ai", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 12, - cacheRead: 0.2, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, "gemini-3.1-pro": { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro Preview", @@ -6192,40 +6140,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM-4.6", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.1, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.1, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "glm-5": { id: "glm-5", name: "GLM-5", @@ -6430,6 +6344,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 Mini", + api: "openai-responses", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 Nano", + api: "openai-responses", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 
0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -6464,22 +6412,39 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, - "minimax-m2.1": { - id: "minimax-m2.1", - name: "MiniMax M2.1", + "mimo-v2-omni-free": { + id: "mimo-v2-omni-free", + name: "MiMo V2 Omni Free", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "mimo-v2-pro-free": { + id: "mimo-v2-pro-free", + name: "MiMo V2 Pro Free", api: "openai-completions", provider: "opencode", baseUrl: "https://opencode.ai/zen/v1", reasoning: true, input: ["text"], cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.1, + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 204800, - maxTokens: 131072, + contextWindow: 1048576, + maxTokens: 64000, } satisfies Model<"openai-completions">, "minimax-m2.5": { id: "minimax-m2.5", @@ -6515,6 +6480,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "nemotron-3-super-free": { + id: "nemotron-3-super-free", + name: "Nemotron 3 Super Free", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, }, "opencode-go": { "glm-5": { @@ -6568,6 +6550,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "minimax-m2.7": { + id: "minimax-m2.7", + name: "MiniMax M2.7", + api: "anthropic-messages", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "openrouter": { "ai21/jamba-large-1.7": { @@ -7080,6 +7079,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 32768, } satisfies Model<"openai-completions">, + "bytedance-seed/seed-2.0-lite": { + id: "bytedance-seed/seed-2.0-lite", + name: "ByteDance Seed: Seed-2.0-Lite", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "bytedance-seed/seed-2.0-mini": { id: "bytedance-seed/seed-2.0-mini", name: "ByteDance Seed: Seed-2.0-Mini", @@ -7159,11 +7175,11 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.77, - cacheRead: 0.13, + cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 163840, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-chat-v3.1": { id: "deepseek/deepseek-chat-v3.1", @@ -7233,23 +7249,6 @@ export const MODELS = { contextWindow: 163840, maxTokens: 4096, } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.1-terminus:exacto": { - id: "deepseek/deepseek-v3.1-terminus:exacto", - name: 
"DeepSeek: DeepSeek V3.1 Terminus (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.21, - output: 0.7899999999999999, - cacheRead: 0.16799999999999998, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2": { id: "deepseek/deepseek-v3.2", name: "DeepSeek: DeepSeek V3.2", @@ -7259,13 +7258,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.39999999999999997, - cacheRead: 0, + input: 0.26, + output: 0.38, + cacheRead: 0.13, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2-exp": { id: "deepseek/deepseek-v3.2-exp", @@ -7522,40 +7521,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it": { - id: "google/gemma-3-27b-it", - name: "Google: Gemma 3 27B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.04, - output: 0.15, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it:free": { - id: "google/gemma-3-27b-it:free", - name: "Google: Gemma 3 27B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, "inception/mercury": { id: "inception/mercury", name: "Inception: Mercury", @@ -7658,23 +7623,6 @@ export const MODELS = { contextWindow: 8192, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-405b-instruct": { - id: "meta-llama/llama-3.1-405b-instruct", - name: "Meta: Llama 3.1 405B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 4, - output: 4, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-70b-instruct": { id: "meta-llama/llama-3.1-70b-instruct", name: "Meta: Llama 3.1 70B Instruct", @@ -7740,8 +7688,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 128000, + contextWindow: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "meta-llama/llama-4-maverick": { id: "meta-llama/llama-4-maverick", @@ -7837,14 +7785,48 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.295, - output: 1.2, - cacheRead: 0.03, + input: 0.19999999999999998, + output: 1.17, + cacheRead: 0.09999999999999999, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "minimax/minimax-m2.5:free": { + id: "minimax/minimax-m2.5:free", + name: "MiniMax: MiniMax M2.5 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 196608, maxTokens: 196608, } satisfies 
Model<"openai-completions">, + "minimax/minimax-m2.7": { + id: "minimax/minimax-m2.7", + name: "MiniMax: MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "mistralai/codestral-2508": { id: "mistralai/codestral-2508", name: "Mistral: Codestral 2508", @@ -7856,7 +7838,7 @@ export const MODELS = { cost: { input: 0.3, output: 0.8999999999999999, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 256000, @@ -7873,7 +7855,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 262144, @@ -7890,7 +7872,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -7907,7 +7889,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -7924,7 +7906,7 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.19999999999999998, - cacheRead: 0, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 262144, @@ -7941,7 +7923,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.09999999999999999, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -7958,7 +7940,7 @@ export const MODELS = { cost: { input: 0.15, output: 0.15, - cacheRead: 0, + cacheRead: 0.015, cacheWrite: 0, }, contextWindow: 262144, @@ -7975,7 +7957,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 128000, @@ -7992,7 +7974,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8009,7 +7991,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8026,7 +8008,7 @@ export const MODELS = { cost: { input: 0.5, output: 1.5, - cacheRead: 0, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, contextWindow: 262144, @@ -8043,7 +8025,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -8060,7 +8042,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -8094,7 +8076,7 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.6, - cacheRead: 0, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 32768, @@ -8117,6 +8099,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 16384, } satisfies Model<"openai-completions">, + "mistralai/mistral-small-2603": { + id: "mistralai/mistral-small-2603", + name: "Mistral: Mistral Small 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.015, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mistral-small-3.1-24b-instruct:free": { id: 
"mistralai/mistral-small-3.1-24b-instruct:free", name: "Mistral: Mistral Small 3.1 24B (free)", @@ -8143,13 +8142,13 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.06, - output: 0.18, - cacheRead: 0.03, + input: 0.075, + output: 0.19999999999999998, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "mistralai/mistral-small-creative": { id: "mistralai/mistral-small-creative", @@ -8162,7 +8161,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 32768, @@ -8179,7 +8178,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 65536, @@ -8213,7 +8212,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8230,7 +8229,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 32000, @@ -8270,23 +8269,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-0905:exacto": { - id: "moonshotai/kimi-k2-0905:exacto", - name: "MoonshotAI: Kimi K2 0905 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.6, - output: 2.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", name: "MoonshotAI: Kimi K2 Thinking", @@ -8406,6 +8388,40 @@ export const MODELS = { contextWindow: 256000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-super-120b-a12b": { + id: "nvidia/nemotron-3-super-120b-a12b", + name: "NVIDIA: Nemotron 3 Super", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.5, + cacheRead: 0.04, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-super-120b-a12b:free": { + id: "nvidia/nemotron-3-super-120b-a12b:free", + name: "NVIDIA: Nemotron 3 Super (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "nvidia/nemotron-nano-12b-v2-vl:free": { id: "nvidia/nemotron-nano-12b-v2-vl:free", name: "NVIDIA: Nemotron Nano 12B 2 VL (free)", @@ -9103,6 +9119,40 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "openai/gpt-5.4-mini": { + id: "openai/gpt-5.4-mini", + name: "OpenAI: GPT-5.4 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + 
"openai/gpt-5.4-nano": { + id: "openai/gpt-5.4-nano", + name: "OpenAI: GPT-5.4 Nano", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, "openai/gpt-5.4-pro": { id: "openai/gpt-5.4-pro", name: "OpenAI: GPT-5.4 Pro", @@ -9137,23 +9187,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b:exacto": { - id: "openai/gpt-oss-120b:exacto", - name: "OpenAI: gpt-oss-120b (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.039, - output: 0.19, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-oss-120b:free": { id: "openai/gpt-oss-120b:free", name: "OpenAI: gpt-oss-120b (free)", @@ -9181,12 +9214,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.03, - output: 0.14, - cacheRead: 0, + output: 0.11, + cacheRead: 0.015, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 4096, + maxTokens: 131072, } satisfies Model<"openai-completions">, "openai/gpt-oss-20b:free": { id: "openai/gpt-oss-20b:free", @@ -9228,7 +9261,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text", "image"], cost: { input: 15, @@ -9279,7 +9312,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 1.1, @@ -9296,7 +9329,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 1.1, @@ -9486,9 +9519,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 1.2, - cacheRead: 0.08, + input: 0.26, + output: 0.78, + cacheRead: 0.052000000000000005, cacheWrite: 0, }, contextWindow: 1000000, @@ -9554,8 +9587,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.7999999999999999, - output: 3.1999999999999997, + input: 0.52, + output: 2.08, cacheRead: 0, cacheWrite: 0, }, @@ -9622,13 +9655,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.11, - output: 0.6, - cacheRead: 0.055, + input: 0.14950000000000002, + output: 1.495, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 262144, + contextWindow: 131072, + maxTokens: 4096, } satisfies Model<"openai-completions">, "qwen/qwen3-30b-a3b": { id: "qwen/qwen3-30b-a3b", @@ -9673,13 +9706,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.051, - output: 0.33999999999999997, - cacheRead: 0, + input: 0.08, + output: 0.39999999999999997, + cacheRead: 0.08, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "qwen/qwen3-32b": { id: "qwen/qwen3-32b", @@ -9817,23 +9850,6 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"openai-completions">, - 
"qwen/qwen3-coder:exacto": { - id: "qwen/qwen3-coder:exacto", - name: "Qwen: Qwen3 Coder 480B A35B (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.22, - output: 1.7999999999999998, - cacheRead: 0.022, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, "qwen/qwen3-coder:free": { id: "qwen/qwen3-coder:free", name: "Qwen: Qwen3 Coder 480B A35B (free)", @@ -9860,9 +9876,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 1.2, - output: 6, - cacheRead: 0.24, + input: 0.78, + output: 3.9, + cacheRead: 0.156, cacheWrite: 0, }, contextWindow: 262144, @@ -9928,13 +9944,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.15, - output: 1.2, + input: 0.0975, + output: 0.78, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 32768, } satisfies Model<"openai-completions">, "qwen/qwen3-vl-235b-a22b-instruct": { id: "qwen/qwen3-vl-235b-a22b-instruct", @@ -9962,8 +9978,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, + input: 0.26, + output: 2.6, cacheRead: 0, cacheWrite: 0, }, @@ -9996,8 +10012,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, + input: 0.13, + output: 1.56, cacheRead: 0, cacheWrite: 0, }, @@ -10123,6 +10139,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "qwen/qwen3.5-9b": { + id: "qwen/qwen3.5-9b", + name: "Qwen: Qwen3.5-9B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.049999999999999996, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "qwen/qwen3.5-flash-02-23": { id: "qwen/qwen3.5-flash-02-23", name: "Qwen: Qwen3.5-Flash", @@ -10132,8 +10165,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, + input: 0.065, + output: 0.26, cacheRead: 0, cacheWrite: 0, }, @@ -10167,12 +10200,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.15, - output: 0.39999999999999997, + output: 0.58, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 32768, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "relace/relace-search": { id: "relace/relace-search", @@ -10217,13 +10250,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.65, - output: 0.75, + input: 0.85, + output: 0.85, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 32768, + contextWindow: 131072, + maxTokens: 16384, } satisfies Model<"openai-completions">, "stepfun/step-3.5-flash": { id: "stepfun/step-3.5-flash", @@ -10302,9 +10335,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.85, - cacheRead: 0.125, + input: 0.3, + output: 1.1, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 163840, @@ -10446,6 +10479,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, + "x-ai/grok-4.20-beta": { + id: "x-ai/grok-4.20-beta", + name: "xAI: Grok 4.20 Beta", + api: 
"openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "x-ai/grok-code-fast-1": { id: "x-ai/grok-code-fast-1", name: "xAI: Grok Code Fast 1", @@ -10480,6 +10530,40 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "xiaomi/mimo-v2-omni": { + id: "xiaomi/mimo-v2-omni", + name: "Xiaomi: MiMo-V2-Omni", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.39999999999999997, + output: 2, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "xiaomi/mimo-v2-pro": { + id: "xiaomi/mimo-v2-pro", + name: "Xiaomi: MiMo-V2-Pro", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "z-ai/glm-4-32b": { id: "z-ai/glm-4-32b", name: "Z.ai: GLM 4 32B ", @@ -10582,23 +10666,6 @@ export const MODELS = { contextWindow: 204800, maxTokens: 204800, } satisfies Model<"openai-completions">, - "z-ai/glm-4.6:exacto": { - id: "z-ai/glm-4.6:exacto", - name: "Z.ai: GLM 4.6 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.44, - output: 1.76, - cacheRead: 0.11, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "z-ai/glm-4.6v": { id: "z-ai/glm-4.6v", name: "Z.ai: GLM 4.6V", @@ -10625,13 +10692,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.38, - output: 1.9800000000000002, - cacheRead: 0.19, + input: 0.39, + output: 1.75, + cacheRead: 0.195, cacheWrite: 0, }, contextWindow: 202752, - maxTokens: 4096, + maxTokens: 65535, } satisfies Model<"openai-completions">, "z-ai/glm-4.7-flash": { id: "z-ai/glm-4.7-flash", @@ -10664,8 +10731,25 @@ export const MODELS = { cacheRead: 0.119, cacheWrite: 0, }, + contextWindow: 80000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "z-ai/glm-5-turbo": { + id: "z-ai/glm-5-turbo", + name: "Z.ai: GLM 5 Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.96, + output: 3.1999999999999997, + cacheRead: 0.192, + cacheWrite: 0, + }, contextWindow: 202752, - maxTokens: 4096, + maxTokens: 131072, } satisfies Model<"openai-completions">, }, "vercel-ai-gateway": { @@ -10678,7 +10762,7 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.06, + input: 0.12, output: 0.24, cacheRead: 0, cacheWrite: 0, @@ -10729,13 +10813,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0, + input: 0.29, + output: 0.59, + cacheRead: 0.145, cacheWrite: 0, }, - contextWindow: 40960, - maxTokens: 16384, + contextWindow: 131072, + maxTokens: 40960, } satisfies Model<"anthropic-messages">, 
"alibaba/qwen3-235b-a22b-thinking": { id: "alibaba/qwen3-235b-a22b-thinking", @@ -10746,9 +10830,9 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.3, - output: 2.9000000000000004, - cacheRead: 0, + input: 0.22999999999999998, + output: 2.3, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 262114, @@ -10765,7 +10849,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 1.5999999999999999, - cacheRead: 0, + cacheRead: 0.022, cacheWrite: 0, }, contextWindow: 262144, @@ -10780,13 +10864,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.07, - output: 0.27, + input: 0.15, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 160000, - maxTokens: 32768, + contextWindow: 262144, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-coder-next": { id: "alibaba/qwen3-coder-next", @@ -10794,7 +10878,7 @@ export const MODELS = { api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, + reasoning: false, input: ["text"], cost: { input: 0.5, @@ -10822,6 +10906,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"anthropic-messages">, + "alibaba/qwen3-max": { + id: "alibaba/qwen3-max", + name: "Qwen3 Max", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 1.2, + output: 6, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, "alibaba/qwen3-max-preview": { id: "alibaba/qwen3-max-preview", name: "Qwen3 Max Preview", @@ -10969,8 +11070,8 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, - cacheWrite: 0, + cacheRead: 0.3, + cacheWrite: 3.75, }, contextWindow: 200000, maxTokens: 8192, @@ -11179,6 +11280,23 @@ export const MODELS = { contextWindow: 256000, maxTokens: 8000, } satisfies Model<"anthropic-messages">, + "deepseek/deepseek-r1": { + id: "deepseek/deepseek-r1", + name: "DeepSeek-R1", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.35, + output: 5.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3": { id: "deepseek/deepseek-v3", name: "DeepSeek V3 0324", @@ -11205,13 +11323,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.21, - output: 0.7899999999999999, + input: 0.5, + output: 1.5, cacheRead: 0, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 128000, + maxTokens: 16384, } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3.1-terminus": { id: "deepseek/deepseek-v3.1-terminus", @@ -11224,7 +11342,7 @@ export const MODELS = { cost: { input: 0.27, output: 1, - cacheRead: 0, + cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 131072, @@ -11239,9 +11357,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.26, - output: 0.38, - cacheRead: 0.13, + input: 0.28, + output: 0.42, + cacheRead: 0.028, cacheWrite: 0, }, contextWindow: 128000, @@ -11264,6 +11382,40 @@ export const MODELS = { contextWindow: 128000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "google/gemini-2.0-flash": { + id: "google/gemini-2.0-flash", + name: "Gemini 2.0 Flash", + api: 
"anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.024999999999999998, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, + "google/gemini-2.0-flash-lite": { + id: "google/gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.075, + output: 0.3, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, "google/gemini-2.5-flash": { id: "google/gemini-2.5-flash", name: "Gemini 2.5 Flash", @@ -11271,11 +11423,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.3, output: 2.5, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 1000000, @@ -11298,40 +11450,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"anthropic-messages">, - "google/gemini-2.5-flash-lite-preview-09-2025": { - id: "google/gemini-2.5-flash-lite-preview-09-2025", - name: "Gemini 2.5 Flash Lite Preview 09-2025", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0.01, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, - "google/gemini-2.5-flash-preview-09-2025": { - id: "google/gemini-2.5-flash-preview-09-2025", - name: "Gemini 2.5 Flash Preview 09-2025", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, "google/gemini-2.5-pro": { id: "google/gemini-2.5-pro", name: "Gemini 2.5 Pro", @@ -11339,11 +11457,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 1.25, output: 10, - cacheRead: 0, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 1048576, @@ -11364,7 +11482,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 1000000, - maxTokens: 64000, + maxTokens: 65000, } satisfies Model<"anthropic-messages">, "google/gemini-3-pro-preview": { id: "google/gemini-3-pro-preview", @@ -11466,7 +11584,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 8192, + maxTokens: 100000, } satisfies Model<"anthropic-messages">, "meituan/longcat-flash-thinking": { id: "meituan/longcat-flash-thinking", @@ -11494,13 +11612,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, + input: 0.72, + output: 0.72, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 16384, + contextWindow: 128000, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-3.1-8b": { id: "meta/llama-3.1-8b", @@ -11511,12 +11629,12 @@ export const MODELS = { 
reasoning: false, input: ["text"], cost: { - input: 0.03, - output: 0.049999999999999996, - cacheRead: 0, + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 16384, } satisfies Model<"anthropic-messages">, "meta/llama-3.2-11b": { @@ -11579,12 +11697,12 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.15, - output: 0.6, + input: 0.24, + output: 0.9700000000000001, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-4-scout": { @@ -11596,12 +11714,12 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.08, - output: 0.3, + input: 0.16999999999999998, + output: 0.66, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "minimax/minimax-m2": { @@ -11632,8 +11750,8 @@ export const MODELS = { cost: { input: 0.3, output: 1.2, - cacheRead: 0.15, - cacheWrite: 0, + cacheRead: 0.03, + cacheWrite: 0.375, }, contextWindow: 204800, maxTokens: 131072, @@ -11686,8 +11804,42 @@ export const MODELS = { cacheRead: 0.03, cacheWrite: 0.375, }, - contextWindow: 4096, - maxTokens: 4096, + contextWindow: 204800, + maxTokens: 131000, } satisfies Model<"anthropic-messages">, + "minimax/minimax-m2.7": { + id: "minimax/minimax-m2.7", + name: "MiniMax M2.7", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, + "minimax/minimax-m2.7-highspeed": { + id: "minimax/minimax-m2.7-highspeed", + name: "MiniMax M2.7 High Speed", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131100, } satisfies Model<"anthropic-messages">, "mistral/codestral": { id: "mistral/codestral", @@ -11715,8 +11867,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.39999999999999997, + output: 2, cacheRead: 0, cacheWrite: 0, }, @@ -11749,8 +11901,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.09999999999999999, + output: 0.3, cacheRead: 0, cacheWrite: 0, }, @@ -11766,8 +11918,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.04, - output: 0.04, + input: 0.09999999999999999, + output: 0.09999999999999999, cacheRead: 0, cacheWrite: 0, }, @@ -11783,8 +11935,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, + input: 0.15, + output: 0.15, cacheRead: 0, cacheWrite: 0, }, @@ -11868,14 +12020,31 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.5, - output: 2, - cacheRead: 0, + input: 0.6, + output: 2.5, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 16384, } satisfies Model<"anthropic-messages">, + "moonshotai/kimi-k2-0905": { + id: "moonshotai/kimi-k2-0905", + name: "Kimi K2 0905", + api:
"anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 0.6, + output: 2.5, + cacheRead: 0.15, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 16384, + } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", name: "Kimi K2 Thinking", @@ -11885,13 +12054,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.47, - output: 2, - cacheRead: 0.14100000000000001, + input: 0.6, + output: 2.5, + cacheRead: 0.15, cacheWrite: 0, }, - contextWindow: 216144, - maxTokens: 216144, + contextWindow: 262114, + maxTokens: 262114, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking-turbo": { id: "moonshotai/kimi-k2-thinking-turbo", @@ -11919,9 +12088,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 2.4, - output: 10, - cacheRead: 0, + input: 1.15, + output: 8, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 256000, @@ -11936,13 +12105,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.5, - output: 2.8, - cacheRead: 0, + input: 0.6, + output: 3, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 262114, + maxTokens: 262114, } satisfies Model<"anthropic-messages">, "nvidia/nemotron-nano-12b-v2-vl": { id: "nvidia/nemotron-nano-12b-v2-vl", @@ -11970,31 +12139,14 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.04, - output: 0.16, + input: 0.06, + output: 0.22999999999999998, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "openai/codex-mini": { - id: "openai/codex-mini", - name: "Codex Mini", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"anthropic-messages">, "openai/gpt-4-turbo": { id: "openai/gpt-4-turbo", name: "GPT-4 Turbo", @@ -12057,7 +12209,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.39999999999999997, - cacheRead: 0.03, + cacheRead: 0.024999999999999998, cacheWrite: 0, }, contextWindow: 1047576, @@ -12108,7 +12260,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12138,11 +12290,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12159,7 +12311,7 @@ export const MODELS = { cost: { input: 0.25, output: 2, - cacheRead: 0.03, + cacheRead: 0.024999999999999998, cacheWrite: 0, }, contextWindow: 400000, @@ -12176,7 +12328,7 @@ export const MODELS = { cost: { input: 0.049999999999999996, output: 0.39999999999999997, - cacheRead: 0.01, + cacheRead: 0.005, cacheWrite: 0, }, contextWindow: 400000, @@ -12210,7 +12362,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12261,7 +12413,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, 
cacheWrite: 0, }, contextWindow: 128000, @@ -12278,7 +12430,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12295,7 +12447,7 @@ export const MODELS = { cost: { input: 1.75, output: 14, - cacheRead: 0.18, + cacheRead: 0.175, cacheWrite: 0, }, contextWindow: 400000, @@ -12400,7 +12552,41 @@ export const MODELS = { cacheRead: 0.25, cacheWrite: 0, }, - contextWindow: 200000, + contextWindow: 1050000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, + "openai/gpt-5.4-mini": { + id: "openai/gpt-5.4-mini", + name: "GPT 5.4 Mini", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, + "openai/gpt-5.4-nano": { + id: "openai/gpt-5.4-nano", + name: "GPT 5.4 Nano", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, "openai/gpt-5.4-pro": { @@ -12420,23 +12606,6 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, - "openai/gpt-oss-120b": { - id: "openai/gpt-oss-120b", - name: "gpt-oss-120b", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"anthropic-messages">, "openai/gpt-oss-20b": { id: "openai/gpt-oss-20b", name: "gpt-oss-20b", @@ -12624,40 +12793,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "vercel/v0-1.0-md": { - id: "vercel/v0-1.0-md", - name: "v0-1.0-md", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "vercel/v0-1.5-md": { - id: "vercel/v0-1.5-md", - name: "v0-1.5-md", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32768, - } satisfies Model<"anthropic-messages">, "xai/grok-2-vision": { id: "xai/grok-2-vision", name: "Grok 2 Vision", @@ -12686,7 +12821,7 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, + cacheRead: 0.75, cacheWrite: 0, }, contextWindow: 131072, @@ -12703,7 +12838,7 @@ export const MODELS = { cost: { input: 5, output: 25, - cacheRead: 0, + cacheRead: 1.25, cacheWrite: 0, }, contextWindow: 131072, @@ -12720,7 +12855,7 @@ export const MODELS = { cost: { input: 0.3, output: 0.5, - cacheRead: 0, + cacheRead: 0.075, cacheWrite: 0, }, contextWindow: 131072, @@ -12754,7 +12889,7 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, + 
cacheRead: 0.75, cacheWrite: 0, }, contextWindow: 256000, @@ -12828,6 +12963,57 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-multi-agent-beta": { + id: "xai/grok-4.20-multi-agent-beta", + name: "Grok 4.20 Multi Agent Beta", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-non-reasoning-beta": { + id: "xai/grok-4.20-non-reasoning-beta", + name: "Grok 4.20 Beta Non-Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-reasoning-beta": { + id: "xai/grok-4.20-reasoning-beta", + name: "Grok 4.20 Beta Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-code-fast-1": { id: "xai/grok-code-fast-1", name: "Grok Code Fast 1", @@ -12854,14 +13040,31 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09, - output: 0.29, - cacheRead: 0, + input: 0.09999999999999999, + output: 0.3, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 262144, maxTokens: 32000, } satisfies Model<"anthropic-messages">, + "xiaomi/mimo-v2-pro": { + id: "xiaomi/mimo-v2-pro", + name: "MiMo V2 Pro", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, "zai/glm-4.5": { id: "zai/glm-4.5", name: "GLM-4.5", @@ -12873,11 +13076,11 @@ export const MODELS = { cost: { input: 0.6, output: 2.2, - cacheRead: 0, + cacheRead: 0.11, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 128000, + maxTokens: 96000, } satisfies Model<"anthropic-messages">, "zai/glm-4.5-air": { id: "zai/glm-4.5-air", @@ -12902,16 +13105,16 @@ export const MODELS = { api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, + reasoning: false, input: ["text", "image"], cost: { input: 0.6, output: 1.7999999999999998, - cacheRead: 0, + cacheRead: 0.11, cacheWrite: 0, }, - contextWindow: 65536, - maxTokens: 16384, + contextWindow: 66000, + maxTokens: 16000, } satisfies Model<"anthropic-messages">, "zai/glm-4.6": { id: "zai/glm-4.6", @@ -12922,8 +13125,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.44999999999999996, - output: 1.7999999999999998, + input: 0.6, + output: 2.2, cacheRead: 0.11, cacheWrite: 0, }, @@ -12973,14 +13176,31 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.43, - output: 1.75, - cacheRead: 0.08, + input: 0.6, + output: 2.2, + 
cacheRead: 0, cacheWrite: 0, }, - contextWindow: 202752, + contextWindow: 200000, maxTokens: 120000, } satisfies Model<"anthropic-messages">, + "zai/glm-4.7-flash": { + id: "zai/glm-4.7-flash", + name: "GLM 4.7 Flash", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.07, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, "zai/glm-4.7-flashx": { id: "zai/glm-4.7-flashx", name: "GLM 4.7 FlashX", @@ -13000,7 +13220,7 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "zai/glm-5": { id: "zai/glm-5", - name: "GLM-5", + name: "GLM 5", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", @@ -13013,7 +13233,24 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 202800, - maxTokens: 131072, + maxTokens: 131100, + } satisfies Model<"anthropic-messages">, + "zai/glm-5-turbo": { + id: "zai/glm-5-turbo", + name: "GLM 5 Turbo", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 202800, + maxTokens: 131100, } satisfies Model<"anthropic-messages">, }, "xai": { @@ -13340,6 +13577,40 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, + "grok-4.20-0309-non-reasoning": { + id: "grok-4.20-0309-non-reasoning", + name: "Grok 4.20 (Non-Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-4.20-0309-reasoning": { + id: "grok-4.20-0309-reasoning", + name: "Grok 4.20 (Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, "grok-beta": { id: "grok-beta", name: "Grok Beta", @@ -13555,747 +13826,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, - }, - "alibaba-coding-plan": { - "qwen3.5-plus": { - id: "qwen3.5-plus", - name: "Qwen3.5 Plus", + "glm-5-turbo": { + id: "glm-5-turbo", + name: "GLM-5-Turbo", api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, reasoning: true, input: ["text"], cost: { - input: 0, - output: 0, - cacheRead: 0, + input: 1.2, + output: 4, + cacheRead: 0.24, cacheWrite: 0, }, - contextWindow: 983616, - maxTokens: 65536, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-max-2026-01-23": { - id: "qwen3-max-2026-01-23", - name: "Qwen3 Max 2026-01-23", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - 
output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 258048, - maxTokens: 32768, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-coder-next": { - id: "qwen3-coder-next", - name: "Qwen3 Coder Next", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 65536, - compat: { supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-coder-plus": { - id: "qwen3-coder-plus", - name: "Qwen3 Coder Plus", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 997952, - maxTokens: 65536, - compat: { supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "MiniMax-M2.5": { - id: "MiniMax-M2.5", - name: "MiniMax M2.5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 196608, - maxTokens: 65536, - compat: { supportsStore: false, supportsDeveloperRole: false, supportsReasoningEffort: true, maxTokensField: "max_tokens" }, - } satisfies Model<"openai-completions">, - "glm-5": { - id: "glm-5", - name: "GLM-5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 16384, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 169984, - maxTokens: 16384, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 258048, - maxTokens: 32768, - compat: { thinkingFormat: "zai", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - }, - "ollama-cloud": { - "cogito-2.1:671b": { - id: "cogito-2.1:671b", - name: "Cogito 2.1 671B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "deepseek-v3.1:671b": { - id: "deepseek-v3.1:671b", - name: "DeepSeek 
V3.1 671B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "deepseek-v3.2": { - id: "deepseek-v3.2", - name: "DeepSeek V3.2", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "devstral-2:123b": { - id: "devstral-2:123b", - name: "Devstral 2 123B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "devstral-small-2:24b": { - id: "devstral-small-2:24b", - name: "Devstral Small 2 24B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "gemini-3-flash-preview": { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash Preview", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "gemma3:12b": { - id: "gemma3:12b", - name: "Gemma 3 12B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, + contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, - "gemma3:27b": { - id: "gemma3:27b", - name: "Gemma 3 27B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - 
contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "gemma3:4b": { - id: "gemma3:4b", - name: "Gemma 3 4B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM 4.6", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM 4.7", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-5": { - id: "glm-5", - name: "GLM 5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "gpt-oss:120b": { - id: "gpt-oss:120b", - name: "GPT-OSS 120B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "gpt-oss:20b": { - id: "gpt-oss:20b", - name: "GPT-OSS 20B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "kimi-k2:1t": { - id: "kimi-k2:1t", - name: "Kimi K2 1T", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - 
maxTokens: 262144, - } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "kimi-k2-thinking": { - id: "kimi-k2-thinking", - name: "Kimi K2 Thinking", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "minimax-m2.1": { - id: "minimax-m2.1", - name: "Minimax M2.1", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax-m2.5": { - id: "minimax-m2.5", - name: "Minimax M2.5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax-m2": { - id: "minimax-m2", - name: "Minimax M2", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:14b": { - id: "ministral-3:14b", - name: "Ministral 3 14B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:3b": { - id: "ministral-3:3b", - name: "Ministral 3 3B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - 
output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:8b": { - id: "ministral-3:8b", - name: "Ministral 3 8B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "mistral-large-3:675b": { - id: "mistral-large-3:675b", - name: "Mistral Large 3 675B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "nemotron-3-nano:30b": { - id: "nemotron-3-nano:30b", - name: "Nemotron 3 Nano 30B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "nemotron-3-super": { - id: "nemotron-3-super", - name: "Nemotron 3 Super", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3.5:397b": { - id: "qwen3.5:397b", - name: "Qwen 3.5 397B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 81920, - } satisfies Model<"openai-completions">, - "qwen3-coder:480b": { - id: "qwen3-coder:480b", - name: "Qwen 3 Coder 480B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3-coder-next": { - id: "qwen3-coder-next", - name: "Qwen 3 Coder Next", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: 
{"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3-next:80b": { - id: "qwen3-next:80b", - name: "Qwen 3 Next 80B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen3-vl:235b-instruct": { - id: "qwen3-vl:235b-instruct", - name: "Qwen 3 VL 235B Instruct", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "qwen3-vl:235b": { - id: "qwen3-vl:235b", - name: "Qwen 3 VL 235B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "rnj-1:8b": { - id: "rnj-1:8b", - name: "RNJ 1 8B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, }, } as const; diff --git a/packages/pi-ai/src/models.ts b/packages/pi-ai/src/models.ts index 3c06c0cc6..8a4805ac1 100644 --- a/packages/pi-ai/src/models.ts +++ b/packages/pi-ai/src/models.ts @@ -12,12 +12,15 @@ for (const [provider, models] of Object.entries(MODELS)) { modelRegistry.set(provider, providerModels); } +/** Providers that have entries in the generated MODELS constant */ +type GeneratedProvider = keyof typeof MODELS & KnownProvider; + type ModelApi< - TProvider extends KnownProvider, + TProvider extends GeneratedProvider, TModelId extends keyof (typeof MODELS)[TProvider], > = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never; -export function getModel( +export function getModel( provider: TProvider, modelId: TModelId, ): Model> { @@ -31,9 +34,9 @@ export function getProviders(): KnownProvider[] { export function getModels( provider: TProvider, -): Model>[] { +): Model[] { const models = modelRegistry.get(provider); - return models ? (Array.from(models.values()) as Model>[]) : []; + return models ? 
diff --git a/packages/pi-ai/src/utils/oauth/github-copilot.ts b/packages/pi-ai/src/utils/oauth/github-copilot.ts index 08ffb24d3..eae8e9a5f 100644 --- a/packages/pi-ai/src/utils/oauth/github-copilot.ts +++ b/packages/pi-ai/src/utils/oauth/github-copilot.ts @@ -8,6 +8,8 @@ import type { OAuthCredentials, OAuthLoginCallbacks, OAuthProviderInterface } fr type CopilotCredentials = OAuthCredentials & { enterpriseUrl?: string; + /** Model limits from the /models API, keyed by model ID */ + modelLimits?: Record<string, { contextWindow: number; maxTokens: number }>; }; const decode = (s: string) => atob(s); @@ -305,6 +307,47 @@ async function enableAllGitHubCopilotModels( ); } +async function fetchCopilotModelLimits( + token: string, + enterpriseDomain?: string, +): Promise<Record<string, { contextWindow: number; maxTokens: number }>> { + const baseUrl = getGitHubCopilotBaseUrl(token, enterpriseDomain); + try { + const response = await fetch(`${baseUrl}/models`, { + headers: { + Accept: "application/json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": "2025-05-01", + ...COPILOT_HEADERS, + }, + signal: AbortSignal.timeout(30_000), + }); + if (!response.ok) return {}; + const data = (await response.json()) as { + data?: Array<{ + id: string; + capabilities?: { + limits?: { + max_context_window_tokens?: number; + max_output_tokens?: number; + }; + }; + }>; + }; + const limits: Record<string, { contextWindow: number; maxTokens: number }> = {}; + for (const m of data.data || []) { + const ctx = m.capabilities?.limits?.max_context_window_tokens; + const out = m.capabilities?.limits?.max_output_tokens; + if (typeof ctx === "number" && typeof out === "number" && ctx > 0 && out > 0 && Number.isFinite(ctx) && Number.isFinite(out)) { + limits[m.id] = { contextWindow: ctx, maxTokens: out }; + } + } + return limits; + } catch { + return {}; + } +} + /** * Login with GitHub Copilot OAuth (device code flow) * * @@ -351,6 +394,14 @@ export async function loginGitHubCopilot(options: { // Enable all models after successful login options.onProgress?.("Enabling models..."); await enableAllGitHubCopilotModels(credentials.access, enterpriseDomain ?? undefined); + + // Fetch real model limits from the Copilot API + options.onProgress?.("Fetching model limits..."); + const modelLimits = await fetchCopilotModelLimits(credentials.access, enterpriseDomain ?? undefined); + if (Object.keys(modelLimits).length > 0) { + (credentials as CopilotCredentials).modelLimits = modelLimits; + } + return credentials; } @@ -369,7 +420,16 @@ export const githubCopilotOAuthProvider: OAuthProviderInterface = { async refreshToken(credentials: OAuthCredentials): Promise<OAuthCredentials> { const creds = credentials as CopilotCredentials; - return refreshGitHubCopilotToken(creds.refresh, creds.enterpriseUrl); + const refreshed = await refreshGitHubCopilotToken(creds.refresh, creds.enterpriseUrl); + try { + const modelLimits = await fetchCopilotModelLimits(refreshed.access, creds.enterpriseUrl); + if (Object.keys(modelLimits).length > 0) { + (refreshed as CopilotCredentials).modelLimits = modelLimits; + } + } catch { + // Model limits fetch is best-effort; don't block token refresh + } + return refreshed; }, getApiKey(credentials: OAuthCredentials): string { @@ -380,6 +440,18 @@ const creds = credentials as CopilotCredentials; const domain = creds.enterpriseUrl ? (normalizeDomain(creds.enterpriseUrl) ??
undefined) : undefined; const baseUrl = getGitHubCopilotBaseUrl(creds.access, domain); - return models.map((m) => (m.provider === "github-copilot" ? { ...m, baseUrl } : m)); + const limits = creds.modelLimits; + return models.map((m) => { + if (m.provider !== "github-copilot") return m; + const modelLimits = limits?.[m.id]; + return { + ...m, + baseUrl, + ...(modelLimits && { + contextWindow: modelLimits.contextWindow, + maxTokens: modelLimits.maxTokens, + }), + }; + }); }, }; diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index e921328f2..c632090a7 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -202,6 +202,7 @@ export class AuthStorage { private fallbackResolver?: (provider: string) => string | undefined; private loadError: Error | null = null; private errors: Error[] = []; + private credentialChangeListeners: Set<() => void> = new Set(); /** * Round-robin index per provider. Incremented on each call to getApiKey @@ -263,6 +264,25 @@ export class AuthStorage { this.fallbackResolver = resolver; } + /** + * Register a callback to be notified when credentials change (e.g., after OAuth token refresh). + * Returns a function to unregister the listener. + */ + onCredentialChange(listener: () => void): () => void { + this.credentialChangeListeners.add(listener); + return () => this.credentialChangeListeners.delete(listener); + } + + private notifyCredentialChange(): void { + for (const listener of this.credentialChangeListeners) { + try { + listener(); + } catch { + // Don't let listener errors break the refresh flow + } + } + } + private recordError(error: unknown): void { const normalizedError = error instanceof Error ? error : new Error(String(error)); this.errors.push(normalizedError); @@ -667,6 +687,11 @@ export class AuthStorage { return { result: refreshed, next: JSON.stringify(merged, null, 2) }; }); + // Notify listeners after credential change (e.g., model registry refresh) + if (result) { + queueMicrotask(() => this.notifyCredentialChange()); + } + return result; } diff --git a/packages/pi-coding-agent/src/core/model-registry.ts b/packages/pi-coding-agent/src/core/model-registry.ts index 08766af24..b6d161c89 100644 --- a/packages/pi-coding-agent/src/core/model-registry.ts +++ b/packages/pi-coding-agent/src/core/model-registry.ts @@ -243,6 +243,9 @@ export class ModelRegistry { return undefined; }); + // Refresh models when credentials change (e.g., OAuth token refresh with new model limits) + this.authStorage.onCredentialChange(() => this.refresh()); + // Load models this.loadModels(); }
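Reviewer note: the credential-change hook added in auth-storage.ts and consumed in model-registry.ts is a plain observer pattern. Below is a minimal, self-contained sketch of that pattern for reference — simplified, hypothetical names (TinyAuthStorage, refreshed), not code from this PR; the real logic lives in AuthStorage.onCredentialChange / notifyCredentialChange above.

// listener-sketch.ts — standalone illustration, not part of this patch
class TinyAuthStorage {
	private listeners = new Set<() => void>();

	// Register a listener; returns an unsubscribe function, mirroring
	// AuthStorage.onCredentialChange in this diff.
	onCredentialChange(listener: () => void): () => void {
		this.listeners.add(listener);
		return () => this.listeners.delete(listener);
	}

	// Called after a successful token refresh. queueMicrotask defers the
	// listeners so the refresh flow can neither be blocked nor broken by them,
	// matching the try/catch + queueMicrotask approach used above.
	refreshed(): void {
		queueMicrotask(() => {
			for (const listener of this.listeners) {
				try {
					listener();
				} catch {
					// listener errors must not break the refresh flow
				}
			}
		});
	}
}

const storage = new TinyAuthStorage();
const unsubscribe = storage.onCredentialChange(() => console.log("refreshing model registry"));
storage.refreshed(); // logs on the next microtask
unsubscribe();

One consequence of the microtask deferral worth noting: by the time a subscribed ModelRegistry.refresh() runs, the refreshed credentials (including any newly fetched Copilot modelLimits) have already been persisted, so the registry always reads post-refresh state.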