diff --git a/packages/pi-ai/src/models.generated.test.ts b/packages/pi-ai/src/models.generated.test.ts new file mode 100644 index 000000000..bfba0704d --- /dev/null +++ b/packages/pi-ai/src/models.generated.test.ts @@ -0,0 +1,373 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { MODELS } from "./models.generated.js"; +import { getModel, getModels, getProviders } from "./models.js"; + +// ═══════════════════════════════════════════════════════════════════════════ +// Regression: qwen/qwen3.6-plus missing from OpenRouter (issue #3582) +// ═══════════════════════════════════════════════════════════════════════════ + +describe("regression #3582 — qwen/qwen3.6-plus available via openrouter", () => { + it("qwen/qwen3.6-plus exists in MODELS['openrouter']", () => { + const model = MODELS["openrouter"]["qwen/qwen3.6-plus" as keyof (typeof MODELS)["openrouter"]]; + assert.ok(model, "qwen/qwen3.6-plus must be present in MODELS.openrouter"); + }); + + it("qwen/qwen3.6-plus is accessible via getModel()", () => { + const model = getModel("openrouter", "qwen/qwen3.6-plus" as any); + assert.ok(model, "getModel('openrouter', 'qwen/qwen3.6-plus') must return a model"); + }); + + it("qwen/qwen3.6-plus has id matching its registry key", () => { + const model = getModel("openrouter", "qwen/qwen3.6-plus" as any); + assert.equal(model.id, "qwen/qwen3.6-plus"); + }); + + it("qwen/qwen3.6-plus has provider set to openrouter", () => { + const model = getModel("openrouter", "qwen/qwen3.6-plus" as any); + assert.equal(model.provider, "openrouter"); + }); + + it("qwen/qwen3.6-plus has reasoning enabled", () => { + const model = getModel("openrouter", "qwen/qwen3.6-plus" as any); + assert.equal(model.reasoning, true, "Qwen3.6 Plus is a reasoning model"); + }); + + it("qwen/qwen3.6-plus has 1M context window", () => { + const model = getModel("openrouter", "qwen/qwen3.6-plus" as any); + assert.equal(model.contextWindow, 1_000_000); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Regression: z-ai/glm-5.1 missing from OpenRouter (issue #4069) +// ═══════════════════════════════════════════════════════════════════════════ + +describe("regression #4069 — z-ai/glm-5.1 available via openrouter", () => { + it("z-ai/glm-5.1 exists in MODELS['openrouter']", () => { + const model = MODELS["openrouter"]["z-ai/glm-5.1" as keyof (typeof MODELS)["openrouter"]]; + assert.ok(model, "z-ai/glm-5.1 must be present in MODELS.openrouter"); + }); + + it("z-ai/glm-5.1 is accessible via getModel()", () => { + const model = getModel("openrouter", "z-ai/glm-5.1" as any); + assert.ok(model, "getModel('openrouter', 'z-ai/glm-5.1') must return a model"); + }); + + it("z-ai/glm-5.1 has id matching its registry key", () => { + const model = getModel("openrouter", "z-ai/glm-5.1" as any); + assert.equal(model.id, "z-ai/glm-5.1"); + }); + + it("z-ai/glm-5.1 has provider set to openrouter", () => { + const model = getModel("openrouter", "z-ai/glm-5.1" as any); + assert.equal(model.provider, "openrouter"); + }); + + it("z-ai/glm-5.1 has a positive context window", () => { + const model = getModel("openrouter", "z-ai/glm-5.1" as any); + assert.ok(model.contextWindow > 0); + }); + + it("z-ai/glm-5.1 uses the OpenRouter base URL", () => { + const model = getModel("openrouter", "z-ai/glm-5.1" as any); + assert.equal(model.baseUrl, "https://openrouter.ai/api/v1"); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Structural invariants — every model in MODELS must be well-formed +// ═══════════════════════════════════════════════════════════════════════════ + +describe("MODELS structural invariants", () => { + type ModelEntry = { providerKey: string; modelKey: string; model: Record }; + + function allModels(): ModelEntry[] { + const entries: ModelEntry[] = []; + for (const [providerKey, providerModels] of Object.entries(MODELS)) { + for (const [modelKey, model] of Object.entries(providerModels)) { + entries.push({ providerKey, modelKey, model: model as Record }); + } + } + return entries; + } + + it("every model's id field matches its key in MODELS", () => { + const mismatches: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (model["id"] !== modelKey) { + mismatches.push(`${providerKey}/${modelKey}: id="${model["id"]}"`); + } + } + assert.deepEqual(mismatches, [], `Models where 'id' doesn't match registry key:\n ${mismatches.join("\n ")}`); + }); + + it("every model's provider field matches its parent provider key", () => { + const mismatches: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (model["provider"] !== providerKey) { + mismatches.push(`${providerKey}/${modelKey}: provider="${model["provider"]}"`); + } + } + assert.deepEqual(mismatches, [], `Models where 'provider' doesn't match parent key:\n ${mismatches.join("\n ")}`); + }); + + it("every model has a non-empty string name", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (typeof model["name"] !== "string" || model["name"].trim() === "") { + invalid.push(`${providerKey}/${modelKey}`); + } + } + assert.deepEqual(invalid, [], `Models with missing or empty name:\n ${invalid.join("\n ")}`); + }); + + it("every model has a non-empty string api", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (typeof model["api"] !== "string" || model["api"].trim() === "") { + invalid.push(`${providerKey}/${modelKey}`); + } + } + assert.deepEqual(invalid, [], `Models with missing or empty api:\n ${invalid.join("\n ")}`); + }); + + it("every model's baseUrl starts with https:// (or is empty for azure-openai-responses)", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (providerKey === "azure-openai-responses") continue; + const url = model["baseUrl"]; + if (typeof url !== "string" || !url.startsWith("https://")) { + invalid.push(`${providerKey}/${modelKey}: baseUrl="${url}"`); + } + } + assert.deepEqual(invalid, [], `Models with missing or non-HTTPS baseUrl:\n ${invalid.join("\n ")}`); + }); + + it("azure-openai-responses models have an empty baseUrl (runtime-configured)", () => { + const models = getModels("azure-openai-responses"); + assert.ok(models.length > 0, "azure-openai-responses must have at least one model"); + for (const model of models) { + assert.equal(model.baseUrl, "", `azure-openai-responses/${model.id} should have empty baseUrl`); + } + }); + + it("every model has a boolean reasoning field", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (typeof model["reasoning"] !== "boolean") { + invalid.push(`${providerKey}/${modelKey}: reasoning=${model["reasoning"]}`); + } + } + assert.deepEqual(invalid, [], `Models with non-boolean reasoning:\n ${invalid.join("\n ")}`); + }); + + it("every model has a non-empty input array", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + const input = model["input"]; + if (!Array.isArray(input) || input.length === 0) { + invalid.push(`${providerKey}/${modelKey}`); + } + } + assert.deepEqual(invalid, [], `Models with missing or empty input array:\n ${invalid.join("\n ")}`); + }); + + it("every model has a positive contextWindow", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + const cw = model["contextWindow"]; + if (typeof cw !== "number" || cw <= 0 || !Number.isFinite(cw)) { + invalid.push(`${providerKey}/${modelKey}: contextWindow=${cw}`); + } + } + assert.deepEqual(invalid, [], `Models with invalid contextWindow:\n ${invalid.join("\n ")}`); + }); + + it("every model has a positive maxTokens", () => { + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + const mt = model["maxTokens"]; + if (typeof mt !== "number" || mt <= 0 || !Number.isFinite(mt)) { + invalid.push(`${providerKey}/${modelKey}: maxTokens=${mt}`); + } + } + assert.deepEqual(invalid, [], `Models with invalid maxTokens:\n ${invalid.join("\n ")}`); + }); + + it("every model's maxTokens does not exceed contextWindow", () => { + const knownExceptions = new Set([ + "openrouter/meta-llama/llama-3-8b-instruct", + "openrouter/nex-agi/deepseek-v3.1-nex-n1", + "openrouter/openai/gpt-3.5-turbo-0613", + "openrouter/z-ai/glm-5", + ]); + + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (knownExceptions.has(`${providerKey}/${modelKey}`)) continue; + const cw = model["contextWindow"] as number; + const mt = model["maxTokens"] as number; + if (typeof cw === "number" && typeof mt === "number" && mt > cw) { + invalid.push(`${providerKey}/${modelKey}: maxTokens(${mt}) > contextWindow(${cw})`); + } + } + assert.deepEqual(invalid, [], `Models where maxTokens exceeds contextWindow:\n ${invalid.join("\n ")}`); + }); + + it("every model has a cost object with non-negative numeric fields", () => { + const knownNegativeCostModels = new Set([ + "openrouter/openrouter/auto", + ]); + + const invalid: string[] = []; + for (const { providerKey, modelKey, model } of allModels()) { + if (knownNegativeCostModels.has(`${providerKey}/${modelKey}`)) continue; + const cost = model["cost"] as Record | undefined; + if (!cost || typeof cost !== "object") { + invalid.push(`${providerKey}/${modelKey}: missing cost object`); + continue; + } + for (const field of ["input", "output", "cacheRead", "cacheWrite"] as const) { + const val = cost[field]; + if (typeof val !== "number" || val < 0 || !Number.isFinite(val)) { + invalid.push(`${providerKey}/${modelKey}: cost.${field}=${val}`); + } + } + } + assert.deepEqual(invalid, [], `Models with invalid cost fields:\n ${invalid.join("\n ")}`); + }); + + it("no provider has duplicate model IDs", () => { + const duplicates: string[] = []; + for (const [providerKey, providerModels] of Object.entries(MODELS)) { + const ids = Object.values(providerModels).map((m) => (m as Record)["id"] as string); + const seen = new Set(); + for (const id of ids) { + if (seen.has(id)) duplicates.push(`${providerKey}/${id}`); + seen.add(id); + } + } + assert.deepEqual(duplicates, [], `Duplicate model IDs within a provider:\n ${duplicates.join("\n ")}`); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Registry shape +// ═══════════════════════════════════════════════════════════════════════════ + +describe("MODELS registry shape", () => { + it("has exactly 23 providers", () => { + const count = Object.keys(MODELS).length; + assert.equal(count, 23, `Expected 23 providers, got ${count}: ${Object.keys(MODELS).join(", ")}`); + }); + + it("has at least 200 models in total (sanity check)", () => { + let total = 0; + for (const providerModels of Object.values(MODELS)) { + total += Object.keys(providerModels).length; + } + assert.ok(total >= 200, `Registry has only ${total} models — unexpectedly small`); + }); + + it("all 23 expected providers are present", () => { + const expected = [ + "amazon-bedrock", + "anthropic", + "azure-openai-responses", + "cerebras", + "github-copilot", + "google", + "google-antigravity", + "google-gemini-cli", + "google-vertex", + "groq", + "huggingface", + "kimi-coding", + "minimax", + "minimax-cn", + "mistral", + "openai", + "openai-codex", + "opencode", + "opencode-go", + "openrouter", + "vercel-ai-gateway", + "xai", + "zai", + ]; + const actual = Object.keys(MODELS).sort(); + assert.deepEqual(actual, expected.sort()); + }); + + it("getProviders() returns all generated providers", () => { + const providers = getProviders(); + for (const p of Object.keys(MODELS)) { + assert.ok(providers.includes(p as any), `getProviders() missing generated provider: ${p}`); + } + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Removed models must not exist +// ═══════════════════════════════════════════════════════════════════════════ + +describe("removed models are absent from the registry", () => { + const removedModels: Array<{ provider: string; id: string }> = [ + { provider: "openrouter", id: "anthropic/claude-3.5-sonnet" }, + { provider: "openrouter", id: "anthropic/claude-3.5-sonnet-20240620" }, + { provider: "openrouter", id: "mistralai/mistral-small-24b-instruct-2501" }, + { provider: "openrouter", id: "mistralai/mistral-small-3.1-24b-instruct:free" }, + { provider: "openrouter", id: "qwen/qwen3-4b:free" }, + { provider: "openrouter", id: "stepfun/step-3.5-flash:free" }, + { provider: "openrouter", id: "x-ai/grok-4.20-beta" }, + { provider: "openrouter", id: "arcee-ai/trinity-mini:free" }, + { provider: "openrouter", id: "google/gemini-3-pro-preview" }, + { provider: "openrouter", id: "kwaipilot/kat-coder-pro" }, + { provider: "openrouter", id: "meituan/longcat-flash-thinking" }, + { provider: "vercel-ai-gateway", id: "xai/grok-2-vision" }, + { provider: "anthropic", id: "claude-3-7-sonnet-latest" }, + ]; + + for (const { provider, id } of removedModels) { + it(`${provider}/${id} has been removed`, () => { + const model = getModel(provider as any, id as any); + assert.equal(model, undefined, `${provider}/${id} should be removed but is still present`); + }); + } +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Spot-checks for notable models added in this regeneration +// ═══════════════════════════════════════════════════════════════════════════ + +describe("spot-checks for models added in this regeneration", () => { + const newModels: Array<{ provider: string; id: string; reasoning?: boolean }> = [ + { provider: "openrouter", id: "z-ai/glm-5.1" }, + { provider: "openrouter", id: "z-ai/glm-5v-turbo" }, + { provider: "openrouter", id: "google/gemma-4-31b-it" }, + { provider: "openrouter", id: "google/gemma-4-26b-a4b-it" }, + { provider: "openrouter", id: "arcee-ai/trinity-large-thinking", reasoning: true }, + { provider: "openrouter", id: "openai/gpt-audio" }, + { provider: "openrouter", id: "anthropic/claude-opus-4.6-fast" }, + { provider: "openrouter", id: "qwen/qwen3.6-plus" }, + { provider: "groq", id: "groq/compound" }, + { provider: "groq", id: "groq/compound-mini" }, + { provider: "huggingface", id: "zai-org/GLM-5.1" }, + { provider: "openai", id: "gpt-5.3-chat-latest" }, + { provider: "mistral", id: "mistral-small-2603" }, + { provider: "zai", id: "glm-5.1" }, + ]; + + for (const { provider, id, reasoning } of newModels) { + it(`${provider}/${id} is present in the registry`, () => { + const model = getModel(provider as any, id as any); + assert.ok(model, `Expected ${provider}/${id} to be present after regeneration`); + assert.equal(model.id, id); + assert.equal(model.provider, provider); + if (reasoning !== undefined) { + assert.equal(model.reasoning, reasoning, `${id} reasoning should be ${reasoning}`); + } + }); + } +}); diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index e62965533..cb775bf68 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -804,6 +804,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"bedrock-converse-stream">, + "minimax.minimax-m2.5": { + id: "minimax.minimax-m2.5", + name: "MiniMax M2.5", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 98304, + } satisfies Model<"bedrock-converse-stream">, "mistral.devstral-2-123b": { id: "mistral.devstral-2-123b", name: "Devstral 2 123B", @@ -1042,6 +1059,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "nvidia.nemotron-super-3-120b": { + id: "nvidia.nemotron-super-3-120b", + name: "NVIDIA Nemotron 3 Super 120B A12B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 0.15, + output: 0.65, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"bedrock-converse-stream">, "openai.gpt-oss-120b-1:0": { id: "openai.gpt-oss-120b-1:0", name: "gpt-oss-120b", @@ -1178,6 +1212,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 65536, } satisfies Model<"bedrock-converse-stream">, + "qwen.qwen3-coder-next": { + id: "qwen.qwen3-coder-next", + name: "Qwen3 Coder Next", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 0.22, + output: 1.8, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 65536, + } satisfies Model<"bedrock-converse-stream">, "qwen.qwen3-next-80b-a3b": { id: "qwen.qwen3-next-80b-a3b", name: "Qwen/Qwen3-Next-80B-A3B-Instruct", @@ -1416,6 +1467,23 @@ export const MODELS = { contextWindow: 200000, maxTokens: 131072, } satisfies Model<"bedrock-converse-stream">, + "zai.glm-5": { + id: "zai.glm-5", + name: "GLM-5", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 101376, + } satisfies Model<"bedrock-converse-stream">, }, "anthropic": { "claude-3-5-haiku-20241022": { @@ -1503,23 +1571,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, - "claude-3-7-sonnet-latest": { - id: "claude-3-7-sonnet-latest", - name: "Claude Sonnet 3.7 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, "claude-3-haiku-20240307": { id: "claude-3-haiku-20240307", name: "Claude Haiku 3", @@ -2253,6 +2304,23 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"azure-openai-responses">, + "gpt-5.3-chat-latest": { + id: "gpt-5.3-chat-latest", + name: "GPT-5.3 Chat (latest)", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, "gpt-5.3-codex": { id: "gpt-5.3-codex", name: "GPT-5.3 Codex", @@ -2967,7 +3035,7 @@ export const MODELS = { } satisfies Model<"openai-responses">, "gpt-5.4-mini": { id: "gpt-5.4-mini", - name: "GPT-5.4 mini", + name: "GPT-5.4 Mini", api: "openai-responses", provider: "github-copilot", baseUrl: "https://api.individual.githubcopilot.com", @@ -3412,6 +3480,57 @@ export const MODELS = { contextWindow: 131072, maxTokens: 65536, } satisfies Model<"google-generative-ai">, + "gemma-3-27b-it": { + id: "gemma-3-27b-it", + name: "Gemma 3 27B", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"google-generative-ai">, + "gemma-4-26b-it": { + id: "gemma-4-26b-it", + name: "Gemma 4 26B", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"google-generative-ai">, + "gemma-4-31b-it": { + id: "gemma-4-31b-it", + name: "Gemma 4 31B", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"google-generative-ai">, }, "google-antigravity": { "claude-opus-4-5-thinking": { @@ -3913,6 +4032,40 @@ export const MODELS = { contextWindow: 8192, maxTokens: 8192, } satisfies Model<"openai-completions">, + "groq/compound": { + id: "groq/compound", + name: "Compound", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "groq/compound-mini": { + id: "groq/compound-mini", + name: "Compound Mini", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, "llama-3.1-8b-instant": { id: "llama-3.1-8b-instant", name: "Llama 3.1 8B Instant", @@ -4100,6 +4253,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 65536, } satisfies Model<"openai-completions">, + "openai/gpt-oss-safeguard-20b": { + id: "openai/gpt-oss-safeguard-20b", + name: "Safety GPT OSS 20B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.075, + output: 0.3, + cacheRead: 0.037, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 65536, + } satisfies Model<"openai-completions">, "qwen-qwq-32b": { id: "qwen-qwq-32b", name: "Qwen QwQ 32B", @@ -4132,7 +4302,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 16384, + maxTokens: 40960, } satisfies Model<"openai-completions">, }, "huggingface": { @@ -4460,6 +4630,24 @@ export const MODELS = { contextWindow: 202752, maxTokens: 131072, } satisfies Model<"openai-completions">, + "zai-org/GLM-5.1": { + id: "zai-org/GLM-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3.2, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, "kimi-coding": { "k2p5": { @@ -5029,22 +5217,39 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"mistral-conversations">, + "mistral-small-2603": { + id: "mistral-small-2603", + name: "Mistral Small 4", + api: "mistral-conversations", + provider: "mistral", + baseUrl: "https://api.mistral.ai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 256000, + } satisfies Model<"mistral-conversations">, "mistral-small-latest": { id: "mistral-small-latest", name: "Mistral Small (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", - reasoning: false, + reasoning: true, input: ["text", "image"], cost: { - input: 0.1, - output: 0.3, + input: 0.15, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 16384, + contextWindow: 256000, + maxTokens: 256000, } satisfies Model<"mistral-conversations">, "open-mistral-7b": { id: "open-mistral-7b", @@ -5575,6 +5780,23 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.3-chat-latest": { + id: "gpt-5.3-chat-latest", + name: "GPT-5.3 Chat (latest)", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-responses">, "gpt-5.3-codex": { id: "gpt-5.3-codex", name: "GPT-5.3 Codex", @@ -6157,6 +6379,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-5.1": { + id: "glm-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1.4, + output: 4.4, + cacheRead: 0.26, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "gpt-5": { id: "gpt-5", name: "GPT-5", @@ -6412,40 +6651,6 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, - "mimo-v2-omni-free": { - id: "mimo-v2-omni-free", - name: "MiMo V2 Omni Free", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "mimo-v2-pro-free": { - id: "mimo-v2-pro-free", - name: "MiMo V2 Pro Free", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 64000, - } satisfies Model<"openai-completions">, "minimax-m2.5": { id: "minimax-m2.5", name: "MiniMax M2.5", @@ -6494,7 +6699,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 1000000, + contextWindow: 204800, maxTokens: 128000, } satisfies Model<"openai-completions">, }, @@ -6516,6 +6721,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-5.1": { + id: "glm-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1.4, + output: 4.4, + cacheRead: 0.26, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "kimi-k2.5": { id: "kimi-k2.5", name: "Kimi K2.5", @@ -6533,6 +6755,40 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "mimo-v2-omni": { + id: "mimo-v2-omni", + name: "MiMo V2 Omni", + api: "openai-completions", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "mimo-v2-pro": { + id: "mimo-v2-pro", + name: "MiMo V2 Pro", + api: "openai-completions", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 64000, + } satisfies Model<"openai-completions">, "minimax-m2.5": { id: "minimax-m2.5", name: "MiniMax M2.5", @@ -6739,23 +6995,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 8192, } satisfies Model<"openai-completions">, - "anthropic/claude-3.5-sonnet": { - id: "anthropic/claude-3.5-sonnet", - name: "Anthropic: Claude 3.5 Sonnet", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 6, - output: 30, - cacheRead: 0.6, - cacheWrite: 7.5, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"openai-completions">, "anthropic/claude-3.7-sonnet": { id: "anthropic/claude-3.7-sonnet", name: "Anthropic: Claude 3.7 Sonnet", @@ -6771,7 +7010,7 @@ export const MODELS = { cacheWrite: 3.75, }, contextWindow: 200000, - maxTokens: 64000, + maxTokens: 128000, } satisfies Model<"openai-completions">, "anthropic/claude-3.7-sonnet:thinking": { id: "anthropic/claude-3.7-sonnet:thinking", @@ -6875,6 +7114,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "anthropic/claude-opus-4.6-fast": { + id: "anthropic/claude-opus-4.6-fast", + name: "Anthropic: Claude Opus 4.6 (Fast)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 30, + output: 150, + cacheRead: 3, + cacheWrite: 37.5, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, "anthropic/claude-sonnet-4": { id: "anthropic/claude-sonnet-4", name: "Anthropic: Claude Sonnet 4", @@ -6889,7 +7145,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"openai-completions">, "anthropic/claude-sonnet-4.5": { @@ -6943,6 +7199,23 @@ export const MODELS = { contextWindow: 131000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "arcee-ai/trinity-large-thinking": { + id: "arcee-ai/trinity-large-thinking", + name: "Arcee AI: Trinity Large Thinking", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.22, + output: 0.85, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "arcee-ai/trinity-mini": { id: "arcee-ai/trinity-mini", name: "Arcee AI: Trinity Mini", @@ -6960,23 +7233,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, - "arcee-ai/trinity-mini:free": { - id: "arcee-ai/trinity-mini:free", - name: "Arcee AI: Trinity Mini (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "arcee-ai/virtuoso-large": { id: "arcee-ai/virtuoso-large", name: "Arcee AI: Virtuoso Large", @@ -7224,13 +7480,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.44999999999999996, + input: 0.5, output: 2.1500000000000004, - cacheRead: 0.22499999999999998, + cacheRead: 0.35, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.1-terminus": { id: "deepseek/deepseek-v3.1-terminus", @@ -7383,7 +7639,7 @@ export const MODELS = { cacheWrite: 0.08333333333333334, }, contextWindow: 1048576, - maxTokens: 65536, + maxTokens: 65535, } satisfies Model<"openai-completions">, "google/gemini-2.5-pro": { id: "google/gemini-2.5-pro", @@ -7453,23 +7709,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"openai-completions">, - "google/gemini-3-pro-preview": { - id: "google/gemini-3-pro-preview", - name: "Google: Gemini 3 Pro Preview", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 12, - cacheRead: 0.19999999999999998, - cacheWrite: 0.375, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, "google/gemini-3.1-flash-lite-preview": { id: "google/gemini-3.1-flash-lite-preview", name: "Google: Gemini 3.1 Flash Lite Preview", @@ -7521,6 +7760,74 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"openai-completions">, + "google/gemma-4-26b-a4b-it": { + id: "google/gemma-4-26b-a4b-it", + name: "Google: Gemma 4 26B A4B ", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.12, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "google/gemma-4-26b-a4b-it:free": { + id: "google/gemma-4-26b-a4b-it:free", + name: "Google: Gemma 4 26B A4B (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "google/gemma-4-31b-it": { + id: "google/gemma-4-31b-it", + name: "Google: Gemma 4 31B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.14, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "google/gemma-4-31b-it:free": { + id: "google/gemma-4-31b-it:free", + name: "Google: Gemma 4 31B (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"openai-completions">, "inception/mercury": { id: "inception/mercury", name: "Inception: Mercury", @@ -7572,22 +7879,22 @@ export const MODELS = { contextWindow: 128000, maxTokens: 32000, } satisfies Model<"openai-completions">, - "kwaipilot/kat-coder-pro": { - id: "kwaipilot/kat-coder-pro", - name: "Kwaipilot: KAT-Coder-Pro V1", + "kwaipilot/kat-coder-pro-v2": { + id: "kwaipilot/kat-coder-pro-v2", + name: "Kwaipilot: KAT-Coder-Pro V2", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: false, input: ["text"], cost: { - input: 0.207, - output: 0.828, - cacheRead: 0.0414, + input: 0.3, + output: 1.2, + cacheRead: 0.06, cacheWrite: 0, }, contextWindow: 256000, - maxTokens: 128000, + maxTokens: 80000, } satisfies Model<"openai-completions">, "meituan/longcat-flash-chat": { id: "meituan/longcat-flash-chat", @@ -7768,13 +8075,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.27, + input: 0.29, output: 0.95, - cacheRead: 0.0290000007, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 196608, - maxTokens: 4096, + maxTokens: 196608, } satisfies Model<"openai-completions">, "minimax/minimax-m2.5": { id: "minimax/minimax-m2.5", @@ -7785,9 +8092,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.19999999999999998, - output: 1.17, - cacheRead: 0.09999999999999999, + input: 0.118, + output: 0.9900000000000001, + cacheRead: 0.059, cacheWrite: 0, }, contextWindow: 196608, @@ -7808,7 +8115,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 196608, - maxTokens: 196608, + maxTokens: 8192, } satisfies Model<"openai-completions">, "minimax/minimax-m2.7": { id: "minimax/minimax-m2.7", @@ -7821,11 +8128,11 @@ export const MODELS = { cost: { input: 0.3, output: 1.2, - cacheRead: 0.06, + cacheRead: 0.059, cacheWrite: 0, }, - contextWindow: 204800, - maxTokens: 131072, + contextWindow: 196608, + maxTokens: 4096, } satisfies Model<"openai-completions">, "mistralai/codestral-2508": { id: "mistralai/codestral-2508", @@ -8082,23 +8389,6 @@ export const MODELS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-small-24b-instruct-2501": { - id: "mistralai/mistral-small-24b-instruct-2501", - name: "Mistral: Mistral Small 3", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.049999999999999996, - output: 0.08, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "mistralai/mistral-small-2603": { id: "mistralai/mistral-small-2603", name: "Mistral: Mistral Small 4", @@ -8116,23 +8406,6 @@ export const MODELS = { contextWindow: 262144, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-small-3.1-24b-instruct:free": { - id: "mistralai/mistral-small-3.1-24b-instruct:free", - name: "Mistral: Mistral Small 3.1 24B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/mistral-small-3.2-24b-instruct": { id: "mistralai/mistral-small-3.2-24b-instruct", name: "Mistral: Mistral Small 3.2 24B", @@ -8244,13 +8517,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.55, - output: 2.2, + input: 0.5700000000000001, + output: 2.3, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131000, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-0905": { id: "moonshotai/kimi-k2-0905", @@ -8263,11 +8536,11 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0.15, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 4096, + contextWindow: 262144, + maxTokens: 262144, } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", @@ -8278,12 +8551,12 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.47, - output: 2, - cacheRead: 0.14100000000000001, + input: 0.6, + output: 2.5, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 262144, maxTokens: 4096, } satisfies Model<"openai-completions">, "moonshotai/kimi-k2.5": { @@ -8312,8 +8585,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.27, - output: 1, + input: 0.135, + output: 0.5, cacheRead: 0, cacheWrite: 0, }, @@ -8399,7 +8672,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.5, - cacheRead: 0.04, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, contextWindow: 262144, @@ -8624,7 +8897,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 1047576, - maxTokens: 32768, + maxTokens: 4096, } satisfies Model<"openai-completions">, "openai/gpt-4.1-mini": { id: "openai/gpt-4.1-mini", @@ -8671,7 +8944,7 @@ export const MODELS = { cost: { input: 2.5, output: 10, - cacheRead: 1.25, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 128000, @@ -8892,11 +9165,11 @@ export const MODELS = { cost: { input: 0.049999999999999996, output: 0.39999999999999997, - cacheRead: 0.005, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 400000, - maxTokens: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "openai/gpt-5-pro": { id: "openai/gpt-5-pro", @@ -8926,7 +9199,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.125, + cacheRead: 0.13, cacheWrite: 0, }, contextWindow: 400000, @@ -8994,11 +9267,11 @@ export const MODELS = { cost: { input: 0.25, output: 2, - cacheRead: 0.024999999999999998, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 400000, - maxTokens: 100000, + maxTokens: 128000, } satisfies Model<"openai-completions">, "openai/gpt-5.2": { id: "openai/gpt-5.2", @@ -9032,7 +9305,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 16384, + maxTokens: 32000, } satisfies Model<"openai-completions">, "openai/gpt-5.2-codex": { id: "openai/gpt-5.2-codex", @@ -9170,6 +9443,40 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "openai/gpt-audio": { + id: "openai/gpt-audio", + name: "OpenAI: GPT Audio", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2.5, + output: 10, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-audio-mini": { + id: "openai/gpt-audio-mini", + name: "OpenAI: GPT Audio Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "openai/gpt-oss-120b": { id: "openai/gpt-oss-120b", name: "OpenAI: gpt-oss-120b", @@ -9214,12 +9521,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.03, - output: 0.11, - cacheRead: 0.015, + output: 0.14, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131072, + maxTokens: 4096, } satisfies Model<"openai-completions">, "openai/gpt-oss-20b:free": { id: "openai/gpt-oss-20b:free", @@ -9236,7 +9543,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131072, + maxTokens: 8192, } satisfies Model<"openai-completions">, "openai/gpt-oss-safeguard-20b": { id: "openai/gpt-oss-safeguard-20b", @@ -9491,7 +9798,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 32768, - maxTokens: 4096, + maxTokens: 32768, } satisfies Model<"openai-completions">, "qwen/qwen-max": { id: "qwen/qwen-max", @@ -9731,23 +10038,6 @@ export const MODELS = { contextWindow: 40960, maxTokens: 40960, } satisfies Model<"openai-completions">, - "qwen/qwen3-4b:free": { - id: "qwen/qwen3-4b:free", - name: "Qwen: Qwen3 4B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 40960, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "qwen/qwen3-8b": { id: "qwen/qwen3-8b", name: "Qwen: Qwen3 8B", @@ -9825,13 +10115,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.12, - output: 0.75, - cacheRead: 0.06, + input: 0.15, + output: 0.7999999999999999, + cacheRead: 0.12, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 65536, + maxTokens: 262144, } satisfies Model<"openai-completions">, "qwen/qwen3-coder-plus": { id: "qwen/qwen3-coder-plus", @@ -10154,7 +10444,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 256000, - maxTokens: 4096, + maxTokens: 32768, } satisfies Model<"openai-completions">, "qwen/qwen3.5-flash-02-23": { id: "qwen/qwen3.5-flash-02-23", @@ -10190,6 +10480,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"openai-completions">, + "qwen/qwen3.6-plus": { + id: "qwen/qwen3.6-plus", + name: "Qwen: Qwen3.6 Plus", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.325, + output: 1.95, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 65536, + } satisfies Model<"openai-completions">, "qwen/qwq-32b": { id: "qwen/qwq-32b", name: "Qwen: QwQ 32B", @@ -10207,6 +10514,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, + "rekaai/reka-edge": { + id: "rekaai/reka-edge", + name: "Reka Edge", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 16384, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "relace/relace-search": { id: "relace/relace-search", name: "Relace: Relace Search", @@ -10269,28 +10593,11 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 256000, - } satisfies Model<"openai-completions">, - "stepfun/step-3.5-flash:free": { - id: "stepfun/step-3.5-flash:free", - name: "StepFun: Step 3.5 Flash (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 262144, + maxTokens: 65536, } satisfies Model<"openai-completions">, "thedrummer/rocinante-12b": { id: "thedrummer/rocinante-12b", @@ -10479,9 +10786,9 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, - "x-ai/grok-4.20-beta": { - id: "x-ai/grok-4.20-beta", - name: "xAI: Grok 4.20 Beta", + "x-ai/grok-4.20": { + id: "x-ai/grok-4.20", + name: "xAI: Grok 4.20", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -10743,9 +11050,43 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.96, - output: 3.1999999999999997, - cacheRead: 0.192, + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "z-ai/glm-5.1": { + id: "z-ai/glm-5.1", + name: "Z.ai: GLM 5.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.95, + output: 3.15, + cacheRead: 0.475, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 65535, + } satisfies Model<"openai-completions">, + "z-ai/glm-5v-turbo": { + id: "z-ai/glm-5v-turbo", + name: "Z.ai: GLM 5V Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, cacheWrite: 0, }, contextWindow: 202752, @@ -10772,20 +11113,20 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "alibaba/qwen-3-235b": { id: "alibaba/qwen-3-235b", - name: "Qwen3-235B-A22B", + name: "Qwen3 235B A22b Instruct 2507", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: false, input: ["text"], cost: { - input: 0.071, - output: 0.463, - cacheRead: 0, + input: 0.6, + output: 1.2, + cacheRead: 0.6, cacheWrite: 0, }, - contextWindow: 40960, - maxTokens: 16384, + contextWindow: 131000, + maxTokens: 40000, } satisfies Model<"anthropic-messages">, "alibaba/qwen-3-30b": { id: "alibaba/qwen-3-30b", @@ -10813,13 +11154,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.29, - output: 0.59, - cacheRead: 0.145, + input: 0.16, + output: 0.64, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 40960, + contextWindow: 128000, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-235b-a22b-thinking": { id: "alibaba/qwen3-235b-a22b-thinking", @@ -10847,13 +11188,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 1.5999999999999999, - cacheRead: 0.022, + input: 1.5, + output: 7.5, + cacheRead: 0.3, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 66536, + maxTokens: 65536, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-coder-30b-a3b": { id: "alibaba/qwen3-coder-30b-a3b", @@ -10966,13 +11307,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.22, - output: 0.88, + input: 0.39999999999999997, + output: 4, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 131072, + maxTokens: 32768, } satisfies Model<"anthropic-messages">, "alibaba/qwen3.5-flash": { id: "alibaba/qwen3.5-flash", @@ -11008,6 +11349,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "alibaba/qwen3.6-plus": { + id: "alibaba/qwen3.6-plus", + name: "Qwen 3.6 Plus", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 3, + cacheRead: 0.09999999999999999, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, "anthropic/claude-3-haiku": { id: "anthropic/claude-3-haiku", name: "Claude 3 Haiku", @@ -11042,40 +11400,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, - "anthropic/claude-3.5-sonnet": { - id: "anthropic/claude-3.5-sonnet", - name: "Claude 3.5 Sonnet", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, - "anthropic/claude-3.5-sonnet-20240620": { - id: "anthropic/claude-3.5-sonnet-20240620", - name: "Claude 3.5 Sonnet (2024-06-20)", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, "anthropic/claude-3.7-sonnet": { id: "anthropic/claude-3.7-sonnet", name: "Claude 3.7 Sonnet", @@ -11246,6 +11570,23 @@ export const MODELS = { contextWindow: 131000, maxTokens: 131000, } satisfies Model<"anthropic-messages">, + "arcee-ai/trinity-large-thinking": { + id: "arcee-ai/trinity-large-thinking", + name: "Trinity Large Thinking", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.25, + output: 0.8999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262100, + maxTokens: 80000, + } satisfies Model<"anthropic-messages">, "bytedance/seed-1.6": { id: "bytedance/seed-1.6", name: "Seed 1.6", @@ -11323,13 +11664,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, + input: 0.56, + output: 1.68, + cacheRead: 0.28, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 16384, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3.1-terminus": { id: "deepseek/deepseek-v3.1-terminus", @@ -11512,7 +11853,7 @@ export const MODELS = { cost: { input: 0.25, output: 1.5, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 1000000, @@ -11535,6 +11876,40 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "google/gemma-4-26b-a4b-it": { + id: "google/gemma-4-26b-a4b-it", + name: "Gemma 4 26B A4B IT", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.13, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "google/gemma-4-31b-it": { + id: "google/gemma-4-31b-it", + name: "Gemma 4 31B IT", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.14, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, "inception/mercury-2": { id: "inception/mercury-2", name: "Mercury 2", @@ -11569,6 +11944,23 @@ export const MODELS = { contextWindow: 32000, maxTokens: 16384, } satisfies Model<"anthropic-messages">, + "kwaipilot/kat-coder-pro-v2": { + id: "kwaipilot/kat-coder-pro-v2", + name: "Kat Coder Pro V2", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 256000, + } satisfies Model<"anthropic-messages">, "meituan/longcat-flash-chat": { id: "meituan/longcat-flash-chat", name: "LongCat Flash Chat", @@ -11586,23 +11978,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 100000, } satisfies Model<"anthropic-messages">, - "meituan/longcat-flash-thinking": { - id: "meituan/longcat-flash-thinking", - name: "LongCat Flash Thinking", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text"], - cost: { - input: 0.15, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, "meta/llama-3.1-70b": { id: "meta/llama-3.1-70b", name: "Llama 3.1 70B Instruct", @@ -11629,13 +12004,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0.09999999999999999, + input: 0.22, + output: 0.22, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 16384, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-3.2-11b": { id: "meta/llama-3.2-11b", @@ -12013,20 +12388,20 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2": { id: "moonshotai/kimi-k2", - name: "Kimi K2", + name: "Kimi K2 Instruct", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: false, input: ["text"], cost: { - input: 0.6, - output: 2.5, - cacheRead: 0.15, + input: 0.5700000000000001, + output: 2.3, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 16384, + maxTokens: 131072, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-0905": { id: "moonshotai/kimi-k2-0905", @@ -12039,11 +12414,11 @@ export const MODELS = { cost: { input: 0.6, output: 2.5, - cacheRead: 0.15, + cacheRead: 0.3, cacheWrite: 0, }, contextWindow: 256000, - maxTokens: 16384, + maxTokens: 128000, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", @@ -12615,12 +12990,12 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.07, - output: 0.3, + input: 0.049999999999999996, + output: 0.19999999999999998, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 131072, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "openai/gpt-oss-safeguard-20b": { @@ -12751,8 +13126,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 1, - output: 1, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, @@ -12768,8 +13143,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 3, - output: 15, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, @@ -12793,23 +13168,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "xai/grok-2-vision": { - id: "xai/grok-2-vision", - name: "Grok 2 Vision", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 10, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 32768, - } satisfies Model<"anthropic-messages">, "xai/grok-3": { id: "xai/grok-3", name: "Grok 3 Beta", @@ -12963,6 +13321,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-multi-agent": { + id: "xai/grok-4.20-multi-agent", + name: "Grok 4.20 Multi-Agent", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-4.20-multi-agent-beta": { id: "xai/grok-4.20-multi-agent-beta", name: "Grok 4.20 Multi Agent Beta", @@ -12980,6 +13355,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 2000000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-non-reasoning": { + id: "xai/grok-4.20-non-reasoning", + name: "Grok 4.20 Non-Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-4.20-non-reasoning-beta": { id: "xai/grok-4.20-non-reasoning-beta", name: "Grok 4.20 Beta Non-Reasoning", @@ -12997,6 +13389,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 2000000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-reasoning": { + id: "xai/grok-4.20-reasoning", + name: "Grok 4.20 Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-4.20-reasoning-beta": { id: "xai/grok-4.20-reasoning-beta", name: "Grok 4.20 Beta Reasoning", @@ -13040,9 +13449,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0.02, + input: 0.09, + output: 0.29, + cacheRead: 0.045, cacheWrite: 0, }, contextWindow: 262144, @@ -13176,13 +13585,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.6, - output: 2.2, - cacheRead: 0, + input: 2.25, + output: 2.75, + cacheRead: 2.25, cacheWrite: 0, }, - contextWindow: 200000, - maxTokens: 120000, + contextWindow: 131000, + maxTokens: 40000, } satisfies Model<"anthropic-messages">, "zai/glm-4.7-flash": { id: "zai/glm-4.7-flash", @@ -13252,6 +13661,40 @@ export const MODELS = { contextWindow: 202800, maxTokens: 131100, } satisfies Model<"anthropic-messages">, + "zai/glm-5.1": { + id: "zai/glm-5.1", + name: "GLM 5.1", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.4, + output: 4.4, + cacheRead: 0.26, + cacheWrite: 0, + }, + contextWindow: 202800, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, + "zai/glm-5v-turbo": { + id: "zai/glm-5v-turbo", + name: "GLM 5V Turbo", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, }, "xai": { "grok-2": { @@ -13808,6 +14251,24 @@ export const MODELS = { contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-4.7-flashx": { + id: "glm-4.7-flashx", + name: "GLM-4.7-FlashX", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text"], + cost: { + input: 0.07, + output: 0.4, + cacheRead: 0.01, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "glm-5": { id: "glm-5", name: "GLM-5", @@ -13844,5 +14305,41 @@ export const MODELS = { contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-5.1": { + id: "glm-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text"], + cost: { + input: 1.4, + output: 4.4, + cacheRead: 0.26, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "glm-5v-turbo": { + id: "glm-5v-turbo", + name: "glm-5v-turbo", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, } as const;