feat: add anthropic-vertex provider for Claude on Vertex AI (#1533)

* feat: add anthropic-vertex provider for Claude models on Google Vertex AI

Add a new anthropic-vertex provider that enables using Claude models
(Opus 4.6, Sonnet 4.6, Haiku 4.5) through Google Vertex AI using the
@anthropic-ai/vertex-sdk package. Follows the same pattern as the
existing google/google-vertex provider split.

Detection uses ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code)
with CLOUD_ML_REGION for region selection, falling back to us-central1.

Extracts shared Anthropic utilities into anthropic-shared.ts (message
conversion, tool conversion, param building, stream processing) to
avoid duplication between anthropic.ts and anthropic-vertex.ts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add full Claude model set for anthropic-vertex provider

Add 200K context window variants for Opus 4.6 and Sonnet 4.6, plus
older models (Sonnet 4.5, Sonnet 4, Opus 4.5, Opus 4.1, Opus 4, Haiku 4.5).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: add @anthropic-ai/vertex-sdk to root dependencies

Required for the published package to resolve the vertex SDK at runtime.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: remove unnecessary comments to match codebase style

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: remove duplicate stream functions after rebase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Nathan Roe <nathan.roe@carvana.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Roe 2026-03-20 01:14:13 -04:00 committed by GitHub
parent 111537f460
commit 39cd932abb
14 changed files with 1236 additions and 873 deletions

View file

@ -629,7 +629,7 @@ GSD isn't locked to one provider. It runs on the [Pi SDK](https://github.com/bad
### Built-in Providers
Anthropic, OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more.
Anthropic, Anthropic (Vertex AI), OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more.
### OAuth / Max Plans

View file

@ -12,7 +12,7 @@ Pi isn't locked to one provider. It supports 20+ providers out of the box and le
- Google Antigravity
**API keys (via environment variables):**
- Anthropic, OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock
- Anthropic, Anthropic (Vertex AI), OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock
- Mistral, Groq, Cerebras, xAI, OpenRouter, Vercel AI Gateway
- ZAI, OpenCode Zen, OpenCode Go, Hugging Face, Kimi, MiniMax

71
package-lock.json generated
View file

@ -1,12 +1,12 @@
{
"name": "gsd-pi",
"version": "2.31.2",
"version": "2.33.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "gsd-pi",
"version": "2.31.2",
"version": "2.33.1",
"hasInstallScript": true,
"license": "MIT",
"workspaces": [
@ -15,6 +15,7 @@
],
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",
@ -96,6 +97,56 @@
}
}
},
"node_modules/@anthropic-ai/vertex-sdk": {
"version": "0.14.4",
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.4.tgz",
"integrity": "sha512-BZUPRWghZxfSFtAxU563wH+jfWBPoedAwsVxG35FhmNsjeV8tyfN+lFriWhCpcZApxA4NdT6Soov+PzfnxxD5g==",
"license": "MIT",
"dependencies": {
"@anthropic-ai/sdk": ">=0.50.3 <1",
"google-auth-library": "^9.4.2"
}
},
"node_modules/@anthropic-ai/vertex-sdk/node_modules/gcp-metadata": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
"integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
"license": "Apache-2.0",
"dependencies": {
"gaxios": "^6.1.1",
"google-logging-utils": "^0.0.2",
"json-bigint": "^1.0.0"
},
"engines": {
"node": ">=14"
}
},
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-auth-library": {
"version": "9.15.1",
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
"integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
"license": "Apache-2.0",
"dependencies": {
"base64-js": "^1.3.0",
"ecdsa-sig-formatter": "^1.0.11",
"gaxios": "^6.1.1",
"gcp-metadata": "^6.1.0",
"gtoken": "^7.0.0",
"jws": "^4.0.0"
},
"engines": {
"node": ">=14"
}
},
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-logging-utils": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
"integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
"license": "Apache-2.0",
"engines": {
"node": ">=14"
}
},
"node_modules/@aws-crypto/crc32": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz",
@ -5865,6 +5916,19 @@
"integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
"license": "ISC"
},
"node_modules/gtoken": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
"integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
"license": "MIT",
"dependencies": {
"gaxios": "^6.0.0",
"jws": "^4.0.0"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/has-flag": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
@ -9066,6 +9130,7 @@
"version": "0.57.1",
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",
@ -9101,7 +9166,7 @@
},
"packages/pi-coding-agent": {
"name": "@gsd/pi-coding-agent",
"version": "2.31.2",
"version": "2.33.1",
"dependencies": {
"@mariozechner/jiti": "^2.6.2",
"@silvia-odwyer/photon-node": "^0.3.4",

View file

@ -84,6 +84,7 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",

View file

@ -24,6 +24,7 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",

View file

@ -73,6 +73,20 @@ export function getEnvApiKey(provider: any): string | undefined {
return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
}
// Anthropic on Vertex AI uses Application Default Credentials.
// Detected via ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code).
if (provider === "anthropic-vertex") {
const hasProject = !!process.env.ANTHROPIC_VERTEX_PROJECT_ID;
if (hasProject) {
return "<authenticated>";
}
// Fall back to Google Cloud project env vars
const hasGoogleProject = !!(process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT);
if (hasGoogleProject && hasVertexAdcCredentials()) {
return "<authenticated>";
}
}
// Vertex AI uses Application Default Credentials, not API keys.
// Auth is configured via `gcloud auth application-default login`.
if (provider === "google-vertex") {

View file

@ -1827,6 +1827,178 @@ export const MODELS = {
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
},
"anthropic-vertex": {
"claude-opus-4-6": {
id: "claude-opus-4-6",
name: "Claude Opus 4.6 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 128000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4-6[1m]": {
id: "claude-opus-4-6[1m]",
name: "Claude Opus 4.6 1M (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4-6": {
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4-6[1m]": {
id: "claude-sonnet-4-6[1m]",
name: "Claude Sonnet 4.6 1M (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 1000000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4-5@20250929": {
id: "claude-sonnet-4-5@20250929",
name: "Claude Sonnet 4.5 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4@20250514": {
id: "claude-sonnet-4@20250514",
name: "Claude Sonnet 4 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4-5@20251101": {
id: "claude-opus-4-5@20251101",
name: "Claude Opus 4.5 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 32000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4-1@20250805": {
id: "claude-opus-4-1@20250805",
name: "Claude Opus 4.1 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 32000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4@20250514": {
id: "claude-opus-4@20250514",
name: "Claude Opus 4 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 32000,
} satisfies Model<"anthropic-vertex">,
"claude-haiku-4-5@20251001": {
id: "claude-haiku-4-5@20251001",
name: "Claude Haiku 4.5 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.8,
output: 4,
cacheRead: 0.08,
cacheWrite: 1,
},
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"anthropic-vertex">,
},
"azure-openai-responses": {
"codex-mini-latest": {
id: "codex-mini-latest",

View file

@ -0,0 +1,761 @@
/**
* Shared utilities for Anthropic providers (direct API and Vertex AI).
*/
import type Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
} from "@anthropic-ai/sdk/resources/messages.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
CacheRetention,
Context,
ImageContent,
Message,
Model,
ServerToolUseContent,
StopReason,
StreamOptions,
TextContent,
ThinkingContent,
Tool,
ToolCall,
ToolResultMessage,
WebSearchResultContent,
} from "../types.js";
/** API types that use the Anthropic Messages protocol */
export type AnthropicApi = "anthropic-messages" | "anthropic-vertex";
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
/** Reasoning effort levels accepted by Anthropic's adaptive-thinking models. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";
/** Anthropic-specific streaming options layered on top of the generic StreamOptions. */
export interface AnthropicOptions extends StreamOptions {
  // Enable extended thinking (only takes effect when the model supports reasoning).
  thinkingEnabled?: boolean;
  // Thinking token budget for non-adaptive models; buildParams defaults it to 1024.
  thinkingBudgetTokens?: number;
  // Effort knob for adaptive-thinking models (the 4.6-family models).
  effort?: AnthropicEffort;
  // NOTE(review): not referenced anywhere in this file — presumably consumed by a
  // caller or an onPayload hook; confirm before relying on it here.
  interleavedThinking?: boolean;
  // Anthropic tool_choice passthrough: a mode string or a specific tool selection.
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
// Tool names that Claude Code ships natively. When authenticating with a
// Claude Code OAuth token, outgoing tool names must use these canonical
// spellings, so the lookups below are case-insensitive in both directions.
const claudeCodeTools = [
  "Read",
  "Write",
  "Edit",
  "Bash",
  "Grep",
  "Glob",
  "AskUserQuestion",
  "EnterPlanMode",
  "ExitPlanMode",
  "KillShell",
  "NotebookEdit",
  "Skill",
  "Task",
  "TaskOutput",
  "TodoWrite",
  "WebFetch",
  "WebSearch",
];
const ccToolLookup = new Map<string, string>();
for (const canonical of claudeCodeTools) {
  ccToolLookup.set(canonical.toLowerCase(), canonical);
}
/** Map an arbitrary tool name onto its canonical Claude Code spelling, if it has one. */
export const toClaudeCodeName = (name: string) => {
  const canonical = ccToolLookup.get(name.toLowerCase());
  return canonical ?? name;
};
/** Map a (possibly canonicalized) tool name back to the caller's original tool name. */
export const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
  if (!tools || tools.length === 0) return name;
  const needle = name.toLowerCase();
  for (const tool of tools) {
    if (tool.name.toLowerCase() === needle) return tool.name;
  }
  return name;
};
/**
 * Resolve the effective cache retention: explicit argument first, then the
 * PI_CACHE_RETENTION environment variable, finally "short".
 */
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
  if (cacheRetention) return cacheRetention;
  const envWantsLong = typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long";
  return envWantsLong ? "long" : "short";
}
/**
 * Build the prompt-caching control block for a request. Returns no
 * cacheControl when retention is "none". The 1h TTL is only attached when
 * talking to the first-party Anthropic API host.
 */
export function getCacheControl(
  baseUrl: string,
  cacheRetention?: CacheRetention,
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
  const retention = resolveCacheRetention(cacheRetention);
  if (retention === "none") {
    return { retention };
  }
  const cacheControl: { type: "ephemeral"; ttl?: "1h" } = { type: "ephemeral" };
  if (retention === "long" && baseUrl.includes("api.anthropic.com")) {
    cacheControl.ttl = "1h";
  }
  return { retention, cacheControl };
}
/**
 * Convert tool-result content into Anthropic block form. Text-only content
 * collapses to a single newline-joined string; mixed content becomes an
 * array of text/image blocks, with a placeholder text block prepended when
 * only images are present.
 */
export function convertContentBlocks(content: (TextContent | ImageContent)[]):
  | string
  | Array<
      | { type: "text"; text: string }
      | {
          type: "image";
          source: {
            type: "base64";
            media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
            data: string;
          };
        }
    > {
  // No images at all: join the text blocks into one sanitized string.
  if (content.every((c) => c.type !== "image")) {
    const joined = content.map((c) => (c as TextContent).text).join("\n");
    return sanitizeSurrogates(joined);
  }
  const blocks: Array<
    | { type: "text"; text: string }
    | {
        type: "image";
        source: {
          type: "base64";
          media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
          data: string;
        };
      }
  > = [];
  for (const item of content) {
    if (item.type === "text") {
      blocks.push({ type: "text", text: sanitizeSurrogates(item.text) });
    } else {
      blocks.push({
        type: "image",
        source: {
          type: "base64",
          media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
          data: item.data,
        },
      });
    }
  }
  // Image-only content: add a stub text block so the result is never text-free.
  if (!blocks.some((b) => b.type === "text")) {
    blocks.unshift({ type: "text", text: "(see attached image)" });
  }
  return blocks;
}
/** True for model IDs in the 4.6 Opus/Sonnet families, which use adaptive thinking. */
export function supportsAdaptiveThinking(modelId: string): boolean {
  const adaptiveFamilies = ["opus-4-6", "opus-4.6", "sonnet-4-6", "sonnet-4.6"];
  return adaptiveFamilies.some((marker) => modelId.includes(marker));
}
/**
 * Translate a generic thinking level into an Anthropic effort value.
 * "xhigh" maps to "max" only on Opus 4.6; unknown/absent levels default to "high".
 */
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
  if (level === "xhigh") {
    const isOpus46 = modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
    return isOpus46 ? "max" : "high";
  }
  const effortByLevel: Record<string, AnthropicEffort> = {
    minimal: "low",
    low: "low",
    medium: "medium",
    high: "high",
  };
  return (level !== undefined ? effortByLevel[level] : undefined) ?? "high";
}
/**
 * Heuristically classify an error as a transient network failure that is
 * safe to retry: connection resets, broken pipes, DNS hiccups, closed
 * sockets, or generic fetch failures.
 *
 * Fixes vs. previous version: string literals normalized to the file's
 * double-quote style, and the `connection && closed` clause is explicitly
 * parenthesized instead of relying on `&&`/`||` precedence (same behavior,
 * no longer a reading hazard).
 */
export function isTransientNetworkError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  const msg = error.message.toLowerCase();
  const code = (error as NodeJS.ErrnoException).code;
  // Well-known transient errno codes from Node's networking stack.
  const transientCodes = ["ECONNRESET", "EPIPE", "ETIMEDOUT", "ENOTFOUND", "EAI_AGAIN"];
  if (code !== undefined && transientCodes.includes(code)) return true;
  return (
    msg.includes("connector_closed") ||
    msg.includes("socket hang up") ||
    msg.includes("network") ||
    (msg.includes("connection") && msg.includes("closed")) ||
    msg.includes("fetch failed")
  );
}
/**
 * Derive a retry delay (ms) from rate-limit response headers.
 * Checks `retry-after` (seconds or HTTP-date), then the x-ratelimit reset
 * headers (epoch seconds). Every usable delay is padded by one second and
 * rounded up; non-positive delays are discarded. Returns undefined when no
 * header yields a usable value.
 *
 * `errorText` is accepted for interface stability but currently unused.
 */
export function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
  const pad = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
  const retryAfter = headers.get("retry-after");
  if (retryAfter) {
    // Numeric form: delay in seconds.
    const asSeconds = Number(retryAfter);
    if (Number.isFinite(asSeconds)) {
      const delay = pad(asSeconds * 1000);
      if (delay !== undefined) return delay;
    }
    // HTTP-date form: absolute timestamp.
    const asTimestamp = new Date(retryAfter).getTime();
    if (!Number.isNaN(asTimestamp)) {
      const delay = pad(asTimestamp - Date.now());
      if (delay !== undefined) return delay;
    }
  }
  // Fallback: reset headers carry an epoch-seconds timestamp.
  for (const name of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
    const raw = headers.get(name);
    if (!raw) continue;
    const resetSeconds = Number(raw);
    if (!Number.isFinite(resetSeconds)) continue;
    const delay = pad(resetSeconds * 1000 - Date.now());
    if (delay !== undefined) return delay;
  }
  return undefined;
}
/**
 * Sanitize a tool-call ID for the Anthropic API: replace characters outside
 * [a-zA-Z0-9_-] with underscores and cap the length at 64.
 */
export function normalizeToolCallId(id: string): string {
  const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
  return sanitized.substring(0, 64);
}
/**
 * Convert provider-agnostic messages into Anthropic `MessageParam`s.
 *
 * - Empty/whitespace-only text is dropped rather than sent as empty blocks.
 * - Image blocks are stripped for models whose `input` does not include "image".
 * - Thinking blocks without a signature are downgraded to plain text; only
 *   signed (or redacted) thinking is replayed as thinking blocks.
 * - Consecutive toolResult messages are merged into one user message of
 *   `tool_result` blocks.
 * - When `cacheControl` is given, it is attached to the last eligible block
 *   of the final user message so the conversation prefix becomes cacheable.
 */
export function convertMessages(
  messages: Message[],
  model: Model<AnthropicApi>,
  isOAuthToken: boolean,
  cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
  const params: MessageParam[] = [];
  // Normalize tool-call IDs and apply model-specific transforms up front.
  const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
  for (let i = 0; i < transformedMessages.length; i++) {
    const msg = transformedMessages[i];
    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        if (msg.content.trim().length > 0) {
          params.push({
            role: "user",
            content: sanitizeSurrogates(msg.content),
          });
        }
      } else {
        const blocks: ContentBlockParam[] = msg.content.map((item) => {
          if (item.type === "text") {
            return {
              type: "text",
              text: sanitizeSurrogates(item.text),
            };
          } else {
            return {
              type: "image",
              source: {
                type: "base64",
                media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
                data: item.data,
              },
            };
          }
        });
        // Drop images for text-only models, then drop empty text blocks.
        let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
        filteredBlocks = filteredBlocks.filter((b) => {
          if (b.type === "text") {
            return b.text.trim().length > 0;
          }
          return true;
        });
        if (filteredBlocks.length === 0) continue;
        params.push({
          role: "user",
          content: filteredBlocks,
        });
      }
    } else if (msg.role === "assistant") {
      const blocks: ContentBlockParam[] = [];
      for (const block of msg.content) {
        if (block.type === "text") {
          if (block.text.trim().length === 0) continue;
          blocks.push({
            type: "text",
            text: sanitizeSurrogates(block.text),
          });
        } else if (block.type === "thinking") {
          if (block.redacted) {
            // Redacted thinking carries its opaque payload in thinkingSignature.
            blocks.push({
              type: "redacted_thinking",
              data: block.thinkingSignature!,
            });
            continue;
          }
          if (block.thinking.trim().length === 0) continue;
          if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
            // Unsigned thinking cannot be replayed as a thinking block; send as text.
            blocks.push({
              type: "text",
              text: sanitizeSurrogates(block.thinking),
            });
          } else {
            blocks.push({
              type: "thinking",
              thinking: sanitizeSurrogates(block.thinking),
              signature: block.thinkingSignature,
            });
          }
        } else if (block.type === "toolCall") {
          blocks.push({
            type: "tool_use",
            id: block.id,
            // OAuth tokens require canonical Claude Code tool names.
            name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
            input: block.arguments ?? {},
          });
        } else if (block.type === "serverToolUse") {
          blocks.push({
            type: "server_tool_use",
            id: block.id,
            name: block.name,
            input: block.input ?? {},
          } as any);
        } else if (block.type === "webSearchResult") {
          blocks.push({
            type: "web_search_tool_result",
            tool_use_id: block.toolUseId,
            content: block.content,
          } as any);
        }
      }
      if (blocks.length === 0) continue;
      params.push({
        role: "assistant",
        content: blocks,
      });
    } else if (msg.role === "toolResult") {
      const toolResults: ContentBlockParam[] = [];
      toolResults.push({
        type: "tool_result",
        tool_use_id: msg.toolCallId,
        content: convertContentBlocks(msg.content),
        is_error: msg.isError,
      });
      // Coalesce any directly-following toolResult messages into this batch.
      let j = i + 1;
      while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
        const nextMsg = transformedMessages[j] as ToolResultMessage;
        toolResults.push({
          type: "tool_result",
          tool_use_id: nextMsg.toolCallId,
          content: convertContentBlocks(nextMsg.content),
          is_error: nextMsg.isError,
        });
        j++;
      }
      // Skip the messages we just merged.
      i = j - 1;
      params.push({
        role: "user",
        content: toolResults,
      });
    }
  }
  // Mark the tail of the conversation as a cache breakpoint.
  if (cacheControl && params.length > 0) {
    const lastMessage = params[params.length - 1];
    if (lastMessage.role === "user") {
      if (Array.isArray(lastMessage.content)) {
        const lastBlock = lastMessage.content[lastMessage.content.length - 1];
        if (
          lastBlock &&
          (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
        ) {
          (lastBlock as any).cache_control = cacheControl;
        }
      } else if (typeof lastMessage.content === "string") {
        // Promote a plain string to a text block so it can carry cache_control.
        lastMessage.content = [
          {
            type: "text",
            text: lastMessage.content,
            cache_control: cacheControl,
          },
        ] as any;
      }
    }
  }
  return params;
}
/**
 * Convert internal tool definitions into the Anthropic tool schema.
 * Tool names are canonicalized to Claude Code spellings when using an
 * OAuth token. Returns an empty list when no tools are given.
 */
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
  if (!tools) return [];
  const converted: Anthropic.Messages.Tool[] = [];
  for (const tool of tools) {
    const schema = tool.parameters as any;
    converted.push({
      name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
      description: tool.description,
      input_schema: {
        type: "object" as const,
        properties: schema.properties || {},
        required: schema.required || [],
      },
    });
  }
  return converted;
}
/**
 * Assemble the streaming create-message params for an Anthropic request:
 * message conversion, system prompt, tools, thinking config, metadata and
 * tool_choice.
 *
 * Model IDs may carry a bracketed variant suffix (e.g. "claude-opus-4-6[1m]")
 * used locally to select a context-window tier; it is stripped before the ID
 * is sent to the API.
 */
export function buildParams(
  model: Model<AnthropicApi>,
  context: Context,
  isOAuthToken: boolean,
  options?: AnthropicOptions,
): MessageCreateParamsStreaming {
  const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
  // Strip the local "[...]" variant suffix from the model ID.
  const apiModelId = model.id.replace(/\[.*\]$/, "");
  const params: MessageCreateParamsStreaming = {
    model: apiModelId,
    messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
    // Default to a third of the model's max output tokens (`| 0` truncates the division).
    max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
    stream: true,
  };
  if (isOAuthToken) {
    // Claude Code OAuth tokens expect this exact first system block; the
    // caller's own system prompt is appended after it.
    params.system = [
      {
        type: "text",
        text: "You are Claude Code, Anthropic's official CLI for Claude.",
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
    if (context.systemPrompt) {
      params.system.push({
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      });
    }
  } else if (context.systemPrompt) {
    params.system = [
      {
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
  }
  // Temperature is omitted whenever thinking is enabled — the two are never sent together.
  if (options?.temperature !== undefined && !options?.thinkingEnabled) {
    params.temperature = options.temperature;
  }
  if (context.tools) {
    params.tools = convertTools(context.tools, isOAuthToken);
  }
  if (options?.thinkingEnabled && model.reasoning) {
    if (supportsAdaptiveThinking(model.id)) {
      // 4.6-family models: adaptive thinking with an optional effort knob.
      params.thinking = { type: "adaptive" };
      if (options.effort) {
        params.output_config = { effort: options.effort };
      }
    } else {
      // Older reasoning models: explicit thinking token budget.
      params.thinking = {
        type: "enabled",
        budget_tokens: options.thinkingBudgetTokens || 1024,
      };
    }
  }
  // Only a string user_id is forwarded from metadata.
  if (options?.metadata) {
    const userId = options.metadata.user_id;
    if (typeof userId === "string") {
      params.metadata = { user_id: userId };
    }
  }
  if (options?.toolChoice) {
    if (typeof options.toolChoice === "string") {
      params.tool_choice = { type: options.toolChoice };
    } else {
      params.tool_choice = options.toolChoice;
    }
  }
  return params;
}
/**
 * Map an Anthropic stop reason onto the internal StopReason enum.
 * Throws on reasons this code has never seen, so new API values fail loudly.
 */
export function mapStopReason(reason: string): StopReason {
  const mapping: Record<string, StopReason> = {
    end_turn: "stop",
    pause_turn: "stop",
    stop_sequence: "stop",
    max_tokens: "length",
    tool_use: "toolUse",
    refusal: "error",
    sensitive: "error",
  };
  const mapped = mapping[reason];
  if (mapped === undefined) {
    throw new Error(`Unhandled stop reason: ${reason}`);
  }
  return mapped;
}
/** Inputs for processAnthropicStream. */
export interface StreamAnthropicArgs {
  // Client speaking the Anthropic Messages protocol (the Vertex client is
  // presumably passed here too — it exposes the same messages.stream API).
  client: Anthropic;
  model: Model<AnthropicApi>;
  context: Context;
  // True when auth uses a Claude Code OAuth token (changes tool naming and the system prompt).
  isOAuthToken: boolean;
  options?: AnthropicOptions;
  // SDK class used to recognize APIError instances for retry-after extraction.
  AnthropicSdkClass?: typeof Anthropic;
}
/**
 * Drive an Anthropic streaming request and translate SDK stream events into
 * the internal AssistantMessageEventStream protocol.
 *
 * Fire-and-forget: the async IIFE pushes start/delta/end events onto
 * `stream` as they arrive and always terminates the stream with either a
 * "done" or an "error" event. Usage and cost figures are updated on
 * message_start and message_delta events.
 */
export function processAnthropicStream(
  stream: AssistantMessageEventStream,
  args: StreamAnthropicArgs,
): void {
  const { client, model, context, isOAuthToken, options, AnthropicSdkClass } = args;
  (async () => {
    // Accumulator for the full assistant message; also sent as `partial` on every event.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };
    try {
      let params = buildParams(model, context, isOAuthToken, options);
      // Let the caller inspect or replace the payload before it is sent.
      const nextParams = await options?.onPayload?.(params, model);
      if (nextParams !== undefined) {
        params = nextParams as MessageCreateParamsStreaming;
      }
      const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
      stream.push({ type: "start", partial: output });
      // In-flight blocks carry the server's block index so deltas can be routed
      // to the right block; `index` is removed again when the block stops.
      type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number };
      const blocks = output.content as Block[];
      for await (const event of anthropicStream) {
        if (event.type === "message_start") {
          // Initial usage snapshot; refined later by message_delta.
          output.usage.input = event.message.usage.input_tokens || 0;
          output.usage.output = event.message.usage.output_tokens || 0;
          output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
          output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
          output.usage.totalTokens =
            output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
          calculateCost(model, output.usage);
        } else if (event.type === "content_block_start") {
          // Open a new content block of the appropriate kind.
          if (event.content_block.type === "text") {
            const block: Block = {
              type: "text",
              text: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
          } else if (event.content_block.type === "thinking") {
            const block: Block = {
              type: "thinking",
              thinking: "",
              thinkingSignature: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
          } else if (event.content_block.type === "redacted_thinking") {
            // Redacted thinking arrives whole; the opaque payload is kept in thinkingSignature.
            const block: Block = {
              type: "thinking",
              thinking: "[Reasoning redacted]",
              thinkingSignature: event.content_block.data,
              redacted: true,
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
          } else if (event.content_block.type === "tool_use") {
            const block: Block = {
              type: "toolCall",
              id: event.content_block.id,
              // Map canonical Claude Code names back to the caller's tool names.
              name: isOAuthToken
                ? fromClaudeCodeName(event.content_block.name, context.tools)
                : event.content_block.name,
              arguments: (event.content_block.input as Record<string, any>) ?? {},
              partialJson: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
          } else if ((event.content_block as any).type === "server_tool_use") {
            const serverBlock = event.content_block as any;
            const block: Block = {
              type: "serverToolUse",
              id: serverBlock.id,
              name: serverBlock.name,
              input: serverBlock.input,
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
          } else if ((event.content_block as any).type === "web_search_tool_result") {
            const resultBlock = event.content_block as any;
            const block: Block = {
              type: "webSearchResult",
              toolUseId: resultBlock.tool_use_id,
              content: resultBlock.content,
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output });
          }
        } else if (event.type === "content_block_delta") {
          // Route the delta to the open block with the matching server index.
          if (event.delta.type === "text_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "text") {
              block.text += event.delta.text;
              stream.push({
                type: "text_delta",
                contentIndex: index,
                delta: event.delta.text,
                partial: output,
              });
            }
          } else if (event.delta.type === "thinking_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinking += event.delta.thinking;
              stream.push({
                type: "thinking_delta",
                contentIndex: index,
                delta: event.delta.thinking,
                partial: output,
              });
            }
          } else if (event.delta.type === "input_json_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "toolCall") {
              // Arguments arrive as partial JSON; keep a best-effort parse live.
              block.partialJson += event.delta.partial_json;
              block.arguments = parseStreamingJson(block.partialJson);
              stream.push({
                type: "toolcall_delta",
                contentIndex: index,
                delta: event.delta.partial_json,
                partial: output,
              });
            }
          } else if (event.delta.type === "signature_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinkingSignature = block.thinkingSignature || "";
              block.thinkingSignature += event.delta.signature;
            }
          }
        } else if (event.type === "content_block_stop") {
          const index = blocks.findIndex((b) => b.index === event.index);
          const block = blocks[index];
          if (block) {
            // Block is complete: drop the routing index and emit the end event.
            delete (block as any).index;
            if (block.type === "text") {
              stream.push({
                type: "text_end",
                contentIndex: index,
                content: block.text,
                partial: output,
              });
            } else if (block.type === "thinking") {
              stream.push({
                type: "thinking_end",
                contentIndex: index,
                content: block.thinking,
                partial: output,
              });
            } else if (block.type === "toolCall") {
              block.arguments = parseStreamingJson(block.partialJson);
              delete (block as any).partialJson;
              stream.push({
                type: "toolcall_end",
                contentIndex: index,
                toolCall: block,
                partial: output,
              });
            }
          }
        } else if (event.type === "message_delta") {
          if (event.delta.stop_reason) {
            output.stopReason = mapStopReason(event.delta.stop_reason);
          }
          // Final usage numbers supersede the message_start snapshot.
          if (event.usage.input_tokens != null) {
            output.usage.input = event.usage.input_tokens;
          }
          if (event.usage.output_tokens != null) {
            output.usage.output = event.usage.output_tokens;
          }
          if (event.usage.cache_read_input_tokens != null) {
            output.usage.cacheRead = event.usage.cache_read_input_tokens;
          }
          if (event.usage.cache_creation_input_tokens != null) {
            output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
          }
          output.usage.totalTokens =
            output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
          calculateCost(model, output.usage);
        }
      }
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }
      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }
      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip routing indices from any blocks that were still open.
      for (const block of output.content) delete (block as any).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
      if (model.provider === "alibaba-coding-plan") {
        output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
      }
      // Surface rate-limit retry hints when the SDK exposed response headers.
      if (AnthropicSdkClass && error instanceof AnthropicSdkClass.APIError && error.headers) {
        const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
        if (retryAfterMs !== undefined) {
          output.retryAfterMs = retryAfterMs;
        }
      }
      if (isTransientNetworkError(error)) {
        output.retryAfterMs = output.retryAfterMs ?? 5000;
      }
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();
}

View file

@ -0,0 +1,130 @@
// Lazy-loaded: Anthropic Vertex SDK is imported on first use, not at startup.
// This avoids penalizing users who don't use Anthropic Vertex models.
import type Anthropic from "@anthropic-ai/sdk";
import type { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
import { getEnvApiKey } from "../env-api-keys.js";
import type {
	Api,
	AssistantMessage,
	Context,
	Model,
	SimpleStreamOptions,
	StreamFunction,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import {
type AnthropicOptions,
mapThinkingLevelToEffort,
processAnthropicStream,
supportsAdaptiveThinking,
} from "./anthropic-shared.js";
let _AnthropicVertexClass: typeof AnthropicVertex | undefined;
let _AnthropicSdkClass: typeof Anthropic | undefined;
// Import the Vertex SDK on demand and memoize the class so the (slow) module
// load happens at most once per process.
async function getAnthropicVertexClass(): Promise<typeof AnthropicVertex> {
	if (_AnthropicVertexClass === undefined) {
		_AnthropicVertexClass = (await import("@anthropic-ai/vertex-sdk")).AnthropicVertex;
	}
	return _AnthropicVertexClass;
}
// Lazily load the base Anthropic SDK (its default export) and cache it,
// mirroring getAnthropicVertexClass above.
async function getAnthropicSdkClass(): Promise<typeof Anthropic> {
	if (_AnthropicSdkClass === undefined) {
		const { default: sdk } = await import("@anthropic-ai/sdk");
		_AnthropicSdkClass = sdk;
	}
	return _AnthropicSdkClass;
}
/**
 * Pick the GCP project ID from the environment.
 * ANTHROPIC_VERTEX_PROJECT_ID (Claude Code's variable) wins over the generic
 * GOOGLE_CLOUD_PROJECT / GCLOUD_PROJECT; empty strings are treated as unset.
 * Throws when no candidate is set.
 */
function resolveProjectId(): string {
	const candidates = [
		process.env.ANTHROPIC_VERTEX_PROJECT_ID,
		process.env.GOOGLE_CLOUD_PROJECT,
		process.env.GCLOUD_PROJECT,
	];
	const projectId = candidates.find((value) => value);
	if (!projectId) {
		throw new Error(
			"Anthropic Vertex requires a project ID. Set ANTHROPIC_VERTEX_PROJECT_ID, GOOGLE_CLOUD_PROJECT, or GCLOUD_PROJECT.",
		);
	}
	return projectId;
}
/**
 * Pick the Vertex region: CLOUD_ML_REGION (Claude Code's variable) first,
 * then GOOGLE_CLOUD_LOCATION, defaulting to us-central1.
 * Empty strings fall through to the next candidate.
 */
function resolveRegion(): string {
	const region = process.env.CLOUD_ML_REGION || process.env.GOOGLE_CLOUD_LOCATION;
	return region || "us-central1";
}
async function createVertexClient(): Promise<AnthropicVertex> {
const AnthropicVertexClass = await getAnthropicVertexClass();
const projectId = resolveProjectId();
const region = resolveRegion();
return new AnthropicVertexClass({
projectId,
region,
});
}
export const streamAnthropicVertex: StreamFunction<"anthropic-vertex", AnthropicOptions> = (
model: Model<"anthropic-vertex">,
context: Context,
options?: AnthropicOptions,
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
(async () => {
const client = await createVertexClient();
const AnthropicSdk = await getAnthropicSdkClass();
processAnthropicStream(stream, {
client: client as unknown as Anthropic,
model,
context,
isOAuthToken: false,
options,
AnthropicSdkClass: AnthropicSdk,
});
})();
return stream;
};
/**
 * Simple streaming entry point for Claude on Vertex AI. Mirrors
 * streamSimpleAnthropic: resolve the key, build base options, then pick the
 * thinking strategy based on the model generation.
 */
export const streamSimpleAnthropicVertex: StreamFunction<"anthropic-vertex", SimpleStreamOptions> = (
	model: Model<"anthropic-vertex">,
	context: Context,
	options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key found for provider: ${model.provider}. Set ANTHROPIC_VERTEX_PROJECT_ID to use Claude on Vertex AI.`);
	}
	const base = buildBaseOptions(model, options, apiKey);
	const reasoning = options?.reasoning;
	if (!reasoning) {
		// No reasoning requested: thinking stays off entirely.
		return streamAnthropicVertex(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
	}
	if (supportsAdaptiveThinking(model.id)) {
		// Opus 4.6 / Sonnet 4.6 take an adaptive-thinking effort level.
		return streamAnthropicVertex(model, context, {
			...base,
			thinkingEnabled: true,
			effort: mapThinkingLevelToEffort(reasoning, model.id),
		} satisfies AnthropicOptions);
	}
	// Older models use a token budget carved out of maxTokens.
	const adjusted = adjustMaxTokensForThinking(
		base.maxTokens || 0,
		model.maxTokens,
		reasoning,
		options?.thinkingBudgets,
	);
	return streamAnthropicVertex(model, context, {
		...base,
		maxTokens: adjusted.maxTokens,
		thinkingEnabled: true,
		thinkingBudgetTokens: adjusted.thinkingBudget,
	} satisfies AnthropicOptions);
};

View file

@ -1,40 +1,29 @@
// Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup.
// This avoids penalizing users who don't use Anthropic models.
import type Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
} from "@anthropic-ai/sdk/resources/messages.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
CacheRetention,
Context,
ImageContent,
Message,
Model,
ServerToolUseContent,
SimpleStreamOptions,
StopReason,
StreamFunction,
StreamOptions,
TextContent,
ThinkingContent,
Tool,
ToolCall,
ToolResultMessage,
WebSearchResultContent,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";
import {
type AnthropicEffort,
type AnthropicOptions,
extractRetryAfterMs,
mapThinkingLevelToEffort,
processAnthropicStream,
supportsAdaptiveThinking,
} from "./anthropic-shared.js";
// Re-export types used by other modules
export type { AnthropicEffort, AnthropicOptions };
export { extractRetryAfterMs };
let _AnthropicClass: typeof Anthropic | undefined;
async function getAnthropicClass(): Promise<typeof Anthropic> {
@ -45,154 +34,9 @@ async function getAnthropicClass(): Promise<typeof Anthropic> {
return _AnthropicClass;
}
/**
 * Resolve the cache retention preference.
 * An explicit setting wins; otherwise PI_CACHE_RETENTION=long opts into
 * "long" (kept for backward compatibility), and everything else is "short".
 */
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
	if (cacheRetention) return cacheRetention;
	// Guard on `process` so this also works in non-Node runtimes.
	const fromEnv = typeof process !== "undefined" ? process.env.PI_CACHE_RETENTION : undefined;
	return fromEnv === "long" ? "long" : "short";
}
/**
 * Translate a cache retention preference into an Anthropic cache_control
 * payload. "none" yields no cache_control at all; "long" adds a 1h TTL, but
 * only when the request targets api.anthropic.com.
 */
function getCacheControl(
	baseUrl: string,
	cacheRetention?: CacheRetention,
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
	const retention = resolveCacheRetention(cacheRetention);
	if (retention === "none") {
		return { retention };
	}
	const cacheControl: { type: "ephemeral"; ttl?: "1h" } = { type: "ephemeral" };
	if (retention === "long" && baseUrl.includes("api.anthropic.com")) {
		cacheControl.ttl = "1h";
	}
	return { retention, cacheControl };
}
// Stealth mode: Mimic Claude Code's tool naming exactly
const claudeCodeVersion = "2.1.62";
// Claude Code 2.x tool names (canonical casing)
// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
// To update: https://github.com/badlogic/cchistory
const claudeCodeTools = [
	"Read",
	"Write",
	"Edit",
	"Bash",
	"Grep",
	"Glob",
	"AskUserQuestion",
	"EnterPlanMode",
	"ExitPlanMode",
	"KillShell",
	"NotebookEdit",
	"Skill",
	"Task",
	"TaskOutput",
	"TodoWrite",
	"WebFetch",
	"WebSearch",
];
// Case-insensitive lookup: lowercased tool name -> canonical CC casing.
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));
// Convert tool name to CC canonical casing if it matches (case-insensitive)
const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? name;
// Map a CC-cased tool name back to the caller's original name by case-insensitive
// match against the provided tool list; returns the name unchanged when no tool
// matches or no tools were supplied.
const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
	if (tools && tools.length > 0) {
		const lowerName = name.toLowerCase();
		const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);
		if (matchedTool) return matchedTool.name;
	}
	return name;
};
/**
 * Convert content blocks to Anthropic API format.
 * Text-only content collapses to one newline-joined string; the presence of
 * any image forces the block-array form, and a placeholder text block is
 * prepended when the content is images only.
 */
function convertContentBlocks(content: (TextContent | ImageContent)[]):
	| string
	| Array<
		| { type: "text"; text: string }
		| {
				type: "image";
				source: {
					type: "base64";
					media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
					data: string;
				};
		  }
	> {
	const imageFree = content.every((c) => c.type !== "image");
	if (imageFree) {
		return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n"));
	}
	const blocks = content.map((block) =>
		block.type === "text"
			? { type: "text" as const, text: sanitizeSurrogates(block.text) }
			: {
					type: "image" as const,
					source: {
						type: "base64" as const,
						media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
						data: block.data,
					},
				},
	);
	if (!blocks.some((b) => b.type === "text")) {
		blocks.unshift({
			type: "text" as const,
			text: "(see attached image)",
		});
	}
	return blocks;
}
export type AnthropicEffort = "low" | "medium" | "high" | "max";
export interface AnthropicOptions extends StreamOptions {
	/**
	 * Enable extended thinking.
	 * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
	 * For older models: uses budget-based thinking with thinkingBudgetTokens.
	 */
	thinkingEnabled?: boolean;
	/**
	 * Token budget for extended thinking (older models only).
	 * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
	 */
	thinkingBudgetTokens?: number;
	/**
	 * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6).
	 * Controls how much thinking Claude allocates:
	 * - "max": Always thinks with no constraints (Opus 4.6 only)
	 * - "high": Always thinks, deep reasoning (default)
	 * - "medium": Moderate thinking, may skip for simple queries
	 * - "low": Minimal thinking, skips for simple tasks
	 * Ignored for older models.
	 */
	effort?: AnthropicEffort;
	/** Enable interleaved thinking; call sites default this to true when unset. */
	interleavedThinking?: boolean;
	/** Forwarded to the API's tool_choice: a mode string or a specific tool selection. */
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
const merged: Record<string, string> = {};
for (const headers of headerSources) {
@ -203,410 +47,6 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
return merged;
}
/**
 * Detect transient network errors that are likely to succeed on retry.
 * Covers WebSocket disconnects (Tailscale, VPN), TCP resets, and DNS failures.
 */
function isTransientNetworkError(error: unknown): boolean {
	if (!(error instanceof Error)) return false;
	const code = (error as NodeJS.ErrnoException).code;
	if (code !== undefined && ["ECONNRESET", "EPIPE", "ETIMEDOUT", "ENOTFOUND", "EAI_AGAIN"].includes(code)) {
		return true;
	}
	const msg = error.message.toLowerCase();
	if (msg.includes("connector_closed")) return true;
	if (msg.includes("socket hang up")) return true;
	if (msg.includes("network")) return true;
	// "connection ... closed" in any phrasing counts as transient.
	if (msg.includes("connection") && msg.includes("closed")) return true;
	return msg.includes("fetch failed");
}
/**
 * Extract retry delay from Anthropic error response headers (in milliseconds).
 * Checks: retry-after (seconds or RFC date), x-ratelimit-reset-requests,
 * x-ratelimit-reset-tokens. Returns undefined if no valid delay is found or
 * the delay lies in the past. A one-second pad is added to every delay.
 */
function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
	// Reject past/zero delays; otherwise pad by 1s and round up.
	const withPadding = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
	const retryAfter = headers.get("retry-after");
	if (retryAfter) {
		const asSeconds = Number(retryAfter);
		if (Number.isFinite(asSeconds)) {
			const delay = withPadding(asSeconds * 1000);
			if (delay !== undefined) return delay;
		}
		// Not a number: try parsing retry-after as an HTTP date.
		const asTimestamp = new Date(retryAfter).getTime();
		if (!Number.isNaN(asTimestamp)) {
			const delay = withPadding(asTimestamp - Date.now());
			if (delay !== undefined) return delay;
		}
	}
	// x-ratelimit-reset-requests / x-ratelimit-reset-tokens are Unix timestamps (seconds)
	for (const name of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
		const raw = headers.get(name);
		if (!raw) continue;
		const resetSeconds = Number(raw);
		if (!Number.isFinite(resetSeconds)) continue;
		const delay = withPadding(resetSeconds * 1000 - Date.now());
		if (delay !== undefined) return delay;
	}
	return undefined;
}
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
model: Model<"anthropic-messages">,
context: Context,
options?: AnthropicOptions,
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: model.api as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
try {
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
let copilotDynamicHeaders: Record<string, string> | undefined;
if (model.provider === "github-copilot") {
const hasImages = hasCopilotVisionInput(context.messages);
copilotDynamicHeaders = buildCopilotDynamicHeaders({
messages: context.messages,
hasImages,
});
}
const { client, isOAuthToken } = await createClient(
model,
apiKey,
options?.interleavedThinking ?? true,
options?.headers,
copilotDynamicHeaders,
);
let params = buildParams(model, context, isOAuthToken, options);
const nextParams = await options?.onPayload?.(params, model);
if (nextParams !== undefined) {
params = nextParams as MessageCreateParamsStreaming;
}
const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
stream.push({ type: "start", partial: output });
type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number };
const blocks = output.content as Block[];
for await (const event of anthropicStream) {
if (event.type === "message_start") {
// Capture initial token usage from message_start event
// This ensures we have input token counts even if the stream is aborted early
output.usage.input = event.message.usage.input_tokens || 0;
output.usage.output = event.message.usage.output_tokens || 0;
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
} else if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
const block: Block = {
type: "text",
text: "",
index: event.index,
};
output.content.push(block);
stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "thinking") {
const block: Block = {
type: "thinking",
thinking: "",
thinkingSignature: "",
index: event.index,
};
output.content.push(block);
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "redacted_thinking") {
const block: Block = {
type: "thinking",
thinking: "[Reasoning redacted]",
thinkingSignature: event.content_block.data,
redacted: true,
index: event.index,
};
output.content.push(block);
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "tool_use") {
const block: Block = {
type: "toolCall",
id: event.content_block.id,
name: isOAuthToken
? fromClaudeCodeName(event.content_block.name, context.tools)
: event.content_block.name,
arguments: (event.content_block.input as Record<string, any>) ?? {},
partialJson: "",
index: event.index,
};
output.content.push(block);
stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
} else if ((event.content_block as any).type === "server_tool_use") {
const serverBlock = event.content_block as any;
const block: Block = {
type: "serverToolUse",
id: serverBlock.id,
name: serverBlock.name,
input: serverBlock.input,
index: event.index,
};
output.content.push(block);
stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
} else if ((event.content_block as any).type === "web_search_tool_result") {
const resultBlock = event.content_block as any;
const block: Block = {
type: "webSearchResult",
toolUseId: resultBlock.tool_use_id,
content: resultBlock.content,
index: event.index,
};
output.content.push(block);
stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output });
}
} else if (event.type === "content_block_delta") {
if (event.delta.type === "text_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "text") {
block.text += event.delta.text;
stream.push({
type: "text_delta",
contentIndex: index,
delta: event.delta.text,
partial: output,
});
}
} else if (event.delta.type === "thinking_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "thinking") {
block.thinking += event.delta.thinking;
stream.push({
type: "thinking_delta",
contentIndex: index,
delta: event.delta.thinking,
partial: output,
});
}
} else if (event.delta.type === "input_json_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "toolCall") {
block.partialJson += event.delta.partial_json;
block.arguments = parseStreamingJson(block.partialJson);
stream.push({
type: "toolcall_delta",
contentIndex: index,
delta: event.delta.partial_json,
partial: output,
});
}
} else if (event.delta.type === "signature_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "thinking") {
block.thinkingSignature = block.thinkingSignature || "";
block.thinkingSignature += event.delta.signature;
}
}
} else if (event.type === "content_block_stop") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block) {
delete (block as any).index;
if (block.type === "text") {
stream.push({
type: "text_end",
contentIndex: index,
content: block.text,
partial: output,
});
} else if (block.type === "thinking") {
stream.push({
type: "thinking_end",
contentIndex: index,
content: block.thinking,
partial: output,
});
} else if (block.type === "toolCall") {
block.arguments = parseStreamingJson(block.partialJson);
delete (block as any).partialJson;
stream.push({
type: "toolcall_end",
contentIndex: index,
toolCall: block,
partial: output,
});
}
// serverToolUse and webSearchResult blocks just need index cleanup (already emitted on start)
}
} else if (event.type === "message_delta") {
if (event.delta.stop_reason) {
output.stopReason = mapStopReason(event.delta.stop_reason);
}
// Only update usage fields if present (not null).
// Preserves input_tokens from message_start when proxies omit it in message_delta.
if (event.usage.input_tokens != null) {
output.usage.input = event.usage.input_tokens;
}
if (event.usage.output_tokens != null) {
output.usage.output = event.usage.output_tokens;
}
if (event.usage.cache_read_input_tokens != null) {
output.usage.cacheRead = event.usage.cache_read_input_tokens;
}
if (event.usage.cache_creation_input_tokens != null) {
output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
}
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
}
}
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
if (output.stopReason === "aborted" || output.stopReason === "error") {
throw new Error("An unknown error occurred");
}
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
for (const block of output.content) delete (block as any).index;
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
if (model.provider === "alibaba-coding-plan") {
output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
}
const AnthropicSdk = _AnthropicClass;
if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) {
const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
if (retryAfterMs !== undefined) {
output.retryAfterMs = retryAfterMs;
}
}
// Mark transient network errors as retriable so auto-mode can
// detect them and retry instead of stopping (#833).
if (isTransientNetworkError(error)) {
output.retryAfterMs = output.retryAfterMs ?? 5000;
}
stream.push({ type: "error", reason: output.stopReason, error: output });
stream.end();
}
})();
return stream;
};
/**
 * Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6).
 * Matches both dash and dot spellings, with or without a date suffix.
 */
function supportsAdaptiveThinking(modelId: string): boolean {
	const markers = ["opus-4-6", "opus-4.6", "sonnet-4-6", "sonnet-4.6"];
	return markers.some((marker) => modelId.includes(marker));
}
/**
 * Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
 * Note: effort "max" is only valid on Opus 4.6.
 */
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"], modelId: string): AnthropicEffort {
	if (level === "minimal" || level === "low") return "low";
	if (level === "medium") return "medium";
	if (level === "xhigh") {
		// "max" exists only on Opus 4.6; everything else caps at "high".
		const isOpus46 = modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
		return isOpus46 ? "max" : "high";
	}
	// "high" and any unrecognized level both resolve to "high".
	return "high";
}
/**
 * Simple streaming entry point: resolves the API key, derives base options,
 * and dispatches to streamAnthropic with the appropriate thinking
 * configuration (adaptive effort for 4.6 models, token budget otherwise).
 */
export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = (
	model: Model<"anthropic-messages">,
	context: Context,
	options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key for provider: ${model.provider}`);
	}
	const base = buildBaseOptions(model, options, apiKey);
	const reasoning = options?.reasoning;
	if (!reasoning) {
		// No reasoning requested: thinking stays off entirely.
		return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
	}
	if (supportsAdaptiveThinking(model.id)) {
		// Opus 4.6 / Sonnet 4.6: adaptive thinking steered by an effort level.
		return streamAnthropic(model, context, {
			...base,
			thinkingEnabled: true,
			effort: mapThinkingLevelToEffort(reasoning, model.id),
		} satisfies AnthropicOptions);
	}
	// Older models: budget-based thinking; shrink maxTokens to leave room.
	const adjusted = adjustMaxTokensForThinking(
		base.maxTokens || 0,
		model.maxTokens,
		reasoning,
		options?.thinkingBudgets,
	);
	return streamAnthropic(model, context, {
		...base,
		maxTokens: adjusted.maxTokens,
		thinkingEnabled: true,
		thinkingBudgetTokens: adjusted.thinkingBudget,
	} satisfies AnthropicOptions);
};
// Anthropic OAuth access tokens embed the "sk-ant-oat" marker.
function isOAuthToken(apiKey: string): boolean {
	return apiKey.indexOf("sk-ant-oat") !== -1;
}
@ -702,315 +142,83 @@ async function createClient(
return { client, isOAuthToken: false };
}
function buildParams(
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
model: Model<"anthropic-messages">,
context: Context,
isOAuthToken: boolean,
options?: AnthropicOptions,
): MessageCreateParamsStreaming {
const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
// Strip variant suffixes like [1m] from model ID before sending to the API.
// The API only accepts the base model ID (e.g. "claude-opus-4-6"),
// not internal variant identifiers (e.g. "claude-opus-4-6[1m]").
// This applies to all auth methods — API keys, OAuth, and Copilot alike.
const apiModelId = model.id.replace(/\[.*\]$/, "");
const params: MessageCreateParamsStreaming = {
model: apiModelId,
messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
stream: true,
};
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
// For OAuth tokens, we MUST include Claude Code identity
if (isOAuthToken) {
params.system = [
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
...(cacheControl ? { cache_control: cacheControl } : {}),
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: sanitizeSurrogates(context.systemPrompt),
...(cacheControl ? { cache_control: cacheControl } : {}),
(async () => {
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
let copilotDynamicHeaders: Record<string, string> | undefined;
if (model.provider === "github-copilot") {
const hasImages = hasCopilotVisionInput(context.messages);
copilotDynamicHeaders = buildCopilotDynamicHeaders({
messages: context.messages,
hasImages,
});
}
} else if (context.systemPrompt) {
// Add cache control to system prompt for non-OAuth tokens
params.system = [
{
type: "text",
text: sanitizeSurrogates(context.systemPrompt),
...(cacheControl ? { cache_control: cacheControl } : {}),
},
];
}
// Temperature is incompatible with extended thinking (adaptive or budget-based).
if (options?.temperature !== undefined && !options?.thinkingEnabled) {
params.temperature = options.temperature;
}
const { client, isOAuthToken: isOAuth } = await createClient(
model,
apiKey,
options?.interleavedThinking ?? true,
options?.headers,
copilotDynamicHeaders,
);
if (context.tools) {
params.tools = convertTools(context.tools, isOAuthToken);
}
processAnthropicStream(stream, {
client,
model,
context,
isOAuthToken: isOAuth,
options,
AnthropicSdkClass: _AnthropicClass,
});
})();
// Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models)
if (options?.thinkingEnabled && model.reasoning) {
if (supportsAdaptiveThinking(model.id)) {
// Adaptive thinking: Claude decides when and how much to think
params.thinking = { type: "adaptive" };
if (options.effort) {
params.output_config = { effort: options.effort };
}
} else {
// Budget-based thinking for older models
params.thinking = {
type: "enabled",
budget_tokens: options.thinkingBudgetTokens || 1024,
};
}
}
return stream;
};
if (options?.metadata) {
const userId = options.metadata.user_id;
if (typeof userId === "string") {
params.metadata = { user_id: userId };
}
}
if (options?.toolChoice) {
if (typeof options.toolChoice === "string") {
params.tool_choice = { type: options.toolChoice };
} else {
params.tool_choice = options.toolChoice;
}
}
return params;
}
// Normalize tool call IDs to match Anthropic's required pattern and length:
// every character outside [a-zA-Z0-9_-] becomes "_", capped at 64 characters.
function normalizeToolCallId(id: string): string {
	const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
	return sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
}
function convertMessages(
messages: Message[],
export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = (
model: Model<"anthropic-messages">,
isOAuthToken: boolean,
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
const params: MessageParam[] = [];
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
for (let i = 0; i < transformedMessages.length; i++) {
const msg = transformedMessages[i];
if (msg.role === "user") {
if (typeof msg.content === "string") {
if (msg.content.trim().length > 0) {
params.push({
role: "user",
content: sanitizeSurrogates(msg.content),
});
}
} else {
const blocks: ContentBlockParam[] = msg.content.map((item) => {
if (item.type === "text") {
return {
type: "text",
text: sanitizeSurrogates(item.text),
};
} else {
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
data: item.data,
},
};
}
});
let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
filteredBlocks = filteredBlocks.filter((b) => {
if (b.type === "text") {
return b.text.trim().length > 0;
}
return true;
});
if (filteredBlocks.length === 0) continue;
params.push({
role: "user",
content: filteredBlocks,
});
}
} else if (msg.role === "assistant") {
const blocks: ContentBlockParam[] = [];
for (const block of msg.content) {
if (block.type === "text") {
if (block.text.trim().length === 0) continue;
blocks.push({
type: "text",
text: sanitizeSurrogates(block.text),
});
} else if (block.type === "thinking") {
// Redacted thinking: pass the opaque payload back as redacted_thinking
if (block.redacted) {
blocks.push({
type: "redacted_thinking",
data: block.thinkingSignature!,
});
continue;
}
if (block.thinking.trim().length === 0) continue;
// If thinking signature is missing/empty (e.g., from aborted stream),
// convert to plain text block without <thinking> tags to avoid API rejection
// and prevent Claude from mimicking the tags in responses
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
blocks.push({
type: "text",
text: sanitizeSurrogates(block.thinking),
});
} else {
blocks.push({
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: block.thinkingSignature,
});
}
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",
id: block.id,
name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
input: block.arguments ?? {},
});
} else if (block.type === "serverToolUse") {
blocks.push({
type: "server_tool_use",
id: block.id,
name: block.name,
input: block.input ?? {},
} as any);
} else if (block.type === "webSearchResult") {
blocks.push({
type: "web_search_tool_result",
tool_use_id: block.toolUseId,
content: block.content,
} as any);
}
}
if (blocks.length === 0) continue;
params.push({
role: "assistant",
content: blocks,
});
} else if (msg.role === "toolResult") {
// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
const toolResults: ContentBlockParam[] = [];
// Add the current tool result
toolResults.push({
type: "tool_result",
tool_use_id: msg.toolCallId,
content: convertContentBlocks(msg.content),
is_error: msg.isError,
});
// Look ahead for consecutive toolResult messages
let j = i + 1;
while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
toolResults.push({
type: "tool_result",
tool_use_id: nextMsg.toolCallId,
content: convertContentBlocks(nextMsg.content),
is_error: nextMsg.isError,
});
j++;
}
// Skip the messages we've already processed
i = j - 1;
// Add a single user message with all tool results
params.push({
role: "user",
content: toolResults,
});
}
context: Context,
options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
if (!apiKey) {
throw new Error(`No API key for provider: ${model.provider}`);
}
// Add cache_control to the last user message to cache conversation history
if (cacheControl && params.length > 0) {
const lastMessage = params[params.length - 1];
if (lastMessage.role === "user") {
if (Array.isArray(lastMessage.content)) {
const lastBlock = lastMessage.content[lastMessage.content.length - 1];
if (
lastBlock &&
(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
) {
(lastBlock as any).cache_control = cacheControl;
}
} else if (typeof lastMessage.content === "string") {
lastMessage.content = [
{
type: "text",
text: lastMessage.content,
cache_control: cacheControl,
},
] as any;
}
}
const base = buildBaseOptions(model, options, apiKey);
if (!options?.reasoning) {
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
}
return params;
}
// Convert internal tool definitions into Anthropic Messages API tool specs.
// OAuth sessions get Claude Code canonical tool casing for stealth parity.
function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
	if (!tools) return [];
	return tools.map((tool) => {
		// TypeBox already emits JSON Schema, so parameters can be forwarded as-is.
		const schema = tool.parameters as any;
		return {
			name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
			description: tool.description,
			input_schema: {
				type: "object" as const,
				properties: schema.properties || {},
				required: schema.required || [],
			},
		};
	});
}
function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {
switch (reason) {
case "end_turn":
return "stop";
case "max_tokens":
return "length";
case "tool_use":
return "toolUse";
case "refusal":
return "error";
case "pause_turn": // Stop is good enough -> resubmit
return "stop";
case "stop_sequence":
return "stop"; // We don't supply stop sequences, so this should never happen
case "sensitive": // Content flagged by safety filters (not yet in SDK types)
return "error";
default:
// Handle unknown stop reasons gracefully (API may add new values)
throw new Error(`Unhandled stop reason: ${reason}`);
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
// For older models: use budget-based thinking
if (supportsAdaptiveThinking(model.id)) {
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
return streamAnthropic(model, context, {
...base,
thinkingEnabled: true,
effort,
} satisfies AnthropicOptions);
}
}
const adjusted = adjustMaxTokensForThinking(
base.maxTokens || 0,
model.maxTokens,
options.reasoning,
options.thinkingBudgets,
);
return streamAnthropic(model, context, {
...base,
maxTokens: adjusted.maxTokens,
thinkingEnabled: true,
thinkingBudgetTokens: adjusted.thinkingBudget,
} satisfies AnthropicOptions);
};

View file

@ -3,6 +3,7 @@ import type { AssistantMessage, AssistantMessageEvent, Context, Model, SimpleStr
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import type { BedrockOptions } from "./amazon-bedrock.js";
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
import { streamAnthropicVertex, streamSimpleAnthropicVertex } from "./anthropic-vertex.js";
import { streamAzureOpenAIResponses, streamSimpleAzureOpenAIResponses } from "./azure-openai-responses.js";
import { streamGoogle, streamSimpleGoogle } from "./google.js";
import { streamGoogleGeminiCli, streamSimpleGoogleGeminiCli } from "./google-gemini-cli.js";
@ -171,6 +172,12 @@ function registerBuiltInApiProviders(): void {
streamSimple: streamSimpleGoogleVertex,
});
registerApiProvider({
api: "anthropic-vertex",
stream: streamAnthropicVertex,
streamSimple: streamSimpleAnthropicVertex,
});
registerApiProvider({
api: "bedrock-converse-stream",
stream: streamBedrockLazy,

View file

@ -9,6 +9,7 @@ export type KnownApi =
| "azure-openai-responses"
| "openai-codex-responses"
| "anthropic-messages"
| "anthropic-vertex"
| "bedrock-converse-stream"
| "google-generative-ai"
| "google-gemini-cli"
@ -19,6 +20,7 @@ export type Api = KnownApi | (string & {});
export type KnownProvider =
| "amazon-bedrock"
| "anthropic"
| "anthropic-vertex"
| "google"
| "google-gemini-cli"
| "google-antigravity"

View file

@ -14,6 +14,7 @@ import type { ModelRegistry } from "./model-registry.js";
const defaultModelPerProvider: Record<KnownProvider, string> = {
"amazon-bedrock": "us.anthropic.claude-opus-4-6-v1",
anthropic: "claude-opus-4-6[1m]",
"anthropic-vertex": "claude-sonnet-4-6",
openai: "gpt-5.4",
"azure-openai-responses": "gpt-5.2",
"openai-codex": "gpt-5.4",

View file

@ -63,6 +63,7 @@ const TOOL_KEYS: ToolKeyConfig[] = [
/** Known LLM provider IDs that, if authed, mean the user doesn't need onboarding */
const LLM_PROVIDER_IDS = [
'anthropic',
'anthropic-vertex',
'openai',
'github-copilot',
'openai-codex',