feat: add anthropic-vertex provider for Claude on Vertex AI (#1533)
* feat: add anthropic-vertex provider for Claude models on Google Vertex AI Add a new anthropic-vertex provider that enables using Claude models (Opus 4.6, Sonnet 4.6, Haiku 4.5) through Google Vertex AI using the @anthropic-ai/vertex-sdk package. Follows the same pattern as the existing google/google-vertex provider split. Detection uses ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code) with CLOUD_ML_REGION for region selection, falling back to us-central1. Extracts shared Anthropic utilities into anthropic-shared.ts (message conversion, tool conversion, param building, stream processing) to avoid duplication between anthropic.ts and anthropic-vertex.ts. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add full Claude model set for anthropic-vertex provider Add 200K context window variants for Opus 4.6 and Sonnet 4.6, plus older models (Sonnet 4.5, Sonnet 4, Opus 4.5, Opus 4.1, Opus 4, Haiku 4.5). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: add @anthropic-ai/vertex-sdk to root dependencies Required for the published package to resolve the vertex SDK at runtime. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: remove unnecessary comments to match codebase style Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: remove duplicate stream functions after rebase Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Nathan Roe <nathan.roe@carvana.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
111537f460
commit
39cd932abb
14 changed files with 1236 additions and 873 deletions
|
|
@ -629,7 +629,7 @@ GSD isn't locked to one provider. It runs on the [Pi SDK](https://github.com/bad
|
|||
|
||||
### Built-in Providers
|
||||
|
||||
Anthropic, OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more.
|
||||
Anthropic, Anthropic (Vertex AI), OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more.
|
||||
|
||||
### OAuth / Max Plans
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ Pi isn't locked to one provider. It supports 20+ providers out of the box and le
|
|||
- Google Antigravity
|
||||
|
||||
**API keys (via environment variables):**
|
||||
- Anthropic, OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock
|
||||
- Anthropic, Anthropic (Vertex AI), OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock
|
||||
- Mistral, Groq, Cerebras, xAI, OpenRouter, Vercel AI Gateway
|
||||
- ZAI, OpenCode Zen, OpenCode Go, Hugging Face, Kimi, MiniMax
|
||||
|
||||
|
|
|
|||
71
package-lock.json
generated
71
package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "gsd-pi",
|
||||
"version": "2.31.2",
|
||||
"version": "2.33.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "gsd-pi",
|
||||
"version": "2.31.2",
|
||||
"version": "2.33.1",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
],
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@clack/prompts": "^1.1.0",
|
||||
"@google/genai": "^1.40.0",
|
||||
|
|
@ -96,6 +97,56 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk": {
|
||||
"version": "0.14.4",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.4.tgz",
|
||||
"integrity": "sha512-BZUPRWghZxfSFtAxU563wH+jfWBPoedAwsVxG35FhmNsjeV8tyfN+lFriWhCpcZApxA4NdT6Soov+PzfnxxD5g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": ">=0.50.3 <1",
|
||||
"google-auth-library": "^9.4.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/gcp-metadata": {
|
||||
"version": "6.1.1",
|
||||
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
|
||||
"integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"gaxios": "^6.1.1",
|
||||
"google-logging-utils": "^0.0.2",
|
||||
"json-bigint": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-auth-library": {
|
||||
"version": "9.15.1",
|
||||
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
|
||||
"integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"base64-js": "^1.3.0",
|
||||
"ecdsa-sig-formatter": "^1.0.11",
|
||||
"gaxios": "^6.1.1",
|
||||
"gcp-metadata": "^6.1.0",
|
||||
"gtoken": "^7.0.0",
|
||||
"jws": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-logging-utils": {
|
||||
"version": "0.0.2",
|
||||
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
|
||||
"integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@aws-crypto/crc32": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz",
|
||||
|
|
@ -5865,6 +5916,19 @@
|
|||
"integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/gtoken": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
|
||||
"integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"gaxios": "^6.0.0",
|
||||
"jws": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/has-flag": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
|
||||
|
|
@ -9066,6 +9130,7 @@
|
|||
"version": "0.57.1",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mistralai/mistralai": "^1.14.1",
|
||||
|
|
@ -9101,7 +9166,7 @@
|
|||
},
|
||||
"packages/pi-coding-agent": {
|
||||
"name": "@gsd/pi-coding-agent",
|
||||
"version": "2.31.2",
|
||||
"version": "2.33.1",
|
||||
"dependencies": {
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@clack/prompts": "^1.1.0",
|
||||
"@google/genai": "^1.40.0",
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mistralai/mistralai": "^1.14.1",
|
||||
|
|
|
|||
|
|
@ -73,6 +73,20 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|||
return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
||||
}
|
||||
|
||||
// Anthropic on Vertex AI uses Application Default Credentials.
|
||||
// Detected via ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code).
|
||||
if (provider === "anthropic-vertex") {
|
||||
const hasProject = !!process.env.ANTHROPIC_VERTEX_PROJECT_ID;
|
||||
if (hasProject) {
|
||||
return "<authenticated>";
|
||||
}
|
||||
// Fall back to Google Cloud project env vars
|
||||
const hasGoogleProject = !!(process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT);
|
||||
if (hasGoogleProject && hasVertexAdcCredentials()) {
|
||||
return "<authenticated>";
|
||||
}
|
||||
}
|
||||
|
||||
// Vertex AI uses Application Default Credentials, not API keys.
|
||||
// Auth is configured via `gcloud auth application-default login`.
|
||||
if (provider === "google-vertex") {
|
||||
|
|
|
|||
|
|
@ -1827,6 +1827,178 @@ export const MODELS = {
|
|||
maxTokens: 64000,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
},
|
||||
"anthropic-vertex": {
|
||||
"claude-opus-4-6": {
|
||||
id: "claude-opus-4-6",
|
||||
name: "Claude Opus 4.6 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 15,
|
||||
output: 75,
|
||||
cacheRead: 1.5,
|
||||
cacheWrite: 18.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 128000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-opus-4-6[1m]": {
|
||||
id: "claude-opus-4-6[1m]",
|
||||
name: "Claude Opus 4.6 1M (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 15,
|
||||
output: 75,
|
||||
cacheRead: 1.5,
|
||||
cacheWrite: 18.75,
|
||||
},
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 128000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-sonnet-4-6": {
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 3,
|
||||
output: 15,
|
||||
cacheRead: 0.3,
|
||||
cacheWrite: 3.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-sonnet-4-6[1m]": {
|
||||
id: "claude-sonnet-4-6[1m]",
|
||||
name: "Claude Sonnet 4.6 1M (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 3,
|
||||
output: 15,
|
||||
cacheRead: 0.3,
|
||||
cacheWrite: 3.75,
|
||||
},
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-sonnet-4-5@20250929": {
|
||||
id: "claude-sonnet-4-5@20250929",
|
||||
name: "Claude Sonnet 4.5 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 3,
|
||||
output: 15,
|
||||
cacheRead: 0.3,
|
||||
cacheWrite: 3.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-sonnet-4@20250514": {
|
||||
id: "claude-sonnet-4@20250514",
|
||||
name: "Claude Sonnet 4 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 3,
|
||||
output: 15,
|
||||
cacheRead: 0.3,
|
||||
cacheWrite: 3.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-opus-4-5@20251101": {
|
||||
id: "claude-opus-4-5@20251101",
|
||||
name: "Claude Opus 4.5 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 15,
|
||||
output: 75,
|
||||
cacheRead: 1.5,
|
||||
cacheWrite: 18.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 32000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-opus-4-1@20250805": {
|
||||
id: "claude-opus-4-1@20250805",
|
||||
name: "Claude Opus 4.1 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 15,
|
||||
output: 75,
|
||||
cacheRead: 1.5,
|
||||
cacheWrite: 18.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 32000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-opus-4@20250514": {
|
||||
id: "claude-opus-4@20250514",
|
||||
name: "Claude Opus 4 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 15,
|
||||
output: 75,
|
||||
cacheRead: 1.5,
|
||||
cacheWrite: 18.75,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 32000,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
"claude-haiku-4-5@20251001": {
|
||||
id: "claude-haiku-4-5@20251001",
|
||||
name: "Claude Haiku 4.5 (Vertex)",
|
||||
api: "anthropic-vertex",
|
||||
provider: "anthropic-vertex",
|
||||
baseUrl: "https://us-central1-aiplatform.googleapis.com",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 0.8,
|
||||
output: 4,
|
||||
cacheRead: 0.08,
|
||||
cacheWrite: 1,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"anthropic-vertex">,
|
||||
},
|
||||
"azure-openai-responses": {
|
||||
"codex-mini-latest": {
|
||||
id: "codex-mini-latest",
|
||||
|
|
|
|||
761
packages/pi-ai/src/providers/anthropic-shared.ts
Normal file
761
packages/pi-ai/src/providers/anthropic-shared.ts
Normal file
|
|
@ -0,0 +1,761 @@
|
|||
/**
|
||||
* Shared utilities for Anthropic providers (direct API and Vertex AI).
|
||||
*/
|
||||
import type Anthropic from "@anthropic-ai/sdk";
|
||||
import type {
|
||||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
ImageContent,
|
||||
Message,
|
||||
Model,
|
||||
ServerToolUseContent,
|
||||
StopReason,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
WebSearchResultContent,
|
||||
} from "../types.js";
|
||||
|
||||
/** API types that use the Anthropic Messages protocol */
|
||||
export type AnthropicApi = "anthropic-messages" | "anthropic-vertex";
|
||||
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
/** Effort levels accepted by Anthropic's adaptive-thinking `output_config`. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";

/** Provider-specific stream options layered on top of the generic StreamOptions. */
export interface AnthropicOptions extends StreamOptions {
	// Enables extended thinking (only honored when the model has `reasoning: true`).
	thinkingEnabled?: boolean;
	// Token budget for non-adaptive thinking models; defaults to 1024 in buildParams.
	thinkingBudgetTokens?: number;
	// Effort hint for adaptive-thinking models (Claude 4.6 generation).
	effort?: AnthropicEffort;
	// NOTE(review): declared but not consumed in the visible param-building code — confirm usage elsewhere.
	interleavedThinking?: boolean;
	// Mirrors the Anthropic `tool_choice` field; strings are wrapped as { type } in buildParams.
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
|
||||
|
||||
const claudeCodeTools = [
|
||||
"Read",
|
||||
"Write",
|
||||
"Edit",
|
||||
"Bash",
|
||||
"Grep",
|
||||
"Glob",
|
||||
"AskUserQuestion",
|
||||
"EnterPlanMode",
|
||||
"ExitPlanMode",
|
||||
"KillShell",
|
||||
"NotebookEdit",
|
||||
"Skill",
|
||||
"Task",
|
||||
"TaskOutput",
|
||||
"TodoWrite",
|
||||
"WebFetch",
|
||||
"WebSearch",
|
||||
];
|
||||
|
||||
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));
|
||||
|
||||
export const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? name;
|
||||
export const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
|
||||
if (tools && tools.length > 0) {
|
||||
const lowerName = name.toLowerCase();
|
||||
const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);
|
||||
if (matchedTool) return matchedTool.name;
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
export function getCacheControl(
|
||||
baseUrl: string,
|
||||
cacheRetention?: CacheRetention,
|
||||
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
|
||||
const retention = resolveCacheRetention(cacheRetention);
|
||||
if (retention === "none") {
|
||||
return { retention };
|
||||
}
|
||||
const ttl = retention === "long" && baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
|
||||
return {
|
||||
retention,
|
||||
cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
|
||||
};
|
||||
}
|
||||
|
||||
export function convertContentBlocks(content: (TextContent | ImageContent)[]):
|
||||
| string
|
||||
| Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "image";
|
||||
source: {
|
||||
type: "base64";
|
||||
media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
|
||||
data: string;
|
||||
};
|
||||
}
|
||||
> {
|
||||
const hasImages = content.some((c) => c.type === "image");
|
||||
if (!hasImages) {
|
||||
return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n"));
|
||||
}
|
||||
|
||||
const blocks = content.map((block) => {
|
||||
if (block.type === "text") {
|
||||
return {
|
||||
type: "text" as const,
|
||||
text: sanitizeSurrogates(block.text),
|
||||
};
|
||||
}
|
||||
return {
|
||||
type: "image" as const,
|
||||
source: {
|
||||
type: "base64" as const,
|
||||
media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: block.data,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const hasText = blocks.some((b) => b.type === "text");
|
||||
if (!hasText) {
|
||||
blocks.unshift({
|
||||
type: "text" as const,
|
||||
text: "(see attached image)",
|
||||
});
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
export function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
);
|
||||
}
|
||||
|
||||
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
|
||||
switch (level) {
|
||||
case "minimal":
|
||||
return "low";
|
||||
case "low":
|
||||
return "low";
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "xhigh":
|
||||
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6") ? "max" : "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
export function isTransientNetworkError(error: unknown): boolean {
|
||||
if (!(error instanceof Error)) return false;
|
||||
const msg = error.message.toLowerCase();
|
||||
const code = (error as NodeJS.ErrnoException).code;
|
||||
return (
|
||||
code === 'ECONNRESET' ||
|
||||
code === 'EPIPE' ||
|
||||
code === 'ETIMEDOUT' ||
|
||||
code === 'ENOTFOUND' ||
|
||||
code === 'EAI_AGAIN' ||
|
||||
msg.includes('connector_closed') ||
|
||||
msg.includes('socket hang up') ||
|
||||
msg.includes('network') ||
|
||||
msg.includes('connection') && msg.includes('closed') ||
|
||||
msg.includes('fetch failed')
|
||||
);
|
||||
}
|
||||
|
||||
export function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
|
||||
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
|
||||
|
||||
const retryAfter = headers.get("retry-after");
|
||||
if (retryAfter) {
|
||||
const seconds = Number(retryAfter);
|
||||
if (Number.isFinite(seconds)) {
|
||||
const delay = normalizeDelay(seconds * 1000);
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
const asDate = new Date(retryAfter).getTime();
|
||||
if (!Number.isNaN(asDate)) {
|
||||
const delay = normalizeDelay(asDate - Date.now());
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
}
|
||||
|
||||
for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
|
||||
const value = headers.get(header);
|
||||
if (value) {
|
||||
const resetSeconds = Number(value);
|
||||
if (Number.isFinite(resetSeconds)) {
|
||||
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function normalizeToolCallId(id: string): string {
|
||||
return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
|
||||
}
|
||||
|
||||
/**
 * Convert internal Message objects into Anthropic MessageParam entries.
 *
 * Behavior visible in this function:
 * - Whitespace-only text content is dropped; messages left empty are skipped.
 * - Image blocks are stripped when the model's `input` list lacks "image".
 * - Thinking blocks without a signature are downgraded to plain text blocks;
 *   redacted thinking is forwarded via its opaque signature data.
 * - Consecutive toolResult messages are folded into one user message holding
 *   multiple tool_result blocks.
 * - When `cacheControl` is provided, it is attached to the last block of the
 *   final user message (a prompt-cache breakpoint).
 */
export function convertMessages(
	messages: Message[],
	model: Model<AnthropicApi>,
	isOAuthToken: boolean,
	cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
	const params: MessageParam[] = [];

	// Pre-pass: provider-agnostic message transformation, normalizing tool-call ids.
	const transformedMessages = transformMessages(messages, model, normalizeToolCallId);

	for (let i = 0; i < transformedMessages.length; i++) {
		const msg = transformedMessages[i];

		if (msg.role === "user") {
			if (typeof msg.content === "string") {
				// Skip empty user strings entirely.
				if (msg.content.trim().length > 0) {
					params.push({
						role: "user",
						content: sanitizeSurrogates(msg.content),
					});
				}
			} else {
				const blocks: ContentBlockParam[] = msg.content.map((item) => {
					if (item.type === "text") {
						return {
							type: "text",
							text: sanitizeSurrogates(item.text),
						};
					} else {
						return {
							type: "image",
							source: {
								type: "base64",
								media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
								data: item.data,
							},
						};
					}
				});
				// Drop images for text-only models, then drop whitespace-only text blocks.
				let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
				filteredBlocks = filteredBlocks.filter((b) => {
					if (b.type === "text") {
						return b.text.trim().length > 0;
					}
					return true;
				});
				if (filteredBlocks.length === 0) continue;
				params.push({
					role: "user",
					content: filteredBlocks,
				});
			}
		} else if (msg.role === "assistant") {
			const blocks: ContentBlockParam[] = [];

			for (const block of msg.content) {
				if (block.type === "text") {
					if (block.text.trim().length === 0) continue;
					blocks.push({
						type: "text",
						text: sanitizeSurrogates(block.text),
					});
				} else if (block.type === "thinking") {
					if (block.redacted) {
						// Redacted thinking round-trips via its opaque data payload.
						blocks.push({
							type: "redacted_thinking",
							data: block.thinkingSignature!,
						});
						continue;
					}
					if (block.thinking.trim().length === 0) continue;
					if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
						// Unsigned thinking cannot be sent back as a thinking block;
						// send it as plain text instead.
						blocks.push({
							type: "text",
							text: sanitizeSurrogates(block.thinking),
						});
					} else {
						blocks.push({
							type: "thinking",
							thinking: sanitizeSurrogates(block.thinking),
							signature: block.thinkingSignature,
						});
					}
				} else if (block.type === "toolCall") {
					blocks.push({
						type: "tool_use",
						id: block.id,
						// OAuth (Claude Code) sessions require canonical tool casings.
						name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
						input: block.arguments ?? {},
					});
				} else if (block.type === "serverToolUse") {
					blocks.push({
						type: "server_tool_use",
						id: block.id,
						name: block.name,
						input: block.input ?? {},
					} as any);
				} else if (block.type === "webSearchResult") {
					blocks.push({
						type: "web_search_tool_result",
						tool_use_id: block.toolUseId,
						content: block.content,
					} as any);
				}
			}
			if (blocks.length === 0) continue;
			params.push({
				role: "assistant",
				content: blocks,
			});
		} else if (msg.role === "toolResult") {
			const toolResults: ContentBlockParam[] = [];

			toolResults.push({
				type: "tool_result",
				tool_use_id: msg.toolCallId,
				content: convertContentBlocks(msg.content),
				is_error: msg.isError,
			});

			// Fold any immediately-following toolResult messages into this
			// same user message, as the API expects grouped tool results.
			let j = i + 1;
			while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
				const nextMsg = transformedMessages[j] as ToolResultMessage;
				toolResults.push({
					type: "tool_result",
					tool_use_id: nextMsg.toolCallId,
					content: convertContentBlocks(nextMsg.content),
					is_error: nextMsg.isError,
				});
				j++;
			}

			// Skip over the messages we just consumed.
			i = j - 1;

			params.push({
				role: "user",
				content: toolResults,
			});
		}
	}

	// Attach the cache breakpoint to the last block of the final user message.
	if (cacheControl && params.length > 0) {
		const lastMessage = params[params.length - 1];
		if (lastMessage.role === "user") {
			if (Array.isArray(lastMessage.content)) {
				const lastBlock = lastMessage.content[lastMessage.content.length - 1];
				if (
					lastBlock &&
					(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
				) {
					(lastBlock as any).cache_control = cacheControl;
				}
			} else if (typeof lastMessage.content === "string") {
				// Promote a string body to a single text block so it can carry cache_control.
				lastMessage.content = [
					{
						type: "text",
						text: lastMessage.content,
						cache_control: cacheControl,
					},
				] as any;
			}
		}
	}

	return params;
}
|
||||
|
||||
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any;
|
||||
|
||||
return {
|
||||
name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: jsonSchema.properties || {},
|
||||
required: jsonSchema.required || [],
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Assemble the streaming MessageCreateParams for an Anthropic request from
 * the model, context, and provider options.
 *
 * Notable behavior visible here:
 * - Variant suffixes like "[1m]" are stripped from the model id before sending.
 * - max_tokens falls back to a third of the model's maxTokens (integer-truncated).
 * - OAuth sessions prepend the Claude Code identity system prompt.
 * - temperature is omitted whenever thinking is enabled.
 * - Adaptive-thinking models get `thinking: { type: "adaptive" }` (plus an
 *   effort output_config); older models get an explicit token budget.
 */
export function buildParams(
	model: Model<AnthropicApi>,
	context: Context,
	isOAuthToken: boolean,
	options?: AnthropicOptions,
): MessageCreateParamsStreaming {
	const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
	// Strip trailing "[...]" variant suffixes (e.g. "[1m]") to get the API model id.
	const apiModelId = model.id.replace(/\[.*\]$/, "");
	const params: MessageCreateParamsStreaming = {
		model: apiModelId,
		messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
		// Default budget: a third of the model maximum, truncated via bitwise-or.
		max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
		stream: true,
	};

	if (isOAuthToken) {
		// Claude Code OAuth tokens require this identity prompt as the first system block.
		params.system = [
			{
				type: "text",
				text: "You are Claude Code, Anthropic's official CLI for Claude.",
				...(cacheControl ? { cache_control: cacheControl } : {}),
			},
		];
		if (context.systemPrompt) {
			params.system.push({
				type: "text",
				text: sanitizeSurrogates(context.systemPrompt),
				...(cacheControl ? { cache_control: cacheControl } : {}),
			});
		}
	} else if (context.systemPrompt) {
		params.system = [
			{
				type: "text",
				text: sanitizeSurrogates(context.systemPrompt),
				...(cacheControl ? { cache_control: cacheControl } : {}),
			},
		];
	}

	// Temperature and extended thinking are mutually exclusive here.
	if (options?.temperature !== undefined && !options?.thinkingEnabled) {
		params.temperature = options.temperature;
	}

	if (context.tools) {
		params.tools = convertTools(context.tools, isOAuthToken);
	}

	if (options?.thinkingEnabled && model.reasoning) {
		if (supportsAdaptiveThinking(model.id)) {
			params.thinking = { type: "adaptive" };
			if (options.effort) {
				params.output_config = { effort: options.effort };
			}
		} else {
			params.thinking = {
				type: "enabled",
				budget_tokens: options.thinkingBudgetTokens || 1024,
			};
		}
	}

	// Only a string user_id is forwarded; other metadata keys are dropped.
	if (options?.metadata) {
		const userId = options.metadata.user_id;
		if (typeof userId === "string") {
			params.metadata = { user_id: userId };
		}
	}

	if (options?.toolChoice) {
		if (typeof options.toolChoice === "string") {
			params.tool_choice = { type: options.toolChoice };
		} else {
			params.tool_choice = options.toolChoice;
		}
	}

	return params;
}
|
||||
|
||||
export function mapStopReason(reason: string): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "error";
|
||||
case "pause_turn":
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop";
|
||||
case "sensitive":
|
||||
return "error";
|
||||
default:
|
||||
throw new Error(`Unhandled stop reason: ${reason}`);
|
||||
}
|
||||
}
|
||||
|
||||
/** Arguments for driving a single Anthropic streaming request. */
export interface StreamAnthropicArgs {
	// SDK client (direct Anthropic or the Vertex subclass — both expose messages.stream).
	client: Anthropic;
	model: Model<AnthropicApi>;
	context: Context;
	// True when authenticating with a Claude Code OAuth token (affects tool naming and system prompt).
	isOAuthToken: boolean;
	options?: AnthropicOptions;
	// NOTE(review): optional SDK class — presumably used to reconstruct clients on retry; confirm in processAnthropicStream.
	AnthropicSdkClass?: typeof Anthropic;
}
|
||||
|
||||
/**
 * Drive a streaming Anthropic Messages request and translate SDK events into
 * AssistantMessage events pushed onto `stream`. Shared by the direct Anthropic
 * provider and the Vertex-backed provider (which supplies a structurally
 * compatible client via {@link StreamAnthropicArgs}).
 *
 * The stream always terminates: "done" is pushed on success, "error" on any
 * failure or abort, followed by stream.end().
 */
export function processAnthropicStream(
	stream: AssistantMessageEventStream,
	args: StreamAnthropicArgs,
): void {
	const { client, model, context, isOAuthToken, options, AnthropicSdkClass } = args;

	(async () => {
		// Accumulator for the final assistant message; also sent as `partial`
		// with every intermediate event.
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: model.api as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
			timestamp: Date.now(),
		};

		try {
			let params = buildParams(model, context, isOAuthToken, options);
			// Callers may rewrite the outgoing payload via onPayload.
			const nextParams = await options?.onPayload?.(params, model);
			if (nextParams !== undefined) {
				params = nextParams as MessageCreateParamsStreaming;
			}
			const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
			stream.push({ type: "start", partial: output });

			// In-flight blocks carry the SDK's content index so deltas can be
			// matched back to them; the index is stripped on content_block_stop.
			type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number };
			const blocks = output.content as Block[];

			for await (const event of anthropicStream) {
				if (event.type === "message_start") {
					// Capture initial token usage from message_start
					// This ensures we have input token counts even if the stream is aborted early
					output.usage.input = event.message.usage.input_tokens || 0;
					output.usage.output = event.message.usage.output_tokens || 0;
					output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
					output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
					// Anthropic doesn't provide total_tokens, compute from components
					output.usage.totalTokens =
						output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
					calculateCost(model, output.usage);
				} else if (event.type === "content_block_start") {
					if (event.content_block.type === "text") {
						const block: Block = {
							type: "text",
							text: "",
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
					} else if (event.content_block.type === "thinking") {
						const block: Block = {
							type: "thinking",
							thinking: "",
							thinkingSignature: "",
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
					} else if (event.content_block.type === "redacted_thinking") {
						// Redacted thinking keeps the opaque signature but shows placeholder text.
						const block: Block = {
							type: "thinking",
							thinking: "[Reasoning redacted]",
							thinkingSignature: event.content_block.data,
							redacted: true,
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
					} else if (event.content_block.type === "tool_use") {
						const block: Block = {
							type: "toolCall",
							id: event.content_block.id,
							// OAuth sessions use Claude Code tool naming; map back to the caller's names.
							name: isOAuthToken
								? fromClaudeCodeName(event.content_block.name, context.tools)
								: event.content_block.name,
							arguments: (event.content_block.input as Record<string, any>) ?? {},
							partialJson: "",
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
					} else if ((event.content_block as any).type === "server_tool_use") {
						const serverBlock = event.content_block as any;
						const block: Block = {
							type: "serverToolUse",
							id: serverBlock.id,
							name: serverBlock.name,
							input: serverBlock.input,
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
					} else if ((event.content_block as any).type === "web_search_tool_result") {
						const resultBlock = event.content_block as any;
						const block: Block = {
							type: "webSearchResult",
							toolUseId: resultBlock.tool_use_id,
							content: resultBlock.content,
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output });
					}
				} else if (event.type === "content_block_delta") {
					if (event.delta.type === "text_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "text") {
							block.text += event.delta.text;
							stream.push({
								type: "text_delta",
								contentIndex: index,
								delta: event.delta.text,
								partial: output,
							});
						}
					} else if (event.delta.type === "thinking_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "thinking") {
							block.thinking += event.delta.thinking;
							stream.push({
								type: "thinking_delta",
								contentIndex: index,
								delta: event.delta.thinking,
								partial: output,
							});
						}
					} else if (event.delta.type === "input_json_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "toolCall") {
							// Tool arguments stream as raw JSON fragments; keep a best-effort parse.
							block.partialJson += event.delta.partial_json;
							block.arguments = parseStreamingJson(block.partialJson);
							stream.push({
								type: "toolcall_delta",
								contentIndex: index,
								delta: event.delta.partial_json,
								partial: output,
							});
						}
					} else if (event.delta.type === "signature_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "thinking") {
							block.thinkingSignature = block.thinkingSignature || "";
							block.thinkingSignature += event.delta.signature;
						}
					}
				} else if (event.type === "content_block_stop") {
					const index = blocks.findIndex((b) => b.index === event.index);
					const block = blocks[index];
					if (block) {
						delete (block as any).index;
						if (block.type === "text") {
							stream.push({
								type: "text_end",
								contentIndex: index,
								content: block.text,
								partial: output,
							});
						} else if (block.type === "thinking") {
							stream.push({
								type: "thinking_end",
								contentIndex: index,
								content: block.thinking,
								partial: output,
							});
						} else if (block.type === "toolCall") {
							block.arguments = parseStreamingJson(block.partialJson);
							delete (block as any).partialJson;
							stream.push({
								type: "toolcall_end",
								contentIndex: index,
								toolCall: block,
								partial: output,
							});
						}
						// serverToolUse and webSearchResult blocks just need index cleanup (already emitted on start)
					}
				} else if (event.type === "message_delta") {
					if (event.delta.stop_reason) {
						output.stopReason = mapStopReason(event.delta.stop_reason);
					}
					// Only update usage fields if present (not null).
					// Preserves input_tokens from message_start when proxies omit it in message_delta.
					if (event.usage.input_tokens != null) {
						output.usage.input = event.usage.input_tokens;
					}
					if (event.usage.output_tokens != null) {
						output.usage.output = event.usage.output_tokens;
					}
					if (event.usage.cache_read_input_tokens != null) {
						output.usage.cacheRead = event.usage.cache_read_input_tokens;
					}
					if (event.usage.cache_creation_input_tokens != null) {
						output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
					}
					// Anthropic doesn't provide total_tokens, compute from components
					output.usage.totalTokens =
						output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
					calculateCost(model, output.usage);
				}
			}

			if (options?.signal?.aborted) {
				throw new Error("Request was aborted");
			}

			if (output.stopReason === "aborted" || output.stopReason === "error") {
				throw new Error("An unknown error occurred");
			}

			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Strip in-flight indices so the partial message is well-formed.
			for (const block of output.content) delete (block as any).index;
			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
			if (model.provider === "alibaba-coding-plan") {
				output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
			}
			if (AnthropicSdkClass && error instanceof AnthropicSdkClass.APIError && error.headers) {
				const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
				if (retryAfterMs !== undefined) {
					output.retryAfterMs = retryAfterMs;
				}
			}
			// Mark transient network errors as retriable so auto-mode can
			// detect them and retry instead of stopping.
			if (isTransientNetworkError(error)) {
				output.retryAfterMs = output.retryAfterMs ?? 5000;
			}
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();
}
|
||||
130
packages/pi-ai/src/providers/anthropic-vertex.ts
Normal file
130
packages/pi-ai/src/providers/anthropic-vertex.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
// Lazy-loaded: Anthropic Vertex SDK is imported on first use, not at startup.
|
||||
// This avoids penalizing users who don't use Anthropic Vertex models.
|
||||
import type Anthropic from "@anthropic-ai/sdk";
|
||||
import type { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import type {
	Api,
	AssistantMessage,
	Context,
	Model,
	SimpleStreamOptions,
	StreamFunction,
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
|
||||
import {
|
||||
type AnthropicOptions,
|
||||
mapThinkingLevelToEffort,
|
||||
processAnthropicStream,
|
||||
supportsAdaptiveThinking,
|
||||
} from "./anthropic-shared.js";
|
||||
|
||||
let _AnthropicVertexClass: typeof AnthropicVertex | undefined;
|
||||
let _AnthropicSdkClass: typeof Anthropic | undefined;
|
||||
|
||||
async function getAnthropicVertexClass(): Promise<typeof AnthropicVertex> {
|
||||
if (!_AnthropicVertexClass) {
|
||||
const mod = await import("@anthropic-ai/vertex-sdk");
|
||||
_AnthropicVertexClass = mod.AnthropicVertex;
|
||||
}
|
||||
return _AnthropicVertexClass;
|
||||
}
|
||||
|
||||
async function getAnthropicSdkClass(): Promise<typeof Anthropic> {
|
||||
if (!_AnthropicSdkClass) {
|
||||
const mod = await import("@anthropic-ai/sdk");
|
||||
_AnthropicSdkClass = mod.default;
|
||||
}
|
||||
return _AnthropicSdkClass;
|
||||
}
|
||||
|
||||
function resolveProjectId(): string {
|
||||
const projectId = process.env.ANTHROPIC_VERTEX_PROJECT_ID
|
||||
|| process.env.GOOGLE_CLOUD_PROJECT
|
||||
|| process.env.GCLOUD_PROJECT;
|
||||
if (!projectId) {
|
||||
throw new Error(
|
||||
"Anthropic Vertex requires a project ID. Set ANTHROPIC_VERTEX_PROJECT_ID, GOOGLE_CLOUD_PROJECT, or GCLOUD_PROJECT.",
|
||||
);
|
||||
}
|
||||
return projectId;
|
||||
}
|
||||
|
||||
function resolveRegion(): string {
|
||||
return process.env.CLOUD_ML_REGION
|
||||
|| process.env.GOOGLE_CLOUD_LOCATION
|
||||
|| "us-central1";
|
||||
}
|
||||
|
||||
async function createVertexClient(): Promise<AnthropicVertex> {
|
||||
const AnthropicVertexClass = await getAnthropicVertexClass();
|
||||
const projectId = resolveProjectId();
|
||||
const region = resolveRegion();
|
||||
|
||||
return new AnthropicVertexClass({
|
||||
projectId,
|
||||
region,
|
||||
});
|
||||
}
|
||||
|
||||
export const streamAnthropicVertex: StreamFunction<"anthropic-vertex", AnthropicOptions> = (
|
||||
model: Model<"anthropic-vertex">,
|
||||
context: Context,
|
||||
options?: AnthropicOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const stream = new AssistantMessageEventStream();
|
||||
|
||||
(async () => {
|
||||
const client = await createVertexClient();
|
||||
const AnthropicSdk = await getAnthropicSdkClass();
|
||||
|
||||
processAnthropicStream(stream, {
|
||||
client: client as unknown as Anthropic,
|
||||
model,
|
||||
context,
|
||||
isOAuthToken: false,
|
||||
options,
|
||||
AnthropicSdkClass: AnthropicSdk,
|
||||
});
|
||||
})();
|
||||
|
||||
return stream;
|
||||
};
|
||||
|
||||
export const streamSimpleAnthropicVertex: StreamFunction<"anthropic-vertex", SimpleStreamOptions> = (
|
||||
model: Model<"anthropic-vertex">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key found for provider: ${model.provider}. Set ANTHROPIC_VERTEX_PROJECT_ID to use Claude on Vertex AI.`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamAnthropicVertex(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
||||
return streamAnthropicVertex(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
effort,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamAnthropicVertex(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: adjusted.thinkingBudget,
|
||||
} satisfies AnthropicOptions);
|
||||
};
|
||||
|
|
@ -1,40 +1,29 @@
|
|||
// Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup.
|
||||
// This avoids penalizing users who don't use Anthropic models.
|
||||
import type Anthropic from "@anthropic-ai/sdk";
|
||||
import type {
|
||||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
CacheRetention,
|
||||
Context,
|
||||
ImageContent,
|
||||
Message,
|
||||
Model,
|
||||
ServerToolUseContent,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
StreamOptions,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
WebSearchResultContent,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
|
||||
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import {
|
||||
type AnthropicEffort,
|
||||
type AnthropicOptions,
|
||||
extractRetryAfterMs,
|
||||
mapThinkingLevelToEffort,
|
||||
processAnthropicStream,
|
||||
supportsAdaptiveThinking,
|
||||
} from "./anthropic-shared.js";
|
||||
|
||||
// Re-export types used by other modules
|
||||
export type { AnthropicEffort, AnthropicOptions };
|
||||
export { extractRetryAfterMs };
|
||||
|
||||
let _AnthropicClass: typeof Anthropic | undefined;
|
||||
async function getAnthropicClass(): Promise<typeof Anthropic> {
|
||||
|
|
@ -45,154 +34,9 @@ async function getAnthropicClass(): Promise<typeof Anthropic> {
|
|||
return _AnthropicClass;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve cache retention preference.
|
||||
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
||||
*/
|
||||
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
|
||||
if (cacheRetention) {
|
||||
return cacheRetention;
|
||||
}
|
||||
if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
|
||||
return "long";
|
||||
}
|
||||
return "short";
|
||||
}
|
||||
|
||||
function getCacheControl(
|
||||
baseUrl: string,
|
||||
cacheRetention?: CacheRetention,
|
||||
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
|
||||
const retention = resolveCacheRetention(cacheRetention);
|
||||
if (retention === "none") {
|
||||
return { retention };
|
||||
}
|
||||
const ttl = retention === "long" && baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
|
||||
return {
|
||||
retention,
|
||||
cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
|
||||
};
|
||||
}
|
||||
|
||||
// Stealth mode: Mimic Claude Code's tool naming exactly
|
||||
const claudeCodeVersion = "2.1.62";
|
||||
|
||||
// Claude Code 2.x tool names (canonical casing)
|
||||
// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
|
||||
// To update: https://github.com/badlogic/cchistory
|
||||
const claudeCodeTools = [
|
||||
"Read",
|
||||
"Write",
|
||||
"Edit",
|
||||
"Bash",
|
||||
"Grep",
|
||||
"Glob",
|
||||
"AskUserQuestion",
|
||||
"EnterPlanMode",
|
||||
"ExitPlanMode",
|
||||
"KillShell",
|
||||
"NotebookEdit",
|
||||
"Skill",
|
||||
"Task",
|
||||
"TaskOutput",
|
||||
"TodoWrite",
|
||||
"WebFetch",
|
||||
"WebSearch",
|
||||
];
|
||||
|
||||
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));
|
||||
|
||||
// Convert tool name to CC canonical casing if it matches (case-insensitive)
|
||||
const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? name;
|
||||
const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
|
||||
if (tools && tools.length > 0) {
|
||||
const lowerName = name.toLowerCase();
|
||||
const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);
|
||||
if (matchedTool) return matchedTool.name;
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
/**
|
||||
* Convert content blocks to Anthropic API format
|
||||
*/
|
||||
function convertContentBlocks(content: (TextContent | ImageContent)[]):
|
||||
| string
|
||||
| Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "image";
|
||||
source: {
|
||||
type: "base64";
|
||||
media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
|
||||
data: string;
|
||||
};
|
||||
}
|
||||
> {
|
||||
// If only text blocks, return as concatenated string for simplicity
|
||||
const hasImages = content.some((c) => c.type === "image");
|
||||
if (!hasImages) {
|
||||
return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n"));
|
||||
}
|
||||
|
||||
// If we have images, convert to content block array
|
||||
const blocks = content.map((block) => {
|
||||
if (block.type === "text") {
|
||||
return {
|
||||
type: "text" as const,
|
||||
text: sanitizeSurrogates(block.text),
|
||||
};
|
||||
}
|
||||
return {
|
||||
type: "image" as const,
|
||||
source: {
|
||||
type: "base64" as const,
|
||||
media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: block.data,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
// If only images (no text), add placeholder text block
|
||||
const hasText = blocks.some((b) => b.type === "text");
|
||||
if (!hasText) {
|
||||
blocks.unshift({
|
||||
type: "text" as const,
|
||||
text: "(see attached image)",
|
||||
});
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
/** Thinking effort levels for models with adaptive thinking. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";

/** Options accepted by streamAnthropic (extends the generic StreamOptions). */
export interface AnthropicOptions extends StreamOptions {
	/**
	 * Enable extended thinking.
	 * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
	 * For older models: uses budget-based thinking with thinkingBudgetTokens.
	 */
	thinkingEnabled?: boolean;
	/**
	 * Token budget for extended thinking (older models only).
	 * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
	 */
	thinkingBudgetTokens?: number;
	/**
	 * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6).
	 * Controls how much thinking Claude allocates:
	 * - "max": Always thinks with no constraints (Opus 4.6 only)
	 * - "high": Always thinks, deep reasoning (default)
	 * - "medium": Moderate thinking, may skip for simple queries
	 * - "low": Minimal thinking, skips for simple tasks
	 * Ignored for older models.
	 */
	effort?: AnthropicEffort;
	/** Interleaved-thinking toggle forwarded to createClient; treated as true when omitted. */
	interleavedThinking?: boolean;
	/** Tool invocation preference: let the model choose, require any tool, forbid tools, or force one named tool. */
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
|
||||
|
||||
function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
|
||||
const merged: Record<string, string> = {};
|
||||
for (const headers of headerSources) {
|
||||
|
|
@ -203,410 +47,6 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
|
|||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect transient network errors that are likely to succeed on retry.
|
||||
* Covers WebSocket disconnects (Tailscale, VPN), TCP resets, and DNS failures.
|
||||
*/
|
||||
function isTransientNetworkError(error: unknown): boolean {
|
||||
if (!(error instanceof Error)) return false;
|
||||
const msg = error.message.toLowerCase();
|
||||
const code = (error as NodeJS.ErrnoException).code;
|
||||
return (
|
||||
code === 'ECONNRESET' ||
|
||||
code === 'EPIPE' ||
|
||||
code === 'ETIMEDOUT' ||
|
||||
code === 'ENOTFOUND' ||
|
||||
code === 'EAI_AGAIN' ||
|
||||
msg.includes('connector_closed') ||
|
||||
msg.includes('socket hang up') ||
|
||||
msg.includes('network') ||
|
||||
msg.includes('connection') && msg.includes('closed') ||
|
||||
msg.includes('fetch failed')
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract retry delay from Anthropic error response headers (in milliseconds).
|
||||
* Checks: retry-after (seconds or RFC date), x-ratelimit-reset-requests, x-ratelimit-reset-tokens.
|
||||
* Returns undefined if no valid delay is found or if the delay is in the past.
|
||||
*/
|
||||
function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
|
||||
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
|
||||
|
||||
const retryAfter = headers.get("retry-after");
|
||||
if (retryAfter) {
|
||||
const seconds = Number(retryAfter);
|
||||
if (Number.isFinite(seconds)) {
|
||||
const delay = normalizeDelay(seconds * 1000);
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
const asDate = new Date(retryAfter).getTime();
|
||||
if (!Number.isNaN(asDate)) {
|
||||
const delay = normalizeDelay(asDate - Date.now());
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
}
|
||||
|
||||
// x-ratelimit-reset-requests / x-ratelimit-reset-tokens are Unix timestamps (seconds)
|
||||
for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
|
||||
const value = headers.get(header);
|
||||
if (value) {
|
||||
const resetSeconds = Number(value);
|
||||
if (Number.isFinite(resetSeconds)) {
|
||||
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Stream a response from the Anthropic Messages API (including compatible
 * endpoints such as GitHub Copilot's). Translates SDK stream events into
 * AssistantMessage events; the returned stream always terminates with either
 * a "done" or an "error" event.
 */
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
	model: Model<"anthropic-messages">,
	context: Context,
	options?: AnthropicOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();

	(async () => {
		// Accumulator for the final assistant message; also sent as `partial`
		// with every intermediate event.
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: model.api as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
			timestamp: Date.now(),
		};

		try {
			const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";

			// GitHub Copilot's Anthropic endpoint needs per-request headers derived
			// from the outgoing messages (e.g. vision support).
			let copilotDynamicHeaders: Record<string, string> | undefined;
			if (model.provider === "github-copilot") {
				const hasImages = hasCopilotVisionInput(context.messages);
				copilotDynamicHeaders = buildCopilotDynamicHeaders({
					messages: context.messages,
					hasImages,
				});
			}

			const { client, isOAuthToken } = await createClient(
				model,
				apiKey,
				options?.interleavedThinking ?? true,
				options?.headers,
				copilotDynamicHeaders,
			);
			let params = buildParams(model, context, isOAuthToken, options);
			// Callers may rewrite the outgoing payload via onPayload.
			const nextParams = await options?.onPayload?.(params, model);
			if (nextParams !== undefined) {
				params = nextParams as MessageCreateParamsStreaming;
			}
			const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
			stream.push({ type: "start", partial: output });

			// In-flight blocks carry the SDK's content index so deltas can be
			// matched back to them; the index is stripped on content_block_stop.
			type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number };
			const blocks = output.content as Block[];

			for await (const event of anthropicStream) {
				if (event.type === "message_start") {
					// Capture initial token usage from message_start event
					// This ensures we have input token counts even if the stream is aborted early
					output.usage.input = event.message.usage.input_tokens || 0;
					output.usage.output = event.message.usage.output_tokens || 0;
					output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
					output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
					// Anthropic doesn't provide total_tokens, compute from components
					output.usage.totalTokens =
						output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
					calculateCost(model, output.usage);
				} else if (event.type === "content_block_start") {
					if (event.content_block.type === "text") {
						const block: Block = {
							type: "text",
							text: "",
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
					} else if (event.content_block.type === "thinking") {
						const block: Block = {
							type: "thinking",
							thinking: "",
							thinkingSignature: "",
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
					} else if (event.content_block.type === "redacted_thinking") {
						// Redacted thinking keeps the opaque signature but shows placeholder text.
						const block: Block = {
							type: "thinking",
							thinking: "[Reasoning redacted]",
							thinkingSignature: event.content_block.data,
							redacted: true,
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
					} else if (event.content_block.type === "tool_use") {
						const block: Block = {
							type: "toolCall",
							id: event.content_block.id,
							// OAuth sessions use Claude Code tool naming; map back to the caller's names.
							name: isOAuthToken
								? fromClaudeCodeName(event.content_block.name, context.tools)
								: event.content_block.name,
							arguments: (event.content_block.input as Record<string, any>) ?? {},
							partialJson: "",
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
					} else if ((event.content_block as any).type === "server_tool_use") {
						const serverBlock = event.content_block as any;
						const block: Block = {
							type: "serverToolUse",
							id: serverBlock.id,
							name: serverBlock.name,
							input: serverBlock.input,
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
					} else if ((event.content_block as any).type === "web_search_tool_result") {
						const resultBlock = event.content_block as any;
						const block: Block = {
							type: "webSearchResult",
							toolUseId: resultBlock.tool_use_id,
							content: resultBlock.content,
							index: event.index,
						};
						output.content.push(block);
						stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output });
					}
				} else if (event.type === "content_block_delta") {
					if (event.delta.type === "text_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "text") {
							block.text += event.delta.text;
							stream.push({
								type: "text_delta",
								contentIndex: index,
								delta: event.delta.text,
								partial: output,
							});
						}
					} else if (event.delta.type === "thinking_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "thinking") {
							block.thinking += event.delta.thinking;
							stream.push({
								type: "thinking_delta",
								contentIndex: index,
								delta: event.delta.thinking,
								partial: output,
							});
						}
					} else if (event.delta.type === "input_json_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "toolCall") {
							// Tool arguments stream as raw JSON fragments; keep a best-effort parse.
							block.partialJson += event.delta.partial_json;
							block.arguments = parseStreamingJson(block.partialJson);
							stream.push({
								type: "toolcall_delta",
								contentIndex: index,
								delta: event.delta.partial_json,
								partial: output,
							});
						}
					} else if (event.delta.type === "signature_delta") {
						const index = blocks.findIndex((b) => b.index === event.index);
						const block = blocks[index];
						if (block && block.type === "thinking") {
							block.thinkingSignature = block.thinkingSignature || "";
							block.thinkingSignature += event.delta.signature;
						}
					}
				} else if (event.type === "content_block_stop") {
					const index = blocks.findIndex((b) => b.index === event.index);
					const block = blocks[index];
					if (block) {
						delete (block as any).index;
						if (block.type === "text") {
							stream.push({
								type: "text_end",
								contentIndex: index,
								content: block.text,
								partial: output,
							});
						} else if (block.type === "thinking") {
							stream.push({
								type: "thinking_end",
								contentIndex: index,
								content: block.thinking,
								partial: output,
							});
						} else if (block.type === "toolCall") {
							block.arguments = parseStreamingJson(block.partialJson);
							delete (block as any).partialJson;
							stream.push({
								type: "toolcall_end",
								contentIndex: index,
								toolCall: block,
								partial: output,
							});
						}
						// serverToolUse and webSearchResult blocks just need index cleanup (already emitted on start)
					}
				} else if (event.type === "message_delta") {
					if (event.delta.stop_reason) {
						output.stopReason = mapStopReason(event.delta.stop_reason);
					}
					// Only update usage fields if present (not null).
					// Preserves input_tokens from message_start when proxies omit it in message_delta.
					if (event.usage.input_tokens != null) {
						output.usage.input = event.usage.input_tokens;
					}
					if (event.usage.output_tokens != null) {
						output.usage.output = event.usage.output_tokens;
					}
					if (event.usage.cache_read_input_tokens != null) {
						output.usage.cacheRead = event.usage.cache_read_input_tokens;
					}
					if (event.usage.cache_creation_input_tokens != null) {
						output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
					}
					// Anthropic doesn't provide total_tokens, compute from components
					output.usage.totalTokens =
						output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
					calculateCost(model, output.usage);
				}
			}

			if (options?.signal?.aborted) {
				throw new Error("Request was aborted");
			}

			if (output.stopReason === "aborted" || output.stopReason === "error") {
				throw new Error("An unknown error occurred");
			}

			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Strip in-flight indices so the partial message is well-formed.
			for (const block of output.content) delete (block as any).index;
			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
			if (model.provider === "alibaba-coding-plan") {
				output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
			}
			// Use the lazily-loaded SDK class (if it loaded) to recognize APIError.
			const AnthropicSdk = _AnthropicClass;
			if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) {
				const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
				if (retryAfterMs !== undefined) {
					output.retryAfterMs = retryAfterMs;
				}
			}
			// Mark transient network errors as retriable so auto-mode can
			// detect them and retry instead of stopping (#833).
			if (isTransientNetworkError(error)) {
				output.retryAfterMs = output.retryAfterMs ?? 5000;
			}
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();

	return stream;
};
|
||||
|
||||
/**
|
||||
* Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6)
|
||||
*/
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
// Opus 4.6 and Sonnet 4.6 model IDs (with or without date suffix)
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
|
||||
* Note: effort "max" is only valid on Opus 4.6.
|
||||
*/
|
||||
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"], modelId: string): AnthropicEffort {
|
||||
switch (level) {
|
||||
case "minimal":
|
||||
return "low";
|
||||
case "low":
|
||||
return "low";
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "xhigh":
|
||||
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6") ? "max" : "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
||||
// For older models: use budget-based thinking
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
effort,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: adjusted.thinkingBudget,
|
||||
} satisfies AnthropicOptions);
|
||||
};
|
||||
|
||||
function isOAuthToken(apiKey: string): boolean {
|
||||
return apiKey.includes("sk-ant-oat");
|
||||
}
|
||||
|
|
@ -702,315 +142,83 @@ async function createClient(
|
|||
return { client, isOAuthToken: false };
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
isOAuthToken: boolean,
|
||||
options?: AnthropicOptions,
|
||||
): MessageCreateParamsStreaming {
|
||||
const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
|
||||
// Strip variant suffixes like [1m] from model ID before sending to the API.
|
||||
// The API only accepts the base model ID (e.g. "claude-opus-4-6"),
|
||||
// not internal variant identifiers (e.g. "claude-opus-4-6[1m]").
|
||||
// This applies to all auth methods — API keys, OAuth, and Copilot alike.
|
||||
const apiModelId = model.id.replace(/\[.*\]$/, "");
|
||||
const params: MessageCreateParamsStreaming = {
|
||||
model: apiModelId,
|
||||
messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
|
||||
max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
|
||||
stream: true,
|
||||
};
|
||||
): AssistantMessageEventStream => {
|
||||
const stream = new AssistantMessageEventStream();
|
||||
|
||||
// For OAuth tokens, we MUST include Claude Code identity
|
||||
if (isOAuthToken) {
|
||||
params.system = [
|
||||
{
|
||||
type: "text",
|
||||
text: "You are Claude Code, Anthropic's official CLI for Claude.",
|
||||
...(cacheControl ? { cache_control: cacheControl } : {}),
|
||||
},
|
||||
];
|
||||
if (context.systemPrompt) {
|
||||
params.system.push({
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(context.systemPrompt),
|
||||
...(cacheControl ? { cache_control: cacheControl } : {}),
|
||||
(async () => {
|
||||
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
|
||||
|
||||
let copilotDynamicHeaders: Record<string, string> | undefined;
|
||||
if (model.provider === "github-copilot") {
|
||||
const hasImages = hasCopilotVisionInput(context.messages);
|
||||
copilotDynamicHeaders = buildCopilotDynamicHeaders({
|
||||
messages: context.messages,
|
||||
hasImages,
|
||||
});
|
||||
}
|
||||
} else if (context.systemPrompt) {
|
||||
// Add cache control to system prompt for non-OAuth tokens
|
||||
params.system = [
|
||||
{
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(context.systemPrompt),
|
||||
...(cacheControl ? { cache_control: cacheControl } : {}),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
// Temperature is incompatible with extended thinking (adaptive or budget-based).
|
||||
if (options?.temperature !== undefined && !options?.thinkingEnabled) {
|
||||
params.temperature = options.temperature;
|
||||
}
|
||||
const { client, isOAuthToken: isOAuth } = await createClient(
|
||||
model,
|
||||
apiKey,
|
||||
options?.interleavedThinking ?? true,
|
||||
options?.headers,
|
||||
copilotDynamicHeaders,
|
||||
);
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, isOAuthToken);
|
||||
}
|
||||
processAnthropicStream(stream, {
|
||||
client,
|
||||
model,
|
||||
context,
|
||||
isOAuthToken: isOAuth,
|
||||
options,
|
||||
AnthropicSdkClass: _AnthropicClass,
|
||||
});
|
||||
})();
|
||||
|
||||
// Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models)
|
||||
if (options?.thinkingEnabled && model.reasoning) {
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
// Adaptive thinking: Claude decides when and how much to think
|
||||
params.thinking = { type: "adaptive" };
|
||||
if (options.effort) {
|
||||
params.output_config = { effort: options.effort };
|
||||
}
|
||||
} else {
|
||||
// Budget-based thinking for older models
|
||||
params.thinking = {
|
||||
type: "enabled",
|
||||
budget_tokens: options.thinkingBudgetTokens || 1024,
|
||||
};
|
||||
}
|
||||
}
|
||||
return stream;
|
||||
};
|
||||
|
||||
if (options?.metadata) {
|
||||
const userId = options.metadata.user_id;
|
||||
if (typeof userId === "string") {
|
||||
params.metadata = { user_id: userId };
|
||||
}
|
||||
}
|
||||
|
||||
if (options?.toolChoice) {
|
||||
if (typeof options.toolChoice === "string") {
|
||||
params.tool_choice = { type: options.toolChoice };
|
||||
} else {
|
||||
params.tool_choice = options.toolChoice;
|
||||
}
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Normalize tool call IDs to match Anthropic's required pattern and length
|
||||
function normalizeToolCallId(id: string): string {
|
||||
return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
|
||||
}
|
||||
|
||||
function convertMessages(
|
||||
messages: Message[],
|
||||
export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
isOAuthToken: boolean,
|
||||
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
||||
): MessageParam[] {
|
||||
const params: MessageParam[] = [];
|
||||
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
|
||||
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const msg = transformedMessages[i];
|
||||
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
if (msg.content.trim().length > 0) {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: sanitizeSurrogates(msg.content),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const blocks: ContentBlockParam[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(item.text),
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
});
|
||||
let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
|
||||
filteredBlocks = filteredBlocks.filter((b) => {
|
||||
if (b.type === "text") {
|
||||
return b.text.trim().length > 0;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
if (filteredBlocks.length === 0) continue;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredBlocks,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const blocks: ContentBlockParam[] = [];
|
||||
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "text") {
|
||||
if (block.text.trim().length === 0) continue;
|
||||
blocks.push({
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(block.text),
|
||||
});
|
||||
} else if (block.type === "thinking") {
|
||||
// Redacted thinking: pass the opaque payload back as redacted_thinking
|
||||
if (block.redacted) {
|
||||
blocks.push({
|
||||
type: "redacted_thinking",
|
||||
data: block.thinkingSignature!,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
if (block.thinking.trim().length === 0) continue;
|
||||
// If thinking signature is missing/empty (e.g., from aborted stream),
|
||||
// convert to plain text block without <thinking> tags to avoid API rejection
|
||||
// and prevent Claude from mimicking the tags in responses
|
||||
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
|
||||
blocks.push({
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(block.thinking),
|
||||
});
|
||||
} else {
|
||||
blocks.push({
|
||||
type: "thinking",
|
||||
thinking: sanitizeSurrogates(block.thinking),
|
||||
signature: block.thinkingSignature,
|
||||
});
|
||||
}
|
||||
} else if (block.type === "toolCall") {
|
||||
blocks.push({
|
||||
type: "tool_use",
|
||||
id: block.id,
|
||||
name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
|
||||
input: block.arguments ?? {},
|
||||
});
|
||||
} else if (block.type === "serverToolUse") {
|
||||
blocks.push({
|
||||
type: "server_tool_use",
|
||||
id: block.id,
|
||||
name: block.name,
|
||||
input: block.input ?? {},
|
||||
} as any);
|
||||
} else if (block.type === "webSearchResult") {
|
||||
blocks.push({
|
||||
type: "web_search_tool_result",
|
||||
tool_use_id: block.toolUseId,
|
||||
content: block.content,
|
||||
} as any);
|
||||
}
|
||||
}
|
||||
if (blocks.length === 0) continue;
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: blocks,
|
||||
});
|
||||
} else if (msg.role === "toolResult") {
|
||||
// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
|
||||
const toolResults: ContentBlockParam[] = [];
|
||||
|
||||
// Add the current tool result
|
||||
toolResults.push({
|
||||
type: "tool_result",
|
||||
tool_use_id: msg.toolCallId,
|
||||
content: convertContentBlocks(msg.content),
|
||||
is_error: msg.isError,
|
||||
});
|
||||
|
||||
// Look ahead for consecutive toolResult messages
|
||||
let j = i + 1;
|
||||
while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
|
||||
const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
|
||||
toolResults.push({
|
||||
type: "tool_result",
|
||||
tool_use_id: nextMsg.toolCallId,
|
||||
content: convertContentBlocks(nextMsg.content),
|
||||
is_error: nextMsg.isError,
|
||||
});
|
||||
j++;
|
||||
}
|
||||
|
||||
// Skip the messages we've already processed
|
||||
i = j - 1;
|
||||
|
||||
// Add a single user message with all tool results
|
||||
params.push({
|
||||
role: "user",
|
||||
content: toolResults,
|
||||
});
|
||||
}
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
): AssistantMessageEventStream => {
|
||||
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
||||
if (!apiKey) {
|
||||
throw new Error(`No API key for provider: ${model.provider}`);
|
||||
}
|
||||
|
||||
// Add cache_control to the last user message to cache conversation history
|
||||
if (cacheControl && params.length > 0) {
|
||||
const lastMessage = params[params.length - 1];
|
||||
if (lastMessage.role === "user") {
|
||||
if (Array.isArray(lastMessage.content)) {
|
||||
const lastBlock = lastMessage.content[lastMessage.content.length - 1];
|
||||
if (
|
||||
lastBlock &&
|
||||
(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
|
||||
) {
|
||||
(lastBlock as any).cache_control = cacheControl;
|
||||
}
|
||||
} else if (typeof lastMessage.content === "string") {
|
||||
lastMessage.content = [
|
||||
{
|
||||
type: "text",
|
||||
text: lastMessage.content,
|
||||
cache_control: cacheControl,
|
||||
},
|
||||
] as any;
|
||||
}
|
||||
}
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
if (!options?.reasoning) {
|
||||
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema
|
||||
|
||||
return {
|
||||
name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: jsonSchema.properties || {},
|
||||
required: jsonSchema.required || [],
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "error";
|
||||
case "pause_turn": // Stop is good enough -> resubmit
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop"; // We don't supply stop sequences, so this should never happen
|
||||
case "sensitive": // Content flagged by safety filters (not yet in SDK types)
|
||||
return "error";
|
||||
default:
|
||||
// Handle unknown stop reasons gracefully (API may add new values)
|
||||
throw new Error(`Unhandled stop reason: ${reason}`);
|
||||
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
||||
// For older models: use budget-based thinking
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
effort,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: adjusted.thinkingBudget,
|
||||
} satisfies AnthropicOptions);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import type { AssistantMessage, AssistantMessageEvent, Context, Model, SimpleStr
|
|||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import type { BedrockOptions } from "./amazon-bedrock.js";
|
||||
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
|
||||
import { streamAnthropicVertex, streamSimpleAnthropicVertex } from "./anthropic-vertex.js";
|
||||
import { streamAzureOpenAIResponses, streamSimpleAzureOpenAIResponses } from "./azure-openai-responses.js";
|
||||
import { streamGoogle, streamSimpleGoogle } from "./google.js";
|
||||
import { streamGoogleGeminiCli, streamSimpleGoogleGeminiCli } from "./google-gemini-cli.js";
|
||||
|
|
@ -171,6 +172,12 @@ function registerBuiltInApiProviders(): void {
|
|||
streamSimple: streamSimpleGoogleVertex,
|
||||
});
|
||||
|
||||
registerApiProvider({
|
||||
api: "anthropic-vertex",
|
||||
stream: streamAnthropicVertex,
|
||||
streamSimple: streamSimpleAnthropicVertex,
|
||||
});
|
||||
|
||||
registerApiProvider({
|
||||
api: "bedrock-converse-stream",
|
||||
stream: streamBedrockLazy,
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ export type KnownApi =
|
|||
| "azure-openai-responses"
|
||||
| "openai-codex-responses"
|
||||
| "anthropic-messages"
|
||||
| "anthropic-vertex"
|
||||
| "bedrock-converse-stream"
|
||||
| "google-generative-ai"
|
||||
| "google-gemini-cli"
|
||||
|
|
@ -19,6 +20,7 @@ export type Api = KnownApi | (string & {});
|
|||
export type KnownProvider =
|
||||
| "amazon-bedrock"
|
||||
| "anthropic"
|
||||
| "anthropic-vertex"
|
||||
| "google"
|
||||
| "google-gemini-cli"
|
||||
| "google-antigravity"
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import type { ModelRegistry } from "./model-registry.js";
|
|||
const defaultModelPerProvider: Record<KnownProvider, string> = {
|
||||
"amazon-bedrock": "us.anthropic.claude-opus-4-6-v1",
|
||||
anthropic: "claude-opus-4-6[1m]",
|
||||
"anthropic-vertex": "claude-sonnet-4-6",
|
||||
openai: "gpt-5.4",
|
||||
"azure-openai-responses": "gpt-5.2",
|
||||
"openai-codex": "gpt-5.4",
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ const TOOL_KEYS: ToolKeyConfig[] = [
|
|||
/** Known LLM provider IDs that, if authed, mean the user doesn't need onboarding */
|
||||
const LLM_PROVIDER_IDS = [
|
||||
'anthropic',
|
||||
'anthropic-vertex',
|
||||
'openai',
|
||||
'github-copilot',
|
||||
'openai-codex',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue