diff --git a/README.md b/README.md index d565068a3..79a77fc46 100644 --- a/README.md +++ b/README.md @@ -629,7 +629,7 @@ GSD isn't locked to one provider. It runs on the [Pi SDK](https://github.com/bad ### Built-in Providers -Anthropic, OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more. +Anthropic, Anthropic (Vertex AI), OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more. ### OAuth / Max Plans diff --git a/docs/what-is-pi/10-providers-models-multi-model-by-default.md b/docs/what-is-pi/10-providers-models-multi-model-by-default.md index 150443fb8..f218ff10d 100644 --- a/docs/what-is-pi/10-providers-models-multi-model-by-default.md +++ b/docs/what-is-pi/10-providers-models-multi-model-by-default.md @@ -12,7 +12,7 @@ Pi isn't locked to one provider. It supports 20+ providers out of the box and le - Google Antigravity **API keys (via environment variables):** -- Anthropic, OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock +- Anthropic, Anthropic (Vertex AI), OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock - Mistral, Groq, Cerebras, xAI, OpenRouter, Vercel AI Gateway - ZAI, OpenCode Zen, OpenCode Go, Hugging Face, Kimi, MiniMax diff --git a/package-lock.json b/package-lock.json index fa2e3d9a3..f23ad20f4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "gsd-pi", - "version": "2.31.2", + "version": "2.33.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "gsd-pi", - "version": "2.31.2", + "version": "2.33.1", "hasInstallScript": true, "license": "MIT", "workspaces": [ @@ -15,6 +15,7 @@ ], "dependencies": { "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@clack/prompts": 
"^1.1.0", "@google/genai": "^1.40.0", @@ -96,6 +97,56 @@ } } }, + "node_modules/@anthropic-ai/vertex-sdk": { + "version": "0.14.4", + "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.4.tgz", + "integrity": "sha512-BZUPRWghZxfSFtAxU563wH+jfWBPoedAwsVxG35FhmNsjeV8tyfN+lFriWhCpcZApxA4NdT6Soov+PzfnxxD5g==", + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": ">=0.50.3 <1", + "google-auth-library": "^9.4.2" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, "node_modules/@aws-crypto/crc32": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz", @@ 
-5865,6 +5916,19 @@ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", "license": "ISC" }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -9066,6 +9130,7 @@ "version": "0.57.1", "dependencies": { "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@google/genai": "^1.40.0", "@mistralai/mistralai": "^1.14.1", @@ -9101,7 +9166,7 @@ }, "packages/pi-coding-agent": { "name": "@gsd/pi-coding-agent", - "version": "2.31.2", + "version": "2.33.1", "dependencies": { "@mariozechner/jiti": "^2.6.2", "@silvia-odwyer/photon-node": "^0.3.4", diff --git a/package.json b/package.json index 7e4c41780..1e1edf44d 100644 --- a/package.json +++ b/package.json @@ -84,6 +84,7 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@clack/prompts": "^1.1.0", "@google/genai": "^1.40.0", diff --git a/packages/pi-ai/package.json b/packages/pi-ai/package.json index c7f1da046..45cc4b76f 100644 --- a/packages/pi-ai/package.json +++ b/packages/pi-ai/package.json @@ -24,6 +24,7 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@google/genai": "^1.40.0", "@mistralai/mistralai": "^1.14.1", diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index eaee25545..b6577d99d 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ 
b/packages/pi-ai/src/env-api-keys.ts @@ -73,6 +73,20 @@ export function getEnvApiKey(provider: any): string | undefined { return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; } + // Anthropic on Vertex AI uses Application Default Credentials. + // Detected via ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code). + if (provider === "anthropic-vertex") { + const hasProject = !!process.env.ANTHROPIC_VERTEX_PROJECT_ID; + if (hasProject) { + return ""; + } + // Fall back to Google Cloud project env vars + const hasGoogleProject = !!(process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT); + if (hasGoogleProject && hasVertexAdcCredentials()) { + return ""; + } + } + // Vertex AI uses Application Default Credentials, not API keys. // Auth is configured via `gcloud auth application-default login`. if (provider === "google-vertex") { diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index fe3112ede..ac56d2069 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -1827,6 +1827,178 @@ export const MODELS = { maxTokens: 64000, } satisfies Model<"anthropic-messages">, }, + "anthropic-vertex": { + "claude-opus-4-6": { + id: "claude-opus-4-6", + name: "Claude Opus 4.6 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 128000, + } satisfies Model<"anthropic-vertex">, + "claude-opus-4-6[1m]": { + id: "claude-opus-4-6[1m]", + name: "Claude Opus 4.6 1M (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + 
contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-vertex">, + "claude-sonnet-4-6": { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-vertex">, + "claude-sonnet-4-6[1m]": { + id: "claude-sonnet-4-6[1m]", + name: "Claude Sonnet 4.6 1M (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 1000000, + maxTokens: 64000, + } satisfies Model<"anthropic-vertex">, + "claude-sonnet-4-5@20250929": { + id: "claude-sonnet-4-5@20250929", + name: "Claude Sonnet 4.5 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-vertex">, + "claude-sonnet-4@20250514": { + id: "claude-sonnet-4@20250514", + name: "Claude Sonnet 4 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-vertex">, + "claude-opus-4-5@20251101": { + id: "claude-opus-4-5@20251101", + name: "Claude Opus 4.5 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: 
"https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-vertex">, + "claude-opus-4-1@20250805": { + id: "claude-opus-4-1@20250805", + name: "Claude Opus 4.1 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-vertex">, + "claude-opus-4@20250514": { + id: "claude-opus-4@20250514", + name: "Claude Opus 4 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-vertex">, + "claude-haiku-4-5@20251001": { + id: "claude-haiku-4-5@20251001", + name: "Claude Haiku 4.5 (Vertex)", + api: "anthropic-vertex", + provider: "anthropic-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.8, + output: 4, + cacheRead: 0.08, + cacheWrite: 1, + }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"anthropic-vertex">, + }, "azure-openai-responses": { "codex-mini-latest": { id: "codex-mini-latest", diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts new file mode 100644 index 000000000..4425df7dd --- /dev/null +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -0,0 +1,761 @@ +/** + * Shared utilities for Anthropic providers (direct API and Vertex AI). 
+ */ +import type Anthropic from "@anthropic-ai/sdk"; +import type { + ContentBlockParam, + MessageCreateParamsStreaming, + MessageParam, +} from "@anthropic-ai/sdk/resources/messages.js"; +import { calculateCost } from "../models.js"; +import type { + Api, + AssistantMessage, + CacheRetention, + Context, + ImageContent, + Message, + Model, + ServerToolUseContent, + StopReason, + StreamOptions, + TextContent, + ThinkingContent, + Tool, + ToolCall, + ToolResultMessage, + WebSearchResultContent, +} from "../types.js"; + +/** API types that use the Anthropic Messages protocol */ +export type AnthropicApi = "anthropic-messages" | "anthropic-vertex"; +import type { AssistantMessageEventStream } from "../utils/event-stream.js"; +import { parseStreamingJson } from "../utils/json-parse.js"; +import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; +import { transformMessages } from "./transform-messages.js"; + +export type AnthropicEffort = "low" | "medium" | "high" | "max"; + +export interface AnthropicOptions extends StreamOptions { + thinkingEnabled?: boolean; + thinkingBudgetTokens?: number; + effort?: AnthropicEffort; + interleavedThinking?: boolean; + toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string }; +} + +const claudeCodeTools = [ + "Read", + "Write", + "Edit", + "Bash", + "Grep", + "Glob", + "AskUserQuestion", + "EnterPlanMode", + "ExitPlanMode", + "KillShell", + "NotebookEdit", + "Skill", + "Task", + "TaskOutput", + "TodoWrite", + "WebFetch", + "WebSearch", +]; + +const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t])); + +export const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? 
name; +export const fromClaudeCodeName = (name: string, tools?: Tool[]) => { + if (tools && tools.length > 0) { + const lowerName = name.toLowerCase(); + const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName); + if (matchedTool) return matchedTool.name; + } + return name; +}; + +function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention { + if (cacheRetention) { + return cacheRetention; + } + if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") { + return "long"; + } + return "short"; +} + +export function getCacheControl( + baseUrl: string, + cacheRetention?: CacheRetention, +): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } { + const retention = resolveCacheRetention(cacheRetention); + if (retention === "none") { + return { retention }; + } + const ttl = retention === "long" && baseUrl.includes("api.anthropic.com") ? "1h" : undefined; + return { + retention, + cacheControl: { type: "ephemeral", ...(ttl && { ttl }) }, + }; +} + +export function convertContentBlocks(content: (TextContent | ImageContent)[]): + | string + | Array< + | { type: "text"; text: string } + | { + type: "image"; + source: { + type: "base64"; + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"; + data: string; + }; + } + > { + const hasImages = content.some((c) => c.type === "image"); + if (!hasImages) { + return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n")); + } + + const blocks = content.map((block) => { + if (block.type === "text") { + return { + type: "text" as const, + text: sanitizeSurrogates(block.text), + }; + } + return { + type: "image" as const, + source: { + type: "base64" as const, + media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", + data: block.data, + }, + }; + }); + + const hasText = blocks.some((b) => b.type === "text"); + if (!hasText) { + blocks.unshift({ + type: "text" as const, + 
text: "(see attached image)", + }); + } + + return blocks; +} + +export function supportsAdaptiveThinking(modelId: string): boolean { + return ( + modelId.includes("opus-4-6") || + modelId.includes("opus-4.6") || + modelId.includes("sonnet-4-6") || + modelId.includes("sonnet-4.6") + ); +} + +export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort { + switch (level) { + case "minimal": + return "low"; + case "low": + return "low"; + case "medium": + return "medium"; + case "high": + return "high"; + case "xhigh": + return modelId.includes("opus-4-6") || modelId.includes("opus-4.6") ? "max" : "high"; + default: + return "high"; + } +} + +export function isTransientNetworkError(error: unknown): boolean { + if (!(error instanceof Error)) return false; + const msg = error.message.toLowerCase(); + const code = (error as NodeJS.ErrnoException).code; + return ( + code === "ECONNRESET" || + code === "EPIPE" || + code === "ETIMEDOUT" || + code === "ENOTFOUND" || + code === "EAI_AGAIN" || + msg.includes("connector_closed") || + msg.includes("socket hang up") || + msg.includes("network") || + (msg.includes("connection") && msg.includes("closed")) || + msg.includes("fetch failed") + ); +} + +export function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined { + const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? 
Math.ceil(ms + 1000) : undefined); + + const retryAfter = headers.get("retry-after"); + if (retryAfter) { + const seconds = Number(retryAfter); + if (Number.isFinite(seconds)) { + const delay = normalizeDelay(seconds * 1000); + if (delay !== undefined) return delay; + } + const asDate = new Date(retryAfter).getTime(); + if (!Number.isNaN(asDate)) { + const delay = normalizeDelay(asDate - Date.now()); + if (delay !== undefined) return delay; + } + } + + for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) { + const value = headers.get(header); + if (value) { + const resetSeconds = Number(value); + if (Number.isFinite(resetSeconds)) { + const delay = normalizeDelay(resetSeconds * 1000 - Date.now()); + if (delay !== undefined) return delay; + } + } + } + + return undefined; +} + +export function normalizeToolCallId(id: string): string { + return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); +} + +export function convertMessages( + messages: Message[], + model: Model, + isOAuthToken: boolean, + cacheControl?: { type: "ephemeral"; ttl?: "1h" }, +): MessageParam[] { + const params: MessageParam[] = []; + + const transformedMessages = transformMessages(messages, model, normalizeToolCallId); + + for (let i = 0; i < transformedMessages.length; i++) { + const msg = transformedMessages[i]; + + if (msg.role === "user") { + if (typeof msg.content === "string") { + if (msg.content.trim().length > 0) { + params.push({ + role: "user", + content: sanitizeSurrogates(msg.content), + }); + } + } else { + const blocks: ContentBlockParam[] = msg.content.map((item) => { + if (item.type === "text") { + return { + type: "text", + text: sanitizeSurrogates(item.text), + }; + } else { + return { + type: "image", + source: { + type: "base64", + media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", + data: item.data, + }, + }; + } + }); + let filteredBlocks = !model?.input.includes("image") ? 
blocks.filter((b) => b.type !== "image") : blocks; + filteredBlocks = filteredBlocks.filter((b) => { + if (b.type === "text") { + return b.text.trim().length > 0; + } + return true; + }); + if (filteredBlocks.length === 0) continue; + params.push({ + role: "user", + content: filteredBlocks, + }); + } + } else if (msg.role === "assistant") { + const blocks: ContentBlockParam[] = []; + + for (const block of msg.content) { + if (block.type === "text") { + if (block.text.trim().length === 0) continue; + blocks.push({ + type: "text", + text: sanitizeSurrogates(block.text), + }); + } else if (block.type === "thinking") { + if (block.redacted) { + blocks.push({ + type: "redacted_thinking", + data: block.thinkingSignature!, + }); + continue; + } + if (block.thinking.trim().length === 0) continue; + if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) { + blocks.push({ + type: "text", + text: sanitizeSurrogates(block.thinking), + }); + } else { + blocks.push({ + type: "thinking", + thinking: sanitizeSurrogates(block.thinking), + signature: block.thinkingSignature, + }); + } + } else if (block.type === "toolCall") { + blocks.push({ + type: "tool_use", + id: block.id, + name: isOAuthToken ? toClaudeCodeName(block.name) : block.name, + input: block.arguments ?? {}, + }); + } else if (block.type === "serverToolUse") { + blocks.push({ + type: "server_tool_use", + id: block.id, + name: block.name, + input: block.input ?? 
{}, + } as any); + } else if (block.type === "webSearchResult") { + blocks.push({ + type: "web_search_tool_result", + tool_use_id: block.toolUseId, + content: block.content, + } as any); + } + } + if (blocks.length === 0) continue; + params.push({ + role: "assistant", + content: blocks, + }); + } else if (msg.role === "toolResult") { + const toolResults: ContentBlockParam[] = []; + + toolResults.push({ + type: "tool_result", + tool_use_id: msg.toolCallId, + content: convertContentBlocks(msg.content), + is_error: msg.isError, + }); + + let j = i + 1; + while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") { + const nextMsg = transformedMessages[j] as ToolResultMessage; + toolResults.push({ + type: "tool_result", + tool_use_id: nextMsg.toolCallId, + content: convertContentBlocks(nextMsg.content), + is_error: nextMsg.isError, + }); + j++; + } + + i = j - 1; + + params.push({ + role: "user", + content: toolResults, + }); + } + } + + if (cacheControl && params.length > 0) { + const lastMessage = params[params.length - 1]; + if (lastMessage.role === "user") { + if (Array.isArray(lastMessage.content)) { + const lastBlock = lastMessage.content[lastMessage.content.length - 1]; + if ( + lastBlock && + (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result") + ) { + (lastBlock as any).cache_control = cacheControl; + } + } else if (typeof lastMessage.content === "string") { + lastMessage.content = [ + { + type: "text", + text: lastMessage.content, + cache_control: cacheControl, + }, + ] as any; + } + } + } + + return params; +} + +export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] { + if (!tools) return []; + + return tools.map((tool) => { + const jsonSchema = tool.parameters as any; + + return { + name: isOAuthToken ? 
toClaudeCodeName(tool.name) : tool.name, + description: tool.description, + input_schema: { + type: "object" as const, + properties: jsonSchema.properties || {}, + required: jsonSchema.required || [], + }, + }; + }); +} + +export function buildParams( + model: Model, + context: Context, + isOAuthToken: boolean, + options?: AnthropicOptions, +): MessageCreateParamsStreaming { + const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention); + const apiModelId = model.id.replace(/\[.*\]$/, ""); + const params: MessageCreateParamsStreaming = { + model: apiModelId, + messages: convertMessages(context.messages, model, isOAuthToken, cacheControl), + max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0, + stream: true, + }; + + if (isOAuthToken) { + params.system = [ + { + type: "text", + text: "You are Claude Code, Anthropic's official CLI for Claude.", + ...(cacheControl ? { cache_control: cacheControl } : {}), + }, + ]; + if (context.systemPrompt) { + params.system.push({ + type: "text", + text: sanitizeSurrogates(context.systemPrompt), + ...(cacheControl ? { cache_control: cacheControl } : {}), + }); + } + } else if (context.systemPrompt) { + params.system = [ + { + type: "text", + text: sanitizeSurrogates(context.systemPrompt), + ...(cacheControl ? 
{ cache_control: cacheControl } : {}), + }, + ]; + } + + if (options?.temperature !== undefined && !options?.thinkingEnabled) { + params.temperature = options.temperature; + } + + if (context.tools) { + params.tools = convertTools(context.tools, isOAuthToken); + } + + if (options?.thinkingEnabled && model.reasoning) { + if (supportsAdaptiveThinking(model.id)) { + params.thinking = { type: "adaptive" }; + if (options.effort) { + params.output_config = { effort: options.effort }; + } + } else { + params.thinking = { + type: "enabled", + budget_tokens: options.thinkingBudgetTokens || 1024, + }; + } + } + + if (options?.metadata) { + const userId = options.metadata.user_id; + if (typeof userId === "string") { + params.metadata = { user_id: userId }; + } + } + + if (options?.toolChoice) { + if (typeof options.toolChoice === "string") { + params.tool_choice = { type: options.toolChoice }; + } else { + params.tool_choice = options.toolChoice; + } + } + + return params; +} + +export function mapStopReason(reason: string): StopReason { + switch (reason) { + case "end_turn": + return "stop"; + case "max_tokens": + return "length"; + case "tool_use": + return "toolUse"; + case "refusal": + return "error"; + case "pause_turn": + return "stop"; + case "stop_sequence": + return "stop"; + case "sensitive": + return "error"; + default: + throw new Error(`Unhandled stop reason: ${reason}`); + } +} + +export interface StreamAnthropicArgs { + client: Anthropic; + model: Model; + context: Context; + isOAuthToken: boolean; + options?: AnthropicOptions; + AnthropicSdkClass?: typeof Anthropic; +} + +export function processAnthropicStream( + stream: AssistantMessageEventStream, + args: StreamAnthropicArgs, +): void { + const { client, model, context, isOAuthToken, options, AnthropicSdkClass } = args; + + (async () => { + const output: AssistantMessage = { + role: "assistant", + content: [], + api: model.api as Api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + 
output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + try { + let params = buildParams(model, context, isOAuthToken, options); + const nextParams = await options?.onPayload?.(params, model); + if (nextParams !== undefined) { + params = nextParams as MessageCreateParamsStreaming; + } + const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal }); + stream.push({ type: "start", partial: output }); + + type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number }; + const blocks = output.content as Block[]; + + for await (const event of anthropicStream) { + if (event.type === "message_start") { + output.usage.input = event.message.usage.input_tokens || 0; + output.usage.output = event.message.usage.output_tokens || 0; + output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0; + output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0; + output.usage.totalTokens = + output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; + calculateCost(model, output.usage); + } else if (event.type === "content_block_start") { + if (event.content_block.type === "text") { + const block: Block = { + type: "text", + text: "", + index: event.index, + }; + output.content.push(block); + stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output }); + } else if (event.content_block.type === "thinking") { + const block: Block = { + type: "thinking", + thinking: "", + thinkingSignature: "", + index: event.index, + }; + output.content.push(block); + stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output }); + } else if (event.content_block.type === "redacted_thinking") { + const 
block: Block = { + type: "thinking", + thinking: "[Reasoning redacted]", + thinkingSignature: event.content_block.data, + redacted: true, + index: event.index, + }; + output.content.push(block); + stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output }); + } else if (event.content_block.type === "tool_use") { + const block: Block = { + type: "toolCall", + id: event.content_block.id, + name: isOAuthToken + ? fromClaudeCodeName(event.content_block.name, context.tools) + : event.content_block.name, + arguments: (event.content_block.input as Record) ?? {}, + partialJson: "", + index: event.index, + }; + output.content.push(block); + stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output }); + } else if ((event.content_block as any).type === "server_tool_use") { + const serverBlock = event.content_block as any; + const block: Block = { + type: "serverToolUse", + id: serverBlock.id, + name: serverBlock.name, + input: serverBlock.input, + index: event.index, + }; + output.content.push(block); + stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output }); + } else if ((event.content_block as any).type === "web_search_tool_result") { + const resultBlock = event.content_block as any; + const block: Block = { + type: "webSearchResult", + toolUseId: resultBlock.tool_use_id, + content: resultBlock.content, + index: event.index, + }; + output.content.push(block); + stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output }); + } + } else if (event.type === "content_block_delta") { + if (event.delta.type === "text_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "text") { + block.text += event.delta.text; + stream.push({ + type: "text_delta", + contentIndex: index, + delta: event.delta.text, + partial: output, + }); + } + } else if 
(event.delta.type === "thinking_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "thinking") { + block.thinking += event.delta.thinking; + stream.push({ + type: "thinking_delta", + contentIndex: index, + delta: event.delta.thinking, + partial: output, + }); + } + } else if (event.delta.type === "input_json_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "toolCall") { + block.partialJson += event.delta.partial_json; + block.arguments = parseStreamingJson(block.partialJson); + stream.push({ + type: "toolcall_delta", + contentIndex: index, + delta: event.delta.partial_json, + partial: output, + }); + } + } else if (event.delta.type === "signature_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "thinking") { + block.thinkingSignature = block.thinkingSignature || ""; + block.thinkingSignature += event.delta.signature; + } + } + } else if (event.type === "content_block_stop") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block) { + delete (block as any).index; + if (block.type === "text") { + stream.push({ + type: "text_end", + contentIndex: index, + content: block.text, + partial: output, + }); + } else if (block.type === "thinking") { + stream.push({ + type: "thinking_end", + contentIndex: index, + content: block.thinking, + partial: output, + }); + } else if (block.type === "toolCall") { + block.arguments = parseStreamingJson(block.partialJson); + delete (block as any).partialJson; + stream.push({ + type: "toolcall_end", + contentIndex: index, + toolCall: block, + partial: output, + }); + } + } + } else if (event.type === "message_delta") { + if (event.delta.stop_reason) { + output.stopReason = mapStopReason(event.delta.stop_reason); + } + if 
(event.usage.input_tokens != null) { + output.usage.input = event.usage.input_tokens; + } + if (event.usage.output_tokens != null) { + output.usage.output = event.usage.output_tokens; + } + if (event.usage.cache_read_input_tokens != null) { + output.usage.cacheRead = event.usage.cache_read_input_tokens; + } + if (event.usage.cache_creation_input_tokens != null) { + output.usage.cacheWrite = event.usage.cache_creation_input_tokens; + } + output.usage.totalTokens = + output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; + calculateCost(model, output.usage); + } + } + + if (options?.signal?.aborted) { + throw new Error("Request was aborted"); + } + + if (output.stopReason === "aborted" || output.stopReason === "error") { + throw new Error("An unknown error occurred"); + } + + stream.push({ type: "done", reason: output.stopReason, message: output }); + stream.end(); + } catch (error) { + for (const block of output.content) delete (block as any).index; + output.stopReason = options?.signal?.aborted ? "aborted" : "error"; + output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); + if (model.provider === "alibaba-coding-plan") { + output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`; + } + if (AnthropicSdkClass && error instanceof AnthropicSdkClass.APIError && error.headers) { + const retryAfterMs = extractRetryAfterMs(error.headers, error.message); + if (retryAfterMs !== undefined) { + output.retryAfterMs = retryAfterMs; + } + } + if (isTransientNetworkError(error)) { + output.retryAfterMs = output.retryAfterMs ?? 
5000; + } + stream.push({ type: "error", reason: output.stopReason, error: output }); + stream.end(); + } + })(); +} diff --git a/packages/pi-ai/src/providers/anthropic-vertex.ts b/packages/pi-ai/src/providers/anthropic-vertex.ts new file mode 100644 index 000000000..e32d916e7 --- /dev/null +++ b/packages/pi-ai/src/providers/anthropic-vertex.ts @@ -0,0 +1,130 @@ +// Lazy-loaded: Anthropic Vertex SDK is imported on first use, not at startup. +// This avoids penalizing users who don't use Anthropic Vertex models. +import type Anthropic from "@anthropic-ai/sdk"; +import type { AnthropicVertex } from "@anthropic-ai/vertex-sdk"; +import { getEnvApiKey } from "../env-api-keys.js"; +import type { + Context, + Model, + SimpleStreamOptions, + StreamFunction, +} from "../types.js"; +import { AssistantMessageEventStream } from "../utils/event-stream.js"; +import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js"; +import { + type AnthropicOptions, + mapThinkingLevelToEffort, + processAnthropicStream, + supportsAdaptiveThinking, +} from "./anthropic-shared.js"; + +let _AnthropicVertexClass: typeof AnthropicVertex | undefined; +let _AnthropicSdkClass: typeof Anthropic | undefined; + +async function getAnthropicVertexClass(): Promise { + if (!_AnthropicVertexClass) { + const mod = await import("@anthropic-ai/vertex-sdk"); + _AnthropicVertexClass = mod.AnthropicVertex; + } + return _AnthropicVertexClass; +} + +async function getAnthropicSdkClass(): Promise { + if (!_AnthropicSdkClass) { + const mod = await import("@anthropic-ai/sdk"); + _AnthropicSdkClass = mod.default; + } + return _AnthropicSdkClass; +} + +function resolveProjectId(): string { + const projectId = process.env.ANTHROPIC_VERTEX_PROJECT_ID + || process.env.GOOGLE_CLOUD_PROJECT + || process.env.GCLOUD_PROJECT; + if (!projectId) { + throw new Error( + "Anthropic Vertex requires a project ID. 
Set ANTHROPIC_VERTEX_PROJECT_ID, GOOGLE_CLOUD_PROJECT, or GCLOUD_PROJECT.", + ); + } + return projectId; +} + +function resolveRegion(): string { + return process.env.CLOUD_ML_REGION + || process.env.GOOGLE_CLOUD_LOCATION + || "us-central1"; +} + +async function createVertexClient(): Promise { + const AnthropicVertexClass = await getAnthropicVertexClass(); + const projectId = resolveProjectId(); + const region = resolveRegion(); + + return new AnthropicVertexClass({ + projectId, + region, + }); +} + +export const streamAnthropicVertex: StreamFunction<"anthropic-vertex", AnthropicOptions> = ( + model: Model<"anthropic-vertex">, + context: Context, + options?: AnthropicOptions, +): AssistantMessageEventStream => { + const stream = new AssistantMessageEventStream(); + + (async () => { + const client = await createVertexClient(); + const AnthropicSdk = await getAnthropicSdkClass(); + + processAnthropicStream(stream, { + client: client as unknown as Anthropic, + model, + context, + isOAuthToken: false, + options, + AnthropicSdkClass: AnthropicSdk, + }); + })(); + + return stream; +}; + +export const streamSimpleAnthropicVertex: StreamFunction<"anthropic-vertex", SimpleStreamOptions> = ( + model: Model<"anthropic-vertex">, + context: Context, + options?: SimpleStreamOptions, +): AssistantMessageEventStream => { + const apiKey = options?.apiKey || getEnvApiKey(model.provider); + if (!apiKey) { + throw new Error(`No API key found for provider: ${model.provider}. 
Set ANTHROPIC_VERTEX_PROJECT_ID to use Claude on Vertex AI.`); + } + + const base = buildBaseOptions(model, options, apiKey); + if (!options?.reasoning) { + return streamAnthropicVertex(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); + } + + if (supportsAdaptiveThinking(model.id)) { + const effort = mapThinkingLevelToEffort(options.reasoning, model.id); + return streamAnthropicVertex(model, context, { + ...base, + thinkingEnabled: true, + effort, + } satisfies AnthropicOptions); + } + + const adjusted = adjustMaxTokensForThinking( + base.maxTokens || 0, + model.maxTokens, + options.reasoning, + options.thinkingBudgets, + ); + + return streamAnthropicVertex(model, context, { + ...base, + maxTokens: adjusted.maxTokens, + thinkingEnabled: true, + thinkingBudgetTokens: adjusted.thinkingBudget, + } satisfies AnthropicOptions); +}; diff --git a/packages/pi-ai/src/providers/anthropic.ts b/packages/pi-ai/src/providers/anthropic.ts index 1041c96c3..21c0da707 100644 --- a/packages/pi-ai/src/providers/anthropic.ts +++ b/packages/pi-ai/src/providers/anthropic.ts @@ -1,40 +1,29 @@ // Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup. // This avoids penalizing users who don't use Anthropic models. 
import type Anthropic from "@anthropic-ai/sdk"; -import type { - ContentBlockParam, - MessageCreateParamsStreaming, - MessageParam, -} from "@anthropic-ai/sdk/resources/messages.js"; import { getEnvApiKey } from "../env-api-keys.js"; -import { calculateCost } from "../models.js"; import type { - Api, - AssistantMessage, - CacheRetention, Context, - ImageContent, - Message, Model, - ServerToolUseContent, SimpleStreamOptions, - StopReason, StreamFunction, - StreamOptions, - TextContent, - ThinkingContent, - Tool, - ToolCall, - ToolResultMessage, - WebSearchResultContent, } from "../types.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js"; -import { parseStreamingJson } from "../utils/json-parse.js"; -import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js"; import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js"; -import { transformMessages } from "./transform-messages.js"; +import { + type AnthropicEffort, + type AnthropicOptions, + extractRetryAfterMs, + mapThinkingLevelToEffort, + processAnthropicStream, + supportsAdaptiveThinking, +} from "./anthropic-shared.js"; + +// Re-export types used by other modules +export type { AnthropicEffort, AnthropicOptions }; +export { extractRetryAfterMs }; let _AnthropicClass: typeof Anthropic | undefined; async function getAnthropicClass(): Promise { @@ -45,154 +34,9 @@ async function getAnthropicClass(): Promise { return _AnthropicClass; } -/** - * Resolve cache retention preference. - * Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility. 
- */ -function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention { - if (cacheRetention) { - return cacheRetention; - } - if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") { - return "long"; - } - return "short"; -} - -function getCacheControl( - baseUrl: string, - cacheRetention?: CacheRetention, -): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } { - const retention = resolveCacheRetention(cacheRetention); - if (retention === "none") { - return { retention }; - } - const ttl = retention === "long" && baseUrl.includes("api.anthropic.com") ? "1h" : undefined; - return { - retention, - cacheControl: { type: "ephemeral", ...(ttl && { ttl }) }, - }; -} - // Stealth mode: Mimic Claude Code's tool naming exactly const claudeCodeVersion = "2.1.62"; -// Claude Code 2.x tool names (canonical casing) -// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md -// To update: https://github.com/badlogic/cchistory -const claudeCodeTools = [ - "Read", - "Write", - "Edit", - "Bash", - "Grep", - "Glob", - "AskUserQuestion", - "EnterPlanMode", - "ExitPlanMode", - "KillShell", - "NotebookEdit", - "Skill", - "Task", - "TaskOutput", - "TodoWrite", - "WebFetch", - "WebSearch", -]; - -const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t])); - -// Convert tool name to CC canonical casing if it matches (case-insensitive) -const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? 
name; -const fromClaudeCodeName = (name: string, tools?: Tool[]) => { - if (tools && tools.length > 0) { - const lowerName = name.toLowerCase(); - const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName); - if (matchedTool) return matchedTool.name; - } - return name; -}; - -/** - * Convert content blocks to Anthropic API format - */ -function convertContentBlocks(content: (TextContent | ImageContent)[]): - | string - | Array< - | { type: "text"; text: string } - | { - type: "image"; - source: { - type: "base64"; - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"; - data: string; - }; - } - > { - // If only text blocks, return as concatenated string for simplicity - const hasImages = content.some((c) => c.type === "image"); - if (!hasImages) { - return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n")); - } - - // If we have images, convert to content block array - const blocks = content.map((block) => { - if (block.type === "text") { - return { - type: "text" as const, - text: sanitizeSurrogates(block.text), - }; - } - return { - type: "image" as const, - source: { - type: "base64" as const, - media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", - data: block.data, - }, - }; - }); - - // If only images (no text), add placeholder text block - const hasText = blocks.some((b) => b.type === "text"); - if (!hasText) { - blocks.unshift({ - type: "text" as const, - text: "(see attached image)", - }); - } - - return blocks; -} - -export type AnthropicEffort = "low" | "medium" | "high" | "max"; - -export interface AnthropicOptions extends StreamOptions { - /** - * Enable extended thinking. - * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think). - * For older models: uses budget-based thinking with thinkingBudgetTokens. - */ - thinkingEnabled?: boolean; - /** - * Token budget for extended thinking (older models only). 
- * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking. - */ - thinkingBudgetTokens?: number; - /** - * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6). - * Controls how much thinking Claude allocates: - * - "max": Always thinks with no constraints (Opus 4.6 only) - * - "high": Always thinks, deep reasoning (default) - * - "medium": Moderate thinking, may skip for simple queries - * - "low": Minimal thinking, skips for simple tasks - * Ignored for older models. - */ - effort?: AnthropicEffort; - interleavedThinking?: boolean; - toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string }; -} - function mergeHeaders(...headerSources: (Record | undefined)[]): Record { const merged: Record = {}; for (const headers of headerSources) { @@ -203,410 +47,6 @@ function mergeHeaders(...headerSources: (Record | undefined)[]): return merged; } -/** - * Detect transient network errors that are likely to succeed on retry. - * Covers WebSocket disconnects (Tailscale, VPN), TCP resets, and DNS failures. - */ -function isTransientNetworkError(error: unknown): boolean { - if (!(error instanceof Error)) return false; - const msg = error.message.toLowerCase(); - const code = (error as NodeJS.ErrnoException).code; - return ( - code === 'ECONNRESET' || - code === 'EPIPE' || - code === 'ETIMEDOUT' || - code === 'ENOTFOUND' || - code === 'EAI_AGAIN' || - msg.includes('connector_closed') || - msg.includes('socket hang up') || - msg.includes('network') || - msg.includes('connection') && msg.includes('closed') || - msg.includes('fetch failed') - ); -} - -/** - * Extract retry delay from Anthropic error response headers (in milliseconds). - * Checks: retry-after (seconds or RFC date), x-ratelimit-reset-requests, x-ratelimit-reset-tokens. - * Returns undefined if no valid delay is found or if the delay is in the past. 
- */ -function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined { - const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined); - - const retryAfter = headers.get("retry-after"); - if (retryAfter) { - const seconds = Number(retryAfter); - if (Number.isFinite(seconds)) { - const delay = normalizeDelay(seconds * 1000); - if (delay !== undefined) return delay; - } - const asDate = new Date(retryAfter).getTime(); - if (!Number.isNaN(asDate)) { - const delay = normalizeDelay(asDate - Date.now()); - if (delay !== undefined) return delay; - } - } - - // x-ratelimit-reset-requests / x-ratelimit-reset-tokens are Unix timestamps (seconds) - for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) { - const value = headers.get(header); - if (value) { - const resetSeconds = Number(value); - if (Number.isFinite(resetSeconds)) { - const delay = normalizeDelay(resetSeconds * 1000 - Date.now()); - if (delay !== undefined) return delay; - } - } - } - - return undefined; -} - -export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = ( - model: Model<"anthropic-messages">, - context: Context, - options?: AnthropicOptions, -): AssistantMessageEventStream => { - const stream = new AssistantMessageEventStream(); - - (async () => { - const output: AssistantMessage = { - role: "assistant", - content: [], - api: model.api as Api, - provider: model.provider, - model: model.id, - usage: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 0, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: "stop", - timestamp: Date.now(), - }; - - try { - const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? 
""; - - let copilotDynamicHeaders: Record | undefined; - if (model.provider === "github-copilot") { - const hasImages = hasCopilotVisionInput(context.messages); - copilotDynamicHeaders = buildCopilotDynamicHeaders({ - messages: context.messages, - hasImages, - }); - } - - const { client, isOAuthToken } = await createClient( - model, - apiKey, - options?.interleavedThinking ?? true, - options?.headers, - copilotDynamicHeaders, - ); - let params = buildParams(model, context, isOAuthToken, options); - const nextParams = await options?.onPayload?.(params, model); - if (nextParams !== undefined) { - params = nextParams as MessageCreateParamsStreaming; - } - const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal }); - stream.push({ type: "start", partial: output }); - - type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number }; - const blocks = output.content as Block[]; - - for await (const event of anthropicStream) { - if (event.type === "message_start") { - // Capture initial token usage from message_start event - // This ensures we have input token counts even if the stream is aborted early - output.usage.input = event.message.usage.input_tokens || 0; - output.usage.output = event.message.usage.output_tokens || 0; - output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0; - output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0; - // Anthropic doesn't provide total_tokens, compute from components - output.usage.totalTokens = - output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; - calculateCost(model, output.usage); - } else if (event.type === "content_block_start") { - if (event.content_block.type === "text") { - const block: Block = { - type: "text", - text: "", - index: event.index, - }; - output.content.push(block); - stream.push({ type: 
"text_start", contentIndex: output.content.length - 1, partial: output }); - } else if (event.content_block.type === "thinking") { - const block: Block = { - type: "thinking", - thinking: "", - thinkingSignature: "", - index: event.index, - }; - output.content.push(block); - stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output }); - } else if (event.content_block.type === "redacted_thinking") { - const block: Block = { - type: "thinking", - thinking: "[Reasoning redacted]", - thinkingSignature: event.content_block.data, - redacted: true, - index: event.index, - }; - output.content.push(block); - stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output }); - } else if (event.content_block.type === "tool_use") { - const block: Block = { - type: "toolCall", - id: event.content_block.id, - name: isOAuthToken - ? fromClaudeCodeName(event.content_block.name, context.tools) - : event.content_block.name, - arguments: (event.content_block.input as Record) ?? 
{}, - partialJson: "", - index: event.index, - }; - output.content.push(block); - stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output }); - } else if ((event.content_block as any).type === "server_tool_use") { - const serverBlock = event.content_block as any; - const block: Block = { - type: "serverToolUse", - id: serverBlock.id, - name: serverBlock.name, - input: serverBlock.input, - index: event.index, - }; - output.content.push(block); - stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output }); - } else if ((event.content_block as any).type === "web_search_tool_result") { - const resultBlock = event.content_block as any; - const block: Block = { - type: "webSearchResult", - toolUseId: resultBlock.tool_use_id, - content: resultBlock.content, - index: event.index, - }; - output.content.push(block); - stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output }); - } - } else if (event.type === "content_block_delta") { - if (event.delta.type === "text_delta") { - const index = blocks.findIndex((b) => b.index === event.index); - const block = blocks[index]; - if (block && block.type === "text") { - block.text += event.delta.text; - stream.push({ - type: "text_delta", - contentIndex: index, - delta: event.delta.text, - partial: output, - }); - } - } else if (event.delta.type === "thinking_delta") { - const index = blocks.findIndex((b) => b.index === event.index); - const block = blocks[index]; - if (block && block.type === "thinking") { - block.thinking += event.delta.thinking; - stream.push({ - type: "thinking_delta", - contentIndex: index, - delta: event.delta.thinking, - partial: output, - }); - } - } else if (event.delta.type === "input_json_delta") { - const index = blocks.findIndex((b) => b.index === event.index); - const block = blocks[index]; - if (block && block.type === "toolCall") { - block.partialJson += event.delta.partial_json; - 
block.arguments = parseStreamingJson(block.partialJson); - stream.push({ - type: "toolcall_delta", - contentIndex: index, - delta: event.delta.partial_json, - partial: output, - }); - } - } else if (event.delta.type === "signature_delta") { - const index = blocks.findIndex((b) => b.index === event.index); - const block = blocks[index]; - if (block && block.type === "thinking") { - block.thinkingSignature = block.thinkingSignature || ""; - block.thinkingSignature += event.delta.signature; - } - } - } else if (event.type === "content_block_stop") { - const index = blocks.findIndex((b) => b.index === event.index); - const block = blocks[index]; - if (block) { - delete (block as any).index; - if (block.type === "text") { - stream.push({ - type: "text_end", - contentIndex: index, - content: block.text, - partial: output, - }); - } else if (block.type === "thinking") { - stream.push({ - type: "thinking_end", - contentIndex: index, - content: block.thinking, - partial: output, - }); - } else if (block.type === "toolCall") { - block.arguments = parseStreamingJson(block.partialJson); - delete (block as any).partialJson; - stream.push({ - type: "toolcall_end", - contentIndex: index, - toolCall: block, - partial: output, - }); - } - // serverToolUse and webSearchResult blocks just need index cleanup (already emitted on start) - } - } else if (event.type === "message_delta") { - if (event.delta.stop_reason) { - output.stopReason = mapStopReason(event.delta.stop_reason); - } - // Only update usage fields if present (not null). - // Preserves input_tokens from message_start when proxies omit it in message_delta. 
- if (event.usage.input_tokens != null) { - output.usage.input = event.usage.input_tokens; - } - if (event.usage.output_tokens != null) { - output.usage.output = event.usage.output_tokens; - } - if (event.usage.cache_read_input_tokens != null) { - output.usage.cacheRead = event.usage.cache_read_input_tokens; - } - if (event.usage.cache_creation_input_tokens != null) { - output.usage.cacheWrite = event.usage.cache_creation_input_tokens; - } - // Anthropic doesn't provide total_tokens, compute from components - output.usage.totalTokens = - output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; - calculateCost(model, output.usage); - } - } - - if (options?.signal?.aborted) { - throw new Error("Request was aborted"); - } - - if (output.stopReason === "aborted" || output.stopReason === "error") { - throw new Error("An unknown error occurred"); - } - - stream.push({ type: "done", reason: output.stopReason, message: output }); - stream.end(); - } catch (error) { - for (const block of output.content) delete (block as any).index; - output.stopReason = options?.signal?.aborted ? "aborted" : "error"; - output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); - if (model.provider === "alibaba-coding-plan") { - output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`; - } - const AnthropicSdk = _AnthropicClass; - if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) { - const retryAfterMs = extractRetryAfterMs(error.headers, error.message); - if (retryAfterMs !== undefined) { - output.retryAfterMs = retryAfterMs; - } - } - // Mark transient network errors as retriable so auto-mode can - // detect them and retry instead of stopping (#833). - if (isTransientNetworkError(error)) { - output.retryAfterMs = output.retryAfterMs ?? 
5000; - } - stream.push({ type: "error", reason: output.stopReason, error: output }); - stream.end(); - } - })(); - - return stream; -}; - -/** - * Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6) - */ -function supportsAdaptiveThinking(modelId: string): boolean { - // Opus 4.6 and Sonnet 4.6 model IDs (with or without date suffix) - return ( - modelId.includes("opus-4-6") || - modelId.includes("opus-4.6") || - modelId.includes("sonnet-4-6") || - modelId.includes("sonnet-4.6") - ); -} - -/** - * Map ThinkingLevel to Anthropic effort levels for adaptive thinking. - * Note: effort "max" is only valid on Opus 4.6. - */ -function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"], modelId: string): AnthropicEffort { - switch (level) { - case "minimal": - return "low"; - case "low": - return "low"; - case "medium": - return "medium"; - case "high": - return "high"; - case "xhigh": - return modelId.includes("opus-4-6") || modelId.includes("opus-4.6") ? "max" : "high"; - default: - return "high"; - } -} - -export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = ( - model: Model<"anthropic-messages">, - context: Context, - options?: SimpleStreamOptions, -): AssistantMessageEventStream => { - const apiKey = options?.apiKey || getEnvApiKey(model.provider); - if (!apiKey) { - throw new Error(`No API key for provider: ${model.provider}`); - } - - const base = buildBaseOptions(model, options, apiKey); - if (!options?.reasoning) { - return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); - } - - // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level - // For older models: use budget-based thinking - if (supportsAdaptiveThinking(model.id)) { - const effort = mapThinkingLevelToEffort(options.reasoning, model.id); - return streamAnthropic(model, context, { - ...base, - thinkingEnabled: true, - effort, - } satisfies AnthropicOptions); - } - - 
const adjusted = adjustMaxTokensForThinking( - base.maxTokens || 0, - model.maxTokens, - options.reasoning, - options.thinkingBudgets, - ); - - return streamAnthropic(model, context, { - ...base, - maxTokens: adjusted.maxTokens, - thinkingEnabled: true, - thinkingBudgetTokens: adjusted.thinkingBudget, - } satisfies AnthropicOptions); -}; - function isOAuthToken(apiKey: string): boolean { return apiKey.includes("sk-ant-oat"); } @@ -702,315 +142,83 @@ async function createClient( return { client, isOAuthToken: false }; } -function buildParams( +export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = ( model: Model<"anthropic-messages">, context: Context, - isOAuthToken: boolean, options?: AnthropicOptions, -): MessageCreateParamsStreaming { - const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention); - // Strip variant suffixes like [1m] from model ID before sending to the API. - // The API only accepts the base model ID (e.g. "claude-opus-4-6"), - // not internal variant identifiers (e.g. "claude-opus-4-6[1m]"). - // This applies to all auth methods — API keys, OAuth, and Copilot alike. - const apiModelId = model.id.replace(/\[.*\]$/, ""); - const params: MessageCreateParamsStreaming = { - model: apiModelId, - messages: convertMessages(context.messages, model, isOAuthToken, cacheControl), - max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0, - stream: true, - }; +): AssistantMessageEventStream => { + const stream = new AssistantMessageEventStream(); - // For OAuth tokens, we MUST include Claude Code identity - if (isOAuthToken) { - params.system = [ - { - type: "text", - text: "You are Claude Code, Anthropic's official CLI for Claude.", - ...(cacheControl ? { cache_control: cacheControl } : {}), - }, - ]; - if (context.systemPrompt) { - params.system.push({ - type: "text", - text: sanitizeSurrogates(context.systemPrompt), - ...(cacheControl ? 
{ cache_control: cacheControl } : {}), + (async () => { + const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? ""; + + let copilotDynamicHeaders: Record | undefined; + if (model.provider === "github-copilot") { + const hasImages = hasCopilotVisionInput(context.messages); + copilotDynamicHeaders = buildCopilotDynamicHeaders({ + messages: context.messages, + hasImages, }); } - } else if (context.systemPrompt) { - // Add cache control to system prompt for non-OAuth tokens - params.system = [ - { - type: "text", - text: sanitizeSurrogates(context.systemPrompt), - ...(cacheControl ? { cache_control: cacheControl } : {}), - }, - ]; - } - // Temperature is incompatible with extended thinking (adaptive or budget-based). - if (options?.temperature !== undefined && !options?.thinkingEnabled) { - params.temperature = options.temperature; - } + const { client, isOAuthToken: isOAuth } = await createClient( + model, + apiKey, + options?.interleavedThinking ?? true, + options?.headers, + copilotDynamicHeaders, + ); - if (context.tools) { - params.tools = convertTools(context.tools, isOAuthToken); - } + processAnthropicStream(stream, { + client, + model, + context, + isOAuthToken: isOAuth, + options, + AnthropicSdkClass: _AnthropicClass, + }); + })(); - // Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models) - if (options?.thinkingEnabled && model.reasoning) { - if (supportsAdaptiveThinking(model.id)) { - // Adaptive thinking: Claude decides when and how much to think - params.thinking = { type: "adaptive" }; - if (options.effort) { - params.output_config = { effort: options.effort }; - } - } else { - // Budget-based thinking for older models - params.thinking = { - type: "enabled", - budget_tokens: options.thinkingBudgetTokens || 1024, - }; - } - } + return stream; +}; - if (options?.metadata) { - const userId = options.metadata.user_id; - if (typeof userId === "string") { - params.metadata = { user_id: userId }; - } - } - - if 
(options?.toolChoice) { - if (typeof options.toolChoice === "string") { - params.tool_choice = { type: options.toolChoice }; - } else { - params.tool_choice = options.toolChoice; - } - } - - return params; -} - -// Normalize tool call IDs to match Anthropic's required pattern and length -function normalizeToolCallId(id: string): string { - return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); -} - -function convertMessages( - messages: Message[], +export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = ( model: Model<"anthropic-messages">, - isOAuthToken: boolean, - cacheControl?: { type: "ephemeral"; ttl?: "1h" }, -): MessageParam[] { - const params: MessageParam[] = []; - - // Transform messages for cross-provider compatibility - const transformedMessages = transformMessages(messages, model, normalizeToolCallId); - - for (let i = 0; i < transformedMessages.length; i++) { - const msg = transformedMessages[i]; - - if (msg.role === "user") { - if (typeof msg.content === "string") { - if (msg.content.trim().length > 0) { - params.push({ - role: "user", - content: sanitizeSurrogates(msg.content), - }); - } - } else { - const blocks: ContentBlockParam[] = msg.content.map((item) => { - if (item.type === "text") { - return { - type: "text", - text: sanitizeSurrogates(item.text), - }; - } else { - return { - type: "image", - source: { - type: "base64", - media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", - data: item.data, - }, - }; - } - }); - let filteredBlocks = !model?.input.includes("image") ? 
blocks.filter((b) => b.type !== "image") : blocks; - filteredBlocks = filteredBlocks.filter((b) => { - if (b.type === "text") { - return b.text.trim().length > 0; - } - return true; - }); - if (filteredBlocks.length === 0) continue; - params.push({ - role: "user", - content: filteredBlocks, - }); - } - } else if (msg.role === "assistant") { - const blocks: ContentBlockParam[] = []; - - for (const block of msg.content) { - if (block.type === "text") { - if (block.text.trim().length === 0) continue; - blocks.push({ - type: "text", - text: sanitizeSurrogates(block.text), - }); - } else if (block.type === "thinking") { - // Redacted thinking: pass the opaque payload back as redacted_thinking - if (block.redacted) { - blocks.push({ - type: "redacted_thinking", - data: block.thinkingSignature!, - }); - continue; - } - if (block.thinking.trim().length === 0) continue; - // If thinking signature is missing/empty (e.g., from aborted stream), - // convert to plain text block without tags to avoid API rejection - // and prevent Claude from mimicking the tags in responses - if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) { - blocks.push({ - type: "text", - text: sanitizeSurrogates(block.thinking), - }); - } else { - blocks.push({ - type: "thinking", - thinking: sanitizeSurrogates(block.thinking), - signature: block.thinkingSignature, - }); - } - } else if (block.type === "toolCall") { - blocks.push({ - type: "tool_use", - id: block.id, - name: isOAuthToken ? toClaudeCodeName(block.name) : block.name, - input: block.arguments ?? {}, - }); - } else if (block.type === "serverToolUse") { - blocks.push({ - type: "server_tool_use", - id: block.id, - name: block.name, - input: block.input ?? 
{}, - } as any); - } else if (block.type === "webSearchResult") { - blocks.push({ - type: "web_search_tool_result", - tool_use_id: block.toolUseId, - content: block.content, - } as any); - } - } - if (blocks.length === 0) continue; - params.push({ - role: "assistant", - content: blocks, - }); - } else if (msg.role === "toolResult") { - // Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint - const toolResults: ContentBlockParam[] = []; - - // Add the current tool result - toolResults.push({ - type: "tool_result", - tool_use_id: msg.toolCallId, - content: convertContentBlocks(msg.content), - is_error: msg.isError, - }); - - // Look ahead for consecutive toolResult messages - let j = i + 1; - while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") { - const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult - toolResults.push({ - type: "tool_result", - tool_use_id: nextMsg.toolCallId, - content: convertContentBlocks(nextMsg.content), - is_error: nextMsg.isError, - }); - j++; - } - - // Skip the messages we've already processed - i = j - 1; - - // Add a single user message with all tool results - params.push({ - role: "user", - content: toolResults, - }); - } + context: Context, + options?: SimpleStreamOptions, +): AssistantMessageEventStream => { + const apiKey = options?.apiKey || getEnvApiKey(model.provider); + if (!apiKey) { + throw new Error(`No API key for provider: ${model.provider}`); } - // Add cache_control to the last user message to cache conversation history - if (cacheControl && params.length > 0) { - const lastMessage = params[params.length - 1]; - if (lastMessage.role === "user") { - if (Array.isArray(lastMessage.content)) { - const lastBlock = lastMessage.content[lastMessage.content.length - 1]; - if ( - lastBlock && - (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result") - ) { - (lastBlock as any).cache_control = 
cacheControl; - } - } else if (typeof lastMessage.content === "string") { - lastMessage.content = [ - { - type: "text", - text: lastMessage.content, - cache_control: cacheControl, - }, - ] as any; - } - } + const base = buildBaseOptions(model, options, apiKey); + if (!options?.reasoning) { + return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); } - return params; -} - -function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] { - if (!tools) return []; - - return tools.map((tool) => { - const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema - - return { - name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name, - description: tool.description, - input_schema: { - type: "object" as const, - properties: jsonSchema.properties || {}, - required: jsonSchema.required || [], - }, - }; - }); -} - -function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason { - switch (reason) { - case "end_turn": - return "stop"; - case "max_tokens": - return "length"; - case "tool_use": - return "toolUse"; - case "refusal": - return "error"; - case "pause_turn": // Stop is good enough -> resubmit - return "stop"; - case "stop_sequence": - return "stop"; // We don't supply stop sequences, so this should never happen - case "sensitive": // Content flagged by safety filters (not yet in SDK types) - return "error"; - default: - // Handle unknown stop reasons gracefully (API may add new values) - throw new Error(`Unhandled stop reason: ${reason}`); + // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level + // For older models: use budget-based thinking + if (supportsAdaptiveThinking(model.id)) { + const effort = mapThinkingLevelToEffort(options.reasoning, model.id); + return streamAnthropic(model, context, { + ...base, + thinkingEnabled: true, + effort, + } satisfies AnthropicOptions); } -} + + const adjusted = adjustMaxTokensForThinking( + 
base.maxTokens || 0, + model.maxTokens, + options.reasoning, + options.thinkingBudgets, + ); + + return streamAnthropic(model, context, { + ...base, + maxTokens: adjusted.maxTokens, + thinkingEnabled: true, + thinkingBudgetTokens: adjusted.thinkingBudget, + } satisfies AnthropicOptions); +}; diff --git a/packages/pi-ai/src/providers/register-builtins.ts b/packages/pi-ai/src/providers/register-builtins.ts index f8fbdae35..e149e8f61 100644 --- a/packages/pi-ai/src/providers/register-builtins.ts +++ b/packages/pi-ai/src/providers/register-builtins.ts @@ -3,6 +3,7 @@ import type { AssistantMessage, AssistantMessageEvent, Context, Model, SimpleStr import { AssistantMessageEventStream } from "../utils/event-stream.js"; import type { BedrockOptions } from "./amazon-bedrock.js"; import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js"; +import { streamAnthropicVertex, streamSimpleAnthropicVertex } from "./anthropic-vertex.js"; import { streamAzureOpenAIResponses, streamSimpleAzureOpenAIResponses } from "./azure-openai-responses.js"; import { streamGoogle, streamSimpleGoogle } from "./google.js"; import { streamGoogleGeminiCli, streamSimpleGoogleGeminiCli } from "./google-gemini-cli.js"; @@ -171,6 +172,12 @@ function registerBuiltInApiProviders(): void { streamSimple: streamSimpleGoogleVertex, }); + registerApiProvider({ + api: "anthropic-vertex", + stream: streamAnthropicVertex, + streamSimple: streamSimpleAnthropicVertex, + }); + registerApiProvider({ api: "bedrock-converse-stream", stream: streamBedrockLazy, diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index 9903b9c79..af3afc5c8 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -9,6 +9,7 @@ export type KnownApi = | "azure-openai-responses" | "openai-codex-responses" | "anthropic-messages" + | "anthropic-vertex" | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" @@ -19,6 +20,7 @@ export type Api = KnownApi | (string & {}); export 
type KnownProvider = | "amazon-bedrock" | "anthropic" + | "anthropic-vertex" | "google" | "google-gemini-cli" | "google-antigravity" diff --git a/packages/pi-coding-agent/src/core/model-resolver.ts b/packages/pi-coding-agent/src/core/model-resolver.ts index 1119f3d57..bfe6ee86f 100644 --- a/packages/pi-coding-agent/src/core/model-resolver.ts +++ b/packages/pi-coding-agent/src/core/model-resolver.ts @@ -14,6 +14,7 @@ import type { ModelRegistry } from "./model-registry.js"; const defaultModelPerProvider: Record = { "amazon-bedrock": "us.anthropic.claude-opus-4-6-v1", anthropic: "claude-opus-4-6[1m]", + "anthropic-vertex": "claude-sonnet-4-6", openai: "gpt-5.4", "azure-openai-responses": "gpt-5.2", "openai-codex": "gpt-5.4", diff --git a/src/onboarding.ts b/src/onboarding.ts index ed75d2c6c..32bff8bf9 100644 --- a/src/onboarding.ts +++ b/src/onboarding.ts @@ -63,6 +63,7 @@ const TOOL_KEYS: ToolKeyConfig[] = [ /** Known LLM provider IDs that, if authed, mean the user doesn't need onboarding */ const LLM_PROVIDER_IDS = [ 'anthropic', + 'anthropic-vertex', 'openai', 'github-copilot', 'openai-codex',