feat: add anthropic-vertex provider for Claude on Vertex AI (#1533)

* feat: add anthropic-vertex provider for Claude models on Google Vertex AI

Add a new anthropic-vertex provider that enables using Claude models
(Opus 4.6, Sonnet 4.6, Haiku 4.5) through Google Vertex AI using the
@anthropic-ai/vertex-sdk package. Follows the same pattern as the
existing google/google-vertex provider split.

Detection uses ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code)
with CLOUD_ML_REGION for region selection, falling back to us-central1.

Extracts shared Anthropic utilities into anthropic-shared.ts (message
conversion, tool conversion, param building, stream processing) to
avoid duplication between anthropic.ts and anthropic-vertex.ts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add full Claude model set for anthropic-vertex provider

Add 200K context window variants for Opus 4.6 and Sonnet 4.6, plus
older models (Sonnet 4.5, Sonnet 4, Opus 4.5, Opus 4.1, Opus 4, Haiku 4.5).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: add @anthropic-ai/vertex-sdk to root dependencies

Required for the published package to resolve the vertex SDK at runtime.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: remove unnecessary comments to match codebase style

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: remove duplicate stream functions after rebase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Nathan Roe <nathan.roe@carvana.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Roe 2026-03-20 01:14:13 -04:00 committed by GitHub
parent 111537f460
commit 39cd932abb
14 changed files with 1236 additions and 873 deletions

View file

@ -629,7 +629,7 @@ GSD isn't locked to one provider. It runs on the [Pi SDK](https://github.com/bad
### Built-in Providers
Anthropic, OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more.
Anthropic, Anthropic (Vertex AI), OpenAI, Google (Gemini), OpenRouter, GitHub Copilot, Amazon Bedrock, Azure OpenAI, Google Vertex, Groq, Cerebras, Mistral, xAI, HuggingFace, Vercel AI Gateway, and more.
### OAuth / Max Plans

View file

@ -12,7 +12,7 @@ Pi isn't locked to one provider. It supports 20+ providers out of the box and le
- Google Antigravity
**API keys (via environment variables):**
- Anthropic, OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock
- Anthropic, Anthropic (Vertex AI), OpenAI, Azure OpenAI, Google Gemini, Google Vertex, Amazon Bedrock
- Mistral, Groq, Cerebras, xAI, OpenRouter, Vercel AI Gateway
- ZAI, OpenCode Zen, OpenCode Go, Hugging Face, Kimi, MiniMax

71
package-lock.json generated
View file

@ -1,12 +1,12 @@
{
"name": "gsd-pi",
"version": "2.31.2",
"version": "2.33.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "gsd-pi",
"version": "2.31.2",
"version": "2.33.1",
"hasInstallScript": true,
"license": "MIT",
"workspaces": [
@ -15,6 +15,7 @@
],
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",
@ -96,6 +97,56 @@
}
}
},
"node_modules/@anthropic-ai/vertex-sdk": {
"version": "0.14.4",
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.4.tgz",
"integrity": "sha512-BZUPRWghZxfSFtAxU563wH+jfWBPoedAwsVxG35FhmNsjeV8tyfN+lFriWhCpcZApxA4NdT6Soov+PzfnxxD5g==",
"license": "MIT",
"dependencies": {
"@anthropic-ai/sdk": ">=0.50.3 <1",
"google-auth-library": "^9.4.2"
}
},
"node_modules/@anthropic-ai/vertex-sdk/node_modules/gcp-metadata": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
"integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
"license": "Apache-2.0",
"dependencies": {
"gaxios": "^6.1.1",
"google-logging-utils": "^0.0.2",
"json-bigint": "^1.0.0"
},
"engines": {
"node": ">=14"
}
},
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-auth-library": {
"version": "9.15.1",
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
"integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
"license": "Apache-2.0",
"dependencies": {
"base64-js": "^1.3.0",
"ecdsa-sig-formatter": "^1.0.11",
"gaxios": "^6.1.1",
"gcp-metadata": "^6.1.0",
"gtoken": "^7.0.0",
"jws": "^4.0.0"
},
"engines": {
"node": ">=14"
}
},
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-logging-utils": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
"integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
"license": "Apache-2.0",
"engines": {
"node": ">=14"
}
},
"node_modules/@aws-crypto/crc32": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz",
@ -5865,6 +5916,19 @@
"integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
"license": "ISC"
},
"node_modules/gtoken": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
"integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
"license": "MIT",
"dependencies": {
"gaxios": "^6.0.0",
"jws": "^4.0.0"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/has-flag": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
@ -9066,6 +9130,7 @@
"version": "0.57.1",
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",
@ -9101,7 +9166,7 @@
},
"packages/pi-coding-agent": {
"name": "@gsd/pi-coding-agent",
"version": "2.31.2",
"version": "2.33.1",
"dependencies": {
"@mariozechner/jiti": "^2.6.2",
"@silvia-odwyer/photon-node": "^0.3.4",

View file

@ -84,6 +84,7 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",

View file

@ -24,6 +24,7 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",

View file

@ -73,6 +73,20 @@ export function getEnvApiKey(provider: any): string | undefined {
return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
}
// Anthropic on Vertex AI uses Application Default Credentials.
// Detected via ANTHROPIC_VERTEX_PROJECT_ID (same env var as Claude Code).
if (provider === "anthropic-vertex") {
const hasProject = !!process.env.ANTHROPIC_VERTEX_PROJECT_ID;
if (hasProject) {
return "<authenticated>";
}
// Fall back to Google Cloud project env vars
const hasGoogleProject = !!(process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT);
if (hasGoogleProject && hasVertexAdcCredentials()) {
return "<authenticated>";
}
}
// Vertex AI uses Application Default Credentials, not API keys.
// Auth is configured via `gcloud auth application-default login`.
if (provider === "google-vertex") {

View file

@ -1827,6 +1827,178 @@ export const MODELS = {
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
},
"anthropic-vertex": {
"claude-opus-4-6": {
id: "claude-opus-4-6",
name: "Claude Opus 4.6 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 128000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4-6[1m]": {
id: "claude-opus-4-6[1m]",
name: "Claude Opus 4.6 1M (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 1000000,
maxTokens: 128000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4-6": {
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4-6[1m]": {
id: "claude-sonnet-4-6[1m]",
name: "Claude Sonnet 4.6 1M (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 1000000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4-5@20250929": {
id: "claude-sonnet-4-5@20250929",
name: "Claude Sonnet 4.5 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-sonnet-4@20250514": {
id: "claude-sonnet-4@20250514",
name: "Claude Sonnet 4 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 200000,
maxTokens: 64000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4-5@20251101": {
id: "claude-opus-4-5@20251101",
name: "Claude Opus 4.5 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 32000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4-1@20250805": {
id: "claude-opus-4-1@20250805",
name: "Claude Opus 4.1 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 32000,
} satisfies Model<"anthropic-vertex">,
"claude-opus-4@20250514": {
id: "claude-opus-4@20250514",
name: "Claude Opus 4 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 75,
cacheRead: 1.5,
cacheWrite: 18.75,
},
contextWindow: 200000,
maxTokens: 32000,
} satisfies Model<"anthropic-vertex">,
"claude-haiku-4-5@20251001": {
id: "claude-haiku-4-5@20251001",
name: "Claude Haiku 4.5 (Vertex)",
api: "anthropic-vertex",
provider: "anthropic-vertex",
baseUrl: "https://us-central1-aiplatform.googleapis.com",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.8,
output: 4,
cacheRead: 0.08,
cacheWrite: 1,
},
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"anthropic-vertex">,
},
"azure-openai-responses": {
"codex-mini-latest": {
id: "codex-mini-latest",

View file

@ -0,0 +1,761 @@
/**
* Shared utilities for Anthropic providers (direct API and Vertex AI).
*/
import type Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
} from "@anthropic-ai/sdk/resources/messages.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
CacheRetention,
Context,
ImageContent,
Message,
Model,
ServerToolUseContent,
StopReason,
StreamOptions,
TextContent,
ThinkingContent,
Tool,
ToolCall,
ToolResultMessage,
WebSearchResultContent,
} from "../types.js";
/** API types that use the Anthropic Messages protocol */
export type AnthropicApi = "anthropic-messages" | "anthropic-vertex";
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
/** Reasoning effort levels accepted by Anthropic's adaptive-thinking models. */
export type AnthropicEffort = "low" | "medium" | "high" | "max";
/** Anthropic-specific streaming options layered on top of the generic StreamOptions. */
export interface AnthropicOptions extends StreamOptions {
  // Enable extended thinking (only takes effect when the model supports reasoning).
  thinkingEnabled?: boolean;
  // Thinking token budget for non-adaptive models; buildParams defaults it to 1024.
  thinkingBudgetTokens?: number;
  // Effort knob for adaptive-thinking models (the 4.6-family models).
  effort?: AnthropicEffort;
  // NOTE(review): not referenced anywhere in this file — presumably consumed by a
  // caller or an onPayload hook; confirm before relying on it here.
  interleavedThinking?: boolean;
  // Anthropic tool_choice passthrough: a mode string or a specific tool selection.
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
// Tool names that Claude Code ships natively. When authenticating with a
// Claude Code OAuth token, outgoing tool names must use these canonical
// spellings, so the lookups below are case-insensitive in both directions.
const claudeCodeTools = [
  "Read",
  "Write",
  "Edit",
  "Bash",
  "Grep",
  "Glob",
  "AskUserQuestion",
  "EnterPlanMode",
  "ExitPlanMode",
  "KillShell",
  "NotebookEdit",
  "Skill",
  "Task",
  "TaskOutput",
  "TodoWrite",
  "WebFetch",
  "WebSearch",
];
const ccToolLookup = new Map<string, string>();
for (const canonical of claudeCodeTools) {
  ccToolLookup.set(canonical.toLowerCase(), canonical);
}
/** Map an arbitrary tool name onto its canonical Claude Code spelling, if it has one. */
export const toClaudeCodeName = (name: string) => {
  const canonical = ccToolLookup.get(name.toLowerCase());
  return canonical ?? name;
};
/** Map a (possibly canonicalized) tool name back to the caller's original tool name. */
export const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
  if (!tools || tools.length === 0) return name;
  const needle = name.toLowerCase();
  for (const tool of tools) {
    if (tool.name.toLowerCase() === needle) return tool.name;
  }
  return name;
};
/**
 * Resolve the effective cache retention: explicit argument first, then the
 * PI_CACHE_RETENTION environment variable, finally "short".
 */
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
  if (cacheRetention) return cacheRetention;
  const envWantsLong = typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long";
  return envWantsLong ? "long" : "short";
}
/**
 * Build the prompt-caching control block for a request. Returns no
 * cacheControl when retention is "none". The 1h TTL is only attached when
 * talking to the first-party Anthropic API host.
 */
export function getCacheControl(
  baseUrl: string,
  cacheRetention?: CacheRetention,
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
  const retention = resolveCacheRetention(cacheRetention);
  if (retention === "none") {
    return { retention };
  }
  const cacheControl: { type: "ephemeral"; ttl?: "1h" } = { type: "ephemeral" };
  if (retention === "long" && baseUrl.includes("api.anthropic.com")) {
    cacheControl.ttl = "1h";
  }
  return { retention, cacheControl };
}
/**
 * Convert tool-result content into Anthropic block form. Text-only content
 * collapses to a single newline-joined string; mixed content becomes an
 * array of text/image blocks, with a placeholder text block prepended when
 * only images are present.
 */
export function convertContentBlocks(content: (TextContent | ImageContent)[]):
  | string
  | Array<
      | { type: "text"; text: string }
      | {
          type: "image";
          source: {
            type: "base64";
            media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
            data: string;
          };
        }
    > {
  // No images at all: join the text blocks into one sanitized string.
  if (content.every((c) => c.type !== "image")) {
    const joined = content.map((c) => (c as TextContent).text).join("\n");
    return sanitizeSurrogates(joined);
  }
  const blocks: Array<
    | { type: "text"; text: string }
    | {
        type: "image";
        source: {
          type: "base64";
          media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
          data: string;
        };
      }
  > = [];
  for (const item of content) {
    if (item.type === "text") {
      blocks.push({ type: "text", text: sanitizeSurrogates(item.text) });
    } else {
      blocks.push({
        type: "image",
        source: {
          type: "base64",
          media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
          data: item.data,
        },
      });
    }
  }
  // Image-only content: add a stub text block so the result is never text-free.
  if (!blocks.some((b) => b.type === "text")) {
    blocks.unshift({ type: "text", text: "(see attached image)" });
  }
  return blocks;
}
/** True for model IDs in the 4.6 Opus/Sonnet families, which use adaptive thinking. */
export function supportsAdaptiveThinking(modelId: string): boolean {
  const adaptiveFamilies = ["opus-4-6", "opus-4.6", "sonnet-4-6", "sonnet-4.6"];
  return adaptiveFamilies.some((marker) => modelId.includes(marker));
}
/**
 * Translate a generic thinking level into an Anthropic effort value.
 * "xhigh" maps to "max" only on Opus 4.6; unknown/absent levels default to "high".
 */
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
  if (level === "xhigh") {
    const isOpus46 = modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
    return isOpus46 ? "max" : "high";
  }
  const effortByLevel: Record<string, AnthropicEffort> = {
    minimal: "low",
    low: "low",
    medium: "medium",
    high: "high",
  };
  return (level !== undefined ? effortByLevel[level] : undefined) ?? "high";
}
/**
 * Heuristically classify an error as a transient network failure that is
 * safe to retry: connection resets, broken pipes, DNS hiccups, closed
 * sockets, or generic fetch failures.
 *
 * Fixes vs. previous version: string literals normalized to the file's
 * double-quote style, and the `connection && closed` clause is explicitly
 * parenthesized instead of relying on `&&`/`||` precedence (same behavior,
 * no longer a reading hazard).
 */
export function isTransientNetworkError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  const msg = error.message.toLowerCase();
  const code = (error as NodeJS.ErrnoException).code;
  // Well-known transient errno codes from Node's networking stack.
  const transientCodes = ["ECONNRESET", "EPIPE", "ETIMEDOUT", "ENOTFOUND", "EAI_AGAIN"];
  if (code !== undefined && transientCodes.includes(code)) return true;
  return (
    msg.includes("connector_closed") ||
    msg.includes("socket hang up") ||
    msg.includes("network") ||
    (msg.includes("connection") && msg.includes("closed")) ||
    msg.includes("fetch failed")
  );
}
/**
 * Derive a retry delay (ms) from rate-limit response headers.
 * Checks `retry-after` (seconds or HTTP-date), then the x-ratelimit reset
 * headers (epoch seconds). Every usable delay is padded by one second and
 * rounded up; non-positive delays are discarded. Returns undefined when no
 * header yields a usable value.
 *
 * `errorText` is accepted for interface stability but currently unused.
 */
export function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
  const pad = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
  const retryAfter = headers.get("retry-after");
  if (retryAfter) {
    // Numeric form: delay in seconds.
    const asSeconds = Number(retryAfter);
    if (Number.isFinite(asSeconds)) {
      const delay = pad(asSeconds * 1000);
      if (delay !== undefined) return delay;
    }
    // HTTP-date form: absolute timestamp.
    const asTimestamp = new Date(retryAfter).getTime();
    if (!Number.isNaN(asTimestamp)) {
      const delay = pad(asTimestamp - Date.now());
      if (delay !== undefined) return delay;
    }
  }
  // Fallback: reset headers carry an epoch-seconds timestamp.
  for (const name of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
    const raw = headers.get(name);
    if (!raw) continue;
    const resetSeconds = Number(raw);
    if (!Number.isFinite(resetSeconds)) continue;
    const delay = pad(resetSeconds * 1000 - Date.now());
    if (delay !== undefined) return delay;
  }
  return undefined;
}
/**
 * Sanitize a tool-call ID for the Anthropic API: replace characters outside
 * [a-zA-Z0-9_-] with underscores and cap the length at 64.
 */
export function normalizeToolCallId(id: string): string {
  const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
  return sanitized.substring(0, 64);
}
/**
 * Convert provider-agnostic messages into Anthropic `MessageParam`s.
 *
 * - Empty/whitespace-only text is dropped rather than sent as empty blocks.
 * - Image blocks are stripped for models whose `input` does not include "image".
 * - Thinking blocks without a signature are downgraded to plain text; only
 *   signed (or redacted) thinking is replayed as thinking blocks.
 * - Consecutive toolResult messages are merged into one user message of
 *   `tool_result` blocks.
 * - When `cacheControl` is given, it is attached to the last eligible block
 *   of the final user message so the conversation prefix becomes cacheable.
 */
export function convertMessages(
  messages: Message[],
  model: Model<AnthropicApi>,
  isOAuthToken: boolean,
  cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
  const params: MessageParam[] = [];
  // Normalize tool-call IDs and apply model-specific transforms up front.
  const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
  for (let i = 0; i < transformedMessages.length; i++) {
    const msg = transformedMessages[i];
    if (msg.role === "user") {
      if (typeof msg.content === "string") {
        if (msg.content.trim().length > 0) {
          params.push({
            role: "user",
            content: sanitizeSurrogates(msg.content),
          });
        }
      } else {
        const blocks: ContentBlockParam[] = msg.content.map((item) => {
          if (item.type === "text") {
            return {
              type: "text",
              text: sanitizeSurrogates(item.text),
            };
          } else {
            return {
              type: "image",
              source: {
                type: "base64",
                media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
                data: item.data,
              },
            };
          }
        });
        // Drop images for text-only models, then drop empty text blocks.
        let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
        filteredBlocks = filteredBlocks.filter((b) => {
          if (b.type === "text") {
            return b.text.trim().length > 0;
          }
          return true;
        });
        if (filteredBlocks.length === 0) continue;
        params.push({
          role: "user",
          content: filteredBlocks,
        });
      }
    } else if (msg.role === "assistant") {
      const blocks: ContentBlockParam[] = [];
      for (const block of msg.content) {
        if (block.type === "text") {
          if (block.text.trim().length === 0) continue;
          blocks.push({
            type: "text",
            text: sanitizeSurrogates(block.text),
          });
        } else if (block.type === "thinking") {
          if (block.redacted) {
            // Redacted thinking carries its opaque payload in thinkingSignature.
            blocks.push({
              type: "redacted_thinking",
              data: block.thinkingSignature!,
            });
            continue;
          }
          if (block.thinking.trim().length === 0) continue;
          if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
            // Unsigned thinking cannot be replayed as a thinking block; send as text.
            blocks.push({
              type: "text",
              text: sanitizeSurrogates(block.thinking),
            });
          } else {
            blocks.push({
              type: "thinking",
              thinking: sanitizeSurrogates(block.thinking),
              signature: block.thinkingSignature,
            });
          }
        } else if (block.type === "toolCall") {
          blocks.push({
            type: "tool_use",
            id: block.id,
            // OAuth tokens require canonical Claude Code tool names.
            name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
            input: block.arguments ?? {},
          });
        } else if (block.type === "serverToolUse") {
          blocks.push({
            type: "server_tool_use",
            id: block.id,
            name: block.name,
            input: block.input ?? {},
          } as any);
        } else if (block.type === "webSearchResult") {
          blocks.push({
            type: "web_search_tool_result",
            tool_use_id: block.toolUseId,
            content: block.content,
          } as any);
        }
      }
      if (blocks.length === 0) continue;
      params.push({
        role: "assistant",
        content: blocks,
      });
    } else if (msg.role === "toolResult") {
      const toolResults: ContentBlockParam[] = [];
      toolResults.push({
        type: "tool_result",
        tool_use_id: msg.toolCallId,
        content: convertContentBlocks(msg.content),
        is_error: msg.isError,
      });
      // Coalesce any directly-following toolResult messages into this batch.
      let j = i + 1;
      while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
        const nextMsg = transformedMessages[j] as ToolResultMessage;
        toolResults.push({
          type: "tool_result",
          tool_use_id: nextMsg.toolCallId,
          content: convertContentBlocks(nextMsg.content),
          is_error: nextMsg.isError,
        });
        j++;
      }
      // Skip the messages we just merged.
      i = j - 1;
      params.push({
        role: "user",
        content: toolResults,
      });
    }
  }
  // Mark the tail of the conversation as a cache breakpoint.
  if (cacheControl && params.length > 0) {
    const lastMessage = params[params.length - 1];
    if (lastMessage.role === "user") {
      if (Array.isArray(lastMessage.content)) {
        const lastBlock = lastMessage.content[lastMessage.content.length - 1];
        if (
          lastBlock &&
          (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
        ) {
          (lastBlock as any).cache_control = cacheControl;
        }
      } else if (typeof lastMessage.content === "string") {
        // Promote a plain string to a text block so it can carry cache_control.
        lastMessage.content = [
          {
            type: "text",
            text: lastMessage.content,
            cache_control: cacheControl,
          },
        ] as any;
      }
    }
  }
  return params;
}
/**
 * Convert internal tool definitions into the Anthropic tool schema.
 * Tool names are canonicalized to Claude Code spellings when using an
 * OAuth token. Returns an empty list when no tools are given.
 */
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
  if (!tools) return [];
  const converted: Anthropic.Messages.Tool[] = [];
  for (const tool of tools) {
    const schema = tool.parameters as any;
    converted.push({
      name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
      description: tool.description,
      input_schema: {
        type: "object" as const,
        properties: schema.properties || {},
        required: schema.required || [],
      },
    });
  }
  return converted;
}
/**
 * Assemble the streaming create-message params for an Anthropic request:
 * message conversion, system prompt, tools, thinking config, metadata and
 * tool_choice.
 *
 * Model IDs may carry a bracketed variant suffix (e.g. "claude-opus-4-6[1m]")
 * used locally to select a context-window tier; it is stripped before the ID
 * is sent to the API.
 */
export function buildParams(
  model: Model<AnthropicApi>,
  context: Context,
  isOAuthToken: boolean,
  options?: AnthropicOptions,
): MessageCreateParamsStreaming {
  const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
  // Strip the local "[...]" variant suffix from the model ID.
  const apiModelId = model.id.replace(/\[.*\]$/, "");
  const params: MessageCreateParamsStreaming = {
    model: apiModelId,
    messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
    // Default to a third of the model's max output tokens (`| 0` truncates the division).
    max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
    stream: true,
  };
  if (isOAuthToken) {
    // Claude Code OAuth tokens expect this exact first system block; the
    // caller's own system prompt is appended after it.
    params.system = [
      {
        type: "text",
        text: "You are Claude Code, Anthropic's official CLI for Claude.",
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
    if (context.systemPrompt) {
      params.system.push({
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      });
    }
  } else if (context.systemPrompt) {
    params.system = [
      {
        type: "text",
        text: sanitizeSurrogates(context.systemPrompt),
        ...(cacheControl ? { cache_control: cacheControl } : {}),
      },
    ];
  }
  // Temperature is omitted whenever thinking is enabled — the two are never sent together.
  if (options?.temperature !== undefined && !options?.thinkingEnabled) {
    params.temperature = options.temperature;
  }
  if (context.tools) {
    params.tools = convertTools(context.tools, isOAuthToken);
  }
  if (options?.thinkingEnabled && model.reasoning) {
    if (supportsAdaptiveThinking(model.id)) {
      // 4.6-family models: adaptive thinking with an optional effort knob.
      params.thinking = { type: "adaptive" };
      if (options.effort) {
        params.output_config = { effort: options.effort };
      }
    } else {
      // Older reasoning models: explicit thinking token budget.
      params.thinking = {
        type: "enabled",
        budget_tokens: options.thinkingBudgetTokens || 1024,
      };
    }
  }
  // Only a string user_id is forwarded from metadata.
  if (options?.metadata) {
    const userId = options.metadata.user_id;
    if (typeof userId === "string") {
      params.metadata = { user_id: userId };
    }
  }
  if (options?.toolChoice) {
    if (typeof options.toolChoice === "string") {
      params.tool_choice = { type: options.toolChoice };
    } else {
      params.tool_choice = options.toolChoice;
    }
  }
  return params;
}
/**
 * Map an Anthropic stop reason onto the internal StopReason enum.
 * Throws on reasons this code has never seen, so new API values fail loudly.
 */
export function mapStopReason(reason: string): StopReason {
  const mapping: Record<string, StopReason> = {
    end_turn: "stop",
    pause_turn: "stop",
    stop_sequence: "stop",
    max_tokens: "length",
    tool_use: "toolUse",
    refusal: "error",
    sensitive: "error",
  };
  const mapped = mapping[reason];
  if (mapped === undefined) {
    throw new Error(`Unhandled stop reason: ${reason}`);
  }
  return mapped;
}
/** Inputs for processAnthropicStream. */
export interface StreamAnthropicArgs {
  // Client speaking the Anthropic Messages protocol (the Vertex client is
  // presumably passed here too — it exposes the same messages.stream API).
  client: Anthropic;
  model: Model<AnthropicApi>;
  context: Context;
  // True when auth uses a Claude Code OAuth token (changes tool naming and the system prompt).
  isOAuthToken: boolean;
  options?: AnthropicOptions;
  // SDK class used to recognize APIError instances for retry-after extraction.
  AnthropicSdkClass?: typeof Anthropic;
}
/**
 * Drive an Anthropic streaming request and translate SDK stream events into
 * the internal AssistantMessageEventStream protocol.
 *
 * Fire-and-forget: the async IIFE pushes start/delta/end events onto
 * `stream` as they arrive and always terminates the stream with either a
 * "done" or an "error" event. Usage and cost figures are updated on
 * message_start and message_delta events.
 */
export function processAnthropicStream(
  stream: AssistantMessageEventStream,
  args: StreamAnthropicArgs,
): void {
  const { client, model, context, isOAuthToken, options, AnthropicSdkClass } = args;
  (async () => {
    // Accumulator for the full assistant message; also sent as `partial` on every event.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };
    try {
      let params = buildParams(model, context, isOAuthToken, options);
      // Let the caller inspect or replace the payload before it is sent.
      const nextParams = await options?.onPayload?.(params, model);
      if (nextParams !== undefined) {
        params = nextParams as MessageCreateParamsStreaming;
      }
      const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
      stream.push({ type: "start", partial: output });
      // In-flight blocks carry the server's block index so deltas can be routed
      // to the right block; `index` is removed again when the block stops.
      type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number };
      const blocks = output.content as Block[];
      for await (const event of anthropicStream) {
        if (event.type === "message_start") {
          // Initial usage snapshot; refined later by message_delta.
          output.usage.input = event.message.usage.input_tokens || 0;
          output.usage.output = event.message.usage.output_tokens || 0;
          output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
          output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
          output.usage.totalTokens =
            output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
          calculateCost(model, output.usage);
        } else if (event.type === "content_block_start") {
          // Open a new content block of the appropriate kind.
          if (event.content_block.type === "text") {
            const block: Block = {
              type: "text",
              text: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
          } else if (event.content_block.type === "thinking") {
            const block: Block = {
              type: "thinking",
              thinking: "",
              thinkingSignature: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
          } else if (event.content_block.type === "redacted_thinking") {
            // Redacted thinking arrives whole; the opaque payload is kept in thinkingSignature.
            const block: Block = {
              type: "thinking",
              thinking: "[Reasoning redacted]",
              thinkingSignature: event.content_block.data,
              redacted: true,
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
          } else if (event.content_block.type === "tool_use") {
            const block: Block = {
              type: "toolCall",
              id: event.content_block.id,
              // Map canonical Claude Code names back to the caller's tool names.
              name: isOAuthToken
                ? fromClaudeCodeName(event.content_block.name, context.tools)
                : event.content_block.name,
              arguments: (event.content_block.input as Record<string, any>) ?? {},
              partialJson: "",
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
          } else if ((event.content_block as any).type === "server_tool_use") {
            const serverBlock = event.content_block as any;
            const block: Block = {
              type: "serverToolUse",
              id: serverBlock.id,
              name: serverBlock.name,
              input: serverBlock.input,
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
          } else if ((event.content_block as any).type === "web_search_tool_result") {
            const resultBlock = event.content_block as any;
            const block: Block = {
              type: "webSearchResult",
              toolUseId: resultBlock.tool_use_id,
              content: resultBlock.content,
              index: event.index,
            };
            output.content.push(block);
            stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output });
          }
        } else if (event.type === "content_block_delta") {
          // Route the delta to the open block with the matching server index.
          if (event.delta.type === "text_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "text") {
              block.text += event.delta.text;
              stream.push({
                type: "text_delta",
                contentIndex: index,
                delta: event.delta.text,
                partial: output,
              });
            }
          } else if (event.delta.type === "thinking_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinking += event.delta.thinking;
              stream.push({
                type: "thinking_delta",
                contentIndex: index,
                delta: event.delta.thinking,
                partial: output,
              });
            }
          } else if (event.delta.type === "input_json_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "toolCall") {
              // Arguments arrive as partial JSON; keep a best-effort parse live.
              block.partialJson += event.delta.partial_json;
              block.arguments = parseStreamingJson(block.partialJson);
              stream.push({
                type: "toolcall_delta",
                contentIndex: index,
                delta: event.delta.partial_json,
                partial: output,
              });
            }
          } else if (event.delta.type === "signature_delta") {
            const index = blocks.findIndex((b) => b.index === event.index);
            const block = blocks[index];
            if (block && block.type === "thinking") {
              block.thinkingSignature = block.thinkingSignature || "";
              block.thinkingSignature += event.delta.signature;
            }
          }
        } else if (event.type === "content_block_stop") {
          const index = blocks.findIndex((b) => b.index === event.index);
          const block = blocks[index];
          if (block) {
            // Block is complete: drop the routing index and emit the end event.
            delete (block as any).index;
            if (block.type === "text") {
              stream.push({
                type: "text_end",
                contentIndex: index,
                content: block.text,
                partial: output,
              });
            } else if (block.type === "thinking") {
              stream.push({
                type: "thinking_end",
                contentIndex: index,
                content: block.thinking,
                partial: output,
              });
            } else if (block.type === "toolCall") {
              block.arguments = parseStreamingJson(block.partialJson);
              delete (block as any).partialJson;
              stream.push({
                type: "toolcall_end",
                contentIndex: index,
                toolCall: block,
                partial: output,
              });
            }
          }
        } else if (event.type === "message_delta") {
          if (event.delta.stop_reason) {
            output.stopReason = mapStopReason(event.delta.stop_reason);
          }
          // Final usage numbers supersede the message_start snapshot.
          if (event.usage.input_tokens != null) {
            output.usage.input = event.usage.input_tokens;
          }
          if (event.usage.output_tokens != null) {
            output.usage.output = event.usage.output_tokens;
          }
          if (event.usage.cache_read_input_tokens != null) {
            output.usage.cacheRead = event.usage.cache_read_input_tokens;
          }
          if (event.usage.cache_creation_input_tokens != null) {
            output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
          }
          output.usage.totalTokens =
            output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
          calculateCost(model, output.usage);
        }
      }
      if (options?.signal?.aborted) {
        throw new Error("Request was aborted");
      }
      if (output.stopReason === "aborted" || output.stopReason === "error") {
        throw new Error("An unknown error occurred");
      }
      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip routing indices from any blocks that were still open.
      for (const block of output.content) delete (block as any).index;
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
      if (model.provider === "alibaba-coding-plan") {
        output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
      }
      // Surface rate-limit retry hints when the SDK exposed response headers.
      if (AnthropicSdkClass && error instanceof AnthropicSdkClass.APIError && error.headers) {
        const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
        if (retryAfterMs !== undefined) {
          output.retryAfterMs = retryAfterMs;
        }
      }
      if (isTransientNetworkError(error)) {
        output.retryAfterMs = output.retryAfterMs ?? 5000;
      }
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();
}

View file

@ -0,0 +1,130 @@
// Lazy-loaded: Anthropic Vertex SDK is imported on first use, not at startup.
// This avoids penalizing users who don't use Anthropic Vertex models.
import type Anthropic from "@anthropic-ai/sdk";
import type { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
import { getEnvApiKey } from "../env-api-keys.js";
import type {
	Api,
	AssistantMessage,
	Context,
	Model,
	SimpleStreamOptions,
	StreamFunction,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import {
type AnthropicOptions,
mapThinkingLevelToEffort,
processAnthropicStream,
supportsAdaptiveThinking,
} from "./anthropic-shared.js";
let _AnthropicVertexClass: typeof AnthropicVertex | undefined;
let _AnthropicSdkClass: typeof Anthropic | undefined;
// Import the Vertex SDK on demand and memoize the class so the (slow) module
// load happens at most once per process.
async function getAnthropicVertexClass(): Promise<typeof AnthropicVertex> {
	if (_AnthropicVertexClass === undefined) {
		_AnthropicVertexClass = (await import("@anthropic-ai/vertex-sdk")).AnthropicVertex;
	}
	return _AnthropicVertexClass;
}
// Lazily load the base Anthropic SDK (its default export) and cache it,
// mirroring getAnthropicVertexClass above.
async function getAnthropicSdkClass(): Promise<typeof Anthropic> {
	if (_AnthropicSdkClass === undefined) {
		const { default: sdk } = await import("@anthropic-ai/sdk");
		_AnthropicSdkClass = sdk;
	}
	return _AnthropicSdkClass;
}
/**
 * Pick the GCP project ID from the environment.
 * ANTHROPIC_VERTEX_PROJECT_ID (Claude Code's variable) wins over the generic
 * GOOGLE_CLOUD_PROJECT / GCLOUD_PROJECT; empty strings are treated as unset.
 * Throws when no candidate is set.
 */
function resolveProjectId(): string {
	const candidates = [
		process.env.ANTHROPIC_VERTEX_PROJECT_ID,
		process.env.GOOGLE_CLOUD_PROJECT,
		process.env.GCLOUD_PROJECT,
	];
	const projectId = candidates.find((value) => value);
	if (!projectId) {
		throw new Error(
			"Anthropic Vertex requires a project ID. Set ANTHROPIC_VERTEX_PROJECT_ID, GOOGLE_CLOUD_PROJECT, or GCLOUD_PROJECT.",
		);
	}
	return projectId;
}
/**
 * Pick the Vertex region: CLOUD_ML_REGION (Claude Code's variable) first,
 * then GOOGLE_CLOUD_LOCATION, defaulting to us-central1.
 * Empty strings fall through to the next candidate.
 */
function resolveRegion(): string {
	const region = process.env.CLOUD_ML_REGION || process.env.GOOGLE_CLOUD_LOCATION;
	return region || "us-central1";
}
async function createVertexClient(): Promise<AnthropicVertex> {
const AnthropicVertexClass = await getAnthropicVertexClass();
const projectId = resolveProjectId();
const region = resolveRegion();
return new AnthropicVertexClass({
projectId,
region,
});
}
export const streamAnthropicVertex: StreamFunction<"anthropic-vertex", AnthropicOptions> = (
model: Model<"anthropic-vertex">,
context: Context,
options?: AnthropicOptions,
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
(async () => {
const client = await createVertexClient();
const AnthropicSdk = await getAnthropicSdkClass();
processAnthropicStream(stream, {
client: client as unknown as Anthropic,
model,
context,
isOAuthToken: false,
options,
AnthropicSdkClass: AnthropicSdk,
});
})();
return stream;
};
/**
 * Simple streaming entry point for Claude on Vertex AI. Mirrors
 * streamSimpleAnthropic: resolve the key, build base options, then pick the
 * thinking strategy based on the model generation.
 */
export const streamSimpleAnthropicVertex: StreamFunction<"anthropic-vertex", SimpleStreamOptions> = (
	model: Model<"anthropic-vertex">,
	context: Context,
	options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key found for provider: ${model.provider}. Set ANTHROPIC_VERTEX_PROJECT_ID to use Claude on Vertex AI.`);
	}
	const base = buildBaseOptions(model, options, apiKey);
	const reasoning = options?.reasoning;
	if (!reasoning) {
		// No reasoning requested: thinking stays off entirely.
		return streamAnthropicVertex(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
	}
	if (supportsAdaptiveThinking(model.id)) {
		// Opus 4.6 / Sonnet 4.6 take an adaptive-thinking effort level.
		return streamAnthropicVertex(model, context, {
			...base,
			thinkingEnabled: true,
			effort: mapThinkingLevelToEffort(reasoning, model.id),
		} satisfies AnthropicOptions);
	}
	// Older models use a token budget carved out of maxTokens.
	const adjusted = adjustMaxTokensForThinking(
		base.maxTokens || 0,
		model.maxTokens,
		reasoning,
		options?.thinkingBudgets,
	);
	return streamAnthropicVertex(model, context, {
		...base,
		maxTokens: adjusted.maxTokens,
		thinkingEnabled: true,
		thinkingBudgetTokens: adjusted.thinkingBudget,
	} satisfies AnthropicOptions);
};

View file

@ -1,40 +1,29 @@
// Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup.
// This avoids penalizing users who don't use Anthropic models.
import type Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
} from "@anthropic-ai/sdk/resources/messages.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
CacheRetention,
Context,
ImageContent,
Message,
Model,
ServerToolUseContent,
SimpleStreamOptions,
StopReason,
StreamFunction,
StreamOptions,
TextContent,
ThinkingContent,
Tool,
ToolCall,
ToolResultMessage,
WebSearchResultContent,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";
import {
type AnthropicEffort,
type AnthropicOptions,
extractRetryAfterMs,
mapThinkingLevelToEffort,
processAnthropicStream,
supportsAdaptiveThinking,
} from "./anthropic-shared.js";
// Re-export types used by other modules
export type { AnthropicEffort, AnthropicOptions };
export { extractRetryAfterMs };
let _AnthropicClass: typeof Anthropic | undefined;
async function getAnthropicClass(): Promise<typeof Anthropic> {
@ -45,154 +34,9 @@ async function getAnthropicClass(): Promise<typeof Anthropic> {
return _AnthropicClass;
}
/**
 * Resolve the cache retention preference.
 * An explicit setting wins; otherwise PI_CACHE_RETENTION=long opts into
 * "long" (kept for backward compatibility), and everything else is "short".
 */
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
	if (cacheRetention) return cacheRetention;
	// Guard on `process` so this also works in non-Node runtimes.
	const fromEnv = typeof process !== "undefined" ? process.env.PI_CACHE_RETENTION : undefined;
	return fromEnv === "long" ? "long" : "short";
}
/**
 * Translate a cache retention preference into an Anthropic cache_control
 * payload. "none" yields no cache_control at all; "long" adds a 1h TTL, but
 * only when the request targets api.anthropic.com.
 */
function getCacheControl(
	baseUrl: string,
	cacheRetention?: CacheRetention,
): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } {
	const retention = resolveCacheRetention(cacheRetention);
	if (retention === "none") {
		return { retention };
	}
	const cacheControl: { type: "ephemeral"; ttl?: "1h" } = { type: "ephemeral" };
	if (retention === "long" && baseUrl.includes("api.anthropic.com")) {
		cacheControl.ttl = "1h";
	}
	return { retention, cacheControl };
}
// Stealth mode: Mimic Claude Code's tool naming exactly
const claudeCodeVersion = "2.1.62";
// Claude Code 2.x tool names (canonical casing)
// Source: https://cchistory.mariozechner.at/data/prompts-2.1.11.md
// To update: https://github.com/badlogic/cchistory
const claudeCodeTools = [
	"Read",
	"Write",
	"Edit",
	"Bash",
	"Grep",
	"Glob",
	"AskUserQuestion",
	"EnterPlanMode",
	"ExitPlanMode",
	"KillShell",
	"NotebookEdit",
	"Skill",
	"Task",
	"TaskOutput",
	"TodoWrite",
	"WebFetch",
	"WebSearch",
];
// Case-insensitive lookup: lowercased tool name -> canonical CC casing.
const ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));
// Convert tool name to CC canonical casing if it matches (case-insensitive)
const toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? name;
// Map a CC-cased tool name back to the caller's original name by case-insensitive
// match against the provided tool list; returns the name unchanged when no tool
// matches or no tools were supplied.
const fromClaudeCodeName = (name: string, tools?: Tool[]) => {
	if (tools && tools.length > 0) {
		const lowerName = name.toLowerCase();
		const matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);
		if (matchedTool) return matchedTool.name;
	}
	return name;
};
/**
 * Convert content blocks to Anthropic API format.
 * Text-only content collapses to one newline-joined string; the presence of
 * any image forces the block-array form, and a placeholder text block is
 * prepended when the content is images only.
 */
function convertContentBlocks(content: (TextContent | ImageContent)[]):
	| string
	| Array<
		| { type: "text"; text: string }
		| {
				type: "image";
				source: {
					type: "base64";
					media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
					data: string;
				};
		  }
	> {
	const imageFree = content.every((c) => c.type !== "image");
	if (imageFree) {
		return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n"));
	}
	const blocks = content.map((block) =>
		block.type === "text"
			? { type: "text" as const, text: sanitizeSurrogates(block.text) }
			: {
					type: "image" as const,
					source: {
						type: "base64" as const,
						media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
						data: block.data,
					},
				},
	);
	if (!blocks.some((b) => b.type === "text")) {
		blocks.unshift({
			type: "text" as const,
			text: "(see attached image)",
		});
	}
	return blocks;
}
export type AnthropicEffort = "low" | "medium" | "high" | "max";
export interface AnthropicOptions extends StreamOptions {
	/**
	 * Enable extended thinking.
	 * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
	 * For older models: uses budget-based thinking with thinkingBudgetTokens.
	 */
	thinkingEnabled?: boolean;
	/**
	 * Token budget for extended thinking (older models only).
	 * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
	 */
	thinkingBudgetTokens?: number;
	/**
	 * Effort level for adaptive thinking (Opus 4.6 and Sonnet 4.6).
	 * Controls how much thinking Claude allocates:
	 * - "max": Always thinks with no constraints (Opus 4.6 only)
	 * - "high": Always thinks, deep reasoning (default)
	 * - "medium": Moderate thinking, may skip for simple queries
	 * - "low": Minimal thinking, skips for simple tasks
	 * Ignored for older models.
	 */
	effort?: AnthropicEffort;
	/** Enable interleaved thinking; call sites default this to true when unset. */
	interleavedThinking?: boolean;
	/** Forwarded to the API's tool_choice: a mode string or a specific tool selection. */
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
const merged: Record<string, string> = {};
for (const headers of headerSources) {
@ -203,410 +47,6 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
return merged;
}
/**
 * Detect transient network errors that are likely to succeed on retry.
 * Covers WebSocket disconnects (Tailscale, VPN), TCP resets, and DNS failures.
 */
function isTransientNetworkError(error: unknown): boolean {
	if (!(error instanceof Error)) return false;
	const code = (error as NodeJS.ErrnoException).code;
	if (code !== undefined && ["ECONNRESET", "EPIPE", "ETIMEDOUT", "ENOTFOUND", "EAI_AGAIN"].includes(code)) {
		return true;
	}
	const msg = error.message.toLowerCase();
	if (msg.includes("connector_closed")) return true;
	if (msg.includes("socket hang up")) return true;
	if (msg.includes("network")) return true;
	// "connection ... closed" in any phrasing counts as transient.
	if (msg.includes("connection") && msg.includes("closed")) return true;
	return msg.includes("fetch failed");
}
/**
 * Extract retry delay from Anthropic error response headers (in milliseconds).
 * Checks: retry-after (seconds or RFC date), x-ratelimit-reset-requests,
 * x-ratelimit-reset-tokens. Returns undefined if no valid delay is found or
 * the delay lies in the past. A one-second pad is added to every delay.
 */
function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
	// Reject past/zero delays; otherwise pad by 1s and round up.
	const withPadding = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
	const retryAfter = headers.get("retry-after");
	if (retryAfter) {
		const asSeconds = Number(retryAfter);
		if (Number.isFinite(asSeconds)) {
			const delay = withPadding(asSeconds * 1000);
			if (delay !== undefined) return delay;
		}
		// Not a number: try parsing retry-after as an HTTP date.
		const asTimestamp = new Date(retryAfter).getTime();
		if (!Number.isNaN(asTimestamp)) {
			const delay = withPadding(asTimestamp - Date.now());
			if (delay !== undefined) return delay;
		}
	}
	// x-ratelimit-reset-requests / x-ratelimit-reset-tokens are Unix timestamps (seconds)
	for (const name of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
		const raw = headers.get(name);
		if (!raw) continue;
		const resetSeconds = Number(raw);
		if (!Number.isFinite(resetSeconds)) continue;
		const delay = withPadding(resetSeconds * 1000 - Date.now());
		if (delay !== undefined) return delay;
	}
	return undefined;
}
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
model: Model<"anthropic-messages">,
context: Context,
options?: AnthropicOptions,
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: model.api as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
try {
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
let copilotDynamicHeaders: Record<string, string> | undefined;
if (model.provider === "github-copilot") {
const hasImages = hasCopilotVisionInput(context.messages);
copilotDynamicHeaders = buildCopilotDynamicHeaders({
messages: context.messages,
hasImages,
});
}
const { client, isOAuthToken } = await createClient(
model,
apiKey,
options?.interleavedThinking ?? true,
options?.headers,
copilotDynamicHeaders,
);
let params = buildParams(model, context, isOAuthToken, options);
const nextParams = await options?.onPayload?.(params, model);
if (nextParams !== undefined) {
params = nextParams as MessageCreateParamsStreaming;
}
const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
stream.push({ type: "start", partial: output });
type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | ServerToolUseContent | WebSearchResultContent) & { index: number };
const blocks = output.content as Block[];
for await (const event of anthropicStream) {
if (event.type === "message_start") {
// Capture initial token usage from message_start event
// This ensures we have input token counts even if the stream is aborted early
output.usage.input = event.message.usage.input_tokens || 0;
output.usage.output = event.message.usage.output_tokens || 0;
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
} else if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
const block: Block = {
type: "text",
text: "",
index: event.index,
};
output.content.push(block);
stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "thinking") {
const block: Block = {
type: "thinking",
thinking: "",
thinkingSignature: "",
index: event.index,
};
output.content.push(block);
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "redacted_thinking") {
const block: Block = {
type: "thinking",
thinking: "[Reasoning redacted]",
thinkingSignature: event.content_block.data,
redacted: true,
index: event.index,
};
output.content.push(block);
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "tool_use") {
const block: Block = {
type: "toolCall",
id: event.content_block.id,
name: isOAuthToken
? fromClaudeCodeName(event.content_block.name, context.tools)
: event.content_block.name,
arguments: (event.content_block.input as Record<string, any>) ?? {},
partialJson: "",
index: event.index,
};
output.content.push(block);
stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
} else if ((event.content_block as any).type === "server_tool_use") {
const serverBlock = event.content_block as any;
const block: Block = {
type: "serverToolUse",
id: serverBlock.id,
name: serverBlock.name,
input: serverBlock.input,
index: event.index,
};
output.content.push(block);
stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
} else if ((event.content_block as any).type === "web_search_tool_result") {
const resultBlock = event.content_block as any;
const block: Block = {
type: "webSearchResult",
toolUseId: resultBlock.tool_use_id,
content: resultBlock.content,
index: event.index,
};
output.content.push(block);
stream.push({ type: "web_search_result", contentIndex: output.content.length - 1, partial: output });
}
} else if (event.type === "content_block_delta") {
if (event.delta.type === "text_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "text") {
block.text += event.delta.text;
stream.push({
type: "text_delta",
contentIndex: index,
delta: event.delta.text,
partial: output,
});
}
} else if (event.delta.type === "thinking_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "thinking") {
block.thinking += event.delta.thinking;
stream.push({
type: "thinking_delta",
contentIndex: index,
delta: event.delta.thinking,
partial: output,
});
}
} else if (event.delta.type === "input_json_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "toolCall") {
block.partialJson += event.delta.partial_json;
block.arguments = parseStreamingJson(block.partialJson);
stream.push({
type: "toolcall_delta",
contentIndex: index,
delta: event.delta.partial_json,
partial: output,
});
}
} else if (event.delta.type === "signature_delta") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block && block.type === "thinking") {
block.thinkingSignature = block.thinkingSignature || "";
block.thinkingSignature += event.delta.signature;
}
}
} else if (event.type === "content_block_stop") {
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block) {
delete (block as any).index;
if (block.type === "text") {
stream.push({
type: "text_end",
contentIndex: index,
content: block.text,
partial: output,
});
} else if (block.type === "thinking") {
stream.push({
type: "thinking_end",
contentIndex: index,
content: block.thinking,
partial: output,
});
} else if (block.type === "toolCall") {
block.arguments = parseStreamingJson(block.partialJson);
delete (block as any).partialJson;
stream.push({
type: "toolcall_end",
contentIndex: index,
toolCall: block,
partial: output,
});
}
// serverToolUse and webSearchResult blocks just need index cleanup (already emitted on start)
}
} else if (event.type === "message_delta") {
if (event.delta.stop_reason) {
output.stopReason = mapStopReason(event.delta.stop_reason);
}
// Only update usage fields if present (not null).
// Preserves input_tokens from message_start when proxies omit it in message_delta.
if (event.usage.input_tokens != null) {
output.usage.input = event.usage.input_tokens;
}
if (event.usage.output_tokens != null) {
output.usage.output = event.usage.output_tokens;
}
if (event.usage.cache_read_input_tokens != null) {
output.usage.cacheRead = event.usage.cache_read_input_tokens;
}
if (event.usage.cache_creation_input_tokens != null) {
output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
}
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
}
}
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
if (output.stopReason === "aborted" || output.stopReason === "error") {
throw new Error("An unknown error occurred");
}
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
for (const block of output.content) delete (block as any).index;
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
if (model.provider === "alibaba-coding-plan") {
output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
}
const AnthropicSdk = _AnthropicClass;
if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) {
const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
if (retryAfterMs !== undefined) {
output.retryAfterMs = retryAfterMs;
}
}
// Mark transient network errors as retriable so auto-mode can
// detect them and retry instead of stopping (#833).
if (isTransientNetworkError(error)) {
output.retryAfterMs = output.retryAfterMs ?? 5000;
}
stream.push({ type: "error", reason: output.stopReason, error: output });
stream.end();
}
})();
return stream;
};
/**
 * Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6).
 * Matches both dash and dot spellings, with or without a date suffix.
 */
function supportsAdaptiveThinking(modelId: string): boolean {
	const markers = ["opus-4-6", "opus-4.6", "sonnet-4-6", "sonnet-4.6"];
	return markers.some((marker) => modelId.includes(marker));
}
/**
 * Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
 * Note: effort "max" is only valid on Opus 4.6.
 */
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"], modelId: string): AnthropicEffort {
	if (level === "minimal" || level === "low") return "low";
	if (level === "medium") return "medium";
	if (level === "xhigh") {
		// "max" exists only on Opus 4.6; everything else caps at "high".
		const isOpus46 = modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
		return isOpus46 ? "max" : "high";
	}
	// "high" and any unrecognized level both resolve to "high".
	return "high";
}
/**
 * Simple streaming entry point: resolves the API key, derives base options,
 * and dispatches to streamAnthropic with the appropriate thinking
 * configuration (adaptive effort for 4.6 models, token budget otherwise).
 */
export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = (
	model: Model<"anthropic-messages">,
	context: Context,
	options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key for provider: ${model.provider}`);
	}
	const base = buildBaseOptions(model, options, apiKey);
	const reasoning = options?.reasoning;
	if (!reasoning) {
		// No reasoning requested: thinking stays off entirely.
		return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
	}
	if (supportsAdaptiveThinking(model.id)) {
		// Opus 4.6 / Sonnet 4.6: adaptive thinking steered by an effort level.
		return streamAnthropic(model, context, {
			...base,
			thinkingEnabled: true,
			effort: mapThinkingLevelToEffort(reasoning, model.id),
		} satisfies AnthropicOptions);
	}
	// Older models: budget-based thinking; shrink maxTokens to leave room.
	const adjusted = adjustMaxTokensForThinking(
		base.maxTokens || 0,
		model.maxTokens,
		reasoning,
		options?.thinkingBudgets,
	);
	return streamAnthropic(model, context, {
		...base,
		maxTokens: adjusted.maxTokens,
		thinkingEnabled: true,
		thinkingBudgetTokens: adjusted.thinkingBudget,
	} satisfies AnthropicOptions);
};
// Anthropic OAuth access tokens embed the "sk-ant-oat" marker.
function isOAuthToken(apiKey: string): boolean {
	return apiKey.indexOf("sk-ant-oat") !== -1;
}
@ -702,315 +142,83 @@ async function createClient(
return { client, isOAuthToken: false };
}
function buildParams(
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
model: Model<"anthropic-messages">,
context: Context,
isOAuthToken: boolean,
options?: AnthropicOptions,
): MessageCreateParamsStreaming {
const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);
// Strip variant suffixes like [1m] from model ID before sending to the API.
// The API only accepts the base model ID (e.g. "claude-opus-4-6"),
// not internal variant identifiers (e.g. "claude-opus-4-6[1m]").
// This applies to all auth methods — API keys, OAuth, and Copilot alike.
const apiModelId = model.id.replace(/\[.*\]$/, "");
const params: MessageCreateParamsStreaming = {
model: apiModelId,
messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
stream: true,
};
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
// For OAuth tokens, we MUST include Claude Code identity
if (isOAuthToken) {
params.system = [
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
...(cacheControl ? { cache_control: cacheControl } : {}),
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: sanitizeSurrogates(context.systemPrompt),
...(cacheControl ? { cache_control: cacheControl } : {}),
(async () => {
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
let copilotDynamicHeaders: Record<string, string> | undefined;
if (model.provider === "github-copilot") {
const hasImages = hasCopilotVisionInput(context.messages);
copilotDynamicHeaders = buildCopilotDynamicHeaders({
messages: context.messages,
hasImages,
});
}
} else if (context.systemPrompt) {
// Add cache control to system prompt for non-OAuth tokens
params.system = [
{
type: "text",
text: sanitizeSurrogates(context.systemPrompt),
...(cacheControl ? { cache_control: cacheControl } : {}),
},
];
}
// Temperature is incompatible with extended thinking (adaptive or budget-based).
if (options?.temperature !== undefined && !options?.thinkingEnabled) {
params.temperature = options.temperature;
}
const { client, isOAuthToken: isOAuth } = await createClient(
model,
apiKey,
options?.interleavedThinking ?? true,
options?.headers,
copilotDynamicHeaders,
);
if (context.tools) {
params.tools = convertTools(context.tools, isOAuthToken);
}
processAnthropicStream(stream, {
client,
model,
context,
isOAuthToken: isOAuth,
options,
AnthropicSdkClass: _AnthropicClass,
});
})();
// Configure thinking mode: adaptive (Opus 4.6 and Sonnet 4.6) or budget-based (older models)
if (options?.thinkingEnabled && model.reasoning) {
if (supportsAdaptiveThinking(model.id)) {
// Adaptive thinking: Claude decides when and how much to think
params.thinking = { type: "adaptive" };
if (options.effort) {
params.output_config = { effort: options.effort };
}
} else {
// Budget-based thinking for older models
params.thinking = {
type: "enabled",
budget_tokens: options.thinkingBudgetTokens || 1024,
};
}
}
return stream;
};
if (options?.metadata) {
const userId = options.metadata.user_id;
if (typeof userId === "string") {
params.metadata = { user_id: userId };
}
}
if (options?.toolChoice) {
if (typeof options.toolChoice === "string") {
params.tool_choice = { type: options.toolChoice };
} else {
params.tool_choice = options.toolChoice;
}
}
return params;
}
// Normalize tool call IDs to match Anthropic's required pattern and length:
// every character outside [a-zA-Z0-9_-] becomes "_", capped at 64 characters.
function normalizeToolCallId(id: string): string {
	const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_");
	return sanitized.length > 64 ? sanitized.slice(0, 64) : sanitized;
}
function convertMessages(
messages: Message[],
export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = (
model: Model<"anthropic-messages">,
isOAuthToken: boolean,
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): MessageParam[] {
const params: MessageParam[] = [];
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
for (let i = 0; i < transformedMessages.length; i++) {
const msg = transformedMessages[i];
if (msg.role === "user") {
if (typeof msg.content === "string") {
if (msg.content.trim().length > 0) {
params.push({
role: "user",
content: sanitizeSurrogates(msg.content),
});
}
} else {
const blocks: ContentBlockParam[] = msg.content.map((item) => {
if (item.type === "text") {
return {
type: "text",
text: sanitizeSurrogates(item.text),
};
} else {
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
data: item.data,
},
};
}
});
let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
filteredBlocks = filteredBlocks.filter((b) => {
if (b.type === "text") {
return b.text.trim().length > 0;
}
return true;
});
if (filteredBlocks.length === 0) continue;
params.push({
role: "user",
content: filteredBlocks,
});
}
} else if (msg.role === "assistant") {
const blocks: ContentBlockParam[] = [];
for (const block of msg.content) {
if (block.type === "text") {
if (block.text.trim().length === 0) continue;
blocks.push({
type: "text",
text: sanitizeSurrogates(block.text),
});
} else if (block.type === "thinking") {
// Redacted thinking: pass the opaque payload back as redacted_thinking
if (block.redacted) {
blocks.push({
type: "redacted_thinking",
data: block.thinkingSignature!,
});
continue;
}
if (block.thinking.trim().length === 0) continue;
// If thinking signature is missing/empty (e.g., from aborted stream),
// convert to plain text block without <thinking> tags to avoid API rejection
// and prevent Claude from mimicking the tags in responses
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
blocks.push({
type: "text",
text: sanitizeSurrogates(block.thinking),
});
} else {
blocks.push({
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: block.thinkingSignature,
});
}
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",
id: block.id,
name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
input: block.arguments ?? {},
});
} else if (block.type === "serverToolUse") {
blocks.push({
type: "server_tool_use",
id: block.id,
name: block.name,
input: block.input ?? {},
} as any);
} else if (block.type === "webSearchResult") {
blocks.push({
type: "web_search_tool_result",
tool_use_id: block.toolUseId,
content: block.content,
} as any);
}
}
if (blocks.length === 0) continue;
params.push({
role: "assistant",
content: blocks,
});
} else if (msg.role === "toolResult") {
// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
const toolResults: ContentBlockParam[] = [];
// Add the current tool result
toolResults.push({
type: "tool_result",
tool_use_id: msg.toolCallId,
content: convertContentBlocks(msg.content),
is_error: msg.isError,
});
// Look ahead for consecutive toolResult messages
let j = i + 1;
while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
toolResults.push({
type: "tool_result",
tool_use_id: nextMsg.toolCallId,
content: convertContentBlocks(nextMsg.content),
is_error: nextMsg.isError,
});
j++;
}
// Skip the messages we've already processed
i = j - 1;
// Add a single user message with all tool results
params.push({
role: "user",
content: toolResults,
});
}
context: Context,
options?: SimpleStreamOptions,
): AssistantMessageEventStream => {
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
if (!apiKey) {
throw new Error(`No API key for provider: ${model.provider}`);
}
// Add cache_control to the last user message to cache conversation history
if (cacheControl && params.length > 0) {
const lastMessage = params[params.length - 1];
if (lastMessage.role === "user") {
if (Array.isArray(lastMessage.content)) {
const lastBlock = lastMessage.content[lastMessage.content.length - 1];
if (
lastBlock &&
(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
) {
(lastBlock as any).cache_control = cacheControl;
}
} else if (typeof lastMessage.content === "string") {
lastMessage.content = [
{
type: "text",
text: lastMessage.content,
cache_control: cacheControl,
},
] as any;
}
}
const base = buildBaseOptions(model, options, apiKey);
if (!options?.reasoning) {
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
}
return params;
}
// Convert internal tool definitions into Anthropic Messages API tool specs.
// OAuth sessions get Claude Code canonical tool casing for stealth parity.
function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
	if (!tools) return [];
	return tools.map((tool) => {
		// TypeBox already emits JSON Schema, so parameters can be forwarded as-is.
		const schema = tool.parameters as any;
		return {
			name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
			description: tool.description,
			input_schema: {
				type: "object" as const,
				properties: schema.properties || {},
				required: schema.required || [],
			},
		};
	});
}
function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {
switch (reason) {
case "end_turn":
return "stop";
case "max_tokens":
return "length";
case "tool_use":
return "toolUse";
case "refusal":
return "error";
case "pause_turn": // Stop is good enough -> resubmit
return "stop";
case "stop_sequence":
return "stop"; // We don't supply stop sequences, so this should never happen
case "sensitive": // Content flagged by safety filters (not yet in SDK types)
return "error";
default:
// Handle unknown stop reasons gracefully (API may add new values)
throw new Error(`Unhandled stop reason: ${reason}`);
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
// For older models: use budget-based thinking
if (supportsAdaptiveThinking(model.id)) {
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
return streamAnthropic(model, context, {
...base,
thinkingEnabled: true,
effort,
} satisfies AnthropicOptions);
}
}
const adjusted = adjustMaxTokensForThinking(
base.maxTokens || 0,
model.maxTokens,
options.reasoning,
options.thinkingBudgets,
);
return streamAnthropic(model, context, {
...base,
maxTokens: adjusted.maxTokens,
thinkingEnabled: true,
thinkingBudgetTokens: adjusted.thinkingBudget,
} satisfies AnthropicOptions);
};

View file

@ -3,6 +3,7 @@ import type { AssistantMessage, AssistantMessageEvent, Context, Model, SimpleStr
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import type { BedrockOptions } from "./amazon-bedrock.js";
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
import { streamAnthropicVertex, streamSimpleAnthropicVertex } from "./anthropic-vertex.js";
import { streamAzureOpenAIResponses, streamSimpleAzureOpenAIResponses } from "./azure-openai-responses.js";
import { streamGoogle, streamSimpleGoogle } from "./google.js";
import { streamGoogleGeminiCli, streamSimpleGoogleGeminiCli } from "./google-gemini-cli.js";
@ -171,6 +172,12 @@ function registerBuiltInApiProviders(): void {
streamSimple: streamSimpleGoogleVertex,
});
registerApiProvider({
api: "anthropic-vertex",
stream: streamAnthropicVertex,
streamSimple: streamSimpleAnthropicVertex,
});
registerApiProvider({
api: "bedrock-converse-stream",
stream: streamBedrockLazy,

View file

@ -9,6 +9,7 @@ export type KnownApi =
| "azure-openai-responses"
| "openai-codex-responses"
| "anthropic-messages"
| "anthropic-vertex"
| "bedrock-converse-stream"
| "google-generative-ai"
| "google-gemini-cli"
@ -19,6 +20,7 @@ export type Api = KnownApi | (string & {});
export type KnownProvider =
| "amazon-bedrock"
| "anthropic"
| "anthropic-vertex"
| "google"
| "google-gemini-cli"
| "google-antigravity"

View file

@ -14,6 +14,7 @@ import type { ModelRegistry } from "./model-registry.js";
const defaultModelPerProvider: Record<KnownProvider, string> = {
"amazon-bedrock": "us.anthropic.claude-opus-4-6-v1",
anthropic: "claude-opus-4-6[1m]",
"anthropic-vertex": "claude-sonnet-4-6",
openai: "gpt-5.4",
"azure-openai-responses": "gpt-5.2",
"openai-codex": "gpt-5.4",

View file

@ -63,6 +63,7 @@ const TOOL_KEYS: ToolKeyConfig[] = [
/** Known LLM provider IDs that, if authed, mean the user doesn't need onboarding */
const LLM_PROVIDER_IDS = [
'anthropic',
'anthropic-vertex',
'openai',
'github-copilot',
'openai-codex',