Merge pull request #750 from jeremymcs/fix/startup-lazy-loading
perf: lazy-load LLM provider SDKs to reduce startup time
This commit is contained in: commit 6a452f27d9

5 changed files with 101 additions and 37 deletions
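All five providers apply the same memoized dynamic-import shape. A minimal sketch of the pattern (the PR writes it out longhand in each file; the shared helper `lazyClass` below is hypothetical, not part of the diff):

```ts
// Sketch: cache the constructor after the first dynamic import so startup
// never pays for SDKs the session doesn't use. Safe under concurrency: if two
// callers race, both import() calls resolve to the same module via Node's
// module cache, so a double assignment is harmless.
function lazyClass<T>(load: () => Promise<T>): () => Promise<T> {
	let cached: T | undefined;
	return async () => {
		if (cached === undefined) {
			cached = await load();
		}
		return cached;
	};
}

// Hypothetical usage mirroring the per-file helpers in this PR:
const getAnthropic = lazyClass(async () => (await import("@anthropic-ai/sdk")).default);
```

Only the first call pays the import cost; every later call returns the cached class immediately.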
===== File 1/5: "anthropic-messages" provider =====

@@ -1,4 +1,6 @@
-import Anthropic from "@anthropic-ai/sdk";
+// Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup.
+// This avoids penalizing users who don't use Anthropic models.
+import type Anthropic from "@anthropic-ai/sdk";
 import type {
 	ContentBlockParam,
 	MessageCreateParamsStreaming,
@@ -32,6 +34,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
 import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
 import { transformMessages } from "./transform-messages.js";
 
+let _AnthropicClass: typeof Anthropic | undefined;
+async function getAnthropicClass(): Promise<typeof Anthropic> {
+	if (!_AnthropicClass) {
+		const mod = await import("@anthropic-ai/sdk");
+		_AnthropicClass = mod.default;
+	}
+	return _AnthropicClass;
+}
+
 /**
  * Resolve cache retention preference.
  * Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
@@ -265,7 +276,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
 		});
 	}
 
-	const { client, isOAuthToken } = createClient(
+	const { client, isOAuthToken } = await createClient(
 		model,
 		apiKey,
 		options?.interleavedThinking ?? true,
@@ -455,7 +466,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
 			if (model.provider === "alibaba-coding-plan") {
 				output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
 			}
-			if (error instanceof Anthropic.APIError && error.headers) {
+			const AnthropicSdk = _AnthropicClass;
+			if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) {
 				const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
 				if (retryAfterMs !== undefined) {
 					output.retryAfterMs = retryAfterMs;
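Note on the `instanceof` change above: after lazy loading, `Anthropic` is only a type import, so the catch block has to read the runtime class from the module-level cache and skip the check when it is still unset. A sketch of the reasoning, using a hypothetical helper name not present in the PR:

```ts
import type Anthropic from "@anthropic-ai/sdk";

// The cached class doubles as a signal that the SDK was ever loaded.
// If it wasn't, no Anthropic client was constructed in this process,
// so the caught error cannot be an Anthropic APIError anyway.
function isAnthropicApiError(error: unknown, Sdk: typeof Anthropic | undefined): boolean {
	return Sdk !== undefined && error instanceof Sdk.APIError;
}
```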
@@ -548,13 +560,14 @@ function isOAuthToken(apiKey: string): boolean {
 	return apiKey.includes("sk-ant-oat");
 }
 
-function createClient(
+async function createClient(
 	model: Model<"anthropic-messages">,
 	apiKey: string,
 	interleavedThinking: boolean,
 	optionsHeaders?: Record<string, string>,
 	dynamicHeaders?: Record<string, string>,
-): { client: Anthropic; isOAuthToken: boolean } {
+): Promise<{ client: Anthropic; isOAuthToken: boolean }> {
+	const AnthropicClass = await getAnthropicClass();
 	// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
 	// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
 	const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
@@ -566,7 +579,7 @@ function createClient(
 		betaFeatures.push("interleaved-thinking-2025-05-14");
 	}
 
-	const client = new Anthropic({
+	const client = new AnthropicClass({
 		apiKey: null,
 		authToken: apiKey,
 		baseURL: model.baseUrl,
@@ -595,7 +608,7 @@ function createClient(
 
 	// OAuth: Bearer auth, Claude Code identity headers
 	if (isOAuthToken(apiKey)) {
-		const client = new Anthropic({
+		const client = new AnthropicClass({
 			apiKey: null,
 			authToken: apiKey,
 			baseURL: model.baseUrl,
@@ -619,7 +632,7 @@ function createClient(
 	// API key auth
 	// Alibaba Coding Plan uses Bearer token auth instead of x-api-key
 	const isAlibabaProvider = model.provider === "alibaba-coding-plan";
-	const client = new Anthropic({
+	const client = new AnthropicClass({
 		apiKey: isAlibabaProvider ? null : apiKey,
 		authToken: isAlibabaProvider ? apiKey : undefined,
 		baseURL: model.baseUrl,

===== File 2/5: "azure-openai-responses" provider =====
@@ -1,4 +1,6 @@
-import { AzureOpenAI } from "openai";
+// Lazy-loaded: OpenAI SDK (AzureOpenAI) is imported on first use, not at startup.
+// This avoids penalizing users who don't use Azure OpenAI models.
+import type { AzureOpenAI } from "openai";
 import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
 import { getEnvApiKey } from "../env-api-keys.js";
 import { supportsXhigh } from "../models.js";
@@ -15,6 +17,15 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
 import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 
+let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
+async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
+	if (!_AzureOpenAIClass) {
+		const mod = await import("openai");
+		_AzureOpenAIClass = mod.AzureOpenAI;
+	}
+	return _AzureOpenAIClass;
+}
+
 /**
  * Clamp reasoning effort for models that don't support all levels.
  * gpt-5.x models don't support "minimal" — map to "low".
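One detail that differs between this file and the plain OpenAI providers: the `openai` package exposes the `OpenAI` class as its default export and `AzureOpenAI` as a named export, so the two loaders pull different bindings from the same dynamic import. A sketch:

```ts
// Both lazy helpers import the same module; only the extracted binding differs.
const mod = await import("openai");
const OpenAIClass = mod.default;          // used by the completions/responses providers
const AzureOpenAIClass = mod.AzureOpenAI; // used by this Azure provider
```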
@@ -94,7 +105,7 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
 	try {
 		// Create Azure OpenAI client
 		const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-		const client = createClient(model, apiKey, options);
+		const client = await createClient(model, apiKey, options);
 		let params = buildParams(model, context, options, deploymentName);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -188,7 +199,7 @@ function resolveAzureConfig(
 	};
 }
 
-function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
+async function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
 	if (!apiKey) {
 		if (!process.env.AZURE_OPENAI_API_KEY) {
 			throw new Error(
@@ -205,8 +216,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
 	}
 
 	const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
+	const AzureOpenAIClass = await getAzureOpenAIClass();
 
-	return new AzureOpenAI({
+	return new AzureOpenAIClass({
 		apiKey,
 		apiVersion,
 		dangerouslyAllowBrowser: true,

===== File 3/5: "google-vertex" provider =====
@@ -1,9 +1,10 @@
-import {
-	type GenerateContentConfig,
-	type GenerateContentParameters,
-	GoogleGenAI,
-	type ThinkingConfig,
-	ThinkingLevel,
+// Lazy-loaded: Google GenAI SDK is imported on first use, not at startup.
+// This avoids penalizing users who don't use Google Vertex models.
+import type { GoogleGenAI } from "@google/genai";
+import type {
+	GenerateContentConfig,
+	GenerateContentParameters,
+	ThinkingConfig,
 } from "@google/genai";
 import { calculateCost } from "../models.js";
 import type {
@@ -33,6 +34,15 @@ import {
 } from "./google-shared.js";
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 
+let _GoogleVertexClass: typeof GoogleGenAI | undefined;
+async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
+	if (!_GoogleVertexClass) {
+		const mod = await import("@google/genai");
+		_GoogleVertexClass = mod.GoogleGenAI;
+	}
+	return _GoogleVertexClass;
+}
+
 export interface GoogleVertexOptions extends StreamOptions {
 	toolChoice?: "auto" | "none" | "any";
 	thinking?: {
@@ -46,12 +56,14 @@ export interface GoogleVertexOptions extends StreamOptions {
 
 const API_VERSION = "v1";
 
-const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, ThinkingLevel> = {
-	THINKING_LEVEL_UNSPECIFIED: ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
-	MINIMAL: ThinkingLevel.MINIMAL,
-	LOW: ThinkingLevel.LOW,
-	MEDIUM: ThinkingLevel.MEDIUM,
-	HIGH: ThinkingLevel.HIGH,
+// ThinkingLevel is a string enum where each value equals its key name.
+// Using string literals avoids importing the SDK at module load time.
+const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, string> = {
+	THINKING_LEVEL_UNSPECIFIED: "THINKING_LEVEL_UNSPECIFIED",
+	MINIMAL: "MINIMAL",
+	LOW: "LOW",
+	MEDIUM: "MEDIUM",
+	HIGH: "HIGH",
 };
 
 // Counter for generating unique tool call IDs
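The map rewrite above relies on how TypeScript string enums compile: each member's runtime value is exactly its string initializer, so the literals compare equal to the enum members. A minimal demonstration with a stand-in enum (the real `ThinkingLevel` now stays unimported at runtime):

```ts
// Stand-in for @google/genai's ThinkingLevel; assumes, as the diff comment
// states, that each value equals its key name.
enum ThinkingLevelLike {
	MINIMAL = "MINIMAL",
	LOW = "LOW",
}

const fromMap: string = "LOW"; // what THINKING_LEVEL_MAP now stores
console.log(fromMap === ThinkingLevelLike.LOW); // true: identical at runtime
```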
@@ -86,7 +98,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex", GoogleVertexOpt
 	try {
 		const project = resolveProject(options);
 		const location = resolveLocation(options);
-		const client = createClient(model, project, location, options?.headers);
+		const client = await createClient(model, project, location, options?.headers);
 		let params = buildParams(model, context, options);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -318,12 +330,12 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
 	} satisfies GoogleVertexOptions);
 };
 
-function createClient(
+async function createClient(
 	model: Model<"google-vertex">,
 	project: string,
 	location: string,
 	optionsHeaders?: Record<string, string>,
-): GoogleGenAI {
+): Promise<GoogleGenAI> {
 	const httpOptions: { headers?: Record<string, string> } = {};
 
 	if (model.headers || optionsHeaders) {
@@ -331,8 +343,9 @@ function createClient(
 	}
 
 	const hasHttpOptions = Object.values(httpOptions).some(Boolean);
+	const GoogleGenAIClass = await getGoogleVertexClass();
 
-	return new GoogleGenAI({
+	return new GoogleGenAIClass({
 		vertexai: true,
 		project,
 		location,
@@ -393,7 +406,9 @@ function buildParams(
 	if (options.thinking?.enabled && model.reasoning) {
 		const thinkingConfig: ThinkingConfig = { includeThoughts: true };
 		if (options.thinking.level !== undefined) {
-			thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level];
+			// Cast safe: string values match ThinkingLevel enum values exactly
+			// eslint-disable-next-line @typescript-eslint/no-explicit-any
+			thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level] as any;
 		} else if (options.thinking.budgetTokens !== undefined) {
 			thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
 		}
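On the `as any` cast above: a plain string is not assignable to a string enum even when the value matches exactly, which is what forces the escape hatch once `ThinkingLevel` is no longer imported as a value. Minimal reproduction with a stand-in enum:

```ts
enum Level {
	LOW = "LOW",
}
interface Config {
	level?: Level;
}

const cfg: Config = {};
const value: string = "LOW";
// cfg.level = value;     // compile error: 'string' is not assignable to 'Level'
cfg.level = value as any; // the diff's approach; safe because the values match
```

Since `import type` declarations are erased at compile time, a type-only import of the enum would likely also permit `value as ThinkingLevel` without the lint suppression; that is an observation, not something this PR does.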

===== File 4/5: "openai-completions" provider =====
@@ -1,4 +1,6 @@
-import OpenAI from "openai";
+// Lazy-loaded: OpenAI SDK is imported on first use, not at startup.
+// This avoids penalizing users who don't use OpenAI models.
+import type OpenAI from "openai";
 import type {
 	ChatCompletionAssistantMessageParam,
 	ChatCompletionChunk,
@@ -33,6 +35,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 import { transformMessages } from "./transform-messages.js";
 
+let _OpenAICompletionsClass: typeof OpenAI | undefined;
+async function getOpenAICompletionsClass(): Promise<typeof OpenAI> {
+	if (!_OpenAICompletionsClass) {
+		const mod = await import("openai");
+		_OpenAICompletionsClass = mod.default;
+	}
+	return _OpenAICompletionsClass;
+}
+
 /**
  * Check if conversation messages contain tool calls or tool results.
  * This is needed because Anthropic (via proxy) requires the tools param
@@ -85,7 +96,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
 
 	try {
 		const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-		const client = createClient(model, context, apiKey, options?.headers);
+		const client = await createClient(model, context, apiKey, options?.headers);
 		let params = buildParams(model, context, options);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -327,7 +338,7 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
 	} satisfies OpenAICompletionsOptions);
 };
 
-function createClient(
+async function createClient(
 	model: Model<"openai-completions">,
 	context: Context,
 	apiKey?: string,
@@ -358,8 +369,9 @@ function createClient(
 	}
 
 	const isZai = model.provider === "zai" || model.baseUrl.includes("api.z.ai");
+	const OpenAIClass = await getOpenAICompletionsClass();
 
-	return new OpenAI({
+	return new OpenAIClass({
 		apiKey,
 		baseURL: model.baseUrl,
 		dangerouslyAllowBrowser: true,

===== File 5/5: "openai-responses" provider =====
@@ -1,4 +1,6 @@
-import OpenAI from "openai";
+// Lazy-loaded: OpenAI SDK is imported on first use, not at startup.
+// This avoids penalizing users who don't use OpenAI models.
+import type OpenAI from "openai";
 import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
 import { getEnvApiKey } from "../env-api-keys.js";
 import { supportsXhigh } from "../models.js";
@@ -18,6 +20,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
 import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 
+let _OpenAIResponsesClass: typeof OpenAI | undefined;
+async function getOpenAIResponsesClass(): Promise<typeof OpenAI> {
+	if (!_OpenAIResponsesClass) {
+		const mod = await import("openai");
+		_OpenAIResponsesClass = mod.default;
+	}
+	return _OpenAIResponsesClass;
+}
+
 /**
  * Clamp reasoning effort for models that don't support all levels.
  * gpt-5.x models don't support "minimal" — map to "low".
@@ -98,7 +109,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses", OpenAIRes
 	try {
 		// Create OpenAI client
 		const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-		const client = createClient(model, context, apiKey, options?.headers);
+		const client = await createClient(model, context, apiKey, options?.headers);
 		let params = buildParams(model, context, options);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -156,7 +167,7 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
 	} satisfies OpenAIResponsesOptions);
 };
 
-function createClient(
+async function createClient(
 	model: Model<"openai-responses">,
 	context: Context,
 	apiKey?: string,
@@ -186,7 +197,8 @@ function createClient(
 		Object.assign(headers, optionsHeaders);
 	}
 
-	return new OpenAI({
+	const OpenAIClass = await getOpenAIResponsesClass();
+	return new OpenAIClass({
 		apiKey,
 		baseURL: model.baseUrl,
 		dangerouslyAllowBrowser: true,