Merge pull request #750 from jeremymcs/fix/startup-lazy-loading

perf: lazy-load LLM provider SDKs to reduce startup time
This commit is contained in:
TÂCHES 2026-03-16 20:28:32 -06:00 committed by GitHub
commit 6a452f27d9
5 changed files with 101 additions and 37 deletions

View file

@ -1,4 +1,6 @@
import Anthropic from "@anthropic-ai/sdk";
// Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup.
// This avoids penalizing users who don't use Anthropic models.
import type Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
@ -32,6 +34,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";
// Memoized constructor so the dynamic import happens at most once per process.
let _AnthropicClass: typeof Anthropic | undefined;

/**
 * Resolve the Anthropic SDK's default export on demand.
 * The import is deferred to first use and cached afterwards, keeping the
 * SDK's load cost off the startup path for users who never hit this provider.
 */
async function getAnthropicClass(): Promise<typeof Anthropic> {
  _AnthropicClass ??= (await import("@anthropic-ai/sdk")).default;
  return _AnthropicClass;
}
/**
* Resolve cache retention preference.
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
@ -265,7 +276,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
});
}
const { client, isOAuthToken } = createClient(
const { client, isOAuthToken } = await createClient(
model,
apiKey,
options?.interleavedThinking ?? true,
@ -455,7 +466,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
if (model.provider === "alibaba-coding-plan") {
output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
}
if (error instanceof Anthropic.APIError && error.headers) {
const AnthropicSdk = _AnthropicClass;
if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) {
const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
if (retryAfterMs !== undefined) {
output.retryAfterMs = retryAfterMs;
@ -548,13 +560,14 @@ function isOAuthToken(apiKey: string): boolean {
return apiKey.includes("sk-ant-oat");
}
function createClient(
async function createClient(
model: Model<"anthropic-messages">,
apiKey: string,
interleavedThinking: boolean,
optionsHeaders?: Record<string, string>,
dynamicHeaders?: Record<string, string>,
): { client: Anthropic; isOAuthToken: boolean } {
): Promise<{ client: Anthropic; isOAuthToken: boolean }> {
const AnthropicClass = await getAnthropicClass();
// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
@ -566,7 +579,7 @@ function createClient(
betaFeatures.push("interleaved-thinking-2025-05-14");
}
const client = new Anthropic({
const client = new AnthropicClass({
apiKey: null,
authToken: apiKey,
baseURL: model.baseUrl,
@ -595,7 +608,7 @@ function createClient(
// OAuth: Bearer auth, Claude Code identity headers
if (isOAuthToken(apiKey)) {
const client = new Anthropic({
const client = new AnthropicClass({
apiKey: null,
authToken: apiKey,
baseURL: model.baseUrl,
@ -619,7 +632,7 @@ function createClient(
// API key auth
// Alibaba Coding Plan uses Bearer token auth instead of x-api-key
const isAlibabaProvider = model.provider === "alibaba-coding-plan";
const client = new Anthropic({
const client = new AnthropicClass({
apiKey: isAlibabaProvider ? null : apiKey,
authToken: isAlibabaProvider ? apiKey : undefined,
baseURL: model.baseUrl,

View file

@ -1,4 +1,6 @@
import { AzureOpenAI } from "openai";
// Lazy-loaded: OpenAI SDK (AzureOpenAI) is imported on first use, not at startup.
// This avoids penalizing users who don't use Azure OpenAI models.
import type { AzureOpenAI } from "openai";
import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { supportsXhigh } from "../models.js";
@ -15,6 +17,15 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
// Memoized constructor so the dynamic import happens at most once per process.
let _AzureOpenAIClass: typeof AzureOpenAI | undefined;

/**
 * Resolve the AzureOpenAI constructor from the "openai" package on demand.
 * The import is deferred to first use and cached afterwards, keeping the
 * SDK's load cost off the startup path for users who never hit this provider.
 */
async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
  _AzureOpenAIClass ??= (await import("openai")).AzureOpenAI;
  return _AzureOpenAIClass;
}
/**
* Clamp reasoning effort for models that don't support all levels.
* gpt-5.x models don't support "minimal"; map it to "low".
@ -94,7 +105,7 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
try {
// Create Azure OpenAI client
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
const client = createClient(model, apiKey, options);
const client = await createClient(model, apiKey, options);
let params = buildParams(model, context, options, deploymentName);
const nextParams = await options?.onPayload?.(params, model);
if (nextParams !== undefined) {
@ -188,7 +199,7 @@ function resolveAzureConfig(
};
}
function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
async function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
if (!apiKey) {
if (!process.env.AZURE_OPENAI_API_KEY) {
throw new Error(
@ -205,8 +216,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
}
const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
const AzureOpenAIClass = await getAzureOpenAIClass();
return new AzureOpenAI({
return new AzureOpenAIClass({
apiKey,
apiVersion,
dangerouslyAllowBrowser: true,

View file

@ -1,9 +1,10 @@
import {
type GenerateContentConfig,
type GenerateContentParameters,
GoogleGenAI,
type ThinkingConfig,
ThinkingLevel,
// Lazy-loaded: Google GenAI SDK is imported on first use, not at startup.
// This avoids penalizing users who don't use Google Vertex models.
import type { GoogleGenAI } from "@google/genai";
import type {
GenerateContentConfig,
GenerateContentParameters,
ThinkingConfig,
} from "@google/genai";
import { calculateCost } from "../models.js";
import type {
@ -33,6 +34,15 @@ import {
} from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
// Memoized constructor so the dynamic import happens at most once per process.
let _GoogleVertexClass: typeof GoogleGenAI | undefined;

/**
 * Resolve the GoogleGenAI constructor from "@google/genai" on demand.
 * The import is deferred to first use and cached afterwards, keeping the
 * SDK's load cost off the startup path for users who never hit this provider.
 */
async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
  _GoogleVertexClass ??= (await import("@google/genai")).GoogleGenAI;
  return _GoogleVertexClass;
}
export interface GoogleVertexOptions extends StreamOptions {
toolChoice?: "auto" | "none" | "any";
thinking?: {
@ -46,12 +56,14 @@ export interface GoogleVertexOptions extends StreamOptions {
const API_VERSION = "v1";
const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, ThinkingLevel> = {
THINKING_LEVEL_UNSPECIFIED: ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
MINIMAL: ThinkingLevel.MINIMAL,
LOW: ThinkingLevel.LOW,
MEDIUM: ThinkingLevel.MEDIUM,
HIGH: ThinkingLevel.HIGH,
// ThinkingLevel is a string enum where each value equals its key name.
// Using string literals avoids importing the SDK at module load time.
const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, string> = {
THINKING_LEVEL_UNSPECIFIED: "THINKING_LEVEL_UNSPECIFIED",
MINIMAL: "MINIMAL",
LOW: "LOW",
MEDIUM: "MEDIUM",
HIGH: "HIGH",
};
// Counter for generating unique tool call IDs
@ -86,7 +98,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex", GoogleVertexOpt
try {
const project = resolveProject(options);
const location = resolveLocation(options);
const client = createClient(model, project, location, options?.headers);
const client = await createClient(model, project, location, options?.headers);
let params = buildParams(model, context, options);
const nextParams = await options?.onPayload?.(params, model);
if (nextParams !== undefined) {
@ -318,12 +330,12 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
} satisfies GoogleVertexOptions);
};
function createClient(
async function createClient(
model: Model<"google-vertex">,
project: string,
location: string,
optionsHeaders?: Record<string, string>,
): GoogleGenAI {
): Promise<GoogleGenAI> {
const httpOptions: { headers?: Record<string, string> } = {};
if (model.headers || optionsHeaders) {
@ -331,8 +343,9 @@ function createClient(
}
const hasHttpOptions = Object.values(httpOptions).some(Boolean);
const GoogleGenAIClass = await getGoogleVertexClass();
return new GoogleGenAI({
return new GoogleGenAIClass({
vertexai: true,
project,
location,
@ -393,7 +406,9 @@ function buildParams(
if (options.thinking?.enabled && model.reasoning) {
const thinkingConfig: ThinkingConfig = { includeThoughts: true };
if (options.thinking.level !== undefined) {
thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level];
// Cast safe: string values match ThinkingLevel enum values exactly
// eslint-disable-next-line @typescript-eslint/no-explicit-any
thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level] as any;
} else if (options.thinking.budgetTokens !== undefined) {
thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
}

View file

@ -1,4 +1,6 @@
import OpenAI from "openai";
// Lazy-loaded: OpenAI SDK is imported on first use, not at startup.
// This avoids penalizing users who don't use OpenAI models.
import type OpenAI from "openai";
import type {
ChatCompletionAssistantMessageParam,
ChatCompletionChunk,
@ -33,6 +35,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";
// Memoized constructor so the dynamic import happens at most once per process.
let _OpenAICompletionsClass: typeof OpenAI | undefined;

/**
 * Resolve the OpenAI SDK's default export on demand.
 * The import is deferred to first use and cached afterwards, keeping the
 * SDK's load cost off the startup path for users who never hit this provider.
 */
async function getOpenAICompletionsClass(): Promise<typeof OpenAI> {
  _OpenAICompletionsClass ??= (await import("openai")).default;
  return _OpenAICompletionsClass;
}
/**
* Check if conversation messages contain tool calls or tool results.
* This is needed because Anthropic (via proxy) requires the tools param
@ -85,7 +96,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
try {
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
const client = createClient(model, context, apiKey, options?.headers);
const client = await createClient(model, context, apiKey, options?.headers);
let params = buildParams(model, context, options);
const nextParams = await options?.onPayload?.(params, model);
if (nextParams !== undefined) {
@ -327,7 +338,7 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
} satisfies OpenAICompletionsOptions);
};
function createClient(
async function createClient(
model: Model<"openai-completions">,
context: Context,
apiKey?: string,
@ -358,8 +369,9 @@ function createClient(
}
const isZai = model.provider === "zai" || model.baseUrl.includes("api.z.ai");
const OpenAIClass = await getOpenAICompletionsClass();
return new OpenAI({
return new OpenAIClass({
apiKey,
baseURL: model.baseUrl,
dangerouslyAllowBrowser: true,

View file

@ -1,4 +1,6 @@
import OpenAI from "openai";
// Lazy-loaded: OpenAI SDK is imported on first use, not at startup.
// This avoids penalizing users who don't use OpenAI models.
import type OpenAI from "openai";
import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { supportsXhigh } from "../models.js";
@ -18,6 +20,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
// Memoized constructor so the dynamic import happens at most once per process.
let _OpenAIResponsesClass: typeof OpenAI | undefined;

/**
 * Resolve the OpenAI SDK's default export on demand.
 * The import is deferred to first use and cached afterwards, keeping the
 * SDK's load cost off the startup path for users who never hit this provider.
 */
async function getOpenAIResponsesClass(): Promise<typeof OpenAI> {
  _OpenAIResponsesClass ??= (await import("openai")).default;
  return _OpenAIResponsesClass;
}
/**
* Clamp reasoning effort for models that don't support all levels.
* gpt-5.x models don't support "minimal"; map it to "low".
@ -98,7 +109,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses", OpenAIRes
try {
// Create OpenAI client
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
const client = createClient(model, context, apiKey, options?.headers);
const client = await createClient(model, context, apiKey, options?.headers);
let params = buildParams(model, context, options);
const nextParams = await options?.onPayload?.(params, model);
if (nextParams !== undefined) {
@ -156,7 +167,7 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
} satisfies OpenAIResponsesOptions);
};
function createClient(
async function createClient(
model: Model<"openai-responses">,
context: Context,
apiKey?: string,
@ -186,7 +197,8 @@ function createClient(
Object.assign(headers, optionsHeaders);
}
return new OpenAI({
const OpenAIClass = await getOpenAIResponsesClass();
return new OpenAIClass({
apiKey,
baseURL: model.baseUrl,
dangerouslyAllowBrowser: true,