Merge pull request #750 from jeremymcs/fix/startup-lazy-loading
perf: lazy-load LLM provider SDKs to reduce startup time
This commit is contained in: commit 6a452f27d9

5 changed files with 101 additions and 37 deletions
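All five providers apply the same memoized dynamic-import shape. A minimal sketch of the pattern (the PR writes it out longhand in each file; the shared helper `lazyClass` below is hypothetical, not part of the diff):

```ts
// Sketch: cache the constructor after the first dynamic import so startup
// never pays for SDKs the session doesn't use. Safe under concurrency: if two
// callers race, both import() calls resolve to the same module via Node's
// module cache, so a double assignment is harmless.
function lazyClass<T>(load: () => Promise<T>): () => Promise<T> {
	let cached: T | undefined;
	return async () => {
		if (cached === undefined) {
			cached = await load();
		}
		return cached;
	};
}

// Hypothetical usage mirroring the per-file helpers in this PR:
const getAnthropic = lazyClass(async () => (await import("@anthropic-ai/sdk")).default);
```

Only the first call pays the import cost; every later call returns the cached class immediately.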
===== File 1/5: "anthropic-messages" provider =====

@@ -1,4 +1,6 @@
-import Anthropic from "@anthropic-ai/sdk";
+// Lazy-loaded: Anthropic SDK (~500ms) is imported on first use, not at startup.
+// This avoids penalizing users who don't use Anthropic models.
+import type Anthropic from "@anthropic-ai/sdk";
 import type {
 	ContentBlockParam,
 	MessageCreateParamsStreaming,
@@ -32,6 +34,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
 import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
 import { transformMessages } from "./transform-messages.js";
 
+let _AnthropicClass: typeof Anthropic | undefined;
+async function getAnthropicClass(): Promise<typeof Anthropic> {
+	if (!_AnthropicClass) {
+		const mod = await import("@anthropic-ai/sdk");
+		_AnthropicClass = mod.default;
+	}
+	return _AnthropicClass;
+}
+
 /**
  * Resolve cache retention preference.
  * Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
@@ -265,7 +276,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
 		});
 	}
 
-	const { client, isOAuthToken } = createClient(
+	const { client, isOAuthToken } = await createClient(
 		model,
 		apiKey,
 		options?.interleavedThinking ?? true,
@@ -455,7 +466,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
 			if (model.provider === "alibaba-coding-plan") {
 				output.errorMessage = `[alibaba-coding-plan] ${output.errorMessage}`;
 			}
-			if (error instanceof Anthropic.APIError && error.headers) {
+			const AnthropicSdk = _AnthropicClass;
+			if (AnthropicSdk && error instanceof AnthropicSdk.APIError && error.headers) {
 				const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
 				if (retryAfterMs !== undefined) {
 					output.retryAfterMs = retryAfterMs;
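Note on the `instanceof` change above: after lazy loading, `Anthropic` is only a type import, so the catch block has to read the runtime class from the module-level cache and skip the check when it is still unset. A sketch of the reasoning, using a hypothetical helper name not present in the PR:

```ts
import type Anthropic from "@anthropic-ai/sdk";

// The cached class doubles as a signal that the SDK was ever loaded.
// If it wasn't, no Anthropic client was constructed in this process,
// so the caught error cannot be an Anthropic APIError anyway.
function isAnthropicApiError(error: unknown, Sdk: typeof Anthropic | undefined): boolean {
	return Sdk !== undefined && error instanceof Sdk.APIError;
}
```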
@@ -548,13 +560,14 @@ function isOAuthToken(apiKey: string): boolean {
 	return apiKey.includes("sk-ant-oat");
 }
 
-function createClient(
+async function createClient(
 	model: Model<"anthropic-messages">,
 	apiKey: string,
 	interleavedThinking: boolean,
 	optionsHeaders?: Record<string, string>,
 	dynamicHeaders?: Record<string, string>,
-): { client: Anthropic; isOAuthToken: boolean } {
+): Promise<{ client: Anthropic; isOAuthToken: boolean }> {
+	const AnthropicClass = await getAnthropicClass();
 	// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
 	// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
 	const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
@@ -566,7 +579,7 @@ function createClient(
 		betaFeatures.push("interleaved-thinking-2025-05-14");
 	}
 
-	const client = new Anthropic({
+	const client = new AnthropicClass({
 		apiKey: null,
 		authToken: apiKey,
 		baseURL: model.baseUrl,
@@ -595,7 +608,7 @@ function createClient(
 
 	// OAuth: Bearer auth, Claude Code identity headers
 	if (isOAuthToken(apiKey)) {
-		const client = new Anthropic({
+		const client = new AnthropicClass({
 			apiKey: null,
 			authToken: apiKey,
 			baseURL: model.baseUrl,
@@ -619,7 +632,7 @@ function createClient(
 	// API key auth
 	// Alibaba Coding Plan uses Bearer token auth instead of x-api-key
 	const isAlibabaProvider = model.provider === "alibaba-coding-plan";
-	const client = new Anthropic({
+	const client = new AnthropicClass({
 		apiKey: isAlibabaProvider ? null : apiKey,
 		authToken: isAlibabaProvider ? apiKey : undefined,
 		baseURL: model.baseUrl,

===== File 2/5: "azure-openai-responses" provider =====
@@ -1,4 +1,6 @@
-import { AzureOpenAI } from "openai";
+// Lazy-loaded: OpenAI SDK (AzureOpenAI) is imported on first use, not at startup.
+// This avoids penalizing users who don't use Azure OpenAI models.
+import type { AzureOpenAI } from "openai";
 import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
 import { getEnvApiKey } from "../env-api-keys.js";
 import { supportsXhigh } from "../models.js";
@@ -15,6 +17,15 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
 import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 
+let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
+async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
+	if (!_AzureOpenAIClass) {
+		const mod = await import("openai");
+		_AzureOpenAIClass = mod.AzureOpenAI;
+	}
+	return _AzureOpenAIClass;
+}
+
 /**
  * Clamp reasoning effort for models that don't support all levels.
  * gpt-5.x models don't support "minimal" — map to "low".
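One detail that differs between this file and the plain OpenAI providers: the `openai` package exposes the `OpenAI` class as its default export and `AzureOpenAI` as a named export, so the two loaders pull different bindings from the same dynamic import. A sketch:

```ts
// Both lazy helpers import the same module; only the extracted binding differs.
const mod = await import("openai");
const OpenAIClass = mod.default;          // used by the completions/responses providers
const AzureOpenAIClass = mod.AzureOpenAI; // used by this Azure provider
```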
@@ -94,7 +105,7 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
 	try {
 		// Create Azure OpenAI client
 		const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-		const client = createClient(model, apiKey, options);
+		const client = await createClient(model, apiKey, options);
 		let params = buildParams(model, context, options, deploymentName);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -188,7 +199,7 @@ function resolveAzureConfig(
 	};
 }
 
-function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
+async function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
 	if (!apiKey) {
 		if (!process.env.AZURE_OPENAI_API_KEY) {
 			throw new Error(
@@ -205,8 +216,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
 	}
 
 	const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
+	const AzureOpenAIClass = await getAzureOpenAIClass();
 
-	return new AzureOpenAI({
+	return new AzureOpenAIClass({
 		apiKey,
 		apiVersion,
 		dangerouslyAllowBrowser: true,

===== File 3/5: "google-vertex" provider =====
@@ -1,9 +1,10 @@
-import {
-	type GenerateContentConfig,
-	type GenerateContentParameters,
-	GoogleGenAI,
-	type ThinkingConfig,
-	ThinkingLevel,
+// Lazy-loaded: Google GenAI SDK is imported on first use, not at startup.
+// This avoids penalizing users who don't use Google Vertex models.
+import type { GoogleGenAI } from "@google/genai";
+import type {
+	GenerateContentConfig,
+	GenerateContentParameters,
+	ThinkingConfig,
 } from "@google/genai";
 import { calculateCost } from "../models.js";
 import type {
@@ -33,6 +34,15 @@ import {
 } from "./google-shared.js";
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 
+let _GoogleVertexClass: typeof GoogleGenAI | undefined;
+async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
+	if (!_GoogleVertexClass) {
+		const mod = await import("@google/genai");
+		_GoogleVertexClass = mod.GoogleGenAI;
+	}
+	return _GoogleVertexClass;
+}
+
 export interface GoogleVertexOptions extends StreamOptions {
 	toolChoice?: "auto" | "none" | "any";
 	thinking?: {
@@ -46,12 +56,14 @@ export interface GoogleVertexOptions extends StreamOptions {
 
 const API_VERSION = "v1";
 
-const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, ThinkingLevel> = {
-	THINKING_LEVEL_UNSPECIFIED: ThinkingLevel.THINKING_LEVEL_UNSPECIFIED,
-	MINIMAL: ThinkingLevel.MINIMAL,
-	LOW: ThinkingLevel.LOW,
-	MEDIUM: ThinkingLevel.MEDIUM,
-	HIGH: ThinkingLevel.HIGH,
+// ThinkingLevel is a string enum where each value equals its key name.
+// Using string literals avoids importing the SDK at module load time.
+const THINKING_LEVEL_MAP: Record<GoogleThinkingLevel, string> = {
+	THINKING_LEVEL_UNSPECIFIED: "THINKING_LEVEL_UNSPECIFIED",
+	MINIMAL: "MINIMAL",
+	LOW: "LOW",
+	MEDIUM: "MEDIUM",
+	HIGH: "HIGH",
 };
 
 // Counter for generating unique tool call IDs
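The map rewrite above relies on how TypeScript string enums compile: each member's runtime value is exactly its string initializer, so the literals compare equal to the enum members. A minimal demonstration with a stand-in enum (the real `ThinkingLevel` now stays unimported at runtime):

```ts
// Stand-in for @google/genai's ThinkingLevel; assumes, as the diff comment
// states, that each value equals its key name.
enum ThinkingLevelLike {
	MINIMAL = "MINIMAL",
	LOW = "LOW",
}

const fromMap: string = "LOW"; // what THINKING_LEVEL_MAP now stores
console.log(fromMap === ThinkingLevelLike.LOW); // true: identical at runtime
```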
@@ -86,7 +98,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex", GoogleVertexOpt
 	try {
 		const project = resolveProject(options);
 		const location = resolveLocation(options);
-		const client = createClient(model, project, location, options?.headers);
+		const client = await createClient(model, project, location, options?.headers);
 		let params = buildParams(model, context, options);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -318,12 +330,12 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
 	} satisfies GoogleVertexOptions);
 };
 
-function createClient(
+async function createClient(
 	model: Model<"google-vertex">,
 	project: string,
 	location: string,
 	optionsHeaders?: Record<string, string>,
-): GoogleGenAI {
+): Promise<GoogleGenAI> {
 	const httpOptions: { headers?: Record<string, string> } = {};
 
 	if (model.headers || optionsHeaders) {
@@ -331,8 +343,9 @@ function createClient(
 	}
 
 	const hasHttpOptions = Object.values(httpOptions).some(Boolean);
+	const GoogleGenAIClass = await getGoogleVertexClass();
 
-	return new GoogleGenAI({
+	return new GoogleGenAIClass({
 		vertexai: true,
 		project,
 		location,
@@ -393,7 +406,9 @@ function buildParams(
 	if (options.thinking?.enabled && model.reasoning) {
 		const thinkingConfig: ThinkingConfig = { includeThoughts: true };
 		if (options.thinking.level !== undefined) {
-			thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level];
+			// Cast safe: string values match ThinkingLevel enum values exactly
+			// eslint-disable-next-line @typescript-eslint/no-explicit-any
+			thinkingConfig.thinkingLevel = THINKING_LEVEL_MAP[options.thinking.level] as any;
 		} else if (options.thinking.budgetTokens !== undefined) {
 			thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
 		}
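On the `as any` cast above: a plain string is not assignable to a string enum even when the value matches exactly, which is what forces the escape hatch once `ThinkingLevel` is no longer imported as a value. Minimal reproduction with a stand-in enum:

```ts
enum Level {
	LOW = "LOW",
}
interface Config {
	level?: Level;
}

const cfg: Config = {};
const value: string = "LOW";
// cfg.level = value;     // compile error: 'string' is not assignable to 'Level'
cfg.level = value as any; // the diff's approach; safe because the values match
```

Since `import type` declarations are erased at compile time, a type-only import of the enum would likely also permit `value as ThinkingLevel` without the lint suppression; that is an observation, not something this PR does.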

===== File 4/5: "openai-completions" provider =====
@@ -1,4 +1,6 @@
-import OpenAI from "openai";
+// Lazy-loaded: OpenAI SDK is imported on first use, not at startup.
+// This avoids penalizing users who don't use OpenAI models.
+import type OpenAI from "openai";
 import type {
 	ChatCompletionAssistantMessageParam,
 	ChatCompletionChunk,
@@ -33,6 +35,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 import { transformMessages } from "./transform-messages.js";
 
+let _OpenAICompletionsClass: typeof OpenAI | undefined;
+async function getOpenAICompletionsClass(): Promise<typeof OpenAI> {
+	if (!_OpenAICompletionsClass) {
+		const mod = await import("openai");
+		_OpenAICompletionsClass = mod.default;
+	}
+	return _OpenAICompletionsClass;
+}
+
 /**
  * Check if conversation messages contain tool calls or tool results.
  * This is needed because Anthropic (via proxy) requires the tools param
@@ -85,7 +96,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
 
 	try {
 		const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-		const client = createClient(model, context, apiKey, options?.headers);
+		const client = await createClient(model, context, apiKey, options?.headers);
 		let params = buildParams(model, context, options);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -327,7 +338,7 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
 	} satisfies OpenAICompletionsOptions);
 };
 
-function createClient(
+async function createClient(
 	model: Model<"openai-completions">,
 	context: Context,
 	apiKey?: string,
@@ -358,8 +369,9 @@ function createClient(
 	}
 
 	const isZai = model.provider === "zai" || model.baseUrl.includes("api.z.ai");
+	const OpenAIClass = await getOpenAICompletionsClass();
 
-	return new OpenAI({
+	return new OpenAIClass({
 		apiKey,
 		baseURL: model.baseUrl,
 		dangerouslyAllowBrowser: true,

===== File 5/5: "openai-responses" provider =====
@@ -1,4 +1,6 @@
-import OpenAI from "openai";
+// Lazy-loaded: OpenAI SDK is imported on first use, not at startup.
+// This avoids penalizing users who don't use OpenAI models.
+import type OpenAI from "openai";
 import type { ResponseCreateParamsStreaming } from "openai/resources/responses/responses.js";
 import { getEnvApiKey } from "../env-api-keys.js";
 import { supportsXhigh } from "../models.js";
@@ -18,6 +20,15 @@ import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copi
 import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
 import { buildBaseOptions, clampReasoning } from "./simple-options.js";
 
+let _OpenAIResponsesClass: typeof OpenAI | undefined;
+async function getOpenAIResponsesClass(): Promise<typeof OpenAI> {
+	if (!_OpenAIResponsesClass) {
+		const mod = await import("openai");
+		_OpenAIResponsesClass = mod.default;
+	}
+	return _OpenAIResponsesClass;
+}
+
 /**
  * Clamp reasoning effort for models that don't support all levels.
  * gpt-5.x models don't support "minimal" — map to "low".
@@ -98,7 +109,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses", OpenAIRes
 	try {
 		// Create OpenAI client
 		const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-		const client = createClient(model, context, apiKey, options?.headers);
+		const client = await createClient(model, context, apiKey, options?.headers);
 		let params = buildParams(model, context, options);
 		const nextParams = await options?.onPayload?.(params, model);
 		if (nextParams !== undefined) {
@@ -156,7 +167,7 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
 	} satisfies OpenAIResponsesOptions);
 };
 
-function createClient(
+async function createClient(
 	model: Model<"openai-responses">,
 	context: Context,
 	apiKey?: string,
@@ -186,7 +197,8 @@ function createClient(
 		Object.assign(headers, optionsHeaders);
 	}
 
-	return new OpenAI({
+	const OpenAIClass = await getOpenAIResponsesClass();
+	return new OpenAIClass({
 		apiKey,
 		baseURL: model.baseUrl,
 		dangerouslyAllowBrowser: true,