pi-ai: add reasoning:auto across all providers + Kimi K2.6
RequestedThinkingLevel adds "auto" to the reasoning option. Each provider
handles it natively:
- Claude 4.x (anthropic/bedrock): adaptive thinking with no effort constraint on models that support adaptive thinking; other Claude models resolve auto to medium
- Gemini (google/vertex/gemini-cli): Gemini 3 models send THINKING_LEVEL_UNSPECIFIED; other models (e.g. 2.5 Pro/Flash) send budgetTokens: -1
- GPT-5+ (openai-responses): reasoning.effort omitted, model decides; azure, codex, openai-completions and mistral resolve auto to medium
- Kimi (kimi-coding): {"type":"enabled"} without budget_tokens via new
capabilities.thinkingNoBudget flag — model manages reasoning depth
- GLM (zai, thinkingFormat:zai): enable_thinking:true already correct
- MiniMax (anthropic API): explicit budget_tokens required, resolves to medium
ModelCapabilities.thinkingNoBudget: new flag for Anthropic-compatible providers
that accept {"type":"enabled"} without a budget (Kimi API).
models.generated.ts: add Kimi K2.6 (id: kimi-for-coding, beta API); add
thinkingNoBudget capability to all kimi-coding models.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
38d3bd55da
commit
f1da908dcd
15 changed files with 241 additions and 36 deletions
|
|
@ -4616,6 +4616,24 @@ export const MODELS = {
|
|||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
"kimi-coding": {
|
||||
"kimi-for-coding": {
|
||||
id: "kimi-for-coding",
|
||||
name: "Kimi K2.6",
|
||||
api: "anthropic-messages",
|
||||
provider: "kimi-coding",
|
||||
baseUrl: "https://api.kimi.com/coding",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
capabilities: { thinkingNoBudget: true },
|
||||
cost: {
|
||||
input: 0.6,
|
||||
output: 2.5,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 32768,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"k2p5": {
|
||||
id: "k2p5",
|
||||
name: "Kimi K2.5",
|
||||
|
|
@ -4624,6 +4642,7 @@ export const MODELS = {
|
|||
baseUrl: "https://api.kimi.com/coding",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
capabilities: { thinkingNoBudget: true },
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
@ -4641,6 +4660,7 @@ export const MODELS = {
|
|||
baseUrl: "https://api.kimi.com/coding",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
capabilities: { thinkingNoBudget: true },
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ import type {
|
|||
CacheRetention,
|
||||
Context,
|
||||
Model,
|
||||
RequestedThinkingLevel,
|
||||
SimpleStreamOptions,
|
||||
StopReason,
|
||||
StreamFunction,
|
||||
|
|
@ -42,7 +43,7 @@ import type {
|
|||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
export interface BedrockOptions extends StreamOptions {
|
||||
|
|
@ -50,7 +51,7 @@ export interface BedrockOptions extends StreamOptions {
|
|||
profile?: string;
|
||||
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
||||
/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
|
||||
reasoning?: ThinkingLevel;
|
||||
reasoning?: RequestedThinkingLevel;
|
||||
/* Custom token budgets per thinking level. Overrides default budgets. */
|
||||
thinkingBudgets?: ThinkingBudgets;
|
||||
/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
|
||||
|
|
@ -226,8 +227,10 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
|
|||
return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning);
|
||||
|
||||
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
if (supportsAdaptiveThinking(model.id) && isAutoReasoning(options.reasoning)) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
|
|
@ -235,27 +238,35 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
|
|||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: effectiveReasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
effectiveReasoning!,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
maxTokens: adjusted.maxTokens,
|
||||
reasoning: options.reasoning,
|
||||
reasoning: effectiveReasoning,
|
||||
thinkingBudgets: {
|
||||
...(options.thinkingBudgets || {}),
|
||||
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
|
||||
[clampReasoning(effectiveReasoning)!]: adjusted.thinkingBudget,
|
||||
},
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
reasoning: effectiveReasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
};
|
||||
|
|
@ -407,6 +418,8 @@ export function mapThinkingLevelToEffort(
|
|||
modelId: string,
|
||||
): "low" | "medium" | "high" | "xhigh" | "max" {
|
||||
switch (level) {
|
||||
case "auto":
|
||||
return "medium";
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "low";
|
||||
|
|
@ -709,10 +722,14 @@ export function buildAdditionalModelRequestFields(
|
|||
|
||||
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
||||
const result: Record<string, any> = supportsAdaptiveThinking(model.id)
|
||||
? {
|
||||
thinking: { type: "adaptive" },
|
||||
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
|
||||
}
|
||||
? options.reasoning === "auto"
|
||||
? {
|
||||
thinking: { type: "adaptive" },
|
||||
}
|
||||
: {
|
||||
thinking: { type: "adaptive" },
|
||||
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
|
||||
}
|
||||
: (() => {
|
||||
const defaultBudgets: Record<ThinkingLevel, number> = {
|
||||
minimal: 1024,
|
||||
|
|
@ -723,8 +740,9 @@ export function buildAdditionalModelRequestFields(
|
|||
};
|
||||
|
||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||
const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
||||
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];
|
||||
const normalizedReasoning = options.reasoning === "auto" ? "medium" : options.reasoning;
|
||||
const level = normalizedReasoning === "xhigh" ? "high" : normalizedReasoning;
|
||||
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[normalizedReasoning];
|
||||
|
||||
return {
|
||||
thinking: {
|
||||
|
|
|
|||
|
|
@ -163,6 +163,8 @@ export function supportsAdaptiveThinking(modelId: string): boolean {
|
|||
|
||||
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
|
||||
switch (level) {
|
||||
case "auto":
|
||||
return "medium";
|
||||
case "minimal":
|
||||
return "low";
|
||||
case "low":
|
||||
|
|
@ -481,6 +483,10 @@ export function buildParams(
|
|||
if (options.effort) {
|
||||
params.output_config = { effort: options.effort };
|
||||
}
|
||||
} else if (model.capabilities?.thinkingNoBudget) {
|
||||
// Provider accepts {"type":"enabled"} without budget_tokens — model manages depth.
|
||||
// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not.
|
||||
(params as any).thinking = { type: "enabled" };
|
||||
} else {
|
||||
params.thinking = {
|
||||
type: "enabled",
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import type {
|
|||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
|
||||
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
import {
|
||||
type AnthropicEffort,
|
||||
type AnthropicOptions,
|
||||
|
|
@ -194,10 +194,19 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
|
|||
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
if (isAutoReasoning(options.reasoning) && (supportsAdaptiveThinking(model.id) || model.capabilities?.thinkingNoBudget)) {
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
} satisfies AnthropicOptions);
|
||||
}
|
||||
|
||||
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning)!;
|
||||
|
||||
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
||||
// For older models: use budget-based thinking
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
||||
const effort = mapThinkingLevelToEffort(effectiveReasoning, model.id);
|
||||
return streamAnthropic(model, context, {
|
||||
...base,
|
||||
thinkingEnabled: true,
|
||||
|
|
@ -208,7 +217,7 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
|
|||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
options.reasoning,
|
||||
effectiveReasoning,
|
||||
options.thinkingBudgets,
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ import {
|
|||
finalizeStream,
|
||||
handleStreamError,
|
||||
} from "./openai-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
|
||||
async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
|
||||
|
|
@ -118,7 +118,8 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<"azure-openai-resp
|
|||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
||||
const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
|
||||
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
|
||||
|
||||
return streamAzureOpenAIResponses(model, context, {
|
||||
...base,
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import {
|
|||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
/**
|
||||
* Thinking level for Gemini 3 models.
|
||||
|
|
@ -387,7 +387,27 @@ export const streamSimpleGoogleGeminiCli: StreamFunction<"google-gemini-cli", Si
|
|||
} satisfies GoogleGeminiCliOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(options.reasoning)!;
|
||||
if (isAutoReasoning(options.reasoning)) {
|
||||
if (isGemini3Model(model.id)) {
|
||||
return streamGoogleGeminiCli(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
level: "THINKING_LEVEL_UNSPECIFIED",
|
||||
},
|
||||
} satisfies GoogleGeminiCliOptions);
|
||||
}
|
||||
|
||||
return streamGoogleGeminiCli(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: -1,
|
||||
},
|
||||
} satisfies GoogleGeminiCliOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
|
||||
if (isGemini3Model(model.id)) {
|
||||
return streamGoogleGeminiCli(model, context, {
|
||||
...base,
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import {
|
|||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
let _GoogleVertexClass: typeof GoogleGenAI | undefined;
|
||||
async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
|
||||
|
|
@ -308,7 +308,28 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
|
|||
} satisfies GoogleVertexOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(options.reasoning)!;
|
||||
if (isAutoReasoning(options.reasoning)) {
|
||||
const geminiModel = model as unknown as Model<"google-generative-ai">;
|
||||
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
||||
return streamGoogleVertex(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
level: "THINKING_LEVEL_UNSPECIFIED",
|
||||
},
|
||||
} satisfies GoogleVertexOptions);
|
||||
}
|
||||
|
||||
return streamGoogleVertex(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: -1,
|
||||
},
|
||||
} satisfies GoogleVertexOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
|
||||
const geminiModel = model as unknown as Model<"google-generative-ai">;
|
||||
|
||||
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ import {
|
|||
mapToolChoice,
|
||||
retainThoughtSignature,
|
||||
} from "./google-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
export interface GoogleOptions extends StreamOptions {
|
||||
toolChoice?: "auto" | "none" | "any";
|
||||
|
|
@ -297,7 +297,28 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt
|
|||
return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(options.reasoning)!;
|
||||
if (isAutoReasoning(options.reasoning)) {
|
||||
const googleModel = model as Model<"google-generative-ai">;
|
||||
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
level: "THINKING_LEVEL_UNSPECIFIED",
|
||||
},
|
||||
} satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
return streamGoogle(model, context, {
|
||||
...base,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: -1,
|
||||
},
|
||||
} satisfies GoogleOptions);
|
||||
}
|
||||
|
||||
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
|
||||
const googleModel = model as Model<"google-generative-ai">;
|
||||
|
||||
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|||
import { shortHash } from "../utils/hash.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
|
||||
|
|
@ -125,7 +125,7 @@ export const streamSimpleMistral: StreamFunction<"mistral-conversations", Simple
|
|||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoning = clampReasoning(options?.reasoning);
|
||||
const reasoning = clampReasoning(resolveReasoningLevel(model, options?.reasoning));
|
||||
|
||||
return streamMistral(model, context, {
|
||||
...base,
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import type {
|
|||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
// ============================================================================
|
||||
// Configuration
|
||||
|
|
@ -273,7 +273,8 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<"openai-codex-resp
|
|||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
||||
const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
|
||||
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
|
||||
|
||||
return streamOpenAICodexResponses(model, context, {
|
||||
...base,
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import type {
|
|||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
import {
|
||||
assertStreamSuccess,
|
||||
buildInitialOutput,
|
||||
|
|
@ -302,7 +302,8 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
|
|||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
||||
const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
|
||||
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
|
||||
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
|
||||
|
||||
return streamOpenAICompletions(model, context, {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import {
|
|||
finalizeStream,
|
||||
handleStreamError,
|
||||
} from "./openai-shared.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
|
||||
|
||||
|
|
@ -56,7 +56,8 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
|
|||
|
||||
// OpenAI Responses-specific options
|
||||
export interface OpenAIResponsesOptions extends StreamOptions {
|
||||
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
/** "auto" means no effort constraint — model decides its own reasoning depth (GPT-5+). */
|
||||
reasoningEffort?: "auto" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
||||
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
|
||||
}
|
||||
|
|
@ -118,7 +119,11 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
|
|||
}
|
||||
|
||||
const base = buildBaseOptions(model, options, apiKey);
|
||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
||||
const reasoningEffort: OpenAIResponsesOptions["reasoningEffort"] = isAutoReasoning(options?.reasoning)
|
||||
? "auto"
|
||||
: supportsXhigh(model)
|
||||
? resolveReasoningLevel(model, options?.reasoning)
|
||||
: clampReasoning(resolveReasoningLevel(model, options?.reasoning));
|
||||
|
||||
return streamOpenAIResponses(model, context, {
|
||||
...base,
|
||||
|
|
@ -157,7 +162,12 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
|
|||
|
||||
if (model.reasoning) {
|
||||
params.include = ["reasoning.encrypted_content"];
|
||||
if (options?.reasoningEffort || options?.reasoningSummary) {
|
||||
if (options?.reasoningEffort === "auto") {
|
||||
// Let the model decide its own reasoning depth — no effort constraint.
|
||||
// GPT-5+ will reason as much as it judges necessary, same as
|
||||
// THINKING_LEVEL_UNSPECIFIED for Gemini 2.5.
|
||||
params.reasoning = { summary: options?.reasoningSummary || "auto" };
|
||||
} else if (options?.reasoningEffort || options?.reasoningSummary) {
|
||||
const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort;
|
||||
params.reasoning = {
|
||||
effort: effort || "medium",
|
||||
|
|
|
|||
45
packages/pi-ai/src/providers/simple-options.test.ts
Normal file
45
packages/pi-ai/src/providers/simple-options.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { describe, it } from "node:test";
|
||||
import type { Model } from "../types.js";
|
||||
import { isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||
|
||||
function createModel(overrides: Partial<Model<any>> = {}): Model<any> {
|
||||
return {
|
||||
id: "test-model",
|
||||
name: "Test Model",
|
||||
provider: "openai",
|
||||
api: "openai-responses",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 16_384,
|
||||
input: ["text"],
|
||||
reasoning: true,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("simple-options reasoning helpers", () => {
|
||||
it("recognizes auto reasoning requests", () => {
|
||||
assert.equal(isAutoReasoning("auto"), true);
|
||||
assert.equal(isAutoReasoning("medium"), false);
|
||||
assert.equal(isAutoReasoning(undefined), false);
|
||||
});
|
||||
|
||||
it("maps auto to medium for reasoning-capable models", () => {
|
||||
assert.equal(resolveReasoningLevel(createModel(), "auto"), "medium");
|
||||
});
|
||||
|
||||
it("maps auto to undefined for models without reasoning support", () => {
|
||||
assert.equal(resolveReasoningLevel(createModel({ reasoning: false }), "auto"), undefined);
|
||||
});
|
||||
|
||||
it("passes through explicit reasoning levels unchanged", () => {
|
||||
assert.equal(resolveReasoningLevel(createModel(), "xhigh"), "xhigh");
|
||||
});
|
||||
});
|
||||
|
|
@ -1,4 +1,12 @@
|
|||
import type { Api, Model, SimpleStreamOptions, StreamOptions, ThinkingBudgets, ThinkingLevel } from "../types.js";
|
||||
import type {
|
||||
Api,
|
||||
Model,
|
||||
RequestedThinkingLevel,
|
||||
SimpleStreamOptions,
|
||||
StreamOptions,
|
||||
ThinkingBudgets,
|
||||
ThinkingLevel,
|
||||
} from "../types.js";
|
||||
|
||||
export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions {
|
||||
return {
|
||||
|
|
@ -19,6 +27,23 @@ export function clampReasoning(effort: ThinkingLevel | undefined): Exclude<Think
|
|||
return effort === "xhigh" ? "high" : effort;
|
||||
}
|
||||
|
||||
/**
 * Type guard for the "auto" reasoning request.
 *
 * @param effort - The requested reasoning level, or `undefined` when none was given.
 * @returns `true` only when `effort` is exactly the string `"auto"`; every explicit
 * level and `undefined` yield `false`. On `true`, `effort` is narrowed to `"auto"`.
 */
export function isAutoReasoning(
|
||||
effort: RequestedThinkingLevel | undefined,
|
||||
): effort is Extract<RequestedThinkingLevel, "auto"> {
|
||||
return effort === "auto";
|
||||
}
|
||||
|
||||
/**
 * Resolves a requested reasoning level to a concrete {@link ThinkingLevel}.
 *
 * - No request (`undefined` or any other falsy value) resolves to `undefined`,
 *   so an unset level is never silently upgraded to an explicit effort.
 * - `"auto"` resolves to `"medium"` when `model.reasoning` is truthy and to
 *   `undefined` otherwise. This is the fallback for providers that have no
 *   native "let the model decide" mode.
 * - Any explicit level is returned unchanged.
 *
 * @param model - Model whose `reasoning` flag decides whether `"auto"` can map to a level.
 * @param effort - The level requested by the caller, possibly `"auto"` or `undefined`.
 * @returns The concrete level to send to the provider, or `undefined` for "no reasoning".
 */
export function resolveReasoningLevel(
	model: Model<Api>,
	effort: RequestedThinkingLevel | undefined,
): ThinkingLevel | undefined {
	// An absent request must stay absent: only "auto" is an opt-in to a default effort.
	if (!effort) return undefined;

	if (effort === "auto") {
		return model.reasoning ? "medium" : undefined;
	}

	return effort;
}
|
||||
|
||||
export function adjustMaxTokensForThinking(
|
||||
baseMaxTokens: number,
|
||||
modelMaxTokens: number,
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ export type KnownProvider =
|
|||
export type Provider = KnownProvider | string;
|
||||
|
||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
export type RequestedThinkingLevel = "auto" | ThinkingLevel;
|
||||
|
||||
/** Token budgets for each thinking level (token-based providers only) */
|
||||
export interface ThinkingBudgets {
|
||||
|
|
@ -116,7 +117,7 @@ export type ProviderStreamOptions = StreamOptions & Record<string, unknown>;
|
|||
|
||||
// Unified options with reasoning passed to streamSimple() and completeSimple()
|
||||
export interface SimpleStreamOptions extends StreamOptions {
|
||||
reasoning?: ThinkingLevel;
|
||||
reasoning?: RequestedThinkingLevel;
|
||||
/** Custom token budgets for thinking levels (token-based providers only) */
|
||||
thinkingBudgets?: ThinkingBudgets;
|
||||
}
|
||||
|
|
@ -359,6 +360,12 @@ export interface ModelCapabilities {
|
|||
* If omitted, the provider-level default is used.
|
||||
*/
|
||||
charsPerToken?: number;
|
||||
/**
|
||||
* Whether this model's Anthropic-compatible thinking API accepts {"type":"enabled"}
|
||||
* without a budget_tokens field. When true, reasoning:"auto" sends no budget
|
||||
* and lets the model decide its own reasoning depth (e.g. Kimi via kimi-coding).
|
||||
*/
|
||||
thinkingNoBudget?: boolean;
|
||||
}
|
||||
|
||||
// Model interface for the unified model system
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue