pi-ai: add reasoning:auto across all providers + Kimi K2.6

The new RequestedThinkingLevel type adds "auto" to the reasoning option. Each
provider handles it natively:

- Claude 4.x (anthropic/bedrock): adaptive thinking, no effort constraint
- Gemini 2.5 Pro/Flash (google/vertex/gemini-cli): THINKING_LEVEL_UNSPECIFIED
- GPT-5+ (openai-responses/azure): reasoning.effort omitted, model decides
- Kimi (kimi-coding): {"type":"enabled"} without budget_tokens via new
  capabilities.thinkingNoBudget flag — model manages reasoning depth
- GLM (zai, thinkingFormat:zai): enable_thinking:true already correct
- MiniMax (anthropic API): explicit budget_tokens required, resolves to medium

ModelCapabilities.thinkingNoBudget: new flag for Anthropic-compatible providers
that accept {"type":"enabled"} without a budget (Kimi API).

models.generated.ts: add Kimi K2.6 (id: kimi-for-coding, beta API); add
thinkingNoBudget capability to all kimi-coding models.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-19 21:22:25 +02:00
parent 38d3bd55da
commit f1da908dcd
15 changed files with 241 additions and 36 deletions

View file

@ -4616,6 +4616,24 @@ export const MODELS = {
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
}, },
"kimi-coding": { "kimi-coding": {
"kimi-for-coding": {
id: "kimi-for-coding",
name: "Kimi K2.6",
api: "anthropic-messages",
provider: "kimi-coding",
baseUrl: "https://api.kimi.com/coding",
reasoning: true,
input: ["text", "image"],
capabilities: { thinkingNoBudget: true },
cost: {
input: 0.6,
output: 2.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"anthropic-messages">,
"k2p5": { "k2p5": {
id: "k2p5", id: "k2p5",
name: "Kimi K2.5", name: "Kimi K2.5",
@ -4624,6 +4642,7 @@ export const MODELS = {
baseUrl: "https://api.kimi.com/coding", baseUrl: "https://api.kimi.com/coding",
reasoning: true, reasoning: true,
input: ["text", "image"], input: ["text", "image"],
capabilities: { thinkingNoBudget: true },
cost: { cost: {
input: 0, input: 0,
output: 0, output: 0,
@ -4641,6 +4660,7 @@ export const MODELS = {
baseUrl: "https://api.kimi.com/coding", baseUrl: "https://api.kimi.com/coding",
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
capabilities: { thinkingNoBudget: true },
cost: { cost: {
input: 0, input: 0,
output: 0, output: 0,

View file

@ -27,6 +27,7 @@ import type {
CacheRetention, CacheRetention,
Context, Context,
Model, Model,
RequestedThinkingLevel,
SimpleStreamOptions, SimpleStreamOptions,
StopReason, StopReason,
StreamFunction, StreamFunction,
@ -42,7 +43,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js"; import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
import { transformMessagesWithReport } from "./transform-messages.js"; import { transformMessagesWithReport } from "./transform-messages.js";
export interface BedrockOptions extends StreamOptions { export interface BedrockOptions extends StreamOptions {
@ -50,7 +51,7 @@ export interface BedrockOptions extends StreamOptions {
profile?: string; profile?: string;
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string }; toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */ /* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
reasoning?: ThinkingLevel; reasoning?: RequestedThinkingLevel;
/* Custom token budgets per thinking level. Overrides default budgets. */ /* Custom token budgets per thinking level. Overrides default budgets. */
thinkingBudgets?: ThinkingBudgets; thinkingBudgets?: ThinkingBudgets;
/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */ /* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
@ -226,8 +227,10 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions); return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);
} }
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning);
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) { if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
if (supportsAdaptiveThinking(model.id)) { if (supportsAdaptiveThinking(model.id) && isAutoReasoning(options.reasoning)) {
return streamBedrock(model, context, { return streamBedrock(model, context, {
...base, ...base,
reasoning: options.reasoning, reasoning: options.reasoning,
@ -235,27 +238,35 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
} satisfies BedrockOptions); } satisfies BedrockOptions);
} }
if (supportsAdaptiveThinking(model.id)) {
return streamBedrock(model, context, {
...base,
reasoning: effectiveReasoning,
thinkingBudgets: options.thinkingBudgets,
} satisfies BedrockOptions);
}
const adjusted = adjustMaxTokensForThinking( const adjusted = adjustMaxTokensForThinking(
base.maxTokens || 0, base.maxTokens || 0,
model.maxTokens, model.maxTokens,
options.reasoning, effectiveReasoning!,
options.thinkingBudgets, options.thinkingBudgets,
); );
return streamBedrock(model, context, { return streamBedrock(model, context, {
...base, ...base,
maxTokens: adjusted.maxTokens, maxTokens: adjusted.maxTokens,
reasoning: options.reasoning, reasoning: effectiveReasoning,
thinkingBudgets: { thinkingBudgets: {
...(options.thinkingBudgets || {}), ...(options.thinkingBudgets || {}),
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget, [clampReasoning(effectiveReasoning)!]: adjusted.thinkingBudget,
}, },
} satisfies BedrockOptions); } satisfies BedrockOptions);
} }
return streamBedrock(model, context, { return streamBedrock(model, context, {
...base, ...base,
reasoning: options.reasoning, reasoning: effectiveReasoning,
thinkingBudgets: options.thinkingBudgets, thinkingBudgets: options.thinkingBudgets,
} satisfies BedrockOptions); } satisfies BedrockOptions);
}; };
@ -407,6 +418,8 @@ export function mapThinkingLevelToEffort(
modelId: string, modelId: string,
): "low" | "medium" | "high" | "xhigh" | "max" { ): "low" | "medium" | "high" | "xhigh" | "max" {
switch (level) { switch (level) {
case "auto":
return "medium";
case "minimal": case "minimal":
case "low": case "low":
return "low"; return "low";
@ -709,8 +722,12 @@ export function buildAdditionalModelRequestFields(
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) { if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
const result: Record<string, any> = supportsAdaptiveThinking(model.id) const result: Record<string, any> = supportsAdaptiveThinking(model.id)
? options.reasoning === "auto"
? { ? {
thinking: { type: "adaptive" }, thinking: { type: "adaptive" },
}
: {
thinking: { type: "adaptive" },
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) }, output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
} }
: (() => { : (() => {
@ -723,8 +740,9 @@ export function buildAdditionalModelRequestFields(
}; };
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high) // Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
const level = options.reasoning === "xhigh" ? "high" : options.reasoning; const normalizedReasoning = options.reasoning === "auto" ? "medium" : options.reasoning;
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning]; const level = normalizedReasoning === "xhigh" ? "high" : normalizedReasoning;
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[normalizedReasoning];
return { return {
thinking: { thinking: {

View file

@ -163,6 +163,8 @@ export function supportsAdaptiveThinking(modelId: string): boolean {
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort { export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
switch (level) { switch (level) {
case "auto":
return "medium";
case "minimal": case "minimal":
return "low"; return "low";
case "low": case "low":
@ -481,6 +483,10 @@ export function buildParams(
if (options.effort) { if (options.effort) {
params.output_config = { effort: options.effort }; params.output_config = { effort: options.effort };
} }
} else if (model.capabilities?.thinkingNoBudget) {
// Provider accepts {"type":"enabled"} without budget_tokens — model manages depth.
// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not.
(params as any).thinking = { type: "enabled" };
} else { } else {
params.thinking = { params.thinking = {
type: "enabled", type: "enabled",

View file

@ -11,7 +11,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js"; import { adjustMaxTokensForThinking, buildBaseOptions, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
import { import {
type AnthropicEffort, type AnthropicEffort,
type AnthropicOptions, type AnthropicOptions,
@ -194,10 +194,19 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
} }
if (isAutoReasoning(options.reasoning) && (supportsAdaptiveThinking(model.id) || model.capabilities?.thinkingNoBudget)) {
return streamAnthropic(model, context, {
...base,
thinkingEnabled: true,
} satisfies AnthropicOptions);
}
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning)!;
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
// For older models: use budget-based thinking // For older models: use budget-based thinking
if (supportsAdaptiveThinking(model.id)) { if (supportsAdaptiveThinking(model.id)) {
const effort = mapThinkingLevelToEffort(options.reasoning, model.id); const effort = mapThinkingLevelToEffort(effectiveReasoning, model.id);
return streamAnthropic(model, context, { return streamAnthropic(model, context, {
...base, ...base,
thinkingEnabled: true, thinkingEnabled: true,
@ -208,7 +217,7 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
const adjusted = adjustMaxTokensForThinking( const adjusted = adjustMaxTokensForThinking(
base.maxTokens || 0, base.maxTokens || 0,
model.maxTokens, model.maxTokens,
options.reasoning, effectiveReasoning,
options.thinkingBudgets, options.thinkingBudgets,
); );

View file

@ -20,7 +20,7 @@ import {
finalizeStream, finalizeStream,
handleStreamError, handleStreamError,
} from "./openai-shared.js"; } from "./openai-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
let _AzureOpenAIClass: typeof AzureOpenAI | undefined; let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> { async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
@ -118,7 +118,8 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<"azure-openai-resp
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
return streamAzureOpenAIResponses(model, context, { return streamAzureOpenAIResponses(model, context, {
...base, ...base,

View file

@ -35,7 +35,7 @@ import {
mapToolChoice, mapToolChoice,
retainThoughtSignature, retainThoughtSignature,
} from "./google-shared.js"; } from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
/** /**
* Thinking level for Gemini 3 models. * Thinking level for Gemini 3 models.
@ -387,7 +387,27 @@ export const streamSimpleGoogleGeminiCli: StreamFunction<"google-gemini-cli", Si
} satisfies GoogleGeminiCliOptions); } satisfies GoogleGeminiCliOptions);
} }
const effort = clampReasoning(options.reasoning)!; if (isAutoReasoning(options.reasoning)) {
if (isGemini3Model(model.id)) {
return streamGoogleGeminiCli(model, context, {
...base,
thinking: {
enabled: true,
level: "THINKING_LEVEL_UNSPECIFIED",
},
} satisfies GoogleGeminiCliOptions);
}
return streamGoogleGeminiCli(model, context, {
...base,
thinking: {
enabled: true,
budgetTokens: -1,
},
} satisfies GoogleGeminiCliOptions);
}
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
if (isGemini3Model(model.id)) { if (isGemini3Model(model.id)) {
return streamGoogleGeminiCli(model, context, { return streamGoogleGeminiCli(model, context, {
...base, ...base,

View file

@ -32,7 +32,7 @@ import {
mapToolChoice, mapToolChoice,
retainThoughtSignature, retainThoughtSignature,
} from "./google-shared.js"; } from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
let _GoogleVertexClass: typeof GoogleGenAI | undefined; let _GoogleVertexClass: typeof GoogleGenAI | undefined;
async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> { async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
@ -308,7 +308,28 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
} satisfies GoogleVertexOptions); } satisfies GoogleVertexOptions);
} }
const effort = clampReasoning(options.reasoning)!; if (isAutoReasoning(options.reasoning)) {
const geminiModel = model as unknown as Model<"google-generative-ai">;
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
return streamGoogleVertex(model, context, {
...base,
thinking: {
enabled: true,
level: "THINKING_LEVEL_UNSPECIFIED",
},
} satisfies GoogleVertexOptions);
}
return streamGoogleVertex(model, context, {
...base,
thinking: {
enabled: true,
budgetTokens: -1,
},
} satisfies GoogleVertexOptions);
}
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
const geminiModel = model as unknown as Model<"google-generative-ai">; const geminiModel = model as unknown as Model<"google-generative-ai">;
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) { if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {

View file

@ -42,7 +42,7 @@ import {
mapToolChoice, mapToolChoice,
retainThoughtSignature, retainThoughtSignature,
} from "./google-shared.js"; } from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
export interface GoogleOptions extends StreamOptions { export interface GoogleOptions extends StreamOptions {
toolChoice?: "auto" | "none" | "any"; toolChoice?: "auto" | "none" | "any";
@ -297,7 +297,28 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt
return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions); return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions);
} }
const effort = clampReasoning(options.reasoning)!; if (isAutoReasoning(options.reasoning)) {
const googleModel = model as Model<"google-generative-ai">;
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
return streamGoogle(model, context, {
...base,
thinking: {
enabled: true,
level: "THINKING_LEVEL_UNSPECIFIED",
},
} satisfies GoogleOptions);
}
return streamGoogle(model, context, {
...base,
thinking: {
enabled: true,
budgetTokens: -1,
},
} satisfies GoogleOptions);
}
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
const googleModel = model as Model<"google-generative-ai">; const googleModel = model as Model<"google-generative-ai">;
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) { if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {

View file

@ -38,7 +38,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { shortHash } from "../utils/hash.js"; import { shortHash } from "../utils/hash.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
import { transformMessagesWithReport } from "./transform-messages.js"; import { transformMessagesWithReport } from "./transform-messages.js";
const MISTRAL_TOOL_CALL_ID_LENGTH = 9; const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
@ -125,7 +125,7 @@ export const streamSimpleMistral: StreamFunction<"mistral-conversations", Simple
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoning = clampReasoning(options?.reasoning); const reasoning = clampReasoning(resolveReasoningLevel(model, options?.reasoning));
return streamMistral(model, context, { return streamMistral(model, context, {
...base, ...base,

View file

@ -28,7 +28,7 @@ import type {
} from "../types.js"; } from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js"; import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
// ============================================================================ // ============================================================================
// Configuration // Configuration
@ -273,7 +273,8 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<"openai-codex-resp
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
return streamOpenAICodexResponses(model, context, { return streamOpenAICodexResponses(model, context, {
...base, ...base,

View file

@ -31,7 +31,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
import { import {
assertStreamSuccess, assertStreamSuccess,
buildInitialOutput, buildInitialOutput,
@ -302,7 +302,8 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice; const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
return streamOpenAICompletions(model, context, { return streamOpenAICompletions(model, context, {

View file

@ -22,7 +22,7 @@ import {
finalizeStream, finalizeStream,
handleStreamError, handleStreamError,
} from "./openai-shared.js"; } from "./openai-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]); const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
@ -56,7 +56,8 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
// OpenAI Responses-specific options // OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends StreamOptions { export interface OpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh"; /** "auto" means no effort constraint — model decides its own reasoning depth (GPT-5+). */
reasoningEffort?: "auto" | "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "detailed" | "concise" | null; reasoningSummary?: "auto" | "detailed" | "concise" | null;
serviceTier?: ResponseCreateParamsStreaming["service_tier"]; serviceTier?: ResponseCreateParamsStreaming["service_tier"];
} }
@ -118,7 +119,11 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const reasoningEffort: OpenAIResponsesOptions["reasoningEffort"] = isAutoReasoning(options?.reasoning)
? "auto"
: supportsXhigh(model)
? resolveReasoningLevel(model, options?.reasoning)
: clampReasoning(resolveReasoningLevel(model, options?.reasoning));
return streamOpenAIResponses(model, context, { return streamOpenAIResponses(model, context, {
...base, ...base,
@ -157,7 +162,12 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
if (model.reasoning) { if (model.reasoning) {
params.include = ["reasoning.encrypted_content"]; params.include = ["reasoning.encrypted_content"];
if (options?.reasoningEffort || options?.reasoningSummary) { if (options?.reasoningEffort === "auto") {
// Let the model decide its own reasoning depth — no effort constraint.
// GPT-5+ will reason as much as it judges necessary, same as
// THINKING_LEVEL_UNSPECIFIED for Gemini 2.5.
params.reasoning = { summary: options?.reasoningSummary || "auto" };
} else if (options?.reasoningEffort || options?.reasoningSummary) {
const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort; const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort;
params.reasoning = { params.reasoning = {
effort: effort || "medium", effort: effort || "medium",

View file

@ -0,0 +1,45 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import type { Model } from "../types.js";
import { isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
/**
 * Builds a minimal reasoning-capable model fixture for the tests below.
 * Individual fields can be replaced via `overrides` (shallow merge,
 * override values win over the defaults).
 */
function createModel(overrides: Partial<Model<any>> = {}): Model<any> {
	const defaults: Model<any> = {
		id: "test-model",
		name: "Test Model",
		provider: "openai",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		contextWindow: 128_000,
		maxTokens: 16_384,
		input: ["text"],
		reasoning: true,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
	};
	return { ...defaults, ...overrides };
}
describe("simple-options reasoning helpers", () => {
it("recognizes auto reasoning requests", () => {
assert.equal(isAutoReasoning("auto"), true);
assert.equal(isAutoReasoning("medium"), false);
assert.equal(isAutoReasoning(undefined), false);
});
it("maps auto to medium for reasoning-capable models", () => {
assert.equal(resolveReasoningLevel(createModel(), "auto"), "medium");
});
it("maps auto to undefined for models without reasoning support", () => {
assert.equal(resolveReasoningLevel(createModel({ reasoning: false }), "auto"), undefined);
});
it("passes through explicit reasoning levels unchanged", () => {
assert.equal(resolveReasoningLevel(createModel(), "xhigh"), "xhigh");
});
});

View file

@ -1,4 +1,12 @@
import type { Api, Model, SimpleStreamOptions, StreamOptions, ThinkingBudgets, ThinkingLevel } from "../types.js"; import type {
Api,
Model,
RequestedThinkingLevel,
SimpleStreamOptions,
StreamOptions,
ThinkingBudgets,
ThinkingLevel,
} from "../types.js";
export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions { export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions {
return { return {
@ -19,6 +27,23 @@ export function clampReasoning(effort: ThinkingLevel | undefined): Exclude<Think
return effort === "xhigh" ? "high" : effort; return effort === "xhigh" ? "high" : effort;
} }
/**
 * Type guard: true only when the caller requested the "auto" reasoning level,
 * narrowing `effort` to the "auto" literal for the caller.
 */
export function isAutoReasoning(
	effort: RequestedThinkingLevel | undefined,
): effort is Extract<RequestedThinkingLevel, "auto"> {
	switch (effort) {
		case "auto":
			return true;
		default:
			return false;
	}
}
/**
 * Normalizes a requested reasoning level to a concrete ThinkingLevel.
 * "auto" (or an absent level) resolves to "medium" when the model supports
 * reasoning and to undefined otherwise; explicit levels pass through unchanged.
 */
export function resolveReasoningLevel(
	model: Model<Api>,
	effort: RequestedThinkingLevel | undefined,
): ThinkingLevel | undefined {
	const needsDefault = effort == null || effort === "auto";
	if (!needsDefault) {
		return effort;
	}
	return model.reasoning ? "medium" : undefined;
}
export function adjustMaxTokensForThinking( export function adjustMaxTokensForThinking(
baseMaxTokens: number, baseMaxTokens: number,
modelMaxTokens: number, modelMaxTokens: number,

View file

@ -50,6 +50,7 @@ export type KnownProvider =
export type Provider = KnownProvider | string; export type Provider = KnownProvider | string;
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh"; export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
export type RequestedThinkingLevel = "auto" | ThinkingLevel;
/** Token budgets for each thinking level (token-based providers only) */ /** Token budgets for each thinking level (token-based providers only) */
export interface ThinkingBudgets { export interface ThinkingBudgets {
@ -116,7 +117,7 @@ export type ProviderStreamOptions = StreamOptions & Record<string, unknown>;
// Unified options with reasoning passed to streamSimple() and completeSimple() // Unified options with reasoning passed to streamSimple() and completeSimple()
export interface SimpleStreamOptions extends StreamOptions { export interface SimpleStreamOptions extends StreamOptions {
reasoning?: ThinkingLevel; reasoning?: RequestedThinkingLevel;
/** Custom token budgets for thinking levels (token-based providers only) */ /** Custom token budgets for thinking levels (token-based providers only) */
thinkingBudgets?: ThinkingBudgets; thinkingBudgets?: ThinkingBudgets;
} }
@ -359,6 +360,12 @@ export interface ModelCapabilities {
* If omitted, the provider-level default is used. * If omitted, the provider-level default is used.
*/ */
charsPerToken?: number; charsPerToken?: number;
/**
* Whether this model's Anthropic-compatible thinking API accepts {"type":"enabled"}
* without a budget_tokens field. When true, reasoning:"auto" sends no budget
* and lets the model decide its own reasoning depth (e.g. Kimi via kimi-coding).
*/
thinkingNoBudget?: boolean;
} }
// Model interface for the unified model system // Model interface for the unified model system