From f1da908dcdb7461a1c1e5705bd90c22860670509 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 19 Apr 2026 21:22:25 +0200 Subject: [PATCH] pi-ai: add reasoning:auto across all providers + Kimi K2.6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RequestedThinkingLevel adds "auto" to the reasoning option. Providers with a native "model decides" mode use it; every other provider resolves "auto" to "medium": - Claude adaptive-thinking models (anthropic/bedrock): adaptive thinking, no effort constraint; budget-based Claude models resolve to medium - Gemini 3 Pro/Flash (google/vertex/gemini-cli): THINKING_LEVEL_UNSPECIFIED; other Gemini models (e.g. 2.5): dynamic thinking budget (budgetTokens: -1) - GPT-5+ (openai-responses): reasoning.effort omitted, model decides; azure-openai-responses, openai-codex-responses, openai-completions and mistral resolve to medium - Kimi (kimi-coding): {"type":"enabled"} without budget_tokens via new capabilities.thinkingNoBudget flag — model manages reasoning depth - GLM (zai, thinkingFormat:zai): enable_thinking:true already correct - MiniMax (anthropic API): explicit budget_tokens required, resolves to medium ModelCapabilities.thinkingNoBudget: new flag for Anthropic-compatible providers that accept {"type":"enabled"} without a budget (Kimi API). models.generated.ts: add Kimi K2.6 (id: kimi-for-coding, beta API); add thinkingNoBudget capability to all kimi-coding models. 
Co-Authored-By: Claude Sonnet 4.6 --- packages/pi-ai/src/models.generated.ts | 20 +++++++++ .../pi-ai/src/providers/amazon-bedrock.ts | 44 ++++++++++++------ .../pi-ai/src/providers/anthropic-shared.ts | 6 +++ packages/pi-ai/src/providers/anthropic.ts | 15 +++++-- .../src/providers/azure-openai-responses.ts | 5 ++- .../pi-ai/src/providers/google-gemini-cli.ts | 24 +++++++++- packages/pi-ai/src/providers/google-vertex.ts | 25 ++++++++++- packages/pi-ai/src/providers/google.ts | 25 ++++++++++- packages/pi-ai/src/providers/mistral.ts | 4 +- .../src/providers/openai-codex-responses.ts | 5 ++- .../pi-ai/src/providers/openai-completions.ts | 5 ++- .../pi-ai/src/providers/openai-responses.ts | 18 ++++++-- .../src/providers/simple-options.test.ts | 45 +++++++++++++++++++ .../pi-ai/src/providers/simple-options.ts | 27 ++++++++++- packages/pi-ai/src/types.ts | 9 +++- 15 files changed, 241 insertions(+), 36 deletions(-) create mode 100644 packages/pi-ai/src/providers/simple-options.test.ts diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index 793237587..a3a5c11a1 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -4616,6 +4616,24 @@ export const MODELS = { } satisfies Model<"openai-completions">, }, "kimi-coding": { + "kimi-for-coding": { + id: "kimi-for-coding", + name: "Kimi K2.6", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: "https://api.kimi.com/coding", + reasoning: true, + input: ["text", "image"], + capabilities: { thinkingNoBudget: true }, + cost: { + input: 0.6, + output: 2.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, "k2p5": { id: "k2p5", name: "Kimi K2.5", @@ -4624,6 +4642,7 @@ export const MODELS = { baseUrl: "https://api.kimi.com/coding", reasoning: true, input: ["text", "image"], + capabilities: { thinkingNoBudget: true }, cost: { input: 0, output: 0, @@ -4641,6 
+4660,7 @@ export const MODELS = { baseUrl: "https://api.kimi.com/coding", reasoning: true, input: ["text"], + capabilities: { thinkingNoBudget: true }, cost: { input: 0, output: 0, diff --git a/packages/pi-ai/src/providers/amazon-bedrock.ts b/packages/pi-ai/src/providers/amazon-bedrock.ts index 78f076e68..ba780ff44 100644 --- a/packages/pi-ai/src/providers/amazon-bedrock.ts +++ b/packages/pi-ai/src/providers/amazon-bedrock.ts @@ -27,6 +27,7 @@ import type { CacheRetention, Context, Model, + RequestedThinkingLevel, SimpleStreamOptions, StopReason, StreamFunction, @@ -42,7 +43,7 @@ import type { import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js"; import { transformMessagesWithReport } from "./transform-messages.js"; export interface BedrockOptions extends StreamOptions { @@ -50,7 +51,7 @@ export interface BedrockOptions extends StreamOptions { profile?: string; toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string }; /* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */ - reasoning?: ThinkingLevel; + reasoning?: RequestedThinkingLevel; /* Custom token budgets per thinking level. Overrides default budgets. 
*/ thinkingBudgets?: ThinkingBudgets; /* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */ @@ -226,8 +227,10 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions); } + const effectiveReasoning = resolveReasoningLevel(model, options.reasoning); + if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) { - if (supportsAdaptiveThinking(model.id)) { + if (supportsAdaptiveThinking(model.id) && isAutoReasoning(options.reasoning)) { return streamBedrock(model, context, { ...base, reasoning: options.reasoning, @@ -235,27 +238,35 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp } satisfies BedrockOptions); } + if (supportsAdaptiveThinking(model.id)) { + return streamBedrock(model, context, { + ...base, + reasoning: effectiveReasoning, + thinkingBudgets: options.thinkingBudgets, + } satisfies BedrockOptions); + } + const adjusted = adjustMaxTokensForThinking( base.maxTokens || 0, model.maxTokens, - options.reasoning, + effectiveReasoning!, options.thinkingBudgets, ); return streamBedrock(model, context, { ...base, maxTokens: adjusted.maxTokens, - reasoning: options.reasoning, + reasoning: effectiveReasoning, thinkingBudgets: { ...(options.thinkingBudgets || {}), - [clampReasoning(options.reasoning)!]: adjusted.thinkingBudget, + [clampReasoning(effectiveReasoning)!]: adjusted.thinkingBudget, }, } satisfies BedrockOptions); } return streamBedrock(model, context, { ...base, - reasoning: options.reasoning, + reasoning: effectiveReasoning, thinkingBudgets: options.thinkingBudgets, } satisfies BedrockOptions); }; @@ -407,6 +418,8 @@ export function mapThinkingLevelToEffort( modelId: string, ): "low" | "medium" | "high" | "xhigh" | "max" { switch (level) { 
+ case "auto": + return "medium"; case "minimal": case "low": return "low"; @@ -709,10 +722,14 @@ export function buildAdditionalModelRequestFields( if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) { const result: Record = supportsAdaptiveThinking(model.id) - ? { - thinking: { type: "adaptive" }, - output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) }, - } + ? options.reasoning === "auto" + ? { + thinking: { type: "adaptive" }, + } + : { + thinking: { type: "adaptive" }, + output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) }, + } : (() => { const defaultBudgets: Record = { minimal: 1024, @@ -723,8 +740,9 @@ export function buildAdditionalModelRequestFields( }; // Custom budgets override defaults (xhigh not in ThinkingBudgets, use high) - const level = options.reasoning === "xhigh" ? "high" : options.reasoning; - const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning]; + const normalizedReasoning = options.reasoning === "auto" ? "medium" : options.reasoning; + const level = normalizedReasoning === "xhigh" ? "high" : normalizedReasoning; + const budget = options.thinkingBudgets?.[level] ?? 
defaultBudgets[normalizedReasoning]; return { thinking: { diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 187ebedf6..342c3fa11 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -163,6 +163,8 @@ export function supportsAdaptiveThinking(modelId: string): boolean { export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort { switch (level) { + case "auto": + return "medium"; case "minimal": return "low"; case "low": @@ -481,6 +483,10 @@ export function buildParams( if (options.effort) { params.output_config = { effort: options.effort }; } + } else if (model.capabilities?.thinkingNoBudget) { + // Provider accepts {"type":"enabled"} without budget_tokens — model manages depth. + // The Anthropic SDK type requires budget_tokens but the kimi-coding API does not. + (params as any).thinking = { type: "enabled" }; } else { params.thinking = { type: "enabled", diff --git a/packages/pi-ai/src/providers/anthropic.ts b/packages/pi-ai/src/providers/anthropic.ts index 93fea4555..e644045b8 100644 --- a/packages/pi-ai/src/providers/anthropic.ts +++ b/packages/pi-ai/src/providers/anthropic.ts @@ -11,7 +11,7 @@ import type { import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js"; -import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js"; +import { adjustMaxTokensForThinking, buildBaseOptions, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js"; import { type AnthropicEffort, type AnthropicOptions, @@ -194,10 +194,19 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); } + if (isAutoReasoning(options.reasoning) && 
(supportsAdaptiveThinking(model.id) || model.capabilities?.thinkingNoBudget)) { + return streamAnthropic(model, context, { + ...base, + thinkingEnabled: true, + } satisfies AnthropicOptions); + } + + const effectiveReasoning = resolveReasoningLevel(model, options.reasoning)!; + // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level // For older models: use budget-based thinking if (supportsAdaptiveThinking(model.id)) { - const effort = mapThinkingLevelToEffort(options.reasoning, model.id); + const effort = mapThinkingLevelToEffort(effectiveReasoning, model.id); return streamAnthropic(model, context, { ...base, thinkingEnabled: true, @@ -208,7 +217,7 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS const adjusted = adjustMaxTokensForThinking( base.maxTokens || 0, model.maxTokens, - options.reasoning, + effectiveReasoning, options.thinkingBudgets, ); diff --git a/packages/pi-ai/src/providers/azure-openai-responses.ts b/packages/pi-ai/src/providers/azure-openai-responses.ts index 42646f533..9dac639d4 100644 --- a/packages/pi-ai/src/providers/azure-openai-responses.ts +++ b/packages/pi-ai/src/providers/azure-openai-responses.ts @@ -20,7 +20,7 @@ import { finalizeStream, handleStreamError, } from "./openai-shared.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js"; let _AzureOpenAIClass: typeof AzureOpenAI | undefined; async function getAzureOpenAIClass(): Promise { @@ -118,7 +118,8 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<"azure-openai-resp } const base = buildBaseOptions(model, options, apiKey); - const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); + const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning); + const reasoningEffort = supportsXhigh(model) ? 
effectiveReasoning : clampReasoning(effectiveReasoning); return streamAzureOpenAIResponses(model, context, { ...base, diff --git a/packages/pi-ai/src/providers/google-gemini-cli.ts b/packages/pi-ai/src/providers/google-gemini-cli.ts index 719d7d283..33d67706b 100644 --- a/packages/pi-ai/src/providers/google-gemini-cli.ts +++ b/packages/pi-ai/src/providers/google-gemini-cli.ts @@ -35,7 +35,7 @@ import { mapToolChoice, retainThoughtSignature, } from "./google-shared.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js"; /** * Thinking level for Gemini 3 models. @@ -387,7 +387,27 @@ export const streamSimpleGoogleGeminiCli: StreamFunction<"google-gemini-cli", Si } satisfies GoogleGeminiCliOptions); } - const effort = clampReasoning(options.reasoning)!; + if (isAutoReasoning(options.reasoning)) { + if (isGemini3Model(model.id)) { + return streamGoogleGeminiCli(model, context, { + ...base, + thinking: { + enabled: true, + level: "THINKING_LEVEL_UNSPECIFIED", + }, + } satisfies GoogleGeminiCliOptions); + } + + return streamGoogleGeminiCli(model, context, { + ...base, + thinking: { + enabled: true, + budgetTokens: -1, + }, + } satisfies GoogleGeminiCliOptions); + } + + const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!; if (isGemini3Model(model.id)) { return streamGoogleGeminiCli(model, context, { ...base, diff --git a/packages/pi-ai/src/providers/google-vertex.ts b/packages/pi-ai/src/providers/google-vertex.ts index 89ea1d3c3..d8bcbcaa9 100644 --- a/packages/pi-ai/src/providers/google-vertex.ts +++ b/packages/pi-ai/src/providers/google-vertex.ts @@ -32,7 +32,7 @@ import { mapToolChoice, retainThoughtSignature, } from "./google-shared.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from 
"./simple-options.js"; let _GoogleVertexClass: typeof GoogleGenAI | undefined; async function getGoogleVertexClass(): Promise { @@ -308,7 +308,28 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr } satisfies GoogleVertexOptions); } - const effort = clampReasoning(options.reasoning)!; + if (isAutoReasoning(options.reasoning)) { + const geminiModel = model as unknown as Model<"google-generative-ai">; + if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) { + return streamGoogleVertex(model, context, { + ...base, + thinking: { + enabled: true, + level: "THINKING_LEVEL_UNSPECIFIED", + }, + } satisfies GoogleVertexOptions); + } + + return streamGoogleVertex(model, context, { + ...base, + thinking: { + enabled: true, + budgetTokens: -1, + }, + } satisfies GoogleVertexOptions); + } + + const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!; const geminiModel = model as unknown as Model<"google-generative-ai">; if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) { diff --git a/packages/pi-ai/src/providers/google.ts b/packages/pi-ai/src/providers/google.ts index d1f42ed05..240934901 100644 --- a/packages/pi-ai/src/providers/google.ts +++ b/packages/pi-ai/src/providers/google.ts @@ -42,7 +42,7 @@ import { mapToolChoice, retainThoughtSignature, } from "./google-shared.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js"; export interface GoogleOptions extends StreamOptions { toolChoice?: "auto" | "none" | "any"; @@ -297,7 +297,28 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions); } - const effort = clampReasoning(options.reasoning)!; + if (isAutoReasoning(options.reasoning)) { + const googleModel = model as 
Model<"google-generative-ai">; + if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) { + return streamGoogle(model, context, { + ...base, + thinking: { + enabled: true, + level: "THINKING_LEVEL_UNSPECIFIED", + }, + } satisfies GoogleOptions); + } + + return streamGoogle(model, context, { + ...base, + thinking: { + enabled: true, + budgetTokens: -1, + }, + } satisfies GoogleOptions); + } + + const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!; const googleModel = model as Model<"google-generative-ai">; if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) { diff --git a/packages/pi-ai/src/providers/mistral.ts b/packages/pi-ai/src/providers/mistral.ts index 0a6a28e5c..f8e0502cf 100644 --- a/packages/pi-ai/src/providers/mistral.ts +++ b/packages/pi-ai/src/providers/mistral.ts @@ -38,7 +38,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { shortHash } from "../utils/hash.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js"; import { transformMessagesWithReport } from "./transform-messages.js"; const MISTRAL_TOOL_CALL_ID_LENGTH = 9; @@ -125,7 +125,7 @@ export const streamSimpleMistral: StreamFunction<"mistral-conversations", Simple } const base = buildBaseOptions(model, options, apiKey); - const reasoning = clampReasoning(options?.reasoning); + const reasoning = clampReasoning(resolveReasoningLevel(model, options?.reasoning)); return streamMistral(model, context, { ...base, diff --git a/packages/pi-ai/src/providers/openai-codex-responses.ts b/packages/pi-ai/src/providers/openai-codex-responses.ts index 294290188..e45a3eee6 100644 --- a/packages/pi-ai/src/providers/openai-codex-responses.ts +++ 
b/packages/pi-ai/src/providers/openai-codex-responses.ts @@ -28,7 +28,7 @@ import type { } from "../types.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js"; // ============================================================================ // Configuration @@ -273,7 +273,8 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<"openai-codex-resp } const base = buildBaseOptions(model, options, apiKey); - const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); + const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning); + const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning); return streamOpenAICodexResponses(model, context, { ...base, diff --git a/packages/pi-ai/src/providers/openai-completions.ts b/packages/pi-ai/src/providers/openai-completions.ts index 4b39ae4ee..a67f9219d 100644 --- a/packages/pi-ai/src/providers/openai-completions.ts +++ b/packages/pi-ai/src/providers/openai-completions.ts @@ -31,7 +31,7 @@ import type { import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js"; import { assertStreamSuccess, buildInitialOutput, @@ -302,7 +302,8 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions", } const base = buildBaseOptions(model, options, apiKey); - const reasoningEffort = supportsXhigh(model) 
? options?.reasoning : clampReasoning(options?.reasoning); + const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning); + const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning); const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice; return streamOpenAICompletions(model, context, { diff --git a/packages/pi-ai/src/providers/openai-responses.ts b/packages/pi-ai/src/providers/openai-responses.ts index 484d3c3e3..d0e6222af 100644 --- a/packages/pi-ai/src/providers/openai-responses.ts +++ b/packages/pi-ai/src/providers/openai-responses.ts @@ -22,7 +22,7 @@ import { finalizeStream, handleStreamError, } from "./openai-shared.js"; -import { buildBaseOptions, clampReasoning } from "./simple-options.js"; +import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js"; const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]); @@ -56,7 +56,8 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention // OpenAI Responses-specific options export interface OpenAIResponsesOptions extends StreamOptions { - reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh"; + /** "auto" means no effort constraint — model decides its own reasoning depth (GPT-5+). */ + reasoningEffort?: "auto" | "minimal" | "low" | "medium" | "high" | "xhigh"; reasoningSummary?: "auto" | "detailed" | "concise" | null; serviceTier?: ResponseCreateParamsStreaming["service_tier"]; } @@ -118,7 +119,11 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim } const base = buildBaseOptions(model, options, apiKey); - const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); + const reasoningEffort: OpenAIResponsesOptions["reasoningEffort"] = isAutoReasoning(options?.reasoning) + ? "auto" + : supportsXhigh(model) + ? 
resolveReasoningLevel(model, options?.reasoning) + : clampReasoning(resolveReasoningLevel(model, options?.reasoning)); return streamOpenAIResponses(model, context, { ...base, @@ -157,7 +162,12 @@ function buildParams(model: Model<"openai-responses">, context: Context, options if (model.reasoning) { params.include = ["reasoning.encrypted_content"]; - if (options?.reasoningEffort || options?.reasoningSummary) { + if (options?.reasoningEffort === "auto") { + // Let the model decide its own reasoning depth — no effort constraint. + // GPT-5+ will reason as much as it judges necessary, same as + // THINKING_LEVEL_UNSPECIFIED for Gemini 2.5. + params.reasoning = { summary: options?.reasoningSummary || "auto" }; + } else if (options?.reasoningEffort || options?.reasoningSummary) { const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort; params.reasoning = { effort: effort || "medium", diff --git a/packages/pi-ai/src/providers/simple-options.test.ts b/packages/pi-ai/src/providers/simple-options.test.ts new file mode 100644 index 000000000..93a15749a --- /dev/null +++ b/packages/pi-ai/src/providers/simple-options.test.ts @@ -0,0 +1,45 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import type { Model } from "../types.js"; +import { isAutoReasoning, resolveReasoningLevel } from "./simple-options.js"; + +function createModel(overrides: Partial> = {}): Model { + return { + id: "test-model", + name: "Test Model", + provider: "openai", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + contextWindow: 128_000, + maxTokens: 16_384, + input: ["text"], + reasoning: true, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + ...overrides, + }; +} + +describe("simple-options reasoning helpers", () => { + it("recognizes auto reasoning requests", () => { + assert.equal(isAutoReasoning("auto"), true); + assert.equal(isAutoReasoning("medium"), 
false); + assert.equal(isAutoReasoning(undefined), false); + }); + + it("maps auto to medium for reasoning-capable models", () => { + assert.equal(resolveReasoningLevel(createModel(), "auto"), "medium"); + }); + + it("maps auto to undefined for models without reasoning support", () => { + assert.equal(resolveReasoningLevel(createModel({ reasoning: false }), "auto"), undefined); + }); + + it("passes through explicit reasoning levels unchanged", () => { + assert.equal(resolveReasoningLevel(createModel(), "xhigh"), "xhigh"); + }); +}); diff --git a/packages/pi-ai/src/providers/simple-options.ts b/packages/pi-ai/src/providers/simple-options.ts index 71c15847b..28fdc7461 100644 --- a/packages/pi-ai/src/providers/simple-options.ts +++ b/packages/pi-ai/src/providers/simple-options.ts @@ -1,4 +1,12 @@ -import type { Api, Model, SimpleStreamOptions, StreamOptions, ThinkingBudgets, ThinkingLevel } from "../types.js"; +import type { + Api, + Model, + RequestedThinkingLevel, + SimpleStreamOptions, + StreamOptions, + ThinkingBudgets, + ThinkingLevel, +} from "../types.js"; export function buildBaseOptions(model: Model, options?: SimpleStreamOptions, apiKey?: string): StreamOptions { return { @@ -19,6 +27,23 @@ export function clampReasoning(effort: ThinkingLevel | undefined): Exclude { + return effort === "auto"; +} + +export function resolveReasoningLevel( + model: Model, + effort: RequestedThinkingLevel | undefined, +): ThinkingLevel | undefined { + if (!effort || effort === "auto") { + if (!model.reasoning) return undefined; + return "medium"; + } + return effort; +} + export function adjustMaxTokensForThinking( baseMaxTokens: number, modelMaxTokens: number, diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index 3f0999af2..9ef901033 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -50,6 +50,7 @@ export type KnownProvider = export type Provider = KnownProvider | string; export type ThinkingLevel = "minimal" | "low" | "medium" 
| "high" | "xhigh"; +export type RequestedThinkingLevel = "auto" | ThinkingLevel; /** Token budgets for each thinking level (token-based providers only) */ export interface ThinkingBudgets { @@ -116,7 +117,7 @@ export type ProviderStreamOptions = StreamOptions & Record; // Unified options with reasoning passed to streamSimple() and completeSimple() export interface SimpleStreamOptions extends StreamOptions { - reasoning?: ThinkingLevel; + reasoning?: RequestedThinkingLevel; /** Custom token budgets for thinking levels (token-based providers only) */ thinkingBudgets?: ThinkingBudgets; } @@ -359,6 +360,12 @@ export interface ModelCapabilities { * If omitted, the provider-level default is used. */ charsPerToken?: number; + /** + * Whether this model's Anthropic-compatible thinking API accepts {"type":"enabled"} + * without a budget_tokens field. When true, reasoning:"auto" sends no budget + * and lets the model decide its own reasoning depth (e.g. Kimi via kimi-coding). + */ + thinkingNoBudget?: boolean; } // Model interface for the unified model system