pi-ai: add reasoning:auto across all providers + Kimi K2.6

The new RequestedThinkingLevel type adds "auto" to the reasoning option. Each
provider handles it natively:

- Claude 4.x (anthropic/bedrock): adaptive thinking, no effort constraint
- Gemini 2.5 Pro/Flash (google/vertex/gemini-cli): THINKING_LEVEL_UNSPECIFIED
- GPT-5+ (openai-responses/azure): reasoning.effort omitted, model decides
- Kimi (kimi-coding): {"type":"enabled"} without budget_tokens via new
  capabilities.thinkingNoBudget flag — model manages reasoning depth
- GLM (zai, thinkingFormat:zai): enable_thinking:true already correct
- MiniMax (anthropic API): explicit budget_tokens required, resolves to medium

ModelCapabilities.thinkingNoBudget: new flag for Anthropic-compatible providers
that accept {"type":"enabled"} without a budget (Kimi API).

models.generated.ts: add Kimi K2.6 (id: kimi-for-coding, beta API); add
thinkingNoBudget capability to all kimi-coding models.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-19 21:22:25 +02:00
parent 38d3bd55da
commit f1da908dcd
15 changed files with 241 additions and 36 deletions

View file

@ -4616,6 +4616,24 @@ export const MODELS = {
} satisfies Model<"openai-completions">, } satisfies Model<"openai-completions">,
}, },
"kimi-coding": { "kimi-coding": {
"kimi-for-coding": {
id: "kimi-for-coding",
name: "Kimi K2.6",
api: "anthropic-messages",
provider: "kimi-coding",
baseUrl: "https://api.kimi.com/coding",
reasoning: true,
input: ["text", "image"],
capabilities: { thinkingNoBudget: true },
cost: {
input: 0.6,
output: 2.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 32768,
} satisfies Model<"anthropic-messages">,
"k2p5": { "k2p5": {
id: "k2p5", id: "k2p5",
name: "Kimi K2.5", name: "Kimi K2.5",
@ -4624,6 +4642,7 @@ export const MODELS = {
baseUrl: "https://api.kimi.com/coding", baseUrl: "https://api.kimi.com/coding",
reasoning: true, reasoning: true,
input: ["text", "image"], input: ["text", "image"],
capabilities: { thinkingNoBudget: true },
cost: { cost: {
input: 0, input: 0,
output: 0, output: 0,
@ -4641,6 +4660,7 @@ export const MODELS = {
baseUrl: "https://api.kimi.com/coding", baseUrl: "https://api.kimi.com/coding",
reasoning: true, reasoning: true,
input: ["text"], input: ["text"],
capabilities: { thinkingNoBudget: true },
cost: { cost: {
input: 0, input: 0,
output: 0, output: 0,

View file

@ -27,6 +27,7 @@ import type {
CacheRetention, CacheRetention,
Context, Context,
Model, Model,
RequestedThinkingLevel,
SimpleStreamOptions, SimpleStreamOptions,
StopReason, StopReason,
StreamFunction, StreamFunction,
@ -42,7 +43,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js"; import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
import { transformMessagesWithReport } from "./transform-messages.js"; import { transformMessagesWithReport } from "./transform-messages.js";
export interface BedrockOptions extends StreamOptions { export interface BedrockOptions extends StreamOptions {
@ -50,7 +51,7 @@ export interface BedrockOptions extends StreamOptions {
profile?: string; profile?: string;
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string }; toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */ /* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
reasoning?: ThinkingLevel; reasoning?: RequestedThinkingLevel;
/* Custom token budgets per thinking level. Overrides default budgets. */ /* Custom token budgets per thinking level. Overrides default budgets. */
thinkingBudgets?: ThinkingBudgets; thinkingBudgets?: ThinkingBudgets;
/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */ /* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
@ -226,8 +227,10 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions); return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);
} }
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning);
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) { if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
if (supportsAdaptiveThinking(model.id)) { if (supportsAdaptiveThinking(model.id) && isAutoReasoning(options.reasoning)) {
return streamBedrock(model, context, { return streamBedrock(model, context, {
...base, ...base,
reasoning: options.reasoning, reasoning: options.reasoning,
@ -235,27 +238,35 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
} satisfies BedrockOptions); } satisfies BedrockOptions);
} }
if (supportsAdaptiveThinking(model.id)) {
return streamBedrock(model, context, {
...base,
reasoning: effectiveReasoning,
thinkingBudgets: options.thinkingBudgets,
} satisfies BedrockOptions);
}
const adjusted = adjustMaxTokensForThinking( const adjusted = adjustMaxTokensForThinking(
base.maxTokens || 0, base.maxTokens || 0,
model.maxTokens, model.maxTokens,
options.reasoning, effectiveReasoning!,
options.thinkingBudgets, options.thinkingBudgets,
); );
return streamBedrock(model, context, { return streamBedrock(model, context, {
...base, ...base,
maxTokens: adjusted.maxTokens, maxTokens: adjusted.maxTokens,
reasoning: options.reasoning, reasoning: effectiveReasoning,
thinkingBudgets: { thinkingBudgets: {
...(options.thinkingBudgets || {}), ...(options.thinkingBudgets || {}),
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget, [clampReasoning(effectiveReasoning)!]: adjusted.thinkingBudget,
}, },
} satisfies BedrockOptions); } satisfies BedrockOptions);
} }
return streamBedrock(model, context, { return streamBedrock(model, context, {
...base, ...base,
reasoning: options.reasoning, reasoning: effectiveReasoning,
thinkingBudgets: options.thinkingBudgets, thinkingBudgets: options.thinkingBudgets,
} satisfies BedrockOptions); } satisfies BedrockOptions);
}; };
@ -407,6 +418,8 @@ export function mapThinkingLevelToEffort(
modelId: string, modelId: string,
): "low" | "medium" | "high" | "xhigh" | "max" { ): "low" | "medium" | "high" | "xhigh" | "max" {
switch (level) { switch (level) {
case "auto":
return "medium";
case "minimal": case "minimal":
case "low": case "low":
return "low"; return "low";
@ -709,8 +722,12 @@ export function buildAdditionalModelRequestFields(
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) { if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
const result: Record<string, any> = supportsAdaptiveThinking(model.id) const result: Record<string, any> = supportsAdaptiveThinking(model.id)
? options.reasoning === "auto"
? { ? {
thinking: { type: "adaptive" }, thinking: { type: "adaptive" },
}
: {
thinking: { type: "adaptive" },
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) }, output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
} }
: (() => { : (() => {
@ -723,8 +740,9 @@ export function buildAdditionalModelRequestFields(
}; };
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high) // Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
const level = options.reasoning === "xhigh" ? "high" : options.reasoning; const normalizedReasoning = options.reasoning === "auto" ? "medium" : options.reasoning;
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning]; const level = normalizedReasoning === "xhigh" ? "high" : normalizedReasoning;
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[normalizedReasoning];
return { return {
thinking: { thinking: {

View file

@ -163,6 +163,8 @@ export function supportsAdaptiveThinking(modelId: string): boolean {
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort { export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
switch (level) { switch (level) {
case "auto":
return "medium";
case "minimal": case "minimal":
return "low"; return "low";
case "low": case "low":
@ -481,6 +483,10 @@ export function buildParams(
if (options.effort) { if (options.effort) {
params.output_config = { effort: options.effort }; params.output_config = { effort: options.effort };
} }
} else if (model.capabilities?.thinkingNoBudget) {
// Provider accepts {"type":"enabled"} without budget_tokens — model manages depth.
// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not.
(params as any).thinking = { type: "enabled" };
} else { } else {
params.thinking = { params.thinking = {
type: "enabled", type: "enabled",

View file

@ -11,7 +11,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js"; import { adjustMaxTokensForThinking, buildBaseOptions, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
import { import {
type AnthropicEffort, type AnthropicEffort,
type AnthropicOptions, type AnthropicOptions,
@ -194,10 +194,19 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
} }
if (isAutoReasoning(options.reasoning) && (supportsAdaptiveThinking(model.id) || model.capabilities?.thinkingNoBudget)) {
return streamAnthropic(model, context, {
...base,
thinkingEnabled: true,
} satisfies AnthropicOptions);
}
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning)!;
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
// For older models: use budget-based thinking // For older models: use budget-based thinking
if (supportsAdaptiveThinking(model.id)) { if (supportsAdaptiveThinking(model.id)) {
const effort = mapThinkingLevelToEffort(options.reasoning, model.id); const effort = mapThinkingLevelToEffort(effectiveReasoning, model.id);
return streamAnthropic(model, context, { return streamAnthropic(model, context, {
...base, ...base,
thinkingEnabled: true, thinkingEnabled: true,
@ -208,7 +217,7 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
const adjusted = adjustMaxTokensForThinking( const adjusted = adjustMaxTokensForThinking(
base.maxTokens || 0, base.maxTokens || 0,
model.maxTokens, model.maxTokens,
options.reasoning, effectiveReasoning,
options.thinkingBudgets, options.thinkingBudgets,
); );

View file

@ -20,7 +20,7 @@ import {
finalizeStream, finalizeStream,
handleStreamError, handleStreamError,
} from "./openai-shared.js"; } from "./openai-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
let _AzureOpenAIClass: typeof AzureOpenAI | undefined; let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> { async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
@ -118,7 +118,8 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<"azure-openai-resp
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
return streamAzureOpenAIResponses(model, context, { return streamAzureOpenAIResponses(model, context, {
...base, ...base,

View file

@ -35,7 +35,7 @@ import {
mapToolChoice, mapToolChoice,
retainThoughtSignature, retainThoughtSignature,
} from "./google-shared.js"; } from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
/** /**
* Thinking level for Gemini 3 models. * Thinking level for Gemini 3 models.
@ -387,7 +387,27 @@ export const streamSimpleGoogleGeminiCli: StreamFunction<"google-gemini-cli", Si
} satisfies GoogleGeminiCliOptions); } satisfies GoogleGeminiCliOptions);
} }
const effort = clampReasoning(options.reasoning)!; if (isAutoReasoning(options.reasoning)) {
if (isGemini3Model(model.id)) {
return streamGoogleGeminiCli(model, context, {
...base,
thinking: {
enabled: true,
level: "THINKING_LEVEL_UNSPECIFIED",
},
} satisfies GoogleGeminiCliOptions);
}
return streamGoogleGeminiCli(model, context, {
...base,
thinking: {
enabled: true,
budgetTokens: -1,
},
} satisfies GoogleGeminiCliOptions);
}
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
if (isGemini3Model(model.id)) { if (isGemini3Model(model.id)) {
return streamGoogleGeminiCli(model, context, { return streamGoogleGeminiCli(model, context, {
...base, ...base,

View file

@ -32,7 +32,7 @@ import {
mapToolChoice, mapToolChoice,
retainThoughtSignature, retainThoughtSignature,
} from "./google-shared.js"; } from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
let _GoogleVertexClass: typeof GoogleGenAI | undefined; let _GoogleVertexClass: typeof GoogleGenAI | undefined;
async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> { async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
@ -308,7 +308,28 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
} satisfies GoogleVertexOptions); } satisfies GoogleVertexOptions);
} }
const effort = clampReasoning(options.reasoning)!; if (isAutoReasoning(options.reasoning)) {
const geminiModel = model as unknown as Model<"google-generative-ai">;
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
return streamGoogleVertex(model, context, {
...base,
thinking: {
enabled: true,
level: "THINKING_LEVEL_UNSPECIFIED",
},
} satisfies GoogleVertexOptions);
}
return streamGoogleVertex(model, context, {
...base,
thinking: {
enabled: true,
budgetTokens: -1,
},
} satisfies GoogleVertexOptions);
}
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
const geminiModel = model as unknown as Model<"google-generative-ai">; const geminiModel = model as unknown as Model<"google-generative-ai">;
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) { if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {

View file

@ -42,7 +42,7 @@ import {
mapToolChoice, mapToolChoice,
retainThoughtSignature, retainThoughtSignature,
} from "./google-shared.js"; } from "./google-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
export interface GoogleOptions extends StreamOptions { export interface GoogleOptions extends StreamOptions {
toolChoice?: "auto" | "none" | "any"; toolChoice?: "auto" | "none" | "any";
@ -297,7 +297,28 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt
return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions); return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions);
} }
const effort = clampReasoning(options.reasoning)!; if (isAutoReasoning(options.reasoning)) {
const googleModel = model as Model<"google-generative-ai">;
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
return streamGoogle(model, context, {
...base,
thinking: {
enabled: true,
level: "THINKING_LEVEL_UNSPECIFIED",
},
} satisfies GoogleOptions);
}
return streamGoogle(model, context, {
...base,
thinking: {
enabled: true,
budgetTokens: -1,
},
} satisfies GoogleOptions);
}
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
const googleModel = model as Model<"google-generative-ai">; const googleModel = model as Model<"google-generative-ai">;
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) { if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {

View file

@ -38,7 +38,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { shortHash } from "../utils/hash.js"; import { shortHash } from "../utils/hash.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
import { transformMessagesWithReport } from "./transform-messages.js"; import { transformMessagesWithReport } from "./transform-messages.js";
const MISTRAL_TOOL_CALL_ID_LENGTH = 9; const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
@ -125,7 +125,7 @@ export const streamSimpleMistral: StreamFunction<"mistral-conversations", Simple
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoning = clampReasoning(options?.reasoning); const reasoning = clampReasoning(resolveReasoningLevel(model, options?.reasoning));
return streamMistral(model, context, { return streamMistral(model, context, {
...base, ...base,

View file

@ -28,7 +28,7 @@ import type {
} from "../types.js"; } from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js"; import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
// ============================================================================ // ============================================================================
// Configuration // Configuration
@ -273,7 +273,8 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<"openai-codex-resp
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
return streamOpenAICodexResponses(model, context, { return streamOpenAICodexResponses(model, context, {
...base, ...base,

View file

@ -31,7 +31,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
import { import {
assertStreamSuccess, assertStreamSuccess,
buildInitialOutput, buildInitialOutput,
@ -302,7 +302,8 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice; const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
return streamOpenAICompletions(model, context, { return streamOpenAICompletions(model, context, {

View file

@ -22,7 +22,7 @@ import {
finalizeStream, finalizeStream,
handleStreamError, handleStreamError,
} from "./openai-shared.js"; } from "./openai-shared.js";
import { buildBaseOptions, clampReasoning } from "./simple-options.js"; import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]); const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
@ -56,7 +56,8 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
// OpenAI Responses-specific options // OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends StreamOptions { export interface OpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh"; /** "auto" means no effort constraint — model decides its own reasoning depth (GPT-5+). */
reasoningEffort?: "auto" | "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "detailed" | "concise" | null; reasoningSummary?: "auto" | "detailed" | "concise" | null;
serviceTier?: ResponseCreateParamsStreaming["service_tier"]; serviceTier?: ResponseCreateParamsStreaming["service_tier"];
} }
@ -118,7 +119,11 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
} }
const base = buildBaseOptions(model, options, apiKey); const base = buildBaseOptions(model, options, apiKey);
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning); const reasoningEffort: OpenAIResponsesOptions["reasoningEffort"] = isAutoReasoning(options?.reasoning)
? "auto"
: supportsXhigh(model)
? resolveReasoningLevel(model, options?.reasoning)
: clampReasoning(resolveReasoningLevel(model, options?.reasoning));
return streamOpenAIResponses(model, context, { return streamOpenAIResponses(model, context, {
...base, ...base,
@ -157,7 +162,12 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
if (model.reasoning) { if (model.reasoning) {
params.include = ["reasoning.encrypted_content"]; params.include = ["reasoning.encrypted_content"];
if (options?.reasoningEffort || options?.reasoningSummary) { if (options?.reasoningEffort === "auto") {
// Let the model decide its own reasoning depth — no effort constraint.
// GPT-5+ will reason as much as it judges necessary, same as
// THINKING_LEVEL_UNSPECIFIED for Gemini 2.5.
params.reasoning = { summary: options?.reasoningSummary || "auto" };
} else if (options?.reasoningEffort || options?.reasoningSummary) {
const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort; const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort;
params.reasoning = { params.reasoning = {
effort: effort || "medium", effort: effort || "medium",

View file

@ -0,0 +1,45 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import type { Model } from "../types.js";
import { isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
/**
 * Builds a minimal reasoning-capable model fixture for the tests below.
 * Individual fields can be replaced via `overrides` (shallow merge,
 * override values win over the defaults).
 */
function createModel(overrides: Partial<Model<any>> = {}): Model<any> {
	const defaults: Model<any> = {
		id: "test-model",
		name: "Test Model",
		provider: "openai",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		contextWindow: 128_000,
		maxTokens: 16_384,
		input: ["text"],
		reasoning: true,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
	};
	return { ...defaults, ...overrides };
}
describe("simple-options reasoning helpers", () => {
it("recognizes auto reasoning requests", () => {
assert.equal(isAutoReasoning("auto"), true);
assert.equal(isAutoReasoning("medium"), false);
assert.equal(isAutoReasoning(undefined), false);
});
it("maps auto to medium for reasoning-capable models", () => {
assert.equal(resolveReasoningLevel(createModel(), "auto"), "medium");
});
it("maps auto to undefined for models without reasoning support", () => {
assert.equal(resolveReasoningLevel(createModel({ reasoning: false }), "auto"), undefined);
});
it("passes through explicit reasoning levels unchanged", () => {
assert.equal(resolveReasoningLevel(createModel(), "xhigh"), "xhigh");
});
});

View file

@ -1,4 +1,12 @@
import type { Api, Model, SimpleStreamOptions, StreamOptions, ThinkingBudgets, ThinkingLevel } from "../types.js"; import type {
Api,
Model,
RequestedThinkingLevel,
SimpleStreamOptions,
StreamOptions,
ThinkingBudgets,
ThinkingLevel,
} from "../types.js";
export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions { export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions {
return { return {
@ -19,6 +27,23 @@ export function clampReasoning(effort: ThinkingLevel | undefined): Exclude<Think
return effort === "xhigh" ? "high" : effort; return effort === "xhigh" ? "high" : effort;
} }
/**
 * Type guard: true only when the caller requested the "auto" reasoning level,
 * narrowing `effort` to the "auto" literal for the caller.
 */
export function isAutoReasoning(
	effort: RequestedThinkingLevel | undefined,
): effort is Extract<RequestedThinkingLevel, "auto"> {
	switch (effort) {
		case "auto":
			return true;
		default:
			return false;
	}
}
/**
 * Normalizes a requested reasoning level to a concrete ThinkingLevel.
 * "auto" (or an absent level) resolves to "medium" when the model supports
 * reasoning and to undefined otherwise; explicit levels pass through unchanged.
 */
export function resolveReasoningLevel(
	model: Model<Api>,
	effort: RequestedThinkingLevel | undefined,
): ThinkingLevel | undefined {
	const needsDefault = effort == null || effort === "auto";
	if (!needsDefault) {
		return effort;
	}
	return model.reasoning ? "medium" : undefined;
}
export function adjustMaxTokensForThinking( export function adjustMaxTokensForThinking(
baseMaxTokens: number, baseMaxTokens: number,
modelMaxTokens: number, modelMaxTokens: number,

View file

@ -50,6 +50,7 @@ export type KnownProvider =
export type Provider = KnownProvider | string; export type Provider = KnownProvider | string;
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh"; export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
export type RequestedThinkingLevel = "auto" | ThinkingLevel;
/** Token budgets for each thinking level (token-based providers only) */ /** Token budgets for each thinking level (token-based providers only) */
export interface ThinkingBudgets { export interface ThinkingBudgets {
@ -116,7 +117,7 @@ export type ProviderStreamOptions = StreamOptions & Record<string, unknown>;
// Unified options with reasoning passed to streamSimple() and completeSimple() // Unified options with reasoning passed to streamSimple() and completeSimple()
export interface SimpleStreamOptions extends StreamOptions { export interface SimpleStreamOptions extends StreamOptions {
reasoning?: ThinkingLevel; reasoning?: RequestedThinkingLevel;
/** Custom token budgets for thinking levels (token-based providers only) */ /** Custom token budgets for thinking levels (token-based providers only) */
thinkingBudgets?: ThinkingBudgets; thinkingBudgets?: ThinkingBudgets;
} }
@ -359,6 +360,12 @@ export interface ModelCapabilities {
* If omitted, the provider-level default is used. * If omitted, the provider-level default is used.
*/ */
charsPerToken?: number; charsPerToken?: number;
/**
* Whether this model's Anthropic-compatible thinking API accepts {"type":"enabled"}
* without a budget_tokens field. When true, reasoning:"auto" sends no budget
* and lets the model decide its own reasoning depth (e.g. Kimi via kimi-coding).
*/
thinkingNoBudget?: boolean;
} }
// Model interface for the unified model system // Model interface for the unified model system