pi-ai: add reasoning:auto across all providers + Kimi K2.6
The new RequestedThinkingLevel type adds "auto" as a value for the reasoning
option. Providers handle it natively where they can; otherwise it resolves to medium:
- Claude 4.x (anthropic/bedrock): adaptive thinking, no effort constraint
- Gemini (google/vertex/gemini-cli): THINKING_LEVEL_UNSPECIFIED on Gemini 3 models, dynamic budget (budgetTokens: -1) on other models
- GPT-5+ (openai-responses): reasoning.effort omitted, model decides; azure-openai-responses resolves auto to medium
- Kimi (kimi-coding): {"type":"enabled"} without budget_tokens via new
capabilities.thinkingNoBudget flag — model manages reasoning depth
- GLM (zai, thinkingFormat:zai): enable_thinking:true already correct
- MiniMax (anthropic API): explicit budget_tokens required, resolves to medium
ModelCapabilities.thinkingNoBudget: new flag for Anthropic-compatible providers
that accept {"type":"enabled"} without a budget (Kimi API).
models.generated.ts: add Kimi K2.6 (id: kimi-for-coding, beta API); add
thinkingNoBudget capability to all kimi-coding models.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
38d3bd55da
commit
f1da908dcd
15 changed files with 241 additions and 36 deletions
|
|
@ -4616,6 +4616,24 @@ export const MODELS = {
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
},
|
},
|
||||||
"kimi-coding": {
|
"kimi-coding": {
|
||||||
|
"kimi-for-coding": {
|
||||||
|
id: "kimi-for-coding",
|
||||||
|
name: "Kimi K2.6",
|
||||||
|
api: "anthropic-messages",
|
||||||
|
provider: "kimi-coding",
|
||||||
|
baseUrl: "https://api.kimi.com/coding",
|
||||||
|
reasoning: true,
|
||||||
|
input: ["text", "image"],
|
||||||
|
capabilities: { thinkingNoBudget: true },
|
||||||
|
cost: {
|
||||||
|
input: 0.6,
|
||||||
|
output: 2.5,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
},
|
||||||
|
contextWindow: 262144,
|
||||||
|
maxTokens: 32768,
|
||||||
|
} satisfies Model<"anthropic-messages">,
|
||||||
"k2p5": {
|
"k2p5": {
|
||||||
id: "k2p5",
|
id: "k2p5",
|
||||||
name: "Kimi K2.5",
|
name: "Kimi K2.5",
|
||||||
|
|
@ -4624,6 +4642,7 @@ export const MODELS = {
|
||||||
baseUrl: "https://api.kimi.com/coding",
|
baseUrl: "https://api.kimi.com/coding",
|
||||||
reasoning: true,
|
reasoning: true,
|
||||||
input: ["text", "image"],
|
input: ["text", "image"],
|
||||||
|
capabilities: { thinkingNoBudget: true },
|
||||||
cost: {
|
cost: {
|
||||||
input: 0,
|
input: 0,
|
||||||
output: 0,
|
output: 0,
|
||||||
|
|
@ -4641,6 +4660,7 @@ export const MODELS = {
|
||||||
baseUrl: "https://api.kimi.com/coding",
|
baseUrl: "https://api.kimi.com/coding",
|
||||||
reasoning: true,
|
reasoning: true,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
|
capabilities: { thinkingNoBudget: true },
|
||||||
cost: {
|
cost: {
|
||||||
input: 0,
|
input: 0,
|
||||||
output: 0,
|
output: 0,
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ import type {
|
||||||
CacheRetention,
|
CacheRetention,
|
||||||
Context,
|
Context,
|
||||||
Model,
|
Model,
|
||||||
|
RequestedThinkingLevel,
|
||||||
SimpleStreamOptions,
|
SimpleStreamOptions,
|
||||||
StopReason,
|
StopReason,
|
||||||
StreamFunction,
|
StreamFunction,
|
||||||
|
|
@ -42,7 +43,7 @@ import type {
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||||
|
|
||||||
export interface BedrockOptions extends StreamOptions {
|
export interface BedrockOptions extends StreamOptions {
|
||||||
|
|
@ -50,7 +51,7 @@ export interface BedrockOptions extends StreamOptions {
|
||||||
profile?: string;
|
profile?: string;
|
||||||
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
||||||
/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
|
/* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
|
||||||
reasoning?: ThinkingLevel;
|
reasoning?: RequestedThinkingLevel;
|
||||||
/* Custom token budgets per thinking level. Overrides default budgets. */
|
/* Custom token budgets per thinking level. Overrides default budgets. */
|
||||||
thinkingBudgets?: ThinkingBudgets;
|
thinkingBudgets?: ThinkingBudgets;
|
||||||
/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
|
/* Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
|
||||||
|
|
@ -226,8 +227,10 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
|
||||||
return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);
|
return streamBedrock(model, context, { ...base, reasoning: undefined } satisfies BedrockOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning);
|
||||||
|
|
||||||
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
||||||
if (supportsAdaptiveThinking(model.id)) {
|
if (supportsAdaptiveThinking(model.id) && isAutoReasoning(options.reasoning)) {
|
||||||
return streamBedrock(model, context, {
|
return streamBedrock(model, context, {
|
||||||
...base,
|
...base,
|
||||||
reasoning: options.reasoning,
|
reasoning: options.reasoning,
|
||||||
|
|
@ -235,27 +238,35 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
|
||||||
} satisfies BedrockOptions);
|
} satisfies BedrockOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (supportsAdaptiveThinking(model.id)) {
|
||||||
|
return streamBedrock(model, context, {
|
||||||
|
...base,
|
||||||
|
reasoning: effectiveReasoning,
|
||||||
|
thinkingBudgets: options.thinkingBudgets,
|
||||||
|
} satisfies BedrockOptions);
|
||||||
|
}
|
||||||
|
|
||||||
const adjusted = adjustMaxTokensForThinking(
|
const adjusted = adjustMaxTokensForThinking(
|
||||||
base.maxTokens || 0,
|
base.maxTokens || 0,
|
||||||
model.maxTokens,
|
model.maxTokens,
|
||||||
options.reasoning,
|
effectiveReasoning!,
|
||||||
options.thinkingBudgets,
|
options.thinkingBudgets,
|
||||||
);
|
);
|
||||||
|
|
||||||
return streamBedrock(model, context, {
|
return streamBedrock(model, context, {
|
||||||
...base,
|
...base,
|
||||||
maxTokens: adjusted.maxTokens,
|
maxTokens: adjusted.maxTokens,
|
||||||
reasoning: options.reasoning,
|
reasoning: effectiveReasoning,
|
||||||
thinkingBudgets: {
|
thinkingBudgets: {
|
||||||
...(options.thinkingBudgets || {}),
|
...(options.thinkingBudgets || {}),
|
||||||
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
|
[clampReasoning(effectiveReasoning)!]: adjusted.thinkingBudget,
|
||||||
},
|
},
|
||||||
} satisfies BedrockOptions);
|
} satisfies BedrockOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
return streamBedrock(model, context, {
|
return streamBedrock(model, context, {
|
||||||
...base,
|
...base,
|
||||||
reasoning: options.reasoning,
|
reasoning: effectiveReasoning,
|
||||||
thinkingBudgets: options.thinkingBudgets,
|
thinkingBudgets: options.thinkingBudgets,
|
||||||
} satisfies BedrockOptions);
|
} satisfies BedrockOptions);
|
||||||
};
|
};
|
||||||
|
|
@ -407,6 +418,8 @@ export function mapThinkingLevelToEffort(
|
||||||
modelId: string,
|
modelId: string,
|
||||||
): "low" | "medium" | "high" | "xhigh" | "max" {
|
): "low" | "medium" | "high" | "xhigh" | "max" {
|
||||||
switch (level) {
|
switch (level) {
|
||||||
|
case "auto":
|
||||||
|
return "medium";
|
||||||
case "minimal":
|
case "minimal":
|
||||||
case "low":
|
case "low":
|
||||||
return "low";
|
return "low";
|
||||||
|
|
@ -709,8 +722,12 @@ export function buildAdditionalModelRequestFields(
|
||||||
|
|
||||||
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
||||||
const result: Record<string, any> = supportsAdaptiveThinking(model.id)
|
const result: Record<string, any> = supportsAdaptiveThinking(model.id)
|
||||||
|
? options.reasoning === "auto"
|
||||||
? {
|
? {
|
||||||
thinking: { type: "adaptive" },
|
thinking: { type: "adaptive" },
|
||||||
|
}
|
||||||
|
: {
|
||||||
|
thinking: { type: "adaptive" },
|
||||||
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
|
output_config: { effort: mapThinkingLevelToEffort(options.reasoning, model.id) },
|
||||||
}
|
}
|
||||||
: (() => {
|
: (() => {
|
||||||
|
|
@ -723,8 +740,9 @@ export function buildAdditionalModelRequestFields(
|
||||||
};
|
};
|
||||||
|
|
||||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||||
const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
const normalizedReasoning = options.reasoning === "auto" ? "medium" : options.reasoning;
|
||||||
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];
|
const level = normalizedReasoning === "xhigh" ? "high" : normalizedReasoning;
|
||||||
|
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[normalizedReasoning];
|
||||||
|
|
||||||
return {
|
return {
|
||||||
thinking: {
|
thinking: {
|
||||||
|
|
|
||||||
|
|
@ -163,6 +163,8 @@ export function supportsAdaptiveThinking(modelId: string): boolean {
|
||||||
|
|
||||||
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
|
export function mapThinkingLevelToEffort(level: string | undefined, modelId: string): AnthropicEffort {
|
||||||
switch (level) {
|
switch (level) {
|
||||||
|
case "auto":
|
||||||
|
return "medium";
|
||||||
case "minimal":
|
case "minimal":
|
||||||
return "low";
|
return "low";
|
||||||
case "low":
|
case "low":
|
||||||
|
|
@ -481,6 +483,10 @@ export function buildParams(
|
||||||
if (options.effort) {
|
if (options.effort) {
|
||||||
params.output_config = { effort: options.effort };
|
params.output_config = { effort: options.effort };
|
||||||
}
|
}
|
||||||
|
} else if (model.capabilities?.thinkingNoBudget) {
|
||||||
|
// Provider accepts {"type":"enabled"} without budget_tokens — model manages depth.
|
||||||
|
// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not.
|
||||||
|
(params as any).thinking = { type: "enabled" };
|
||||||
} else {
|
} else {
|
||||||
params.thinking = {
|
params.thinking = {
|
||||||
type: "enabled",
|
type: "enabled",
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import type {
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
|
|
||||||
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
|
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
|
||||||
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
|
import { adjustMaxTokensForThinking, buildBaseOptions, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
import {
|
import {
|
||||||
type AnthropicEffort,
|
type AnthropicEffort,
|
||||||
type AnthropicOptions,
|
type AnthropicOptions,
|
||||||
|
|
@ -194,10 +194,19 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
|
||||||
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
|
return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isAutoReasoning(options.reasoning) && (supportsAdaptiveThinking(model.id) || model.capabilities?.thinkingNoBudget)) {
|
||||||
|
return streamAnthropic(model, context, {
|
||||||
|
...base,
|
||||||
|
thinkingEnabled: true,
|
||||||
|
} satisfies AnthropicOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
const effectiveReasoning = resolveReasoningLevel(model, options.reasoning)!;
|
||||||
|
|
||||||
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
|
||||||
// For older models: use budget-based thinking
|
// For older models: use budget-based thinking
|
||||||
if (supportsAdaptiveThinking(model.id)) {
|
if (supportsAdaptiveThinking(model.id)) {
|
||||||
const effort = mapThinkingLevelToEffort(options.reasoning, model.id);
|
const effort = mapThinkingLevelToEffort(effectiveReasoning, model.id);
|
||||||
return streamAnthropic(model, context, {
|
return streamAnthropic(model, context, {
|
||||||
...base,
|
...base,
|
||||||
thinkingEnabled: true,
|
thinkingEnabled: true,
|
||||||
|
|
@ -208,7 +217,7 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
|
||||||
const adjusted = adjustMaxTokensForThinking(
|
const adjusted = adjustMaxTokensForThinking(
|
||||||
base.maxTokens || 0,
|
base.maxTokens || 0,
|
||||||
model.maxTokens,
|
model.maxTokens,
|
||||||
options.reasoning,
|
effectiveReasoning,
|
||||||
options.thinkingBudgets,
|
options.thinkingBudgets,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ import {
|
||||||
finalizeStream,
|
finalizeStream,
|
||||||
handleStreamError,
|
handleStreamError,
|
||||||
} from "./openai-shared.js";
|
} from "./openai-shared.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
|
let _AzureOpenAIClass: typeof AzureOpenAI | undefined;
|
||||||
async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
|
async function getAzureOpenAIClass(): Promise<typeof AzureOpenAI> {
|
||||||
|
|
@ -118,7 +118,8 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<"azure-openai-resp
|
||||||
}
|
}
|
||||||
|
|
||||||
const base = buildBaseOptions(model, options, apiKey);
|
const base = buildBaseOptions(model, options, apiKey);
|
||||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
|
||||||
|
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
|
||||||
|
|
||||||
return streamAzureOpenAIResponses(model, context, {
|
return streamAzureOpenAIResponses(model, context, {
|
||||||
...base,
|
...base,
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ import {
|
||||||
mapToolChoice,
|
mapToolChoice,
|
||||||
retainThoughtSignature,
|
retainThoughtSignature,
|
||||||
} from "./google-shared.js";
|
} from "./google-shared.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Thinking level for Gemini 3 models.
|
* Thinking level for Gemini 3 models.
|
||||||
|
|
@ -387,7 +387,27 @@ export const streamSimpleGoogleGeminiCli: StreamFunction<"google-gemini-cli", Si
|
||||||
} satisfies GoogleGeminiCliOptions);
|
} satisfies GoogleGeminiCliOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
const effort = clampReasoning(options.reasoning)!;
|
if (isAutoReasoning(options.reasoning)) {
|
||||||
|
if (isGemini3Model(model.id)) {
|
||||||
|
return streamGoogleGeminiCli(model, context, {
|
||||||
|
...base,
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
level: "THINKING_LEVEL_UNSPECIFIED",
|
||||||
|
},
|
||||||
|
} satisfies GoogleGeminiCliOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
return streamGoogleGeminiCli(model, context, {
|
||||||
|
...base,
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
budgetTokens: -1,
|
||||||
|
},
|
||||||
|
} satisfies GoogleGeminiCliOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
|
||||||
if (isGemini3Model(model.id)) {
|
if (isGemini3Model(model.id)) {
|
||||||
return streamGoogleGeminiCli(model, context, {
|
return streamGoogleGeminiCli(model, context, {
|
||||||
...base,
|
...base,
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ import {
|
||||||
mapToolChoice,
|
mapToolChoice,
|
||||||
retainThoughtSignature,
|
retainThoughtSignature,
|
||||||
} from "./google-shared.js";
|
} from "./google-shared.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
let _GoogleVertexClass: typeof GoogleGenAI | undefined;
|
let _GoogleVertexClass: typeof GoogleGenAI | undefined;
|
||||||
async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
|
async function getGoogleVertexClass(): Promise<typeof GoogleGenAI> {
|
||||||
|
|
@ -308,7 +308,28 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
|
||||||
} satisfies GoogleVertexOptions);
|
} satisfies GoogleVertexOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
const effort = clampReasoning(options.reasoning)!;
|
if (isAutoReasoning(options.reasoning)) {
|
||||||
|
const geminiModel = model as unknown as Model<"google-generative-ai">;
|
||||||
|
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
||||||
|
return streamGoogleVertex(model, context, {
|
||||||
|
...base,
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
level: "THINKING_LEVEL_UNSPECIFIED",
|
||||||
|
},
|
||||||
|
} satisfies GoogleVertexOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
return streamGoogleVertex(model, context, {
|
||||||
|
...base,
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
budgetTokens: -1,
|
||||||
|
},
|
||||||
|
} satisfies GoogleVertexOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
|
||||||
const geminiModel = model as unknown as Model<"google-generative-ai">;
|
const geminiModel = model as unknown as Model<"google-generative-ai">;
|
||||||
|
|
||||||
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ import {
|
||||||
mapToolChoice,
|
mapToolChoice,
|
||||||
retainThoughtSignature,
|
retainThoughtSignature,
|
||||||
} from "./google-shared.js";
|
} from "./google-shared.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
export interface GoogleOptions extends StreamOptions {
|
export interface GoogleOptions extends StreamOptions {
|
||||||
toolChoice?: "auto" | "none" | "any";
|
toolChoice?: "auto" | "none" | "any";
|
||||||
|
|
@ -297,7 +297,28 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt
|
||||||
return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions);
|
return streamGoogle(model, context, { ...base, thinking: { enabled: false } } satisfies GoogleOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
const effort = clampReasoning(options.reasoning)!;
|
if (isAutoReasoning(options.reasoning)) {
|
||||||
|
const googleModel = model as Model<"google-generative-ai">;
|
||||||
|
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
||||||
|
return streamGoogle(model, context, {
|
||||||
|
...base,
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
level: "THINKING_LEVEL_UNSPECIFIED",
|
||||||
|
},
|
||||||
|
} satisfies GoogleOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
return streamGoogle(model, context, {
|
||||||
|
...base,
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
budgetTokens: -1,
|
||||||
|
},
|
||||||
|
} satisfies GoogleOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
const effort = clampReasoning(resolveReasoningLevel(model, options.reasoning))!;
|
||||||
const googleModel = model as Model<"google-generative-ai">;
|
const googleModel = model as Model<"google-generative-ai">;
|
||||||
|
|
||||||
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { shortHash } from "../utils/hash.js";
|
import { shortHash } from "../utils/hash.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||||
|
|
||||||
const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
|
const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
|
||||||
|
|
@ -125,7 +125,7 @@ export const streamSimpleMistral: StreamFunction<"mistral-conversations", Simple
|
||||||
}
|
}
|
||||||
|
|
||||||
const base = buildBaseOptions(model, options, apiKey);
|
const base = buildBaseOptions(model, options, apiKey);
|
||||||
const reasoning = clampReasoning(options?.reasoning);
|
const reasoning = clampReasoning(resolveReasoningLevel(model, options?.reasoning));
|
||||||
|
|
||||||
return streamMistral(model, context, {
|
return streamMistral(model, context, {
|
||||||
...base,
|
...base,
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ import type {
|
||||||
} from "../types.js";
|
} from "../types.js";
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
|
import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Configuration
|
// Configuration
|
||||||
|
|
@ -273,7 +273,8 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<"openai-codex-resp
|
||||||
}
|
}
|
||||||
|
|
||||||
const base = buildBaseOptions(model, options, apiKey);
|
const base = buildBaseOptions(model, options, apiKey);
|
||||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
|
||||||
|
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
|
||||||
|
|
||||||
return streamOpenAICodexResponses(model, context, {
|
return streamOpenAICodexResponses(model, context, {
|
||||||
...base,
|
...base,
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ import type {
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
import {
|
import {
|
||||||
assertStreamSuccess,
|
assertStreamSuccess,
|
||||||
buildInitialOutput,
|
buildInitialOutput,
|
||||||
|
|
@ -302,7 +302,8 @@ export const streamSimpleOpenAICompletions: StreamFunction<"openai-completions",
|
||||||
}
|
}
|
||||||
|
|
||||||
const base = buildBaseOptions(model, options, apiKey);
|
const base = buildBaseOptions(model, options, apiKey);
|
||||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
const effectiveReasoning = resolveReasoningLevel(model, options?.reasoning);
|
||||||
|
const reasoningEffort = supportsXhigh(model) ? effectiveReasoning : clampReasoning(effectiveReasoning);
|
||||||
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
|
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
|
||||||
|
|
||||||
return streamOpenAICompletions(model, context, {
|
return streamOpenAICompletions(model, context, {
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ import {
|
||||||
finalizeStream,
|
finalizeStream,
|
||||||
handleStreamError,
|
handleStreamError,
|
||||||
} from "./openai-shared.js";
|
} from "./openai-shared.js";
|
||||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
import { buildBaseOptions, clampReasoning, isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
|
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
|
||||||
|
|
||||||
|
|
@ -56,7 +56,8 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
|
||||||
|
|
||||||
// OpenAI Responses-specific options
|
// OpenAI Responses-specific options
|
||||||
export interface OpenAIResponsesOptions extends StreamOptions {
|
export interface OpenAIResponsesOptions extends StreamOptions {
|
||||||
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
/** "auto" means no effort constraint — model decides its own reasoning depth (GPT-5+). */
|
||||||
|
reasoningEffort?: "auto" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||||
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
||||||
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
|
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
|
||||||
}
|
}
|
||||||
|
|
@ -118,7 +119,11 @@ export const streamSimpleOpenAIResponses: StreamFunction<"openai-responses", Sim
|
||||||
}
|
}
|
||||||
|
|
||||||
const base = buildBaseOptions(model, options, apiKey);
|
const base = buildBaseOptions(model, options, apiKey);
|
||||||
const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
|
const reasoningEffort: OpenAIResponsesOptions["reasoningEffort"] = isAutoReasoning(options?.reasoning)
|
||||||
|
? "auto"
|
||||||
|
: supportsXhigh(model)
|
||||||
|
? resolveReasoningLevel(model, options?.reasoning)
|
||||||
|
: clampReasoning(resolveReasoningLevel(model, options?.reasoning));
|
||||||
|
|
||||||
return streamOpenAIResponses(model, context, {
|
return streamOpenAIResponses(model, context, {
|
||||||
...base,
|
...base,
|
||||||
|
|
@ -157,7 +162,12 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
|
||||||
|
|
||||||
if (model.reasoning) {
|
if (model.reasoning) {
|
||||||
params.include = ["reasoning.encrypted_content"];
|
params.include = ["reasoning.encrypted_content"];
|
||||||
if (options?.reasoningEffort || options?.reasoningSummary) {
|
if (options?.reasoningEffort === "auto") {
|
||||||
|
// Let the model decide its own reasoning depth — no effort constraint.
|
||||||
|
// GPT-5+ will reason as much as it judges necessary, same as
|
||||||
|
// THINKING_LEVEL_UNSPECIFIED for Gemini 3.
|
||||||
|
params.reasoning = { summary: options?.reasoningSummary || "auto" };
|
||||||
|
} else if (options?.reasoningEffort || options?.reasoningSummary) {
|
||||||
const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort;
|
const effort = clampReasoningForModel(model.name, options?.reasoningEffort || "medium") as typeof options.reasoningEffort;
|
||||||
params.reasoning = {
|
params.reasoning = {
|
||||||
effort: effort || "medium",
|
effort: effort || "medium",
|
||||||
|
|
|
||||||
45
packages/pi-ai/src/providers/simple-options.test.ts
Normal file
45
packages/pi-ai/src/providers/simple-options.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { describe, it } from "node:test";
|
||||||
|
import type { Model } from "../types.js";
|
||||||
|
import { isAutoReasoning, resolveReasoningLevel } from "./simple-options.js";
|
||||||
|
|
||||||
|
function createModel(overrides: Partial<Model<any>> = {}): Model<any> {
|
||||||
|
return {
|
||||||
|
id: "test-model",
|
||||||
|
name: "Test Model",
|
||||||
|
provider: "openai",
|
||||||
|
api: "openai-responses",
|
||||||
|
baseUrl: "https://api.openai.com/v1",
|
||||||
|
contextWindow: 128_000,
|
||||||
|
maxTokens: 16_384,
|
||||||
|
input: ["text"],
|
||||||
|
reasoning: true,
|
||||||
|
cost: {
|
||||||
|
input: 0,
|
||||||
|
output: 0,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
},
|
||||||
|
...overrides,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("simple-options reasoning helpers", () => {
|
||||||
|
it("recognizes auto reasoning requests", () => {
|
||||||
|
assert.equal(isAutoReasoning("auto"), true);
|
||||||
|
assert.equal(isAutoReasoning("medium"), false);
|
||||||
|
assert.equal(isAutoReasoning(undefined), false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("maps auto to medium for reasoning-capable models", () => {
|
||||||
|
assert.equal(resolveReasoningLevel(createModel(), "auto"), "medium");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("maps auto to undefined for models without reasoning support", () => {
|
||||||
|
assert.equal(resolveReasoningLevel(createModel({ reasoning: false }), "auto"), undefined);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("passes through explicit reasoning levels unchanged", () => {
|
||||||
|
assert.equal(resolveReasoningLevel(createModel(), "xhigh"), "xhigh");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -1,4 +1,12 @@
|
||||||
import type { Api, Model, SimpleStreamOptions, StreamOptions, ThinkingBudgets, ThinkingLevel } from "../types.js";
|
import type {
|
||||||
|
Api,
|
||||||
|
Model,
|
||||||
|
RequestedThinkingLevel,
|
||||||
|
SimpleStreamOptions,
|
||||||
|
StreamOptions,
|
||||||
|
ThinkingBudgets,
|
||||||
|
ThinkingLevel,
|
||||||
|
} from "../types.js";
|
||||||
|
|
||||||
export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions {
|
export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOptions, apiKey?: string): StreamOptions {
|
||||||
return {
|
return {
|
||||||
|
|
@ -19,6 +27,23 @@ export function clampReasoning(effort: ThinkingLevel | undefined): Exclude<Think
|
||||||
return effort === "xhigh" ? "high" : effort;
|
return effort === "xhigh" ? "high" : effort;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function isAutoReasoning(
|
||||||
|
effort: RequestedThinkingLevel | undefined,
|
||||||
|
): effort is Extract<RequestedThinkingLevel, "auto"> {
|
||||||
|
return effort === "auto";
|
||||||
|
}
|
||||||
|
|
||||||
|
export function resolveReasoningLevel(
|
||||||
|
model: Model<Api>,
|
||||||
|
effort: RequestedThinkingLevel | undefined,
|
||||||
|
): ThinkingLevel | undefined {
|
||||||
|
if (!effort || effort === "auto") {
|
||||||
|
if (!model.reasoning) return undefined;
|
||||||
|
return "medium";
|
||||||
|
}
|
||||||
|
return effort;
|
||||||
|
}
|
||||||
|
|
||||||
export function adjustMaxTokensForThinking(
|
export function adjustMaxTokensForThinking(
|
||||||
baseMaxTokens: number,
|
baseMaxTokens: number,
|
||||||
modelMaxTokens: number,
|
modelMaxTokens: number,
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ export type KnownProvider =
|
||||||
export type Provider = KnownProvider | string;
|
export type Provider = KnownProvider | string;
|
||||||
|
|
||||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||||
|
export type RequestedThinkingLevel = "auto" | ThinkingLevel;
|
||||||
|
|
||||||
/** Token budgets for each thinking level (token-based providers only) */
|
/** Token budgets for each thinking level (token-based providers only) */
|
||||||
export interface ThinkingBudgets {
|
export interface ThinkingBudgets {
|
||||||
|
|
@ -116,7 +117,7 @@ export type ProviderStreamOptions = StreamOptions & Record<string, unknown>;
|
||||||
|
|
||||||
// Unified options with reasoning passed to streamSimple() and completeSimple()
|
// Unified options with reasoning passed to streamSimple() and completeSimple()
|
||||||
export interface SimpleStreamOptions extends StreamOptions {
|
export interface SimpleStreamOptions extends StreamOptions {
|
||||||
reasoning?: ThinkingLevel;
|
reasoning?: RequestedThinkingLevel;
|
||||||
/** Custom token budgets for thinking levels (token-based providers only) */
|
/** Custom token budgets for thinking levels (token-based providers only) */
|
||||||
thinkingBudgets?: ThinkingBudgets;
|
thinkingBudgets?: ThinkingBudgets;
|
||||||
}
|
}
|
||||||
|
|
@ -359,6 +360,12 @@ export interface ModelCapabilities {
|
||||||
* If omitted, the provider-level default is used.
|
* If omitted, the provider-level default is used.
|
||||||
*/
|
*/
|
||||||
charsPerToken?: number;
|
charsPerToken?: number;
|
||||||
|
/**
|
||||||
|
* Whether this model's Anthropic-compatible thinking API accepts {"type":"enabled"}
|
||||||
|
* without a budget_tokens field. When true, reasoning:"auto" sends no budget
|
||||||
|
* and lets the model decide its own reasoning depth (e.g. Kimi via kimi-coding).
|
||||||
|
*/
|
||||||
|
thinkingNoBudget?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Model interface for the unified model system
|
// Model interface for the unified model system
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue