diff --git a/packages/pi-ai/src/providers/amazon-bedrock.ts b/packages/pi-ai/src/providers/amazon-bedrock.ts
index 473c90d15..dee0c363e 100644
--- a/packages/pi-ai/src/providers/amazon-bedrock.ts
+++ b/packages/pi-ai/src/providers/amazon-bedrock.ts
@@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
 		messages: convertMessages(context, model, cacheRetention),
 		system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
 		inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
-		toolConfig: convertToolConfig(context.tools, options.toolChoice),
+		toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention),
 		additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
 	};
 	const nextCommandInput = await options?.onPayload?.(commandInput, model);
@@ -633,6 +633,8 @@ function convertMessages(
 function convertToolConfig(
 	tools: Tool[] | undefined,
 	toolChoice: BedrockOptions["toolChoice"],
+	model: Model<"bedrock-converse-stream">,
+	cacheRetention: CacheRetention,
 ): ToolConfiguration | undefined {
 	if (!tools?.length || toolChoice === "none") return undefined;
 
@@ -644,6 +646,16 @@
 		},
 	}));
 
+	// Add cachePoint after last tool for supported models
+	if (cacheRetention !== "none" && supportsPromptCaching(model)) {
+		bedrockTools.push({
+			cachePoint: {
+				type: CachePointType.DEFAULT,
+				...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
+			},
+		} as any);
+	}
+
 	let bedrockToolChoice: ToolChoice | undefined;
 	switch (toolChoice) {
 		case "auto":
diff --git a/packages/pi-ai/src/providers/anthropic-shared.test.ts b/packages/pi-ai/src/providers/anthropic-shared.test.ts
index 9b6718570..6e08bc52e 100644
--- a/packages/pi-ai/src/providers/anthropic-shared.test.ts
+++ b/packages/pi-ai/src/providers/anthropic-shared.test.ts
@@ -1,6 +1,60 @@
 import { describe, it } from "node:test";
 import assert from "node:assert/strict";
-import { mapStopReason } from "./anthropic-shared.js";
+import { convertTools, mapStopReason } from "./anthropic-shared.js";
+
+const makeTool = (name: string) =>
+	({
+		name,
+		description: `desc for ${name}`,
+		parameters: {
+			type: "object" as const,
+			properties: { arg: { type: "string" } },
+			required: ["arg"],
+		},
+	}) as any;
+
+describe("convertTools cache_control", () => {
+	it("adds cache_control to the last tool when cacheControl is provided", () => {
+		const tools = [makeTool("Read"), makeTool("Write"), makeTool("Edit")];
+		const cacheControl = { type: "ephemeral" as const };
+		const result = convertTools(tools, false, cacheControl);
+
+		assert.equal(result.length, 3);
+		assert.equal((result[0] as any).cache_control, undefined);
+		assert.equal((result[1] as any).cache_control, undefined);
+		assert.deepEqual((result[2] as any).cache_control, { type: "ephemeral" });
+	});
+
+	it("does not add cache_control when cacheControl is undefined", () => {
+		const tools = [makeTool("Read"), makeTool("Write")];
+		const result = convertTools(tools, false);
+
+		for (const tool of result) {
+			assert.equal((tool as any).cache_control, undefined);
+		}
+	});
+
+	it("handles empty tools array without error", () => {
+		const result = convertTools([], false, { type: "ephemeral" });
+		assert.equal(result.length, 0);
+	});
+
+	it("passes through ttl when provided", () => {
+		const tools = [makeTool("Read")];
+		const cacheControl = { type: "ephemeral" as const, ttl: "1h" as const };
+		const result = convertTools(tools, false, cacheControl);
+
+		assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral", ttl: "1h" });
+	});
+
+	it("single tool gets cache_control", () => {
+		const tools = [makeTool("Read")];
+		const result = convertTools(tools, false, { type: "ephemeral" });
+
+		assert.equal(result.length, 1);
+		assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral" });
+	});
+});
 
 describe("mapStopReason", () => {
 	it("maps end_turn to stop", () => {
diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts
index 4b9a57ea4..567609147 100644
--- a/packages/pi-ai/src/providers/anthropic-shared.ts
+++ b/packages/pi-ai/src/providers/anthropic-shared.ts
@@ -394,10 +394,14 @@ export function convertMessages(
 	return params;
 }
 
-export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
+export function convertTools(
+	tools: Tool[],
+	isOAuthToken: boolean,
+	cacheControl?: { type: "ephemeral"; ttl?: "1h" },
+): Anthropic.Messages.Tool[] {
 	if (!tools) return [];
 
-	return tools.map((tool) => {
+	const result = tools.map((tool) => {
 		const jsonSchema = tool.parameters as any;
 
 		return {
@@ -410,6 +414,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me
 			},
 		};
 	});
+
+	// Add cache breakpoint to last tool — covers entire tool block
+	if (cacheControl && result.length > 0) {
+		(result[result.length - 1] as any).cache_control = cacheControl;
+	}
+
+	return result;
 }
 
 export function buildParams(
@@ -457,7 +468,7 @@
 	}
 
 	if (context.tools) {
-		params.tools = convertTools(context.tools, isOAuthToken);
+		params.tools = convertTools(context.tools, isOAuthToken, cacheControl);
 	}
 
 	if (options?.thinkingEnabled && model.reasoning) {
diff --git a/packages/pi-ai/src/providers/openai-completions.ts b/packages/pi-ai/src/providers/openai-completions.ts
index 137e0efaf..51213ad39 100644
--- a/packages/pi-ai/src/providers/openai-completions.ts
+++ b/packages/pi-ai/src/providers/openai-completions.ts
@@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 	if (context.tools) {
 		params.tools = convertTools(context.tools, compat);
+		maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools);
 	} else if (hasToolHistory(context.messages)) {
 		// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
 		params.tools = [];
 	}
@@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 	return params;
 }
 
+function maybeAddOpenRouterAnthropicToolCacheControl(
+	model: Model<"openai-completions">,
+	tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,
+): void {
+	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;
+	if (!tools?.length) return;
+
+	const lastTool = tools[tools.length - 1];
+	if ("function" in lastTool) {
+		Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } });
+	}
+}
+
 function mapReasoningEffort(
 	effort: NonNullable<SimpleStreamOptions["reasoningEffort"]>,
 	reasoningEffortMap: Partial<Record<NonNullable<SimpleStreamOptions["reasoningEffort"]>, string>>,