From 49c05eaaa7f6c062b9d46d9ee526cb43d442e61b Mon Sep 17 00:00:00 2001 From: Jeremy Date: Sun, 29 Mar 2026 22:25:36 -0500 Subject: [PATCH] perf(pi-ai): add cache_control breakpoints to tool definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tool definitions are large and static across turns but had no cache breakpoint. Add a 3rd breakpoint (system → tools → last user message) for Anthropic direct, Bedrock, and OpenRouter Anthropic providers. Closes gsd-build/gsd-2#3176 --- packages/pi-ai/src/providers/amazon-bedrock.ts | 14 +++++++++++++- .../pi-ai/src/providers/anthropic-shared.ts | 17 ++++++++++++++--- .../pi-ai/src/providers/openai-completions.ts | 14 ++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/packages/pi-ai/src/providers/amazon-bedrock.ts b/packages/pi-ai/src/providers/amazon-bedrock.ts index 52b42b4d1..e8df99217 100644 --- a/packages/pi-ai/src/providers/amazon-bedrock.ts +++ b/packages/pi-ai/src/providers/amazon-bedrock.ts @@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt messages: convertMessages(context, model, cacheRetention), system: buildSystemPrompt(context.systemPrompt, model, cacheRetention), inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature }, - toolConfig: convertToolConfig(context.tools, options.toolChoice), + toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention), additionalModelRequestFields: buildAdditionalModelRequestFields(model, options), }; const nextCommandInput = await options?.onPayload?.(commandInput, model); @@ -633,6 +633,8 @@ function convertMessages( function convertToolConfig( tools: Tool[] | undefined, toolChoice: BedrockOptions["toolChoice"], + model: Model<"bedrock-converse-stream">, + cacheRetention: CacheRetention, ): ToolConfiguration | undefined { if (!tools?.length || toolChoice === "none") return undefined; @@ -644,6 +646,16 @@ function convertToolConfig( }, })); + // Add cachePoint after last tool for supported models + if (cacheRetention !== "none" && supportsPromptCaching(model)) { + bedrockTools.push({ + cachePoint: { + type: CachePointType.DEFAULT, + ...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}), + }, + } as any); + } + let bedrockToolChoice: ToolChoice | undefined; switch (toolChoice) { case "auto": diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 4425df7dd..693ec54e6 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -393,10 +393,14 @@ export function convertMessages( return params; } -export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] { +export function convertTools( + tools: Tool[], + isOAuthToken: boolean, + cacheControl?: { type: "ephemeral"; ttl?: "1h" }, +): Anthropic.Messages.Tool[] { if (!tools) return []; - return tools.map((tool) => { + const result = tools.map((tool) => { const jsonSchema = tool.parameters as any; return { @@ -409,6 +413,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me }, }; }); + + // Add cache breakpoint to last tool — covers entire tool block + if (cacheControl && result.length > 0) { + (result[result.length - 1] as any).cache_control = cacheControl; + } + + return result; } export function buildParams( @@ -456,7 +467,7 @@ export function buildParams( } if (context.tools) { - params.tools = convertTools(context.tools, isOAuthToken); + params.tools = convertTools(context.tools, isOAuthToken, cacheControl); } if (options?.thinkingEnabled && model.reasoning) { diff --git a/packages/pi-ai/src/providers/openai-completions.ts b/packages/pi-ai/src/providers/openai-completions.ts index 4d6e1a3cf..261082aa2 100644 --- a/packages/pi-ai/src/providers/openai-completions.ts +++ b/packages/pi-ai/src/providers/openai-completions.ts @@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio if (context.tools) { params.tools = convertTools(context.tools, compat); + maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools); } else if (hasToolHistory(context.messages)) { // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results params.tools = []; @@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio return params; } +function maybeAddOpenRouterAnthropicToolCacheControl( + model: Model<"openai-completions">, + tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined, +): void { + if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return; + if (!tools?.length) return; + + const lastTool = tools[tools.length - 1]; + if ("function" in lastTool) { + Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } }); + } +} + function mapReasoningEffort( effort: NonNullable, reasoningEffortMap: Partial, string>>,