perf(pi-ai): add cache_control breakpoints to tool definitions

Tool definitions are large and static across turns but had no cache
breakpoint. Add a 3rd breakpoint (system → tools → last user message)
for Anthropic direct, Bedrock, and OpenRouter Anthropic providers.

Closes gsd-build/gsd-2#3176
This commit is contained in:
Jeremy 2026-03-29 22:25:36 -05:00
parent 0a2c9b64c6
commit 49c05eaaa7
3 changed files with 41 additions and 4 deletions

View file

@@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
messages: convertMessages(context, model, cacheRetention),
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
toolConfig: convertToolConfig(context.tools, options.toolChoice),
toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention),
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
};
const nextCommandInput = await options?.onPayload?.(commandInput, model);
@@ -633,6 +633,8 @@ function convertMessages(
function convertToolConfig(
tools: Tool[] | undefined,
toolChoice: BedrockOptions["toolChoice"],
model: Model<"bedrock-converse-stream">,
cacheRetention: CacheRetention,
): ToolConfiguration | undefined {
if (!tools?.length || toolChoice === "none") return undefined;
@@ -644,6 +646,16 @@ function convertToolConfig(
},
}));
// Add cachePoint after last tool for supported models
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
bedrockTools.push({
cachePoint: {
type: CachePointType.DEFAULT,
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
},
} as any);
}
let bedrockToolChoice: ToolChoice | undefined;
switch (toolChoice) {
case "auto":

View file

@@ -393,10 +393,14 @@ export function convertMessages(
return params;
}
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
export function convertTools(
tools: Tool[],
isOAuthToken: boolean,
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): Anthropic.Messages.Tool[] {
if (!tools) return [];
return tools.map((tool) => {
const result = tools.map((tool) => {
const jsonSchema = tool.parameters as any;
return {
@@ -409,6 +413,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me
},
};
});
// Add cache breakpoint to last tool — covers entire tool block
if (cacheControl && result.length > 0) {
(result[result.length - 1] as any).cache_control = cacheControl;
}
return result;
}
export function buildParams(
@@ -456,7 +467,7 @@ export function buildParams(
}
if (context.tools) {
params.tools = convertTools(context.tools, isOAuthToken);
params.tools = convertTools(context.tools, isOAuthToken, cacheControl);
}
if (options?.thinkingEnabled && model.reasoning) {

View file

@@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
if (context.tools) {
params.tools = convertTools(context.tools, compat);
maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools);
} else if (hasToolHistory(context.messages)) {
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
params.tools = [];
@@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
return params;
}
/**
 * Marks the last tool definition with a `cache_control` breakpoint for
 * Anthropic models routed through OpenRouter (`provider === "openrouter"`,
 * id starting with `anthropic/`). Placing the breakpoint on the final tool
 * lets the entire tool block — large and static across turns — be cached.
 *
 * Mutates `tools` in place. No-op for any other provider/model, for an
 * empty or missing tool list, or when the last entry carries no `function`.
 */
function maybeAddOpenRouterAnthropicToolCacheControl(
  model: Model<"openai-completions">,
  tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,
): void {
  const isOpenRouterAnthropic =
    model.provider === "openrouter" && model.id.startsWith("anthropic/");
  if (!isOpenRouterAnthropic) return;
  if (!tools || tools.length === 0) return;

  const last = tools[tools.length - 1];
  if (!("function" in last)) return;
  // Same effect as Object.assign: attach the ephemeral cache marker.
  (last.function as { cache_control?: { type: string } }).cache_control = {
    type: "ephemeral",
  };
}
function mapReasoningEffort(
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>>,