Merge pull request #3177 from jeremymcs/perf/tool-cache-control

perf(pi-ai): add cache_control breakpoints to tool definitions
This commit is contained in:
Jeremy McSpadden 2026-04-11 23:12:07 -05:00 committed by GitHub
commit 74a832563f
4 changed files with 96 additions and 5 deletions

View file

@@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
messages: convertMessages(context, model, cacheRetention),
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
toolConfig: convertToolConfig(context.tools, options.toolChoice),
toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention),
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
};
const nextCommandInput = await options?.onPayload?.(commandInput, model);
@@ -633,6 +633,8 @@ function convertMessages(
function convertToolConfig(
tools: Tool[] | undefined,
toolChoice: BedrockOptions["toolChoice"],
model: Model<"bedrock-converse-stream">,
cacheRetention: CacheRetention,
): ToolConfiguration | undefined {
if (!tools?.length || toolChoice === "none") return undefined;
@@ -644,6 +646,16 @@ function convertToolConfig(
},
}));
// Add cachePoint after last tool for supported models
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
bedrockTools.push({
cachePoint: {
type: CachePointType.DEFAULT,
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
},
} as any);
}
let bedrockToolChoice: ToolChoice | undefined;
switch (toolChoice) {
case "auto":

View file

@@ -1,6 +1,60 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { mapStopReason } from "./anthropic-shared.js";
import { convertTools, mapStopReason } from "./anthropic-shared.js";
// Builds a minimal tool stub with one required string argument ("arg").
// Cast to `any` because the tests only care about the structural shape,
// not the project's Tool type.
const makeTool = (name: string) => {
  const parameters = {
    type: "object" as const,
    properties: { arg: { type: "string" } },
    required: ["arg"],
  };
  return { name, description: `desc for ${name}`, parameters } as any;
};
// Covers the cache_control breakpoint behavior of convertTools: the marker
// must land on the final tool only, pass a ttl through unchanged, and be
// absent entirely when no cacheControl argument is supplied.
describe("convertTools cache_control", () => {
  it("adds cache_control to the last tool when cacheControl is provided", () => {
    const input = ["Read", "Write", "Edit"].map(makeTool);
    const result = convertTools(input, false, { type: "ephemeral" as const });
    assert.equal(result.length, 3);
    // Only the final entry carries the breakpoint.
    assert.equal((result[0] as any).cache_control, undefined);
    assert.equal((result[1] as any).cache_control, undefined);
    assert.deepEqual((result[2] as any).cache_control, { type: "ephemeral" });
  });
  it("does not add cache_control when cacheControl is undefined", () => {
    const converted = convertTools([makeTool("Read"), makeTool("Write")], false);
    for (const entry of converted) {
      assert.equal((entry as any).cache_control, undefined);
    }
  });
  it("handles empty tools array without error", () => {
    assert.equal(convertTools([], false, { type: "ephemeral" }).length, 0);
  });
  it("passes through ttl when provided", () => {
    const withTtl = convertTools([makeTool("Read")], false, {
      type: "ephemeral" as const,
      ttl: "1h" as const,
    });
    assert.deepEqual((withTtl[0] as any).cache_control, { type: "ephemeral", ttl: "1h" });
  });
  it("single tool gets cache_control", () => {
    const single = convertTools([makeTool("Read")], false, { type: "ephemeral" });
    assert.equal(single.length, 1);
    assert.deepEqual((single[0] as any).cache_control, { type: "ephemeral" });
  });
});
describe("mapStopReason", () => {
it("maps end_turn to stop", () => {

View file

@@ -394,10 +394,14 @@ export function convertMessages(
return params;
}
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
export function convertTools(
tools: Tool[],
isOAuthToken: boolean,
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
): Anthropic.Messages.Tool[] {
if (!tools) return [];
return tools.map((tool) => {
const result = tools.map((tool) => {
const jsonSchema = tool.parameters as any;
return {
@@ -410,6 +414,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me
},
};
});
// Add cache breakpoint to last tool — covers entire tool block
if (cacheControl && result.length > 0) {
(result[result.length - 1] as any).cache_control = cacheControl;
}
return result;
}
export function buildParams(
@@ -457,7 +468,7 @@ export function buildParams(
}
if (context.tools) {
params.tools = convertTools(context.tools, isOAuthToken);
params.tools = convertTools(context.tools, isOAuthToken, cacheControl);
}
if (options?.thinkingEnabled && model.reasoning) {

View file

@@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
if (context.tools) {
params.tools = convertTools(context.tools, compat);
maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools);
} else if (hasToolHistory(context.messages)) {
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
params.tools = [];
@@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
return params;
}
/**
 * Attaches a prompt-caching breakpoint to the last tool definition for
 * Anthropic models reached through OpenRouter's OpenAI-compatible API.
 *
 * No-op unless the model's provider is "openrouter" and its id starts with
 * "anthropic/", and at least one tool is present. Mutates the last tool's
 * `function` payload in place.
 *
 * @param model - resolved model; only OpenRouter-hosted `anthropic/*` qualify
 * @param tools - OpenAI-format tool list to annotate (mutated, may be undefined)
 * @param cacheControl - breakpoint to attach; defaults to a plain ephemeral
 *   marker. Pass `{ type: "ephemeral", ttl: "1h" }` for longer retention,
 *   mirroring the optional ttl supported by the Anthropic-native path.
 */
function maybeAddOpenRouterAnthropicToolCacheControl(
  model: Model<"openai-completions">,
  tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,
  cacheControl: { type: "ephemeral"; ttl?: "1h" } = { type: "ephemeral" },
): void {
  if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;
  if (!tools?.length) return;
  const lastTool = tools[tools.length - 1];
  // Non-function tool variants carry no `function` payload; only annotate
  // the standard function-tool shape.
  if ("function" in lastTool) {
    Object.assign(lastTool.function, { cache_control: cacheControl });
  }
}
function mapReasoningEffort(
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>>,