Merge pull request #3177 from jeremymcs/perf/tool-cache-control
perf(pi-ai): add cache_control breakpoints to tool definitions
This commit is contained in:
commit
74a832563f
4 changed files with 96 additions and 5 deletions
|
|
@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
|
|||
messages: convertMessages(context, model, cacheRetention),
|
||||
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
|
||||
inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
|
||||
toolConfig: convertToolConfig(context.tools, options.toolChoice),
|
||||
toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention),
|
||||
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
|
||||
};
|
||||
const nextCommandInput = await options?.onPayload?.(commandInput, model);
|
||||
|
|
@ -633,6 +633,8 @@ function convertMessages(
|
|||
function convertToolConfig(
|
||||
tools: Tool[] | undefined,
|
||||
toolChoice: BedrockOptions["toolChoice"],
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
cacheRetention: CacheRetention,
|
||||
): ToolConfiguration | undefined {
|
||||
if (!tools?.length || toolChoice === "none") return undefined;
|
||||
|
||||
|
|
@ -644,6 +646,16 @@ function convertToolConfig(
|
|||
},
|
||||
}));
|
||||
|
||||
// Add cachePoint after last tool for supported models
|
||||
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
|
||||
bedrockTools.push({
|
||||
cachePoint: {
|
||||
type: CachePointType.DEFAULT,
|
||||
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
|
||||
},
|
||||
} as any);
|
||||
}
|
||||
|
||||
let bedrockToolChoice: ToolChoice | undefined;
|
||||
switch (toolChoice) {
|
||||
case "auto":
|
||||
|
|
|
|||
|
|
@ -1,6 +1,60 @@
|
|||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mapStopReason } from "./anthropic-shared.js";
|
||||
import { convertTools, mapStopReason } from "./anthropic-shared.js";
|
||||
|
||||
const makeTool = (name: string) =>
|
||||
({
|
||||
name,
|
||||
description: `desc for ${name}`,
|
||||
parameters: {
|
||||
type: "object" as const,
|
||||
properties: { arg: { type: "string" } },
|
||||
required: ["arg"],
|
||||
},
|
||||
}) as any;
|
||||
|
||||
// Verifies that convertTools attaches an Anthropic cache_control breakpoint to
// the LAST tool definition only — a single trailing breakpoint covers the whole
// tool block for prompt caching — and leaves tools untouched when no
// cacheControl is passed.
describe("convertTools cache_control", () => {
  it("adds cache_control to the last tool when cacheControl is provided", () => {
    const tools = [makeTool("Read"), makeTool("Write"), makeTool("Edit")];
    const cacheControl = { type: "ephemeral" as const };
    const result = convertTools(tools, false, cacheControl);

    // Only the final tool carries the breakpoint; earlier tools stay clean.
    assert.equal(result.length, 3);
    assert.equal((result[0] as any).cache_control, undefined);
    assert.equal((result[1] as any).cache_control, undefined);
    assert.deepEqual((result[2] as any).cache_control, { type: "ephemeral" });
  });

  it("does not add cache_control when cacheControl is undefined", () => {
    const tools = [makeTool("Read"), makeTool("Write")];
    const result = convertTools(tools, false);

    // Omitting the optional cacheControl argument must leave every tool unmarked.
    for (const tool of result) {
      assert.equal((tool as any).cache_control, undefined);
    }
  });

  it("handles empty tools array without error", () => {
    // Edge case: no tools means nowhere to place a breakpoint — expect [].
    const result = convertTools([], false, { type: "ephemeral" });
    assert.equal(result.length, 0);
  });

  it("passes through ttl when provided", () => {
    const tools = [makeTool("Read")];
    // "1h" is the extended-TTL variant; it must be forwarded verbatim.
    const cacheControl = { type: "ephemeral" as const, ttl: "1h" as const };
    const result = convertTools(tools, false, cacheControl);

    assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral", ttl: "1h" });
  });

  it("single tool gets cache_control", () => {
    // With exactly one tool, that tool is the last tool — it gets the breakpoint.
    const tools = [makeTool("Read")];
    const result = convertTools(tools, false, { type: "ephemeral" });

    assert.equal(result.length, 1);
    assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral" });
  });
});
|
||||
|
||||
describe("mapStopReason", () => {
|
||||
it("maps end_turn to stop", () => {
|
||||
|
|
|
|||
|
|
@ -394,10 +394,14 @@ export function convertMessages(
|
|||
return params;
|
||||
}
|
||||
|
||||
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
|
||||
export function convertTools(
|
||||
tools: Tool[],
|
||||
isOAuthToken: boolean,
|
||||
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
||||
): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const result = tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any;
|
||||
|
||||
return {
|
||||
|
|
@ -410,6 +414,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me
|
|||
},
|
||||
};
|
||||
});
|
||||
|
||||
// Add cache breakpoint to last tool — covers entire tool block
|
||||
if (cacheControl && result.length > 0) {
|
||||
(result[result.length - 1] as any).cache_control = cacheControl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
export function buildParams(
|
||||
|
|
@ -457,7 +468,7 @@ export function buildParams(
|
|||
}
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, isOAuthToken);
|
||||
params.tools = convertTools(context.tools, isOAuthToken, cacheControl);
|
||||
}
|
||||
|
||||
if (options?.thinkingEnabled && model.reasoning) {
|
||||
|
|
|
|||
|
|
@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, compat);
|
||||
maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools);
|
||||
} else if (hasToolHistory(context.messages)) {
|
||||
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
|
||||
params.tools = [];
|
||||
|
|
@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|||
return params;
|
||||
}
|
||||
|
||||
function maybeAddOpenRouterAnthropicToolCacheControl(
|
||||
model: Model<"openai-completions">,
|
||||
tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,
|
||||
): void {
|
||||
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;
|
||||
if (!tools?.length) return;
|
||||
|
||||
const lastTool = tools[tools.length - 1];
|
||||
if ("function" in lastTool) {
|
||||
Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } });
|
||||
}
|
||||
}
|
||||
|
||||
function mapReasoningEffort(
|
||||
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
|
||||
reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>>,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue