perf(pi-ai): add cache_control breakpoints to tool definitions
Tool definitions are large and static across turns but had no cache breakpoint. Add a 3rd breakpoint (system → tools → last user message) for Anthropic direct, Bedrock, and OpenRouter Anthropic providers. Closes gsd-build/gsd-2#3176
This commit is contained in:
parent
0a2c9b64c6
commit
49c05eaaa7
3 changed files with 41 additions and 4 deletions
|
|
@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
|
|||
messages: convertMessages(context, model, cacheRetention),
|
||||
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
|
||||
inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
|
||||
toolConfig: convertToolConfig(context.tools, options.toolChoice),
|
||||
toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention),
|
||||
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
|
||||
};
|
||||
const nextCommandInput = await options?.onPayload?.(commandInput, model);
|
||||
|
|
@ -633,6 +633,8 @@ function convertMessages(
|
|||
function convertToolConfig(
|
||||
tools: Tool[] | undefined,
|
||||
toolChoice: BedrockOptions["toolChoice"],
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
cacheRetention: CacheRetention,
|
||||
): ToolConfiguration | undefined {
|
||||
if (!tools?.length || toolChoice === "none") return undefined;
|
||||
|
||||
|
|
@ -644,6 +646,16 @@ function convertToolConfig(
|
|||
},
|
||||
}));
|
||||
|
||||
// Add cachePoint after last tool for supported models
|
||||
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
|
||||
bedrockTools.push({
|
||||
cachePoint: {
|
||||
type: CachePointType.DEFAULT,
|
||||
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
|
||||
},
|
||||
} as any);
|
||||
}
|
||||
|
||||
let bedrockToolChoice: ToolChoice | undefined;
|
||||
switch (toolChoice) {
|
||||
case "auto":
|
||||
|
|
|
|||
|
|
@ -393,10 +393,14 @@ export function convertMessages(
|
|||
return params;
|
||||
}
|
||||
|
||||
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
|
||||
export function convertTools(
|
||||
tools: Tool[],
|
||||
isOAuthToken: boolean,
|
||||
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
||||
): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const result = tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any;
|
||||
|
||||
return {
|
||||
|
|
@ -409,6 +413,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me
|
|||
},
|
||||
};
|
||||
});
|
||||
|
||||
// Add cache breakpoint to last tool — covers entire tool block
|
||||
if (cacheControl && result.length > 0) {
|
||||
(result[result.length - 1] as any).cache_control = cacheControl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
export function buildParams(
|
||||
|
|
@ -456,7 +467,7 @@ export function buildParams(
|
|||
}
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, isOAuthToken);
|
||||
params.tools = convertTools(context.tools, isOAuthToken, cacheControl);
|
||||
}
|
||||
|
||||
if (options?.thinkingEnabled && model.reasoning) {
|
||||
|
|
|
|||
|
|
@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, compat);
|
||||
maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools);
|
||||
} else if (hasToolHistory(context.messages)) {
|
||||
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
|
||||
params.tools = [];
|
||||
|
|
@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|||
return params;
|
||||
}
|
||||
|
||||
/**
 * Marks the last tool definition with an ephemeral `cache_control` entry when
 * the request targets an Anthropic model through OpenRouter.
 *
 * The marker is merged into the last tool's `function` object in place; the
 * `tools` array itself is not copied or reordered, and nothing is returned.
 *
 * NOTE(review): this helper takes no cache-retention argument, so the marker is
 * always `{ type: "ephemeral" }` with no TTL and cannot be switched off here —
 * confirm that is intended for this provider.
 *
 * @param model - Model descriptor; only `provider` and `id` are read.
 * @param tools - Already-converted OpenAI-style tool list; may be undefined or empty.
 */
function maybeAddOpenRouterAnthropicToolCacheControl(
|
||||
model: Model<"openai-completions">,
|
||||
tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,
|
||||
): void {
|
||||
// Only applies to provider "openrouter" with a model id prefixed "anthropic/"; every other model is left untouched.
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;
|
||||
// Nothing to mark when there are no tools.
if (!tools?.length) return;
|
||||
|
||||
// Only the final tool receives the marker.
const lastTool = tools[tools.length - 1];
|
||||
// Skip tool entries that have no `function` property.
if ("function" in lastTool) {
|
||||
// Mutates the existing `function` object rather than replacing it.
// NOTE(review): presumably OpenRouter forwards this field to Anthropic — verify the expected placement.
Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } });
|
||||
}
|
||||
}
|
||||
|
||||
function mapReasoningEffort(
|
||||
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
|
||||
reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>>,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue