The Anthropic API's `max_uses` resets per request — when `pause_turn` triggers a resubmit, the model gets a fresh budget each time. This allowed unlimited total searches across a research unit, overwhelming the TUI render buffer.

Fix:
- Count `web_search_tool_result` blocks in conversation history on each `before_provider_request` to track cumulative searches per session
- Cap total native searches at 15 per session (3 full turns of 5)
- Dynamically set `max_uses` to `min(5, remaining)` — preserves per-turn cap while enforcing session ceiling
- When budget exhausted, omit `web_search` tool entirely instead of letting the model hit `max_uses_exceeded` repeatedly
- Reset counter on `session_start` (new agent unit)
- Add web search budget guidance to research prompts (defense in depth)

Tests: 5 new tests covering budget tracking, exhaustion, and reset. All 35 native-search tests pass.
This commit is contained in:
parent
c9d79a829c
commit
7afefc73ac
4 changed files with 255 additions and 9 deletions
|
|
@ -25,9 +25,10 @@ Then research the codebase and relevant technologies. Narrate key findings and s
|
|||
2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
|
||||
3. Explore relevant code. For small/familiar codebases, use `rg`, `find`, and targeted reads. For large or unfamiliar codebases, use `scout` to build a broad map efficiently before diving in.
|
||||
4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
|
||||
5. Use the **Research** output template from the inlined context above — include only sections that have real content
|
||||
6. If `.gsd/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
|
||||
7. Write `{{outputPath}}`
|
||||
5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
|
||||
6. Use the **Research** output template from the inlined context above — include only sections that have real content
|
||||
7. If `.gsd/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
|
||||
8. Write `{{outputPath}}`
|
||||
|
||||
## Strategic Questions to Answer
|
||||
|
||||
|
|
|
|||
|
|
@ -46,8 +46,9 @@ Research what this slice needs. Narrate key findings and surprises as you go —
|
|||
2. **Skill Discovery ({{skillDiscoveryMode}}):**{{skillDiscoveryInstructions}}
|
||||
3. Explore relevant code for this slice's scope. For targeted exploration, use `rg`, `find`, and reads. For broad or unfamiliar subsystems, use `scout` to map the relevant area first.
|
||||
4. Use `resolve_library` / `get_library_docs` for unfamiliar libraries — skip this for libraries already used in the codebase
|
||||
5. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
|
||||
6. Write `{{outputPath}}`
|
||||
5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — prefer `resolve_library` / `get_library_docs` for library documentation. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
|
||||
6. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
|
||||
7. Write `{{outputPath}}`
|
||||
|
||||
The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir — just write the file.
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,16 @@ export const CUSTOM_SEARCH_TOOL_NAMES = ["search-the-web", "search_and_read", "g
|
|||
/** Content-block types that hold thinking output; the API requires signature validation for these. */
const THINKING_TYPES = new Set<string>(["thinking", "redacted_thinking"]);
|
||||
|
||||
/**
 * Session-wide ceiling on native web searches (one session = one agent unit).
 *
 * The Anthropic API's `max_uses` applies to a single request only and resets
 * on every API call, so each `pause_turn` resubmit hands the model a fresh
 * allowance. This constant bounds the total across all requests in a session
 * and prevents unbounded search accumulation (#1309).
 *
 * 15 = 3 full turns of 5 searches each — generous for research, but bounded.
 */
export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
|
||||
|
||||
/** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
|
||||
export function preferBraveSearch(): boolean {
|
||||
// preferences.md takes priority over env var
|
||||
|
|
@ -74,6 +84,11 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
|
|||
let isAnthropicProvider = false;
|
||||
let modelSelectFired = false;
|
||||
|
||||
// Session-level native search counter (#1309).
|
||||
// Tracks cumulative web_search_tool_result blocks across all turns in a session.
|
||||
// Reset on session_start. Used to compute remaining budget for max_uses.
|
||||
let sessionSearchCount = 0;
|
||||
|
||||
// Track provider changes via model selection — also handles diagnostics
|
||||
// since model_select fires AFTER session_start and knows the provider.
|
||||
pi.on("model_select", async (event: any, ctx: any) => {
|
||||
|
|
@ -161,13 +176,41 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
|
|||
);
|
||||
payload.tools = tools;
|
||||
|
||||
// ── Session-level search budget (#1309) ──────────────────────────────
|
||||
// Count web_search_tool_result blocks in the conversation history to
|
||||
// determine how many native searches have already been used this session.
|
||||
// The Anthropic API's max_uses resets per request, so without this guard,
|
||||
// pause_turn → resubmit cycles allow unlimited total searches.
|
||||
if (Array.isArray(messages)) {
|
||||
let historySearchCount = 0;
|
||||
for (const msg of messages) {
|
||||
const content = msg.content;
|
||||
if (!Array.isArray(content)) continue;
|
||||
for (const block of content) {
|
||||
if ((block as any)?.type === "web_search_tool_result") {
|
||||
historySearchCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync counter from history (handles session restore / context replay)
|
||||
sessionSearchCount = historySearchCount;
|
||||
}
|
||||
|
||||
const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
|
||||
|
||||
if (remaining <= 0) {
|
||||
// Budget exhausted — don't inject the search tool at all.
|
||||
// The model will proceed without web search capability.
|
||||
return payload;
|
||||
}
|
||||
|
||||
tools.push({
|
||||
type: "web_search_20250305",
|
||||
name: "web_search",
|
||||
// Cap server-side searches per response to prevent the model from
|
||||
// looping on web_search without synthesizing results (#817).
|
||||
// 5 searches is generous — most queries need 1-2.
|
||||
max_uses: 5,
|
||||
// Cap per-request searches to the lesser of 5 (per-turn cap) or the
|
||||
// remaining session budget (#1309). This prevents the model from
|
||||
// consuming unlimited searches via pause_turn → resubmit cycles.
|
||||
max_uses: Math.min(5, remaining),
|
||||
});
|
||||
|
||||
return payload;
|
||||
|
|
@ -175,6 +218,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
|
|||
|
||||
// Basic startup diagnostics — provider-specific info comes from model_select
|
||||
pi.on("session_start", async (_event: any, ctx: any) => {
|
||||
// Reset session-level search budget (#1309)
|
||||
sessionSearchCount = 0;
|
||||
|
||||
const hasBrave = !!process.env.BRAVE_API_KEY;
|
||||
const hasJina = !!process.env.JINA_API_KEY;
|
||||
const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import {
|
|||
stripThinkingFromHistory,
|
||||
BRAVE_TOOL_NAMES,
|
||||
CUSTOM_SEARCH_TOOL_NAMES,
|
||||
MAX_NATIVE_SEARCHES_PER_SESSION,
|
||||
type NativeSearchPI,
|
||||
} from "../resources/extensions/search-the-web/native-search.ts";
|
||||
|
||||
|
|
@ -688,6 +689,203 @@ test("model_select DOES show notification on explicit user set", async () => {
|
|||
assert.ok(nativeNotif, "Should show notification on explicit 'set' source");
|
||||
});
|
||||
|
||||
// ─── Session-level search budget (#1309) ────────────────────────────────────
|
||||
|
||||
// With 10 prior search results in history, 15 - 10 = 5 searches remain, so the
// injected web_search tool is expected to carry max_uses = min(5, 5) = 5.
test("session search budget: max_uses decreases as history accumulates search results", async () => {
  const mockPi = createMockPI();
  registerNativeSearchHooks(mockPi);

  // Report an Anthropic model; the hooks track the provider from model_select.
  await mockPi.fire("model_select", {
    type: "model_select",
    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
    previousModel: undefined,
    source: "set",
  });

  // Builds `count` web_search_tool_result blocks with ids ws<firstId>, ws<firstId + 1>, …
  const searchResults = (firstId: number, count: number): any[] =>
    Array.from({ length: count }, (_, offset) => ({
      type: "web_search_tool_result",
      tool_use_id: `ws${firstId + offset}`,
      content: [],
    }));

  // Two assistant turns of 5 search results each → 10 results in history.
  const history: any[] = [
    { role: "user", content: "research this topic" },
    {
      role: "assistant",
      content: [...searchResults(1, 5), { type: "text", text: "Here are some results..." }],
    },
    { role: "user", content: "continue" },
    {
      role: "assistant",
      content: [...searchResults(6, 5), { type: "text", text: "More results..." }],
    },
    { role: "user", content: "keep going" },
  ];

  const request: Record<string, unknown> = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: history,
  };

  const returned = await mockPi.fire("before_provider_request", {
    type: "before_provider_request",
    payload: request,
  });

  // Prefer the tool list on the hook's return value; fall back to the request object.
  const finalTools = ((returned as any)?.tools ?? request.tools) as any[];
  const injected = finalTools.find((tool: any) => tool.type === "web_search_20250305");

  assert.ok(injected, "Should still inject web_search when budget remaining");
  assert.equal(injected.max_uses, 5, "Should cap at min(5, remaining)");
});
|
||||
|
||||
// With 13 prior search results, only 15 - 13 = 2 searches remain, so max_uses
// on the injected tool must drop below the per-turn cap of 5.
test("session search budget: reduces max_uses when close to limit", async () => {
  const mockPi = createMockPI();
  registerNativeSearchHooks(mockPi);

  // Report an Anthropic model; the hooks track the provider from model_select.
  await mockPi.fire("model_select", {
    type: "model_select",
    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
    previousModel: undefined,
    source: "set",
  });

  // 13 search results already recorded in the conversation history.
  const priorSearches: any[] = [];
  for (let i = 0; i < 13; i++) {
    priorSearches.push({
      type: "web_search_tool_result",
      tool_use_id: `ws${i}`,
      content: [],
    });
  }

  const history: any[] = [
    { role: "user", content: "research" },
    { role: "assistant", content: [...priorSearches, { type: "text", text: "results" }] },
    { role: "user", content: "more" },
  ];

  const request: Record<string, unknown> = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: history,
  };

  const returned = await mockPi.fire("before_provider_request", {
    type: "before_provider_request",
    payload: request,
  });

  // Prefer the tool list on the hook's return value; fall back to the request object.
  const finalTools = ((returned as any)?.tools ?? request.tools) as any[];
  const injected = finalTools.find((tool: any) => tool.type === "web_search_20250305");

  assert.ok(injected, "Should still inject when budget > 0");
  assert.equal(injected.max_uses, 2, "Should reduce max_uses to remaining budget");
});
|
||||
|
||||
// Once history holds MAX_NATIVE_SEARCHES_PER_SESSION search results, the hook
// must not inject web_search at all, while leaving other tools untouched.
test("session search budget: omits web_search tool when budget exhausted", async () => {
  const mockPi = createMockPI();
  registerNativeSearchHooks(mockPi);

  // Report an Anthropic model; the hooks track the provider from model_select.
  await mockPi.fire("model_select", {
    type: "model_select",
    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
    previousModel: undefined,
    source: "set",
  });

  // Exactly the session budget's worth of search results → nothing remaining.
  const priorSearches: any[] = [];
  for (let i = 0; i < MAX_NATIVE_SEARCHES_PER_SESSION; i++) {
    priorSearches.push({
      type: "web_search_tool_result",
      tool_use_id: `ws${i}`,
      content: [],
    });
  }

  const history: any[] = [
    { role: "user", content: "research" },
    { role: "assistant", content: [...priorSearches, { type: "text", text: "results" }] },
    { role: "user", content: "more" },
  ];

  const request: Record<string, unknown> = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: history,
  };

  const returned = await mockPi.fire("before_provider_request", {
    type: "before_provider_request",
    payload: request,
  });

  // Prefer the tool list on the hook's return value; fall back to the request object.
  const finalTools = ((returned as any)?.tools ?? request.tools) as any[];
  const injected = finalTools.find((tool: any) => tool.type === "web_search_20250305");

  assert.equal(injected, undefined, "Should NOT inject web_search when budget exhausted (#1309)");

  // The pre-existing custom tool must survive the hook.
  const bashStillPresent = finalTools.some((tool: any) => tool.name === "bash");
  assert.ok(bashStillPresent, "Non-search tools should remain");
});
|
||||
|
||||
// Verifies that a new session gets a full search budget after a previous
// session exhausted it.
//
// NOTE(review): the hook re-derives its counter from `messages` on every
// request, so the second request's empty history may by itself yield a full
// budget — confirm whether this test actually isolates the session_start reset.
test("session search budget: resets on session_start", async () => {
  const pi = createMockPI();
  registerNativeSearchHooks(pi);

  // Report an Anthropic model; the hooks track the provider from model_select.
  await pi.fire("model_select", {
    type: "model_select",
    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
    previousModel: undefined,
    source: "set",
  });

  // First session: history already contains the full session budget.
  const searchBlocks = Array.from({ length: MAX_NATIVE_SEARCHES_PER_SESSION }, (_, i) => ({
    type: "web_search_tool_result",
    tool_use_id: `ws${i}`,
    content: [],
  }));

  let payload: Record<string, unknown> = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: [
      { role: "user", content: "research" },
      { role: "assistant", content: [...searchBlocks] },
      { role: "user", content: "more" },
    ],
  };

  // Read the tool list from the hook's return value (falling back to the
  // payload), as the other budget tests do. Inspecting only `payload.tools`
  // would pass vacuously if the hook returned a new payload object instead of
  // mutating the one passed in.
  const exhaustedResult = await pi.fire("before_provider_request", {
    type: "before_provider_request",
    payload,
  });
  let tools = ((exhaustedResult as any)?.tools ?? payload.tools) as any[];
  assert.ok(!tools.some((t: any) => t.type === "web_search_20250305"), "Budget should be exhausted");

  // New session starts — counter resets
  await pi.fire("session_start", { type: "session_start" });

  // New request with no history — full budget available
  payload = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: [{ role: "user", content: "new research" }],
  };

  const result = await pi.fire("before_provider_request", {
    type: "before_provider_request",
    payload,
  });
  tools = ((result as any)?.tools ?? payload.tools) as any[];
  const nativeTool = tools.find((t: any) => t.type === "web_search_20250305");

  assert.ok(nativeTool, "Should inject web_search after session reset");
  assert.equal(nativeTool.max_uses, 5, "Should have full per-turn budget after reset");
});
|
||||
|
||||
// Pins the exported session budget so an accidental change is caught.
test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => {
  const expectedBudget = 15;
  assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, expectedBudget, "Session budget should be 15 (#1309)");
});
|
||||
|
||||
// ─── stripThinkingFromHistory tests ─────────────────────────────────────────
|
||||
|
||||
test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue