diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index a153f8cc3..0f7805528 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: ); payload.tools = tools; - // ── Session-level search budget (#1309) ────────────────────────────── + // ── Session-level search budget (#1309, #compaction-safe) ───────────── // Count web_search_tool_result blocks in the conversation history to // determine how many native searches have already been used this session. // The Anthropic API's max_uses resets per request, so without this guard, // pause_turn → resubmit cycles allow unlimited total searches. + // + // Use the monotonic high-water mark: take the max of the history count + // and the running counter. This prevents budget resets when context + // compaction removes web_search_tool_result blocks from history. if (Array.isArray(messages)) { let historySearchCount = 0; for (const msg of messages) { @@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: } } } - // Sync counter from history (handles session restore / context replay) - sessionSearchCount = historySearchCount; + // High-water mark: never decrease the counter, even if compaction + // removes web_search_tool_result blocks from the visible history. + sessionSearchCount = Math.max(sessionSearchCount, historySearchCount); } const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount); diff --git a/src/resources/extensions/search-the-web/tool-search.ts b/src/resources/extensions/search-the-web/tool-search.ts index 399a399df..e645a502f 100644 --- a/src/resources/extensions/search-the-web/tool-search.ts +++ b/src/resources/extensions/search-the-web/tool-search.ts @@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000); // Consecutive duplicate search guard (#949) // Tracks recent query keys to detect and break search loops. -const MAX_CONSECUTIVE_DUPES = 3; +const MAX_CONSECUTIVE_DUPES = 1; let lastSearchKey = ""; let consecutiveDupeCount = 0; -/** Reset session-scoped duplicate-search guard state. */ +// Session-level total search budget (all queries, not just duplicates). +// Prevents unbounded search accumulation across varied queries. +const MAX_SEARCHES_PER_SESSION = 15; +let sessionTotalSearches = 0; + +/** Reset session-scoped search guard state (both duplicate and budget). */ export function resetSearchLoopGuardState(): void { lastSearchKey = ""; consecutiveDupeCount = 0; + sessionTotalSearches = 0; } // Summarizer responses: max 50 entries, 15-minute TTL @@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) { }; } + // ------------------------------------------------------------------ + // Session-level search budget + // ------------------------------------------------------------------ + if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) { + return { + content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }], + isError: true, + details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial, + }; + } + const count = params.count ?? 5; const wantSummary = params.summary ?? false; @@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) { consecutiveDupeCount = 1; } + // Count every search that passes the guards toward the session budget. + sessionTotalSearches++; + const cached = searchCache.get(cacheKey); if (cached) { diff --git a/src/tests/native-search.test.ts b/src/tests/native-search.test.ts index 55c964f79..c6ff41310 100644 --- a/src/tests/native-search.test.ts +++ b/src/tests/native-search.test.ts @@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => { assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)"); }); +test("session search budget: survives context compaction (high-water mark)", async () => { + const pi = createMockPI(); + registerNativeSearchHooks(pi); + + await pi.fire("model_select", { + type: "model_select", + model: { provider: "anthropic", name: "claude-sonnet-4-6" }, + previousModel: undefined, + source: "set", + }); + + // First request: history has 12 web_search_tool_result blocks + const searchBlocks = Array.from({ length: 12 }, (_, i) => ({ + type: "web_search_tool_result", + tool_use_id: `ws${i}`, + content: [], + })); + + let payload: Record = { + model: "claude-sonnet-4-6-20250514", + tools: [{ name: "bash", type: "custom" }], + messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...searchBlocks] }], + }; + + await pi.fire("before_provider_request", { type: "before_provider_request", payload }); + let tools = payload.tools as any[]; + let nativeTool = tools.find((t: any) => t.type === "web_search_20250305"); + assert.ok(nativeTool, "Should still inject web_search with 12/15 used"); + assert.equal(nativeTool.max_uses, 3, "Should have 3 remaining (15 - 12)"); + + // Second request: context was compacted — search blocks gone from history. + // Without high-water mark, the budget would reset to 15. + payload = { + model: "claude-sonnet-4-6-20250514", + tools: [{ name: "bash", type: "custom" }], + messages: [{ role: "user", content: "compacted context — no search blocks" }], + }; + + await pi.fire("before_provider_request", { type: "before_provider_request", payload }); + tools = payload.tools as any[]; + nativeTool = tools.find((t: any) => t.type === "web_search_20250305"); + assert.ok(nativeTool, "Should still inject web_search with 12/15 used (high-water mark)"); + assert.equal(nativeTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining"); +}); + // ─── stripThinkingFromHistory tests ───────────────────────────────────────── test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => { diff --git a/src/tests/search-loop-guard.test.ts b/src/tests/search-loop-guard.test.ts index be4c7023a..c80ff4796 100644 --- a/src/tests/search-loop-guard.test.ts +++ b/src/tests/search-loop-guard.test.ts @@ -11,7 +11,7 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts"; +import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts"; import searchExtension from "../resources/extensions/search-the-web/index.ts"; const ORIGINAL_ENV = { @@ -72,6 +72,8 @@ function createMockPI() { const toolsByName = new Map(); let registeredTool: any = null; + let activeTools: string[] = []; + const pi = { on(event: string, handler: (...args: any[]) => unknown) { handlers.push({ event, handler }); @@ -91,6 +93,8 @@ function createMockPI() { getRegisteredTool(name = "search-the-web") { return toolsByName.get(name) ?? registeredTool; }, + getActiveTools() { return activeTools; }, + setActiveTools(tools: string[]) { activeTools = tools; }, writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt", }; @@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t) const execute = tool.execute.bind(tool); - // Calls 1–3: below threshold, should return search results (not an error) - for (let i = 1; i <= 3; i++) { - const result = await callSearch(execute, "loop test query", `call-${i}`); - assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`); - } + // Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1) + const result1 = await callSearch(execute, "loop test query", "call-1"); + assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard"); - // Call 4: hits the threshold — guard fires - const result4 = await callSearch(execute, "loop test query", "call-4"); - assert.equal(result4.isError, true, "call 4 should trigger the loop guard"); - assert.equal(result4.details?.errorKind, "search_loop"); + // Call 2: identical query — guard fires immediately (threshold = 1) + const result2 = await callSearch(execute, "loop test query", "call-2"); + assert.equal(result2.isError, true, "call 2 should trigger the loop guard"); + assert.equal(result2.details?.errorKind, "search_loop"); assert.ok( - result4.content[0].text.includes("Search loop detected"), + result2.content[0].text.includes("Search loop detected"), "error message should mention search loop" ); }); @@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => { assert.ok(tool, "search tool should be registered"); const execute = tool.execute.bind(tool); - // Trigger guard in session 1 - for (let i = 1; i <= 4; i++) { - await callSearch(execute, query, `s1-call-${i}`); - } - const guardResult = await callSearch(execute, query, "s1-call-5"); + // Trigger guard in session 1 (call 1 succeeds, call 2 fires guard) + await callSearch(execute, query, "s1-call-1"); + const guardResult = await callSearch(execute, query, "s1-call-2"); assert.equal(guardResult.isError, true, "session 1 should be guarded"); assert.equal(guardResult.details?.errorKind, "search_loop"); @@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed const tool = pi.getRegisteredTool(); const execute = tool.execute.bind(tool); - // Exhaust the initial window (calls 1–3 succeed, call 4 fires guard) - for (let i = 1; i <= 3; i++) { - await callSearch(execute, query, `call-${i}`); - } - const guardFirst = await callSearch(execute, query, "call-4"); - assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard"); + // Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1) + await callSearch(execute, query, "call-1"); + const guardFirst = await callSearch(execute, query, "call-2"); + assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard"); - // Key regression test: call 5 (and beyond) must ALSO trigger the guard. - // The original bug reset state on trigger, so call 5 was treated as a fresh + // Key regression test: call 3 (and beyond) must ALSO trigger the guard. + // The original bug reset state on trigger, so call 3 was treated as a fresh // first search and the loop restarted. - const guardSecond = await callSearch(execute, query, "call-5"); + const guardSecond = await callSearch(execute, query, "call-3"); assert.equal( guardSecond.isError, true, - "call 5 should STILL trigger the loop guard (guard must stay armed after firing)" + "call 3 should STILL trigger the loop guard (guard must stay armed after firing)" ); assert.equal(guardSecond.details?.errorKind, "search_loop"); - // Call 6 as well — guard should keep firing - const guardThird = await callSearch(execute, query, "call-6"); + // Call 4 as well — guard should keep firing + const guardThird = await callSearch(execute, query, "call-4"); assert.equal( guardThird.isError, true, - "call 6 should STILL trigger the loop guard" + "call 4 should STILL trigger the loop guard" ); }); @@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async const tool = pi.getRegisteredTool(); const execute = tool.execute.bind(tool); - // Trigger guard for queryA - for (let i = 1; i <= 4; i++) { - await callSearch(execute, queryA, `call-a-${i}`); - } + // Trigger guard for queryA (call 1 succeeds, call 2 fires guard) + await callSearch(execute, queryA, "call-a-1"); + await callSearch(execute, queryA, "call-a-2"); // Issue a different query — should succeed (resets the duplicate counter) const resultB = await callSearch(execute, queryB, "call-b-1"); @@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async "a different query after guard should not be treated as a loop" ); }); + +test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => { + process.env.BRAVE_API_KEY = "test-key-budget"; + delete process.env.TAVILY_API_KEY; + delete process.env.OLLAMA_API_KEY; + const restoreFetch = mockFetch(makeBraveResponse()); + + t.after(() => { + restoreFetch(); + restoreSearchEnv(); + }); + + // Reset guard state (including session budget) and register directly + resetSearchLoopGuardState(); + const pi = createMockPI(); + registerSearchTool(pi as any); + + const tool = pi.getRegisteredTool(); + assert.ok(tool, "search tool should be registered"); + const execute = tool.execute.bind(tool); + + // Issue 15 unique queries — all should succeed (budget = 15) + for (let i = 1; i <= 15; i++) { + const result = await callSearch(execute, `unique budget query ${i}`, `budget-${i}`); + assert.notEqual(result.isError, true, `query ${i} should succeed within budget`); + } + + // Query 16: budget exhausted — should be blocked + const blocked = await callSearch(execute, "one more query", "budget-16"); + assert.equal(blocked.isError, true, "query 16 should be blocked by budget"); + assert.equal(blocked.details?.errorKind, "budget_exhausted"); + assert.ok( + blocked.content[0].text.includes("Search budget exhausted"), + "error message should mention budget" + ); +}); + +test("session search budget resets via resetSearchLoopGuardState", async (t) => { + process.env.BRAVE_API_KEY = "test-key-budget-reset"; + delete process.env.TAVILY_API_KEY; + delete process.env.OLLAMA_API_KEY; + const restoreFetch = mockFetch(makeBraveResponse()); + + t.after(() => { + restoreFetch(); + restoreSearchEnv(); + }); + + // Reset and register directly + resetSearchLoopGuardState(); + const pi = createMockPI(); + registerSearchTool(pi as any); + + const tool = pi.getRegisteredTool(); + const execute = tool.execute.bind(tool); + + // Exhaust budget + for (let i = 1; i <= 15; i++) { + await callSearch(execute, `budget reset query ${i}`, `br-${i}`); + } + const exhausted = await callSearch(execute, "exhausted query", "br-exhausted"); + assert.equal(exhausted.isError, true, "budget should be exhausted"); + + // Reset simulates new session + resetSearchLoopGuardState(); + const fresh = await callSearch(execute, "fresh session query", "br-fresh"); + assert.notEqual(fresh.isError, true, "first query after reset should succeed"); +});