Merge pull request #2584 from jeremymcs/fix/web-search-budget-enforcement

fix(search): enforce hard search budget and survive context compaction
2026-03-25 22:22:12 -06:00 · 2026-03-25 22:22:12 -06:00 · 2d97f042de
commit 2d97f042de
parent 9c2bacd39d 8e7ec7885a
4 changed files with 173 additions and 38 deletions
--- a/src/resources/extensions/search-the-web/native-search.ts
+++ b/src/resources/extensions/search-the-web/native-search.ts
@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
    );
    payload.tools = tools;

-    // ── Session-level search budget (#1309) ──────────────────────────────
+    // ── Session-level search budget (#1309, #compaction-safe) ─────────────
    // Count web_search_tool_result blocks in the conversation history to
    // determine how many native searches have already been used this session.
    // The Anthropic API's max_uses resets per request, so without this guard,
    // pause_turn → resubmit cycles allow unlimited total searches.
+    //
+    // Use the monotonic high-water mark: take the max of the history count
+    // and the running counter. This prevents budget resets when context
+    // compaction removes web_search_tool_result blocks from history.
    if (Array.isArray(messages)) {
      let historySearchCount = 0;
      for (const msg of messages) {
@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
          }
        }
      }
-      // Sync counter from history (handles session restore / context replay)
-      sessionSearchCount = historySearchCount;
+      // High-water mark: never decrease the counter, even if compaction
+      // removes web_search_tool_result blocks from the visible history.
+      sessionSearchCount = Math.max(sessionSearchCount, historySearchCount);
    }

    const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
--- a/src/resources/extensions/search-the-web/tool-search.ts
+++ b/src/resources/extensions/search-the-web/tool-search.ts
@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000);

 // Consecutive duplicate search guard (#949)
 // Tracks recent query keys to detect and break search loops.
-const MAX_CONSECUTIVE_DUPES = 3;
+const MAX_CONSECUTIVE_DUPES = 1;
 let lastSearchKey = "";
 let consecutiveDupeCount = 0;

-/** Reset session-scoped duplicate-search guard state. */
+// Session-level total search budget (all queries, not just duplicates).
+// Prevents unbounded search accumulation across varied queries.
+const MAX_SEARCHES_PER_SESSION = 15;
+let sessionTotalSearches = 0;
+
+/** Reset session-scoped search guard state (both duplicate and budget). */
 export function resetSearchLoopGuardState(): void {
  lastSearchKey = "";
  consecutiveDupeCount = 0;
+  sessionTotalSearches = 0;
 }

 // Summarizer responses: max 50 entries, 15-minute TTL
@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) {
        };
      }

+      // ------------------------------------------------------------------
+      // Session-level search budget
+      // ------------------------------------------------------------------
+      if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) {
+        return {
+          content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }],
+          isError: true,
+          details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial<SearchDetails>,
+        };
+      }
+
      const count = params.count ?? 5;
      const wantSummary = params.summary ?? false;

@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) {
        consecutiveDupeCount = 1;
      }

+      // Count every search that passes the guards toward the session budget.
+      sessionTotalSearches++;
+
      const cached = searchCache.get(cacheKey);

      if (cached) {
--- a/src/tests/native-search.test.ts
+++ b/src/tests/native-search.test.ts
@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => {
  assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)");
 });

+test("session search budget: survives context compaction (high-water mark)", async () => {
+  const pi = createMockPI();
+  registerNativeSearchHooks(pi);
+
+  await pi.fire("model_select", {
+    type: "model_select",
+    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
+    previousModel: undefined,
+    source: "set",
+  });
+
+  // First request: history has 12 web_search_tool_result blocks
+  const searchBlocks = Array.from({ length: 12 }, (_, i) => ({
+    type: "web_search_tool_result",
+    tool_use_id: `ws${i}`,
+    content: [],
+  }));
+
+  let payload: Record<string, unknown> = {
+    model: "claude-sonnet-4-6-20250514",
+    tools: [{ name: "bash", type: "custom" }],
+    messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...searchBlocks] }],
+  };
+
+  await pi.fire("before_provider_request", { type: "before_provider_request", payload });
+  let tools = payload.tools as any[];
+  let nativeTool = tools.find((t: any) => t.type === "web_search_20250305");
+  assert.ok(nativeTool, "Should still inject web_search with 12/15 used");
+  assert.equal(nativeTool.max_uses, 3, "Should have 3 remaining (15 - 12)");
+
+  // Second request: context was compacted — search blocks gone from history.
+  // Without high-water mark, the budget would reset to 15.
+  payload = {
+    model: "claude-sonnet-4-6-20250514",
+    tools: [{ name: "bash", type: "custom" }],
+    messages: [{ role: "user", content: "compacted context — no search blocks" }],
+  };
+
+  await pi.fire("before_provider_request", { type: "before_provider_request", payload });
+  tools = payload.tools as any[];
+  nativeTool = tools.find((t: any) => t.type === "web_search_20250305");
+  assert.ok(nativeTool, "Should still inject web_search with 12/15 used (high-water mark)");
+  assert.equal(nativeTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining");
+});
+
 // ─── stripThinkingFromHistory tests ─────────────────────────────────────────

 test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => {
--- a/src/tests/search-loop-guard.test.ts
+++ b/src/tests/search-loop-guard.test.ts
@ -11,7 +11,7 @@

 import test from "node:test";
 import assert from "node:assert/strict";
-import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts";
+import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts";
 import searchExtension from "../resources/extensions/search-the-web/index.ts";

 const ORIGINAL_ENV = {
@ -72,6 +72,8 @@ function createMockPI() {
  const toolsByName = new Map<string, any>();
  let registeredTool: any = null;

+  let activeTools: string[] = [];
+
  const pi = {
    on(event: string, handler: (...args: any[]) => unknown) {
      handlers.push({ event, handler });
@ -91,6 +93,8 @@ function createMockPI() {
    getRegisteredTool(name = "search-the-web") {
      return toolsByName.get(name) ?? registeredTool;
    },
+    getActiveTools() { return activeTools; },
+    setActiveTools(tools: string[]) { activeTools = tools; },
    writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt",
  };

@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t)

  const execute = tool.execute.bind(tool);

-  // Calls 1–3: below threshold, should return search results (not an error)
-  for (let i = 1; i <= 3; i++) {
-    const result = await callSearch(execute, "loop test query", `call-${i}`);
-    assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`);
-  }
+  // Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1)
+  const result1 = await callSearch(execute, "loop test query", "call-1");
+  assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard");

-  // Call 4: hits the threshold — guard fires
-  const result4 = await callSearch(execute, "loop test query", "call-4");
-  assert.equal(result4.isError, true, "call 4 should trigger the loop guard");
-  assert.equal(result4.details?.errorKind, "search_loop");
+  // Call 2: identical query — guard fires immediately (threshold = 1)
+  const result2 = await callSearch(execute, "loop test query", "call-2");
+  assert.equal(result2.isError, true, "call 2 should trigger the loop guard");
+  assert.equal(result2.details?.errorKind, "search_loop");
  assert.ok(
-    result4.content[0].text.includes("Search loop detected"),
+    result2.content[0].text.includes("Search loop detected"),
    "error message should mention search loop"
  );
 });
@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => {
  assert.ok(tool, "search tool should be registered");
  const execute = tool.execute.bind(tool);

-  // Trigger guard in session 1
-  for (let i = 1; i <= 4; i++) {
-    await callSearch(execute, query, `s1-call-${i}`);
-  }
-  const guardResult = await callSearch(execute, query, "s1-call-5");
+  // Trigger guard in session 1 (call 1 succeeds, call 2 fires guard)
+  await callSearch(execute, query, "s1-call-1");
+  const guardResult = await callSearch(execute, query, "s1-call-2");
  assert.equal(guardResult.isError, true, "session 1 should be guarded");
  assert.equal(guardResult.details?.errorKind, "search_loop");

@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed
  const tool = pi.getRegisteredTool();
  const execute = tool.execute.bind(tool);

-  // Exhaust the initial window (calls 1–3 succeed, call 4 fires guard)
-  for (let i = 1; i <= 3; i++) {
-    await callSearch(execute, query, `call-${i}`);
-  }
-  const guardFirst = await callSearch(execute, query, "call-4");
-  assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard");
+  // Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1)
+  await callSearch(execute, query, "call-1");
+  const guardFirst = await callSearch(execute, query, "call-2");
+  assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard");

-  // Key regression test: call 5 (and beyond) must ALSO trigger the guard.
-  // The original bug reset state on trigger, so call 5 was treated as a fresh
+  // Key regression test: call 3 (and beyond) must ALSO trigger the guard.
+  // The original bug reset state on trigger, so call 3 was treated as a fresh
  // first search and the loop restarted.
-  const guardSecond = await callSearch(execute, query, "call-5");
+  const guardSecond = await callSearch(execute, query, "call-3");
  assert.equal(
    guardSecond.isError, true,
-    "call 5 should STILL trigger the loop guard (guard must stay armed after firing)"
+    "call 3 should STILL trigger the loop guard (guard must stay armed after firing)"
  );
  assert.equal(guardSecond.details?.errorKind, "search_loop");

-  // Call 6 as well — guard should keep firing
-  const guardThird = await callSearch(execute, query, "call-6");
+  // Call 4 as well — guard should keep firing
+  const guardThird = await callSearch(execute, query, "call-4");
  assert.equal(
    guardThird.isError, true,
-    "call 6 should STILL trigger the loop guard"
+    "call 4 should STILL trigger the loop guard"
  );
 });

@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async
  const tool = pi.getRegisteredTool();
  const execute = tool.execute.bind(tool);

-  // Trigger guard for queryA
-  for (let i = 1; i <= 4; i++) {
-    await callSearch(execute, queryA, `call-a-${i}`);
-  }
+  // Trigger guard for queryA (call 1 succeeds, call 2 fires guard)
+  await callSearch(execute, queryA, "call-a-1");
+  await callSearch(execute, queryA, "call-a-2");

  // Issue a different query — should succeed (resets the duplicate counter)
  const resultB = await callSearch(execute, queryB, "call-b-1");
@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async
    "a different query after guard should not be treated as a loop"
  );
 });
+
+test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => {
+  process.env.BRAVE_API_KEY = "test-key-budget";
+  delete process.env.TAVILY_API_KEY;
+  delete process.env.OLLAMA_API_KEY;
+  const restoreFetch = mockFetch(makeBraveResponse());
+
+  t.after(() => {
+    restoreFetch();
+    restoreSearchEnv();
+  });
+
+  // Reset guard state (including session budget) and register directly
+  resetSearchLoopGuardState();
+  const pi = createMockPI();
+  registerSearchTool(pi as any);
+
+  const tool = pi.getRegisteredTool();
+  assert.ok(tool, "search tool should be registered");
+  const execute = tool.execute.bind(tool);
+
+  // Issue 15 unique queries — all should succeed (budget = 15)
+  for (let i = 1; i <= 15; i++) {
+    const result = await callSearch(execute, `unique budget query ${i}`, `budget-${i}`);
+    assert.notEqual(result.isError, true, `query ${i} should succeed within budget`);
+  }
+
+  // Query 16: budget exhausted — should be blocked
+  const blocked = await callSearch(execute, "one more query", "budget-16");
+  assert.equal(blocked.isError, true, "query 16 should be blocked by budget");
+  assert.equal(blocked.details?.errorKind, "budget_exhausted");
+  assert.ok(
+    blocked.content[0].text.includes("Search budget exhausted"),
+    "error message should mention budget"
+  );
+});
+
+test("session search budget resets via resetSearchLoopGuardState", async (t) => {
+  process.env.BRAVE_API_KEY = "test-key-budget-reset";
+  delete process.env.TAVILY_API_KEY;
+  delete process.env.OLLAMA_API_KEY;
+  const restoreFetch = mockFetch(makeBraveResponse());
+
+  t.after(() => {
+    restoreFetch();
+    restoreSearchEnv();
+  });
+
+  // Reset and register directly
+  resetSearchLoopGuardState();
+  const pi = createMockPI();
+  registerSearchTool(pi as any);
+
+  const tool = pi.getRegisteredTool();
+  const execute = tool.execute.bind(tool);
+
+  // Exhaust budget
+  for (let i = 1; i <= 15; i++) {
+    await callSearch(execute, `budget reset query ${i}`, `br-${i}`);
+  }
+  const exhausted = await callSearch(execute, "exhausted query", "br-exhausted");
+  assert.equal(exhausted.isError, true, "budget should be exhausted");
+
+  // Reset simulates new session
+  resetSearchLoopGuardState();
+  const fresh = await callSearch(execute, "fresh session query", "br-fresh");
+  assert.notEqual(fresh.isError, true, "first query after reset should succeed");
+});