singularity-forge/src/tests/search-loop-guard.test.ts
Jeremy McSpadden 8e7ec7885a fix(search): enforce hard search budget and survive context compaction
- Native search: use monotonic high-water mark (Math.max) instead of
  overwriting sessionSearchCount from history. Prevents budget reset
  when context compaction removes web_search_tool_result blocks.
- Custom search tool: add MAX_SEARCHES_PER_SESSION=15 hard cap across
  all queries (not just consecutive duplicates). Returns budget_exhausted
  error when limit reached.
- Tighten MAX_CONSECUTIVE_DUPES from 3 to 1 — block on the 2nd identical
  search since cached results make repeats pointless.
- Add tests for compaction-safe high-water mark, session budget
  enforcement, and budget reset on session_start.

Closes #2583
2026-03-25 21:35:09 -05:00

334 lines
11 KiB
TypeScript

/**
 * Regression tests for the search tool's loop guard and per-session budget.
 *
 * Covers:
 * - Guard fires after MAX_CONSECUTIVE_DUPES identical calls (#949)
 * - Guard stays armed after firing — subsequent duplicates immediately
 *   re-trigger the error (#1671: the original fix reset state on trigger,
 *   allowing the loop to restart)
 * - Guard resets cleanly when a different query is issued
 * - Guard state is cleared at a session_start boundary
 * - Session search budget blocks further queries once exhausted, and is
 *   restored by resetSearchLoopGuardState
 */
import test from "node:test";
import assert from "node:assert/strict";
import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts";
import searchExtension from "../resources/extensions/search-the-web/index.ts";
/** Snapshot of the search-provider API keys as they were when this module loaded. */
const ORIGINAL_ENV = {
  BRAVE_API_KEY: process.env.BRAVE_API_KEY,
  TAVILY_API_KEY: process.env.TAVILY_API_KEY,
  OLLAMA_API_KEY: process.env.OLLAMA_API_KEY,
};

/**
 * Put every provider key captured in ORIGINAL_ENV back to its load-time value.
 * A key that was unset at load time is removed again rather than being
 * assigned `undefined`.
 */
function restoreSearchEnv() {
  for (const [name, saved] of Object.entries(ORIGINAL_ENV)) {
    if (saved !== undefined) {
      process.env[name] = saved;
    } else {
      delete process.env[name];
    }
  }
}
// =============================================================================
// Mock helpers
// =============================================================================
/**
 * Build a small Brave-style search payload: one query descriptor plus a
 * single web result. A new object is created on every call.
 */
function makeBraveResponse() {
  const query = { original: "test query", more_results_available: false };
  const onlyResult = {
    title: "Result One",
    url: "https://example.com/one",
    description: "First result description.",
  };
  return { query, web: { results: [onlyResult] } };
}
/**
 * Replace `global.fetch` with a stub that resolves to the same canned
 * response for every request.
 *
 * @param body   Value returned by the stub's `json()`; `text()` returns its
 *               JSON serialization.
 * @param status HTTP status reported by the stub (defaults to 200).
 * @returns A function that puts back the `fetch` that was installed when
 *          `mockFetch` was called.
 */
function mockFetch(body: unknown, status = 200) {
  const original = global.fetch;
  (global as any).fetch = async () => ({
    // Mirror real fetch semantics: `ok` is true for any 2xx status, not only 200.
    ok: status >= 200 && status < 300,
    status,
    // No response headers are simulated; every lookup yields null.
    headers: { get: () => null },
    json: async () => body,
    text: async () => JSON.stringify(body),
  });
  return () => {
    global.fetch = original;
  };
}
/**
 * Build a stand-in for the extension host ("PI") that records what an
 * extension registers on it.
 *
 * The returned object stores event handlers passed to `on`, remembers tools
 * passed to `registerTool` (by name when the tool has a string `name`, and
 * always as the most recently registered tool), and lets a test replay an
 * event through `fire`.
 */
function createMockPI() {
  type Listener = (...args: any[]) => unknown;

  const listeners: Array<{ event: string; handler: Listener }> = [];
  const namedTools = new Map<string, any>();
  let lastTool: any = null;
  let enabledTools: string[] = [];

  return {
    on(event: string, handler: Listener) {
      listeners.push({ event, handler });
    },

    // Commands are accepted and ignored.
    registerCommand(_name: string, _command: unknown) {},

    registerTool(tool: any) {
      const hasStringName = typeof tool?.name === "string";
      if (hasStringName) {
        namedTools.set(tool.name, tool);
      }
      lastTool = tool;
    },

    /** Invoke, in registration order and one at a time, every handler for `event`. */
    async fire(event: string, eventData: unknown, ctx: unknown) {
      // Index loop over the live array: handlers added while firing are still visited.
      for (let i = 0; i < listeners.length; i++) {
        const entry = listeners[i];
        if (entry.event !== event) continue;
        await entry.handler(eventData, ctx);
      }
    },

    /** Look a tool up by name, falling back to whichever tool was registered last. */
    getRegisteredTool(name = "search-the-web") {
      const byName = namedTools.get(name);
      return byName ?? lastTool;
    },

    getActiveTools() {
      return enabledTools;
    },

    setActiveTools(tools: string[]) {
      enabledTools = tools;
    },

    // Nothing is written; a fixed path is handed back.
    writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt",
  };
}
/**
 * Invoke a search tool's `execute` function the way these tests need it:
 * with a call id, a `{ query }` params object, a never-aborted signal, a
 * no-op callback, and a context whose `ui.notify` does nothing.
 *
 * @param execute The tool's execute function.
 * @param query   Search query placed in the params object.
 * @param callId  Identifier passed as the first argument (default "call-1").
 * @returns Whatever `execute` resolves to.
 */
async function callSearch(
  execute: (...args: any[]) => Promise<any>,
  query: string,
  callId = "call-1"
) {
  const params = { query };
  const { signal } = new AbortController();
  const noop = () => {};
  const ctx = { ui: { notify() {} } };
  return execute(callId, params, signal, noop, ctx);
}
// =============================================================================
// Tests
// =============================================================================
/**
 * Because each test file gets its own module registry, the module-level loop
 * guard state (lastSearchKey, consecutiveDupeCount) has not been touched yet
 * when this first test runs.
 */
test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t) => {
  // Only the Brave key is present; the other provider keys are removed.
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  process.env.BRAVE_API_KEY = "test-key-loop-guard";

  const undoFetchMock = mockFetch(makeBraveResponse());
  t.after(() => {
    undoFetchMock();
    restoreSearchEnv();
  });

  const mockPi = createMockPI();
  registerSearchTool(mockPi as any);
  const searchTool = mockPi.getRegisteredTool();
  assert.ok(searchTool, "search tool should be registered");

  const execute = searchTool.execute.bind(searchTool);
  const repeatedQuery = "loop test query";

  // First occurrence of the query goes through (MAX_CONSECUTIVE_DUPES = 1).
  const firstOutcome = await callSearch(execute, repeatedQuery, "call-1");
  assert.notEqual(firstOutcome.isError, true, "call 1 should not trigger loop guard");

  // The very next identical query is rejected (threshold = 1).
  const secondOutcome = await callSearch(execute, repeatedQuery, "call-2");
  assert.equal(secondOutcome.isError, true, "call 2 should trigger the loop guard");
  assert.equal(secondOutcome.details?.errorKind, "search_loop");

  const mentionsLoop = secondOutcome.content[0].text.includes("Search loop detected");
  assert.ok(mentionsLoop, "error message should mention search loop");
});
// A session_start event fired through the full extension must clear guard
// state, so a query that was blocked in one session is accepted in the next.
test("search loop guard resets at session_start boundary", async (t) => {
  const repeatedQuery = "session boundary query";

  // Only the Brave key is present; the other provider keys are removed.
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  process.env.BRAVE_API_KEY = "test-key-loop-guard-session";

  const undoFetchMock = mockFetch(makeBraveResponse());
  t.after(() => {
    undoFetchMock();
    restoreSearchEnv();
  });

  const mockPi = createMockPI();
  const sessionCtx = {
    hasUI: false,
    ui: { notify() {} },
  };
  const startSession = () => mockPi.fire("session_start", {}, sessionCtx);

  // Register the whole extension (not just the tool) so its session_start
  // handlers are attached to the mock.
  searchExtension(mockPi as any);
  await startSession();

  const searchTool = mockPi.getRegisteredTool();
  assert.ok(searchTool, "search tool should be registered");
  const execute = searchTool.execute.bind(searchTool);

  // Session 1: the first call passes, the identical second call is guarded.
  await callSearch(execute, repeatedQuery, "s1-call-1");
  const blockedInSession1 = await callSearch(execute, repeatedQuery, "s1-call-2");
  assert.equal(blockedInSession1.isError, true, "session 1 should be guarded");
  assert.equal(blockedInSession1.details?.errorKind, "search_loop");

  // Session 2: same query again, but after a fresh session_start.
  await startSession();
  const afterRestart = await callSearch(execute, repeatedQuery, "s2-call-1");
  assert.notEqual(
    afterRestart.isError,
    true,
    "first identical query in a new session should not be blocked by prior session state",
  );
});
// Regression test for #1671: once the guard has fired for a query, every
// further identical call must be rejected as well.
test("search loop guard stays armed after firing — subsequent duplicates immediately re-trigger (#1671)", async (t) => {
  process.env.BRAVE_API_KEY = "test-key-loop-guard-2";
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  const restoreFetch = mockFetch(makeBraveResponse());
  // A query not used by any other test in this file.
  const query = "persistent loop query";
  t.after(() => {
    restoreFetch();
    restoreSearchEnv();
  });
  // Start from clean guard/budget state so this test does not depend on
  // which tests ran before it (the guard state is module-level).
  resetSearchLoopGuardState();
  const pi = createMockPI();
  registerSearchTool(pi as any);
  const tool = pi.getRegisteredTool();
  assert.ok(tool, "search tool should be registered");
  const execute = tool.execute.bind(tool);
  // Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1)
  await callSearch(execute, query, "call-1");
  const guardFirst = await callSearch(execute, query, "call-2");
  assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard");
  // Key regression test: call 3 (and beyond) must ALSO trigger the guard.
  // The original bug reset state on trigger, so call 3 was treated as a fresh
  // first search and the loop restarted.
  const guardSecond = await callSearch(execute, query, "call-3");
  assert.equal(
    guardSecond.isError, true,
    "call 3 should STILL trigger the loop guard (guard must stay armed after firing)"
  );
  assert.equal(guardSecond.details?.errorKind, "search_loop");
  // Call 4 as well — guard should keep firing
  const guardThird = await callSearch(execute, query, "call-4");
  assert.equal(
    guardThird.isError, true,
    "call 4 should STILL trigger the loop guard"
  );
});
// After the guard has fired for one query, a different query must be
// accepted rather than treated as part of the same loop.
test("search loop guard resets cleanly when a different query is issued", async (t) => {
  process.env.BRAVE_API_KEY = "test-key-loop-guard-3";
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  const restoreFetch = mockFetch(makeBraveResponse());
  const queryA = "query alpha reset test";
  const queryB = "query beta reset test";
  t.after(() => {
    restoreFetch();
    restoreSearchEnv();
  });
  // Start from clean guard/budget state so this test does not depend on
  // which tests ran before it (the guard state is module-level).
  resetSearchLoopGuardState();
  const pi = createMockPI();
  registerSearchTool(pi as any);
  const tool = pi.getRegisteredTool();
  assert.ok(tool, "search tool should be registered");
  const execute = tool.execute.bind(tool);
  // Trigger guard for queryA (call 1 succeeds, call 2 fires guard)
  await callSearch(execute, queryA, "call-a-1");
  const guardA = await callSearch(execute, queryA, "call-a-2");
  // Verify the precondition: without this, the test would pass even if the
  // guard never fired at all.
  assert.equal(guardA.isError, true, "duplicate of queryA should trigger the loop guard");
  assert.equal(guardA.details?.errorKind, "search_loop");
  // Issue a different query — should succeed (resets the duplicate counter)
  const resultB = await callSearch(execute, queryB, "call-b-1");
  assert.notEqual(
    resultB.isError, true,
    "a different query after guard should not be treated as a loop"
  );
});
// Fifteen distinct queries fit in the session budget; the sixteenth is
// rejected with a budget_exhausted error even though it is not a duplicate.
test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => {
  // Only the Brave key is present; the other provider keys are removed.
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  process.env.BRAVE_API_KEY = "test-key-budget";

  const undoFetchMock = mockFetch(makeBraveResponse());
  t.after(() => {
    undoFetchMock();
    restoreSearchEnv();
  });

  // Clear guard state (session budget included), then register the tool directly.
  resetSearchLoopGuardState();
  const mockPi = createMockPI();
  registerSearchTool(mockPi as any);
  const searchTool = mockPi.getRegisteredTool();
  assert.ok(searchTool, "search tool should be registered");
  const execute = searchTool.execute.bind(searchTool);

  // Spend the whole budget (15) on unique queries; each one must go through.
  const queryNumbers = Array.from({ length: 15 }, (_, idx) => idx + 1);
  for (const n of queryNumbers) {
    const outcome = await callSearch(execute, `unique budget query ${n}`, `budget-${n}`);
    assert.notEqual(outcome.isError, true, `query ${n} should succeed within budget`);
  }

  // One query past the budget is refused.
  const overBudget = await callSearch(execute, "one more query", "budget-16");
  assert.equal(overBudget.isError, true, "query 16 should be blocked by budget");
  assert.equal(overBudget.details?.errorKind, "budget_exhausted");

  const mentionsBudget = overBudget.content[0].text.includes("Search budget exhausted");
  assert.ok(mentionsBudget, "error message should mention budget");
});
// Once the session budget is exhausted, resetSearchLoopGuardState must make
// the tool accept queries again.
test("session search budget resets via resetSearchLoopGuardState", async (t) => {
  process.env.BRAVE_API_KEY = "test-key-budget-reset";
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  const restoreFetch = mockFetch(makeBraveResponse());
  t.after(() => {
    restoreFetch();
    restoreSearchEnv();
    // Do not leave consumed budget behind in the module-level state.
    resetSearchLoopGuardState();
  });
  // Reset and register directly
  resetSearchLoopGuardState();
  const pi = createMockPI();
  registerSearchTool(pi as any);
  const tool = pi.getRegisteredTool();
  assert.ok(tool, "search tool should be registered");
  const execute = tool.execute.bind(tool);
  // Exhaust budget. Each priming query must succeed; otherwise the error
  // checked below could come from something other than the budget.
  for (let i = 1; i <= 15; i++) {
    const primed = await callSearch(execute, `budget reset query ${i}`, `br-${i}`);
    assert.notEqual(primed.isError, true, `priming query ${i} should succeed within budget`);
  }
  const exhausted = await callSearch(execute, "exhausted query", "br-exhausted");
  assert.equal(exhausted.isError, true, "budget should be exhausted");
  // Pin the error kind so an unrelated failure cannot satisfy this test.
  assert.equal(exhausted.details?.errorKind, "budget_exhausted");
  // Reset simulates new session
  resetSearchLoopGuardState();
  const fresh = await callSearch(execute, "fresh session query", "br-fresh");
  assert.notEqual(fresh.isError, true, "first query after reset should succeed");
});