diff --git a/packages/coding-agent/src/core/providers/web-search-middleware.ts b/packages/coding-agent/src/core/providers/web-search-middleware.ts new file mode 100644 index 000000000..f35db8ca9 --- /dev/null +++ b/packages/coding-agent/src/core/providers/web-search-middleware.ts @@ -0,0 +1,232 @@ +/** + * web-search-middleware.ts — native provider middleware for Anthropic web search injection. + * + * Purpose: inject the web_search tool into Anthropic API requests before they are sent, + * bypassing the jiti extension dispatch hot-path that fires on every LLM API call. + * Placing injection here means it runs as compiled TypeScript with zero extension-dispatch + * overhead, while remaining available to the search-the-web extension for delegation and + * test coverage. + * + * Consumer: sdk.ts onPayload callback (native hot-path) and the search-the-web extension's + * before_provider_request handler (delegation for test coverage and fallback). + */ + +/** All custom search tool names that should be disabled when native search is active. */ +export const CUSTOM_SEARCH_TOOL_NAMES = [ + "search-the-web", + "search_and_read", + "google_search", +]; + +/** + * Maximum number of native web searches allowed per session (agent unit). + * + * Purpose: the Anthropic API's max_uses resets per-request. Without a session-level + * cap, pause_turn → resubmit cycles allow unbounded total searches (#1309). + * 15 = 3 full turns of 5 searches each — generous for research, but bounded. + */ +export const MAX_NATIVE_SEARCHES_PER_SESSION = 15; + +/** Thinking block types that require signature validation by the API. */ +const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]); + +/** + * Module-level preference resolver, set by the search-the-web extension to expose + * PREFERENCES.md-based search_provider overrides. Falls back to env-var only when unset. + * + * Purpose: decouple the middleware from the extension's preference system while still + * respecting user-configured search provider overrides at injection time. + */ +let _preferBraveResolver: (() => boolean) | undefined; + +/** + * Register a function that determines whether to prefer Brave/custom search over + * native Anthropic web search. Called by the search-the-web extension on activation + * to expose PREFERENCES.md-based search_provider overrides to the middleware. + * + * Consumer: search-the-web extension native-search.js, on registration. + */ +export function setPreferBraveResolver(fn: () => boolean): void { + _preferBraveResolver = fn; +} + +/** + * Returns true when the user prefers Brave or another custom search provider over + * native Anthropic web search. Delegates to the registered resolver if available, + * otherwise falls back to the PREFER_BRAVE_SEARCH env var. + */ +function preferBraveSearch(): boolean { + if (_preferBraveResolver) return _preferBraveResolver(); + return ( + process.env.PREFER_BRAVE_SEARCH === "1" || + process.env.PREFER_BRAVE_SEARCH === "true" + ); +} + +/** + * Strip thinking/redacted_thinking blocks from assistant messages in the conversation + * history. + * + * Purpose: the Pi SDK's streaming parser drops server_tool_use and + * web_search_tool_result content blocks (unknown types). When the conversation is + * replayed, assistant messages are incomplete — the Anthropic API detects the + * modification and rejects the request with "thinking blocks cannot be modified." + * Removing thinking blocks from history avoids the rejection; the model generates + * fresh thinking for the current turn. + * + * Consumer: WebSearchMiddleware.applyToPayload, called on every Anthropic request. + */ +export function stripThinkingFromHistory(messages: unknown[]): void { + for (const msg of messages as Array<{ role?: string; content?: unknown }>) { + if (msg.role !== "assistant") continue; + const content = msg.content; + if (!Array.isArray(content)) continue; + msg.content = content.filter( + (block: unknown) => + !THINKING_TYPES.has((block as { type?: string })?.type ?? ""), + ); + } +} + +/** + * Stateful per-session web search middleware. + * + * Purpose: inject the web_search tool definition into Anthropic API requests and enforce + * a session-level search budget so pause_turn → resubmit loops cannot accumulate + * unbounded searches (#1309). A high-water mark prevents budget resets when context + * compaction removes web_search_tool_result blocks from the visible history. + * + * Consumer: sdk.ts createAgentSession (singleton instance, native hot-path) and the + * search-the-web extension's before_provider_request handler (delegation). + */ +export class WebSearchMiddleware { + private sessionSearchCount = 0; + + /** + * Reset the session-level search counter to zero. + * + * Purpose: prevent budget from carrying over across sessions. Must be called on + * session_start and on each registerNativeSearchHooks invocation (test isolation). + * + * Consumer: session_start handler in native-search.js; registerNativeSearchHooks reset. + */ + resetSession(): void { + this.sessionSearchCount = 0; + } + + /** + * Apply Anthropic web search injection to an API request payload. Mutates the + * payload in-place and returns it when changes were made, or undefined when no + * changes were needed (matching the extension before_provider_request return + * convention so the extension runner does not overwrite the payload reference). + * + * Consumer: sdk.ts onPayload callback (return value ignored, mutation used) and + * search-the-web extension before_provider_request handler (return value forwarded). + */ + applyToPayload( + payload: unknown, + model?: { provider?: string; id?: string }, + ): unknown { + if (!payload) return undefined; + + const p = payload as Record; + + // Determine whether this is an Anthropic request. The model argument is the + // most reliable source (comes directly from the resolved Model). Without it, + // fall back to the model name heuristic, excluding known Claude-compatible + // providers that do NOT support the web_search tool (Copilot, MiniMax, Kimi). + let isAnthropic: boolean; + if (model?.provider) { + isAnthropic = model.provider === "anthropic"; + } else { + const name = String(p.model ?? "").toLowerCase(); + isAnthropic = + name.startsWith("claude-") && + !name.includes("minimax") && + !name.includes("kimi") && + !name.includes("copilot"); + } + + if (!isAnthropic) return undefined; + + // Strip thinking blocks from history to avoid signature validation errors + // caused by the SDK dropping server_tool_use/web_search_tool_result blocks. + const messages = p.messages; + if (Array.isArray(messages)) { + stripThinkingFromHistory(messages as unknown[]); + } + + // When preferring Brave/custom search, skip native injection entirely. + if (preferBraveSearch()) return undefined; + + if (!Array.isArray(p.tools)) p.tools = []; + let tools = p.tools as Array>; + + // Don't double-inject if already present (e.g., sdk.ts ran before extension). + if (tools.some((t) => t.type === "web_search_20250305")) return undefined; + + // Remove custom search tool definitions — native web_search is server-side + // and more reliable; keeping both confuses the model (#custom-search-conflict). + tools = tools.filter( + (t) => !CUSTOM_SEARCH_TOOL_NAMES.includes(t.name as string), + ); + p.tools = tools; + + // ── Session-level search budget (#1309, compaction-safe) ────────────────── + // Count web_search_tool_result blocks in history to determine how many native + // searches have already been used this session. Use the monotonic high-water + // mark: take the max of the history count and the running counter so budget + // does not reset when compaction removes result blocks from the visible history. + if (Array.isArray(messages)) { + let historySearchCount = 0; + for (const msg of messages as Array<{ content?: unknown }>) { + const content = msg.content; + if (!Array.isArray(content)) continue; + for (const block of content) { + if ( + (block as { type?: string })?.type === "web_search_tool_result" + ) { + historySearchCount++; + } + } + } + this.sessionSearchCount = Math.max( + this.sessionSearchCount, + historySearchCount, + ); + } + + const remaining = Math.max( + 0, + MAX_NATIVE_SEARCHES_PER_SESSION - this.sessionSearchCount, + ); + + if (remaining <= 0) { + // Budget exhausted — don't inject the search tool at all. + // Return payload to signal it was processed (custom tools were filtered). + return payload; + } + + tools.push({ + type: "web_search_20250305", + name: "web_search", + // Cap per-request searches to the lesser of 5 (per-turn cap) or the + // remaining session budget (#1309). + max_uses: Math.min(5, remaining), + }); + + return payload; + } +} + +/** + * Module-level singleton instance shared by sdk.ts (native hot-path) and the + * search-the-web extension (delegation + session reset). + * + * Purpose: a single counter ensures the session budget is accurate regardless of + * which code path processes each request. The extension resets the counter on + * session_start and on each registerNativeSearchHooks call (test isolation). + * + * Consumer: sdk.ts onPayload, search-the-web extension native-search.js. + */ +export const webSearchMiddleware = new WebSearchMiddleware(); diff --git a/packages/coding-agent/src/core/sdk.ts b/packages/coding-agent/src/core/sdk.ts index 6481175f8..14fb807d1 100644 --- a/packages/coding-agent/src/core/sdk.ts +++ b/packages/coding-agent/src/core/sdk.ts @@ -1,6 +1,7 @@ import { existsSync } from "node:fs"; import { join } from "node:path"; import type { Model } from "@singularity-forge/ai"; +import { webSearchMiddleware } from "./providers/web-search-middleware.js"; /** * Lightweight PATH scan for the `claude` binary — no subprocess, no network. @@ -433,6 +434,11 @@ export async function createAgentSession( }, convertToLlm: convertToLlmWithBlockImages, onPayload: async (payload, currentModel) => { + // Apply native web search middleware before dispatching to extension hooks. + // This is the hot-path: runs as compiled TypeScript, not through jiti dispatch. + // The extension's before_provider_request handler delegates here too; the + // double-injection guard (tools.some(web_search_20250305)) prevents re-injection. + webSearchMiddleware.applyToPayload(payload, currentModel); const runner = extensionRunnerRef.current; if (!runner?.hasHandlers("before_provider_request")) { return payload; diff --git a/packages/coding-agent/src/index.ts b/packages/coding-agent/src/index.ts index ebfb62496..8defcb51b 100644 --- a/packages/coding-agent/src/index.ts +++ b/packages/coding-agent/src/index.ts @@ -215,6 +215,15 @@ export type { ResourceLoader, } from "./core/resource-loader.js"; export { DefaultResourceLoader } from "./core/resource-loader.js"; +// Native provider middleware +export { + CUSTOM_SEARCH_TOOL_NAMES, + MAX_NATIVE_SEARCHES_PER_SESSION, + setPreferBraveResolver, + stripThinkingFromHistory, + WebSearchMiddleware, + webSearchMiddleware, +} from "./core/providers/web-search-middleware.js"; // SDK for programmatic usage export { type CreateAgentSessionOptions, diff --git a/src/resources/extensions/search-the-web/native-search.js b/src/resources/extensions/search-the-web/native-search.js index 6cc279473..2d4052a68 100644 --- a/src/resources/extensions/search-the-web/native-search.js +++ b/src/resources/extensions/search-the-web/native-search.js @@ -3,27 +3,27 @@ * * Extracted from index.ts so it can be unit-tested without importing * the heavy tool-registration modules. + * + * The core injection logic (before_provider_request) now lives in: + * packages/coding-agent/src/core/providers/web-search-middleware.ts + * + * This file exports the constants and functions needed by the extension and by tests, + * and delegates before_provider_request to the native middleware singleton so that + * (a) tests exercise the same code path as production and (b) PREFERENCES.md-based + * search_provider overrides are respected via setPreferBraveResolver. */ +import { + CUSTOM_SEARCH_TOOL_NAMES, + MAX_NATIVE_SEARCHES_PER_SESSION, + setPreferBraveResolver, + stripThinkingFromHistory, + webSearchMiddleware, +} from "@singularity-forge/coding-agent"; import { resolveSearchProviderFromPreferences } from "../sf/preferences.js"; /** Tool names for the Brave-backed custom search tools */ export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"]; /** All custom search tool names that should be disabled when native search is active */ -export const CUSTOM_SEARCH_TOOL_NAMES = [ - "search-the-web", - "search_and_read", - "google_search", -]; -/** Thinking block types that require signature validation by the API */ -const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]); -/** - * Maximum number of native web searches allowed per session (agent unit). - * The Anthropic API's `max_uses` is per-request — it resets on each API call. - * When `pause_turn` triggers a resubmit, the model gets a fresh budget. - * This session-level cap prevents unbounded search accumulation (#1309). - * - * 15 = 3 full turns of 5 searches each — generous for research, but bounded. - */ -export const MAX_NATIVE_SEARCHES_PER_SESSION = 15; +export { CUSTOM_SEARCH_TOOL_NAMES, MAX_NATIVE_SEARCHES_PER_SESSION, stripThinkingFromHistory }; /** * Returns true when the provider supports native Anthropic web_search injection. * @@ -56,30 +56,6 @@ export function preferBraveSearch() { process.env.PREFER_BRAVE_SEARCH === "true" ); } -/** - * Strip thinking/redacted_thinking blocks from assistant messages in the - * conversation history. - * - * Why: The Pi SDK's streaming parser drops `server_tool_use` and - * `web_search_tool_result` content blocks (unknown types). When the - * conversation is replayed, the assistant messages are incomplete — missing - * those blocks. The Anthropic API detects the modification and rejects the - * request with "thinking blocks cannot be modified." - * - * Fix: Remove thinking blocks from all assistant messages in the history. - * In Anthropic's Messages API, the messages array always ends with a user - * message, so every assistant message is from a previous turn that has been - * through a store/replay cycle. The model generates fresh thinking for the - * current turn regardless. - */ -export function stripThinkingFromHistory(messages) { - for (const msg of messages) { - if (msg.role !== "assistant") continue; - const content = msg.content; - if (!Array.isArray(content)) continue; - msg.content = content.filter((block) => !THINKING_TYPES.has(block?.type)); - } -} /** * Register model_select, before_provider_request, and session_start hooks * for native Anthropic web search injection. @@ -87,16 +63,19 @@ export function stripThinkingFromHistory(messages) { * Returns the isAnthropicProvider getter for testing. */ export function registerNativeSearchHooks(pi) { - let isAnthropicProvider = false; - let modelSelectFired = false; - // Session-level native search counter (#1309). - // Tracks cumulative web_search_tool_result blocks across all turns in a session. - // Reset on session_start. Used to compute remaining budget for max_uses. - let sessionSearchCount = 0; + // null = unknown (model_select not yet fired); true/false = provider is/isn't Anthropic. + let isAnthropicProvider = null; + // Register the PREFERENCES.md-aware resolver so the native middleware (shared + // singleton in web-search-middleware.ts) respects search_provider overrides. + // Called here so each test invocation resets the resolver to the current context. + setPreferBraveResolver(preferBraveSearch); + // Reset the shared middleware session counter for this registration. + // In tests, each registerNativeSearchHooks() call starts a fresh counter. + // In production, the session_start handler below resets it on each new session. + webSearchMiddleware.resetSession(); // Track provider changes via model selection — also handles diagnostics // since model_select fires AFTER session_start and knows the provider. pi.on("model_select", async (event, ctx) => { - modelSelectFired = true; const wasAnthropic = isAnthropicProvider; isAnthropicProvider = event.model.provider === "anthropic"; const hasSearchKey = !!( @@ -148,97 +127,26 @@ export function registerNativeSearchHooks(pi) { ); } }); - // Inject native web search into Anthropic API requests + // before_provider_request is now handled natively by WebSearchMiddleware in sdk.ts. + // This handler delegates to the same singleton so that: + // (a) existing tests continue to exercise the injection logic end-to-end, and + // (b) the double-injection guard (tools.some(web_search_20250305)) is a no-op + // in production where sdk.ts already ran the middleware first. + // + // When event.model is absent but model_select has already run (isAnthropicProvider + // is not null), synthesize a provider hint from the cached state so the middleware + // does not fall back to the model-name heuristic and wrongly inject into Copilot + // claude-* requests (#copilot-false-positive). pi.on("before_provider_request", (event) => { - const payload = event.payload; - if (!payload) return; - // Detect Anthropic provider. Use the model object from the event (most - // reliable — comes directly from the resolved Model), then fall back to - // the model_select flag, then to the model name heuristic (last resort). - // The model name heuristic is needed for session restores where - // modelsAreEqual suppresses model_select AND the SDK doesn't pass model. - const eventModel = event.model; - let isAnthropic; - if (eventModel?.provider) { - isAnthropic = eventModel.provider === "anthropic"; - } else if (modelSelectFired) { - isAnthropic = isAnthropicProvider; - } else { - // No provider info available and no model_select event fired. - // Heuristic: models starting with `claude-` are usually Anthropic, - // but we must exclude known clones (github-copilot, minimax, kimi) - // that use the same naming but don't support native web_search. - const name = String(payload.model ?? "").toLowerCase(); - isAnthropic = - name.startsWith("claude-") && - !name.includes("minimax") && - !name.includes("kimi") && - !name.includes("copilot"); + let modelHint = event.model; + if (!modelHint && isAnthropicProvider !== null) { + modelHint = { provider: isAnthropicProvider ? "anthropic" : "not-anthropic" }; } - if (!isAnthropic) return; - // Strip thinking blocks from history to avoid signature validation errors - // caused by the SDK dropping server_tool_use/web_search_tool_result blocks. - const messages = payload.messages; - if (Array.isArray(messages)) { - stripThinkingFromHistory(messages); - } - // When preferring Brave, skip native search injection entirely - if (preferBraveSearch()) return; - if (!Array.isArray(payload.tools)) payload.tools = []; - let tools = payload.tools; - // Don't double-inject if already present - if (tools.some((t) => t.type === "web_search_20250305")) return; - // Remove custom search tool definitions from Anthropic requests. - // Native web_search is server-side and more reliable — keeping both confuses - // the model and causes it to pick custom tools which can fail with network errors. - tools = tools.filter((t) => !CUSTOM_SEARCH_TOOL_NAMES.includes(t.name)); - payload.tools = tools; - // ── Session-level search budget (#1309, #compaction-safe) ───────────── - // Count web_search_tool_result blocks in the conversation history to - // determine how many native searches have already been used this session. - // The Anthropic API's max_uses resets per request, so without this guard, - // pause_turn → resubmit cycles allow unlimited total searches. - // - // Use the monotonic high-water mark: take the max of the history count - // and the running counter. This prevents budget resets when context - // compaction removes web_search_tool_result blocks from history. - if (Array.isArray(messages)) { - let historySearchCount = 0; - for (const msg of messages) { - const content = msg.content; - if (!Array.isArray(content)) continue; - for (const block of content) { - if (block?.type === "web_search_tool_result") { - historySearchCount++; - } - } - } - // High-water mark: never decrease the counter, even if compaction - // removes web_search_tool_result blocks from the visible history. - sessionSearchCount = Math.max(sessionSearchCount, historySearchCount); - } - const remaining = Math.max( - 0, - MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount, - ); - if (remaining <= 0) { - // Budget exhausted — don't inject the search tool at all. - // The model will proceed without web search capability. - return payload; - } - tools.push({ - type: "web_search_20250305", - name: "web_search", - // Cap per-request searches to the lesser of 5 (per-turn cap) or the - // remaining session budget (#1309). This prevents the model from - // consuming unlimited searches via pause_turn → resubmit cycles. - max_uses: Math.min(5, remaining), - }); - return payload; + return webSearchMiddleware.applyToPayload(event.payload, modelHint); }); pi.on("session_start", async (_event, _ctx) => { - // Reset session-level search budget (#1309) - sessionSearchCount = 0; + // Reset the shared middleware session budget (#1309). + webSearchMiddleware.resetSession(); }); return { getIsAnthropic: () => isAnthropicProvider }; }