perf(search): move web-search provider injection from extension hook to native middleware

- Create packages/coding-agent/src/core/providers/web-search-middleware.ts with
  WebSearchMiddleware class: injects web_search tool, enforces session budget (#1309),
  strips thinking blocks from history, and respects PREFERENCES.md search_provider.

- Wire webSearchMiddleware.applyToPayload into sdk.ts onPayload callback (before
  extension hook dispatch) so injection runs as compiled TypeScript with zero
  jiti-dispatch overhead.

- Export WebSearchMiddleware, webSearchMiddleware singleton, setPreferBraveResolver,
  CUSTOM_SEARCH_TOOL_NAMES, MAX_NATIVE_SEARCHES_PER_SESSION, and stripThinkingFromHistory
  from @singularity-forge/coding-agent so the extension can delegate to the same instance.

- Refactor search-the-web/native-search.js: remove self-contained injection logic;
  import and delegate before_provider_request to webSearchMiddleware singleton.
  Use tri-state isAnthropicProvider (null/false/true) to synthesize a provider hint
  when event.model is absent but model_select has already fired — prevents the
  model-name heuristic from wrongly injecting into Copilot claude-* requests.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-10 22:37:42 +02:00
parent a798aa1f6e
commit 7227912a29
4 changed files with 289 additions and 134 deletions

View file

@ -0,0 +1,232 @@
/**
* web-search-middleware.ts — native provider middleware for Anthropic web search injection.
*
* Purpose: inject the web_search tool into Anthropic API requests before they are sent,
* bypassing the jiti extension dispatch hot-path that fires on every LLM API call.
* Placing injection here means it runs as compiled TypeScript with zero extension-dispatch
* overhead, while remaining available to the search-the-web extension for delegation and
* test coverage.
*
* Consumer: sdk.ts onPayload callback (native hot-path) and the search-the-web extension's
* before_provider_request handler (delegation for test coverage and fallback).
*/
/**
 * Names of every custom (non-native) search tool. Tool definitions carrying one
 * of these names are filtered out of a request when native search is active.
 */
export const CUSTOM_SEARCH_TOOL_NAMES: string[] = [
  "search-the-web",
  "search_and_read",
  "google_search",
];

/**
 * Session-wide ceiling on native web searches (one session = one agent unit).
 *
 * Purpose: the Anthropic API's max_uses resets per request. Without a
 * session-level cap, pause_turn resubmit cycles allow unbounded total
 * searches (#1309).
 * 15 = 3 full turns of 5 searches each — generous for research, but bounded.
 */
export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;

/** Content-block types that require signature validation by the API. */
const THINKING_TYPES = new Set<string>(["thinking", "redacted_thinking"]);
/**
 * Optional preference callback installed by the search-the-web extension so the
 * middleware can honour PREFERENCES.md search_provider overrides. While it is
 * unset, only the PREFER_BRAVE_SEARCH environment variable is consulted.
 *
 * Purpose: keeps the middleware independent of the extension's preference
 * system while still respecting user-configured overrides at injection time.
 */
let _preferBraveResolver: (() => boolean) | undefined = undefined;

/**
 * Install the callback that decides whether Brave/custom search should be
 * preferred over native Anthropic web search. A later call replaces the
 * previously registered callback.
 *
 * Consumer: search-the-web extension native-search.js, on registration.
 *
 * @param fn - Returns true when native web search injection should be skipped.
 */
export function setPreferBraveResolver(fn: () => boolean): void {
  _preferBraveResolver = fn;
}

/**
 * Decide whether the user prefers Brave (or another custom provider) over
 * native Anthropic web search.
 *
 * @returns The registered resolver's answer when one is installed; otherwise
 * true only if PREFER_BRAVE_SEARCH is exactly "1" or "true".
 */
function preferBraveSearch(): boolean {
  const resolver = _preferBraveResolver;
  if (resolver) {
    return resolver();
  }
  const flag = process.env.PREFER_BRAVE_SEARCH;
  return flag === "1" || flag === "true";
}
/**
 * Strip thinking/redacted_thinking blocks from assistant messages in the
 * conversation history. Mutates the given messages in place.
 *
 * Purpose: the Pi SDK's streaming parser drops server_tool_use and
 * web_search_tool_result content blocks (unknown types). When the conversation
 * is replayed, assistant messages are incomplete — the Anthropic API detects
 * the modification and rejects the request with "thinking blocks cannot be
 * modified." Removing thinking blocks from history avoids the rejection; the
 * model generates fresh thinking for the current turn.
 *
 * Consumer: WebSearchMiddleware.applyToPayload, called on every Anthropic request.
 *
 * @param messages - Request message list. Entries that are not objects
 * (including null/undefined), are not assistant messages, or whose content is
 * not an array are left untouched.
 */
export function stripThinkingFromHistory(messages: unknown[]): void {
  for (const entry of messages) {
    // The input is untyped: skip null/undefined/primitive entries rather than
    // throwing on property access, mirroring the defensive read of block.type.
    if (typeof entry !== "object" || entry === null) continue;
    const msg = entry as { role?: string; content?: unknown };
    if (msg.role !== "assistant") continue;
    const content = msg.content;
    if (!Array.isArray(content)) continue;
    // Replace the content with a filtered copy; non-thinking blocks keep their order.
    msg.content = content.filter(
      (block: unknown) =>
        !THINKING_TYPES.has((block as { type?: string })?.type ?? ""),
    );
  }
}
/**
 * Per-session web search middleware that carries a running search budget.
 *
 * Purpose: add the native web_search tool definition to Anthropic API requests
 * while capping the total number of searches per session, so pause_turn
 * resubmit loops cannot accumulate unbounded searches (#1309). The counter is
 * a high-water mark: it never decreases when context compaction removes
 * web_search_tool_result blocks from the visible history.
 *
 * Consumer: sdk.ts createAgentSession (singleton instance, native hot-path) and
 * the search-the-web extension's before_provider_request handler (delegation).
 */
export class WebSearchMiddleware {
  /** Highest number of native search results observed so far this session. */
  private sessionSearchCount = 0;

  /**
   * Clear the session budget so the next request starts from zero.
   *
   * Purpose: stop a previous session's usage from leaking into a new one. Must
   * be called on session_start and on each registerNativeSearchHooks invocation
   * (test isolation).
   *
   * Consumer: session_start handler in native-search.js; registerNativeSearchHooks reset.
   */
  resetSession(): void {
    this.sessionSearchCount = 0;
  }

  /**
   * Apply Anthropic web search injection to an API request payload, mutating
   * it in place.
   *
   * Returns undefined on every early exit: falsy payload, non-Anthropic
   * request, Brave/custom search preferred, or web_search already present.
   * Thinking blocks may already have been stripped in place on the last two
   * exits. Otherwise returns the same payload reference after custom search
   * tools were filtered out and, while budget remains, web_search was appended.
   * This matches the extension before_provider_request return convention so
   * the extension runner does not overwrite the payload reference.
   *
   * Consumer: sdk.ts onPayload callback (return value ignored, mutation used)
   * and search-the-web extension before_provider_request handler (return value
   * forwarded).
   *
   * @param payload - Outgoing provider request body.
   * @param model - Resolved model, when known; its provider is authoritative.
   */
  applyToPayload(
    payload: unknown,
    model?: { provider?: string; id?: string },
  ): unknown {
    if (!payload) return undefined;
    const request = payload as Record<string, unknown>;

    // Provider detection. The resolved model is the most reliable source;
    // without it, fall back to a model-name heuristic that excludes
    // Claude-compatible providers lacking web_search (Copilot, MiniMax, Kimi).
    if (model?.provider) {
      if (model.provider !== "anthropic") return undefined;
    } else {
      const modelName = String(request.model ?? "").toLowerCase();
      if (!modelName.startsWith("claude-")) return undefined;
      const isKnownClone = ["minimax", "kimi", "copilot"].some((marker) =>
        modelName.includes(marker),
      );
      if (isKnownClone) return undefined;
    }

    // Drop thinking blocks from history first: this avoids signature validation
    // errors caused by the SDK dropping server_tool_use/web_search_tool_result
    // blocks, and it applies even when native injection is skipped below.
    const history = request.messages;
    const hasHistory = Array.isArray(history);
    if (hasHistory) {
      stripThinkingFromHistory(history as unknown[]);
    }

    // Brave/custom search preferred: leave the tool list untouched.
    if (preferBraveSearch()) return undefined;

    if (!Array.isArray(request.tools)) request.tools = [];
    const currentTools = request.tools as Array<Record<string, unknown>>;

    // Already injected (e.g., sdk.ts ran before the extension): nothing to do.
    const alreadyInjected = currentTools.some(
      (tool) => tool.type === "web_search_20250305",
    );
    if (alreadyInjected) return undefined;

    // Native web_search is server-side and more reliable; keeping the custom
    // search tools alongside it confuses the model (#custom-search-conflict).
    const nativeOnlyTools = currentTools.filter(
      (tool) => !CUSTOM_SEARCH_TOOL_NAMES.includes(tool.name as string),
    );
    request.tools = nativeOnlyTools;

    // Session-level search budget (#1309, compaction-safe): count the
    // web_search_tool_result blocks visible in history and raise the running
    // counter to that value if it is higher. The counter is never lowered.
    if (hasHistory) {
      let resultsInHistory = 0;
      for (const message of history as Array<{ content?: unknown }>) {
        const blocks = message.content;
        if (!Array.isArray(blocks)) continue;
        resultsInHistory += blocks.filter(
          (block) =>
            (block as { type?: string })?.type === "web_search_tool_result",
        ).length;
      }
      if (resultsInHistory > this.sessionSearchCount) {
        this.sessionSearchCount = resultsInHistory;
      }
    }

    // With the budget exhausted the search tool is not injected at all; the
    // payload is still returned because custom tools were filtered.
    const budgetLeft =
      MAX_NATIVE_SEARCHES_PER_SESSION - this.sessionSearchCount;
    if (budgetLeft > 0) {
      nativeOnlyTools.push({
        type: "web_search_20250305",
        name: "web_search",
        // Per-request cap: the lesser of 5 (per-turn cap) and the remaining
        // session budget (#1309).
        max_uses: Math.min(5, budgetLeft),
      });
    }
    return payload;
  }
}
/**
 * Module-level singleton instance shared by sdk.ts (native hot-path) and the
 * search-the-web extension (delegation + session reset).
 *
 * Purpose: a single counter ensures the session budget is accurate regardless of
 * which code path processes each request. The extension resets the counter on
 * session_start and on each registerNativeSearchHooks call (test isolation).
 *
 * Consumer: sdk.ts onPayload, search-the-web extension native-search.js.
 *
 * NOTE(review): because this is one instance per module load, every agent
 * session created in the same process presumably shares this budget counter —
 * confirm that concurrent sessions are not expected, or that sharing is intended.
 */
export const webSearchMiddleware = new WebSearchMiddleware();

View file

@ -1,6 +1,7 @@
import { existsSync } from "node:fs";
import { join } from "node:path";
import type { Model } from "@singularity-forge/ai";
import { webSearchMiddleware } from "./providers/web-search-middleware.js";
/**
* Lightweight PATH scan for the `claude` binary — no subprocess, no network.
@ -433,6 +434,11 @@ export async function createAgentSession(
},
convertToLlm: convertToLlmWithBlockImages,
onPayload: async (payload, currentModel) => {
// Apply native web search middleware before dispatching to extension hooks.
// This is the hot-path: runs as compiled TypeScript, not through jiti dispatch.
// The extension's before_provider_request handler delegates here too; the
// double-injection guard (tools.some(web_search_20250305)) prevents re-injection.
webSearchMiddleware.applyToPayload(payload, currentModel);
const runner = extensionRunnerRef.current;
if (!runner?.hasHandlers("before_provider_request")) {
return payload;

View file

@ -215,6 +215,15 @@ export type {
ResourceLoader,
} from "./core/resource-loader.js";
export { DefaultResourceLoader } from "./core/resource-loader.js";
// Native provider middleware
export {
CUSTOM_SEARCH_TOOL_NAMES,
MAX_NATIVE_SEARCHES_PER_SESSION,
setPreferBraveResolver,
stripThinkingFromHistory,
WebSearchMiddleware,
webSearchMiddleware,
} from "./core/providers/web-search-middleware.js";
// SDK for programmatic usage
export {
type CreateAgentSessionOptions,

View file

@ -3,27 +3,27 @@
*
* Extracted from index.ts so it can be unit-tested without importing
* the heavy tool-registration modules.
*
* The core injection logic (before_provider_request) now lives in:
* packages/coding-agent/src/core/providers/web-search-middleware.ts
*
* This file exports the constants and functions needed by the extension and by tests,
* and delegates before_provider_request to the native middleware singleton so that
* (a) tests exercise the same code path as production and (b) PREFERENCES.md-based
* search_provider overrides are respected via setPreferBraveResolver.
*/
import {
CUSTOM_SEARCH_TOOL_NAMES,
MAX_NATIVE_SEARCHES_PER_SESSION,
setPreferBraveResolver,
stripThinkingFromHistory,
webSearchMiddleware,
} from "@singularity-forge/coding-agent";
import { resolveSearchProviderFromPreferences } from "../sf/preferences.js";
/** Tool names for the Brave-backed custom search tools */
export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"];
/** All custom search tool names that should be disabled when native search is active */
export const CUSTOM_SEARCH_TOOL_NAMES = [
"search-the-web",
"search_and_read",
"google_search",
];
/** Thinking block types that require signature validation by the API */
const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
/**
* Maximum number of native web searches allowed per session (agent unit).
* The Anthropic API's `max_uses` is per-request it resets on each API call.
* When `pause_turn` triggers a resubmit, the model gets a fresh budget.
* This session-level cap prevents unbounded search accumulation (#1309).
*
* 15 = 3 full turns of 5 searches each generous for research, but bounded.
*/
export const MAX_NATIVE_SEARCHES_PER_SESSION = 15;
export { CUSTOM_SEARCH_TOOL_NAMES, MAX_NATIVE_SEARCHES_PER_SESSION, stripThinkingFromHistory };
/**
* Returns true when the provider supports native Anthropic web_search injection.
*
@ -56,30 +56,6 @@ export function preferBraveSearch() {
process.env.PREFER_BRAVE_SEARCH === "true"
);
}
/**
* Strip thinking/redacted_thinking blocks from assistant messages in the
* conversation history.
*
* Why: The Pi SDK's streaming parser drops `server_tool_use` and
* `web_search_tool_result` content blocks (unknown types). When the
* conversation is replayed, the assistant messages are incomplete missing
* those blocks. The Anthropic API detects the modification and rejects the
* request with "thinking blocks cannot be modified."
*
* Fix: Remove thinking blocks from all assistant messages in the history.
* In Anthropic's Messages API, the messages array always ends with a user
* message, so every assistant message is from a previous turn that has been
* through a store/replay cycle. The model generates fresh thinking for the
* current turn regardless.
*/
export function stripThinkingFromHistory(messages) {
for (const msg of messages) {
if (msg.role !== "assistant") continue;
const content = msg.content;
if (!Array.isArray(content)) continue;
msg.content = content.filter((block) => !THINKING_TYPES.has(block?.type));
}
}
/**
* Register model_select, before_provider_request, and session_start hooks
* for native Anthropic web search injection.
@ -87,16 +63,19 @@ export function stripThinkingFromHistory(messages) {
* Returns the isAnthropicProvider getter for testing.
*/
export function registerNativeSearchHooks(pi) {
let isAnthropicProvider = false;
let modelSelectFired = false;
// Session-level native search counter (#1309).
// Tracks cumulative web_search_tool_result blocks across all turns in a session.
// Reset on session_start. Used to compute remaining budget for max_uses.
let sessionSearchCount = 0;
// null = unknown (model_select not yet fired); true/false = provider is/isn't Anthropic.
let isAnthropicProvider = null;
// Register the PREFERENCES.md-aware resolver so the native middleware (shared
// singleton in web-search-middleware.ts) respects search_provider overrides.
// Called here so each test invocation resets the resolver to the current context.
setPreferBraveResolver(preferBraveSearch);
// Reset the shared middleware session counter for this registration.
// In tests, each registerNativeSearchHooks() call starts a fresh counter.
// In production, the session_start handler below resets it on each new session.
webSearchMiddleware.resetSession();
// Track provider changes via model selection — also handles diagnostics
// since model_select fires AFTER session_start and knows the provider.
pi.on("model_select", async (event, ctx) => {
modelSelectFired = true;
const wasAnthropic = isAnthropicProvider;
isAnthropicProvider = event.model.provider === "anthropic";
const hasSearchKey = !!(
@ -148,97 +127,26 @@ export function registerNativeSearchHooks(pi) {
);
}
});
// Inject native web search into Anthropic API requests
// before_provider_request is now handled natively by WebSearchMiddleware in sdk.ts.
// This handler delegates to the same singleton so that:
// (a) existing tests continue to exercise the injection logic end-to-end, and
// (b) the double-injection guard (tools.some(web_search_20250305)) is a no-op
// in production where sdk.ts already ran the middleware first.
//
// When event.model is absent but model_select has already run (isAnthropicProvider
// is not null), synthesize a provider hint from the cached state so the middleware
// does not fall back to the model-name heuristic and wrongly inject into Copilot
// claude-* requests (#copilot-false-positive).
pi.on("before_provider_request", (event) => {
const payload = event.payload;
if (!payload) return;
// Detect Anthropic provider. Use the model object from the event (most
// reliable — comes directly from the resolved Model), then fall back to
// the model_select flag, then to the model name heuristic (last resort).
// The model name heuristic is needed for session restores where
// modelsAreEqual suppresses model_select AND the SDK doesn't pass model.
const eventModel = event.model;
let isAnthropic;
if (eventModel?.provider) {
isAnthropic = eventModel.provider === "anthropic";
} else if (modelSelectFired) {
isAnthropic = isAnthropicProvider;
} else {
// No provider info available and no model_select event fired.
// Heuristic: models starting with `claude-` are usually Anthropic,
// but we must exclude known clones (github-copilot, minimax, kimi)
// that use the same naming but don't support native web_search.
const name = String(payload.model ?? "").toLowerCase();
isAnthropic =
name.startsWith("claude-") &&
!name.includes("minimax") &&
!name.includes("kimi") &&
!name.includes("copilot");
let modelHint = event.model;
if (!modelHint && isAnthropicProvider !== null) {
modelHint = { provider: isAnthropicProvider ? "anthropic" : "not-anthropic" };
}
if (!isAnthropic) return;
// Strip thinking blocks from history to avoid signature validation errors
// caused by the SDK dropping server_tool_use/web_search_tool_result blocks.
const messages = payload.messages;
if (Array.isArray(messages)) {
stripThinkingFromHistory(messages);
}
// When preferring Brave, skip native search injection entirely
if (preferBraveSearch()) return;
if (!Array.isArray(payload.tools)) payload.tools = [];
let tools = payload.tools;
// Don't double-inject if already present
if (tools.some((t) => t.type === "web_search_20250305")) return;
// Remove custom search tool definitions from Anthropic requests.
// Native web_search is server-side and more reliable — keeping both confuses
// the model and causes it to pick custom tools which can fail with network errors.
tools = tools.filter((t) => !CUSTOM_SEARCH_TOOL_NAMES.includes(t.name));
payload.tools = tools;
// ── Session-level search budget (#1309, #compaction-safe) ─────────────
// Count web_search_tool_result blocks in the conversation history to
// determine how many native searches have already been used this session.
// The Anthropic API's max_uses resets per request, so without this guard,
// pause_turn → resubmit cycles allow unlimited total searches.
//
// Use the monotonic high-water mark: take the max of the history count
// and the running counter. This prevents budget resets when context
// compaction removes web_search_tool_result blocks from history.
if (Array.isArray(messages)) {
let historySearchCount = 0;
for (const msg of messages) {
const content = msg.content;
if (!Array.isArray(content)) continue;
for (const block of content) {
if (block?.type === "web_search_tool_result") {
historySearchCount++;
}
}
}
// High-water mark: never decrease the counter, even if compaction
// removes web_search_tool_result blocks from the visible history.
sessionSearchCount = Math.max(sessionSearchCount, historySearchCount);
}
const remaining = Math.max(
0,
MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount,
);
if (remaining <= 0) {
// Budget exhausted — don't inject the search tool at all.
// The model will proceed without web search capability.
return payload;
}
tools.push({
type: "web_search_20250305",
name: "web_search",
// Cap per-request searches to the lesser of 5 (per-turn cap) or the
// remaining session budget (#1309). This prevents the model from
// consuming unlimited searches via pause_turn → resubmit cycles.
max_uses: Math.min(5, remaining),
});
return payload;
return webSearchMiddleware.applyToPayload(event.payload, modelHint);
});
pi.on("session_start", async (_event, _ctx) => {
// Reset session-level search budget (#1309)
sessionSearchCount = 0;
// Reset the shared middleware session budget (#1309).
webSearchMiddleware.resetSession();
});
return { getIsAnthropic: () => isAnthropicProvider };
}