diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index 73edba843..3b6cd23d6 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -8,6 +8,9 @@ /** Tool names for the Brave-backed custom search tools */ export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"]; +/** Thinking block types that require signature validation by the API */ +const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]); + /** Minimal interface matching the subset of ExtensionAPI we use */ export interface NativeSearchPI { on(event: string, handler: (...args: any[]) => any): void; @@ -15,6 +18,37 @@ export interface NativeSearchPI { setActiveTools(tools: string[]): void; } +/** + * Strip thinking/redacted_thinking blocks from assistant messages in the + * conversation history. + * + * Why: The Pi SDK's streaming parser drops `server_tool_use` and + * `web_search_tool_result` content blocks (unknown types). When the + * conversation is replayed, the assistant messages are incomplete — missing + * those blocks. The Anthropic API detects the modification and rejects the + * request with "thinking blocks cannot be modified." + * + * Fix: Remove thinking blocks from all assistant messages in the history. + * In Anthropic's Messages API, the messages array always ends with a user + * message, so every assistant message is from a previous turn that has been + * through a store/replay cycle. The model generates fresh thinking for the + * current turn regardless. 
+ */ +export function stripThinkingFromHistory( + messages: Array<Record<string, unknown>> +): void { + for (const msg of messages) { + if (msg.role !== "assistant") continue; + + const content = msg.content; + if (!Array.isArray(content)) continue; + + msg.content = content.filter( + (block: any) => !THINKING_TYPES.has(block?.type) + ); + } +} + /** * Register model_select, before_provider_request, and session_start hooks * for native Anthropic web search injection. @@ -24,8 +58,9 @@ export interface NativeSearchPI { export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: () => boolean } { let isAnthropicProvider = false; - // Track provider changes via model selection - pi.on("model_select", async (event: any, _ctx: any) => { + // Track provider changes via model selection — also handles diagnostics + // since model_select fires AFTER session_start and knows the provider. + pi.on("model_select", async (event: any, ctx: any) => { const wasAnthropic = isAnthropicProvider; isAnthropicProvider = event.model.provider === "anthropic"; @@ -43,6 +78,16 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: const active = pi.getActiveTools(); pi.setActiveTools([...active, ...BRAVE_TOOL_NAMES]); } + + // Show provider-aware diagnostics on first selection or provider change + if (isAnthropicProvider && !wasAnthropic) { + ctx.ui.notify("Native Anthropic web search active", "info"); + } else if (!isAnthropicProvider && !hasBrave) { + ctx.ui.notify( + "Web search: Set BRAVE_API_KEY or use an Anthropic model for built-in search", + "warning" + ); + } }); // Inject native web search into Anthropic API requests @@ -57,6 +102,13 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: // Keep provider tracking in sync isAnthropicProvider = true; + // Strip thinking blocks from history to avoid signature validation errors + // caused by the SDK dropping server_tool_use/web_search_tool_result blocks. 
+ const messages = payload.messages as Array<Record<string, unknown>> | undefined; + if (Array.isArray(messages)) { + stripThinkingFromHistory(messages); + } + if (!Array.isArray(payload.tools)) payload.tools = []; // Don't double-inject if already present @@ -71,26 +123,17 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: return payload; }); - // Startup diagnostics + // Basic startup diagnostics — provider-specific info comes from model_select pi.on("session_start", async (_event: any, ctx: any) => { const hasBrave = !!process.env.BRAVE_API_KEY; const hasJina = !!process.env.JINA_API_KEY; const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY; const parts: string[] = ["Web search v4 loaded"]; - - if (isAnthropicProvider) parts.push("Native search ✓"); if (hasBrave) parts.push("Brave ✓"); if (hasAnswers) parts.push("Answers ✓"); if (hasJina) parts.push("Jina ✓"); - if (!isAnthropicProvider && !hasBrave) { - ctx.ui.notify( - "Web search: Set BRAVE_API_KEY or use an Anthropic model for built-in search", - "warning" - ); - } - ctx.ui.notify(parts.join(" · "), "info"); }); diff --git a/src/tests/native-search.test.ts b/src/tests/native-search.test.ts index c4c413d32..b84db2aea 100644 --- a/src/tests/native-search.test.ts +++ b/src/tests/native-search.test.ts @@ -2,6 +2,7 @@ import test from "node:test"; import assert from "node:assert/strict"; import { registerNativeSearchHooks, + stripThinkingFromHistory, BRAVE_TOOL_NAMES, type NativeSearchPI, } from "../resources/extensions/search-the-web/native-search.ts"; @@ -241,29 +242,28 @@ test("model_select re-enables Brave tools when switching away from Anthropic", a } }); -test("session_start shows 'Native search' when Anthropic provider", async () => { +test("model_select shows 'Native Anthropic web search active' for Anthropic provider", async () => { const pi = createMockPI(); registerNativeSearchHooks(pi); - // Simulate an Anthropic request so isAnthropicProvider becomes true - await 
pi.fire("before_provider_request", { - type: "before_provider_request", - payload: { model: "claude-sonnet-4-6-20250514", tools: [] }, + await pi.fire("model_select", { + type: "model_select", + model: { provider: "anthropic", name: "claude-sonnet-4-6" }, + previousModel: undefined, + source: "set", }); - await pi.fire("session_start", { type: "session_start" }); - const infoNotif = pi.notifications.find( - (n) => n.level === "info" && n.message.includes("v4") + (n) => n.level === "info" && n.message.includes("Native") ); - assert.ok(infoNotif, "Should have v4 info notification"); + assert.ok(infoNotif, "Should notify about native search on Anthropic model_select"); assert.ok( - infoNotif!.message.includes("Native search"), - `Should include 'Native search' — got: ${infoNotif!.message}` + infoNotif!.message.includes("Native Anthropic web search active"), + `Should say 'Native Anthropic web search active' — got: ${infoNotif!.message}` ); }); -test("session_start shows warning when no Anthropic and no Brave key", async () => { +test("model_select shows warning for non-Anthropic without Brave key", async () => { const originalKey = process.env.BRAVE_API_KEY; delete process.env.BRAVE_API_KEY; @@ -271,11 +271,15 @@ test("session_start shows warning when no Anthropic and no Brave key", async () const pi = createMockPI(); registerNativeSearchHooks(pi); - // Don't fire any model/request events — isAnthropicProvider stays false - await pi.fire("session_start", { type: "session_start" }); + await pi.fire("model_select", { + type: "model_select", + model: { provider: "openai", name: "gpt-4o" }, + previousModel: undefined, + source: "set", + }); const warning = pi.notifications.find((n) => n.level === "warning"); - assert.ok(warning, "Should show warning when no Anthropic and no Brave key"); + assert.ok(warning, "Should show warning for non-Anthropic without Brave key"); assert.ok( warning!.message.includes("Anthropic"), `Warning should mention Anthropic — got: 
${warning!.message}` @@ -286,7 +290,23 @@ test("session_start shows warning when no Anthropic and no Brave key", async () } }); -test("session_start does NOT show warning when Brave key present", async () => { +test("session_start shows v4 loaded message", async () => { + const pi = createMockPI(); + registerNativeSearchHooks(pi); + + await pi.fire("session_start", { type: "session_start" }); + + const infoNotif = pi.notifications.find( + (n) => n.level === "info" && n.message.includes("v4") + ); + assert.ok(infoNotif, "Should have v4 info notification"); + assert.ok( + infoNotif!.message.startsWith("Web search v4 loaded"), + `Should start with 'Web search v4 loaded' — got: ${infoNotif!.message}` + ); +}); + +test("session_start shows Brave status when key present", async () => { const originalKey = process.env.BRAVE_API_KEY; process.env.BRAVE_API_KEY = "test-key"; @@ -296,11 +316,11 @@ test("session_start does NOT show warning when Brave key present", async () => { await pi.fire("session_start", { type: "session_start" }); - const warning = pi.notifications.find((n) => n.level === "warning"); - assert.equal(warning, undefined, "Should NOT show warning when Brave key is present"); - const info = pi.notifications.find((n) => n.level === "info"); assert.ok(info!.message.includes("Brave"), "Should mention Brave in status"); + + const warning = pi.notifications.find((n) => n.level === "warning"); + assert.equal(warning, undefined, "Should NOT show warning when Brave key is present"); } finally { if (originalKey) process.env.BRAVE_API_KEY = originalKey; else delete process.env.BRAVE_API_KEY; @@ -310,3 +330,117 @@ test("session_start does NOT show warning when Brave key present", async () => { test("BRAVE_TOOL_NAMES contains expected tool names", () => { assert.deepEqual(BRAVE_TOOL_NAMES, ["search-the-web", "search_and_read"]); }); + +// ─── stripThinkingFromHistory tests ───────────────────────────────────────── + +test("stripThinkingFromHistory removes thinking from 
earlier assistant messages", () => { + const messages: any[] = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "hmm", signature: "sig1" }, + { type: "text", text: "Hi there" }, + ], + }, + { role: "user", content: "search something" }, + ]; + + stripThinkingFromHistory(messages); + + // First assistant message (not latest) — thinking stripped + assert.equal(messages[1].content.length, 1); + assert.equal(messages[1].content[0].type, "text"); +}); + +test("stripThinkingFromHistory strips thinking from all assistant messages", () => { + const messages: any[] = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "first thought", signature: "sig1" }, + { type: "text", text: "response 1" }, + ], + }, + { role: "user", content: "follow up" }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "second thought", signature: "sig2" }, + { type: "text", text: "response 2" }, + ], + }, + { role: "user", content: "another question" }, + ]; + + stripThinkingFromHistory(messages); + + // Both assistant messages — thinking stripped + assert.equal(messages[1].content.length, 1); + assert.equal(messages[1].content[0].type, "text"); + + assert.equal(messages[3].content.length, 1); + assert.equal(messages[3].content[0].type, "text"); +}); + +test("stripThinkingFromHistory removes redacted_thinking too", () => { + const messages: any[] = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { type: "redacted_thinking", data: "opaque" }, + { type: "text", text: "response" }, + ], + }, + { role: "user", content: "next" }, + ]; + + stripThinkingFromHistory(messages); + + assert.equal(messages[1].content.length, 1); + assert.equal(messages[1].content[0].type, "text"); +}); + +test("stripThinkingFromHistory strips even single assistant message", () => { + const messages: any[] = [ + { role: "user", content: "hello" }, + { + role: 
"assistant", + content: [ + { type: "thinking", thinking: "thought", signature: "sig" }, + { type: "text", text: "response" }, + ], + }, + { role: "user", content: "follow up" }, + ]; + + stripThinkingFromHistory(messages); + + // Thinking stripped — all assistant messages are from stored history + assert.equal(messages[1].content.length, 1); + assert.equal(messages[1].content[0].type, "text"); +}); + +test("stripThinkingFromHistory handles no assistant messages", () => { + const messages: any[] = [ + { role: "user", content: "hello" }, + ]; + + // Should not throw + stripThinkingFromHistory(messages); + assert.equal(messages.length, 1); +}); + +test("stripThinkingFromHistory handles string content (no array)", () => { + const messages: any[] = [ + { role: "user", content: "hello" }, + { role: "assistant", content: "just a string" }, + { role: "user", content: "next" }, + ]; + + // Should not throw — string content is skipped + stripThinkingFromHistory(messages); + assert.equal(messages[1].content, "just a string"); +});