fix: strip thinking blocks from history to fix conversation replay error

The Pi SDK's streaming parser drops server_tool_use and web_search_tool_result content blocks. When the conversation is replayed, assistant messages are incomplete, causing the Anthropic API to reject requests with "thinking blocks cannot be modified." Fix: stripThinkingFromHistory() removes thinking/redacted_thinking blocks from all assistant messages before sending, since they're all from stored history. The model generates fresh thinking for each new turn. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 16:19:57 -03:00 · 2026-03-11 16:19:57 -03:00 · 2252a6dfca
commit 2252a6dfca
parent 4ba7930240
2 changed files with 208 additions and 31 deletions
--- a/src/resources/extensions/search-the-web/native-search.ts
+++ b/src/resources/extensions/search-the-web/native-search.ts
@ -8,6 +8,9 @@
 /** Tool names for the Brave-backed custom search tools */
 export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"];

+/** Thinking block types that require signature validation by the API */
+const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
+
 /** Minimal interface matching the subset of ExtensionAPI we use */
 export interface NativeSearchPI {
  on(event: string, handler: (...args: any[]) => any): void;
@ -15,6 +18,37 @@ export interface NativeSearchPI {
  setActiveTools(tools: string[]): void;
 }

+/**
+ * Strip thinking/redacted_thinking blocks from assistant messages in the
+ * conversation history.
+ *
+ * Why: The Pi SDK's streaming parser drops `server_tool_use` and
+ * `web_search_tool_result` content blocks (unknown types). When the
+ * conversation is replayed, the assistant messages are incomplete — missing
+ * those blocks. The Anthropic API detects the modification and rejects the
+ * request with "thinking blocks cannot be modified."
+ *
+ * Fix: Remove thinking blocks from all assistant messages in the history.
+ * In Anthropic's Messages API, the messages array always ends with a user
+ * message, so every assistant message is from a previous turn that has been
+ * through a store/replay cycle. The model generates fresh thinking for the
+ * current turn regardless.
+ */
+export function stripThinkingFromHistory(
+  messages: Array<Record<string, unknown>>
+): void {
+  for (const msg of messages) {
+    if (msg.role !== "assistant") continue;
+
+    const content = msg.content;
+    if (!Array.isArray(content)) continue;
+
+    msg.content = content.filter(
+      (block: any) => !THINKING_TYPES.has(block?.type)
+    );
+  }
+}
+
 /**
 * Register model_select, before_provider_request, and session_start hooks
 * for native Anthropic web search injection.
@ -24,8 +58,9 @@ export interface NativeSearchPI {
 export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: () => boolean } {
  let isAnthropicProvider = false;

-  // Track provider changes via model selection
-  pi.on("model_select", async (event: any, _ctx: any) => {
+  // Track provider changes via model selection — also handles diagnostics
+  // since model_select fires AFTER session_start and knows the provider.
+  pi.on("model_select", async (event: any, ctx: any) => {
    const wasAnthropic = isAnthropicProvider;
    isAnthropicProvider = event.model.provider === "anthropic";

@ -43,6 +78,16 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
      const active = pi.getActiveTools();
      pi.setActiveTools([...active, ...BRAVE_TOOL_NAMES]);
    }
+
+    // Show provider-aware diagnostics on first selection or provider change
+    if (isAnthropicProvider && !wasAnthropic) {
+      ctx.ui.notify("Native Anthropic web search active", "info");
+    } else if (!isAnthropicProvider && !hasBrave) {
+      ctx.ui.notify(
+        "Web search: Set BRAVE_API_KEY or use an Anthropic model for built-in search",
+        "warning"
+      );
+    }
  });

  // Inject native web search into Anthropic API requests
@ -57,6 +102,13 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
    // Keep provider tracking in sync
    isAnthropicProvider = true;

+    // Strip thinking blocks from history to avoid signature validation errors
+    // caused by the SDK dropping server_tool_use/web_search_tool_result blocks.
+    const messages = payload.messages as Array<Record<string, unknown>> | undefined;
+    if (Array.isArray(messages)) {
+      stripThinkingFromHistory(messages);
+    }
+
    if (!Array.isArray(payload.tools)) payload.tools = [];

    // Don't double-inject if already present
@ -71,26 +123,17 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
    return payload;
  });

-  // Startup diagnostics
+  // Basic startup diagnostics — provider-specific info comes from model_select
  pi.on("session_start", async (_event: any, ctx: any) => {
    const hasBrave = !!process.env.BRAVE_API_KEY;
    const hasJina = !!process.env.JINA_API_KEY;
    const hasAnswers = !!process.env.BRAVE_ANSWERS_KEY;

    const parts: string[] = ["Web search v4 loaded"];
-
-    if (isAnthropicProvider) parts.push("Native search ✓");
    if (hasBrave) parts.push("Brave ✓");
    if (hasAnswers) parts.push("Answers ✓");
    if (hasJina) parts.push("Jina ✓");

-    if (!isAnthropicProvider && !hasBrave) {
-      ctx.ui.notify(
-        "Web search: Set BRAVE_API_KEY or use an Anthropic model for built-in search",
-        "warning"
-      );
-    }
-
    ctx.ui.notify(parts.join(" · "), "info");
  });

--- a/src/tests/native-search.test.ts
+++ b/src/tests/native-search.test.ts
@ -2,6 +2,7 @@ import test from "node:test";
 import assert from "node:assert/strict";
 import {
  registerNativeSearchHooks,
+  stripThinkingFromHistory,
  BRAVE_TOOL_NAMES,
  type NativeSearchPI,
 } from "../resources/extensions/search-the-web/native-search.ts";
@ -241,29 +242,28 @@ test("model_select re-enables Brave tools when switching away from Anthropic", a
  }
 });

-test("session_start shows 'Native search' when Anthropic provider", async () => {
+test("model_select shows 'Native Anthropic web search active' for Anthropic provider", async () => {
  const pi = createMockPI();
  registerNativeSearchHooks(pi);

-  // Simulate an Anthropic request so isAnthropicProvider becomes true
-  await pi.fire("before_provider_request", {
-    type: "before_provider_request",
-    payload: { model: "claude-sonnet-4-6-20250514", tools: [] },
+  await pi.fire("model_select", {
+    type: "model_select",
+    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
+    previousModel: undefined,
+    source: "set",
  });

-  await pi.fire("session_start", { type: "session_start" });
-
  const infoNotif = pi.notifications.find(
-    (n) => n.level === "info" && n.message.includes("v4")
+    (n) => n.level === "info" && n.message.includes("Native")
  );
-  assert.ok(infoNotif, "Should have v4 info notification");
+  assert.ok(infoNotif, "Should notify about native search on Anthropic model_select");
  assert.ok(
-    infoNotif!.message.includes("Native search"),
-    `Should include 'Native search' — got: ${infoNotif!.message}`
+    infoNotif!.message.includes("Native Anthropic web search active"),
+    `Should say 'Native Anthropic web search active' — got: ${infoNotif!.message}`
  );
 });

-test("session_start shows warning when no Anthropic and no Brave key", async () => {
+test("model_select shows warning for non-Anthropic without Brave key", async () => {
  const originalKey = process.env.BRAVE_API_KEY;
  delete process.env.BRAVE_API_KEY;

@ -271,11 +271,15 @@ test("session_start shows warning when no Anthropic and no Brave key", async ()
    const pi = createMockPI();
    registerNativeSearchHooks(pi);

-    // Don't fire any model/request events — isAnthropicProvider stays false
-    await pi.fire("session_start", { type: "session_start" });
+    await pi.fire("model_select", {
+      type: "model_select",
+      model: { provider: "openai", name: "gpt-4o" },
+      previousModel: undefined,
+      source: "set",
+    });

    const warning = pi.notifications.find((n) => n.level === "warning");
-    assert.ok(warning, "Should show warning when no Anthropic and no Brave key");
+    assert.ok(warning, "Should show warning for non-Anthropic without Brave key");
    assert.ok(
      warning!.message.includes("Anthropic"),
      `Warning should mention Anthropic — got: ${warning!.message}`
@ -286,7 +290,23 @@ test("session_start shows warning when no Anthropic and no Brave key", async ()
  }
 });

-test("session_start does NOT show warning when Brave key present", async () => {
+test("session_start shows v4 loaded message", async () => {
+  const pi = createMockPI();
+  registerNativeSearchHooks(pi);
+
+  await pi.fire("session_start", { type: "session_start" });
+
+  const infoNotif = pi.notifications.find(
+    (n) => n.level === "info" && n.message.includes("v4")
+  );
+  assert.ok(infoNotif, "Should have v4 info notification");
+  assert.ok(
+    infoNotif!.message.startsWith("Web search v4 loaded"),
+    `Should start with 'Web search v4 loaded' — got: ${infoNotif!.message}`
+  );
+});
+
+test("session_start shows Brave status when key present", async () => {
  const originalKey = process.env.BRAVE_API_KEY;
  process.env.BRAVE_API_KEY = "test-key";

@ -296,11 +316,11 @@ test("session_start does NOT show warning when Brave key present", async () => {

    await pi.fire("session_start", { type: "session_start" });

-    const warning = pi.notifications.find((n) => n.level === "warning");
-    assert.equal(warning, undefined, "Should NOT show warning when Brave key is present");
-
    const info = pi.notifications.find((n) => n.level === "info");
    assert.ok(info!.message.includes("Brave"), "Should mention Brave in status");
+
+    const warning = pi.notifications.find((n) => n.level === "warning");
+    assert.equal(warning, undefined, "Should NOT show warning when Brave key is present");
  } finally {
    if (originalKey) process.env.BRAVE_API_KEY = originalKey;
    else delete process.env.BRAVE_API_KEY;
@ -310,3 +330,117 @@ test("session_start does NOT show warning when Brave key present", async () => {
 test("BRAVE_TOOL_NAMES contains expected tool names", () => {
  assert.deepEqual(BRAVE_TOOL_NAMES, ["search-the-web", "search_and_read"]);
 });
+
+// ─── stripThinkingFromHistory tests ─────────────────────────────────────────
+
+test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => {
+  const messages: any[] = [
+    { role: "user", content: "hello" },
+    {
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "hmm", signature: "sig1" },
+        { type: "text", text: "Hi there" },
+      ],
+    },
+    { role: "user", content: "search something" },
+  ];
+
+  stripThinkingFromHistory(messages);
+
+  // First assistant message (not latest) — thinking stripped
+  assert.equal(messages[1].content.length, 1);
+  assert.equal(messages[1].content[0].type, "text");
+});
+
+test("stripThinkingFromHistory strips thinking from all assistant messages", () => {
+  const messages: any[] = [
+    { role: "user", content: "hello" },
+    {
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "first thought", signature: "sig1" },
+        { type: "text", text: "response 1" },
+      ],
+    },
+    { role: "user", content: "follow up" },
+    {
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "second thought", signature: "sig2" },
+        { type: "text", text: "response 2" },
+      ],
+    },
+    { role: "user", content: "another question" },
+  ];
+
+  stripThinkingFromHistory(messages);
+
+  // Both assistant messages — thinking stripped
+  assert.equal(messages[1].content.length, 1);
+  assert.equal(messages[1].content[0].type, "text");
+
+  assert.equal(messages[3].content.length, 1);
+  assert.equal(messages[3].content[0].type, "text");
+});
+
+test("stripThinkingFromHistory removes redacted_thinking too", () => {
+  const messages: any[] = [
+    { role: "user", content: "hello" },
+    {
+      role: "assistant",
+      content: [
+        { type: "redacted_thinking", data: "opaque" },
+        { type: "text", text: "response" },
+      ],
+    },
+    { role: "user", content: "next" },
+  ];
+
+  stripThinkingFromHistory(messages);
+
+  assert.equal(messages[1].content.length, 1);
+  assert.equal(messages[1].content[0].type, "text");
+});
+
+test("stripThinkingFromHistory strips even single assistant message", () => {
+  const messages: any[] = [
+    { role: "user", content: "hello" },
+    {
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "thought", signature: "sig" },
+        { type: "text", text: "response" },
+      ],
+    },
+    { role: "user", content: "follow up" },
+  ];
+
+  stripThinkingFromHistory(messages);
+
+  // Thinking stripped — all assistant messages are from stored history
+  assert.equal(messages[1].content.length, 1);
+  assert.equal(messages[1].content[0].type, "text");
+});
+
+test("stripThinkingFromHistory handles no assistant messages", () => {
+  const messages: any[] = [
+    { role: "user", content: "hello" },
+  ];
+
+  // Should not throw
+  stripThinkingFromHistory(messages);
+  assert.equal(messages.length, 1);
+});
+
+test("stripThinkingFromHistory handles string content (no array)", () => {
+  const messages: any[] = [
+    { role: "user", content: "hello" },
+    { role: "assistant", content: "just a string" },
+    { role: "user", content: "next" },
+  ];
+
+  // Should not throw — string content is skipped
+  stripThinkingFromHistory(messages);
+  assert.equal(messages[1].content, "just a string");
+});