fix(pi-ai): recover XML parameters trapped in JSON strings

This commit is contained in:
mastertyko 2026-04-08 17:43:28 +02:00
parent e3d69ed01a
commit 304b1bf329
7 changed files with 135 additions and 8 deletions

View file

@ -31,7 +31,7 @@ import type {
export type AnthropicApi = "anthropic-messages" | "anthropic-vertex";
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { repairToolJson } from "../utils/repair-tool-json.js";
import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
@ -701,12 +701,13 @@ export function processAnthropicStream(
// repair (#2660) before falling back to the lenient streaming
// parser which silently swallows errors.
const raw = block.partialJson ?? "";
const rawForParse = hasXmlParameterTags(raw) ? repairToolJson(raw) : raw;
let parsed: Record<string, any> | undefined;
try {
parsed = JSON.parse(raw);
parsed = JSON.parse(rawForParse);
} catch {
try {
parsed = JSON.parse(repairToolJson(raw));
parsed = JSON.parse(repairToolJson(rawForParse));
} catch {
// Fall through to streaming parser
}

View file

@ -1,5 +1,5 @@
import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native";
import { hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js";
import { hasXmlParameterTags, hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js";
/**
* Attempts to parse potentially incomplete JSON during streaming.
@ -21,6 +21,16 @@ export function parseStreamingJson<T = any>(partialJson: string | undefined): T
// Fast path: try native streaming parser first
const result = nativeParseStreamingJson<T>(partialJson);
// XML parameter tags can be trapped inside otherwise valid JSON strings,
// so run repair before trusting the native parse result.
if (hasXmlParameterTags(partialJson)) {
try {
return JSON.parse(repairToolJson(partialJson)) as T;
} catch {
// Fall through to the native parser result on incomplete partials
}
}
// If the native parser returned a non-empty result, use it.
// Only attempt repair when the result is empty AND the input
// contains YAML bullet patterns (avoids unnecessary work).

View file

@ -55,6 +55,45 @@ export function hasTruncatedNumbers(json: string): boolean {
return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json);
}
/** One `<parameter name="…">…</parameter>` element recovered from a string. */
type XmlParameterBlock = {
// Value of the tag's `name` attribute (regex capture group 1).
name: string;
// Tag body after being passed through parseXmlParameterValue.
value: unknown;
};
// Matches a complete `<parameter name="NAME">BODY</parameter>` element.
// Group 1 captures NAME (one or more non-quote characters); group 2 captures
// BODY lazily, and `[\s\S]` lets the body span newlines. The `g` flag allows
// every occurrence in a string to be iterated.
const xmlParameterBlockPattern = /<parameter\s+name="([^"]+)"\s*>([\s\S]*?)<\/parameter>/g;
/**
 * Decode the text found between a pair of `<parameter>` tags.
 *
 * Surrounding whitespace is discarded first. Whitespace-only input yields
 * the empty string. Otherwise the text is returned as its parsed JSON value
 * when it is valid JSON, and as the trimmed string when it is not.
 *
 * @param raw - Text captured from inside a `<parameter>` element.
 * @returns The parsed JSON value, or the trimmed text as a fallback.
 */
function parseXmlParameterValue(raw: string): unknown {
  const text = raw.trim();
  if (text.length === 0) {
    return "";
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(text);
  } catch {
    // Not JSON: keep the plain text.
    decoded = text;
  }
  return decoded;
}
/**
 * Collect every complete `<parameter name="…">…</parameter>` element in `text`.
 *
 * Each match of `xmlParameterBlockPattern` becomes one entry, in order of
 * appearance: the name comes from capture group 1 and the value is capture
 * group 2 decoded by `parseXmlParameterValue`.
 *
 * @param text - String to scan for parameter elements.
 * @returns The recovered blocks; empty when nothing matches.
 */
function extractXmlParameterBlocks(text: string): XmlParameterBlock[] {
  return Array.from(text.matchAll(xmlParameterBlockPattern), (hit) => ({
    name: hit[1],
    value: parseXmlParameterValue(hit[2] ?? ""),
  }));
}
/**
 * Cut a string field at the first sign of leaked XML.
 *
 * The cut point is the earliest occurrence of either `<parameter` or the
 * closing tag `</fieldName>`; everything from there on is dropped and
 * trailing whitespace is removed from what remains. When neither marker is
 * present, only trailing whitespace is removed.
 *
 * @param fieldName - Name of the JSON field, used to build the closing tag.
 * @param value - The field's string value.
 * @returns The value with the leaked tail removed.
 */
function trimLeakedXmlTail(fieldName: string, value: string): string {
  const markers = ["<parameter", `</${fieldName}>`];
  const positions = markers
    .map((marker) => value.indexOf(marker))
    .filter((position) => position >= 0);
  const end = Math.min(value.length, ...positions);
  return value.slice(0, end).trimEnd();
}
/**
* Strip XML `<parameter>` tags from a JSON string, leaving only the
* text content. This handles the case where the LLM mixes XML
@ -68,6 +107,35 @@ function stripXmlParameterTags(json: string): string {
return cleaned;
}
/**
 * Recover `<parameter>` elements that ended up inside string values of an
 * otherwise valid JSON object and lift them to top-level keys.
 *
 * For every string field that contains at least one complete parameter
 * element, the field is truncated with `trimLeakedXmlTail` and each element
 * is added as a top-level key unless the object already has that key.
 *
 * Falls back to `stripXmlParameterTags(json)` when `json` does not parse to
 * a plain object, when nothing was promoted, or when any step throws.
 *
 * @param json - Raw tool-argument JSON text.
 * @returns Re-serialized JSON with promoted parameters, or the stripped input.
 */
function promoteXmlParametersToTopLevel(json: string): string {
  try {
    const parsed = JSON.parse(json) as Record<string, unknown>;
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
      return stripXmlParameterTags(json);
    }

    let changed = false;
    // Object.entries returns a snapshot, so keys promoted below are not revisited.
    for (const [fieldName, value] of Object.entries(parsed)) {
      if (typeof value !== "string" || !hasXmlParameterTags(value)) continue;
      const blocks = extractXmlParameterBlocks(value);
      if (blocks.length === 0) continue;

      parsed[fieldName] = trimLeakedXmlTail(fieldName, value);
      for (const block of blocks) {
        // Assigning to "__proto__" would invoke the prototype setter instead
        // of creating a key, so such a parameter is never promoted.
        if (block.name === "__proto__") continue;
        // Own-property check: the `in` operator also sees inherited members,
        // which would silently drop parameters named e.g. "constructor" or
        // "toString".
        if (!Object.prototype.hasOwnProperty.call(parsed, block.name)) {
          parsed[block.name] = block.value;
        }
      }
      changed = true;
    }

    return changed ? JSON.stringify(parsed) : stripXmlParameterTags(json);
  } catch {
    // Unparseable input or a failure while promoting: fall back to stripping tags.
    return stripXmlParameterTags(json);
  }
}
/**
* Replace truncated numeric values with 0.
* Handles: `"key": ,` → `"key": 0,` and `"key": -,` → `"key": 0,`
@ -97,7 +165,7 @@ export function repairToolJson(json: string): string {
// Phase 1: Strip XML parameter tags
if (hasXmlParameterTags(repaired)) {
repaired = stripXmlParameterTags(repaired);
repaired = promoteXmlParametersToTopLevel(repaired);
}
// Phase 2: Repair truncated numbers

View file

@ -0,0 +1,17 @@
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { parseStreamingJson } from "../json-parse.js";
describe("parseStreamingJson — XML parameter recovery (#3751)", () => {
  test("promotes XML parameters trapped inside valid JSON string values", () => {
    // The payload is syntactically valid JSON; the XML markup sits inside
    // the "narrative" string value.
    const payload =
      '{"narrative":"text.</narrative>\\n<parameter name=\\"verification\\">all tests pass</parameter>\\n<parameter name=\\"verificationEvidence\\">[\\"npm test\\"]</parameter>","oneLiner":"done"}';

    const { narrative, verification, verificationEvidence, oneLiner } =
      parseStreamingJson<Record<string, unknown>>(payload);

    // The leaked tail is removed from the original field...
    assert.equal(narrative, "text.");
    // ...the trapped parameters become top-level keys, JSON-decoded where possible...
    assert.equal(verification, "all tests pass");
    assert.deepEqual(verificationEvidence, ["npm test"]);
    // ...and untouched fields survive.
    assert.equal(oneLiner, "done");
  });
});

View file

@ -134,6 +134,19 @@ describe("repairToolJson — XML parameter tag stripping (#3403)", () => {
assert.ok(!repaired.includes("<parameter"), "XML tags should be stripped");
assert.ok(repaired.includes("all tests pass"), "content should be preserved");
});
test("promotes XML parameters trapped inside valid JSON string values", () => {
  // Valid JSON whose "narrative" string value carries leaked XML parameters.
  const payload =
    '{"narrative":"text.</narrative>\\n<parameter name=\\"verification\\">all tests pass</parameter>\\n<parameter name=\\"verificationEvidence\\">[\\"npm test\\"]</parameter>","oneLiner":"done"}';

  const args = JSON.parse(repairToolJson(payload));

  assert.equal(args.narrative, "text.");
  assert.equal(args.verification, "all tests pass");
  assert.deepEqual(args.verificationEvidence, ["npm test"]);
  assert.equal(args.oneLiner, "done");
  assert.ok(!args.narrative.includes("<parameter"), "narrative should not retain leaked XML");
});
});
// ═══════════════════════════════════════════════════════════════════════════

View file

@ -16,7 +16,7 @@ import type {
Usage,
WebSearchResultContent,
} from "@gsd/pi-ai";
import { repairToolJson } from "@gsd/pi-ai";
import { hasXmlParameterTags, repairToolJson } from "@gsd/pi-ai";
import type { BetaContentBlock, BetaRawMessageStreamEvent, NonNullableUsage } from "./sdk-types.js";
// ---------------------------------------------------------------------------
@ -242,13 +242,14 @@ export class PartialMessageBuilder {
}
if (block.type === "toolCall") {
const jsonStr = this.toolJsonAccum.get(streamIndex) ?? "{}";
const jsonForParse = hasXmlParameterTags(jsonStr) ? repairToolJson(jsonStr) : jsonStr;
try {
block.arguments = JSON.parse(jsonStr);
block.arguments = JSON.parse(jsonForParse);
} catch {
// JSON.parse failed — attempt repair for YAML-style bullet
// lists that LLMs copy from template formatting (#2660).
try {
block.arguments = JSON.parse(repairToolJson(jsonStr));
block.arguments = JSON.parse(repairToolJson(jsonForParse));
} catch {
// Repair also failed — stream was truncated or garbage.
// Preserve the raw string for diagnostics but signal the

View file

@ -130,4 +130,21 @@ describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => {
assert.equal(event!.toolCall.arguments.title, "done");
}
});
test("XML parameter tags trapped inside valid JSON strings are promoted (#3751)", () => {
  // Tool-call arguments delivered as a single chunk: valid JSON with XML
  // parameters leaked into the "narrative" string.
  const chunk =
    '{"narrative":"text.</narrative>\\n<parameter name=\\"verification\\">all tests pass</parameter>\\n<parameter name=\\"verificationEvidence\\">[\\"npm test\\"]</parameter>","oneLiner":"done"}';
  const event = feedToolCall(new PartialMessageBuilder("claude-sonnet-4-20250514"), [chunk]);

  assert.ok(event, "event should not be null");
  assert.equal(event!.type, "toolcall_end");
  // The arguments must not be flagged as malformed.
  assert.equal((event as any).malformedArguments, undefined);

  if (event!.type === "toolcall_end") {
    const args = event.toolCall.arguments;
    assert.equal(args.narrative, "text.");
    assert.equal(args.verification, "all tests pass");
    assert.deepEqual(args.verificationEvidence, ["npm test"]);
    assert.equal(args.oneLiner, "done");
  }
});
});