fix(pi-ai): recover XML parameters trapped in JSON strings
This commit is contained in:
parent
e3d69ed01a
commit
304b1bf329
7 changed files with 135 additions and 8 deletions
|
|
@ -31,7 +31,7 @@ import type {
|
|||
export type AnthropicApi = "anthropic-messages" | "anthropic-vertex";
|
||||
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { repairToolJson } from "../utils/repair-tool-json.js";
|
||||
import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
|
||||
|
|
@ -701,12 +701,13 @@ export function processAnthropicStream(
|
|||
// repair (#2660) before falling back to the lenient streaming
|
||||
// parser which silently swallows errors.
|
||||
const raw = block.partialJson ?? "";
|
||||
const rawForParse = hasXmlParameterTags(raw) ? repairToolJson(raw) : raw;
|
||||
let parsed: Record<string, any> | undefined;
|
||||
try {
|
||||
parsed = JSON.parse(raw);
|
||||
parsed = JSON.parse(rawForParse);
|
||||
} catch {
|
||||
try {
|
||||
parsed = JSON.parse(repairToolJson(raw));
|
||||
parsed = JSON.parse(repairToolJson(rawForParse));
|
||||
} catch {
|
||||
// Fall through to streaming parser
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native";
|
||||
import { hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js";
|
||||
import { hasXmlParameterTags, hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js";
|
||||
|
||||
/**
|
||||
* Attempts to parse potentially incomplete JSON during streaming.
|
||||
|
|
@ -21,6 +21,16 @@ export function parseStreamingJson<T = any>(partialJson: string | undefined): T
|
|||
// Fast path: try native streaming parser first
|
||||
const result = nativeParseStreamingJson<T>(partialJson);
|
||||
|
||||
// XML parameter tags can be trapped inside otherwise valid JSON strings,
|
||||
// so run repair before trusting the native parse result.
|
||||
if (hasXmlParameterTags(partialJson)) {
|
||||
try {
|
||||
return JSON.parse(repairToolJson(partialJson)) as T;
|
||||
} catch {
|
||||
// Fall through to the native parser result on incomplete partials
|
||||
}
|
||||
}
|
||||
|
||||
// If the native parser returned a non-empty result, use it.
|
||||
// Only attempt repair when the result is empty AND the input
|
||||
// contains YAML bullet patterns (avoids unnecessary work).
|
||||
|
|
|
|||
|
|
@ -55,6 +55,45 @@ export function hasTruncatedNumbers(json: string): boolean {
|
|||
return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json);
|
||||
}
|
||||
|
||||
/**
 * One `<parameter name="…">…</parameter>` block recovered from text that
 * leaked XML-style tool parameters.
 */
type XmlParameterBlock = {
	/** Contents of the tag's `name` attribute (capture group 1 of the pattern below). */
	name: string;
	/** Decoded tag body; see `parseXmlParameterValue` for how the raw text is interpreted. */
	value: unknown;
};

/**
 * Matches a complete, double-quoted `<parameter name="…">…</parameter>` block.
 * Group 1 is the parameter name, group 2 is the (lazily matched, possibly
 * multi-line) body. The `g` flag is required by `String.prototype.matchAll`.
 */
const xmlParameterBlockPattern = /<parameter\s+name="([^"]+)"\s*>([\s\S]*?)<\/parameter>/g;
|
||||
|
||||
/**
 * Decode the raw body of a `<parameter>` tag into a value.
 *
 * The body is trimmed first. An empty body yields `""`. Otherwise the text is
 * tried as JSON so that arrays, objects, numbers, booleans and `null` keep
 * their type; anything that is not valid JSON is returned as the trimmed
 * string.
 *
 * @param raw - Text found between `<parameter …>` and `</parameter>`.
 * @returns The decoded JSON value, or the trimmed text when it is not JSON.
 */
function parseXmlParameterValue(raw: string): unknown {
	const trimmed = raw.trim();
	if (trimmed === "") return "";

	let decoded: unknown;
	try {
		decoded = JSON.parse(trimmed);
	} catch {
		return trimmed;
	}

	// JSON.parse accepts overflowing literals such as `1e999` and returns
	// Infinity. JSON.stringify would later write that as `null`, silently
	// discarding the parameter — keep the original text instead.
	if (typeof decoded === "number" && !Number.isFinite(decoded)) return trimmed;

	return decoded;
}
|
||||
|
||||
/**
 * Collect every well-formed `<parameter name="…">…</parameter>` block in
 * `text`, in order of appearance.
 *
 * @param text - Text that may contain leaked XML parameter blocks.
 * @returns One entry per match: the captured name and the decoded body.
 *          Empty when nothing matches.
 */
function extractXmlParameterBlocks(text: string): XmlParameterBlock[] {
	return Array.from(text.matchAll(xmlParameterBlockPattern), (hit) => ({
		name: hit[1],
		// Group 2 is the tag body; default to "" if the group is absent.
		value: parseXmlParameterValue(hit[2] ?? ""),
	}));
}
|
||||
|
||||
/**
 * Drop leaked XML from the end of a string field.
 *
 * The value is cut at whichever comes first: the first `<parameter`
 * occurrence or the field's own closing tag (`</fieldName>`). Trailing
 * whitespace is then removed. With neither marker present only the trailing
 * whitespace is trimmed.
 *
 * @param fieldName - Name of the JSON field the value belongs to.
 * @param value - The field's string value, possibly with an XML tail.
 * @returns The value up to the earliest marker, right-trimmed.
 */
function trimLeakedXmlTail(fieldName: string, value: string): string {
	const markers = ["<parameter", `</${fieldName}>`];
	const hits = markers.map((marker) => value.indexOf(marker)).filter((index) => index >= 0);
	const end = Math.min(value.length, ...hits);
	return value.slice(0, end).trimEnd();
}
|
||||
|
||||
/**
|
||||
* Strip XML `<parameter>` tags from a JSON string, leaving only the
|
||||
* text content. This handles the case where the LLM mixes XML
|
||||
|
|
@ -68,6 +107,35 @@ function stripXmlParameterTags(json: string): string {
|
|||
return cleaned;
|
||||
}
|
||||
|
||||
/**
 * Lift `<parameter name="…">…</parameter>` blocks that leaked into string
 * fields of a JSON object up to top-level keys of that object.
 *
 * For every string field that contains at least one complete parameter
 * block, the field is trimmed with `trimLeakedXmlTail` and each block becomes
 * a top-level key, unless the object already has its own key of that name
 * (existing values win).
 *
 * Falls back to `stripXmlParameterTags(json)` when the input is not valid
 * JSON, is not a plain object, or contains no promotable block.
 *
 * @param json - Raw tool-argument JSON text.
 * @returns Re-serialized JSON with promoted parameters, or the stripped input.
 */
function promoteXmlParametersToTopLevel(json: string): string {
	try {
		const parsed: unknown = JSON.parse(json);
		if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
			return stripXmlParameterTags(json);
		}

		const args = parsed as Record<string, unknown>;
		// Own-property check: the `in` operator also sees inherited members,
		// so a parameter called "constructor" or "toString" would be dropped.
		const hasOwnKey = (key: string): boolean => Object.prototype.hasOwnProperty.call(args, key);

		let changed = false;
		// Object.entries snapshots the keys, so keys promoted below are not revisited.
		for (const [fieldName, value] of Object.entries(args)) {
			if (typeof value !== "string" || !hasXmlParameterTags(value)) continue;

			const blocks = extractXmlParameterBlocks(value);
			if (blocks.length === 0) continue;

			args[fieldName] = trimLeakedXmlTail(fieldName, value);
			for (const block of blocks) {
				if (hasOwnKey(block.name)) continue;
				// defineProperty always creates an own data property, so a name
				// like "__proto__" cannot trigger the prototype setter.
				Object.defineProperty(args, block.name, {
					value: block.value,
					enumerable: true,
					writable: true,
					configurable: true,
				});
			}
			changed = true;
		}

		return changed ? JSON.stringify(args) : stripXmlParameterTags(json);
	} catch {
		// Best effort: anything unexpected degrades to plain tag stripping.
		return stripXmlParameterTags(json);
	}
}
|
||||
|
||||
/**
|
||||
* Replace truncated numeric values with 0.
|
||||
* Handles: `"key": ,` → `"key": 0,` and `"key": -,` → `"key": 0,`
|
||||
|
|
@ -97,7 +165,7 @@ export function repairToolJson(json: string): string {
|
|||
|
||||
// Phase 1: Strip XML parameter tags
|
||||
if (hasXmlParameterTags(repaired)) {
|
||||
repaired = stripXmlParameterTags(repaired);
|
||||
repaired = promoteXmlParametersToTopLevel(repaired);
|
||||
}
|
||||
|
||||
// Phase 2: Repair truncated numbers
|
||||
|
|
|
|||
17
packages/pi-ai/src/utils/tests/json-parse.test.ts
Normal file
17
packages/pi-ai/src/utils/tests/json-parse.test.ts
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { parseStreamingJson } from "../json-parse.js";
|
||||
|
||||
describe("parseStreamingJson — XML parameter recovery (#3751)", () => {
	test("promotes XML parameters trapped inside valid JSON string values", () => {
		// Valid JSON whose "narrative" string swallowed a closing tag and two
		// <parameter> blocks that should have been separate arguments.
		const input =
			'{"narrative":"text.</narrative>\\n<parameter name=\\"verification\\">all tests pass</parameter>\\n<parameter name=\\"verificationEvidence\\">[\\"npm test\\"]</parameter>","oneLiner":"done"}';

		const recovered = parseStreamingJson<Record<string, unknown>>(input);

		// The leaked tail is cut from the original field…
		assert.equal(recovered.narrative, "text.");
		// …each parameter becomes its own key, with JSON bodies decoded…
		assert.equal(recovered.verification, "all tests pass");
		assert.deepEqual(recovered.verificationEvidence, ["npm test"]);
		// …and untouched fields are preserved.
		assert.equal(recovered.oneLiner, "done");
	});
});
|
||||
|
|
@ -134,6 +134,19 @@ describe("repairToolJson — XML parameter tag stripping (#3403)", () => {
|
|||
assert.ok(!repaired.includes("<parameter"), "XML tags should be stripped");
|
||||
assert.ok(repaired.includes("all tests pass"), "content should be preserved");
|
||||
});
|
||||
|
||||
test("promotes XML parameters trapped inside valid JSON string values", () => {
	// Valid JSON whose "narrative" string swallowed a closing tag and two
	// <parameter> blocks that should have been separate arguments.
	const input =
		'{"narrative":"text.</narrative>\\n<parameter name=\\"verification\\">all tests pass</parameter>\\n<parameter name=\\"verificationEvidence\\">[\\"npm test\\"]</parameter>","oneLiner":"done"}';

	// The repaired text must itself be parseable JSON.
	const recovered = JSON.parse(repairToolJson(input));

	assert.equal(recovered.narrative, "text.");
	assert.equal(recovered.verification, "all tests pass");
	assert.deepEqual(recovered.verificationEvidence, ["npm test"]);
	assert.equal(recovered.oneLiner, "done");
	assert.ok(!recovered.narrative.includes("<parameter"), "narrative should not retain leaked XML");
});
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ import type {
|
|||
Usage,
|
||||
WebSearchResultContent,
|
||||
} from "@gsd/pi-ai";
|
||||
import { repairToolJson } from "@gsd/pi-ai";
|
||||
import { hasXmlParameterTags, repairToolJson } from "@gsd/pi-ai";
|
||||
import type { BetaContentBlock, BetaRawMessageStreamEvent, NonNullableUsage } from "./sdk-types.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -242,13 +242,14 @@ export class PartialMessageBuilder {
|
|||
}
|
||||
if (block.type === "toolCall") {
|
||||
const jsonStr = this.toolJsonAccum.get(streamIndex) ?? "{}";
|
||||
const jsonForParse = hasXmlParameterTags(jsonStr) ? repairToolJson(jsonStr) : jsonStr;
|
||||
try {
|
||||
block.arguments = JSON.parse(jsonStr);
|
||||
block.arguments = JSON.parse(jsonForParse);
|
||||
} catch {
|
||||
// JSON.parse failed — attempt repair for YAML-style bullet
|
||||
// lists that LLMs copy from template formatting (#2660).
|
||||
try {
|
||||
block.arguments = JSON.parse(repairToolJson(jsonStr));
|
||||
block.arguments = JSON.parse(repairToolJson(jsonForParse));
|
||||
} catch {
|
||||
// Repair also failed — stream was truncated or garbage.
|
||||
// Preserve the raw string for diagnostics but signal the
|
||||
|
|
|
|||
|
|
@ -130,4 +130,21 @@ describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => {
|
|||
assert.equal(event!.toolCall.arguments.title, "done");
|
||||
}
|
||||
});
|
||||
|
||||
test("XML parameter tags trapped inside valid JSON strings are promoted (#3751)", () => {
	const messageBuilder = new PartialMessageBuilder("claude-sonnet-4-20250514");
	// Valid JSON whose "narrative" string swallowed a closing tag and two
	// <parameter> blocks that should have been separate arguments.
	const leakedJson =
		'{"narrative":"text.</narrative>\\n<parameter name=\\"verification\\">all tests pass</parameter>\\n<parameter name=\\"verificationEvidence\\">[\\"npm test\\"]</parameter>","oneLiner":"done"}';
	const event = feedToolCall(messageBuilder, [leakedJson]);

	assert.ok(event, "event should not be null");
	assert.equal(event!.type, "toolcall_end");
	// The arguments must not be flagged as malformed.
	assert.equal((event as any).malformedArguments, undefined);

	// Guard only narrows the event type for the compiler; the assertion
	// above has already failed the test if the type is anything else.
	if (event!.type !== "toolcall_end") return;

	const recovered = event.toolCall.arguments;
	assert.equal(recovered.narrative, "text.");
	assert.equal(recovered.verification, "all tests pass");
	assert.deepEqual(recovered.verificationEvidence, ["npm test"]);
	assert.equal(recovered.oneLiner, "done");
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue