Merge pull request #3495 from Tibsfox/fix/tool-argument-json-robustness

fix(pi-ai): extend repairToolJson to handle XML tags and truncated numbers
This commit is contained in:
Jeremy McSpadden 2026-04-04 19:02:28 -05:00 committed by GitHub
commit 246e706991
2 changed files with 168 additions and 11 deletions

View file

@ -28,20 +28,86 @@ export function hasYamlBulletLists(json: string): boolean {
}
/**
* Attempt to repair YAML-style bullet lists embedded in a JSON string.
* Detect whether a JSON string contains XML parameter tags
* (i.e. `<parameter name="X">value</parameter>`).
*
* Converts patterns like:
* "keyDecisions": - Used Web Notification API..., "keyFiles": - file1
* Some models mix XML tool-call syntax into JSON string values,
* producing hybrid output that fails JSON.parse.
*
* Into:
* "keyDecisions": ["Used Web Notification API..."], "keyFiles": ["file1"]
* @see https://github.com/gsd-build/gsd-2/issues/3403
*/
export function hasXmlParameterTags(json: string): boolean {
return /<\/?parameter[\s>]/.test(json);
}
/**
* Detect whether a JSON string contains truncated numeric values
* (e.g. `"exitCode": -,` or `"durationMs": ,`).
*
* Returns the original string unchanged if no YAML patterns are detected
* Smaller models sometimes emit incomplete numbers when the value
* is cut off mid-generation.
*
* @see https://github.com/gsd-build/gsd-2/issues/3464
*/
export function hasTruncatedNumbers(json: string): boolean {
// Match: colon, optional whitespace, then a comma or } without a value
// Or: colon, optional whitespace, bare minus sign followed by comma/}
return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json);
}
/**
* Strip XML `<parameter>` tags from a JSON string, leaving only the
* text content. This handles the case where the LLM mixes XML
* tool-call format into JSON string values.
*/
function stripXmlParameterTags(json: string): string {
// Remove opening tags: <parameter name="X">
let cleaned = json.replace(/<parameter\s+name="[^"]*"\s*>/g, "");
// Remove closing tags: </parameter>
cleaned = cleaned.replace(/<\/parameter>/g, "");
return cleaned;
}
/**
* Replace truncated numeric values with 0.
* Handles: `"key": ,` `"key": 0,` and `"key": -,` `"key": 0,`
*/
function repairTruncatedNumbers(json: string): string {
// Bare comma after colon (missing value entirely)
let repaired = json.replace(/:\s*,/g, ": 0,");
// Bare minus sign followed by comma or closing brace
repaired = repaired.replace(/:\s*-\s*([,}])/g, ": 0$1");
return repaired;
}
/**
* Attempt to repair malformed JSON in LLM tool-call arguments.
*
* Handles three categories of malformation:
*
* 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` `"key": ["item1", "item2"]`
* 2. **XML parameter tags** (#3403): `<parameter name="X">value</parameter>` stripped to content
* 3. **Truncated numbers** (#3464): `"exitCode": -,` `"exitCode": 0,`
*
* Returns the original string unchanged if no patterns are detected
* or if the repair itself would produce invalid JSON.
*/
export function repairToolJson(json: string): string {
if (!hasYamlBulletLists(json)) {
return json;
let repaired = json;
// Phase 1: Strip XML parameter tags
if (hasXmlParameterTags(repaired)) {
repaired = stripXmlParameterTags(repaired);
}
// Phase 2: Repair truncated numbers
if (hasTruncatedNumbers(repaired)) {
repaired = repairTruncatedNumbers(repaired);
}
// Phase 3: Repair YAML bullet lists
if (!hasYamlBulletLists(repaired)) {
return repaired;
}
// Strategy: find each `"key": - item1\n - item2\n - item3` region and
@ -53,8 +119,6 @@ export function repairToolJson(json: string): string {
// optionally followed by more `- item` lines
// terminated by the next `"key":` or `}` or end of string.
let repaired = json;
// Match a key followed by YAML-style bullet list.
// Capture: (1) the key portion including colon, (2) the bullet-list body,
// (3) the separator (comma or empty) before the next key/bracket.

View file

@ -1,6 +1,6 @@
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { repairToolJson, hasYamlBulletLists } from "../repair-tool-json.js";
import { repairToolJson, hasYamlBulletLists, hasXmlParameterTags, hasTruncatedNumbers } from "../repair-tool-json.js";
describe("repairToolJson — YAML bullet list repair (#2660)", () => {
// ── Detection ──────────────────────────────────────────────────────────
@ -100,3 +100,96 @@ describe("repairToolJson — YAML bullet list repair (#2660)", () => {
assert.equal(result, valid);
});
});
// ═══════════════════════════════════════════════════════════════════════════
// XML parameter tag repair (#3403)
// ═══════════════════════════════════════════════════════════════════════════
describe("repairToolJson — XML parameter tag stripping (#3403)", () => {
test("hasXmlParameterTags detects opening tags", () => {
assert.equal(
hasXmlParameterTags('<parameter name="narrative">some text</parameter>'),
true,
);
});
test("hasXmlParameterTags returns false for clean JSON", () => {
assert.equal(
hasXmlParameterTags('{"narrative": "some text"}'),
false,
);
});
test("strips XML parameter tags from JSON values", () => {
const malformed = '{"sliceId": "S03", "narrative": <parameter name="narrative">The slice work</parameter>}';
const repaired = repairToolJson(malformed);
// After stripping tags, the content should be parseable or at least tag-free
assert.ok(!repaired.includes("<parameter"), "should not contain <parameter tags");
assert.ok(!repaired.includes("</parameter>"), "should not contain </parameter> tags");
});
test("handles mixed XML and JSON content", () => {
const malformed = '{"oneLiner": "done", "verification": <parameter name="verification">all tests pass</parameter>}';
const repaired = repairToolJson(malformed);
assert.ok(!repaired.includes("<parameter"), "XML tags should be stripped");
assert.ok(repaired.includes("all tests pass"), "content should be preserved");
});
});
// ═══════════════════════════════════════════════════════════════════════════
// Truncated number repair (#3464)
// ═══════════════════════════════════════════════════════════════════════════
describe("repairToolJson — truncated number repair (#3464)", () => {
test("hasTruncatedNumbers detects bare comma after colon", () => {
assert.equal(hasTruncatedNumbers('"exitCode": ,'), true);
});
test("hasTruncatedNumbers detects bare minus before comma", () => {
assert.equal(hasTruncatedNumbers('"exitCode": -,'), true);
});
test("hasTruncatedNumbers detects bare minus before closing brace", () => {
assert.equal(hasTruncatedNumbers('"durationMs": -}'), true);
});
test("hasTruncatedNumbers returns false for valid numbers", () => {
assert.equal(hasTruncatedNumbers('"exitCode": 0, "durationMs": 1234'), false);
});
test("hasTruncatedNumbers returns false for negative numbers", () => {
assert.equal(hasTruncatedNumbers('"exitCode": -1, "offset": -100'), false);
});
test("repairs truncated exitCode with bare comma", () => {
const malformed = '{"command": "npm test", "exitCode": , "verdict": "pass", "durationMs": 500}';
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.equal(parsed.exitCode, 0);
assert.equal(parsed.durationMs, 500);
});
test("repairs truncated exitCode with bare minus", () => {
const malformed = '{"command": "npm test", "exitCode": -, "verdict": "pass", "durationMs": 1234}';
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.equal(parsed.exitCode, 0);
assert.equal(parsed.verdict, "pass");
});
test("repairs truncated durationMs at end of object", () => {
const malformed = '{"command": "npm test", "exitCode": 0, "verdict": "pass", "durationMs": -}';
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.equal(parsed.durationMs, 0);
assert.equal(parsed.exitCode, 0);
});
test("does not mangle valid negative numbers", () => {
const valid = '{"exitCode": -1, "offset": -100}';
const repaired = repairToolJson(valid);
const parsed = JSON.parse(repaired);
assert.equal(parsed.exitCode, -1);
assert.equal(parsed.offset, -100);
});
});