Merge pull request #3495 from Tibsfox/fix/tool-argument-json-robustness
fix(pi-ai): extend repairToolJson to handle XML tags and truncated numbers
This commit is contained in:
commit
246e706991
2 changed files with 168 additions and 11 deletions
|
|
@ -28,20 +28,86 @@ export function hasYamlBulletLists(json: string): boolean {
|
|||
}
|
||||
|
||||
/**
|
||||
* Attempt to repair YAML-style bullet lists embedded in a JSON string.
|
||||
* Detect whether a JSON string contains XML parameter tags
|
||||
* (i.e. `<parameter name="X">value</parameter>`).
|
||||
*
|
||||
* Converts patterns like:
|
||||
* "keyDecisions": - Used Web Notification API..., "keyFiles": - file1
|
||||
* Some models mix XML tool-call syntax into JSON string values,
|
||||
* producing hybrid output that fails JSON.parse.
|
||||
*
|
||||
* Into:
|
||||
* "keyDecisions": ["Used Web Notification API..."], "keyFiles": ["file1"]
|
||||
* @see https://github.com/gsd-build/gsd-2/issues/3403
|
||||
*/
|
||||
export function hasXmlParameterTags(json: string): boolean {
|
||||
return /<\/?parameter[\s>]/.test(json);
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect whether a JSON string contains truncated numeric values
|
||||
* (e.g. `"exitCode": -,` or `"durationMs": ,`).
|
||||
*
|
||||
* Returns the original string unchanged if no YAML patterns are detected
|
||||
* Smaller models sometimes emit incomplete numbers when the value
|
||||
* is cut off mid-generation.
|
||||
*
|
||||
* @see https://github.com/gsd-build/gsd-2/issues/3464
|
||||
*/
|
||||
export function hasTruncatedNumbers(json: string): boolean {
|
||||
// Match: colon, optional whitespace, then a comma or } without a value
|
||||
// Or: colon, optional whitespace, bare minus sign followed by comma/}
|
||||
return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json);
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip XML `<parameter>` tags from a JSON string, leaving only the
|
||||
* text content. This handles the case where the LLM mixes XML
|
||||
* tool-call format into JSON string values.
|
||||
*/
|
||||
function stripXmlParameterTags(json: string): string {
|
||||
// Remove opening tags: <parameter name="X">
|
||||
let cleaned = json.replace(/<parameter\s+name="[^"]*"\s*>/g, "");
|
||||
// Remove closing tags: </parameter>
|
||||
cleaned = cleaned.replace(/<\/parameter>/g, "");
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace truncated numeric values with 0.
|
||||
* Handles: `"key": ,` → `"key": 0,` and `"key": -,` → `"key": 0,`
|
||||
*/
|
||||
function repairTruncatedNumbers(json: string): string {
|
||||
// Bare comma after colon (missing value entirely)
|
||||
let repaired = json.replace(/:\s*,/g, ": 0,");
|
||||
// Bare minus sign followed by comma or closing brace
|
||||
repaired = repaired.replace(/:\s*-\s*([,}])/g, ": 0$1");
|
||||
return repaired;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to repair malformed JSON in LLM tool-call arguments.
|
||||
*
|
||||
* Handles three categories of malformation:
|
||||
*
|
||||
* 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` → `"key": ["item1", "item2"]`
|
||||
* 2. **XML parameter tags** (#3403): `<parameter name="X">value</parameter>` → stripped to content
|
||||
* 3. **Truncated numbers** (#3464): `"exitCode": -,` → `"exitCode": 0,`
|
||||
*
|
||||
* Returns the original string unchanged if no patterns are detected
|
||||
* or if the repair itself would produce invalid JSON.
|
||||
*/
|
||||
export function repairToolJson(json: string): string {
|
||||
if (!hasYamlBulletLists(json)) {
|
||||
return json;
|
||||
let repaired = json;
|
||||
|
||||
// Phase 1: Strip XML parameter tags
|
||||
if (hasXmlParameterTags(repaired)) {
|
||||
repaired = stripXmlParameterTags(repaired);
|
||||
}
|
||||
|
||||
// Phase 2: Repair truncated numbers
|
||||
if (hasTruncatedNumbers(repaired)) {
|
||||
repaired = repairTruncatedNumbers(repaired);
|
||||
}
|
||||
|
||||
// Phase 3: Repair YAML bullet lists
|
||||
if (!hasYamlBulletLists(repaired)) {
|
||||
return repaired;
|
||||
}
|
||||
|
||||
// Strategy: find each `"key": - item1\n - item2\n - item3` region and
|
||||
|
|
@ -53,8 +119,6 @@ export function repairToolJson(json: string): string {
|
|||
// optionally followed by more `- item` lines
|
||||
// terminated by the next `"key":` or `}` or end of string.
|
||||
|
||||
let repaired = json;
|
||||
|
||||
// Match a key followed by YAML-style bullet list.
|
||||
// Capture: (1) the key portion including colon, (2) the bullet-list body,
|
||||
// (3) the separator (comma or empty) before the next key/bracket.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { repairToolJson, hasYamlBulletLists } from "../repair-tool-json.js";
|
||||
import { repairToolJson, hasYamlBulletLists, hasXmlParameterTags, hasTruncatedNumbers } from "../repair-tool-json.js";
|
||||
|
||||
describe("repairToolJson — YAML bullet list repair (#2660)", () => {
|
||||
// ── Detection ──────────────────────────────────────────────────────────
|
||||
|
|
@ -100,3 +100,96 @@ describe("repairToolJson — YAML bullet list repair (#2660)", () => {
|
|||
assert.equal(result, valid);
|
||||
});
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// XML parameter tag repair (#3403)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
describe("repairToolJson — XML parameter tag stripping (#3403)", () => {
|
||||
test("hasXmlParameterTags detects opening tags", () => {
|
||||
assert.equal(
|
||||
hasXmlParameterTags('<parameter name="narrative">some text</parameter>'),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("hasXmlParameterTags returns false for clean JSON", () => {
|
||||
assert.equal(
|
||||
hasXmlParameterTags('{"narrative": "some text"}'),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
test("strips XML parameter tags from JSON values", () => {
|
||||
const malformed = '{"sliceId": "S03", "narrative": <parameter name="narrative">The slice work</parameter>}';
|
||||
const repaired = repairToolJson(malformed);
|
||||
// After stripping tags, the content should be parseable or at least tag-free
|
||||
assert.ok(!repaired.includes("<parameter"), "should not contain <parameter tags");
|
||||
assert.ok(!repaired.includes("</parameter>"), "should not contain </parameter> tags");
|
||||
});
|
||||
|
||||
test("handles mixed XML and JSON content", () => {
|
||||
const malformed = '{"oneLiner": "done", "verification": <parameter name="verification">all tests pass</parameter>}';
|
||||
const repaired = repairToolJson(malformed);
|
||||
assert.ok(!repaired.includes("<parameter"), "XML tags should be stripped");
|
||||
assert.ok(repaired.includes("all tests pass"), "content should be preserved");
|
||||
});
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Truncated number repair (#3464)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
describe("repairToolJson — truncated number repair (#3464)", () => {
|
||||
test("hasTruncatedNumbers detects bare comma after colon", () => {
|
||||
assert.equal(hasTruncatedNumbers('"exitCode": ,'), true);
|
||||
});
|
||||
|
||||
test("hasTruncatedNumbers detects bare minus before comma", () => {
|
||||
assert.equal(hasTruncatedNumbers('"exitCode": -,'), true);
|
||||
});
|
||||
|
||||
test("hasTruncatedNumbers detects bare minus before closing brace", () => {
|
||||
assert.equal(hasTruncatedNumbers('"durationMs": -}'), true);
|
||||
});
|
||||
|
||||
test("hasTruncatedNumbers returns false for valid numbers", () => {
|
||||
assert.equal(hasTruncatedNumbers('"exitCode": 0, "durationMs": 1234'), false);
|
||||
});
|
||||
|
||||
test("hasTruncatedNumbers returns false for negative numbers", () => {
|
||||
assert.equal(hasTruncatedNumbers('"exitCode": -1, "offset": -100'), false);
|
||||
});
|
||||
|
||||
test("repairs truncated exitCode with bare comma", () => {
|
||||
const malformed = '{"command": "npm test", "exitCode": , "verdict": "pass", "durationMs": 500}';
|
||||
const repaired = repairToolJson(malformed);
|
||||
const parsed = JSON.parse(repaired);
|
||||
assert.equal(parsed.exitCode, 0);
|
||||
assert.equal(parsed.durationMs, 500);
|
||||
});
|
||||
|
||||
test("repairs truncated exitCode with bare minus", () => {
|
||||
const malformed = '{"command": "npm test", "exitCode": -, "verdict": "pass", "durationMs": 1234}';
|
||||
const repaired = repairToolJson(malformed);
|
||||
const parsed = JSON.parse(repaired);
|
||||
assert.equal(parsed.exitCode, 0);
|
||||
assert.equal(parsed.verdict, "pass");
|
||||
});
|
||||
|
||||
test("repairs truncated durationMs at end of object", () => {
|
||||
const malformed = '{"command": "npm test", "exitCode": 0, "verdict": "pass", "durationMs": -}';
|
||||
const repaired = repairToolJson(malformed);
|
||||
const parsed = JSON.parse(repaired);
|
||||
assert.equal(parsed.durationMs, 0);
|
||||
assert.equal(parsed.exitCode, 0);
|
||||
});
|
||||
|
||||
test("does not mangle valid negative numbers", () => {
|
||||
const valid = '{"exitCode": -1, "offset": -100}';
|
||||
const repaired = repairToolJson(valid);
|
||||
const parsed = JSON.parse(repaired);
|
||||
assert.equal(parsed.exitCode, -1);
|
||||
assert.equal(parsed.offset, -100);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue