From 920fed71220b203ae76664b65e235a87eb0bfa55 Mon Sep 17 00:00:00 2001 From: Tibsfox Date: Sat, 4 Apr 2026 03:59:55 -0700 Subject: [PATCH] fix(pi-ai): extend repairToolJson to handle XML tags and truncated numbers The existing repairToolJson only handles YAML bullet lists (#2660). Two additional malformation patterns from smaller models now cause tool call failures and stuck retry loops: 1. XML parameter tags mixed into JSON values (#3403): LLMs (especially Haiku-class) sometimes emit hybrid XML/JSON syntax like value inside JSON string values. Add stripXmlParameterTags() to remove the tags while preserving content. 2. Truncated numeric values (#3464): Smaller models emit incomplete numbers like "exitCode": -, or "durationMs": , when values are cut off mid-generation. Add repairTruncatedNumbers() to replace these with 0. Both repairs run before the existing YAML bullet repair phase. The AJV validation layer (coerceTypes: true) then handles any remaining string-to-number coercion. Adds 13 new tests covering detection and repair for both patterns. Closes #3464, closes #3403, addresses #3369 --- packages/pi-ai/src/utils/repair-tool-json.ts | 84 ++++++++++++++-- .../src/utils/tests/repair-tool-json.test.ts | 95 ++++++++++++++++++- 2 files changed, 168 insertions(+), 11 deletions(-) diff --git a/packages/pi-ai/src/utils/repair-tool-json.ts b/packages/pi-ai/src/utils/repair-tool-json.ts index 166e8ce21..8cb86d28c 100644 --- a/packages/pi-ai/src/utils/repair-tool-json.ts +++ b/packages/pi-ai/src/utils/repair-tool-json.ts @@ -28,20 +28,86 @@ export function hasYamlBulletLists(json: string): boolean { } /** - * Attempt to repair YAML-style bullet lists embedded in a JSON string. + * Detect whether a JSON string contains XML parameter tags + * (i.e. `value`). * - * Converts patterns like: - * "keyDecisions": - Used Web Notification API..., "keyFiles": - file1 + * Some models mix XML tool-call syntax into JSON string values, + * producing hybrid output that fails JSON.parse. * - * Into: - * "keyDecisions": ["Used Web Notification API..."], "keyFiles": ["file1"] + * @see https://github.com/gsd-build/gsd-2/issues/3403 + */ +export function hasXmlParameterTags(json: string): boolean { + return /<\/?parameter[\s>]/.test(json); +} + +/** + * Detect whether a JSON string contains truncated numeric values + * (e.g. `"exitCode": -,` or `"durationMs": ,`). * - * Returns the original string unchanged if no YAML patterns are detected + * Smaller models sometimes emit incomplete numbers when the value + * is cut off mid-generation. + * + * @see https://github.com/gsd-build/gsd-2/issues/3464 + */ +export function hasTruncatedNumbers(json: string): boolean { + // Match: colon, optional whitespace, then a comma or } without a value + // Or: colon, optional whitespace, bare minus sign followed by comma/} + return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json); +} + +/** + * Strip XML `` tags from a JSON string, leaving only the + * text content. This handles the case where the LLM mixes XML + * tool-call format into JSON string values. + */ +function stripXmlParameterTags(json: string): string { + // Remove opening tags: + let cleaned = json.replace(//g, ""); + // Remove closing tags: + cleaned = cleaned.replace(/<\/parameter>/g, ""); + return cleaned; +} + +/** + * Replace truncated numeric values with 0. + * Handles: `"key": ,` → `"key": 0,` and `"key": -,` → `"key": 0,` + */ +function repairTruncatedNumbers(json: string): string { + // Bare comma after colon (missing value entirely) + let repaired = json.replace(/:\s*,/g, ": 0,"); + // Bare minus sign followed by comma or closing brace + repaired = repaired.replace(/:\s*-\s*([,}])/g, ": 0$1"); + return repaired; +} + +/** + * Attempt to repair malformed JSON in LLM tool-call arguments. + * + * Handles three categories of malformation: + * + * 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` → `"key": ["item1", "item2"]` + * 2. **XML parameter tags** (#3403): `value` → stripped to content + * 3. **Truncated numbers** (#3464): `"exitCode": -,` → `"exitCode": 0,` + * + * Returns the original string unchanged if no patterns are detected * or if the repair itself would produce invalid JSON. */ export function repairToolJson(json: string): string { - if (!hasYamlBulletLists(json)) { - return json; + let repaired = json; + + // Phase 1: Strip XML parameter tags + if (hasXmlParameterTags(repaired)) { + repaired = stripXmlParameterTags(repaired); + } + + // Phase 2: Repair truncated numbers + if (hasTruncatedNumbers(repaired)) { + repaired = repairTruncatedNumbers(repaired); + } + + // Phase 3: Repair YAML bullet lists + if (!hasYamlBulletLists(repaired)) { + return repaired; } // Strategy: find each `"key": - item1\n - item2\n - item3` region and @@ -53,8 +119,6 @@ export function repairToolJson(json: string): string { // optionally followed by more `- item` lines // terminated by the next `"key":` or `}` or end of string. - let repaired = json; - // Match a key followed by YAML-style bullet list. // Capture: (1) the key portion including colon, (2) the bullet-list body, // (3) the separator (comma or empty) before the next key/bracket. diff --git a/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts b/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts index 35bd03cbb..b709d026e 100644 --- a/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts +++ b/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts @@ -1,6 +1,6 @@ import { describe, test } from "node:test"; import assert from "node:assert/strict"; -import { repairToolJson, hasYamlBulletLists } from "../repair-tool-json.js"; +import { repairToolJson, hasYamlBulletLists, hasXmlParameterTags, hasTruncatedNumbers } from "../repair-tool-json.js"; describe("repairToolJson — YAML bullet list repair (#2660)", () => { // ── Detection ────────────────────────────────────────────────────────── @@ -100,3 +100,96 @@ describe("repairToolJson — YAML bullet list repair (#2660)", () => { assert.equal(result, valid); }); }); + +// ═══════════════════════════════════════════════════════════════════════════ +// XML parameter tag repair (#3403) +// ═══════════════════════════════════════════════════════════════════════════ + +describe("repairToolJson — XML parameter tag stripping (#3403)", () => { + test("hasXmlParameterTags detects opening tags", () => { + assert.equal( + hasXmlParameterTags('some text'), + true, + ); + }); + + test("hasXmlParameterTags returns false for clean JSON", () => { + assert.equal( + hasXmlParameterTags('{"narrative": "some text"}'), + false, + ); + }); + + test("strips XML parameter tags from JSON values", () => { + const malformed = '{"sliceId": "S03", "narrative": The slice work}'; + const repaired = repairToolJson(malformed); + // After stripping tags, the content should be parseable or at least tag-free + assert.ok(!repaired.includes(""), "should not contain tags"); + }); + + test("handles mixed XML and JSON content", () => { + const malformed = '{"oneLiner": "done", "verification": all tests pass}'; + const repaired = repairToolJson(malformed); + assert.ok(!repaired.includes(" { + test("hasTruncatedNumbers detects bare comma after colon", () => { + assert.equal(hasTruncatedNumbers('"exitCode": ,'), true); + }); + + test("hasTruncatedNumbers detects bare minus before comma", () => { + assert.equal(hasTruncatedNumbers('"exitCode": -,'), true); + }); + + test("hasTruncatedNumbers detects bare minus before closing brace", () => { + assert.equal(hasTruncatedNumbers('"durationMs": -}'), true); + }); + + test("hasTruncatedNumbers returns false for valid numbers", () => { + assert.equal(hasTruncatedNumbers('"exitCode": 0, "durationMs": 1234'), false); + }); + + test("hasTruncatedNumbers returns false for negative numbers", () => { + assert.equal(hasTruncatedNumbers('"exitCode": -1, "offset": -100'), false); + }); + + test("repairs truncated exitCode with bare comma", () => { + const malformed = '{"command": "npm test", "exitCode": , "verdict": "pass", "durationMs": 500}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, 0); + assert.equal(parsed.durationMs, 500); + }); + + test("repairs truncated exitCode with bare minus", () => { + const malformed = '{"command": "npm test", "exitCode": -, "verdict": "pass", "durationMs": 1234}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, 0); + assert.equal(parsed.verdict, "pass"); + }); + + test("repairs truncated durationMs at end of object", () => { + const malformed = '{"command": "npm test", "exitCode": 0, "verdict": "pass", "durationMs": -}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.durationMs, 0); + assert.equal(parsed.exitCode, 0); + }); + + test("does not mangle valid negative numbers", () => { + const valid = '{"exitCode": -1, "offset": -100}'; + const repaired = repairToolJson(valid); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, -1); + assert.equal(parsed.offset, -100); + }); +});