From 2f3ffbfc10d135e138fbfe42aa5f8c9450c0c2b5 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Mon, 30 Mar 2026 16:37:09 -0400 Subject: [PATCH] fix: repair YAML bullet lists in malformed tool-call JSON (#3090) * fix: repair YAML bullet lists in malformed tool-call JSON (#2660) When LLMs copy YAML template formatting into tool-call arguments, they produce `"key": - item` instead of `"key": ["item"]`, causing JSON parse errors that block milestone completion. Add a repairToolJson() utility that detects and converts YAML-style bullet lists into JSON arrays before parsing. Integrated into both the PartialMessageBuilder (claude-code-cli) and the anthropic-shared streaming provider, with fallback in parseStreamingJson for all other providers. Closes #2660 Co-Authored-By: Claude Opus 4.6 * fix: use .js import extension in repair-tool-json test Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- packages/pi-ai/src/index.ts | 1 + .../pi-ai/src/providers/anthropic-shared.ts | 17 ++- packages/pi-ai/src/utils/json-parse.ts | 29 ++++- packages/pi-ai/src/utils/repair-tool-json.ts | 88 +++++++++++++++ .../src/utils/tests/repair-tool-json.test.ts | 102 ++++++++++++++++++ .../claude-code-cli/partial-builder.ts | 19 ++-- .../tests/partial-builder.test.ts | 28 +++++ 7 files changed, 276 insertions(+), 8 deletions(-) create mode 100644 packages/pi-ai/src/utils/repair-tool-json.ts create mode 100644 packages/pi-ai/src/utils/tests/repair-tool-json.test.ts diff --git a/packages/pi-ai/src/index.ts b/packages/pi-ai/src/index.ts index a75aaf7f4..c8d9e1e8c 100644 --- a/packages/pi-ai/src/index.ts +++ b/packages/pi-ai/src/index.ts @@ -27,4 +27,5 @@ export type { } from "./utils/oauth/types.js"; export * from "./utils/overflow.js"; export * from "./utils/typebox-helpers.js"; +export * from "./utils/repair-tool-json.js"; export * from "./utils/validation.js"; diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index d7cc0b41d..b7229bf7e 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -31,6 +31,7 @@ import type { export type AnthropicApi = "anthropic-messages" | "anthropic-vertex"; import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; +import { repairToolJson } from "../utils/repair-tool-json.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { transformMessages } from "./transform-messages.js"; @@ -696,7 +697,21 @@ export function processAnthropicStream( partial: output, }); } else if (block.type === "toolCall") { - block.arguments = parseStreamingJson(block.partialJson); + // Try strict parse first; if it fails, attempt YAML bullet + // repair (#2660) before falling back to the lenient streaming + // parser which silently swallows errors. + const raw = block.partialJson ?? ""; + let parsed: Record | undefined; + try { + parsed = JSON.parse(raw); + } catch { + try { + parsed = JSON.parse(repairToolJson(raw)); + } catch { + // Fall through to streaming parser + } + } + block.arguments = parsed ?? parseStreamingJson(block.partialJson); delete (block as any).partialJson; stream.push({ type: "toolcall_end", diff --git a/packages/pi-ai/src/utils/json-parse.ts b/packages/pi-ai/src/utils/json-parse.ts index ad907e8d0..727713132 100644 --- a/packages/pi-ai/src/utils/json-parse.ts +++ b/packages/pi-ai/src/utils/json-parse.ts @@ -1,14 +1,41 @@ import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native"; +import { hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js"; /** * Attempts to parse potentially incomplete JSON during streaming. * Always returns a valid object, even if the JSON is incomplete. * * Uses the native Rust streaming JSON parser for performance. + * Falls back to YAML bullet-list repair when the native parser + * returns an empty object from input that contains YAML-style + * bullet lists copied from template formatting (#2660). * * @param partialJson The partial JSON string from streaming * @returns Parsed object or empty object if parsing fails */ export function parseStreamingJson(partialJson: string | undefined): T { - return nativeParseStreamingJson(partialJson); + if (!partialJson || partialJson.trim() === "") { + return {} as T; + } + + // Fast path: try native streaming parser first + const result = nativeParseStreamingJson(partialJson); + + // If the native parser returned a non-empty result, use it. + // Only attempt repair when the result is empty AND the input + // contains YAML bullet patterns (avoids unnecessary work). + if ( + result && + typeof result === "object" && + Object.keys(result as object).length === 0 && + hasYamlBulletLists(partialJson) + ) { + try { + return JSON.parse(repairToolJson(partialJson)) as T; + } catch { + // Repair failed — return the empty object from native parser + } + } + + return result; } diff --git a/packages/pi-ai/src/utils/repair-tool-json.ts b/packages/pi-ai/src/utils/repair-tool-json.ts new file mode 100644 index 000000000..166e8ce21 --- /dev/null +++ b/packages/pi-ai/src/utils/repair-tool-json.ts @@ -0,0 +1,88 @@ +/** + * Repair malformed JSON in LLM tool-call arguments. + * + * LLMs sometimes copy YAML template formatting into JSON tool arguments, + * producing patterns like: + * + * "keyDecisions": - Used Web Notification API..., + * "keyFiles": - src-tauri/src/lib.rs — Extended... + * + * instead of valid JSON arrays: + * + * "keyDecisions": ["Used Web Notification API..."], + * "keyFiles": ["src-tauri/src/lib.rs — Extended..."] + * + * This module detects and repairs such patterns before JSON.parse is called. + * + * @see https://github.com/gsd-build/gsd-2/issues/2660 + */ + +/** + * Detect whether a JSON string contains YAML-style bullet-list values + * (i.e. `"key": - item` instead of `"key": ["item"]`). + */ +export function hasYamlBulletLists(json: string): boolean { + // Match: "key": followed by whitespace then a dash-space pattern (YAML bullet) + // The negative lookahead excludes negative numbers (e.g. "key": -1) + return /"\s*:\s*-\s+(?!\d)/.test(json); +} + +/** + * Attempt to repair YAML-style bullet lists embedded in a JSON string. + * + * Converts patterns like: + * "keyDecisions": - Used Web Notification API..., "keyFiles": - file1 + * + * Into: + * "keyDecisions": ["Used Web Notification API..."], "keyFiles": ["file1"] + * + * Returns the original string unchanged if no YAML patterns are detected + * or if the repair itself would produce invalid JSON. + */ +export function repairToolJson(json: string): string { + if (!hasYamlBulletLists(json)) { + return json; + } + + // Strategy: find each `"key": - item1\n - item2\n - item3` region and + // wrap items in a JSON array. + // + // We work on the raw string because the JSON is not parseable yet. + // The pattern we target: + // "someKey":\s*- item text (possibly multiline) + // optionally followed by more `- item` lines + // terminated by the next `"key":` or `}` or end of string. + + let repaired = json; + + // Match a key followed by YAML-style bullet list. + // Capture: (1) the key portion including colon, (2) the bullet-list body, + // (3) the separator (comma or empty) before the next key/bracket. + // The bullet list body ends at the next `"key":` or `}` or `]` or end of string. + const keyBulletPattern = + /("(?:[^"\\]|\\.)*"\s*:\s*)(- .+?)(,?\s*)(?="(?:[^"\\]|\\.)*"\s*:|[}\]]|$)/gs; + + repaired = repaired.replace( + keyBulletPattern, + (_match, keyPart: string, bulletBody: string, separator: string) => { + // Split the bullet body into individual items on `- ` boundaries. + // Items may contain embedded newlines for multi-line values. + const items = bulletBody + .split(/\n?\s*- /) + .filter((s) => s.trim().length > 0) + .map((s) => s.replace(/,\s*$/, "").trim()); + + // JSON-encode each item as a string, then wrap in an array. + const jsonArray = "[" + items.map((item) => JSON.stringify(item)).join(", ") + "]"; + + // Re-emit the separator (comma) so the next key is properly delimited + const sep = separator.trim() ? separator : (/^\s*"/.test(separator + "x") ? ", " : ""); + return keyPart + jsonArray + sep; + }, + ); + + // Strip trailing commas before } or ] (common in repaired JSON) + repaired = repaired.replace(/,(\s*[}\]])/g, "$1"); + + return repaired; +} diff --git a/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts b/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts new file mode 100644 index 000000000..35bd03cbb --- /dev/null +++ b/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts @@ -0,0 +1,102 @@ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { repairToolJson, hasYamlBulletLists } from "../repair-tool-json.js"; + +describe("repairToolJson — YAML bullet list repair (#2660)", () => { + // ── Detection ────────────────────────────────────────────────────────── + + test("hasYamlBulletLists detects YAML-style bullets", () => { + assert.equal( + hasYamlBulletLists('"keyDecisions": - Used Web Notification API'), + true, + ); + }); + + test("hasYamlBulletLists ignores negative numbers", () => { + assert.equal( + hasYamlBulletLists('"offset": -1'), + false, + "negative number should not be detected as YAML bullet", + ); + }); + + test("hasYamlBulletLists returns false for valid JSON", () => { + assert.equal( + hasYamlBulletLists('{"keyDecisions": ["item1", "item2"]}'), + false, + ); + }); + + // ── Single bullet item ──────────────────────────────────────────────── + + test("repairs single YAML bullet to JSON array", () => { + const malformed = '{"keyDecisions": - Used Web Notification API}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["Used Web Notification API"]); + }); + + // ── Multiple bullet items (newline-separated) ───────────────────────── + + test("repairs multiple YAML bullets separated by newlines", () => { + const malformed = + '{"keyDecisions": - Used Web Notification API\n - Chose Tauri over Electron\n - Adopted SQLite for storage, "title": "M005"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, [ + "Used Web Notification API", + "Chose Tauri over Electron", + "Adopted SQLite for storage", + ]); + assert.equal(parsed.title, "M005"); + }); + + // ── Multiple fields with YAML bullets ───────────────────────────────── + + test("repairs multiple fields each with YAML bullet lists", () => { + const malformed = + '{"keyDecisions": - decision one\n - decision two, "keyFiles": - src/lib.rs — Extended menu\n - src/main.ts — Entry point, "title": "done"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["decision one", "decision two"]); + assert.deepEqual(parsed.keyFiles, [ + "src/lib.rs \u2014 Extended menu", + "src/main.ts \u2014 Entry point", + ]); + assert.equal(parsed.title, "done"); + }); + + // ── Exact reproduction from issue #2660 ─────────────────────────────── + + test("repairs the exact malformed JSON from issue #2660", () => { + const malformed = `{"milestoneId": "M005", "title": "Native Desktop Polish", "oneLiner": "summary", "narrative": "details", "successCriteriaResults": "all pass", "definitionOfDoneResults": "all done", "requirementOutcomes": "met", "keyDecisions": - Used Web Notification API (new window.Notification()) instead of Tauri sendNotification wrapper, "keyFiles": - src-tauri/src/lib.rs \u2014 Extended menu builder with notification toggle, "lessonsLearned": - Always test notification permissions before sending, "followUps": "none", "deviations": "none", "verificationPassed": true}`; + + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + + assert.equal(parsed.milestoneId, "M005"); + assert.equal(parsed.title, "Native Desktop Polish"); + assert.ok(Array.isArray(parsed.keyDecisions), "keyDecisions should be an array"); + assert.ok(parsed.keyDecisions[0].includes("Web Notification API")); + assert.ok(Array.isArray(parsed.keyFiles), "keyFiles should be an array"); + assert.ok(parsed.keyFiles[0].includes("src-tauri/src/lib.rs")); + assert.ok(Array.isArray(parsed.lessonsLearned), "lessonsLearned should be an array"); + assert.equal(parsed.verificationPassed, true); + }); + + // ── Passthrough for valid JSON ──────────────────────────────────────── + + test("returns valid JSON unchanged", () => { + const valid = '{"keyDecisions": ["item1", "item2"], "count": -5}'; + const result = repairToolJson(valid); + assert.equal(result, valid, "valid JSON should be returned unchanged"); + }); + + // ── Negative numbers are preserved ──────────────────────────────────── + + test("does not mangle negative numbers", () => { + const valid = '{"offset": -1, "limit": -100}'; + const result = repairToolJson(valid); + assert.equal(result, valid); + }); +}); diff --git a/src/resources/extensions/claude-code-cli/partial-builder.ts b/src/resources/extensions/claude-code-cli/partial-builder.ts index 99bd7ca0f..533835505 100644 --- a/src/resources/extensions/claude-code-cli/partial-builder.ts +++ b/src/resources/extensions/claude-code-cli/partial-builder.ts @@ -16,6 +16,7 @@ import type { Usage, WebSearchResultContent, } from "@gsd/pi-ai"; +import { repairToolJson } from "@gsd/pi-ai"; import type { BetaContentBlock, BetaRawMessageStreamEvent, NonNullableUsage } from "./sdk-types.js"; // --------------------------------------------------------------------------- @@ -244,12 +245,18 @@ export class PartialMessageBuilder { try { block.arguments = JSON.parse(jsonStr); } catch { - // Stream was truncated mid-tool-call — JSON is garbage. - // Preserve the raw string for diagnostics but signal the - // malformation explicitly so downstream consumers can - // distinguish this from a healthy tool completion (#2574). - block.arguments = { _raw: jsonStr }; - return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial, malformedArguments: true }; + // JSON.parse failed — attempt repair for YAML-style bullet + // lists that LLMs copy from template formatting (#2660). + try { + block.arguments = JSON.parse(repairToolJson(jsonStr)); + } catch { + // Repair also failed — stream was truncated or garbage. + // Preserve the raw string for diagnostics but signal the + // malformation explicitly so downstream consumers can + // distinguish this from a healthy tool completion (#2574). + block.arguments = { _raw: jsonStr }; + return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial, malformedArguments: true }; + } } return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial }; } diff --git a/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts b/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts index 2a9612986..2ad1e6b0a 100644 --- a/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts +++ b/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts @@ -102,4 +102,32 @@ describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => { "non-JSON content should set malformedArguments: true", ); }); + + test("YAML bullet lists repaired to JSON arrays (#2660)", () => { + const builder = new PartialMessageBuilder("claude-sonnet-4-20250514"); + const malformedJson = + '{"milestoneId": "M005", "keyDecisions": - Used Web Notification API, "keyFiles": - src/lib.rs, "title": "done"}'; + const event = feedToolCall(builder, [malformedJson]); + + assert.ok(event, "event should not be null"); + assert.equal(event!.type, "toolcall_end"); + // Repaired YAML bullets should NOT set malformedArguments + assert.equal( + (event as any).malformedArguments, + undefined, + "repaired YAML bullets should not set malformedArguments", + ); + if (event!.type === "toolcall_end") { + assert.equal(event!.toolCall.arguments.milestoneId, "M005"); + assert.ok( + Array.isArray(event!.toolCall.arguments.keyDecisions), + "keyDecisions should be repaired to an array", + ); + assert.ok( + Array.isArray(event!.toolCall.arguments.keyFiles), + "keyFiles should be repaired to an array", + ); + assert.equal(event!.toolCall.arguments.title, "done"); + } + }); });