fix: repair YAML bullet lists in malformed tool-call JSON (#3090)

* fix: repair YAML bullet lists in malformed tool-call JSON (#2660)

When LLMs copy YAML template formatting into tool-call arguments, they
produce `"key": - item` instead of `"key": ["item"]`, causing JSON parse
errors that block milestone completion. Add a repairToolJson() utility
that detects and converts YAML-style bullet lists into JSON arrays before
parsing. Integrated into both the PartialMessageBuilder (claude-code-cli)
and the anthropic-shared streaming provider, with fallback in
parseStreamingJson for all other providers.

Closes #2660

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: use .js import extension in repair-tool-json test

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Tom Boucher 2026-03-30 16:37:09 -04:00 committed by GitHub
parent c924f9f1f8
commit 2f3ffbfc10
7 changed files with 276 additions and 8 deletions

View file

@ -27,4 +27,5 @@ export type {
} from "./utils/oauth/types.js";
export * from "./utils/overflow.js";
export * from "./utils/typebox-helpers.js";
export * from "./utils/repair-tool-json.js";
export * from "./utils/validation.js";

View file

@ -31,6 +31,7 @@ import type {
export type AnthropicApi = "anthropic-messages" | "anthropic-vertex";
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { repairToolJson } from "../utils/repair-tool-json.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
@ -696,7 +697,21 @@ export function processAnthropicStream(
partial: output,
});
} else if (block.type === "toolCall") {
block.arguments = parseStreamingJson(block.partialJson);
// Try strict parse first; if it fails, attempt YAML bullet
// repair (#2660) before falling back to the lenient streaming
// parser which silently swallows errors.
const raw = block.partialJson ?? "";
let parsed: Record<string, any> | undefined;
try {
parsed = JSON.parse(raw);
} catch {
try {
parsed = JSON.parse(repairToolJson(raw));
} catch {
// Fall through to streaming parser
}
}
block.arguments = parsed ?? parseStreamingJson(block.partialJson);
delete (block as any).partialJson;
stream.push({
type: "toolcall_end",

View file

@ -1,14 +1,41 @@
import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native";
import { hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js";
/**
* Attempts to parse potentially incomplete JSON during streaming.
* Always returns a valid object, even if the JSON is incomplete.
*
* Uses the native Rust streaming JSON parser for performance.
* Falls back to YAML bullet-list repair when the native parser
* returns an empty object from input that contains YAML-style
* bullet lists copied from template formatting (#2660).
*
* @param partialJson The partial JSON string from streaming
* @returns Parsed object or empty object if parsing fails
*/
export function parseStreamingJson<T = any>(partialJson: string | undefined): T {
return nativeParseStreamingJson<T>(partialJson);
if (!partialJson || partialJson.trim() === "") {
return {} as T;
}
// Fast path: try native streaming parser first
const result = nativeParseStreamingJson<T>(partialJson);
// If the native parser returned a non-empty result, use it.
// Only attempt repair when the result is empty AND the input
// contains YAML bullet patterns (avoids unnecessary work).
if (
result &&
typeof result === "object" &&
Object.keys(result as object).length === 0 &&
hasYamlBulletLists(partialJson)
) {
try {
return JSON.parse(repairToolJson(partialJson)) as T;
} catch {
// Repair failed — return the empty object from native parser
}
}
return result;
}

View file

@ -0,0 +1,88 @@
/**
* Repair malformed JSON in LLM tool-call arguments.
*
* LLMs sometimes copy YAML template formatting into JSON tool arguments,
* producing patterns like:
*
* "keyDecisions": - Used Web Notification API...,
* "keyFiles": - src-tauri/src/lib.rs Extended...
*
* instead of valid JSON arrays:
*
* "keyDecisions": ["Used Web Notification API..."],
* "keyFiles": ["src-tauri/src/lib.rs — Extended..."]
*
* This module detects and repairs such patterns before JSON.parse is called.
*
* @see https://github.com/gsd-build/gsd-2/issues/2660
*/
/**
* Detect whether a JSON string contains YAML-style bullet-list values
* (i.e. `"key": - item` instead of `"key": ["item"]`).
*/
export function hasYamlBulletLists(json: string): boolean {
// Match: "key": followed by whitespace then a dash-space pattern (YAML bullet)
// The negative lookahead excludes negative numbers (e.g. "key": -1)
return /"\s*:\s*-\s+(?!\d)/.test(json);
}
/**
* Attempt to repair YAML-style bullet lists embedded in a JSON string.
*
* Converts patterns like:
* "keyDecisions": - Used Web Notification API..., "keyFiles": - file1
*
* Into:
* "keyDecisions": ["Used Web Notification API..."], "keyFiles": ["file1"]
*
* Returns the original string unchanged if no YAML patterns are detected
* or if the repair itself would produce invalid JSON.
*/
export function repairToolJson(json: string): string {
if (!hasYamlBulletLists(json)) {
return json;
}
// Strategy: find each `"key": - item1\n - item2\n - item3` region and
// wrap items in a JSON array.
//
// We work on the raw string because the JSON is not parseable yet.
// The pattern we target:
// "someKey":\s*- item text (possibly multiline)
// optionally followed by more `- item` lines
// terminated by the next `"key":` or `}` or end of string.
let repaired = json;
// Match a key followed by YAML-style bullet list.
// Capture: (1) the key portion including colon, (2) the bullet-list body,
// (3) the separator (comma or empty) before the next key/bracket.
// The bullet list body ends at the next `"key":` or `}` or `]` or end of string.
const keyBulletPattern =
/("(?:[^"\\]|\\.)*"\s*:\s*)(- .+?)(,?\s*)(?="(?:[^"\\]|\\.)*"\s*:|[}\]]|$)/gs;
repaired = repaired.replace(
keyBulletPattern,
(_match, keyPart: string, bulletBody: string, separator: string) => {
// Split the bullet body into individual items on `- ` boundaries.
// Items may contain embedded newlines for multi-line values.
const items = bulletBody
.split(/\n?\s*- /)
.filter((s) => s.trim().length > 0)
.map((s) => s.replace(/,\s*$/, "").trim());
// JSON-encode each item as a string, then wrap in an array.
const jsonArray = "[" + items.map((item) => JSON.stringify(item)).join(", ") + "]";
// Re-emit the separator (comma) so the next key is properly delimited
const sep = separator.trim() ? separator : (/^\s*"/.test(separator + "x") ? ", " : "");
return keyPart + jsonArray + sep;
},
);
// Strip trailing commas before } or ] (common in repaired JSON)
repaired = repaired.replace(/,(\s*[}\]])/g, "$1");
return repaired;
}

View file

@ -0,0 +1,102 @@
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { repairToolJson, hasYamlBulletLists } from "../repair-tool-json.js";
describe("repairToolJson — YAML bullet list repair (#2660)", () => {
// ── Detection ──────────────────────────────────────────────────────────
test("hasYamlBulletLists detects YAML-style bullets", () => {
assert.equal(
hasYamlBulletLists('"keyDecisions": - Used Web Notification API'),
true,
);
});
test("hasYamlBulletLists ignores negative numbers", () => {
assert.equal(
hasYamlBulletLists('"offset": -1'),
false,
"negative number should not be detected as YAML bullet",
);
});
test("hasYamlBulletLists returns false for valid JSON", () => {
assert.equal(
hasYamlBulletLists('{"keyDecisions": ["item1", "item2"]}'),
false,
);
});
// ── Single bullet item ────────────────────────────────────────────────
test("repairs single YAML bullet to JSON array", () => {
const malformed = '{"keyDecisions": - Used Web Notification API}';
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.deepEqual(parsed.keyDecisions, ["Used Web Notification API"]);
});
// ── Multiple bullet items (newline-separated) ─────────────────────────
test("repairs multiple YAML bullets separated by newlines", () => {
const malformed =
'{"keyDecisions": - Used Web Notification API\n - Chose Tauri over Electron\n - Adopted SQLite for storage, "title": "M005"}';
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.deepEqual(parsed.keyDecisions, [
"Used Web Notification API",
"Chose Tauri over Electron",
"Adopted SQLite for storage",
]);
assert.equal(parsed.title, "M005");
});
// ── Multiple fields with YAML bullets ─────────────────────────────────
test("repairs multiple fields each with YAML bullet lists", () => {
const malformed =
'{"keyDecisions": - decision one\n - decision two, "keyFiles": - src/lib.rs — Extended menu\n - src/main.ts — Entry point, "title": "done"}';
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.deepEqual(parsed.keyDecisions, ["decision one", "decision two"]);
assert.deepEqual(parsed.keyFiles, [
"src/lib.rs \u2014 Extended menu",
"src/main.ts \u2014 Entry point",
]);
assert.equal(parsed.title, "done");
});
// ── Exact reproduction from issue #2660 ───────────────────────────────
test("repairs the exact malformed JSON from issue #2660", () => {
const malformed = `{"milestoneId": "M005", "title": "Native Desktop Polish", "oneLiner": "summary", "narrative": "details", "successCriteriaResults": "all pass", "definitionOfDoneResults": "all done", "requirementOutcomes": "met", "keyDecisions": - Used Web Notification API (new window.Notification()) instead of Tauri sendNotification wrapper, "keyFiles": - src-tauri/src/lib.rs \u2014 Extended menu builder with notification toggle, "lessonsLearned": - Always test notification permissions before sending, "followUps": "none", "deviations": "none", "verificationPassed": true}`;
const repaired = repairToolJson(malformed);
const parsed = JSON.parse(repaired);
assert.equal(parsed.milestoneId, "M005");
assert.equal(parsed.title, "Native Desktop Polish");
assert.ok(Array.isArray(parsed.keyDecisions), "keyDecisions should be an array");
assert.ok(parsed.keyDecisions[0].includes("Web Notification API"));
assert.ok(Array.isArray(parsed.keyFiles), "keyFiles should be an array");
assert.ok(parsed.keyFiles[0].includes("src-tauri/src/lib.rs"));
assert.ok(Array.isArray(parsed.lessonsLearned), "lessonsLearned should be an array");
assert.equal(parsed.verificationPassed, true);
});
// ── Passthrough for valid JSON ────────────────────────────────────────
test("returns valid JSON unchanged", () => {
const valid = '{"keyDecisions": ["item1", "item2"], "count": -5}';
const result = repairToolJson(valid);
assert.equal(result, valid, "valid JSON should be returned unchanged");
});
// ── Negative numbers are preserved ────────────────────────────────────
test("does not mangle negative numbers", () => {
const valid = '{"offset": -1, "limit": -100}';
const result = repairToolJson(valid);
assert.equal(result, valid);
});
});

View file

@ -16,6 +16,7 @@ import type {
Usage,
WebSearchResultContent,
} from "@gsd/pi-ai";
import { repairToolJson } from "@gsd/pi-ai";
import type { BetaContentBlock, BetaRawMessageStreamEvent, NonNullableUsage } from "./sdk-types.js";
// ---------------------------------------------------------------------------
@ -244,12 +245,18 @@ export class PartialMessageBuilder {
try {
block.arguments = JSON.parse(jsonStr);
} catch {
// Stream was truncated mid-tool-call — JSON is garbage.
// Preserve the raw string for diagnostics but signal the
// malformation explicitly so downstream consumers can
// distinguish this from a healthy tool completion (#2574).
block.arguments = { _raw: jsonStr };
return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial, malformedArguments: true };
// JSON.parse failed — attempt repair for YAML-style bullet
// lists that LLMs copy from template formatting (#2660).
try {
block.arguments = JSON.parse(repairToolJson(jsonStr));
} catch {
// Repair also failed — stream was truncated or garbage.
// Preserve the raw string for diagnostics but signal the
// malformation explicitly so downstream consumers can
// distinguish this from a healthy tool completion (#2574).
block.arguments = { _raw: jsonStr };
return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial, malformedArguments: true };
}
}
return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial };
}

View file

@ -102,4 +102,32 @@ describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => {
"non-JSON content should set malformedArguments: true",
);
});
test("YAML bullet lists repaired to JSON arrays (#2660)", () => {
const builder = new PartialMessageBuilder("claude-sonnet-4-20250514");
const malformedJson =
'{"milestoneId": "M005", "keyDecisions": - Used Web Notification API, "keyFiles": - src/lib.rs, "title": "done"}';
const event = feedToolCall(builder, [malformedJson]);
assert.ok(event, "event should not be null");
assert.equal(event!.type, "toolcall_end");
// Repaired YAML bullets should NOT set malformedArguments
assert.equal(
(event as any).malformedArguments,
undefined,
"repaired YAML bullets should not set malformedArguments",
);
if (event!.type === "toolcall_end") {
assert.equal(event!.toolCall.arguments.milestoneId, "M005");
assert.ok(
Array.isArray(event!.toolCall.arguments.keyDecisions),
"keyDecisions should be repaired to an array",
);
assert.ok(
Array.isArray(event!.toolCall.arguments.keyFiles),
"keyFiles should be repaired to an array",
);
assert.equal(event!.toolCall.arguments.title, "done");
}
});
});