feat: native Rust streaming JSON parser (#266)

* feat: add native Rust streaming JSON parser for LLM tool call argument parsing

Replaces the JS partial-json library with a Rust implementation exposed via napi-rs.
The parser handles incomplete JSON from streaming deltas by closing unclosed strings,
objects, arrays, removing trailing commas, and completing truncated literals.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: handle truncated numbers and remove dead partial-json dependency

Adds truncated number recovery (e.g. `{"key": 12`, `{"key": 3.`, `{"key": 1e`)
to the Rust streaming JSON parser, and removes the now-unused `partial-json`
npm dependency from pi-ai.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-13 16:21:58 -06:00 committed by GitHub
parent 9b0f31e39a
commit 8b9cfae9e9
10 changed files with 618 additions and 19 deletions

1
native/Cargo.lock generated
View file

@ -544,6 +544,7 @@ dependencies = [
"napi-build",
"napi-derive",
"regex",
"serde_json",
"similar",
"smallvec",
"syntect",

View file

@ -27,6 +27,7 @@ image = { version = "0.25", default-features = false, features = [
napi = { version = "2", features = ["napi8"] }
napi-derive = "2"
regex = "1"
serde_json = "1"
similar = "2"
smallvec = "1"
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }

View file

@ -0,0 +1,410 @@
//! Streaming JSON parser via N-API.
//!
//! Exposes fast JSON parsing with partial/incomplete JSON recovery
//! for use during LLM streaming tool call argument parsing.
use napi::bindgen_prelude::*;
use napi_derive::napi;
/// Parse a complete JSON string. Returns the parsed value or an error.
#[napi(js_name = "parseJson")]
pub fn parse_json(env: Env, text: String) -> Result<napi::JsUnknown> {
let value: serde_json::Value =
serde_json::from_str(&text).map_err(|e| Error::from_reason(format!("{e}")))?;
serde_value_to_napi(&env, &value)
}
/// Parse potentially incomplete JSON by closing unclosed structures.
#[napi(js_name = "parsePartialJson")]
pub fn parse_partial_json(env: Env, text: String) -> Result<napi::JsUnknown> {
let fixed = fix_partial_json(&text);
let value: serde_json::Value =
serde_json::from_str(&fixed).map_err(|e| Error::from_reason(format!("{e}")))?;
serde_value_to_napi(&env, &value)
}
/// Best-effort parse for streamed JSON: never fails because of malformed input.
///
/// The trimmed text is first parsed as-is; if that fails it is repaired with
/// `fix_partial_json` and parsed again. Blank input, or input that cannot be
/// repaired, yields an empty JavaScript object.
#[napi(js_name = "parseStreamingJson")]
pub fn parse_streaming_json(env: Env, text: String) -> Result<napi::JsUnknown> {
    let payload = text.trim();

    let recovered = if payload.is_empty() {
        None
    } else {
        // Complete documents take the direct route; only failures pay for the repair pass.
        serde_json::from_str::<serde_json::Value>(payload)
            .ok()
            .or_else(|| {
                serde_json::from_str::<serde_json::Value>(&fix_partial_json(payload)).ok()
            })
    };

    match recovered {
        Some(value) => serde_value_to_napi(&env, &value),
        None => env.create_object().map(|empty| empty.into_unknown()),
    }
}
/// Repair truncated JSON so that a strict parser has a chance of accepting it.
///
/// The input is scanned once, tracking whether the cursor is inside a string
/// and which closing delimiters are still owed. After the scan:
///
/// 1. a dangling backslash or an incomplete `\uXXXX` escape at the end of an
///    unterminated string is dropped, and the string is closed;
/// 2. a trailing comma is removed;
/// 3. a value cut off mid-token is patched by `handle_truncated_value`;
/// 4. every still-open object/array is closed, innermost first.
///
/// Commas directly before a `}` or `]` in the input are removed as well.
/// The result is not guaranteed to be valid JSON; callers must still parse it.
/// Lone UTF-16 surrogate escapes are left untouched.
fn fix_partial_json(input: &str) -> String {
    let mut result = String::with_capacity(input.len() + 16);
    // Closing delimiters still owed, innermost last.
    let mut stack: Vec<char> = Vec::new();
    let mut in_string = false;
    // Only ever set while inside a string: the next char belongs to an escape.
    let mut escape_next = false;

    for ch in input.chars() {
        if escape_next {
            result.push(ch);
            escape_next = false;
            continue;
        }

        if in_string {
            match ch {
                '\\' => escape_next = true,
                '"' => in_string = false,
                _ => {}
            }
            result.push(ch);
            continue;
        }

        match ch {
            '"' => {
                in_string = true;
                result.push(ch);
            }
            '{' => {
                stack.push('}');
                result.push(ch);
            }
            '[' => {
                stack.push(']');
                result.push(ch);
            }
            '}' | ']' => {
                // `{"a": 1,}` -> `{"a": 1}`
                remove_trailing_comma(&mut result);
                result.push(ch);
                // A mismatched closer is copied through but does not pop the stack.
                if stack.last() == Some(&ch) {
                    stack.pop();
                }
            }
            _ => result.push(ch),
        }
    }

    if in_string {
        if escape_next {
            // The input ended right after a backslash: drop the incomplete escape.
            result.pop();
        }
        // `"caf\u00` would otherwise be closed as `"caf\u00"`, which no JSON
        // parser accepts, discarding the whole document.
        strip_incomplete_unicode_escape(&mut result);
        result.push('"');
    }

    remove_trailing_comma(&mut result);
    handle_truncated_value(&mut result);

    while let Some(closer) = stack.pop() {
        remove_trailing_comma(&mut result);
        result.push(closer);
    }
    result
}

/// Remove a trailing `\u` escape that has fewer than four hex digits.
///
/// `buf` must end inside an unterminated string. The backslash only starts an
/// escape when it is preceded by an even number of backslashes, so `\\u12`
/// (an escaped backslash followed by plain text) is left alone.
fn strip_incomplete_unicode_escape(buf: &mut String) {
    let bytes = buf.as_bytes();
    let hex_digits = bytes
        .iter()
        .rev()
        .take(3)
        .take_while(|b| b.is_ascii_hexdigit())
        .count();
    // There must be room for `\u` in front of the digits.
    if bytes.len() < hex_digits + 2 {
        return;
    }
    let u_idx = bytes.len() - hex_digits - 1;
    if bytes[u_idx] != b'u' {
        return;
    }
    let backslashes = bytes[..u_idx]
        .iter()
        .rev()
        .take_while(|&&b| b == b'\\')
        .count();
    if backslashes % 2 == 1 {
        // Cut at the backslash; it is ASCII, so this is a char boundary.
        buf.truncate(u_idx - 1);
    }
}
/// Drop a dangling comma from the end of `result`.
///
/// If the last non-whitespace character is `,`, the buffer is cut just before
/// it, which also discards any whitespace that followed the comma. Otherwise
/// the buffer is left untouched, trailing whitespace included.
fn remove_trailing_comma(result: &mut String) {
    if let Some(before_comma) = result.trim_end().strip_suffix(',') {
        let keep = before_comma.len();
        result.truncate(keep);
    }
}
/// Patch a value that was cut off mid-token at the end of `result`.
///
/// Three shapes are recognised (trailing whitespace is ignored when matching):
///
/// * the buffer ends with `:` — `null` is appended;
/// * the buffer ends with a number-like token in value position (after `:`,
///   `,` or `[`) — the token is cut back to its longest prefix that is a valid
///   JSON number (`3.` -> `3`, `1e` -> `1`, `1.5e-` -> `1.5`), or replaced by
///   `0` when no such prefix exists (`-`, `.`);
/// * the buffer ends with a proper prefix of `true`, `false` or `null` in
///   value position — the literal is completed.
///
/// Anything else is left unchanged.
///
/// Number tokens are checked against the JSON grammar rather than
/// `str::parse::<f64>`, because Rust's float parser accepts forms such as
/// `3.`, `.5` and `+1` that a JSON parser rejects.
fn handle_truncated_value(result: &mut String) {
    let trimmed = result.trim_end();

    // `"key":` with nothing after it: supply an explicit null.
    if trimmed.ends_with(':') {
        result.push_str("null");
        return;
    }

    // Length of the run of number-ish bytes at the very end.
    let number_tail = trimmed
        .bytes()
        .rev()
        .take_while(|&b| b.is_ascii_digit() || matches!(b, b'.' | b'-' | b'+' | b'e' | b'E'))
        .count();
    if number_tail > 0 {
        // All bytes from `start` on are ASCII, so `start` is a char boundary.
        let start = trimmed.len() - number_tail;
        if ends_in_value_position(&trimmed[..start]) {
            let token = &trimmed.as_bytes()[start..];
            let token_len = token.len();
            let valid = json_number_prefix_len(token);
            if valid == 0 {
                // Just a sign, dot or exponent marker: there is no digit to keep.
                result.truncate(start);
                result.push('0');
            } else if valid < token_len {
                result.truncate(start + valid);
            }
            // A fully valid number is left exactly as it was.
            return;
        }
    }

    // Longest prefixes first, so `tru` wins over `tr` and `t`.
    for &full in &["true", "false", "null"] {
        for cut in (1..full.len()).rev() {
            if let Some(before) = trimmed.strip_suffix(&full[..cut]) {
                if ends_in_value_position(before) {
                    let keep = before.len();
                    result.truncate(keep);
                    result.push_str(full);
                    return;
                }
            }
        }
    }
}

/// True when the last non-whitespace character of `prefix` is `:`, `,` or `[`,
/// i.e. whatever follows it is being treated as a JSON value.
fn ends_in_value_position(prefix: &str) -> bool {
    matches!(
        prefix.trim_end().chars().next_back(),
        Some(':') | Some(',') | Some('[')
    )
}

/// Length of the longest prefix of `token` that is a complete JSON number
/// (`-? int frac? exp?` per RFC 8259), or 0 when there is none.
fn json_number_prefix_len(token: &[u8]) -> usize {
    let digits_from =
        |from: usize| token[from..].iter().take_while(|b| b.is_ascii_digit()).count();

    // Optional leading minus.
    let mut pos = usize::from(token.first() == Some(&b'-'));

    // Integer part: a lone `0`, or a run of digits not starting with `0`.
    let int_digits = digits_from(pos);
    if int_digits == 0 {
        return 0;
    }
    pos += if token[pos] == b'0' { 1 } else { int_digits };

    // Fraction: a dot must be followed by at least one digit.
    if token.get(pos).copied() == Some(b'.') {
        let frac_digits = digits_from(pos + 1);
        if frac_digits == 0 {
            return pos;
        }
        pos += 1 + frac_digits;
    }

    // Exponent: `e`/`E`, optional sign, at least one digit.
    if matches!(token.get(pos).copied(), Some(b'e') | Some(b'E')) {
        let mut exp_pos = pos + 1;
        if matches!(token.get(exp_pos).copied(), Some(b'+') | Some(b'-')) {
            exp_pos += 1;
        }
        let exp_digits = digits_from(exp_pos);
        if exp_digits > 0 {
            pos = exp_pos + exp_digits;
        }
    }

    pos
}
/// Recursively build the JavaScript counterpart of a `serde_json::Value`.
///
/// Integers that fit in an `i32` become int32 values; every other number
/// becomes a double. A number that is representable as neither `i64` nor
/// `f64` maps to `null`. Arrays and objects are converted element by element.
///
/// # Errors
/// Propagates the first error returned by any of the `env` calls.
fn serde_value_to_napi(env: &Env, value: &serde_json::Value) -> Result<napi::JsUnknown> {
    let converted = match value {
        serde_json::Value::Null => env.get_null()?.into_unknown(),
        serde_json::Value::Bool(flag) => env.get_boolean(*flag)?.into_unknown(),
        serde_json::Value::Number(num) => match (num.as_i64(), num.as_f64()) {
            (Some(int), _) => {
                let fits_i32 = (i64::from(i32::MIN)..=i64::from(i32::MAX)).contains(&int);
                if fits_i32 {
                    env.create_int32(int as i32)?.into_unknown()
                } else {
                    // Outside the int32 range: fall back to a (possibly lossy) double.
                    env.create_double(int as f64)?.into_unknown()
                }
            }
            (None, Some(float)) => env.create_double(float)?.into_unknown(),
            (None, None) => env.get_null()?.into_unknown(),
        },
        serde_json::Value::String(text) => env.create_string(text)?.into_unknown(),
        serde_json::Value::Array(items) => {
            let mut js_array = env.create_array_with_length(items.len())?;
            for (slot, element) in items.iter().enumerate() {
                let js_element = serde_value_to_napi(env, element)?;
                js_array.set_element(slot as u32, js_element)?;
            }
            js_array.into_unknown()
        }
        serde_json::Value::Object(entries) => {
            let mut js_object = env.create_object()?;
            for (name, member) in entries.iter() {
                let js_member = serde_value_to_napi(env, member)?;
                js_object.set_named_property(name, js_member)?;
            }
            js_object.into_unknown()
        }
    };
    Ok(converted)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `fix_partial_json` on `input` and parse the outcome, panicking if
    /// the repaired text is not valid JSON.
    fn repaired(input: &str) -> serde_json::Value {
        serde_json::from_str(&fix_partial_json(input)).unwrap()
    }

    #[test]
    fn test_fix_complete_json() {
        // Only checks that already-valid input still parses.
        repaired(r#"{"key": "value", "num": 42}"#);
    }

    #[test]
    fn test_fix_unclosed_string() {
        let doc = repaired(r#"{"key": "val"#);
        assert_eq!(doc["key"], "val");
    }

    #[test]
    fn test_fix_unclosed_object() {
        let doc = repaired(r#"{"key": "value""#);
        assert_eq!(doc["key"], "value");
    }

    #[test]
    fn test_fix_unclosed_array() {
        let doc = repaired(r#"{"arr": [1, 2, 3"#);
        assert_eq!(doc["arr"].as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_fix_trailing_comma() {
        let doc = repaired(r#"{"a": 1, "b": 2,}"#);
        assert_eq!(doc["a"], 1);
        assert_eq!(doc["b"], 2);
    }

    #[test]
    fn test_fix_truncated_after_colon() {
        // Only checks that the repaired text parses.
        repaired(r#"{"key":"#);
    }

    #[test]
    fn test_fix_truncated_true() {
        let doc = repaired(r#"{"key": tr"#);
        assert_eq!(doc["key"], true);
    }

    #[test]
    fn test_fix_truncated_false() {
        let doc = repaired(r#"{"key": fal"#);
        assert_eq!(doc["key"], false);
    }

    #[test]
    fn test_fix_truncated_null() {
        let doc = repaired(r#"{"key": nu"#);
        assert!(doc["key"].is_null());
    }

    #[test]
    fn test_fix_nested_partial() {
        let doc = repaired(r#"{"a": {"b": [1, 2"#);
        assert_eq!(doc["a"]["b"].as_array().unwrap().len(), 2);
    }

    #[test]
    fn test_empty_input() {
        // Empty input is passed through unchanged (and is not parsed here).
        assert_eq!(fix_partial_json(""), "");
    }

    #[test]
    fn test_fix_trailing_comma_in_array() {
        let doc = repaired(r#"[1, 2, 3,]"#);
        assert_eq!(doc.as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_fix_truncated_number() {
        let doc = repaired(r#"{"key": 12"#);
        assert_eq!(doc["key"], 12);
    }

    #[test]
    fn test_fix_truncated_decimal() {
        let doc = repaired(r#"{"key": 3."#);
        assert_eq!(doc["key"], 3);
    }

    #[test]
    fn test_fix_truncated_negative_number() {
        let doc = repaired(r#"{"key": -"#);
        assert_eq!(doc["key"], 0);
    }

    #[test]
    fn test_fix_truncated_exponent() {
        let doc = repaired(r#"{"key": 1e"#);
        assert_eq!(doc["key"], 1);
    }

    #[test]
    fn test_fix_truncated_number_in_array() {
        let doc = repaired(r#"[1, 42"#);
        assert_eq!(doc[0], 1);
        assert_eq!(doc[1], 42);
    }
}

View file

@ -23,3 +23,4 @@ mod text;
mod ttsr;
mod gsd_parser;
mod image;
mod json_parse;

View file

@ -0,0 +1,158 @@
import { test, describe } from "node:test";
import assert from "node:assert/strict";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
// Locate and load the compiled native addon. The platform-specific build is
// preferred; the generic dev build is the fallback.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);
const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon");
const platformTag = `${process.platform}-${process.arch}`;
const candidates = [
  path.join(addonDir, `gsd_engine.${platformTag}.node`),
  path.join(addonDir, "gsd_engine.dev.node"),
];

let native;
// Keep the reason each candidate failed: a binary that exists but cannot be
// loaded (ABI mismatch, missing shared library) should not look like a
// missing build.
const loadFailures = [];
for (const candidate of candidates) {
  try {
    native = require(candidate);
    break;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    loadFailures.push(`${candidate}: ${reason}`);
  }
}

if (!native) {
  console.error("Native addon not found. Run `npm run build:native -w @gsd/native` first.");
  for (const failure of loadFailures) {
    console.error(`  tried ${failure}`);
  }
  process.exit(1);
}
// parseJson(): strict parsing — valid documents are returned, invalid ones throw.
describe("native json: parseJson()", () => {
  test("parses complete JSON object", () => {
    const parsed = native.parseJson('{"key": "value", "num": 42}');
    assert.equal(parsed.key, "value");
    assert.equal(parsed.num, 42);
  });

  test("parses JSON array", () => {
    assert.deepEqual(native.parseJson("[1, 2, 3]"), [1, 2, 3]);
  });

  test("parses JSON string", () => {
    assert.equal(native.parseJson('"hello"'), "hello");
  });

  test("parses JSON number", () => {
    assert.equal(native.parseJson("42.5"), 42.5);
  });

  test("parses JSON boolean", () => {
    const parsedTrue = native.parseJson("true");
    assert.equal(parsedTrue, true);
    const parsedFalse = native.parseJson("false");
    assert.equal(parsedFalse, false);
  });

  test("parses JSON null", () => {
    const parsed = native.parseJson("null");
    assert.equal(parsed, null);
  });

  test("throws on invalid JSON", () => {
    assert.throws(function parseInvalid() {
      native.parseJson("{invalid}");
    });
  });
});
// parsePartialJson(): each row is [test title, truncated input, check on the parsed value].
describe("native json: parsePartialJson()", () => {
  const cases = [
    ["parses complete JSON unchanged", '{"key": "value"}', (parsed) => {
      assert.equal(parsed.key, "value");
    }],
    ["closes unclosed string", '{"key": "val', (parsed) => {
      assert.equal(parsed.key, "val");
    }],
    ["closes unclosed object", '{"key": "value"', (parsed) => {
      assert.equal(parsed.key, "value");
    }],
    ["closes unclosed array", '{"arr": [1, 2, 3', (parsed) => {
      assert.deepEqual(parsed.arr, [1, 2, 3]);
    }],
    ["removes trailing comma in object", '{"a": 1, "b": 2,}', (parsed) => {
      assert.equal(parsed.a, 1);
      assert.equal(parsed.b, 2);
    }],
    ["removes trailing comma in array", "[1, 2, 3,]", (parsed) => {
      assert.deepEqual(parsed, [1, 2, 3]);
    }],
    ["handles truncated value after colon", '{"key":', (parsed) => {
      assert.equal(parsed.key, null);
    }],
    ["handles truncated true", '{"key": tr', (parsed) => {
      assert.equal(parsed.key, true);
    }],
    ["handles truncated false", '{"key": fal', (parsed) => {
      assert.equal(parsed.key, false);
    }],
    ["handles truncated null", '{"key": nu', (parsed) => {
      assert.equal(parsed.key, null);
    }],
    ["handles nested partial structures", '{"a": {"b": [1, 2', (parsed) => {
      assert.deepEqual(parsed.a.b, [1, 2]);
    }],
  ];

  for (const [title, input, check] of cases) {
    test(title, () => {
      check(native.parsePartialJson(input));
    });
  }
});
// parseStreamingJson(): never throws on bad input; blank input yields {}.
describe("native json: parseStreamingJson()", () => {
  test("returns empty object for empty string", () => {
    assert.deepEqual(native.parseStreamingJson(""), {});
  });

  test("returns empty object for whitespace", () => {
    assert.deepEqual(native.parseStreamingJson(" "), {});
  });

  test("parses complete JSON", () => {
    const call = native.parseStreamingJson('{"tool": "search", "args": {"query": "test"}}');
    assert.equal(call.tool, "search");
    assert.equal(call.args.query, "test");
  });

  test("parses partial JSON (streaming scenario)", () => {
    const call = native.parseStreamingJson('{"tool": "search", "args": {"query": "te');
    assert.equal(call.tool, "search");
    assert.equal(call.args.query, "te");
  });

  test("handles deeply nested partial JSON", () => {
    const nested = native.parseStreamingJson('{"a": {"b": {"c": [1, 2, {"d": "val');
    assert.equal(nested.a.b.c[2].d, "val");
  });

  test("handles escaped characters in strings", () => {
    const parsed = native.parseStreamingJson('{"path": "C:\\\\Users\\\\test');
    const hasWindowsPath = parsed.path.includes("C:\\Users\\test");
    assert.ok(hasWindowsPath);
  });
});

View file

@ -93,6 +93,12 @@ export type { NativeImageHandle } from "./image/index.js";
export { ttsrCompileRules, ttsrCheckBuffer, ttsrFreeRules } from "./ttsr/index.js";
export type { TtsrHandle, TtsrRuleInput } from "./ttsr/index.js";
export {
parseJson,
parsePartialJson,
parseStreamingJson,
} from "./json-parse/index.js";
export {
parseFrontmatter,
extractSection as nativeExtractSection,

View file

@ -0,0 +1,34 @@
/**
* Streaming JSON parser via native Rust bindings.
*
* Provides fast JSON parsing with recovery for incomplete/partial JSON,
* used during LLM streaming tool call argument parsing.
*/
import { native } from "../native.js";
/**
 * Strictly parse `text` as JSON through the native addon.
 * The native call throws when `text` is not valid JSON.
 */
export function parseJson<T = unknown>(text: string): T {
  const parsed: unknown = native.parseJson(text);
  return parsed as T;
}
/**
 * Parse JSON that may have been cut off mid-document.
 * The native side closes open strings, objects and arrays, drops trailing
 * commas and completes truncated literals before parsing.
 */
export function parsePartialJson<T = unknown>(text: string): T {
  const parsed: unknown = native.parsePartialJson(text);
  return parsed as T;
}
/**
 * Best-effort parse for streamed JSON: a full parse is attempted first, then a
 * partial parse. Missing or blank input — and, on the native side, input that
 * cannot be repaired — produces `{}`. Drop-in replacement for the JS streaming parser.
 */
export function parseStreamingJson<T = unknown>(text: string | undefined): T {
  // Blank input never reaches the native addon.
  if (text && text.trim() !== "") {
    return native.parseStreamingJson(text) as T;
  }
  return {} as T;
}

View file

@ -129,4 +129,7 @@ export const native = loadNative() as {
extractAllSections: (content: string, level?: number) => string;
batchParseGsdFiles: (directory: string) => unknown;
parseRoadmapFile: (content: string) => unknown;
parseJson: (text: string) => unknown;
parsePartialJson: (text: string) => unknown;
parseStreamingJson: (text: string) => unknown;
};

View file

@ -32,7 +32,6 @@
"ajv-formats": "^3.0.1",
"chalk": "^5.6.2",
"openai": "6.26.0",
"partial-json": "^0.1.7",
"proxy-agent": "^6.5.0",
"undici": "^7.19.1",
"zod-to-json-schema": "^3.24.6"

View file

@ -1,28 +1,14 @@
import { parse as partialParse } from "partial-json";
import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native";
/**
 * Attempts to parse potentially incomplete JSON during streaming.
 * Missing, blank, or unrecoverable input yields an empty object.
 *
 * Uses the native Rust streaming JSON parser for performance. The former
 * `JSON.parse` / `partial-json` fallback chain returned on every path and so
 * left the native call unreachable; it has been removed.
 *
 * @param partialJson The partial JSON string from streaming
 * @returns Parsed value, or an empty object if parsing fails
 */
export function parseStreamingJson<T = any>(partialJson: string | undefined): T {
  // Nothing to parse yet.
  if (!partialJson || partialJson.trim() === "") {
    return {} as T;
  }
  return nativeParseStreamingJson<T>(partialJson);
}