feat: native Rust streaming JSON parser (#266)
* feat: add native Rust streaming JSON parser for LLM tool call argument parsing
Replaces the JS partial-json library with a Rust implementation exposed via napi-rs.
The parser handles incomplete JSON from streaming deltas by closing unclosed strings,
objects, arrays, removing trailing commas, and completing truncated literals.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix: handle truncated numbers and remove dead partial-json dependency
Adds truncated number recovery (e.g. `{"key": 12`, `{"key": 3.`, `{"key": 1e`)
to the Rust streaming JSON parser, and removes the now-unused `partial-json`
npm dependency from pi-ai.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9b0f31e39a
commit
8b9cfae9e9
10 changed files with 618 additions and 19 deletions
1
native/Cargo.lock
generated
1
native/Cargo.lock
generated
|
|
@ -544,6 +544,7 @@ dependencies = [
|
|||
"napi-build",
|
||||
"napi-derive",
|
||||
"regex",
|
||||
"serde_json",
|
||||
"similar",
|
||||
"smallvec",
|
||||
"syntect",
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ image = { version = "0.25", default-features = false, features = [
|
|||
napi = { version = "2", features = ["napi8"] }
|
||||
napi-derive = "2"
|
||||
regex = "1"
|
||||
serde_json = "1"
|
||||
similar = "2"
|
||||
smallvec = "1"
|
||||
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }
|
||||
|
|
|
|||
410
native/crates/engine/src/json_parse.rs
Normal file
410
native/crates/engine/src/json_parse.rs
Normal file
|
|
@ -0,0 +1,410 @@
|
|||
//! Streaming JSON parser via N-API.
|
||||
//!
|
||||
//! Exposes fast JSON parsing with partial/incomplete JSON recovery
|
||||
//! for use during LLM streaming tool call argument parsing.
|
||||
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
/// Parse a complete JSON string. Returns the parsed value or an error.
|
||||
#[napi(js_name = "parseJson")]
|
||||
pub fn parse_json(env: Env, text: String) -> Result<napi::JsUnknown> {
|
||||
let value: serde_json::Value =
|
||||
serde_json::from_str(&text).map_err(|e| Error::from_reason(format!("{e}")))?;
|
||||
serde_value_to_napi(&env, &value)
|
||||
}
|
||||
|
||||
/// Parse potentially incomplete JSON by closing unclosed structures.
|
||||
#[napi(js_name = "parsePartialJson")]
|
||||
pub fn parse_partial_json(env: Env, text: String) -> Result<napi::JsUnknown> {
|
||||
let fixed = fix_partial_json(&text);
|
||||
let value: serde_json::Value =
|
||||
serde_json::from_str(&fixed).map_err(|e| Error::from_reason(format!("{e}")))?;
|
||||
serde_value_to_napi(&env, &value)
|
||||
}
|
||||
|
||||
/// Try full JSON parse first; fall back to partial parse. Returns `{}` on total failure.
|
||||
#[napi(js_name = "parseStreamingJson")]
|
||||
pub fn parse_streaming_json(env: Env, text: String) -> Result<napi::JsUnknown> {
|
||||
let trimmed = text.trim();
|
||||
if trimmed.is_empty() {
|
||||
// Return empty object
|
||||
let obj = env.create_object()?;
|
||||
return Ok(obj.into_unknown());
|
||||
}
|
||||
|
||||
// Fast path: try complete parse
|
||||
if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) {
|
||||
return serde_value_to_napi(&env, &value);
|
||||
}
|
||||
|
||||
// Slow path: fix partial JSON
|
||||
let fixed = fix_partial_json(trimmed);
|
||||
if let Ok(value) = serde_json::from_str::<serde_json::Value>(&fixed) {
|
||||
return serde_value_to_napi(&env, &value);
|
||||
}
|
||||
|
||||
// Total failure: return empty object
|
||||
let obj = env.create_object()?;
|
||||
Ok(obj.into_unknown())
|
||||
}
|
||||
|
||||
/// Fix incomplete JSON by closing unclosed strings, objects, arrays,
|
||||
/// removing trailing commas, and handling truncated values.
|
||||
fn fix_partial_json(input: &str) -> String {
|
||||
let mut result = String::with_capacity(input.len() + 16);
|
||||
let mut stack: Vec<char> = Vec::new(); // tracks expected closing chars
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
let chars: Vec<char> = input.chars().collect();
|
||||
let len = chars.len();
|
||||
let mut i = 0;
|
||||
|
||||
while i < len {
|
||||
let ch = chars[i];
|
||||
|
||||
if escape_next {
|
||||
result.push(ch);
|
||||
escape_next = false;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if in_string {
|
||||
if ch == '\\' {
|
||||
result.push(ch);
|
||||
escape_next = true;
|
||||
} else if ch == '"' {
|
||||
result.push(ch);
|
||||
in_string = false;
|
||||
} else {
|
||||
result.push(ch);
|
||||
}
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not in a string
|
||||
match ch {
|
||||
'"' => {
|
||||
result.push(ch);
|
||||
in_string = true;
|
||||
}
|
||||
'{' => {
|
||||
result.push(ch);
|
||||
stack.push('}');
|
||||
}
|
||||
'[' => {
|
||||
result.push(ch);
|
||||
stack.push(']');
|
||||
}
|
||||
'}' | ']' => {
|
||||
// Remove trailing comma before closing
|
||||
remove_trailing_comma(&mut result);
|
||||
result.push(ch);
|
||||
if let Some(expected) = stack.last() {
|
||||
if *expected == ch {
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
result.push(ch);
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// If we ended inside an escape sequence within a string
|
||||
if escape_next && in_string {
|
||||
// Drop the trailing backslash (incomplete escape)
|
||||
result.pop();
|
||||
}
|
||||
|
||||
// Close unclosed string
|
||||
if in_string {
|
||||
result.push('"');
|
||||
}
|
||||
|
||||
// Remove any trailing comma before we close structures
|
||||
remove_trailing_comma(&mut result);
|
||||
|
||||
// Handle truncated values: if last meaningful token looks like a key with colon but no value
|
||||
handle_truncated_value(&mut result);
|
||||
|
||||
// Close unclosed structures
|
||||
while let Some(closer) = stack.pop() {
|
||||
remove_trailing_comma(&mut result);
|
||||
result.push(closer);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Strip a single trailing comma from `result`.
///
/// Trailing whitespace is ignored when looking for the comma. When a comma
/// is found, the comma and everything after it are removed; otherwise the
/// buffer (including its trailing whitespace) is left untouched.
fn remove_trailing_comma(result: &mut String) {
    if let Some(head) = result.trim_end().strip_suffix(',') {
        let keep = head.len();
        result.truncate(keep);
    }
}
|
||||
|
||||
/// Repair a truncated scalar at the very end of `result`.
///
/// Trailing whitespace is ignored while inspecting the buffer. Three
/// situations are handled, in this order:
///
/// * the buffer ends with `:` — `null` is appended (`{"key":` -> `{"key":null`);
/// * the buffer ends with a number-like run (digits, `.`, `-`, `+`, `e`, `E`)
///   that directly follows `:`, `,` or `[` — the run is cut back to its
///   longest prefix that is a complete JSON number (`3.` -> `3`, `1e-` -> `1`),
///   or replaced by `0` when no such prefix exists (`-` -> `0`);
/// * the buffer ends with a proper prefix of `true`, `false` or `null` that
///   directly follows `:`, `,` or `[` — the literal is completed.
///
/// Anything else is left untouched.
fn handle_truncated_value(result: &mut String) {
    let trimmed = result.trim_end();

    if trimmed.ends_with(':') {
        result.push_str("null");
        return;
    }

    let len = trimmed.len();

    // Length of the trailing run of bytes that can occur in a number.
    let number_run = trimmed
        .bytes()
        .rev()
        .take_while(|b| matches!(*b, b'0'..=b'9' | b'.' | b'-' | b'+' | b'e' | b'E'))
        .count();

    if number_run > 0 {
        // Every byte of the run is ASCII, so `start` is a char boundary.
        let start = len - number_run;
        if follows_value_position(&trimmed[..start]) {
            // Validate against the JSON grammar rather than `f64::from_str`:
            // the latter accepts `3.`, `.5` and `+5`, which JSON rejects.
            let valid = json_number_prefix_len(&trimmed[start..]);
            if valid == 0 {
                // No usable digits at all (e.g. a lone `-`).
                result.truncate(start);
                result.push('0');
            } else if valid < number_run {
                result.truncate(start + valid);
            }
            return;
        }
    }

    // Proper prefixes of the JSON literals, longest first.
    for literal in ["true", "false", "null"].iter().copied() {
        for cut in (1..literal.len()).rev() {
            if let Some(head) = trimmed.strip_suffix(&literal[..cut]) {
                if follows_value_position(head) {
                    let keep = head.len();
                    result.truncate(keep);
                    result.push_str(literal);
                    return;
                }
            }
        }
    }
}

/// Returns `true` when `head`, ignoring trailing whitespace, ends with a
/// character after which a JSON value may start (`:`, `,` or `[`).
fn follows_value_position(head: &str) -> bool {
    let head = head.trim_end();
    head.ends_with(':') || head.ends_with(',') || head.ends_with('[')
}

/// Byte length of the longest prefix of `token` that is a complete JSON
/// number: `-? (0 | [1-9][0-9]*) (\.[0-9]+)? ([eE][+-]?[0-9]+)?`.
///
/// Returns `0` when `token` does not start with a valid number.
fn json_number_prefix_len(token: &str) -> usize {
    let bytes = token.as_bytes();
    // Index of the first non-digit byte at or after `from` (`from <= bytes.len()`).
    let skip_digits = |from: usize| -> usize {
        from + bytes[from..].iter().take_while(|b| b.is_ascii_digit()).count()
    };

    // Optional sign, then the mandatory integer part.
    let sign = usize::from(bytes.first() == Some(&b'-'));
    let mut pos = match bytes.get(sign).copied() {
        // A leading zero must stand alone in JSON.
        Some(b'0') => sign + 1,
        Some(b'1'..=b'9') => skip_digits(sign),
        _ => return 0,
    };
    let mut end = pos;

    // Optional fraction: only counts when at least one digit follows the dot.
    if bytes.get(pos) == Some(&b'.') {
        let after_fraction = skip_digits(pos + 1);
        if after_fraction == pos + 1 {
            return end;
        }
        pos = after_fraction;
        end = pos;
    }

    // Optional exponent: only counts when at least one digit is present.
    if matches!(bytes.get(pos).copied(), Some(b'e') | Some(b'E')) {
        let mut digits_from = pos + 1;
        if matches!(bytes.get(digits_from).copied(), Some(b'+') | Some(b'-')) {
            digits_from += 1;
        }
        let after_exponent = skip_digits(digits_from);
        if after_exponent > digits_from {
            end = after_exponent;
        }
    }

    end
}
|
||||
|
||||
/// Convert a serde_json::Value to a napi JsUnknown.
|
||||
fn serde_value_to_napi(env: &Env, value: &serde_json::Value) -> Result<napi::JsUnknown> {
|
||||
match value {
|
||||
serde_json::Value::Null => {
|
||||
env.get_null().map(|v| v.into_unknown())
|
||||
}
|
||||
serde_json::Value::Bool(b) => {
|
||||
env.get_boolean(*b).map(|v| v.into_unknown())
|
||||
}
|
||||
serde_json::Value::Number(n) => {
|
||||
if let Some(i) = n.as_i64() {
|
||||
// Use i32 if it fits, otherwise f64
|
||||
if i >= i64::from(i32::MIN) && i <= i64::from(i32::MAX) {
|
||||
env.create_int32(i as i32).map(|v| v.into_unknown())
|
||||
} else {
|
||||
env.create_double(i as f64).map(|v| v.into_unknown())
|
||||
}
|
||||
} else if let Some(f) = n.as_f64() {
|
||||
env.create_double(f).map(|v| v.into_unknown())
|
||||
} else {
|
||||
env.get_null().map(|v| v.into_unknown())
|
||||
}
|
||||
}
|
||||
serde_json::Value::String(s) => {
|
||||
env.create_string(s).map(|v| v.into_unknown())
|
||||
}
|
||||
serde_json::Value::Array(arr) => {
|
||||
let mut js_arr = env.create_array_with_length(arr.len())?;
|
||||
for (idx, item) in arr.iter().enumerate() {
|
||||
let js_val = serde_value_to_napi(env, item)?;
|
||||
js_arr.set_element(idx as u32, js_val)?;
|
||||
}
|
||||
Ok(js_arr.into_unknown())
|
||||
}
|
||||
serde_json::Value::Object(map) => {
|
||||
let mut obj = env.create_object()?;
|
||||
for (key, val) in map {
|
||||
let js_val = serde_value_to_napi(env, val)?;
|
||||
obj.set_named_property(key, js_val)?;
|
||||
}
|
||||
Ok(obj.into_unknown())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Repairs `input` with `fix_partial_json` and parses the outcome,
    /// panicking when the repaired text is not valid JSON.
    fn repaired(input: &str) -> serde_json::Value {
        serde_json::from_str(&fix_partial_json(input)).unwrap()
    }

    #[test]
    fn test_fix_complete_json() {
        let _ = repaired(r#"{"key": "value", "num": 42}"#);
    }

    #[test]
    fn test_fix_unclosed_string() {
        let parsed = repaired(r#"{"key": "val"#);
        assert_eq!(parsed["key"], "val");
    }

    #[test]
    fn test_fix_unclosed_object() {
        let parsed = repaired(r#"{"key": "value""#);
        assert_eq!(parsed["key"], "value");
    }

    #[test]
    fn test_fix_unclosed_array() {
        let parsed = repaired(r#"{"arr": [1, 2, 3"#);
        assert_eq!(parsed["arr"].as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_fix_trailing_comma() {
        let parsed = repaired(r#"{"a": 1, "b": 2,}"#);
        assert_eq!(parsed["a"], 1);
        assert_eq!(parsed["b"], 2);
    }

    #[test]
    fn test_fix_truncated_after_colon() {
        let _ = repaired(r#"{"key":"#);
    }

    #[test]
    fn test_fix_truncated_true() {
        let parsed = repaired(r#"{"key": tr"#);
        assert_eq!(parsed["key"], true);
    }

    #[test]
    fn test_fix_truncated_false() {
        let parsed = repaired(r#"{"key": fal"#);
        assert_eq!(parsed["key"], false);
    }

    #[test]
    fn test_fix_truncated_null() {
        let parsed = repaired(r#"{"key": nu"#);
        assert!(parsed["key"].is_null());
    }

    #[test]
    fn test_fix_nested_partial() {
        let parsed = repaired(r#"{"a": {"b": [1, 2"#);
        assert_eq!(parsed["a"]["b"].as_array().unwrap().len(), 2);
    }

    #[test]
    fn test_empty_input() {
        // Nothing to repair: the output stays empty (and is not parsed).
        assert_eq!(fix_partial_json(""), "");
    }

    #[test]
    fn test_fix_trailing_comma_in_array() {
        let parsed = repaired(r#"[1, 2, 3,]"#);
        assert_eq!(parsed.as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_fix_truncated_number() {
        let parsed = repaired(r#"{"key": 12"#);
        assert_eq!(parsed["key"], 12);
    }

    #[test]
    fn test_fix_truncated_decimal() {
        let parsed = repaired(r#"{"key": 3."#);
        assert_eq!(parsed["key"], 3);
    }

    #[test]
    fn test_fix_truncated_negative_number() {
        let parsed = repaired(r#"{"key": -"#);
        assert_eq!(parsed["key"], 0);
    }

    #[test]
    fn test_fix_truncated_exponent() {
        let parsed = repaired(r#"{"key": 1e"#);
        assert_eq!(parsed["key"], 1);
    }

    #[test]
    fn test_fix_truncated_number_in_array() {
        let parsed = repaired(r#"[1, 42"#);
        assert_eq!(parsed[0], 1);
        assert_eq!(parsed[1], 42);
    }
}
|
||||
|
|
@ -23,3 +23,4 @@ mod text;
|
|||
mod ttsr;
|
||||
mod gsd_parser;
|
||||
mod image;
|
||||
mod json_parse;
|
||||
|
|
|
|||
158
packages/native/src/__tests__/json-parse.test.mjs
Normal file
158
packages/native/src/__tests__/json-parse.test.mjs
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
import { test, describe } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { createRequire } from "node:module";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// Candidate addon binaries, in order of preference: the platform-specific
// build first, then the generic dev build.
const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon");
const platformTag = `${process.platform}-${process.arch}`;
const candidates = [`gsd_engine.${platformTag}.node`, "gsd_engine.dev.node"].map((file) =>
  path.join(addonDir, file),
);

/**
 * Requires each path in turn and returns the first module that loads.
 * Load failures are ignored; `undefined` is returned when none loads.
 */
function loadFirstAvailable(addonPaths) {
  for (const addonPath of addonPaths) {
    try {
      return require(addonPath);
    } catch {
      // try next
    }
  }
  return undefined;
}

const native = loadFirstAvailable(candidates);

// Without the addon there is nothing to test: report and abort the run.
if (!native) {
  console.error("Native addon not found. Run `npm run build:native -w @gsd/native` first.");
  process.exit(1);
}
|
||||
|
||||
// Strict parsing: every JSON value kind round-trips, invalid input throws.
describe("native json: parseJson()", () => {
  const parse = (text) => native.parseJson(text);

  test("parses complete JSON object", () => {
    const { key, num } = parse('{"key": "value", "num": 42}');
    assert.equal(key, "value");
    assert.equal(num, 42);
  });

  test("parses JSON array", () => {
    assert.deepEqual(parse("[1, 2, 3]"), [1, 2, 3]);
  });

  test("parses JSON string", () => {
    assert.equal(parse('"hello"'), "hello");
  });

  test("parses JSON number", () => {
    assert.equal(parse("42.5"), 42.5);
  });

  test("parses JSON boolean", () => {
    assert.equal(parse("true"), true);
    assert.equal(parse("false"), false);
  });

  test("parses JSON null", () => {
    assert.equal(parse("null"), null);
  });

  test("throws on invalid JSON", () => {
    assert.throws(() => parse("{invalid}"));
  });
});
|
||||
|
||||
// Repairing parser: truncated input is completed before parsing.
describe("native json: parsePartialJson()", () => {
  const parse = (text) => native.parsePartialJson(text);

  test("parses complete JSON unchanged", () => {
    const { key } = parse('{"key": "value"}');
    assert.equal(key, "value");
  });

  test("closes unclosed string", () => {
    const { key } = parse('{"key": "val');
    assert.equal(key, "val");
  });

  test("closes unclosed object", () => {
    const { key } = parse('{"key": "value"');
    assert.equal(key, "value");
  });

  test("closes unclosed array", () => {
    const { arr } = parse('{"arr": [1, 2, 3');
    assert.deepEqual(arr, [1, 2, 3]);
  });

  test("removes trailing comma in object", () => {
    const { a, b } = parse('{"a": 1, "b": 2,}');
    assert.equal(a, 1);
    assert.equal(b, 2);
  });

  test("removes trailing comma in array", () => {
    assert.deepEqual(parse("[1, 2, 3,]"), [1, 2, 3]);
  });

  test("handles truncated value after colon", () => {
    const { key } = parse('{"key":');
    assert.equal(key, null);
  });

  test("handles truncated true", () => {
    const { key } = parse('{"key": tr');
    assert.equal(key, true);
  });

  test("handles truncated false", () => {
    const { key } = parse('{"key": fal');
    assert.equal(key, false);
  });

  test("handles truncated null", () => {
    const { key } = parse('{"key": nu');
    assert.equal(key, null);
  });

  test("handles nested partial structures", () => {
    const { a } = parse('{"a": {"b": [1, 2');
    assert.deepEqual(a.b, [1, 2]);
  });
});
|
||||
|
||||
// Streaming entry point: never throws, falls back to `{}`.
describe("native json: parseStreamingJson()", () => {
  const parse = (text) => native.parseStreamingJson(text);

  test("returns empty object for empty string", () => {
    assert.deepEqual(parse(""), {});
  });

  test("returns empty object for whitespace", () => {
    assert.deepEqual(parse(" "), {});
  });

  test("parses complete JSON", () => {
    const { tool, args } = parse('{"tool": "search", "args": {"query": "test"}}');
    assert.equal(tool, "search");
    assert.equal(args.query, "test");
  });

  test("parses partial JSON (streaming scenario)", () => {
    const { tool, args } = parse('{"tool": "search", "args": {"query": "te');
    assert.equal(tool, "search");
    assert.equal(args.query, "te");
  });

  test("handles deeply nested partial JSON", () => {
    const parsed = parse('{"a": {"b": {"c": [1, 2, {"d": "val');
    assert.equal(parsed.a.b.c[2].d, "val");
  });

  test("handles escaped characters in strings", () => {
    const { path: parsedPath } = parse('{"path": "C:\\\\Users\\\\test');
    assert.ok(parsedPath.includes("C:\\Users\\test"));
  });
});
|
||||
|
|
@ -93,6 +93,12 @@ export type { NativeImageHandle } from "./image/index.js";
|
|||
|
||||
export { ttsrCompileRules, ttsrCheckBuffer, ttsrFreeRules } from "./ttsr/index.js";
|
||||
export type { TtsrHandle, TtsrRuleInput } from "./ttsr/index.js";
|
||||
export {
|
||||
parseJson,
|
||||
parsePartialJson,
|
||||
parseStreamingJson,
|
||||
} from "./json-parse/index.js";
|
||||
|
||||
export {
|
||||
parseFrontmatter,
|
||||
extractSection as nativeExtractSection,
|
||||
|
|
|
|||
34
packages/native/src/json-parse/index.ts
Normal file
34
packages/native/src/json-parse/index.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
* Streaming JSON parser via native Rust bindings.
|
||||
*
|
||||
* Provides fast JSON parsing with recovery for incomplete/partial JSON,
|
||||
* used during LLM streaming tool call argument parsing.
|
||||
*/
|
||||
|
||||
import { native } from "../native.js";
|
||||
|
||||
/**
 * Strictly parse a complete JSON document with the native parser.
 *
 * @param text JSON text to parse.
 * @returns The parsed value, cast to `T` without runtime validation.
 * @throws When `text` is not valid JSON.
 */
export function parseJson<T = unknown>(text: string): T {
  const parsed: unknown = native.parseJson(text);
  return parsed as T;
}
|
||||
|
||||
/**
 * Parse JSON that may be cut off mid-document.
 *
 * The native parser closes unclosed strings, objects and arrays, removes
 * trailing commas and completes truncated literals before parsing.
 *
 * @param text Possibly incomplete JSON text.
 * @returns The parsed value, cast to `T` without runtime validation.
 */
export function parsePartialJson<T = unknown>(text: string): T {
  const parsed: unknown = native.parsePartialJson(text);
  return parsed as T;
}
|
||||
|
||||
/**
 * Parse streamed JSON: a full parse is tried first, then a partial parse.
 *
 * Returns `{}` when `text` is missing or blank, and on total failure.
 * Drop-in replacement for the JS streaming parser.
 *
 * @param text JSON text received so far, if any.
 * @returns The parsed value, cast to `T` without runtime validation.
 */
export function parseStreamingJson<T = unknown>(text: string | undefined): T {
  // Only non-blank text is worth a trip across the native boundary.
  if (text && text.trim() !== "") {
    return native.parseStreamingJson(text) as T;
  }
  return {} as T;
}
|
||||
|
|
@ -129,4 +129,7 @@ export const native = loadNative() as {
|
|||
extractAllSections: (content: string, level?: number) => string;
|
||||
batchParseGsdFiles: (directory: string) => unknown;
|
||||
parseRoadmapFile: (content: string) => unknown;
|
||||
parseJson: (text: string) => unknown;
|
||||
parsePartialJson: (text: string) => unknown;
|
||||
parseStreamingJson: (text: string) => unknown;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
"ajv-formats": "^3.0.1",
|
||||
"chalk": "^5.6.2",
|
||||
"openai": "6.26.0",
|
||||
"partial-json": "^0.1.7",
|
||||
"proxy-agent": "^6.5.0",
|
||||
"undici": "^7.19.1",
|
||||
"zod-to-json-schema": "^3.24.6"
|
||||
|
|
|
|||
|
|
@ -1,28 +1,14 @@
|
|||
import { parse as partialParse } from "partial-json";
|
||||
import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native";
|
||||
|
||||
/**
|
||||
* Attempts to parse potentially incomplete JSON during streaming.
|
||||
* Always returns a valid object, even if the JSON is incomplete.
|
||||
*
|
||||
* Uses the native Rust streaming JSON parser for performance.
|
||||
*
|
||||
* @param partialJson The partial JSON string from streaming
|
||||
* @returns Parsed object or empty object if parsing fails
|
||||
*/
|
||||
export function parseStreamingJson<T = any>(partialJson: string | undefined): T {
|
||||
if (!partialJson || partialJson.trim() === "") {
|
||||
return {} as T;
|
||||
}
|
||||
|
||||
// Try standard parsing first (fastest for complete JSON)
|
||||
try {
|
||||
return JSON.parse(partialJson) as T;
|
||||
} catch {
|
||||
// Try partial-json for incomplete JSON
|
||||
try {
|
||||
const result = partialParse(partialJson);
|
||||
return (result ?? {}) as T;
|
||||
} catch {
|
||||
// If all parsing fails, return empty object
|
||||
return {} as T;
|
||||
}
|
||||
}
|
||||
return nativeParseStreamingJson<T>(partialJson);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue