From 4c97d59536a74ac34da87e28e64a965c2f92bd8b Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 14:00:39 -0600 Subject: [PATCH] feat: add native Rust diff engine for edit tool Move the edit tool's hot-path diffing operations from JS to native Rust: - `normalizeForFuzzyMatch`: single-pass Unicode normalization (smart quotes, dashes, special spaces, trailing whitespace) - `fuzzyFindText`: exact-then-fuzzy substring search with UTF-16 index conversion for JS compatibility - `generateDiff`: unified diff generation using the `similar` crate (Myers' algorithm with optimizations) The Rust module at `native/crates/engine/src/diff.rs` exposes three napi functions. The TypeScript wrapper at `packages/native/src/diff/` follows the existing module pattern. `edit-diff.ts` now delegates to native implementations while keeping line-ending handling and file I/O in JS. 18 tests covering normalization, fuzzy matching (including UTF-16 index correctness with emoji/surrogate pairs), and diff generation. Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 8 +- native/crates/engine/Cargo.toml | 1 + native/crates/engine/src/diff.rs | 421 ++++++++++++++++++ native/crates/engine/src/lib.rs | 1 + packages/native/src/__tests__/diff.test.mjs | 189 ++++++++ packages/native/src/diff/index.ts | 61 +++ packages/native/src/diff/types.ts | 24 + packages/native/src/index.ts | 7 + packages/native/src/native.ts | 3 + .../src/core/tools/edit-diff.ts | 180 +------- 10 files changed, 737 insertions(+), 158 deletions(-) create mode 100644 native/crates/engine/src/diff.rs create mode 100644 packages/native/src/__tests__/diff.test.mjs create mode 100644 packages/native/src/diff/index.ts create mode 100644 packages/native/src/diff/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index 453057f5b..8be67af29 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -543,7 +543,7 @@ dependencies = [ "napi", "napi-build", "napi-derive", - "regex", + "similar", "smallvec", "syntect", "unicode-segmentation", @@ -1207,6 +1207,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "siphasher" version = "1.0.2" diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index e1ed956c0..9cecd67a9 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -27,6 +27,7 @@ image = { version = "0.25", default-features = false, features = [ napi = { version = "2", features = ["napi8"] } napi-derive = "2" regex = "1" +similar = "2" smallvec = "1" syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] } unicode-segmentation = "1" diff --git a/native/crates/engine/src/diff.rs b/native/crates/engine/src/diff.rs new file mode 100644 index 000000000..4f83529e3 --- /dev/null +++ b/native/crates/engine/src/diff.rs @@ -0,0 +1,421 @@ +//! Fuzzy text matching and unified diff generation for the edit tool. +//! +//! Replaces the JS `edit-diff.ts` hot path with native Rust: +//! - `normalizeForFuzzyMatch`: Unicode normalization (smart quotes, dashes, special spaces, trailing whitespace) +//! - `fuzzyFindText`: exact-then-fuzzy substring search +//! - `generateDiff`: unified diff with line numbers and context, matching the JS output format + +use napi_derive::napi; + +// --------------------------------------------------------------------------- +// normalizeForFuzzyMatch +// --------------------------------------------------------------------------- + +/// Normalize text for fuzzy matching: +/// - Strip trailing whitespace from each line +/// - Smart single quotes → ' +/// - Smart double quotes → " +/// - Various dashes/hyphens → - +/// - Special Unicode spaces → regular space +#[napi(js_name = "normalizeForFuzzyMatch")] +pub fn normalize_for_fuzzy_match(text: String) -> String { + normalize_impl(&text) +} + +fn normalize_impl(text: &str) -> String { + let mut out = String::with_capacity(text.len()); + + for (i, line) in text.split('\n').enumerate() { + if i > 0 { + out.push('\n'); + } + let trimmed = line.trim_end(); + for ch in trimmed.chars() { + out.push(normalize_char(ch)); + } + } + + out +} + +#[inline] +fn normalize_char(ch: char) -> char { + match ch { + // Smart single quotes → ' + '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'', + // Smart double quotes → " + '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"', + // Various dashes/hyphens → - + '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}' + | '\u{2212}' => '-', + // Special spaces → regular space + '\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}' + | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}' + | '\u{3000}' => ' ', + _ => ch, + } +} + +// --------------------------------------------------------------------------- +// fuzzyFindText +// --------------------------------------------------------------------------- + +#[napi(object)] +pub struct FuzzyMatchResult { + pub found: bool, + pub index: i32, + pub match_length: i32, + pub used_fuzzy_match: bool, + /// When exact match: original content. When fuzzy match: normalized content. + pub content_for_replacement: String, +} + +/// Convert a UTF-8 byte offset to a JS string index (UTF-16 code unit offset). +fn byte_offset_to_utf16(s: &str, byte_offset: usize) -> usize { + s[..byte_offset].chars().map(|c| c.len_utf16()).sum() +} + +/// Get the UTF-16 code unit length of a UTF-8 string. +fn utf16_len(s: &str) -> usize { + s.chars().map(|c| c.len_utf16()).sum() +} + +/// Find `old_text` in `content`, trying exact match first, then fuzzy match. +/// +/// Returns indices and lengths as UTF-16 code unit offsets (compatible with +/// JS `String.prototype.substring()`). +/// +/// When fuzzy matching is used, `content_for_replacement` is the normalized +/// version of `content` (trailing whitespace stripped, Unicode quotes/dashes +/// normalized to ASCII). +#[napi(js_name = "fuzzyFindText")] +pub fn fuzzy_find_text(content: String, old_text: String) -> FuzzyMatchResult { + // Try exact match first + if let Some(byte_idx) = content.find(&old_text) { + return FuzzyMatchResult { + found: true, + index: byte_offset_to_utf16(&content, byte_idx) as i32, + match_length: utf16_len(&old_text) as i32, + used_fuzzy_match: false, + content_for_replacement: content, + }; + } + + // Try fuzzy match + let fuzzy_content = normalize_impl(&content); + let fuzzy_old_text = normalize_impl(&old_text); + + if let Some(byte_idx) = fuzzy_content.find(&fuzzy_old_text) { + FuzzyMatchResult { + found: true, + index: byte_offset_to_utf16(&fuzzy_content, byte_idx) as i32, + match_length: utf16_len(&fuzzy_old_text) as i32, + used_fuzzy_match: true, + content_for_replacement: fuzzy_content, + } + } else { + FuzzyMatchResult { + found: false, + index: -1, + match_length: 0, + used_fuzzy_match: false, + content_for_replacement: content, + } + } +} + +// --------------------------------------------------------------------------- +// generateDiff +// --------------------------------------------------------------------------- + +#[napi(object)] +pub struct DiffResult { + pub diff: String, + pub first_changed_line: Option, +} + +/// Generate a unified diff string with line numbers and context. +/// +/// Uses the `similar` crate (Myers' diff algorithm with optimizations). +/// Output format matches the JS `generateDiffString`: +/// - `+N line` for additions +/// - `-N line` for removals +/// - ` N line` for context +/// - ` ... ` for skipped context +#[napi(js_name = "generateDiff")] +pub fn generate_diff(old_content: String, new_content: String, context_lines: Option) -> DiffResult { + let context = context_lines.unwrap_or(4) as usize; + generate_diff_impl(&old_content, &new_content, context) +} + +fn generate_diff_impl(old_content: &str, new_content: &str, context_lines: usize) -> DiffResult { + let old_lines: Vec<&str> = old_content.split('\n').collect(); + let new_lines: Vec<&str> = new_content.split('\n').collect(); + + let max_line_num = old_lines.len().max(new_lines.len()); + let line_num_width = if max_line_num == 0 { + 1 + } else { + max_line_num.to_string().len() + }; + + // Use similar crate for diffing + let diff = similar::TextDiff::configure() + .algorithm(similar::Algorithm::Myers) + .diff_lines(old_content, new_content); + + let mut output: Vec = Vec::new(); + let mut old_line_num: usize = 1; + let mut new_line_num: usize = 1; + let mut last_was_change = false; + let mut first_changed_line: Option = None; + + // Build parts from diff ops, matching the JS `diff` npm package structure + #[derive(Debug)] + enum PartTag { + Equal, + Added, + Removed, + } + + struct Part { + tag: PartTag, + lines: Vec, + } + + let mut parts: Vec = Vec::new(); + + for op in diff.ops() { + match op { + similar::DiffOp::Equal { old_index, len, .. } => { + let lines: Vec = old_lines[*old_index..*old_index + *len] + .iter() + .map(|s| s.to_string()) + .collect(); + parts.push(Part { tag: PartTag::Equal, lines }); + } + similar::DiffOp::Delete { old_index, old_len, .. } => { + let lines: Vec = old_lines[*old_index..*old_index + *old_len] + .iter() + .map(|s| s.to_string()) + .collect(); + parts.push(Part { tag: PartTag::Removed, lines }); + } + similar::DiffOp::Insert { new_index, new_len, .. } => { + let lines: Vec = new_lines[*new_index..*new_index + *new_len] + .iter() + .map(|s| s.to_string()) + .collect(); + parts.push(Part { tag: PartTag::Added, lines }); + } + similar::DiffOp::Replace { + old_index, old_len, new_index, new_len, .. + } => { + let del_lines: Vec = old_lines[*old_index..*old_index + *old_len] + .iter() + .map(|s| s.to_string()) + .collect(); + parts.push(Part { tag: PartTag::Removed, lines: del_lines }); + + let ins_lines: Vec = new_lines[*new_index..*new_index + *new_len] + .iter() + .map(|s| s.to_string()) + .collect(); + parts.push(Part { tag: PartTag::Added, lines: ins_lines }); + } + } + } + + for (i, part) in parts.iter().enumerate() { + let raw = &part.lines; + + match part.tag { + PartTag::Added | PartTag::Removed => { + if first_changed_line.is_none() { + first_changed_line = Some(new_line_num as i32); + } + + for line in raw { + match part.tag { + PartTag::Added => { + let num = format!("{:>width$}", new_line_num, width = line_num_width); + output.push(format!("+{} {}", num, line)); + new_line_num += 1; + } + PartTag::Removed => { + let num = format!("{:>width$}", old_line_num, width = line_num_width); + output.push(format!("-{} {}", num, line)); + old_line_num += 1; + } + _ => unreachable!(), + } + } + last_was_change = true; + } + PartTag::Equal => { + let next_part_is_change = i < parts.len() - 1 + && matches!(parts[i + 1].tag, PartTag::Added | PartTag::Removed); + + if last_was_change || next_part_is_change { + let mut lines_to_show = raw.as_slice(); + let mut skip_start = 0usize; + let mut skip_end = 0usize; + + if !last_was_change { + // Show only last N lines as leading context + skip_start = raw.len().saturating_sub(context_lines); + lines_to_show = &raw[skip_start..]; + } + + if !next_part_is_change && lines_to_show.len() > context_lines { + // Show only first N lines as trailing context + skip_end = lines_to_show.len() - context_lines; + lines_to_show = &lines_to_show[..context_lines]; + } + + if skip_start > 0 { + output.push(format!( + " {:>width$} ...", + "", + width = line_num_width + )); + old_line_num += skip_start; + new_line_num += skip_start; + } + + for line in lines_to_show { + let num = format!("{:>width$}", old_line_num, width = line_num_width); + output.push(format!(" {} {}", num, line)); + old_line_num += 1; + new_line_num += 1; + } + + if skip_end > 0 { + output.push(format!( + " {:>width$} ...", + "", + width = line_num_width + )); + old_line_num += skip_end; + new_line_num += skip_end; + } + } else { + old_line_num += raw.len(); + new_line_num += raw.len(); + } + + last_was_change = false; + } + } + } + + DiffResult { + diff: output.join("\n"), + first_changed_line, + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_smart_quotes() { + let input = "\u{201C}hello\u{201D} \u{2018}world\u{2019}"; + assert_eq!(normalize_impl(input), "\"hello\" 'world'"); + } + + #[test] + fn test_normalize_dashes() { + let input = "a\u{2013}b\u{2014}c\u{2212}d"; + assert_eq!(normalize_impl(input), "a-b-c-d"); + } + + #[test] + fn test_normalize_special_spaces() { + let input = "a\u{00A0}b\u{2003}c\u{3000}d"; + assert_eq!(normalize_impl(input), "a b c d"); + } + + #[test] + fn test_normalize_trailing_whitespace() { + let input = "hello \nworld "; + assert_eq!(normalize_impl(input), "hello\nworld"); + } + + #[test] + fn test_fuzzy_find_exact() { + let result = fuzzy_find_text("hello world".to_string(), "world".to_string()); + assert!(result.found); + assert_eq!(result.index, 6); + assert_eq!(result.match_length, 5); + assert!(!result.used_fuzzy_match); + } + + #[test] + fn test_fuzzy_find_with_smart_quotes() { + let content = "let x = \u{201C}hello\u{201D};".to_string(); + let old_text = "let x = \"hello\";".to_string(); + let result = fuzzy_find_text(content, old_text); + assert!(result.found); + assert!(result.used_fuzzy_match); + } + + #[test] + fn test_fuzzy_find_not_found() { + let result = fuzzy_find_text("hello world".to_string(), "xyz".to_string()); + assert!(!result.found); + assert_eq!(result.index, -1); + } + + #[test] + fn test_generate_diff_basic() { + let old = "line1\nline2\nline3"; + let new_text = "line1\nmodified\nline3"; + let result = generate_diff_impl(old, new_text, 4); + assert!(result.diff.contains("-")); + assert!(result.diff.contains("+")); + assert!(result.diff.contains("line2")); + assert!(result.diff.contains("modified")); + assert!(result.first_changed_line.is_some()); + } + + #[test] + fn test_generate_diff_addition() { + let old = "line1\nline3"; + let new_text = "line1\nline2\nline3"; + let result = generate_diff_impl(old, new_text, 4); + assert!(result.diff.contains("+")); + assert!(result.diff.contains("line2")); + } + + #[test] + fn test_generate_diff_deletion() { + let old = "line1\nline2\nline3"; + let new_text = "line1\nline3"; + let result = generate_diff_impl(old, new_text, 4); + assert!(result.diff.contains("-")); + assert!(result.diff.contains("line2")); + } + + #[test] + fn test_generate_diff_context_ellipsis() { + let mut old_lines: Vec = (1..=20).map(|i| format!("line{}", i)).collect(); + let old = old_lines.join("\n"); + old_lines[10] = "modified".to_string(); + let new_text = old_lines.join("\n"); + let result = generate_diff_impl(&old, &new_text, 2); + assert!(result.diff.contains("...")); + } + + #[test] + fn test_generate_diff_empty() { + let result = generate_diff_impl("same", "same", 4); + assert!(result.diff.is_empty()); + assert!(result.first_changed_line.is_none()); + } +} diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index d85986280..6796f57a4 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -9,6 +9,7 @@ mod ast; mod clipboard; +mod diff; mod fd; mod fs_cache; mod glob; diff --git a/packages/native/src/__tests__/diff.test.mjs b/packages/native/src/__tests__/diff.test.mjs new file mode 100644 index 000000000..9429fd972 --- /dev/null +++ b/packages/native/src/__tests__/diff.test.mjs @@ -0,0 +1,189 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +// Load the native addon directly +const addonDir = path.resolve( + __dirname, + "..", + "..", + "..", + "..", + "native", + "addon", +); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error( + "Native addon not found. Run `npm run build:native -w @gsd/native` first.", + ); + process.exit(1); +} + +// ── normalizeForFuzzyMatch ────────────────────────────────────────────── + +describe("normalizeForFuzzyMatch", () => { + test("strips trailing whitespace per line", () => { + assert.equal(native.normalizeForFuzzyMatch("hello \nworld "), "hello\nworld"); + }); + + test("normalizes smart quotes to ASCII", () => { + assert.equal( + native.normalizeForFuzzyMatch("\u201Chello\u201D \u2018world\u2019"), + '"hello" \'world\'', + ); + }); + + test("normalizes dashes to ASCII hyphen", () => { + assert.equal(native.normalizeForFuzzyMatch("a\u2013b\u2014c"), "a-b-c"); + }); + + test("normalizes special spaces to regular space", () => { + assert.equal(native.normalizeForFuzzyMatch("a\u00A0b\u3000c"), "a b c"); + }); + + test("handles empty string", () => { + assert.equal(native.normalizeForFuzzyMatch(""), ""); + }); + + test("preserves leading whitespace", () => { + assert.equal(native.normalizeForFuzzyMatch(" hello "), " hello"); + }); +}); + +// ── fuzzyFindText ─────────────────────────────────────────────────────── + +describe("fuzzyFindText", () => { + test("finds exact match", () => { + const result = native.fuzzyFindText("hello world", "world"); + assert.equal(result.found, true); + assert.equal(result.index, 6); + assert.equal(result.matchLength, 5); + assert.equal(result.usedFuzzyMatch, false); + assert.equal(result.contentForReplacement, "hello world"); + }); + + test("finds fuzzy match with smart quotes", () => { + const content = 'let x = \u201Chello\u201D;'; + const oldText = 'let x = "hello";'; + const result = native.fuzzyFindText(content, oldText); + assert.equal(result.found, true); + assert.equal(result.usedFuzzyMatch, true); + }); + + test("returns not found for missing text", () => { + const result = native.fuzzyFindText("hello world", "xyz"); + assert.equal(result.found, false); + assert.equal(result.index, -1); + assert.equal(result.matchLength, 0); + }); + + test("returns correct UTF-16 index for non-ASCII content", () => { + // Emoji U+1F600 is 2 UTF-16 code units (surrogate pair), 4 UTF-8 bytes + const content = "\u{1F600}hello"; + const result = native.fuzzyFindText(content, "hello"); + assert.equal(result.found, true); + // Emoji is 2 UTF-16 code units, so "hello" starts at index 2 + assert.equal(result.index, 2); + assert.equal(result.matchLength, 5); + }); + + test("index is compatible with JS substring()", () => { + const content = "abc\u{1F600}def"; + const result = native.fuzzyFindText(content, "def"); + assert.equal(result.found, true); + // "abc" = 3, emoji = 2 UTF-16 code units → index 5 + assert.equal(result.index, 5); + // Verify substring works correctly with the returned index + const extracted = result.contentForReplacement.substring( + result.index, + result.index + result.matchLength, + ); + assert.equal(extracted, "def"); + }); + + test("fuzzy match with trailing whitespace differences", () => { + const content = "hello \nworld "; + const oldText = "hello\nworld"; + const result = native.fuzzyFindText(content, oldText); + assert.equal(result.found, true); + assert.equal(result.usedFuzzyMatch, true); + }); +}); + +// ── generateDiff ──────────────────────────────────────────────────────── + +describe("generateDiff", () => { + test("generates diff for a line change", () => { + const old = "line1\nline2\nline3"; + const newText = "line1\nmodified\nline3"; + const result = native.generateDiff(old, newText); + assert.ok(result.diff.includes("line2")); + assert.ok(result.diff.includes("modified")); + assert.ok(result.diff.includes("-")); + assert.ok(result.diff.includes("+")); + assert.notEqual(result.firstChangedLine, null); + }); + + test("generates diff for an addition", () => { + const old = "line1\nline3"; + const newText = "line1\nline2\nline3"; + const result = native.generateDiff(old, newText); + assert.ok(result.diff.includes("+")); + assert.ok(result.diff.includes("line2")); + }); + + test("generates diff for a deletion", () => { + const old = "line1\nline2\nline3"; + const newText = "line1\nline3"; + const result = native.generateDiff(old, newText); + assert.ok(result.diff.includes("-")); + assert.ok(result.diff.includes("line2")); + }); + + test("returns empty diff for identical content", () => { + const result = native.generateDiff("same", "same"); + assert.equal(result.diff, ""); + // napi-rs maps Option::None to undefined (not null) + assert.equal(result.firstChangedLine, undefined); + }); + + test("respects context lines parameter", () => { + const lines = Array.from({ length: 20 }, (_, i) => `line${i + 1}`); + const old = lines.join("\n"); + lines[10] = "modified"; + const newText = lines.join("\n"); + const result = native.generateDiff(old, newText, 2); + assert.ok(result.diff.includes("...")); + }); + + test("default context is 4 lines", () => { + const lines = Array.from({ length: 20 }, (_, i) => `line${i + 1}`); + const old = lines.join("\n"); + lines[10] = "modified"; + const newText = lines.join("\n"); + const result = native.generateDiff(old, newText); + // Should show 4 context lines before and after + assert.ok(result.diff.length > 0); + }); +}); diff --git a/packages/native/src/diff/index.ts b/packages/native/src/diff/index.ts new file mode 100644 index 000000000..3966408bb --- /dev/null +++ b/packages/native/src/diff/index.ts @@ -0,0 +1,61 @@ +/** + * Native fuzzy text matching and diff generation for the edit tool. + * + * Uses the `similar` Rust crate (Myers' algorithm) for O(n+d) diffing, + * and single-pass Unicode normalization for fuzzy matching. + */ + +import { native } from "../native.js"; +import type { DiffResult, FuzzyMatchResult } from "./types.js"; + +export type { DiffResult, FuzzyMatchResult }; + +/** + * Normalize text for fuzzy matching: + * - Strip trailing whitespace from each line + * - Smart quotes to ASCII equivalents + * - Unicode dashes/hyphens to ASCII hyphen + * - Special Unicode spaces to regular space + */ +export function normalizeForFuzzyMatch(text: string): string { + return (native as Record).normalizeForFuzzyMatch( + text, + ) as string; +} + +/** + * Find `oldText` in `content`, trying exact match first, then fuzzy match. + * + * When fuzzy matching is used, `contentForReplacement` is the normalized + * version of `content`. + */ +export function fuzzyFindText( + content: string, + oldText: string, +): FuzzyMatchResult { + return (native as Record).fuzzyFindText( + content, + oldText, + ) as FuzzyMatchResult; +} + +/** + * Generate a unified diff string with line numbers and context. + * + * Uses Myers' diff algorithm via the `similar` Rust crate. + * + * @param oldContent Original text + * @param newContent Modified text + * @param contextLines Number of context lines around changes (default: 4) + */ +export function generateDiff( + oldContent: string, + newContent: string, + contextLines?: number, +): DiffResult { + return (native as Record).generateDiff( + oldContent, + newContent, + contextLines, + ) as DiffResult; +} diff --git a/packages/native/src/diff/types.ts b/packages/native/src/diff/types.ts new file mode 100644 index 000000000..e9c3f1427 --- /dev/null +++ b/packages/native/src/diff/types.ts @@ -0,0 +1,24 @@ +/** Result of fuzzy text matching (exact match tried first, then normalized). */ +export interface FuzzyMatchResult { + /** Whether a match was found. */ + found: boolean; + /** UTF-16 code unit index where the match starts (-1 if not found). */ + index: number; + /** Length of the matched text in UTF-16 code units (0 if not found). */ + matchLength: number; + /** Whether fuzzy (normalized) matching was used instead of exact. */ + usedFuzzyMatch: boolean; + /** + * Content to use for replacement operations. + * Original content when exact match; normalized content when fuzzy match. + */ + contentForReplacement: string; +} + +/** Result of unified diff generation. */ +export interface DiffResult { + /** The unified diff string with line numbers. */ + diff: string; + /** Line number of the first change in the new file (undefined if no changes). */ + firstChangedLine: number | undefined; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 9971a66a2..a9876cd56 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -74,6 +74,13 @@ export { } from "./text/index.js"; export type { SliceResult, ExtractSegmentsResult } from "./text/index.js"; +export { + normalizeForFuzzyMatch, + fuzzyFindText, + generateDiff, +} from "./diff/index.js"; +export type { FuzzyMatchResult, DiffResult } from "./diff/index.js"; + export { fuzzyFind } from "./fd/index.js"; export type { FuzzyFindMatch, diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 505fa2a93..731f8f8bd 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -87,6 +87,9 @@ export const native = loadNative() as { sanitizeText: (text: string) => string; visibleWidth: (text: string, tabWidth?: number) => number; fuzzyFind: (options: unknown) => unknown; + normalizeForFuzzyMatch: (text: string) => string; + fuzzyFindText: (content: string, oldText: string) => unknown; + generateDiff: (oldContent: string, newContent: string, contextLines?: number) => unknown; NativeImage: unknown; ttsrCompileRules: (rules: unknown[]) => number; ttsrCheckBuffer: (handle: number, buffer: string) => string[]; diff --git a/packages/pi-coding-agent/src/core/tools/edit-diff.ts b/packages/pi-coding-agent/src/core/tools/edit-diff.ts index 17f017bf9..b973ca3d9 100644 --- a/packages/pi-coding-agent/src/core/tools/edit-diff.ts +++ b/packages/pi-coding-agent/src/core/tools/edit-diff.ts @@ -1,9 +1,16 @@ /** * Shared diff computation utilities for the edit tool. * Used by both edit.ts (for execution) and tool-execution.ts (for preview rendering). + * + * Hot-path functions (fuzzyFindText, normalizeForFuzzyMatch, generateDiffString) + * delegate to the native Rust engine for performance on large files. */ -import * as Diff from "diff"; +import { + fuzzyFindText as nativeFuzzyFindText, + generateDiff as nativeGenerateDiff, + normalizeForFuzzyMatch as nativeNormalizeForFuzzyMatch, +} from "@gsd/native"; import { constants } from "fs"; import { access, readFile } from "fs/promises"; import { resolveToCwd } from "./path-utils.js"; @@ -25,32 +32,14 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string } /** - * Normalize text for fuzzy matching. Applies progressive transformations: + * Normalize text for fuzzy matching (native Rust implementation). * - Strip trailing whitespace from each line * - Normalize smart quotes to ASCII equivalents * - Normalize Unicode dashes/hyphens to ASCII hyphen * - Normalize special Unicode spaces to regular space */ export function normalizeForFuzzyMatch(text: string): string { - return ( - text - // Strip trailing whitespace per line - .split("\n") - .map((line) => line.trimEnd()) - .join("\n") - // Smart single quotes → ' - .replace(/[\u2018\u2019\u201A\u201B]/g, "'") - // Smart double quotes → " - .replace(/[\u201C\u201D\u201E\u201F]/g, '"') - // Various dashes/hyphens → - - // U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash, - // U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus - .replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-") - // Special spaces → regular space - // U+00A0 NBSP, U+2002-U+200A various spaces, U+202F narrow NBSP, - // U+205F medium math space, U+3000 ideographic space - .replace(/[\u00A0\u2002-\u200A\u202F\u205F\u3000]/g, " ") - ); + return nativeNormalizeForFuzzyMatch(text); } export interface FuzzyMatchResult { @@ -70,49 +59,14 @@ export interface FuzzyMatchResult { } /** - * Find oldText in content, trying exact match first, then fuzzy match. + * Find oldText in content, trying exact match first, then fuzzy match + * (native Rust implementation). + * * When fuzzy matching is used, the returned contentForReplacement is the - * fuzzy-normalized version of the content (trailing whitespace stripped, - * Unicode quotes/dashes normalized to ASCII). + * fuzzy-normalized version of the content. */ export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult { - // Try exact match first - const exactIndex = content.indexOf(oldText); - if (exactIndex !== -1) { - return { - found: true, - index: exactIndex, - matchLength: oldText.length, - usedFuzzyMatch: false, - contentForReplacement: content, - }; - } - - // Try fuzzy match - work entirely in normalized space - const fuzzyContent = normalizeForFuzzyMatch(content); - const fuzzyOldText = normalizeForFuzzyMatch(oldText); - const fuzzyIndex = fuzzyContent.indexOf(fuzzyOldText); - - if (fuzzyIndex === -1) { - return { - found: false, - index: -1, - matchLength: 0, - usedFuzzyMatch: false, - contentForReplacement: content, - }; - } - - // When fuzzy matching, we work in the normalized space for replacement. - // This means the output will have normalized whitespace/quotes/dashes, - // which is acceptable since we're fixing minor formatting differences anyway. - return { - found: true, - index: fuzzyIndex, - matchLength: fuzzyOldText.length, - usedFuzzyMatch: true, - contentForReplacement: fuzzyContent, - }; + return nativeFuzzyFindText(content, oldText); } /** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */ @@ -121,7 +75,9 @@ export function stripBom(content: string): { bom: string; text: string } { } /** - * Generate a unified diff string with line numbers and context. + * Generate a unified diff string with line numbers and context + * (native Rust implementation using Myers' algorithm via the `similar` crate). + * * Returns both the diff string and the first changed line number (in the new file). */ export function generateDiffString( @@ -129,101 +85,11 @@ export function generateDiffString( newContent: string, contextLines = 4, ): { diff: string; firstChangedLine: number | undefined } { - const parts = Diff.diffLines(oldContent, newContent); - const output: string[] = []; - - const oldLines = oldContent.split("\n"); - const newLines = newContent.split("\n"); - const maxLineNum = Math.max(oldLines.length, newLines.length); - const lineNumWidth = String(maxLineNum).length; - - let oldLineNum = 1; - let newLineNum = 1; - let lastWasChange = false; - let firstChangedLine: number | undefined; - - for (let i = 0; i < parts.length; i++) { - const part = parts[i]; - const raw = part.value.split("\n"); - if (raw[raw.length - 1] === "") { - raw.pop(); - } - - if (part.added || part.removed) { - // Capture the first changed line (in the new file) - if (firstChangedLine === undefined) { - firstChangedLine = newLineNum; - } - - // Show the change - for (const line of raw) { - if (part.added) { - const lineNum = String(newLineNum).padStart(lineNumWidth, " "); - output.push(`+${lineNum} ${line}`); - newLineNum++; - } else { - // removed - const lineNum = String(oldLineNum).padStart(lineNumWidth, " "); - output.push(`-${lineNum} ${line}`); - oldLineNum++; - } - } - lastWasChange = true; - } else { - // Context lines - only show a few before/after changes - const nextPartIsChange = i < parts.length - 1 && (parts[i + 1].added || parts[i + 1].removed); - - if (lastWasChange || nextPartIsChange) { - // Show context - let linesToShow = raw; - let skipStart = 0; - let skipEnd = 0; - - if (!lastWasChange) { - // Show only last N lines as leading context - skipStart = Math.max(0, raw.length - contextLines); - linesToShow = raw.slice(skipStart); - } - - if (!nextPartIsChange && linesToShow.length > contextLines) { - // Show only first N lines as trailing context - skipEnd = linesToShow.length - contextLines; - linesToShow = linesToShow.slice(0, contextLines); - } - - // Add ellipsis if we skipped lines at start - if (skipStart > 0) { - output.push(` ${"".padStart(lineNumWidth, " ")} ...`); - // Update line numbers for the skipped leading context - oldLineNum += skipStart; - newLineNum += skipStart; - } - - for (const line of linesToShow) { - const lineNum = String(oldLineNum).padStart(lineNumWidth, " "); - output.push(` ${lineNum} ${line}`); - oldLineNum++; - newLineNum++; - } - - // Add ellipsis if we skipped lines at end - if (skipEnd > 0) { - output.push(` ${"".padStart(lineNumWidth, " ")} ...`); - // Update line numbers for the skipped trailing context - oldLineNum += skipEnd; - newLineNum += skipEnd; - } - } else { - // Skip these context lines entirely - oldLineNum += raw.length; - newLineNum += raw.length; - } - - lastWasChange = false; - } - } - - return { diff: output.join("\n"), firstChangedLine }; + const result = nativeGenerateDiff(oldContent, newContent, contextLines); + return { + diff: result.diff, + firstChangedLine: result.firstChangedLine ?? undefined, + }; } export interface EditDiffResult {