feat: native Rust diff engine for edit tool
feat: native Rust diff engine for edit tool
This commit is contained in:
commit
b37819e30a
10 changed files with 737 additions and 158 deletions
8
native/Cargo.lock
generated
8
native/Cargo.lock
generated
|
|
@ -543,7 +543,7 @@ dependencies = [
|
|||
"napi",
|
||||
"napi-build",
|
||||
"napi-derive",
|
||||
"regex",
|
||||
"similar",
|
||||
"smallvec",
|
||||
"syntect",
|
||||
"unicode-segmentation",
|
||||
|
|
@ -1207,6 +1207,12 @@ version = "0.3.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
|
||||
|
||||
[[package]]
|
||||
name = "similar"
|
||||
version = "2.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "1.0.2"
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ image = { version = "0.25", default-features = false, features = [
|
|||
napi = { version = "2", features = ["napi8"] }
|
||||
napi-derive = "2"
|
||||
regex = "1"
|
||||
similar = "2"
|
||||
smallvec = "1"
|
||||
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }
|
||||
unicode-segmentation = "1"
|
||||
|
|
|
|||
421
native/crates/engine/src/diff.rs
Normal file
421
native/crates/engine/src/diff.rs
Normal file
|
|
@ -0,0 +1,421 @@
|
|||
//! Fuzzy text matching and unified diff generation for the edit tool.
|
||||
//!
|
||||
//! Replaces the JS `edit-diff.ts` hot path with native Rust:
|
||||
//! - `normalizeForFuzzyMatch`: Unicode normalization (smart quotes, dashes, special spaces, trailing whitespace)
|
||||
//! - `fuzzyFindText`: exact-then-fuzzy substring search
|
||||
//! - `generateDiff`: unified diff with line numbers and context, matching the JS output format
|
||||
|
||||
use napi_derive::napi;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// normalizeForFuzzyMatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Normalize text for fuzzy matching:
|
||||
/// - Strip trailing whitespace from each line
|
||||
/// - Smart single quotes → '
|
||||
/// - Smart double quotes → "
|
||||
/// - Various dashes/hyphens → -
|
||||
/// - Special Unicode spaces → regular space
|
||||
#[napi(js_name = "normalizeForFuzzyMatch")]
|
||||
pub fn normalize_for_fuzzy_match(text: String) -> String {
|
||||
normalize_impl(&text)
|
||||
}
|
||||
|
||||
/// Builds the normalized form of `text` used for fuzzy matching.
///
/// The input is split on `\n`; each line loses its trailing whitespace and
/// every remaining character is mapped through [`normalize_char`]. Lines are
/// re-joined with `\n`, so the number of lines never changes.
///
/// "Whitespace" follows the ECMAScript `String.prototype.trimEnd` definition
/// (see [`is_js_whitespace`]) instead of Rust's `str::trim_end`, so the result
/// agrees with the JavaScript implementation this module replaces.
fn normalize_impl(text: &str) -> String {
    let mut out = String::with_capacity(text.len());

    for (i, line) in text.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
        }
        let trimmed = line.trim_end_matches(is_js_whitespace);
        out.extend(trimmed.chars().map(normalize_char));
    }

    out
}

/// Reports whether `ch` is removed by ECMAScript `trimEnd`.
///
/// That set is the spec's WhiteSpace plus LineTerminator code points. It
/// differs from Rust's `char::is_whitespace` in two places: U+FEFF is included
/// and U+0085 (NEL) is not.
fn is_js_whitespace(ch: char) -> bool {
    matches!(
        ch,
        '\u{0009}'..='\u{000D}'
            | '\u{0020}'
            | '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{2028}'
            | '\u{2029}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
            | '\u{FEFF}'
    )
}

/// Maps one character to its ASCII stand-in, or returns it unchanged.
#[inline]
fn normalize_char(ch: char) -> char {
    match ch {
        // Smart single quotes → '
        '\u{2018}'..='\u{201B}' => '\'',
        // Smart double quotes → "
        '\u{201C}'..='\u{201F}' => '"',
        // Hyphen, non-breaking hyphen, figure dash, en dash, em dash,
        // horizontal bar, and the minus sign → -
        '\u{2010}'..='\u{2015}' | '\u{2212}' => '-',
        // Special spaces → regular space. The range deliberately starts at
        // U+2002, so U+2000 and U+2001 are left untouched.
        '\u{00A0}' | '\u{2002}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => ' ',
        _ => ch,
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// fuzzyFindText
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[napi(object)]
|
||||
pub struct FuzzyMatchResult {
|
||||
pub found: bool,
|
||||
pub index: i32,
|
||||
pub match_length: i32,
|
||||
pub used_fuzzy_match: bool,
|
||||
/// When exact match: original content. When fuzzy match: normalized content.
|
||||
pub content_for_replacement: String,
|
||||
}
|
||||
|
||||
/// Translates a UTF-8 byte offset within `s` into the matching JS string
/// index, i.e. the number of UTF-16 code units that precede that offset.
///
/// Slices `s` at `byte_offset`, so the offset must lie on a char boundary
/// inside `s`.
fn byte_offset_to_utf16(s: &str, byte_offset: usize) -> usize {
    let prefix = &s[..byte_offset];
    prefix.encode_utf16().count()
}
|
||||
|
||||
/// Counts how many UTF-16 code units `s` occupies, which is what a JS
/// string's `.length` reports for the same text.
fn utf16_len(s: &str) -> usize {
    s.encode_utf16().count()
}
|
||||
|
||||
/// Find `old_text` in `content`, trying exact match first, then fuzzy match.
|
||||
///
|
||||
/// Returns indices and lengths as UTF-16 code unit offsets (compatible with
|
||||
/// JS `String.prototype.substring()`).
|
||||
///
|
||||
/// When fuzzy matching is used, `content_for_replacement` is the normalized
|
||||
/// version of `content` (trailing whitespace stripped, Unicode quotes/dashes
|
||||
/// normalized to ASCII).
|
||||
#[napi(js_name = "fuzzyFindText")]
|
||||
pub fn fuzzy_find_text(content: String, old_text: String) -> FuzzyMatchResult {
|
||||
// Try exact match first
|
||||
if let Some(byte_idx) = content.find(&old_text) {
|
||||
return FuzzyMatchResult {
|
||||
found: true,
|
||||
index: byte_offset_to_utf16(&content, byte_idx) as i32,
|
||||
match_length: utf16_len(&old_text) as i32,
|
||||
used_fuzzy_match: false,
|
||||
content_for_replacement: content,
|
||||
};
|
||||
}
|
||||
|
||||
// Try fuzzy match
|
||||
let fuzzy_content = normalize_impl(&content);
|
||||
let fuzzy_old_text = normalize_impl(&old_text);
|
||||
|
||||
if let Some(byte_idx) = fuzzy_content.find(&fuzzy_old_text) {
|
||||
FuzzyMatchResult {
|
||||
found: true,
|
||||
index: byte_offset_to_utf16(&fuzzy_content, byte_idx) as i32,
|
||||
match_length: utf16_len(&fuzzy_old_text) as i32,
|
||||
used_fuzzy_match: true,
|
||||
content_for_replacement: fuzzy_content,
|
||||
}
|
||||
} else {
|
||||
FuzzyMatchResult {
|
||||
found: false,
|
||||
index: -1,
|
||||
match_length: 0,
|
||||
used_fuzzy_match: false,
|
||||
content_for_replacement: content,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// generateDiff
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DiffResult {
|
||||
pub diff: String,
|
||||
pub first_changed_line: Option<i32>,
|
||||
}
|
||||
|
||||
/// Generate a unified diff string with line numbers and context.
|
||||
///
|
||||
/// Uses the `similar` crate (Myers' diff algorithm with optimizations).
|
||||
/// Output format matches the JS `generateDiffString`:
|
||||
/// - `+N line` for additions
|
||||
/// - `-N line` for removals
|
||||
/// - ` N line` for context
|
||||
/// - ` ... ` for skipped context
|
||||
#[napi(js_name = "generateDiff")]
|
||||
pub fn generate_diff(old_content: String, new_content: String, context_lines: Option<u32>) -> DiffResult {
|
||||
let context = context_lines.unwrap_or(4) as usize;
|
||||
generate_diff_impl(&old_content, &new_content, context)
|
||||
}
|
||||
|
||||
fn generate_diff_impl(old_content: &str, new_content: &str, context_lines: usize) -> DiffResult {
|
||||
let old_lines: Vec<&str> = old_content.split('\n').collect();
|
||||
let new_lines: Vec<&str> = new_content.split('\n').collect();
|
||||
|
||||
let max_line_num = old_lines.len().max(new_lines.len());
|
||||
let line_num_width = if max_line_num == 0 {
|
||||
1
|
||||
} else {
|
||||
max_line_num.to_string().len()
|
||||
};
|
||||
|
||||
// Use similar crate for diffing
|
||||
let diff = similar::TextDiff::configure()
|
||||
.algorithm(similar::Algorithm::Myers)
|
||||
.diff_lines(old_content, new_content);
|
||||
|
||||
let mut output: Vec<String> = Vec::new();
|
||||
let mut old_line_num: usize = 1;
|
||||
let mut new_line_num: usize = 1;
|
||||
let mut last_was_change = false;
|
||||
let mut first_changed_line: Option<i32> = None;
|
||||
|
||||
// Build parts from diff ops, matching the JS `diff` npm package structure
|
||||
#[derive(Debug)]
|
||||
enum PartTag {
|
||||
Equal,
|
||||
Added,
|
||||
Removed,
|
||||
}
|
||||
|
||||
struct Part {
|
||||
tag: PartTag,
|
||||
lines: Vec<String>,
|
||||
}
|
||||
|
||||
let mut parts: Vec<Part> = Vec::new();
|
||||
|
||||
for op in diff.ops() {
|
||||
match op {
|
||||
similar::DiffOp::Equal { old_index, len, .. } => {
|
||||
let lines: Vec<String> = old_lines[*old_index..*old_index + *len]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
parts.push(Part { tag: PartTag::Equal, lines });
|
||||
}
|
||||
similar::DiffOp::Delete { old_index, old_len, .. } => {
|
||||
let lines: Vec<String> = old_lines[*old_index..*old_index + *old_len]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
parts.push(Part { tag: PartTag::Removed, lines });
|
||||
}
|
||||
similar::DiffOp::Insert { new_index, new_len, .. } => {
|
||||
let lines: Vec<String> = new_lines[*new_index..*new_index + *new_len]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
parts.push(Part { tag: PartTag::Added, lines });
|
||||
}
|
||||
similar::DiffOp::Replace {
|
||||
old_index, old_len, new_index, new_len, ..
|
||||
} => {
|
||||
let del_lines: Vec<String> = old_lines[*old_index..*old_index + *old_len]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
parts.push(Part { tag: PartTag::Removed, lines: del_lines });
|
||||
|
||||
let ins_lines: Vec<String> = new_lines[*new_index..*new_index + *new_len]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
parts.push(Part { tag: PartTag::Added, lines: ins_lines });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
let raw = &part.lines;
|
||||
|
||||
match part.tag {
|
||||
PartTag::Added | PartTag::Removed => {
|
||||
if first_changed_line.is_none() {
|
||||
first_changed_line = Some(new_line_num as i32);
|
||||
}
|
||||
|
||||
for line in raw {
|
||||
match part.tag {
|
||||
PartTag::Added => {
|
||||
let num = format!("{:>width$}", new_line_num, width = line_num_width);
|
||||
output.push(format!("+{} {}", num, line));
|
||||
new_line_num += 1;
|
||||
}
|
||||
PartTag::Removed => {
|
||||
let num = format!("{:>width$}", old_line_num, width = line_num_width);
|
||||
output.push(format!("-{} {}", num, line));
|
||||
old_line_num += 1;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
last_was_change = true;
|
||||
}
|
||||
PartTag::Equal => {
|
||||
let next_part_is_change = i < parts.len() - 1
|
||||
&& matches!(parts[i + 1].tag, PartTag::Added | PartTag::Removed);
|
||||
|
||||
if last_was_change || next_part_is_change {
|
||||
let mut lines_to_show = raw.as_slice();
|
||||
let mut skip_start = 0usize;
|
||||
let mut skip_end = 0usize;
|
||||
|
||||
if !last_was_change {
|
||||
// Show only last N lines as leading context
|
||||
skip_start = raw.len().saturating_sub(context_lines);
|
||||
lines_to_show = &raw[skip_start..];
|
||||
}
|
||||
|
||||
if !next_part_is_change && lines_to_show.len() > context_lines {
|
||||
// Show only first N lines as trailing context
|
||||
skip_end = lines_to_show.len() - context_lines;
|
||||
lines_to_show = &lines_to_show[..context_lines];
|
||||
}
|
||||
|
||||
if skip_start > 0 {
|
||||
output.push(format!(
|
||||
" {:>width$} ...",
|
||||
"",
|
||||
width = line_num_width
|
||||
));
|
||||
old_line_num += skip_start;
|
||||
new_line_num += skip_start;
|
||||
}
|
||||
|
||||
for line in lines_to_show {
|
||||
let num = format!("{:>width$}", old_line_num, width = line_num_width);
|
||||
output.push(format!(" {} {}", num, line));
|
||||
old_line_num += 1;
|
||||
new_line_num += 1;
|
||||
}
|
||||
|
||||
if skip_end > 0 {
|
||||
output.push(format!(
|
||||
" {:>width$} ...",
|
||||
"",
|
||||
width = line_num_width
|
||||
));
|
||||
old_line_num += skip_end;
|
||||
new_line_num += skip_end;
|
||||
}
|
||||
} else {
|
||||
old_line_num += raw.len();
|
||||
new_line_num += raw.len();
|
||||
}
|
||||
|
||||
last_was_change = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DiffResult {
|
||||
diff: output.join("\n"),
|
||||
first_changed_line,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Curly double and single quotes collapse to their ASCII forms.
    #[test]
    fn test_normalize_smart_quotes() {
        let normalized = normalize_impl("\u{201C}hello\u{201D} \u{2018}world\u{2019}");
        assert_eq!(normalized, "\"hello\" 'world'");
    }

    /// En dash, em dash and the minus sign all become `-`.
    #[test]
    fn test_normalize_dashes() {
        let normalized = normalize_impl("a\u{2013}b\u{2014}c\u{2212}d");
        assert_eq!(normalized, "a-b-c-d");
    }

    /// NBSP, em space and ideographic space become a plain space.
    #[test]
    fn test_normalize_special_spaces() {
        let normalized = normalize_impl("a\u{00A0}b\u{2003}c\u{3000}d");
        assert_eq!(normalized, "a b c d");
    }

    /// Whitespace at the end of each line is dropped.
    #[test]
    fn test_normalize_trailing_whitespace() {
        let normalized = normalize_impl("hello \nworld ");
        assert_eq!(normalized, "hello\nworld");
    }

    /// A literal hit reports its position without using the fuzzy path.
    #[test]
    fn test_fuzzy_find_exact() {
        let hit = fuzzy_find_text(String::from("hello world"), String::from("world"));
        assert!(hit.found);
        assert_eq!(hit.index, 6);
        assert_eq!(hit.match_length, 5);
        assert!(!hit.used_fuzzy_match);
    }

    /// Curly quotes in the content still match straight quotes in the query.
    #[test]
    fn test_fuzzy_find_with_smart_quotes() {
        let hit = fuzzy_find_text(
            String::from("let x = \u{201C}hello\u{201D};"),
            String::from("let x = \"hello\";"),
        );
        assert!(hit.found);
        assert!(hit.used_fuzzy_match);
    }

    /// A miss is reported with `found == false` and index `-1`.
    #[test]
    fn test_fuzzy_find_not_found() {
        let miss = fuzzy_find_text(String::from("hello world"), String::from("xyz"));
        assert!(!miss.found);
        assert_eq!(miss.index, -1);
    }

    /// Replacing a line yields both a removal and an addition.
    #[test]
    fn test_generate_diff_basic() {
        let rendered = generate_diff_impl("line1\nline2\nline3", "line1\nmodified\nline3", 4);
        for needle in ["-", "+", "line2", "modified"] {
            assert!(rendered.diff.contains(needle));
        }
        assert!(rendered.first_changed_line.is_some());
    }

    /// Inserting a line shows up as an addition.
    #[test]
    fn test_generate_diff_addition() {
        let rendered = generate_diff_impl("line1\nline3", "line1\nline2\nline3", 4);
        for needle in ["+", "line2"] {
            assert!(rendered.diff.contains(needle));
        }
    }

    /// Dropping a line shows up as a removal.
    #[test]
    fn test_generate_diff_deletion() {
        let rendered = generate_diff_impl("line1\nline2\nline3", "line1\nline3", 4);
        for needle in ["-", "line2"] {
            assert!(rendered.diff.contains(needle));
        }
    }

    /// Context beyond the requested window is replaced by an ellipsis marker.
    #[test]
    fn test_generate_diff_context_ellipsis() {
        let before: Vec<String> = (1..=20).map(|n| format!("line{}", n)).collect();
        let mut after = before.clone();
        after[10] = "modified".to_string();

        let rendered = generate_diff_impl(&before.join("\n"), &after.join("\n"), 2);
        assert!(rendered.diff.contains("..."));
    }

    /// Identical inputs produce no output and no first-changed line.
    #[test]
    fn test_generate_diff_empty() {
        let rendered = generate_diff_impl("same", "same", 4);
        assert!(rendered.diff.is_empty());
        assert!(rendered.first_changed_line.is_none());
    }
}
|
||||
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
mod ast;
|
||||
mod clipboard;
|
||||
mod diff;
|
||||
mod fd;
|
||||
mod fs_cache;
|
||||
mod glob;
|
||||
|
|
|
|||
189
packages/native/src/__tests__/diff.test.mjs
Normal file
189
packages/native/src/__tests__/diff.test.mjs
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
import { test, describe } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { createRequire } from "node:module";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// The compiled addon lives four directories up, under native/addon.
const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon");
const platformTag = `${process.platform}-${process.arch}`;

// Try the platform-specific build first, then the dev build.
const candidates = [`gsd_engine.${platformTag}.node`, "gsd_engine.dev.node"].map(
  (fileName) => path.join(addonDir, fileName),
);

/** Returns the first addon that loads, or undefined when none does. */
function loadFirstAvailable(paths) {
  for (const addonPath of paths) {
    try {
      return require(addonPath);
    } catch {
      // try next
    }
  }
  return undefined;
}

const native = loadFirstAvailable(candidates);

if (!native) {
  console.error(
    "Native addon not found. Run `npm run build:native -w @gsd/native` first.",
  );
  process.exit(1);
}
|
||||
|
||||
// ── normalizeForFuzzyMatch ──────────────────────────────────────────────
|
||||
|
||||
describe("normalizeForFuzzyMatch", () => {
  // Each row: [test title, input, expected normalized output].
  const cases = [
    ["strips trailing whitespace per line", "hello \nworld ", "hello\nworld"],
    [
      "normalizes smart quotes to ASCII",
      "\u201Chello\u201D \u2018world\u2019",
      '"hello" \'world\'',
    ],
    ["normalizes dashes to ASCII hyphen", "a\u2013b\u2014c", "a-b-c"],
    ["normalizes special spaces to regular space", "a\u00A0b\u3000c", "a b c"],
    ["handles empty string", "", ""],
    ["preserves leading whitespace", " hello ", " hello"],
  ];

  for (const [title, input, expected] of cases) {
    test(title, () => {
      assert.equal(native.normalizeForFuzzyMatch(input), expected);
    });
  }
});
|
||||
|
||||
// ── fuzzyFindText ───────────────────────────────────────────────────────
|
||||
|
||||
describe("fuzzyFindText", () => {
  test("finds exact match", () => {
    const { found, index, matchLength, usedFuzzyMatch, contentForReplacement } =
      native.fuzzyFindText("hello world", "world");
    assert.equal(found, true);
    assert.equal(index, 6);
    assert.equal(matchLength, 5);
    assert.equal(usedFuzzyMatch, false);
    assert.equal(contentForReplacement, "hello world");
  });

  test("finds fuzzy match with smart quotes", () => {
    const hit = native.fuzzyFindText(
      'let x = \u201Chello\u201D;',
      'let x = "hello";',
    );
    assert.equal(hit.found, true);
    assert.equal(hit.usedFuzzyMatch, true);
  });

  test("returns not found for missing text", () => {
    const { found, index, matchLength } = native.fuzzyFindText("hello world", "xyz");
    assert.equal(found, false);
    assert.equal(index, -1);
    assert.equal(matchLength, 0);
  });

  test("returns correct UTF-16 index for non-ASCII content", () => {
    // U+1F600 takes 4 UTF-8 bytes but 2 UTF-16 code units (a surrogate pair),
    // so "hello" must be reported at index 2.
    const hit = native.fuzzyFindText("\u{1F600}hello", "hello");
    assert.equal(hit.found, true);
    assert.equal(hit.index, 2);
    assert.equal(hit.matchLength, 5);
  });

  test("index is compatible with JS substring()", () => {
    const hit = native.fuzzyFindText("abc\u{1F600}def", "def");
    assert.equal(hit.found, true);
    // "abc" is 3 units and the emoji is 2, so the match starts at 5.
    assert.equal(hit.index, 5);

    // Round-trip through substring() using the reported offsets.
    const end = hit.index + hit.matchLength;
    assert.equal(hit.contentForReplacement.substring(hit.index, end), "def");
  });

  test("fuzzy match with trailing whitespace differences", () => {
    const hit = native.fuzzyFindText("hello \nworld ", "hello\nworld");
    assert.equal(hit.found, true);
    assert.equal(hit.usedFuzzyMatch, true);
  });
});
|
||||
|
||||
// ── generateDiff ────────────────────────────────────────────────────────
|
||||
|
||||
describe("generateDiff", () => {
  /**
   * Builds a 20-line fixture ("line1".."line20") and a copy in which the
   * eleventh line is replaced by "modified".
   */
  function twentyLinesWithOneChange() {
    const lines = Array.from({ length: 20 }, (_, i) => `line${i + 1}`);
    const old = lines.join("\n");
    lines[10] = "modified";
    return { old, newText: lines.join("\n") };
  }

  test("generates diff for a line change", () => {
    const old = "line1\nline2\nline3";
    const newText = "line1\nmodified\nline3";
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.includes("line2"));
    assert.ok(result.diff.includes("modified"));
    assert.ok(result.diff.includes("-"));
    assert.ok(result.diff.includes("+"));
    assert.notEqual(result.firstChangedLine, null);
  });

  test("generates diff for an addition", () => {
    const old = "line1\nline3";
    const newText = "line1\nline2\nline3";
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.includes("+"));
    assert.ok(result.diff.includes("line2"));
  });

  test("generates diff for a deletion", () => {
    const old = "line1\nline2\nline3";
    const newText = "line1\nline3";
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.includes("-"));
    assert.ok(result.diff.includes("line2"));
  });

  test("returns empty diff for identical content", () => {
    const result = native.generateDiff("same", "same");
    assert.equal(result.diff, "");
    // napi-rs maps Option::None to undefined (not null)
    assert.equal(result.firstChangedLine, undefined);
  });

  test("respects context lines parameter", () => {
    const { old, newText } = twentyLinesWithOneChange();
    const result = native.generateDiff(old, newText, 2);
    assert.ok(result.diff.includes("..."));
    // Two lines of context on each side of line 11: 9-10 and 12-13.
    assert.ok(result.diff.includes("line9"));
    assert.ok(!result.diff.includes("line8"));
    assert.ok(result.diff.includes("line13"));
    assert.ok(!result.diff.includes("line14"));
  });

  test("default context is 4 lines", () => {
    const { old, newText } = twentyLinesWithOneChange();
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.length > 0);
    // Four lines of context on each side of line 11: 7-10 and 12-15.
    assert.ok(result.diff.includes("line7"));
    assert.ok(!result.diff.includes("line6"));
    assert.ok(result.diff.includes("line15"));
    assert.ok(!result.diff.includes("line16"));
  });
});
|
||||
61
packages/native/src/diff/index.ts
Normal file
61
packages/native/src/diff/index.ts
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Native fuzzy text matching and diff generation for the edit tool.
|
||||
*
|
||||
* Uses the `similar` Rust crate (Myers' algorithm) for O(n+d) diffing,
|
||||
* and single-pass Unicode normalization for fuzzy matching.
|
||||
*/
|
||||
|
||||
import { native } from "../native.js";
|
||||
import type { DiffResult, FuzzyMatchResult } from "./types.js";
|
||||
|
||||
export type { DiffResult, FuzzyMatchResult };
|
||||
|
||||
/**
 * Normalize text for fuzzy matching:
 * - Strip trailing whitespace from each line
 * - Smart quotes to ASCII equivalents
 * - Unicode dashes/hyphens to ASCII hyphen
 * - Special Unicode spaces to regular space
 *
 * Calls the typed binding on `native` directly, so the compiler checks the
 * argument and return types instead of going through an untyped `Function`.
 *
 * @param text Text to normalize
 * @returns The normalized copy of `text`
 */
export function normalizeForFuzzyMatch(text: string): string {
  return native.normalizeForFuzzyMatch(text);
}
|
||||
|
||||
/**
 * Find `oldText` in `content`, trying exact match first, then fuzzy match.
 *
 * When fuzzy matching is used, `contentForReplacement` is the normalized
 * version of `content`.
 *
 * The binding is declared as returning `unknown`, so only the result is
 * cast; the arguments stay type-checked.
 *
 * @param content Text to search in
 * @param oldText Text to look for
 * @returns Match details; `found` is false when nothing matched
 */
export function fuzzyFindText(
  content: string,
  oldText: string,
): FuzzyMatchResult {
  return native.fuzzyFindText(content, oldText) as FuzzyMatchResult;
}
|
||||
|
||||
/**
 * Generate a unified diff string with line numbers and context.
 *
 * Uses Myers' diff algorithm via the `similar` Rust crate.
 *
 * The binding is declared as returning `unknown`, so only the result is
 * cast; the arguments stay type-checked.
 *
 * @param oldContent Original text
 * @param newContent Modified text
 * @param contextLines Number of context lines around changes (default: 4)
 * @returns The rendered diff and the first changed line in the new text
 */
export function generateDiff(
  oldContent: string,
  newContent: string,
  contextLines?: number,
): DiffResult {
  return native.generateDiff(oldContent, newContent, contextLines) as DiffResult;
}
|
||||
24
packages/native/src/diff/types.ts
Normal file
24
packages/native/src/diff/types.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
/**
 * What a fuzzy text lookup reports back. An exact match is attempted first;
 * the normalized comparison is only the fallback.
 */
export interface FuzzyMatchResult {
  /** True when the text was located. */
  found: boolean;

  /** Start of the match in UTF-16 code units; -1 when there is no match. */
  index: number;

  /** Size of the match in UTF-16 code units; 0 when there is no match. */
  matchLength: number;

  /** True when the normalized (fuzzy) comparison produced the match. */
  usedFuzzyMatch: boolean;

  /**
   * Text that replacement operations should be applied to: the original
   * content after an exact match, the normalized content after a fuzzy one.
   */
  contentForReplacement: string;
}
|
||||
|
||||
/** What unified diff generation hands back. */
export interface DiffResult {
  /** Rendered diff text, including line numbers. */
  diff: string;

  /** First changed line, numbered in the new file; undefined when nothing changed. */
  firstChangedLine: number | undefined;
}
|
||||
|
|
@ -74,6 +74,13 @@ export {
|
|||
} from "./text/index.js";
|
||||
export type { SliceResult, ExtractSegmentsResult } from "./text/index.js";
|
||||
|
||||
export {
|
||||
normalizeForFuzzyMatch,
|
||||
fuzzyFindText,
|
||||
generateDiff,
|
||||
} from "./diff/index.js";
|
||||
export type { FuzzyMatchResult, DiffResult } from "./diff/index.js";
|
||||
|
||||
export { fuzzyFind } from "./fd/index.js";
|
||||
export type {
|
||||
FuzzyFindMatch,
|
||||
|
|
|
|||
|
|
@ -87,6 +87,9 @@ export const native = loadNative() as {
|
|||
sanitizeText: (text: string) => string;
|
||||
visibleWidth: (text: string, tabWidth?: number) => number;
|
||||
fuzzyFind: (options: unknown) => unknown;
|
||||
normalizeForFuzzyMatch: (text: string) => string;
|
||||
fuzzyFindText: (content: string, oldText: string) => unknown;
|
||||
generateDiff: (oldContent: string, newContent: string, contextLines?: number) => unknown;
|
||||
NativeImage: unknown;
|
||||
ttsrCompileRules: (rules: unknown[]) => number;
|
||||
ttsrCheckBuffer: (handle: number, buffer: string) => string[];
|
||||
|
|
|
|||
|
|
@ -1,9 +1,16 @@
|
|||
/**
|
||||
* Shared diff computation utilities for the edit tool.
|
||||
* Used by both edit.ts (for execution) and tool-execution.ts (for preview rendering).
|
||||
*
|
||||
* Hot-path functions (fuzzyFindText, normalizeForFuzzyMatch, generateDiffString)
|
||||
* delegate to the native Rust engine for performance on large files.
|
||||
*/
|
||||
|
||||
import * as Diff from "diff";
|
||||
import {
|
||||
fuzzyFindText as nativeFuzzyFindText,
|
||||
generateDiff as nativeGenerateDiff,
|
||||
normalizeForFuzzyMatch as nativeNormalizeForFuzzyMatch,
|
||||
} from "@gsd/native";
|
||||
import { constants } from "fs";
|
||||
import { access, readFile } from "fs/promises";
|
||||
import { resolveToCwd } from "./path-utils.js";
|
||||
|
|
@ -25,32 +32,14 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
|
|||
}
|
||||
|
||||
/**
|
||||
* Normalize text for fuzzy matching. Applies progressive transformations:
|
||||
* Normalize text for fuzzy matching (native Rust implementation).
|
||||
* - Strip trailing whitespace from each line
|
||||
* - Normalize smart quotes to ASCII equivalents
|
||||
* - Normalize Unicode dashes/hyphens to ASCII hyphen
|
||||
* - Normalize special Unicode spaces to regular space
|
||||
*/
|
||||
export function normalizeForFuzzyMatch(text: string): string {
|
||||
return (
|
||||
text
|
||||
// Strip trailing whitespace per line
|
||||
.split("\n")
|
||||
.map((line) => line.trimEnd())
|
||||
.join("\n")
|
||||
// Smart single quotes → '
|
||||
.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
|
||||
// Smart double quotes → "
|
||||
.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
|
||||
// Various dashes/hyphens → -
|
||||
// U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
|
||||
// U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus
|
||||
.replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
|
||||
// Special spaces → regular space
|
||||
// U+00A0 NBSP, U+2002-U+200A various spaces, U+202F narrow NBSP,
|
||||
// U+205F medium math space, U+3000 ideographic space
|
||||
.replace(/[\u00A0\u2002-\u200A\u202F\u205F\u3000]/g, " ")
|
||||
);
|
||||
return nativeNormalizeForFuzzyMatch(text);
|
||||
}
|
||||
|
||||
export interface FuzzyMatchResult {
|
||||
|
|
@ -70,49 +59,14 @@ export interface FuzzyMatchResult {
|
|||
}
|
||||
|
||||
/**
|
||||
* Find oldText in content, trying exact match first, then fuzzy match.
|
||||
* Find oldText in content, trying exact match first, then fuzzy match
|
||||
* (native Rust implementation).
|
||||
*
|
||||
* When fuzzy matching is used, the returned contentForReplacement is the
|
||||
* fuzzy-normalized version of the content (trailing whitespace stripped,
|
||||
* Unicode quotes/dashes normalized to ASCII).
|
||||
* fuzzy-normalized version of the content.
|
||||
*/
|
||||
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
|
||||
// Try exact match first
|
||||
const exactIndex = content.indexOf(oldText);
|
||||
if (exactIndex !== -1) {
|
||||
return {
|
||||
found: true,
|
||||
index: exactIndex,
|
||||
matchLength: oldText.length,
|
||||
usedFuzzyMatch: false,
|
||||
contentForReplacement: content,
|
||||
};
|
||||
}
|
||||
|
||||
// Try fuzzy match - work entirely in normalized space
|
||||
const fuzzyContent = normalizeForFuzzyMatch(content);
|
||||
const fuzzyOldText = normalizeForFuzzyMatch(oldText);
|
||||
const fuzzyIndex = fuzzyContent.indexOf(fuzzyOldText);
|
||||
|
||||
if (fuzzyIndex === -1) {
|
||||
return {
|
||||
found: false,
|
||||
index: -1,
|
||||
matchLength: 0,
|
||||
usedFuzzyMatch: false,
|
||||
contentForReplacement: content,
|
||||
};
|
||||
}
|
||||
|
||||
// When fuzzy matching, we work in the normalized space for replacement.
|
||||
// This means the output will have normalized whitespace/quotes/dashes,
|
||||
// which is acceptable since we're fixing minor formatting differences anyway.
|
||||
return {
|
||||
found: true,
|
||||
index: fuzzyIndex,
|
||||
matchLength: fuzzyOldText.length,
|
||||
usedFuzzyMatch: true,
|
||||
contentForReplacement: fuzzyContent,
|
||||
};
|
||||
return nativeFuzzyFindText(content, oldText);
|
||||
}
|
||||
|
||||
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
|
||||
|
|
@ -121,7 +75,9 @@ export function stripBom(content: string): { bom: string; text: string } {
|
|||
}
|
||||
|
||||
/**
|
||||
* Generate a unified diff string with line numbers and context.
|
||||
* Generate a unified diff string with line numbers and context
|
||||
* (native Rust implementation using Myers' algorithm via the `similar` crate).
|
||||
*
|
||||
* Returns both the diff string and the first changed line number (in the new file).
|
||||
*/
|
||||
export function generateDiffString(
|
||||
|
|
@ -129,101 +85,11 @@ export function generateDiffString(
|
|||
newContent: string,
|
||||
contextLines = 4,
|
||||
): { diff: string; firstChangedLine: number | undefined } {
|
||||
const parts = Diff.diffLines(oldContent, newContent);
|
||||
const output: string[] = [];
|
||||
|
||||
const oldLines = oldContent.split("\n");
|
||||
const newLines = newContent.split("\n");
|
||||
const maxLineNum = Math.max(oldLines.length, newLines.length);
|
||||
const lineNumWidth = String(maxLineNum).length;
|
||||
|
||||
let oldLineNum = 1;
|
||||
let newLineNum = 1;
|
||||
let lastWasChange = false;
|
||||
let firstChangedLine: number | undefined;
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
const part = parts[i];
|
||||
const raw = part.value.split("\n");
|
||||
if (raw[raw.length - 1] === "") {
|
||||
raw.pop();
|
||||
}
|
||||
|
||||
if (part.added || part.removed) {
|
||||
// Capture the first changed line (in the new file)
|
||||
if (firstChangedLine === undefined) {
|
||||
firstChangedLine = newLineNum;
|
||||
}
|
||||
|
||||
// Show the change
|
||||
for (const line of raw) {
|
||||
if (part.added) {
|
||||
const lineNum = String(newLineNum).padStart(lineNumWidth, " ");
|
||||
output.push(`+${lineNum} ${line}`);
|
||||
newLineNum++;
|
||||
} else {
|
||||
// removed
|
||||
const lineNum = String(oldLineNum).padStart(lineNumWidth, " ");
|
||||
output.push(`-${lineNum} ${line}`);
|
||||
oldLineNum++;
|
||||
}
|
||||
}
|
||||
lastWasChange = true;
|
||||
} else {
|
||||
// Context lines - only show a few before/after changes
|
||||
const nextPartIsChange = i < parts.length - 1 && (parts[i + 1].added || parts[i + 1].removed);
|
||||
|
||||
if (lastWasChange || nextPartIsChange) {
|
||||
// Show context
|
||||
let linesToShow = raw;
|
||||
let skipStart = 0;
|
||||
let skipEnd = 0;
|
||||
|
||||
if (!lastWasChange) {
|
||||
// Show only last N lines as leading context
|
||||
skipStart = Math.max(0, raw.length - contextLines);
|
||||
linesToShow = raw.slice(skipStart);
|
||||
}
|
||||
|
||||
if (!nextPartIsChange && linesToShow.length > contextLines) {
|
||||
// Show only first N lines as trailing context
|
||||
skipEnd = linesToShow.length - contextLines;
|
||||
linesToShow = linesToShow.slice(0, contextLines);
|
||||
}
|
||||
|
||||
// Add ellipsis if we skipped lines at start
|
||||
if (skipStart > 0) {
|
||||
output.push(` ${"".padStart(lineNumWidth, " ")} ...`);
|
||||
// Update line numbers for the skipped leading context
|
||||
oldLineNum += skipStart;
|
||||
newLineNum += skipStart;
|
||||
}
|
||||
|
||||
for (const line of linesToShow) {
|
||||
const lineNum = String(oldLineNum).padStart(lineNumWidth, " ");
|
||||
output.push(` ${lineNum} ${line}`);
|
||||
oldLineNum++;
|
||||
newLineNum++;
|
||||
}
|
||||
|
||||
// Add ellipsis if we skipped lines at end
|
||||
if (skipEnd > 0) {
|
||||
output.push(` ${"".padStart(lineNumWidth, " ")} ...`);
|
||||
// Update line numbers for the skipped trailing context
|
||||
oldLineNum += skipEnd;
|
||||
newLineNum += skipEnd;
|
||||
}
|
||||
} else {
|
||||
// Skip these context lines entirely
|
||||
oldLineNum += raw.length;
|
||||
newLineNum += raw.length;
|
||||
}
|
||||
|
||||
lastWasChange = false;
|
||||
}
|
||||
}
|
||||
|
||||
return { diff: output.join("\n"), firstChangedLine };
|
||||
const result = nativeGenerateDiff(oldContent, newContent, contextLines);
|
||||
return {
|
||||
diff: result.diff,
|
||||
firstChangedLine: result.firstChangedLine ?? undefined,
|
||||
};
|
||||
}
|
||||
|
||||
export interface EditDiffResult {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue