feat: add native Rust diff engine for edit tool

Move the edit tool's hot-path diffing operations from JS to native Rust:
- `normalizeForFuzzyMatch`: single-pass Unicode normalization (smart quotes,
  dashes, special spaces, trailing whitespace)
- `fuzzyFindText`: exact-then-fuzzy substring search with UTF-16 index
  conversion for JS compatibility
- `generateDiff`: unified diff generation using the `similar` crate
  (Myers' algorithm with optimizations)

The Rust module at `native/crates/engine/src/diff.rs` exposes three napi
functions. The TypeScript wrapper at `packages/native/src/diff/` follows
the existing module pattern. `edit-diff.ts` now delegates to native
implementations while keeping line-ending handling and file I/O in JS.

18 tests covering normalization, fuzzy matching (including UTF-16 index
correctness with emoji/surrogate pairs), and diff generation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Lex Christopherson 2026-03-13 14:00:39 -06:00
parent 886bc9b571
commit 4c97d59536
10 changed files with 737 additions and 158 deletions

8
native/Cargo.lock generated
View file

@ -543,7 +543,7 @@ dependencies = [
"napi",
"napi-build",
"napi-derive",
"regex",
"similar",
"smallvec",
"syntect",
"unicode-segmentation",
@ -1207,6 +1207,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "similar"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
[[package]]
name = "siphasher"
version = "1.0.2"

View file

@ -27,6 +27,7 @@ image = { version = "0.25", default-features = false, features = [
napi = { version = "2", features = ["napi8"] }
napi-derive = "2"
regex = "1"
similar = "2"
smallvec = "1"
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }
unicode-segmentation = "1"

View file

@ -0,0 +1,421 @@
//! Fuzzy text matching and unified diff generation for the edit tool.
//!
//! Replaces the JS `edit-diff.ts` hot path with native Rust:
//! - `normalizeForFuzzyMatch`: Unicode normalization (smart quotes, dashes, special spaces, trailing whitespace)
//! - `fuzzyFindText`: exact-then-fuzzy substring search
//! - `generateDiff`: unified diff with line numbers and context, matching the JS output format
use napi_derive::napi;
// ---------------------------------------------------------------------------
// normalizeForFuzzyMatch
// ---------------------------------------------------------------------------
/// N-API entry point (`normalizeForFuzzyMatch`) for fuzzy-match normalization.
///
/// Trailing whitespace is removed from every line and typographic characters
/// are folded to their ASCII counterparts:
/// - curly single quotes become `'`
/// - curly double quotes become `"`
/// - Unicode dashes/hyphens become `-`
/// - special Unicode spaces become a regular space
#[napi(js_name = "normalizeForFuzzyMatch")]
pub fn normalize_for_fuzzy_match(text: String) -> String {
    let source: &str = text.as_str();
    normalize_impl(source)
}
/// Returns `true` for characters that JavaScript's `String.prototype.trimEnd`
/// removes.
///
/// Rust's `char::is_whitespace` (Unicode `White_Space`) is almost the same
/// set, with two differences: it includes U+0085 (NEL), which JS does not
/// trim, and it excludes U+FEFF (BOM / zero-width no-break space), which JS
/// does trim. Both are corrected here so the native normalizer produces the
/// same output as the JS implementation it replaces.
#[inline]
fn is_js_trimmable(ch: char) -> bool {
    ch == '\u{FEFF}' || (ch != '\u{0085}' && ch.is_whitespace())
}

/// Normalizes `text` for fuzzy matching.
///
/// The text is processed line by line (split on `\n`): trailing whitespace is
/// stripped from each line using JavaScript `trimEnd` semantics, then every
/// remaining character is passed through [`normalize_char`]. Lines are joined
/// back with `\n`, so the number of lines is preserved.
fn normalize_impl(text: &str) -> String {
    // The result is never longer (in bytes) than the input: every mapping in
    // `normalize_char` goes to a single ASCII byte.
    let mut out = String::with_capacity(text.len());
    for (i, line) in text.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
        }
        out.extend(
            line.trim_end_matches(is_js_trimmable)
                .chars()
                .map(normalize_char),
        );
    }
    out
}

/// Maps a single typographic character to its ASCII equivalent.
///
/// Characters outside the tables below are returned unchanged.
#[inline]
fn normalize_char(ch: char) -> char {
    match ch {
        // Smart single quotes → '
        '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'',
        // Smart double quotes → "
        '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"',
        // Hyphen, non-breaking hyphen, figure dash, en dash, em dash,
        // horizontal bar, minus sign → -
        '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}'
        | '\u{2212}' => '-',
        // NBSP, U+2002..=U+200A, narrow NBSP, medium mathematical space,
        // ideographic space → regular space
        '\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}'
        | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
        | '\u{3000}' => ' ',
        _ => ch,
    }
}
// ---------------------------------------------------------------------------
// fuzzyFindText
// ---------------------------------------------------------------------------
/// Outcome of `fuzzy_find_text`, returned to JavaScript as a plain object.
///
/// `index` and `match_length` are UTF-16 code unit counts and always refer to
/// `content_for_replacement`, not necessarily to the caller's original input.
#[napi(object)]
pub struct FuzzyMatchResult {
/// `true` when either the exact or the normalized search found the text.
pub found: bool,
/// UTF-16 offset of the match inside `content_for_replacement`; `-1` when not found.
pub index: i32,
/// UTF-16 length of the matched text (of the normalized needle for fuzzy matches); `0` when not found.
pub match_length: i32,
/// `true` only when the exact search failed and the normalized search succeeded.
pub used_fuzzy_match: bool,
/// When exact match: original content. When fuzzy match: normalized content.
/// When nothing was found: original content.
pub content_for_replacement: String,
}
/// Translates a UTF-8 byte offset into `s` into the equivalent JS string
/// index, i.e. the number of UTF-16 code units that precede it.
///
/// `byte_offset` must lie on a character boundary of `s`; slicing panics
/// otherwise.
fn byte_offset_to_utf16(s: &str, byte_offset: usize) -> usize {
    let prefix = &s[..byte_offset];
    prefix.encode_utf16().count()
}
/// Number of UTF-16 code units needed to encode `s` (what JS reports as
/// `string.length`).
fn utf16_len(s: &str) -> usize {
    s.encode_utf16().count()
}
/// Locates `old_text` inside `content`: verbatim first, then after
/// normalizing both sides with `normalize_impl`.
///
/// The reported `index` and `match_length` are UTF-16 code unit counts, so
/// they can be handed directly to JS `String.prototype.substring()`.
///
/// On a fuzzy hit the offsets refer to the *normalized* content, which is
/// what `content_for_replacement` then contains (trailing whitespace
/// stripped, Unicode quotes/dashes/spaces folded to ASCII). On an exact hit
/// or a miss, `content_for_replacement` is the untouched input.
#[napi(js_name = "fuzzyFindText")]
pub fn fuzzy_find_text(content: String, old_text: String) -> FuzzyMatchResult {
    // Pass 1: byte-for-byte search in the original text.
    if let Some(exact_at) = content.find(old_text.as_str()) {
        let index = byte_offset_to_utf16(&content, exact_at) as i32;
        let match_length = utf16_len(&old_text) as i32;
        return FuzzyMatchResult {
            found: true,
            index,
            match_length,
            used_fuzzy_match: false,
            content_for_replacement: content,
        };
    }

    // Pass 2: search entirely in normalized space.
    let normalized_content = normalize_impl(&content);
    let normalized_needle = normalize_impl(&old_text);
    match normalized_content.find(normalized_needle.as_str()) {
        Some(fuzzy_at) => {
            let index = byte_offset_to_utf16(&normalized_content, fuzzy_at) as i32;
            let match_length = utf16_len(&normalized_needle) as i32;
            FuzzyMatchResult {
                found: true,
                index,
                match_length,
                used_fuzzy_match: true,
                content_for_replacement: normalized_content,
            }
        }
        None => FuzzyMatchResult {
            found: false,
            index: -1,
            match_length: 0,
            used_fuzzy_match: false,
            content_for_replacement: content,
        },
    }
}
// ---------------------------------------------------------------------------
// generateDiff
// ---------------------------------------------------------------------------
/// Result of `generate_diff`, returned to JavaScript as a plain object.
#[napi(object)]
pub struct DiffResult {
/// Rendered diff: one entry per output line, joined with `\n`. Empty when nothing changed.
pub diff: String,
/// 1-based line number in the new content at which the first added/removed
/// run starts; `None` when the inputs produce no changes.
pub first_changed_line: Option<i32>,
}
/// N-API entry point (`generateDiff`): renders a line-numbered diff between
/// `old_content` and `new_content`.
///
/// Diffing is delegated to the `similar` crate (Myers' algorithm); the output
/// layout mirrors the JS `generateDiffString`:
/// - `+N line` for additions
/// - `-N line` for removals
/// - ` N line` for context
/// - ` ... ` for skipped context
///
/// `context_lines` is the number of unchanged lines kept around each change;
/// it defaults to 4 when omitted.
#[napi(js_name = "generateDiff")]
pub fn generate_diff(old_content: String, new_content: String, context_lines: Option<u32>) -> DiffResult {
    let context = match context_lines {
        Some(requested) => requested as usize,
        None => 4,
    };
    generate_diff_impl(old_content.as_str(), new_content.as_str(), context)
}
/// Builds the line-numbered, context-trimmed diff between `old_content` and
/// `new_content`.
///
/// Both inputs are tokenised into lines exactly once, on `\n` only, with each
/// token keeping its terminator. The same tokens are handed to `similar` for
/// diffing and are used for rendering, so the indices in the diff ops always
/// refer to the lines that get printed. (Letting `similar` tokenise the text
/// itself is unsafe here: it also breaks lines on a bare `\r`, which produced
/// out-of-range or misaligned indices against a `split('\n')` line table.)
///
/// * `context_lines` – number of unchanged lines shown before and after each
///   change; longer unchanged runs are collapsed into a ` ... ` marker.
///
/// Returns the rendered diff and the 1-based new-file line number at which
/// the first change starts (`None` when there are no changes).
fn generate_diff_impl(old_content: &str, new_content: &str, context_lines: usize) -> DiffResult {
    // Kind of a run of consecutive lines, mirroring the parts produced by the
    // JS `diff` package.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum PartTag {
        Equal,
        Added,
        Removed,
    }

    // A run of consecutive line tokens that share one tag.
    struct Part<'a> {
        tag: PartTag,
        lines: &'a [&'a str],
    }

    // Drops the `\n` terminator a token may carry so it prints as one line.
    fn display_line(token: &str) -> &str {
        token.strip_suffix('\n').unwrap_or(token)
    }

    let old_tokens: Vec<&str> = old_content.split_inclusive('\n').collect();
    let new_tokens: Vec<&str> = new_content.split_inclusive('\n').collect();

    // Column width follows the JS implementation: it is derived from
    // `split("\n").length`, which counts a trailing empty line as well.
    let max_line_num = old_content
        .split('\n')
        .count()
        .max(new_content.split('\n').count());
    let line_num_width = max_line_num.to_string().len();

    let diff = similar::TextDiff::configure()
        .algorithm(similar::Algorithm::Myers)
        .diff_slices(old_tokens.as_slice(), new_tokens.as_slice());

    // Flatten the ops into tagged runs; a Replace becomes Removed + Added.
    let mut parts: Vec<Part<'_>> = Vec::with_capacity(diff.ops().len());
    for op in diff.ops() {
        match op {
            similar::DiffOp::Equal { old_index, len, .. } => {
                parts.push(Part {
                    tag: PartTag::Equal,
                    lines: &old_tokens[*old_index..*old_index + *len],
                });
            }
            similar::DiffOp::Delete { old_index, old_len, .. } => {
                parts.push(Part {
                    tag: PartTag::Removed,
                    lines: &old_tokens[*old_index..*old_index + *old_len],
                });
            }
            similar::DiffOp::Insert { new_index, new_len, .. } => {
                parts.push(Part {
                    tag: PartTag::Added,
                    lines: &new_tokens[*new_index..*new_index + *new_len],
                });
            }
            similar::DiffOp::Replace {
                old_index, old_len, new_index, new_len, ..
            } => {
                parts.push(Part {
                    tag: PartTag::Removed,
                    lines: &old_tokens[*old_index..*old_index + *old_len],
                });
                parts.push(Part {
                    tag: PartTag::Added,
                    lines: &new_tokens[*new_index..*new_index + *new_len],
                });
            }
        }
    }

    let mut output: Vec<String> = Vec::new();
    let mut old_line_num: usize = 1;
    let mut new_line_num: usize = 1;
    let mut last_was_change = false;
    let mut first_changed_line: Option<i32> = None;
    // Marker for collapsed context: blank number column followed by "...".
    let ellipsis = format!(" {:>width$} ...", "", width = line_num_width);

    for (i, part) in parts.iter().enumerate() {
        match part.tag {
            PartTag::Added => {
                if first_changed_line.is_none() {
                    first_changed_line = Some(new_line_num as i32);
                }
                for token in part.lines {
                    output.push(format!(
                        "+{:>width$} {}",
                        new_line_num,
                        display_line(token),
                        width = line_num_width
                    ));
                    new_line_num += 1;
                }
                last_was_change = true;
            }
            PartTag::Removed => {
                if first_changed_line.is_none() {
                    first_changed_line = Some(new_line_num as i32);
                }
                for token in part.lines {
                    output.push(format!(
                        "-{:>width$} {}",
                        old_line_num,
                        display_line(token),
                        width = line_num_width
                    ));
                    old_line_num += 1;
                }
                last_was_change = true;
            }
            PartTag::Equal => {
                let run = part.lines;
                let next_part_is_change = parts
                    .get(i + 1)
                    .map_or(false, |next| next.tag != PartTag::Equal);

                if last_was_change || next_part_is_change {
                    // Leading context: when no change precedes this run, keep
                    // only its last `context_lines` lines.
                    let skip_start = if last_was_change {
                        0
                    } else {
                        run.len().saturating_sub(context_lines)
                    };
                    let mut shown = &run[skip_start..];

                    // Trailing context: when no change follows, keep only the
                    // first `context_lines` lines.
                    let mut skip_end = 0usize;
                    if !next_part_is_change && shown.len() > context_lines {
                        skip_end = shown.len() - context_lines;
                        shown = &shown[..context_lines];
                    }

                    if skip_start > 0 {
                        output.push(ellipsis.clone());
                        old_line_num += skip_start;
                        new_line_num += skip_start;
                    }
                    for token in shown {
                        // Context lines are labelled with the old-file number.
                        output.push(format!(
                            " {:>width$} {}",
                            old_line_num,
                            display_line(token),
                            width = line_num_width
                        ));
                        old_line_num += 1;
                        new_line_num += 1;
                    }
                    if skip_end > 0 {
                        output.push(ellipsis.clone());
                        old_line_num += skip_end;
                        new_line_num += skip_end;
                    }
                } else {
                    // Not adjacent to any change: skip the whole run but keep
                    // the counters in step.
                    old_line_num += run.len();
                    new_line_num += run.len();
                }
                last_was_change = false;
            }
        }
    }

    DiffResult {
        diff: output.join("\n"),
        first_changed_line,
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `fuzzy_find_text` on borrowed inputs.
    fn find(content: &str, needle: &str) -> FuzzyMatchResult {
        fuzzy_find_text(content.to_string(), needle.to_string())
    }

    /// Diffs two texts with the default context of four lines.
    fn diff4(old: &str, new_text: &str) -> DiffResult {
        generate_diff_impl(old, new_text, 4)
    }

    // -- normalize_impl -----------------------------------------------------

    #[test]
    fn test_normalize_smart_quotes() {
        let normalized = normalize_impl("\u{201C}hello\u{201D} \u{2018}world\u{2019}");
        assert_eq!(normalized, "\"hello\" 'world'");
    }

    #[test]
    fn test_normalize_dashes() {
        let normalized = normalize_impl("a\u{2013}b\u{2014}c\u{2212}d");
        assert_eq!(normalized, "a-b-c-d");
    }

    #[test]
    fn test_normalize_special_spaces() {
        let normalized = normalize_impl("a\u{00A0}b\u{2003}c\u{3000}d");
        assert_eq!(normalized, "a b c d");
    }

    #[test]
    fn test_normalize_trailing_whitespace() {
        let normalized = normalize_impl("hello \nworld ");
        assert_eq!(normalized, "hello\nworld");
    }

    // -- fuzzy_find_text ----------------------------------------------------

    #[test]
    fn test_fuzzy_find_exact() {
        let hit = find("hello world", "world");
        assert!(hit.found);
        assert_eq!(hit.index, 6);
        assert_eq!(hit.match_length, 5);
        assert!(!hit.used_fuzzy_match);
    }

    #[test]
    fn test_fuzzy_find_with_smart_quotes() {
        let hit = find("let x = \u{201C}hello\u{201D};", "let x = \"hello\";");
        assert!(hit.found);
        assert!(hit.used_fuzzy_match);
    }

    #[test]
    fn test_fuzzy_find_not_found() {
        let miss = find("hello world", "xyz");
        assert!(!miss.found);
        assert_eq!(miss.index, -1);
    }

    // -- generate_diff_impl -------------------------------------------------

    #[test]
    fn test_generate_diff_basic() {
        let result = diff4("line1\nline2\nline3", "line1\nmodified\nline3");
        for needle in ["-", "+", "line2", "modified"] {
            assert!(result.diff.contains(needle));
        }
        assert!(result.first_changed_line.is_some());
    }

    #[test]
    fn test_generate_diff_addition() {
        let result = diff4("line1\nline3", "line1\nline2\nline3");
        assert!(result.diff.contains("+"));
        assert!(result.diff.contains("line2"));
    }

    #[test]
    fn test_generate_diff_deletion() {
        let result = diff4("line1\nline2\nline3", "line1\nline3");
        assert!(result.diff.contains("-"));
        assert!(result.diff.contains("line2"));
    }

    #[test]
    fn test_generate_diff_context_ellipsis() {
        // Twenty lines with only the eleventh changed, diffed with a context
        // of two, must collapse the distant unchanged lines.
        let before: Vec<String> = (1..=20).map(|i| format!("line{}", i)).collect();
        let mut after = before.clone();
        after[10] = "modified".to_string();

        let result = generate_diff_impl(&before.join("\n"), &after.join("\n"), 2);
        assert!(result.diff.contains("..."));
    }

    #[test]
    fn test_generate_diff_empty() {
        let result = diff4("same", "same");
        assert!(result.diff.is_empty());
        assert!(result.first_changed_line.is_none());
    }
}

View file

@ -9,6 +9,7 @@
mod ast;
mod clipboard;
mod diff;
mod fd;
mod fs_cache;
mod glob;

View file

@ -0,0 +1,189 @@
import { test, describe } from "node:test";
import assert from "node:assert/strict";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// Directory that holds the compiled addon, four levels up from this file.
const addonDir = path.resolve(__dirname, ...Array(4).fill(".."), "native", "addon");

// Candidate binaries in priority order: platform-specific build, then dev build.
const platformTag = `${process.platform}-${process.arch}`;
const candidates = [`gsd_engine.${platformTag}.node`, "gsd_engine.dev.node"].map(
  (fileName) => path.join(addonDir, fileName),
);

// The first candidate that loads without throwing wins; `undefined` if none does.
const native = (() => {
  for (const candidate of candidates) {
    try {
      return require(candidate);
    } catch {
      // try next
    }
  }
  return undefined;
})();

// Without the addon nothing below can run, so bail out with a build hint.
if (!native) {
  console.error(
    "Native addon not found. Run `npm run build:native -w @gsd/native` first.",
  );
  process.exit(1);
}
// ── normalizeForFuzzyMatch ──────────────────────────────────────────────
describe("normalizeForFuzzyMatch", () => {
  // Each row: [test name, input, expected normalized output].
  const cases = [
    ["strips trailing whitespace per line", "hello \nworld ", "hello\nworld"],
    [
      "normalizes smart quotes to ASCII",
      "\u201Chello\u201D \u2018world\u2019",
      '"hello" \'world\'',
    ],
    ["normalizes dashes to ASCII hyphen", "a\u2013b\u2014c", "a-b-c"],
    ["normalizes special spaces to regular space", "a\u00A0b\u3000c", "a b c"],
    ["handles empty string", "", ""],
    ["preserves leading whitespace", " hello ", " hello"],
  ];

  for (const [name, input, expected] of cases) {
    test(name, () => {
      assert.equal(native.normalizeForFuzzyMatch(input), expected);
    });
  }
});
// ── fuzzyFindText ───────────────────────────────────────────────────────
describe("fuzzyFindText", () => {
  test("finds exact match", () => {
    const { found, index, matchLength, usedFuzzyMatch, contentForReplacement } =
      native.fuzzyFindText("hello world", "world");
    assert.equal(found, true);
    assert.equal(index, 6);
    assert.equal(matchLength, 5);
    assert.equal(usedFuzzyMatch, false);
    assert.equal(contentForReplacement, "hello world");
  });

  test("finds fuzzy match with smart quotes", () => {
    // Curly quotes in the content, straight quotes in the needle.
    const { found, usedFuzzyMatch } = native.fuzzyFindText(
      'let x = \u201Chello\u201D;',
      'let x = "hello";',
    );
    assert.equal(found, true);
    assert.equal(usedFuzzyMatch, true);
  });

  test("returns not found for missing text", () => {
    const { found, index, matchLength } = native.fuzzyFindText("hello world", "xyz");
    assert.equal(found, false);
    assert.equal(index, -1);
    assert.equal(matchLength, 0);
  });

  test("returns correct UTF-16 index for non-ASCII content", () => {
    // U+1F600 occupies 4 UTF-8 bytes but 2 UTF-16 code units (a surrogate
    // pair), so "hello" must be reported at index 2, not 4.
    const { found, index, matchLength } = native.fuzzyFindText("\u{1F600}hello", "hello");
    assert.equal(found, true);
    assert.equal(index, 2);
    assert.equal(matchLength, 5);
  });

  test("index is compatible with JS substring()", () => {
    const { found, index, matchLength, contentForReplacement } = native.fuzzyFindText(
      "abc\u{1F600}def",
      "def",
    );
    assert.equal(found, true);
    // "abc" is 3 code units and the emoji 2, so the match starts at 5.
    assert.equal(index, 5);
    // Round-trip through substring() using the returned offsets.
    assert.equal(contentForReplacement.substring(index, index + matchLength), "def");
  });

  test("fuzzy match with trailing whitespace differences", () => {
    const { found, usedFuzzyMatch } = native.fuzzyFindText(
      "hello \nworld ",
      "hello\nworld",
    );
    assert.equal(found, true);
    assert.equal(usedFuzzyMatch, true);
  });
});
// ── generateDiff ────────────────────────────────────────────────────────
describe("generateDiff", () => {
  // Builds a 20-line document ("line1".."line20") and a copy whose 11th line
  // is replaced by "modified".
  const twentyLinesWithOneEdit = () => {
    const lines = Array.from({ length: 20 }, (_, i) => `line${i + 1}`);
    const old = lines.join("\n");
    lines[10] = "modified";
    return { old, newText: lines.join("\n") };
  };

  test("generates diff for a line change", () => {
    const old = "line1\nline2\nline3";
    const newText = "line1\nmodified\nline3";
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.includes("line2"));
    assert.ok(result.diff.includes("modified"));
    assert.ok(result.diff.includes("-"));
    assert.ok(result.diff.includes("+"));
    // Under assert/strict, `notEqual(x, null)` also passes for `undefined`
    // (which is what napi returns for None), so pin the actual line number.
    assert.equal(result.firstChangedLine, 2);
  });

  test("generates diff for an addition", () => {
    const old = "line1\nline3";
    const newText = "line1\nline2\nline3";
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.includes("+"));
    assert.ok(result.diff.includes("line2"));
  });

  test("generates diff for a deletion", () => {
    const old = "line1\nline2\nline3";
    const newText = "line1\nline3";
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.includes("-"));
    assert.ok(result.diff.includes("line2"));
  });

  test("returns empty diff for identical content", () => {
    const result = native.generateDiff("same", "same");
    assert.equal(result.diff, "");
    // napi-rs maps Option::None to undefined (not null)
    assert.equal(result.firstChangedLine, undefined);
  });

  test("respects context lines parameter", () => {
    const { old, newText } = twentyLinesWithOneEdit();
    const result = native.generateDiff(old, newText, 2);
    assert.ok(result.diff.includes("..."));
    // Exactly two context lines on each side of the change at line 11.
    assert.ok(result.diff.includes("line9"));
    assert.ok(result.diff.includes("line13"));
    assert.ok(!result.diff.includes("line8"));
    assert.ok(!result.diff.includes("line14"));
  });

  test("default context is 4 lines", () => {
    const { old, newText } = twentyLinesWithOneEdit();
    const result = native.generateDiff(old, newText);
    assert.ok(result.diff.length > 0);
    // Four context lines before (7..10) and after (12..15) the change.
    assert.ok(result.diff.includes("line7"));
    assert.ok(result.diff.includes("line15"));
    assert.ok(!result.diff.includes("line6"));
    assert.ok(!result.diff.includes("line16"));
  });
});

View file

@ -0,0 +1,61 @@
/**
* Native fuzzy text matching and diff generation for the edit tool.
*
* Uses the `similar` Rust crate (Myers' algorithm, O(ND) time where N is the
* combined input length and D the size of the edit script) for diffing, and
* single-pass Unicode normalization for fuzzy matching.
*/
import { native } from "../native.js";
import type { DiffResult, FuzzyMatchResult } from "./types.js";
export type { DiffResult, FuzzyMatchResult };
/**
 * Normalize text for fuzzy matching:
 * - Strip trailing whitespace from each line
 * - Smart quotes to ASCII equivalents
 * - Unicode dashes/hyphens to ASCII hyphen
 * - Special Unicode spaces to regular space
 *
 * @param text Text to normalize
 * @returns The normalized text
 */
export function normalizeForFuzzyMatch(text: string): string {
  // `native` already declares this binding's signature, so call it directly
  // instead of erasing the types through `Record<string, Function>`.
  return native.normalizeForFuzzyMatch(text);
}
/**
 * Find `oldText` in `content`, trying exact match first, then fuzzy match.
 *
 * When fuzzy matching is used, `contentForReplacement` is the normalized
 * version of `content`, and `index` / `matchLength` refer to that normalized
 * text rather than to the original `content`.
 *
 * @param content Text to search in
 * @param oldText Text to look for
 * @returns Match details; `found` is false and `index` is -1 when absent
 */
export function fuzzyFindText(
  content: string,
  oldText: string,
): FuzzyMatchResult {
  // Arguments are checked against the declared native signature; only the
  // `unknown` return value needs narrowing.
  return native.fuzzyFindText(content, oldText) as FuzzyMatchResult;
}
/**
 * Generate a unified diff string with line numbers and context.
 *
 * Uses Myers' diff algorithm via the `similar` Rust crate.
 *
 * @param oldContent Original text
 * @param newContent Modified text
 * @param contextLines Number of context lines around changes (default: 4)
 * @returns The rendered diff and the first changed line in the new text
 */
export function generateDiff(
  oldContent: string,
  newContent: string,
  contextLines?: number,
): DiffResult {
  // Arguments are checked against the declared native signature; only the
  // `unknown` return value needs narrowing.
  return native.generateDiff(oldContent, newContent, contextLines) as DiffResult;
}

View file

@ -0,0 +1,24 @@
/**
 * Result of fuzzy text matching (exact match tried first, then normalized).
 *
 * `index` and `matchLength` always refer to `contentForReplacement`. After a
 * fuzzy match that is the normalized text, so the offsets must not be applied
 * to the original content.
 */
export interface FuzzyMatchResult {
/** Whether a match was found. */
found: boolean;
/** UTF-16 code unit index where the match starts (-1 if not found). */
index: number;
/** Length of the matched text in UTF-16 code units (0 if not found). */
matchLength: number;
/** Whether fuzzy (normalized) matching was used instead of exact. */
usedFuzzyMatch: boolean;
/**
* Content to use for replacement operations.
* Original content when exact match or when nothing was found;
* normalized content when fuzzy match.
*/
contentForReplacement: string;
}
/** Result of unified diff generation. */
export interface DiffResult {
/**
* The unified diff string with line numbers: `+N line` for additions,
* `-N line` for removals, ` N line` for context and ` ...` for skipped
* context. Empty when the inputs are identical.
*/
diff: string;
/** 1-based line number of the first change in the new file (undefined if no changes). */
firstChangedLine: number | undefined;
}

View file

@ -74,6 +74,13 @@ export {
} from "./text/index.js";
export type { SliceResult, ExtractSegmentsResult } from "./text/index.js";
export {
normalizeForFuzzyMatch,
fuzzyFindText,
generateDiff,
} from "./diff/index.js";
export type { FuzzyMatchResult, DiffResult } from "./diff/index.js";
export { fuzzyFind } from "./fd/index.js";
export type {
FuzzyFindMatch,

View file

@ -87,6 +87,9 @@ export const native = loadNative() as {
sanitizeText: (text: string) => string;
visibleWidth: (text: string, tabWidth?: number) => number;
fuzzyFind: (options: unknown) => unknown;
normalizeForFuzzyMatch: (text: string) => string;
fuzzyFindText: (content: string, oldText: string) => unknown;
generateDiff: (oldContent: string, newContent: string, contextLines?: number) => unknown;
NativeImage: unknown;
ttsrCompileRules: (rules: unknown[]) => number;
ttsrCheckBuffer: (handle: number, buffer: string) => string[];

View file

@ -1,9 +1,16 @@
/**
* Shared diff computation utilities for the edit tool.
* Used by both edit.ts (for execution) and tool-execution.ts (for preview rendering).
*
* Hot-path functions (fuzzyFindText, normalizeForFuzzyMatch, generateDiffString)
* delegate to the native Rust engine for performance on large files.
*/
import * as Diff from "diff";
import {
fuzzyFindText as nativeFuzzyFindText,
generateDiff as nativeGenerateDiff,
normalizeForFuzzyMatch as nativeNormalizeForFuzzyMatch,
} from "@gsd/native";
import { constants } from "fs";
import { access, readFile } from "fs/promises";
import { resolveToCwd } from "./path-utils.js";
@ -25,32 +32,14 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
}
/**
* Normalize text for fuzzy matching. Applies progressive transformations:
* Normalize text for fuzzy matching (native Rust implementation).
* - Strip trailing whitespace from each line
* - Normalize smart quotes to ASCII equivalents
* - Normalize Unicode dashes/hyphens to ASCII hyphen
* - Normalize special Unicode spaces to regular space
*/
export function normalizeForFuzzyMatch(text: string): string {
return (
text
// Strip trailing whitespace per line
.split("\n")
.map((line) => line.trimEnd())
.join("\n")
// Smart single quotes → '
.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
// Smart double quotes → "
.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
// Various dashes/hyphens → -
// U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash,
// U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus
.replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-")
// Special spaces → regular space
// U+00A0 NBSP, U+2002-U+200A various spaces, U+202F narrow NBSP,
// U+205F medium math space, U+3000 ideographic space
.replace(/[\u00A0\u2002-\u200A\u202F\u205F\u3000]/g, " ")
);
return nativeNormalizeForFuzzyMatch(text);
}
export interface FuzzyMatchResult {
@ -70,49 +59,14 @@ export interface FuzzyMatchResult {
}
/**
* Find oldText in content, trying exact match first, then fuzzy match.
* Find oldText in content, trying exact match first, then fuzzy match
* (native Rust implementation).
*
* When fuzzy matching is used, the returned contentForReplacement is the
* fuzzy-normalized version of the content (trailing whitespace stripped,
* Unicode quotes/dashes normalized to ASCII).
* fuzzy-normalized version of the content.
*/
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
// Try exact match first
const exactIndex = content.indexOf(oldText);
if (exactIndex !== -1) {
return {
found: true,
index: exactIndex,
matchLength: oldText.length,
usedFuzzyMatch: false,
contentForReplacement: content,
};
}
// Try fuzzy match - work entirely in normalized space
const fuzzyContent = normalizeForFuzzyMatch(content);
const fuzzyOldText = normalizeForFuzzyMatch(oldText);
const fuzzyIndex = fuzzyContent.indexOf(fuzzyOldText);
if (fuzzyIndex === -1) {
return {
found: false,
index: -1,
matchLength: 0,
usedFuzzyMatch: false,
contentForReplacement: content,
};
}
// When fuzzy matching, we work in the normalized space for replacement.
// This means the output will have normalized whitespace/quotes/dashes,
// which is acceptable since we're fixing minor formatting differences anyway.
return {
found: true,
index: fuzzyIndex,
matchLength: fuzzyOldText.length,
usedFuzzyMatch: true,
contentForReplacement: fuzzyContent,
};
return nativeFuzzyFindText(content, oldText);
}
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
@ -121,7 +75,9 @@ export function stripBom(content: string): { bom: string; text: string } {
}
/**
* Generate a unified diff string with line numbers and context.
* Generate a unified diff string with line numbers and context
* (native Rust implementation using Myers' algorithm via the `similar` crate).
*
* Returns both the diff string and the first changed line number (in the new file).
*/
export function generateDiffString(
@ -129,101 +85,11 @@ export function generateDiffString(
newContent: string,
contextLines = 4,
): { diff: string; firstChangedLine: number | undefined } {
const parts = Diff.diffLines(oldContent, newContent);
const output: string[] = [];
const oldLines = oldContent.split("\n");
const newLines = newContent.split("\n");
const maxLineNum = Math.max(oldLines.length, newLines.length);
const lineNumWidth = String(maxLineNum).length;
let oldLineNum = 1;
let newLineNum = 1;
let lastWasChange = false;
let firstChangedLine: number | undefined;
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
const raw = part.value.split("\n");
if (raw[raw.length - 1] === "") {
raw.pop();
}
if (part.added || part.removed) {
// Capture the first changed line (in the new file)
if (firstChangedLine === undefined) {
firstChangedLine = newLineNum;
}
// Show the change
for (const line of raw) {
if (part.added) {
const lineNum = String(newLineNum).padStart(lineNumWidth, " ");
output.push(`+${lineNum} ${line}`);
newLineNum++;
} else {
// removed
const lineNum = String(oldLineNum).padStart(lineNumWidth, " ");
output.push(`-${lineNum} ${line}`);
oldLineNum++;
}
}
lastWasChange = true;
} else {
// Context lines - only show a few before/after changes
const nextPartIsChange = i < parts.length - 1 && (parts[i + 1].added || parts[i + 1].removed);
if (lastWasChange || nextPartIsChange) {
// Show context
let linesToShow = raw;
let skipStart = 0;
let skipEnd = 0;
if (!lastWasChange) {
// Show only last N lines as leading context
skipStart = Math.max(0, raw.length - contextLines);
linesToShow = raw.slice(skipStart);
}
if (!nextPartIsChange && linesToShow.length > contextLines) {
// Show only first N lines as trailing context
skipEnd = linesToShow.length - contextLines;
linesToShow = linesToShow.slice(0, contextLines);
}
// Add ellipsis if we skipped lines at start
if (skipStart > 0) {
output.push(` ${"".padStart(lineNumWidth, " ")} ...`);
// Update line numbers for the skipped leading context
oldLineNum += skipStart;
newLineNum += skipStart;
}
for (const line of linesToShow) {
const lineNum = String(oldLineNum).padStart(lineNumWidth, " ");
output.push(` ${lineNum} ${line}`);
oldLineNum++;
newLineNum++;
}
// Add ellipsis if we skipped lines at end
if (skipEnd > 0) {
output.push(` ${"".padStart(lineNumWidth, " ")} ...`);
// Update line numbers for the skipped trailing context
oldLineNum += skipEnd;
newLineNum += skipEnd;
}
} else {
// Skip these context lines entirely
oldLineNum += raw.length;
newLineNum += raw.length;
}
lastWasChange = false;
}
}
return { diff: output.join("\n"), firstChangedLine };
const result = nativeGenerateDiff(oldContent, newContent, contextLines);
return {
diff: result.diff,
firstChangedLine: result.firstChangedLine ?? undefined,
};
}
export interface EditDiffResult {