// singularity-forge/rust-engine/crates/engine/src/edit.rs
// (viewer metadata: 794 lines, 27 KiB, Rust — not part of the source itself)
//! Fast atomic file edits with LSP-compatible position semantics.
//!
//! Applies an array of `TextEdit { range, newText }` to a file and writes the
//! result atomically (write to temp file in same directory, fsync, rename).
//! Edits are sorted in descending order by start position so positions remain
//! valid during application - there is no rope; one allocation, one splice
//! pass, one write.
//!
//! Position semantics match LSP: `line` is 0-based, `character` is the count
//! of UTF-16 code units from the line start.
use napi::{Error, Result, Status};
use napi_derive::napi;
use std::collections::HashSet;
use std::fs::{self, File, OpenOptions};
use std::io::Write;
use std::path::{Path, PathBuf};
/// LSP-style text position: zero-based line, with the column measured in
/// UTF-16 code units (the LSP wire convention, not bytes or codepoints).
#[napi(object)]
pub struct Position {
    /// 0-based line number.
    pub line: u32,
    /// 0-based offset in UTF-16 code units from line start (LSP convention).
    pub character: u32,
}
/// A span of text: `start` inclusive, `end` exclusive (edits splice
/// `start..end`, so an empty range denotes a pure insertion point).
#[napi(object)]
pub struct Range {
    pub start: Position,
    pub end: Position,
}
/// One replacement: the text covered by `range` is replaced with `new_text`.
/// An empty `range` inserts; an empty `new_text` deletes.
#[napi(object)]
pub struct TextEdit {
    pub range: Range,
    #[napi(js_name = "newText")]
    pub new_text: String,
}
/// Options shared by `applyEdits` and `applyWorkspaceEdit`.
#[napi(object)]
pub struct ApplyEditsOptions {
    /// fsync the temp file and parent dir before/after rename. Default true.
    pub fsync: Option<bool>,
}
/// Outcome of a successful `applyEdits` call.
#[napi(object)]
#[derive(Debug)]
pub struct ApplyEditsResult {
    /// Number of edits applied.
    #[napi(js_name = "editsApplied")]
    pub edits_applied: u32,
    /// Final file size in bytes after the write.
    #[napi(js_name = "bytesWritten")]
    pub bytes_written: u32,
}
/// Edits targeting a single file, used as one entry of `applyWorkspaceEdit`.
#[napi(object)]
pub struct TextDocumentEdit {
    /// Path of the file to edit.
    #[napi(js_name = "filePath")]
    pub file_path: String,
    /// Edits for this file; validated as a group (no overlaps allowed).
    pub edits: Vec<TextEdit>,
}
/// Per-file outcome inside an `ApplyWorkspaceEditResult`.
#[napi(object)]
#[derive(Debug)]
pub struct WorkspaceEditFileResult {
    /// Path of the file that was rewritten.
    #[napi(js_name = "filePath")]
    pub file_path: String,
    /// Number of edits applied to this file.
    #[napi(js_name = "editsApplied")]
    pub edits_applied: u32,
    /// Final size of this file in bytes after the write.
    #[napi(js_name = "bytesWritten")]
    pub bytes_written: u32,
}
/// Aggregate outcome of a successful `applyWorkspaceEdit` call.
#[napi(object)]
#[derive(Debug)]
pub struct ApplyWorkspaceEditResult {
    /// Number of files rewritten (equals `files.len()`).
    #[napi(js_name = "filesChanged")]
    pub files_changed: u32,
    /// Sum of `editsApplied` over all files.
    #[napi(js_name = "totalEditsApplied")]
    pub total_edits_applied: u32,
    /// Per-file details, in the order the input documents were given.
    pub files: Vec<WorkspaceEditFileResult>,
}
// ─── In-memory edit pipeline ──────────────────────────────────────────────
/// Apply `edits` to `original` bytes (which must be valid UTF-8) and return
/// the resulting bytes. `context` is a human-readable label (e.g. file path)
/// used only in error messages.
///
/// Edits may touch but never overlap. Multiple insertions at the same
/// position are applied so that their text appears in array order, matching
/// the LSP `TextEdit[]` contract.
///
/// Errors (all `InvalidArg`): non-UTF-8 input, out-of-range positions,
/// start > end, or overlapping edits.
fn compute_new_bytes(original: &[u8], edits: &[TextEdit], context: &str) -> Result<Vec<u8>> {
    let content = std::str::from_utf8(original).map_err(|e| {
        Error::new(
            Status::InvalidArg,
            format!(
                "{context}: file is not valid UTF-8 at byte {}: {e}",
                e.valid_up_to()
            ),
        )
    })?;
    if edits.is_empty() {
        return Ok(original.to_vec());
    }
    let line_starts = compute_line_starts(content);
    // Resolve each edit to (start_byte, end_byte, original_index). Keeping the
    // index instead of cloning `new_text` avoids one String allocation per
    // edit and lets us honor array order for same-position insertions.
    let mut resolved: Vec<(usize, usize, usize)> = Vec::with_capacity(edits.len());
    for (idx, e) in edits.iter().enumerate() {
        let start = position_to_byte(content, &line_starts, &e.range.start).ok_or_else(|| {
            Error::new(
                Status::InvalidArg,
                format!(
                    "{context}: edit[{idx}]: start position line {} character {} is out of range",
                    e.range.start.line, e.range.start.character
                ),
            )
        })?;
        let end = position_to_byte(content, &line_starts, &e.range.end).ok_or_else(|| {
            Error::new(
                Status::InvalidArg,
                format!(
                    "{context}: edit[{idx}]: end position line {} character {} is out of range",
                    e.range.end.line, e.range.end.character
                ),
            )
        })?;
        if start > end {
            return Err(Error::new(
                Status::InvalidArg,
                format!("{context}: edit[{idx}]: start ({start}) > end ({end}) in byte offsets"),
            ));
        }
        resolved.push((start, end, idx));
    }
    // Sort ascending by (start, array index) to detect overlaps deterministically.
    resolved.sort_by_key(|&(s, _, i)| (s, i));
    for w in resolved.windows(2) {
        let (_, prev_end, _) = &w[0];
        let (next_start, _, _) = &w[1];
        if next_start < prev_end {
            return Err(Error::new(
                Status::InvalidArg,
                format!(
                    "{context}: overlapping edits: prev ends at {prev_end}, next starts at {next_start}"
                ),
            ));
        }
    }
    // Splice from the back so earlier byte offsets stay valid. Walking the
    // ascending (start, idx) sort in REVERSE applies same-start insertions
    // highest-index first, which leaves the lowest-index text first in the
    // output (LSP array order). A plain stable descending-by-start sort would
    // apply them lowest-index first and come out reversed.
    let mut out = original.to_vec();
    for &(start, end, idx) in resolved.iter().rev() {
        out.splice(start..end, edits[idx].new_text.bytes());
    }
    Ok(out)
}
// ─── Atomic-write helpers ─────────────────────────────────────────────────
/// Return a sibling `.{name}.applyEdits.<pid>.<tid>` path for `final_path`.
///
/// Fails with `InvalidInput` if `final_path` has no parent directory or no
/// UTF-8 file name. The pid/tid suffix keeps concurrent writers from
/// clobbering each other's temp files.
fn make_tmp_path(final_path: &Path) -> std::io::Result<PathBuf> {
    // Both failure modes produce the same error kind; share the constructor.
    let invalid = |msg: String| std::io::Error::new(std::io::ErrorKind::InvalidInput, msg);
    let parent = final_path
        .parent()
        .ok_or_else(|| invalid(format!("path has no parent: {}", final_path.display())))?;
    let file_name = final_path
        .file_name()
        .and_then(|name| name.to_str())
        .ok_or_else(|| invalid(format!("path has no filename: {}", final_path.display())))?;
    Ok(parent.join(format!(
        ".{file_name}.applyEdits.{}.{}",
        std::process::id(),
        thread_id()
    )))
}
/// Write `content` to `tmp_path`, optionally fsyncing before returning.
/// Does NOT rename; the caller is responsible for cleanup on error.
fn write_tmp(tmp_path: &Path, content: &[u8], do_fsync: bool) -> std::io::Result<()> {
    let mut file = OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(true)
        .open(tmp_path)?;
    file.write_all(content)?;
    if !do_fsync {
        return Ok(());
    }
    file.sync_all()
}
/// Atomic write: temp file in same dir → fsync → rename → fsync parent.
fn atomic_write(path: &Path, content: &[u8], do_fsync: bool) -> std::io::Result<()> {
    let parent = path.parent().ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!("path has no parent: {}", path.display()),
        )
    })?;
    let tmp_path = make_tmp_path(path)?;
    write_tmp(&tmp_path, content, do_fsync)?;
    // Rename is atomic on POSIX; on Windows it's atomic for files on the same
    // volume, which the same-directory placement guarantees.
    if let Err(e) = fs::rename(&tmp_path, path) {
        // Best-effort cleanup of the temp file before propagating.
        let _ = fs::remove_file(&tmp_path);
        return Err(e);
    }
    if do_fsync {
        // fsync the directory so the rename hits the disk's filesystem journal,
        // not just the page cache. Best-effort: not all platforms support
        // directory fsync (Windows ignores it).
        if let Ok(dir) = File::open(parent) {
            let _ = dir.sync_all();
        }
    }
    Ok(())
}
// ─── Public API ───────────────────────────────────────────────────────────
/// Apply LSP-style TextEdits to a file atomically.
///
/// Steps:
/// 1. Read file as UTF-8.
/// 2. Resolve every (line, character-utf16) pair to a byte offset.
/// 3. Validate that no two edits overlap.
/// 4. Sort edits in descending order by start byte.
/// 5. Splice into a single output Vec<u8>.
/// 6. Write to a sibling `.{name}.applyEdits.<pid>.<tid>` temp file, fsync,
///    atomic rename, fsync parent dir.
///
/// Errors:
/// - File missing or unreadable
/// - File is not valid UTF-8
/// - Any range references a line/character position that does not exist
/// - Any two edits overlap
/// - Write or rename fails
#[napi(js_name = "applyEdits")]
pub fn apply_edits(
    file_path: String,
    edits: Vec<TextEdit>,
    options: Option<ApplyEditsOptions>,
) -> Result<ApplyEditsResult> {
    let opts = options.unwrap_or(ApplyEditsOptions { fsync: None });
    // fsync defaults ON; callers opt out for speed.
    let do_fsync = opts.fsync.unwrap_or(true);
    let path = PathBuf::from(&file_path);
    let bytes = fs::read(&path)
        .map_err(|e| Error::new(Status::GenericFailure, format!("read {file_path}: {e}")))?;
    if edits.is_empty() {
        // No-op fast path: report the current size and skip the rewrite
        // (note this also skips UTF-8 validation of the existing content).
        return Ok(ApplyEditsResult {
            edits_applied: 0,
            bytes_written: bytes.len() as u32,
        });
    }
    let out = compute_new_bytes(&bytes, &edits, &file_path)?;
    atomic_write(&path, &out, do_fsync)
        .map_err(|e| Error::new(Status::GenericFailure, format!("write {file_path}: {e}")))?;
    Ok(ApplyEditsResult {
        edits_applied: edits.len() as u32,
        bytes_written: out.len() as u32,
    })
}
/// Apply LSP-style WorkspaceEdit (multiple files) atomically using two-phase commit.
///
/// Phase 1 (validate + stage): for each file, read it, compute the new bytes,
/// write them to a sibling `.tmp` file with fsync. If ANY file fails (I/O,
/// UTF-8, overlap), all staged `.tmp` files are cleaned up and the originals
/// are left untouched.
///
/// Phase 2 (commit): rename every staged `.tmp` over its original. If a rename
/// fails partway, remaining `.tmp` files are cleaned up and an error is returned
/// that includes how many files were successfully renamed.
///
/// After all renames: fsync each unique parent directory once.
#[napi(js_name = "applyWorkspaceEdit")]
pub fn apply_workspace_edit(
document_edits: Vec<TextDocumentEdit>,
options: Option<ApplyEditsOptions>,
) -> Result<ApplyWorkspaceEditResult> {
if document_edits.is_empty() {
return Ok(ApplyWorkspaceEditResult {
files_changed: 0,
total_edits_applied: 0,
files: vec![],
});
}
let opts = options.unwrap_or(ApplyEditsOptions { fsync: None });
let do_fsync = opts.fsync.unwrap_or(true);
// ── Phase 1: validate + stage ────────────────────────────────────────
// staged[i] = (final_path, tmp_path, new_bytes, edits_applied)
let mut staged: Vec<(PathBuf, PathBuf, Vec<u8>, u32)> =
Vec::with_capacity(document_edits.len());
for doc_edit in &document_edits {
let path = PathBuf::from(&doc_edit.file_path);
let bytes = match fs::read(&path) {
Ok(b) => b,
Err(e) => {
// Cleanup already-staged tmps before returning.
for (_, tmp, _, _) in &staged {
let _ = fs::remove_file(tmp);
}
return Err(Error::new(
Status::GenericFailure,
format!("read {}: {e}", doc_edit.file_path),
));
}
};
let new_bytes = match compute_new_bytes(&bytes, &doc_edit.edits, &doc_edit.file_path) {
Ok(b) => b,
Err(e) => {
for (_, tmp, _, _) in &staged {
let _ = fs::remove_file(tmp);
}
return Err(e);
}
};
let tmp_path = match make_tmp_path(&path) {
Ok(p) => p,
Err(e) => {
for (_, tmp, _, _) in &staged {
let _ = fs::remove_file(tmp);
}
return Err(Error::new(
Status::GenericFailure,
format!("make_tmp_path {}: {e}", doc_edit.file_path),
));
}
};
if let Err(e) = write_tmp(&tmp_path, &new_bytes, do_fsync) {
let _ = fs::remove_file(&tmp_path);
for (_, tmp, _, _) in &staged {
let _ = fs::remove_file(tmp);
}
return Err(Error::new(
Status::GenericFailure,
format!("write_tmp {}: {e}", doc_edit.file_path),
));
}
let edits_applied = if doc_edit.edits.is_empty() {
0
} else {
doc_edit.edits.len() as u32
};
staged.push((path, tmp_path, new_bytes, edits_applied));
}
// ── Phase 2: commit ──────────────────────────────────────────────────
let mut file_results: Vec<WorkspaceEditFileResult> = Vec::with_capacity(staged.len());
for (succeeded, (final_path, tmp_path, new_bytes, edits_applied)) in staged.iter().enumerate() {
if let Err(e) = fs::rename(tmp_path, final_path) {
// Cleanup remaining staged tmps (including this one if rename failed
// before touching the original).
let _ = fs::remove_file(tmp_path);
for (_, remaining_tmp, _, _) in staged.iter().skip(succeeded + 1) {
let _ = fs::remove_file(remaining_tmp);
}
return Err(Error::new(
Status::GenericFailure,
format!(
"rename failed for {} after {succeeded} successful renames: {e}",
final_path.display()
),
));
}
file_results.push(WorkspaceEditFileResult {
file_path: final_path.to_string_lossy().into_owned(),
edits_applied: *edits_applied,
bytes_written: new_bytes.len() as u32,
});
}
// ── fsync parent directories (deduplicated) ──────────────────────────
if do_fsync {
let mut parents: HashSet<PathBuf> = HashSet::new();
for (final_path, _, _, _) in &staged {
if let Some(parent) = final_path.parent() {
parents.insert(parent.to_path_buf());
}
}
for parent in &parents {
if let Ok(dir_fd) = File::open(parent) {
let _ = dir_fd.sync_all();
}
}
}
let total_edits_applied: u32 = file_results.iter().map(|r| r.edits_applied).sum();
Ok(ApplyWorkspaceEditResult {
files_changed: file_results.len() as u32,
total_edits_applied,
files: file_results,
})
}
// ─── Private helpers ──────────────────────────────────────────────────────
/// Pre-compute byte offsets where each line begins (line 0 = byte 0; line N
/// begins at the byte after the (N-1)th '\n').
fn compute_line_starts(content: &str) -> Vec<usize> {
    // Capacity heuristic: assume ~40 bytes per line to avoid most regrowth.
    let mut starts = Vec::with_capacity(content.len() / 40 + 1);
    starts.push(0);
    starts.extend(content.match_indices('\n').map(|(nl_at, _)| nl_at + 1));
    starts
}
/// Convert an LSP `Position { line, character (UTF-16) }` to a byte offset
/// into `content`. Returns `None` if the line index is past EOF or the UTF-16
/// character offset is past the line's text. Note this is STRICT: an
/// over-long `character` is an error here, not clamped to line length.
fn position_to_byte(content: &str, line_starts: &[usize], pos: &Position) -> Option<usize> {
    let line_idx = pos.line as usize;
    if line_idx >= line_starts.len() {
        return None;
    }
    let line_start = line_starts[line_idx];
    // End of this line = start of the next line, or EOF for the last line.
    let line_end = line_starts
        .get(line_idx + 1)
        .copied()
        .unwrap_or(content.len());
    let line_text = &content[line_start..line_end];
    let mut utf16_units: u32 = 0;
    for (offset_in_line, c) in line_text.char_indices() {
        if c == '\n' || c == '\r' {
            // Reached the line terminator: the position is valid only if it
            // points exactly at end-of-line-content (i.e. at the terminator).
            // NOTE(review): a bare '\r' (no following '\n') also stops the
            // scan here even though compute_line_starts does not split on it,
            // so positions after a lone '\r' on the same "line" are
            // unreachable — confirm this is intended for classic-Mac input.
            return if utf16_units == pos.character {
                Some(line_start + offset_in_line)
            } else {
                None
            };
        }
        if utf16_units >= pos.character {
            // `>=` (not `==`) so a `character` landing inside a surrogate
            // pair resolves to the boundary after that code point.
            return Some(line_start + offset_in_line);
        }
        utf16_units += c.len_utf16() as u32;
    }
    // Ran off the end of an unterminated final line: valid only at its exact end.
    if utf16_units == pos.character {
        Some(line_end)
    } else {
        None
    }
}
/// Numeric id for the current thread, obtained by hashing `ThreadId`
/// (stable Rust does not expose its integer value directly). Deterministic
/// within a thread for the life of the process.
fn thread_id() -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut hasher = DefaultHasher::default();
    std::thread::current().id().hash(&mut hasher);
    hasher.finish()
}
// ─── Tests ────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Create a unique temp file seeded with `content`. Uniqueness comes from
    /// pid + thread id + a process-wide counter; tests remove their own files.
    fn tmp_file(content: &str) -> PathBuf {
        use std::sync::atomic::{AtomicU64, Ordering};
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let dir = std::env::temp_dir();
        let path = dir.join(format!(
            "forge-edit-test-{}-{}-{}",
            std::process::id(),
            thread_id(),
            COUNTER.fetch_add(1, Ordering::Relaxed),
        ));
        fs::write(&path, content).unwrap();
        path
    }
    /// Read a file back as a UTF-8 string. Takes `&Path` (not `&PathBuf`) so
    /// any path-like borrow coerces; `&PathBuf` arguments still work via deref.
    fn read(path: &Path) -> String {
        fs::read_to_string(path).unwrap()
    }
    fn pos(line: u32, character: u32) -> Position {
        Position { line, character }
    }
    fn range(s_line: u32, s_char: u32, e_line: u32, e_char: u32) -> Range {
        Range {
            start: pos(s_line, s_char),
            end: pos(e_line, e_char),
        }
    }
    #[test]
    fn replaces_single_word() {
        let path = tmp_file("hello world\n");
        let edits = vec![TextEdit {
            range: range(0, 6, 0, 11),
            new_text: "Earth".into(),
        }];
        let r = apply_edits(path.to_string_lossy().into(), edits, None).unwrap();
        assert_eq!(r.edits_applied, 1);
        assert_eq!(read(&path), "hello Earth\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn applies_multiple_non_overlapping_edits() {
        let path = tmp_file("aaa bbb ccc\n");
        let edits = vec![
            TextEdit {
                range: range(0, 0, 0, 3),
                new_text: "XXX".into(),
            },
            TextEdit {
                range: range(0, 8, 0, 11),
                new_text: "YYY".into(),
            },
        ];
        let r = apply_edits(path.to_string_lossy().into(), edits, None).unwrap();
        assert_eq!(r.edits_applied, 2);
        assert_eq!(read(&path), "XXX bbb YYY\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn applies_multiline_edit() {
        let path = tmp_file("line one\nline two\nline three\n");
        let edits = vec![TextEdit {
            range: range(0, 5, 2, 4),
            new_text: "ONE\n_REPLACED_\nfour".into(),
        }];
        apply_edits(path.to_string_lossy().into(), edits, None).unwrap();
        assert_eq!(read(&path), "line ONE\n_REPLACED_\nfour three\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn rejects_overlapping_edits() {
        let path = tmp_file("aaaaa\n");
        let edits = vec![
            TextEdit {
                range: range(0, 0, 0, 3),
                new_text: "X".into(),
            },
            TextEdit {
                range: range(0, 2, 0, 5),
                new_text: "Y".into(),
            },
        ];
        let err = apply_edits(path.to_string_lossy().into(), edits, None).unwrap_err();
        assert!(err.reason.contains("overlapping"), "got: {}", err.reason);
        // Original file untouched
        assert_eq!(read(&path), "aaaaa\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn rejects_out_of_range_line() {
        let path = tmp_file("only one line\n");
        let edits = vec![TextEdit {
            range: range(99, 0, 99, 1),
            new_text: "x".into(),
        }];
        let err = apply_edits(path.to_string_lossy().into(), edits, None).unwrap_err();
        assert!(err.reason.contains("out of range"), "got: {}", err.reason);
        fs::remove_file(&path).ok();
    }
    #[test]
    fn handles_utf16_code_units_for_emoji() {
        // 😀 is one Unicode codepoint but two UTF-16 code units (surrogate pair).
        // LSP measures `character` in UTF-16 units, so character=2 should land
        // just after the emoji.
        let path = tmp_file("a😀b\n");
        let edits = vec![TextEdit {
            // Replace just the emoji: chars 1..3 (UTF-16 units, since emoji is 2)
            range: range(0, 1, 0, 3),
            new_text: "X".into(),
        }];
        apply_edits(path.to_string_lossy().into(), edits, None).unwrap();
        assert_eq!(read(&path), "aXb\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn empty_edits_no_op() {
        let path = tmp_file("unchanged\n");
        let r = apply_edits(path.to_string_lossy().into(), vec![], None).unwrap();
        assert_eq!(r.edits_applied, 0);
        assert_eq!(read(&path), "unchanged\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn insertion_at_position() {
        // Pure insertion: range start == range end
        let path = tmp_file("ab\n");
        let edits = vec![TextEdit {
            range: range(0, 1, 0, 1),
            new_text: "X".into(),
        }];
        apply_edits(path.to_string_lossy().into(), edits, None).unwrap();
        assert_eq!(read(&path), "aXb\n");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn append_after_eof() {
        let path = tmp_file("first line\nsecond line");
        let edits = vec![TextEdit {
            range: range(1, 11, 1, 11),
            new_text: "\nthird line".into(),
        }];
        apply_edits(path.to_string_lossy().into(), edits, None).unwrap();
        assert_eq!(read(&path), "first line\nsecond line\nthird line");
        fs::remove_file(&path).ok();
    }
    #[test]
    fn allows_disabling_fsync_for_speed() {
        let path = tmp_file("hi\n");
        let edits = vec![TextEdit {
            range: range(0, 0, 0, 2),
            new_text: "yo".into(),
        }];
        apply_edits(
            path.to_string_lossy().into(),
            edits,
            Some(ApplyEditsOptions { fsync: Some(false) }),
        )
        .unwrap();
        assert_eq!(read(&path), "yo\n");
        fs::remove_file(&path).ok();
    }
    // ── workspace_edit tests ──────────────────────────────────────────────
    #[test]
    fn workspace_edit_applies_to_two_files() {
        let path_a = tmp_file("alpha beta\n");
        let path_b = tmp_file("foo bar\n");
        let doc_edits = vec![
            TextDocumentEdit {
                file_path: path_a.to_string_lossy().into_owned(),
                edits: vec![TextEdit {
                    range: range(0, 6, 0, 10),
                    new_text: "gamma".into(),
                }],
            },
            TextDocumentEdit {
                file_path: path_b.to_string_lossy().into_owned(),
                edits: vec![TextEdit {
                    range: range(0, 4, 0, 7),
                    new_text: "baz".into(),
                }],
            },
        ];
        let r = apply_workspace_edit(doc_edits, None).unwrap();
        assert_eq!(r.files_changed, 2);
        assert_eq!(r.total_edits_applied, 2);
        assert_eq!(read(&path_a), "alpha gamma\n");
        assert_eq!(read(&path_b), "foo baz\n");
        fs::remove_file(&path_a).ok();
        fs::remove_file(&path_b).ok();
    }
    #[test]
    fn workspace_edit_rolls_back_on_phase1_error() {
        let path_a = tmp_file("good file\n");
        let path_b = tmp_file("aaaaa\n");
        let path_c = tmp_file("another good file\n");
        // path_b has overlapping edits — should cause Phase 1 failure.
        let doc_edits = vec![
            TextDocumentEdit {
                file_path: path_a.to_string_lossy().into_owned(),
                edits: vec![TextEdit {
                    range: range(0, 0, 0, 4),
                    new_text: "nice".into(),
                }],
            },
            TextDocumentEdit {
                file_path: path_b.to_string_lossy().into_owned(),
                edits: vec![
                    TextEdit {
                        range: range(0, 0, 0, 3),
                        new_text: "X".into(),
                    },
                    TextEdit {
                        range: range(0, 2, 0, 5),
                        new_text: "Y".into(),
                    },
                ],
            },
            TextDocumentEdit {
                file_path: path_c.to_string_lossy().into_owned(),
                edits: vec![TextEdit {
                    range: range(0, 8, 0, 12),
                    new_text: "great".into(),
                }],
            },
        ];
        let err = apply_workspace_edit(doc_edits, None).unwrap_err();
        assert!(err.reason.contains("overlapping"), "got: {}", err.reason);
        // All originals must be untouched.
        assert_eq!(read(&path_a), "good file\n");
        assert_eq!(read(&path_b), "aaaaa\n");
        assert_eq!(read(&path_c), "another good file\n");
        // No leftover .tmp files.
        let tmp_a = make_tmp_path(&path_a).unwrap();
        let tmp_b = make_tmp_path(&path_b).unwrap();
        let tmp_c = make_tmp_path(&path_c).unwrap();
        assert!(!tmp_a.exists(), "stale tmp left behind for path_a");
        assert!(!tmp_b.exists(), "stale tmp left behind for path_b");
        assert!(!tmp_c.exists(), "stale tmp left behind for path_c");
        fs::remove_file(&path_a).ok();
        fs::remove_file(&path_b).ok();
        fs::remove_file(&path_c).ok();
    }
    #[test]
    fn workspace_edit_empty_input() {
        let r = apply_workspace_edit(vec![], None).unwrap();
        assert_eq!(r.files_changed, 0);
        assert_eq!(r.total_edits_applied, 0);
        assert!(r.files.is_empty());
    }
    #[test]
    fn workspace_edit_handles_missing_file() {
        let missing = std::env::temp_dir().join("forge-edit-test-nonexistent-99999999.txt");
        // Ensure it really doesn't exist.
        let _ = fs::remove_file(&missing);
        let doc_edits = vec![TextDocumentEdit {
            file_path: missing.to_string_lossy().into_owned(),
            edits: vec![TextEdit {
                range: range(0, 0, 0, 1),
                new_text: "x".into(),
            }],
        }];
        let err = apply_workspace_edit(doc_edits, None).unwrap_err();
        // Error must mention the path.
        assert!(
            err.reason.contains("forge-edit-test-nonexistent-99999999"),
            "error does not mention path: {}",
            err.reason
        );
    }
}