feat: add native glob and fs_cache modules with gitignore-aware discovery (#226)
Port glob, glob_util, and fs_cache modules from Oh My Pi's pi-natives crate, adapted for napi-rs v2. Provides gitignore-respecting filesystem discovery with a TTL-based scan cache, mtime sorting, file-type filtering, and node_modules exclusion. Includes a task module for async N-API work scheduling with cooperative cancellation (timeout-based), TypeScript type declarations and wrapper, and 12 integration tests covering pattern matching, recursion, gitignore, maxResults, sortByMtime, fileType filtering, and cache invalidation. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c5bc8625a4
commit
c36c8bd0b0
13 changed files with 1341 additions and 2 deletions
66
native/Cargo.lock
generated
66
native/Cargo.lock
generated
|
|
@ -78,6 +78,20 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "6.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"hashbrown",
|
||||
"lock_api",
|
||||
"once_cell",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
|
|
@ -156,7 +170,10 @@ dependencies = [
|
|||
name = "gsd-engine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"dashmap",
|
||||
"globset",
|
||||
"gsd-grep",
|
||||
"ignore",
|
||||
"libc",
|
||||
"napi",
|
||||
"napi-build",
|
||||
|
|
@ -174,6 +191,12 @@ dependencies = [
|
|||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.25"
|
||||
|
|
@ -206,6 +229,15 @@ dependencies = [
|
|||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
|
||||
dependencies = [
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.29"
|
||||
|
|
@ -290,6 +322,19 @@ version = "1.21.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
|
|
@ -328,6 +373,15 @@ dependencies = [
|
|||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.12.3"
|
||||
|
|
@ -366,6 +420,12 @@ dependencies = [
|
|||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.27"
|
||||
|
|
@ -401,6 +461,12 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ crate-type = ["cdylib"]
|
|||
|
||||
[dependencies]
|
||||
gsd-grep = { path = "../grep" }
|
||||
dashmap = "6"
|
||||
globset = "0.4"
|
||||
ignore = "0.4"
|
||||
napi = { version = "2", features = ["napi8"] }
|
||||
napi-derive = "2"
|
||||
|
||||
|
|
|
|||
423
native/crates/engine/src/fs_cache.rs
Normal file
423
native/crates/engine/src/fs_cache.rs
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
//! Shared filesystem scan cache for discovery tools (glob).
|
||||
//!
|
||||
//! Provides a TTL-based cache of scanned directory entries, with:
|
||||
//! - Global policy (no per-call TTL tuning)
|
||||
//! - Explicit invalidation for agent file mutations
|
||||
//! - Empty-result fast recheck to avoid stale negatives
|
||||
//!
|
||||
//! # Policy Configuration (environment overrides)
|
||||
//! - `FS_SCAN_CACHE_TTL_MS` – default `1000`
|
||||
//! - `FS_SCAN_EMPTY_RECHECK_MS` – default `200`
|
||||
//! - `FS_SCAN_CACHE_MAX_ENTRIES` – default `16`
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
path::{Path, PathBuf},
|
||||
sync::LazyLock,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use dashmap::DashMap;
|
||||
use ignore::WalkBuilder;
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::task;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Public types (re-exported by glob)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[napi]
|
||||
pub enum FileType {
|
||||
/// Regular file.
|
||||
File = 1,
|
||||
/// Directory.
|
||||
Dir = 2,
|
||||
/// Symbolic link.
|
||||
Symlink = 3,
|
||||
}
|
||||
|
||||
/// A single filesystem entry from a directory scan.
|
||||
#[derive(Clone)]
|
||||
#[napi(object)]
|
||||
pub struct GlobMatch {
|
||||
/// Relative path from the search root, using forward slashes.
|
||||
pub path: String,
|
||||
/// Resolved filesystem type for the match.
|
||||
#[napi(js_name = "fileType")]
|
||||
pub file_type: FileType,
|
||||
/// Modification time in milliseconds since Unix epoch (from
|
||||
/// `symlink_metadata`).
|
||||
pub mtime: Option<f64>,
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Cache policy
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
const DEFAULT_CACHE_TTL_MS: u64 = 1_000;
const DEFAULT_EMPTY_RECHECK_MS: u64 = 200;
const DEFAULT_MAX_CACHE_ENTRIES: usize = 16;

/// Read an environment variable and parse it into `T`, falling back to
/// `default` when the variable is unset or does not parse.
fn env_parse<T: std::str::FromStr>(name: &str, default: T) -> T {
    std::env::var(name)
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(default)
}

fn env_u64(name: &str, default: u64) -> u64 {
    env_parse(name, default)
}

fn env_usize(name: &str, default: usize) -> usize {
    env_parse(name, default)
}

/// Configured cache TTL in milliseconds (`FS_SCAN_CACHE_TTL_MS`, default 1000).
pub fn cache_ttl_ms() -> u64 {
    env_u64("FS_SCAN_CACHE_TTL_MS", DEFAULT_CACHE_TTL_MS)
}

/// Configured empty-result recheck threshold in milliseconds
/// (`FS_SCAN_EMPTY_RECHECK_MS`, default 200).
pub fn empty_recheck_ms() -> u64 {
    env_u64("FS_SCAN_EMPTY_RECHECK_MS", DEFAULT_EMPTY_RECHECK_MS)
}

/// Maximum number of cached scans (`FS_SCAN_CACHE_MAX_ENTRIES`, default 16).
fn max_cache_entries() -> usize {
    env_usize("FS_SCAN_CACHE_MAX_ENTRIES", DEFAULT_MAX_CACHE_ENTRIES)
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Cache internals
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
struct CacheKey {
|
||||
root: PathBuf,
|
||||
include_hidden: bool,
|
||||
use_gitignore: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CacheEntry {
|
||||
created_at: Instant,
|
||||
entries: Vec<GlobMatch>,
|
||||
}
|
||||
|
||||
static FS_CACHE: LazyLock<DashMap<CacheKey, CacheEntry>> = LazyLock::new(DashMap::new);
|
||||
|
||||
/// Result of a cache-aware scan, including the age of the cached data.
|
||||
pub struct ScanResult {
|
||||
/// Scanned filesystem entries.
|
||||
pub entries: Vec<GlobMatch>,
|
||||
/// How old the cached data is in milliseconds (0 = freshly scanned).
|
||||
pub cache_age_ms: u64,
|
||||
}
|
||||
|
||||
fn evict_oldest() {
|
||||
let max = max_cache_entries();
|
||||
if FS_CACHE.len() > max {
|
||||
if let Some(oldest_key) = FS_CACHE
|
||||
.iter()
|
||||
.min_by_key(|entry| entry.value().created_at)
|
||||
.map(|entry| entry.key().clone())
|
||||
{
|
||||
FS_CACHE.remove(&oldest_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Path utilities
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/// Resolve a search path string to a canonical `PathBuf` (must be a directory).
|
||||
pub fn resolve_search_path(path: &str) -> Result<PathBuf> {
|
||||
let candidate = PathBuf::from(path);
|
||||
let root = if candidate.is_absolute() {
|
||||
candidate
|
||||
} else {
|
||||
let cwd = std::env::current_dir()
|
||||
.map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))?;
|
||||
cwd.join(candidate)
|
||||
};
|
||||
let metadata = std::fs::metadata(&root)
|
||||
.map_err(|err| Error::from_reason(format!("Path not found: {err}")))?;
|
||||
if !metadata.is_dir() {
|
||||
return Err(Error::from_reason(
|
||||
"Search path must be a directory".to_string(),
|
||||
));
|
||||
}
|
||||
Ok(std::fs::canonicalize(&root).unwrap_or(root))
|
||||
}
|
||||
|
||||
/// Normalize `path` to a forward-slash string relative to `root`.
///
/// Returns a borrowed slice when no rewriting is needed; only allocates on
/// Windows when backslashes have to be replaced.
pub fn normalize_relative_path<'a>(root: &Path, path: &'a Path) -> Cow<'a, str> {
    let relative = path.strip_prefix(root).unwrap_or(path).to_string_lossy();
    if cfg!(windows) && relative.contains('\\') {
        Cow::Owned(relative.replace('\\', "/"))
    } else {
        relative
    }
}
|
||||
|
||||
/// True when any component of `path` equals `target` exactly.
pub fn contains_component(path: &Path, target: &str) -> bool {
    path.components()
        .any(|component| component.as_os_str().to_str() == Some(target))
}
|
||||
|
||||
pub fn should_skip_path(path: &Path, mentions_node_modules: bool) -> bool {
|
||||
if contains_component(path, ".git") {
|
||||
return true;
|
||||
}
|
||||
if !mentions_node_modules && contains_component(path, "node_modules") {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn classify_file_type(path: &Path) -> Option<(FileType, Option<f64>)> {
|
||||
let metadata = std::fs::symlink_metadata(path).ok()?;
|
||||
let file_type = metadata.file_type();
|
||||
let mtime_ms = metadata
|
||||
.modified()
|
||||
.ok()
|
||||
.and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
|
||||
.map(|d| d.as_millis() as f64);
|
||||
if file_type.is_symlink() {
|
||||
Some((FileType::Symlink, mtime_ms))
|
||||
} else if file_type.is_dir() {
|
||||
Some((FileType::Dir, mtime_ms))
|
||||
} else {
|
||||
Some((FileType::File, mtime_ms))
|
||||
}
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Walker + collection
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/// Builds a deterministic filesystem walker configured for visibility and
|
||||
/// ignore rules.
|
||||
pub fn build_walker(root: &Path, include_hidden: bool, use_gitignore: bool) -> WalkBuilder {
|
||||
let mut builder = WalkBuilder::new(root);
|
||||
builder
|
||||
.hidden(!include_hidden)
|
||||
.follow_links(false)
|
||||
.sort_by_file_path(|a, b| a.cmp(b));
|
||||
|
||||
if use_gitignore {
|
||||
builder
|
||||
.git_ignore(true)
|
||||
.git_exclude(true)
|
||||
.git_global(true)
|
||||
.ignore(true)
|
||||
.parents(true);
|
||||
} else {
|
||||
builder
|
||||
.git_ignore(false)
|
||||
.git_exclude(false)
|
||||
.git_global(false)
|
||||
.ignore(false)
|
||||
.parents(false);
|
||||
}
|
||||
|
||||
builder
|
||||
}
|
||||
|
||||
/// Scans filesystem entries and records normalized relative paths with file
|
||||
/// metadata.
|
||||
fn collect_entries(
|
||||
root: &Path,
|
||||
include_hidden: bool,
|
||||
use_gitignore: bool,
|
||||
ct: &task::CancelToken,
|
||||
) -> Result<Vec<GlobMatch>> {
|
||||
let builder = build_walker(root, include_hidden, use_gitignore);
|
||||
let mut entries = Vec::new();
|
||||
|
||||
for entry in builder.build() {
|
||||
ct.heartbeat()?;
|
||||
|
||||
let Ok(entry) = entry else { continue };
|
||||
let path = entry.path();
|
||||
if should_skip_path(path, true) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let relative = normalize_relative_path(root, path);
|
||||
if relative.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some((file_type, mtime)) = classify_file_type(path) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
entries.push(GlobMatch {
|
||||
path: relative.into_owned(),
|
||||
file_type,
|
||||
mtime,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Cache API
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/// Returns scanned entries using the global TTL cache policy.
|
||||
///
|
||||
/// The returned [`ScanResult::cache_age_ms`] lets callers implement
|
||||
/// empty-result fast recheck: if a query produces zero matches and the cache is
|
||||
/// older than [`empty_recheck_ms()`], call [`force_rescan`] before returning
|
||||
/// empty.
|
||||
pub fn get_or_scan(
|
||||
root: &Path,
|
||||
include_hidden: bool,
|
||||
use_gitignore: bool,
|
||||
ct: &task::CancelToken,
|
||||
) -> Result<ScanResult> {
|
||||
let ttl = cache_ttl_ms();
|
||||
if ttl == 0 {
|
||||
let entries = collect_entries(root, include_hidden, use_gitignore, ct)?;
|
||||
return Ok(ScanResult {
|
||||
entries,
|
||||
cache_age_ms: 0,
|
||||
});
|
||||
}
|
||||
|
||||
let key = CacheKey {
|
||||
root: root.to_path_buf(),
|
||||
include_hidden,
|
||||
use_gitignore,
|
||||
};
|
||||
|
||||
let now = Instant::now();
|
||||
if let Some(entry) = FS_CACHE.get(&key) {
|
||||
let age = now.duration_since(entry.created_at);
|
||||
if age < Duration::from_millis(ttl) {
|
||||
return Ok(ScanResult {
|
||||
entries: entry.entries.clone(),
|
||||
cache_age_ms: age.as_millis() as u64,
|
||||
});
|
||||
}
|
||||
drop(entry);
|
||||
FS_CACHE.remove(&key);
|
||||
}
|
||||
|
||||
let entries = collect_entries(root, include_hidden, use_gitignore, ct)?;
|
||||
FS_CACHE.insert(
|
||||
key,
|
||||
CacheEntry {
|
||||
created_at: now,
|
||||
entries: entries.clone(),
|
||||
},
|
||||
);
|
||||
evict_oldest();
|
||||
Ok(ScanResult {
|
||||
entries,
|
||||
cache_age_ms: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Force a fresh scan, replacing any existing cache entry.
|
||||
///
|
||||
/// When `store` is false, the fresh scan result is returned without
|
||||
/// repopulating the cache.
|
||||
pub fn force_rescan(
|
||||
root: &Path,
|
||||
include_hidden: bool,
|
||||
use_gitignore: bool,
|
||||
store: bool,
|
||||
ct: &task::CancelToken,
|
||||
) -> Result<Vec<GlobMatch>> {
|
||||
let key = CacheKey {
|
||||
root: root.to_path_buf(),
|
||||
include_hidden,
|
||||
use_gitignore,
|
||||
};
|
||||
FS_CACHE.remove(&key);
|
||||
|
||||
let entries = collect_entries(root, include_hidden, use_gitignore, ct)?;
|
||||
if store {
|
||||
let now = Instant::now();
|
||||
FS_CACHE.insert(
|
||||
key,
|
||||
CacheEntry {
|
||||
created_at: now,
|
||||
entries: entries.clone(),
|
||||
},
|
||||
);
|
||||
evict_oldest();
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Invalidation
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/// Invalidate cache entries whose root contains `target`.
|
||||
pub fn invalidate_path(target: &Path) {
|
||||
let keys_to_remove: Vec<CacheKey> = FS_CACHE
|
||||
.iter()
|
||||
.filter(|entry| target.starts_with(&entry.key().root))
|
||||
.map(|entry| entry.key().clone())
|
||||
.collect();
|
||||
for key in keys_to_remove {
|
||||
FS_CACHE.remove(&key);
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear the entire scan cache.
|
||||
pub fn invalidate_all() {
|
||||
FS_CACHE.clear();
|
||||
}
|
||||
|
||||
/// Invalidate the filesystem scan cache.
|
||||
///
|
||||
/// When called with a path, removes entries for roots containing that path.
|
||||
/// When called without a path, clears the entire cache.
|
||||
///
|
||||
/// Intended to be called after agent file mutations (write, edit, rename,
|
||||
/// delete).
|
||||
#[napi(js_name = "invalidateFsScanCache")]
|
||||
pub fn invalidate_fs_scan_cache(path: Option<String>) {
|
||||
match path {
|
||||
Some(p) => {
|
||||
let candidate = PathBuf::from(&p);
|
||||
let absolute = if candidate.is_absolute() {
|
||||
candidate
|
||||
} else if let Ok(cwd) = std::env::current_dir() {
|
||||
cwd.join(candidate)
|
||||
} else {
|
||||
PathBuf::from(&p)
|
||||
};
|
||||
let target = std::fs::canonicalize(&absolute)
|
||||
.or_else(|_| {
|
||||
absolute
|
||||
.parent()
|
||||
.and_then(|parent| std::fs::canonicalize(parent).ok())
|
||||
.and_then(|parent| absolute.file_name().map(|name| parent.join(name)))
|
||||
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::NotFound))
|
||||
})
|
||||
.unwrap_or(absolute);
|
||||
invalidate_path(&target);
|
||||
}
|
||||
None => invalidate_all(),
|
||||
}
|
||||
}
|
||||
275
native/crates/engine/src/glob.rs
Normal file
275
native/crates/engine/src/glob.rs
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
//! Filesystem discovery with glob patterns, ignore semantics, and shared scan
|
||||
//! caching.
|
||||
//!
|
||||
//! # Overview
|
||||
//! Resolves a search root, obtains scanned entries via [`fs_cache`], applies
|
||||
//! glob matching plus optional file-type filtering, and optionally streams each
|
||||
//! accepted match through a callback.
|
||||
//!
|
||||
//! The walker always skips `.git`, and skips `node_modules` unless explicitly
|
||||
//! requested.
|
||||
//!
|
||||
//! # Example
|
||||
//! ```ignore
|
||||
//! // JS: await native.glob({ pattern: "*.rs", path: "." })
|
||||
//! ```
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use globset::GlobSet;
|
||||
use napi::{
|
||||
bindgen_prelude::*,
|
||||
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
|
||||
};
|
||||
use napi_derive::napi;
|
||||
|
||||
pub use crate::fs_cache::{FileType, GlobMatch};
|
||||
use crate::{fs_cache, glob_util, task};
|
||||
|
||||
/// Input options for `glob`, including traversal, filtering, and cancellation.
|
||||
#[napi(object)]
|
||||
pub struct GlobOptions {
|
||||
/// Glob pattern to match (e.g., "*.ts").
|
||||
pub pattern: String,
|
||||
/// Directory to search.
|
||||
pub path: String,
|
||||
/// Filter by file type: "file", "dir", or "symlink". Symlinks are
|
||||
/// matched for file/dir filters based on their target type.
|
||||
#[napi(js_name = "fileType")]
|
||||
pub file_type: Option<FileType>,
|
||||
/// Match simple patterns recursively by default (`*.ts` -> recursive).
|
||||
pub recursive: Option<bool>,
|
||||
/// Include hidden files (default: false).
|
||||
pub hidden: Option<bool>,
|
||||
/// Maximum number of results to return.
|
||||
#[napi(js_name = "maxResults")]
|
||||
pub max_results: Option<u32>,
|
||||
/// Respect .gitignore files (default: true).
|
||||
pub gitignore: Option<bool>,
|
||||
/// Enable shared filesystem scan cache (default: false).
|
||||
pub cache: Option<bool>,
|
||||
/// Sort results by mtime (most recent first) before applying limit.
|
||||
#[napi(js_name = "sortByMtime")]
|
||||
pub sort_by_mtime: Option<bool>,
|
||||
/// Include `node_modules` entries when the pattern does not explicitly
|
||||
/// mention them.
|
||||
#[napi(js_name = "includeNodeModules")]
|
||||
pub include_node_modules: Option<bool>,
|
||||
/// Timeout in milliseconds for the operation.
|
||||
#[napi(js_name = "timeoutMs")]
|
||||
pub timeout_ms: Option<u32>,
|
||||
}
|
||||
|
||||
/// Result payload returned by a glob operation.
|
||||
#[napi(object)]
|
||||
pub struct GlobResult {
|
||||
/// Matched filesystem entries.
|
||||
pub matches: Vec<GlobMatch>,
|
||||
/// Number of returned matches (`matches.len()`), clamped to `u32::MAX`.
|
||||
pub total_matches: u32,
|
||||
}
|
||||
|
||||
/// Internal runtime config for a single glob execution.
|
||||
struct GlobConfig {
|
||||
root: std::path::PathBuf,
|
||||
pattern: String,
|
||||
recursive: bool,
|
||||
include_hidden: bool,
|
||||
file_type_filter: Option<FileType>,
|
||||
max_results: usize,
|
||||
use_gitignore: bool,
|
||||
mentions_node_modules: bool,
|
||||
sort_by_mtime: bool,
|
||||
use_cache: bool,
|
||||
}
|
||||
|
||||
fn resolve_symlink_target_type(root: &Path, relative_path: &str) -> Option<FileType> {
|
||||
let target_path = root.join(relative_path);
|
||||
let metadata = std::fs::metadata(target_path).ok()?;
|
||||
if metadata.is_dir() {
|
||||
Some(FileType::Dir)
|
||||
} else if metadata.is_file() {
|
||||
Some(FileType::File)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_file_type_filter(entry: &GlobMatch, config: &GlobConfig) -> Option<FileType> {
|
||||
let Some(filter) = config.file_type_filter else {
|
||||
return Some(entry.file_type);
|
||||
};
|
||||
if entry.file_type == filter {
|
||||
return Some(entry.file_type);
|
||||
}
|
||||
if entry.file_type != FileType::Symlink {
|
||||
return None;
|
||||
}
|
||||
match filter {
|
||||
FileType::File | FileType::Dir => {
|
||||
let resolved = resolve_symlink_target_type(&config.root, &entry.path)?;
|
||||
if resolved == filter {
|
||||
Some(resolved)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
FileType::Symlink => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Filter and collect matching entries from a pre-scanned list.
|
||||
fn filter_entries(
|
||||
entries: &[GlobMatch],
|
||||
glob_set: &GlobSet,
|
||||
config: &GlobConfig,
|
||||
on_match: Option<&ThreadsafeFunction<GlobMatch>>,
|
||||
ct: &task::CancelToken,
|
||||
) -> Result<Vec<GlobMatch>> {
|
||||
let mut matches = Vec::new();
|
||||
if config.max_results == 0 {
|
||||
return Ok(matches);
|
||||
}
|
||||
|
||||
for entry in entries {
|
||||
ct.heartbeat()?;
|
||||
if fs_cache::should_skip_path(Path::new(&entry.path), config.mentions_node_modules) {
|
||||
continue;
|
||||
}
|
||||
if !glob_set.is_match(&entry.path) {
|
||||
continue;
|
||||
}
|
||||
let Some(effective_file_type) = apply_file_type_filter(entry, config) else {
|
||||
continue;
|
||||
};
|
||||
let mut matched_entry = entry.clone();
|
||||
matched_entry.file_type = effective_file_type;
|
||||
if let Some(callback) = on_match {
|
||||
callback.call(
|
||||
Ok(matched_entry.clone()),
|
||||
ThreadsafeFunctionCallMode::NonBlocking,
|
||||
);
|
||||
}
|
||||
|
||||
matches.push(matched_entry);
|
||||
if !config.sort_by_mtime && matches.len() >= config.max_results {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
/// Executes matching/filtering over scanned entries and optionally streams each
|
||||
/// hit.
|
||||
fn run_glob(
|
||||
config: GlobConfig,
|
||||
on_match: Option<&ThreadsafeFunction<GlobMatch>>,
|
||||
ct: task::CancelToken,
|
||||
) -> Result<GlobResult> {
|
||||
let glob_set = glob_util::compile_glob(&config.pattern, config.recursive)?;
|
||||
if config.max_results == 0 {
|
||||
return Ok(GlobResult {
|
||||
matches: Vec::new(),
|
||||
total_matches: 0,
|
||||
});
|
||||
}
|
||||
|
||||
let mut matches = if config.use_cache {
|
||||
let scan =
|
||||
fs_cache::get_or_scan(&config.root, config.include_hidden, config.use_gitignore, &ct)?;
|
||||
let mut matches = filter_entries(&scan.entries, &glob_set, &config, on_match, &ct)?;
|
||||
// Empty-result recheck: if we got zero matches from a cached scan that's old
|
||||
// enough, force a rescan and try once more before returning empty.
|
||||
if matches.is_empty() && scan.cache_age_ms >= fs_cache::empty_recheck_ms() {
|
||||
let fresh = fs_cache::force_rescan(
|
||||
&config.root,
|
||||
config.include_hidden,
|
||||
config.use_gitignore,
|
||||
true,
|
||||
&ct,
|
||||
)?;
|
||||
matches = filter_entries(&fresh, &glob_set, &config, on_match, &ct)?;
|
||||
}
|
||||
matches
|
||||
} else {
|
||||
let fresh = fs_cache::force_rescan(
|
||||
&config.root,
|
||||
config.include_hidden,
|
||||
config.use_gitignore,
|
||||
false,
|
||||
&ct,
|
||||
)?;
|
||||
filter_entries(&fresh, &glob_set, &config, on_match, &ct)?
|
||||
};
|
||||
|
||||
if config.sort_by_mtime {
|
||||
matches.sort_by(|a, b| {
|
||||
let a_mtime = a.mtime.unwrap_or(0.0);
|
||||
let b_mtime = b.mtime.unwrap_or(0.0);
|
||||
b_mtime
|
||||
.partial_cmp(&a_mtime)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
matches.truncate(config.max_results);
|
||||
}
|
||||
let total_matches = matches.len().min(u32::MAX as usize) as u32;
|
||||
Ok(GlobResult {
|
||||
matches,
|
||||
total_matches,
|
||||
})
|
||||
}
|
||||
|
||||
/// Find filesystem entries matching a glob pattern.
|
||||
///
|
||||
/// Resolves the search root, scans entries, applies glob and optional file-type
|
||||
/// filters, and optionally streams each accepted match through `on_match`.
|
||||
///
|
||||
/// If `sortByMtime` is enabled, all matching entries are collected, sorted by
|
||||
/// descending mtime, then truncated to `maxResults`.
|
||||
#[napi(js_name = "glob")]
|
||||
pub fn glob(
|
||||
options: GlobOptions,
|
||||
#[napi(ts_arg_type = "((match: GlobMatch) => void) | undefined | null")] on_match: Option<
|
||||
ThreadsafeFunction<GlobMatch>,
|
||||
>,
|
||||
) -> task::Async<GlobResult> {
|
||||
let GlobOptions {
|
||||
pattern,
|
||||
path,
|
||||
file_type,
|
||||
recursive,
|
||||
hidden,
|
||||
max_results,
|
||||
gitignore,
|
||||
sort_by_mtime,
|
||||
cache,
|
||||
include_node_modules,
|
||||
timeout_ms,
|
||||
} = options;
|
||||
|
||||
let pattern = pattern.trim();
|
||||
let pattern = if pattern.is_empty() { "*" } else { pattern };
|
||||
let pattern = pattern.to_string();
|
||||
|
||||
let ct = task::CancelToken::new(timeout_ms);
|
||||
|
||||
task::blocking("glob", ct, move |ct| {
|
||||
run_glob(
|
||||
GlobConfig {
|
||||
root: fs_cache::resolve_search_path(&path)?,
|
||||
include_hidden: hidden.unwrap_or(false),
|
||||
file_type_filter: file_type,
|
||||
recursive: recursive.unwrap_or(true),
|
||||
max_results: max_results.map_or(usize::MAX, |value| value as usize),
|
||||
use_gitignore: gitignore.unwrap_or(true),
|
||||
mentions_node_modules: include_node_modules
|
||||
.unwrap_or_else(|| pattern.contains("node_modules")),
|
||||
sort_by_mtime: sort_by_mtime.unwrap_or(false),
|
||||
use_cache: cache.unwrap_or(false),
|
||||
pattern,
|
||||
},
|
||||
on_match.as_ref(),
|
||||
ct,
|
||||
)
|
||||
})
|
||||
}
|
||||
109
native/crates/engine/src/glob_util.rs
Normal file
109
native/crates/engine/src/glob_util.rs
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
//! Shared glob-pattern helpers used by [`crate::glob`].
|
||||
|
||||
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use napi::bindgen_prelude::*;
|
||||
|
||||
/// Normalize a raw glob string: fix path separators, optionally prepend `**/`
|
||||
/// for recursive matching, and close any unclosed `{` alternation groups.
|
||||
pub fn build_glob_pattern(glob: &str, recursive: bool) -> String {
|
||||
let normalized = glob.replace('\\', "/");
|
||||
let pattern = if !recursive || normalized.contains('/') || normalized.starts_with("**") {
|
||||
normalized
|
||||
} else {
|
||||
format!("**/{normalized}")
|
||||
};
|
||||
fix_unclosed_braces(pattern)
|
||||
}
|
||||
|
||||
/// Compile a glob pattern string into a [`GlobSet`].
|
||||
///
|
||||
/// When `recursive` is true, simple patterns (no path separators, no leading
|
||||
/// `**`) are automatically prefixed with `**/`.
|
||||
pub fn compile_glob(glob: &str, recursive: bool) -> Result<GlobSet> {
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
let pattern = build_glob_pattern(glob, recursive);
|
||||
let glob = GlobBuilder::new(&pattern)
|
||||
.literal_separator(true)
|
||||
.build()
|
||||
.map_err(|err| Error::from_reason(format!("Invalid glob pattern: {err}")))?;
|
||||
builder.add(glob);
|
||||
builder
|
||||
.build()
|
||||
.map_err(|err| Error::from_reason(format!("Failed to build glob matcher: {err}")))
|
||||
}
|
||||
|
||||
/// Append missing `}` characters to balance unclosed `{` groups.
///
/// LLMs occasionally produce patterns like `*.{ts,js` without the closing
/// `}`; rather than failing, we close the groups. Balanced patterns pass
/// through untouched.
fn fix_unclosed_braces(mut pattern: String) -> String {
    let opens = pattern.matches('{').count();
    let closes = pattern.matches('}').count();
    pattern.extend(std::iter::repeat('}').take(opens.saturating_sub(closes)));
    pattern
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn recursive_prefix_added_to_bare_pattern() {
        assert_eq!(build_glob_pattern("*.ts", true), "**/*.ts");
    }

    #[test]
    fn path_separator_suppresses_prefix() {
        assert_eq!(build_glob_pattern("src/*.ts", true), "src/*.ts");
    }

    #[test]
    fn leading_double_star_suppresses_prefix() {
        assert_eq!(build_glob_pattern("**/*.rs", true), "**/*.rs");
    }

    #[test]
    fn non_recursive_mode_leaves_pattern_alone() {
        assert_eq!(build_glob_pattern("*.ts", false), "*.ts");
    }

    #[test]
    fn backslashes_become_forward_slashes() {
        assert_eq!(build_glob_pattern("src\\**\\*.ts", true), "src/**/*.ts");
    }

    #[test]
    fn single_unclosed_brace_is_closed() {
        assert_eq!(
            build_glob_pattern("*.{ts,tsx,js", true),
            "**/*.{ts,tsx,js}"
        );
    }

    #[test]
    fn nested_unclosed_braces_are_all_closed() {
        assert_eq!(build_glob_pattern("{a,{b,c}", true), "**/{a,{b,c}}");
    }

    #[test]
    fn balanced_braces_pass_through() {
        assert_eq!(build_glob_pattern("*.{ts,js}", true), "**/*.{ts,js}");
    }

    #[test]
    fn valid_pattern_compiles() {
        assert!(compile_glob("*.ts", true).is_ok());
    }

    #[test]
    fn unclosed_brace_still_compiles() {
        assert!(compile_glob("*.{ts,tsx,js", true).is_ok());
    }
}
|
||||
|
|
@ -8,5 +8,9 @@
|
|||
|
||||
#![allow(clippy::needless_pass_by_value)]
|
||||
|
||||
mod fs_cache;
|
||||
mod glob;
|
||||
mod glob_util;
|
||||
mod grep;
|
||||
mod ps;
|
||||
mod task;
|
||||
|
|
|
|||
107
native/crates/engine/src/task.rs
Normal file
107
native/crates/engine/src/task.rs
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
//! Blocking work scheduling for N-API exports.
|
||||
//!
|
||||
//! Runs CPU-bound or blocking Rust work on libuv's thread pool via napi's
|
||||
//! `Task` trait, with cooperative cancellation support.
|
||||
//!
|
||||
//! # Cancellation
|
||||
//! Pass a `CancelToken` to blocking tasks. Work must check
|
||||
//! `CancelToken::heartbeat()` periodically to respect cancellation.
|
||||
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use napi::{Env, Error, Result, Task, bindgen_prelude::*};
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Cancellation
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Token for cooperative cancellation of blocking work.
///
/// Call `heartbeat()` periodically inside long-running work to check for
/// cancellation requests from timeouts.
#[derive(Clone, Default)]
pub struct CancelToken {
    // Absolute instant after which `heartbeat()` reports a timeout;
    // `None` (the default) means the work is never cancelled.
    deadline: Option<Instant>,
}
|
||||
|
||||
impl From<()> for CancelToken {
|
||||
fn from((): ()) -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl CancelToken {
|
||||
/// Create a new cancel token from an optional timeout in milliseconds.
|
||||
pub fn new(timeout_ms: Option<u32>) -> Self {
|
||||
let mut result = Self::default();
|
||||
if let Some(timeout_ms) = timeout_ms {
|
||||
result.deadline = Some(Instant::now() + Duration::from_millis(timeout_ms as u64));
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Check if cancellation has been requested.
|
||||
///
|
||||
/// Returns `Ok(())` if work should continue, or an error if timed out.
|
||||
pub fn heartbeat(&self) -> Result<()> {
|
||||
if let Some(deadline) = self.deadline {
|
||||
if deadline < Instant::now() {
|
||||
return Err(Error::from_reason("Aborted: Timeout"));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Blocking Task - libuv thread pool integration
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Task that runs blocking work on libuv's thread pool.
pub struct Blocking<T>
where
    T: Send + 'static,
{
    // Token handed to the work closure so it can observe timeouts.
    cancel_token: CancelToken,
    // One-shot work closure; `None` once `compute()` has consumed it.
    work: Option<Box<dyn FnOnce(CancelToken) -> Result<T> + Send>>,
}
|
||||
|
||||
impl<T> Task for Blocking<T>
|
||||
where
|
||||
T: ToNapiValue + Send + 'static + TypeName,
|
||||
{
|
||||
type JsValue = T;
|
||||
type Output = T;
|
||||
|
||||
fn compute(&mut self) -> Result<Self::Output> {
|
||||
let work = self
|
||||
.work
|
||||
.take()
|
||||
.ok_or_else(|| Error::from_reason("BlockingTask: work already consumed"))?;
|
||||
work(self.cancel_token.clone())
|
||||
}
|
||||
|
||||
fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience alias: a scheduled blocking task that resolves to `T` (a `Promise<T>` on the JS side).
pub type Async<T> = AsyncTask<Blocking<T>>;
|
||||
|
||||
/// Create an `AsyncTask` that runs blocking work on libuv's thread pool.
|
||||
///
|
||||
/// Returns `AsyncTask<Blocking<T>>` which becomes `Promise<T>` on the JS side.
|
||||
pub fn blocking<T, F>(
|
||||
_tag: &'static str,
|
||||
cancel_token: impl Into<CancelToken>,
|
||||
work: F,
|
||||
) -> AsyncTask<Blocking<T>>
|
||||
where
|
||||
F: FnOnce(CancelToken) -> Result<T> + Send + 'static,
|
||||
T: ToNapiValue + TypeName + Send + 'static,
|
||||
{
|
||||
AsyncTask::new(Blocking {
|
||||
cancel_token: cancel_token.into(),
|
||||
work: Some(Box::new(work)),
|
||||
})
|
||||
}
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
{
|
||||
"name": "@gsd/native",
|
||||
"version": "0.1.0",
|
||||
"description": "Native Rust bindings for GSD \u2014 high-performance grep via N-API",
|
||||
"description": "Native Rust bindings for GSD — high-performance grep, glob, and process management via N-API",
|
||||
"type": "module",
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts",
|
||||
"scripts": {
|
||||
"build:native": "node ../../native/scripts/build.js",
|
||||
"build:native:dev": "node ../../native/scripts/build.js --dev",
|
||||
"test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs"
|
||||
"test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs src/__tests__/glob.test.mjs"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
|
|
@ -22,6 +22,10 @@
|
|||
"./ps": {
|
||||
"types": "./src/ps/index.ts",
|
||||
"import": "./src/ps/index.ts"
|
||||
},
|
||||
"./glob": {
|
||||
"types": "./src/glob/index.ts",
|
||||
"import": "./src/glob/index.ts"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
|
|
|
|||
237
packages/native/src/__tests__/glob.test.mjs
Normal file
237
packages/native/src/__tests__/glob.test.mjs
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
import { test, describe } from "node:test";
import assert from "node:assert/strict";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import * as fs from "node:fs";
import * as os from "node:os";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// Load the native addon directly
// (packages/native/src/__tests__ -> repo root -> native/addon)
const addonDir = path.resolve(
  __dirname,
  "..",
  "..",
  "..",
  "..",
  "native",
  "addon",
);
const platformTag = `${process.platform}-${process.arch}`;
// Prefer the platform-tagged release build; fall back to a local dev build.
const candidates = [
  path.join(addonDir, `gsd_engine.${platformTag}.node`),
  path.join(addonDir, "gsd_engine.dev.node"),
];

let native;
for (const candidate of candidates) {
  try {
    native = require(candidate);
    break;
  } catch {
    // try next
  }
}

// Fail fast with an actionable message when no addon binary is present.
if (!native) {
  console.error(
    "Native addon not found. Run `npm run build:native -w @gsd/native` first.",
  );
  process.exit(1);
}
|
||||
|
||||
// Integration tests for the native glob export. Each test builds an isolated
// temp directory and removes it afterward via t.after().
describe("native glob: glob()", () => {
  test("finds files matching a pattern", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.writeFileSync(path.join(tmpDir, "file1.ts"), "const a = 1;");
    fs.writeFileSync(path.join(tmpDir, "file2.ts"), "const b = 2;");
    fs.writeFileSync(path.join(tmpDir, "file3.js"), "const c = 3;");

    const result = await native.glob({ pattern: "*.ts", path: tmpDir });

    // Only the two .ts files should match; paths are relative to the root.
    assert.equal(result.totalMatches, 2);
    assert.equal(result.matches.length, 2);
    const paths = result.matches.map((m) => m.path).sort();
    assert.deepEqual(paths, ["file1.ts", "file2.ts"]);
  });

  test("recursive matching into subdirectories", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.mkdirSync(path.join(tmpDir, "src"));
    fs.mkdirSync(path.join(tmpDir, "src", "nested"));
    fs.writeFileSync(path.join(tmpDir, "root.ts"), "");
    fs.writeFileSync(path.join(tmpDir, "src", "a.ts"), "");
    fs.writeFileSync(path.join(tmpDir, "src", "nested", "b.ts"), "");

    // A simple "*.ts" pattern matches recursively by default.
    const result = await native.glob({ pattern: "*.ts", path: tmpDir });

    assert.equal(result.totalMatches, 3);
    const paths = result.matches.map((m) => m.path).sort();
    assert.ok(paths.includes("root.ts"));
    assert.ok(paths.includes("src/a.ts"));
    assert.ok(paths.includes("src/nested/b.ts"));
  });

  test("respects maxResults limit", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    for (let i = 0; i < 10; i++) {
      fs.writeFileSync(path.join(tmpDir, `file${i}.txt`), "");
    }

    const result = await native.glob({
      pattern: "*.txt",
      path: tmpDir,
      maxResults: 3,
    });

    // Note: totalMatches reflects the truncated count, not all 10 files.
    assert.equal(result.matches.length, 3);
    assert.equal(result.totalMatches, 3);
  });

  test("filters by file type (directories only)", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.mkdirSync(path.join(tmpDir, "dir1"));
    fs.mkdirSync(path.join(tmpDir, "dir2"));
    fs.writeFileSync(path.join(tmpDir, "file.txt"), "");

    const result = await native.glob({
      pattern: "*",
      path: tmpDir,
      recursive: false,
      fileType: 2, // Dir
    });

    assert.equal(result.totalMatches, 2);
    const paths = result.matches.map((m) => m.path).sort();
    assert.deepEqual(paths, ["dir1", "dir2"]);
  });

  test("respects .gitignore", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    // Init a git repo so .gitignore is respected
    // (an empty .git directory is enough to mark the root as a repo).
    fs.mkdirSync(path.join(tmpDir, ".git"));
    fs.writeFileSync(path.join(tmpDir, ".gitignore"), "ignored.txt\n");
    fs.writeFileSync(path.join(tmpDir, "kept.txt"), "");
    fs.writeFileSync(path.join(tmpDir, "ignored.txt"), "");

    const result = await native.glob({
      pattern: "*.txt",
      path: tmpDir,
      gitignore: true,
    });

    assert.equal(result.totalMatches, 1);
    assert.equal(result.matches[0].path, "kept.txt");
  });

  test("includes gitignored files when gitignore=false", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.mkdirSync(path.join(tmpDir, ".git"));
    fs.writeFileSync(path.join(tmpDir, ".gitignore"), "ignored.txt\n");
    fs.writeFileSync(path.join(tmpDir, "kept.txt"), "");
    fs.writeFileSync(path.join(tmpDir, "ignored.txt"), "");

    const result = await native.glob({
      pattern: "*.txt",
      path: tmpDir,
      gitignore: false,
    });

    // Both files appear when gitignore handling is disabled.
    assert.equal(result.totalMatches, 2);
  });

  test("skips node_modules by default", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.mkdirSync(path.join(tmpDir, "node_modules"));
    fs.writeFileSync(path.join(tmpDir, "node_modules", "dep.js"), "");
    fs.writeFileSync(path.join(tmpDir, "app.js"), "");

    // gitignore=false isolates the node_modules exclusion from gitignore logic.
    const result = await native.glob({
      pattern: "*.js",
      path: tmpDir,
      gitignore: false,
    });

    assert.equal(result.totalMatches, 1);
    assert.equal(result.matches[0].path, "app.js");
  });

  test("sortByMtime returns most recent first", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.writeFileSync(path.join(tmpDir, "old.txt"), "old");
    // Ensure different mtime (backdate old.txt by 5s rather than sleeping).
    const now = new Date();
    fs.utimesSync(
      path.join(tmpDir, "old.txt"),
      new Date(now.getTime() - 5000),
      new Date(now.getTime() - 5000),
    );
    fs.writeFileSync(path.join(tmpDir, "new.txt"), "new");

    const result = await native.glob({
      pattern: "*.txt",
      path: tmpDir,
      sortByMtime: true,
    });

    assert.equal(result.totalMatches, 2);
    assert.equal(result.matches[0].path, "new.txt");
    assert.equal(result.matches[1].path, "old.txt");
  });

  test("errors on non-existent path", async () => {
    await assert.rejects(
      () =>
        native.glob({
          pattern: "*.txt",
          path: "/nonexistent/path/that/does/not/exist",
        }),
      /Path not found/,
    );
  });

  test("returns mtime for each entry", async (t) => {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
    t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));

    fs.writeFileSync(path.join(tmpDir, "test.txt"), "content");

    const result = await native.glob({ pattern: "*.txt", path: tmpDir });

    assert.equal(result.matches.length, 1);
    assert.ok(typeof result.matches[0].mtime === "number");
    // mtime should be within the last minute
    const oneMinuteAgo = Date.now() - 60_000;
    assert.ok(result.matches[0].mtime > oneMinuteAgo);
  });
});
|
||||
|
||||
// Smoke tests: cache invalidation is fire-and-forget, so the only observable
// contract here is that both call shapes are accepted without throwing.
describe("native glob: invalidateFsScanCache()", () => {
  test("can be called with a path", () => {
    // Should not throw
    native.invalidateFsScanCache("/tmp");
  });

  test("can be called without arguments", () => {
    // Should not throw (omitting the path clears the whole cache)
    native.invalidateFsScanCache();
  });
});
|
||||
44
packages/native/src/glob/index.ts
Normal file
44
packages/native/src/glob/index.ts
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Native glob module using N-API.
|
||||
*
|
||||
* Gitignore-respecting filesystem discovery backed by Rust's `ignore` and
|
||||
* `globset` crates, with an optional TTL-based scan cache for repeated queries.
|
||||
*/
|
||||
|
||||
import { native } from "../native.js";
|
||||
import type {
|
||||
GlobMatch,
|
||||
GlobOptions,
|
||||
GlobResult,
|
||||
} from "./types.js";
|
||||
|
||||
export type { FileType, GlobMatch, GlobOptions, GlobResult } from "./types.js";
|
||||
|
||||
/**
|
||||
* Find filesystem entries matching a glob pattern.
|
||||
*
|
||||
* Respects .gitignore by default. Skips `.git` and `node_modules` unless
|
||||
* the pattern explicitly mentions them.
|
||||
*
|
||||
* @param options - Glob search options (pattern, path, filters, etc.)
|
||||
* @param onMatch - Optional streaming callback invoked for each match.
|
||||
* @returns Promise resolving to matched entries.
|
||||
*/
|
||||
export function glob(
|
||||
options: GlobOptions,
|
||||
onMatch?: (match: GlobMatch) => void,
|
||||
): Promise<GlobResult> {
|
||||
return native.glob(options, onMatch) as Promise<GlobResult>;
|
||||
}
|
||||
|
||||
/**
 * Invalidate the filesystem scan cache.
 *
 * Call after file mutations (write, edit, rename, delete) to ensure
 * subsequent glob queries see fresh data.
 *
 * @param path - Specific path to invalidate, or omit to clear all.
 */
export function invalidateFsScanCache(path?: string): void {
  // Thin synchronous delegate to the native binding; `undefined` clears
  // the entire cache on the Rust side.
  native.invalidateFsScanCache(path);
}
|
||||
53
packages/native/src/glob/types.ts
Normal file
53
packages/native/src/glob/types.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
/** File type classification for filesystem entries. */
// NOTE(review): the numeric values are passed straight to the native layer
// (see the glob tests, which use `fileType: 2` for directories) — keep them
// in sync with the Rust-side encoding.
export const enum FileType {
  /** Regular file. */
  File = 1,
  /** Directory. */
  Dir = 2,
  /** Symbolic link. */
  Symlink = 3,
}
|
||||
|
||||
/** A single filesystem entry matched by a glob operation. */
export interface GlobMatch {
  /** Relative path from the search root, using forward slashes. */
  path: string;
  /** Resolved filesystem type for the match. */
  fileType: FileType;
  /**
   * Modification time in milliseconds since Unix epoch.
   * May be null — presumably when the mtime cannot be read; confirm
   * against the native implementation.
   */
  mtime: number | null;
}
|
||||
|
||||
/** Options for the glob operation. */
export interface GlobOptions {
  /** Glob pattern to match (e.g., "*.ts"). */
  pattern: string;
  /** Directory to search. */
  path: string;
  /** Filter by file type: File (1), Dir (2), or Symlink (3). */
  fileType?: FileType;
  /** Match simple patterns recursively by default (default: true). */
  recursive?: boolean;
  /** Include hidden files (default: false). */
  hidden?: boolean;
  /** Maximum number of results to return; also caps `totalMatches`. */
  maxResults?: number;
  /** Respect .gitignore files (default: true). */
  gitignore?: boolean;
  /** Enable shared filesystem scan cache (default: false). */
  cache?: boolean;
  /** Sort results by mtime (most recent first) before applying limit. */
  sortByMtime?: boolean;
  /** Include node_modules entries (default: false, unless pattern mentions it). */
  includeNodeModules?: boolean;
  /** Timeout in milliseconds for the operation. */
  timeoutMs?: number;
}
|
||||
|
||||
/** Result payload returned by a glob operation. */
export interface GlobResult {
  /** Matched filesystem entries. */
  matches: GlobMatch[];
  /**
   * Number of returned matches (equals `matches.length`, so it reflects
   * any `maxResults` truncation rather than the full match count).
   */
  totalMatches: number;
}
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
* Modules:
|
||||
* - grep: ripgrep-backed regex search (content + filesystem)
|
||||
* - ps: cross-platform process tree management
|
||||
* - glob: gitignore-respecting filesystem discovery with scan caching
|
||||
*/
|
||||
|
||||
export { searchContent, grep } from "./grep/index.js";
|
||||
|
|
@ -23,3 +24,11 @@ export {
|
|||
processGroupId,
|
||||
killProcessGroup,
|
||||
} from "./ps/index.js";
|
||||
|
||||
// Glob: gitignore-respecting filesystem discovery with scan caching.
export { glob, invalidateFsScanCache } from "./glob/index.js";
export type {
  FileType,
  GlobMatch,
  GlobOptions,
  GlobResult,
} from "./glob/index.js";
|
||||
|
|
|
|||
|
|
@ -47,4 +47,9 @@ export const native = loadNative() as {
|
|||
listDescendants: (pid: number) => number[];
|
||||
processGroupId: (pid: number) => number | null;
|
||||
killProcessGroup: (pgid: number, signal: number) => boolean;
|
||||
glob: (
|
||||
options: unknown,
|
||||
onMatch?: ((match: unknown) => void) | undefined | null,
|
||||
) => Promise<unknown>;
|
||||
invalidateFsScanCache: (path?: string) => void;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue