feat: add native glob and fs_cache modules with gitignore-aware discovery (#226)

Port glob, glob_util, and fs_cache modules from Oh My Pi's pi-natives crate,
adapted for napi-rs v2. Provides gitignore-respecting filesystem discovery
with a TTL-based scan cache, mtime sorting, file-type filtering, and
node_modules exclusion.

Includes a task module for async N-API work scheduling with cooperative
cancellation (timeout-based), TypeScript type declarations and wrapper,
and 12 integration tests covering pattern matching, recursion, gitignore,
maxResults, sortByMtime, fileType filtering, and cache invalidation.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-13 12:45:56 -06:00 committed by GitHub
parent c5bc8625a4
commit c36c8bd0b0
13 changed files with 1341 additions and 2 deletions

66
native/Cargo.lock generated
View file

@ -78,6 +78,20 @@ dependencies = [
"syn",
]
[[package]]
name = "dashmap"
version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
dependencies = [
"cfg-if",
"crossbeam-utils",
"hashbrown",
"lock_api",
"once_cell",
"parking_lot_core",
]
[[package]]
name = "either"
version = "1.15.0"
@ -156,7 +170,10 @@ dependencies = [
name = "gsd-engine"
version = "0.1.0"
dependencies = [
"dashmap",
"globset",
"gsd-grep",
"ignore",
"libc",
"napi",
"napi-build",
@ -174,6 +191,12 @@ dependencies = [
"rayon",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "ignore"
version = "0.4.25"
@ -206,6 +229,15 @@ dependencies = [
"windows-link",
]
[[package]]
name = "lock_api"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.29"
@ -290,6 +322,19 @@ version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "parking_lot_core"
version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-link",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
@ -328,6 +373,15 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.12.3"
@ -366,6 +420,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "semver"
version = "1.0.27"
@ -401,6 +461,12 @@ dependencies = [
"syn",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "syn"
version = "2.0.117"

View file

@ -12,6 +12,9 @@ crate-type = ["cdylib"]
[dependencies]
gsd-grep = { path = "../grep" }
dashmap = "6"
globset = "0.4"
ignore = "0.4"
napi = { version = "2", features = ["napi8"] }
napi-derive = "2"

View file

@ -0,0 +1,423 @@
//! Shared filesystem scan cache for discovery tools (glob).
//!
//! Provides a TTL-based cache of scanned directory entries, with:
//! - Global policy (no per-call TTL tuning)
//! - Explicit invalidation for agent file mutations
//! - Empty-result fast recheck to avoid stale negatives
//!
//! # Policy Configuration (environment overrides)
//! - `FS_SCAN_CACHE_TTL_MS` default `1000`
//! - `FS_SCAN_EMPTY_RECHECK_MS` default `200`
//! - `FS_SCAN_CACHE_MAX_ENTRIES` default `16`
use std::{
borrow::Cow,
path::{Path, PathBuf},
sync::LazyLock,
time::{Duration, Instant},
};
use dashmap::DashMap;
use ignore::WalkBuilder;
use napi::bindgen_prelude::*;
use napi_derive::napi;
use crate::task;
// ═══════════════════════════════════════════════════════════════════════════
// Public types (re-exported by glob)
// ═══════════════════════════════════════════════════════════════════════════
/// Kind of filesystem entry reported for a scanned path.
///
/// The explicit discriminants are the numeric values seen on the JS side
/// (for example `fileType: 2` selects directories).
#[derive(Debug, PartialEq, Eq)]
#[napi]
pub enum FileType {
/// Regular file.
File = 1,
/// Directory.
Dir = 2,
/// Symbolic link.
Symlink = 3,
}
/// A single filesystem entry from a directory scan.
///
/// Exposed to JS as a plain object; entries are cloned out of the scan cache,
/// hence the `Clone` derive.
#[derive(Clone)]
#[napi(object)]
pub struct GlobMatch {
/// Relative path from the search root, using forward slashes.
pub path: String,
/// Resolved filesystem type for the match.
#[napi(js_name = "fileType")]
pub file_type: FileType,
/// Modification time in milliseconds since Unix epoch (from
/// `symlink_metadata`). `None` when the timestamp could not be read or
/// lies before the epoch.
pub mtime: Option<f64>,
}
// ═══════════════════════════════════════════════════════════════════════════
// Cache policy
// ═══════════════════════════════════════════════════════════════════════════
/// Default lifetime of a cached scan, used when `FS_SCAN_CACHE_TTL_MS` is
/// unset or unparsable.
const DEFAULT_CACHE_TTL_MS: u64 = 1_000;
/// Default minimum cache age before an empty query result triggers a rescan,
/// used when `FS_SCAN_EMPTY_RECHECK_MS` is unset or unparsable.
const DEFAULT_EMPTY_RECHECK_MS: u64 = 200;
/// Default maximum number of cached scans, used when
/// `FS_SCAN_CACHE_MAX_ENTRIES` is unset or unparsable.
const DEFAULT_MAX_CACHE_ENTRIES: usize = 16;
/// Reads the environment variable `name` as a `u64`.
///
/// Returns `default` when the variable is unset, is not valid Unicode, or
/// does not parse as a `u64`.
fn env_u64(name: &str, default: u64) -> u64 {
    match std::env::var(name) {
        Ok(raw) => raw.parse::<u64>().unwrap_or(default),
        Err(_) => default,
    }
}
/// Reads the environment variable `name` as a `usize`.
///
/// Returns `default` when the variable is unset, is not valid Unicode, or
/// does not parse as a `usize`.
fn env_usize(name: &str, default: usize) -> usize {
    std::env::var(name).map_or(default, |raw| raw.parse::<usize>().unwrap_or(default))
}
/// Configured cache TTL in milliseconds.
///
/// Read from `FS_SCAN_CACHE_TTL_MS` on every call, falling back to
/// `DEFAULT_CACHE_TTL_MS`.
pub fn cache_ttl_ms() -> u64 {
env_u64("FS_SCAN_CACHE_TTL_MS", DEFAULT_CACHE_TTL_MS)
}
/// Configured empty-result recheck threshold in milliseconds.
///
/// Read from `FS_SCAN_EMPTY_RECHECK_MS` on every call, falling back to
/// `DEFAULT_EMPTY_RECHECK_MS`.
pub fn empty_recheck_ms() -> u64 {
env_u64("FS_SCAN_EMPTY_RECHECK_MS", DEFAULT_EMPTY_RECHECK_MS)
}
/// Configured maximum number of cached scans.
///
/// Read from `FS_SCAN_CACHE_MAX_ENTRIES` on every call, falling back to
/// `DEFAULT_MAX_CACHE_ENTRIES`.
fn max_cache_entries() -> usize {
env_usize("FS_SCAN_CACHE_MAX_ENTRIES", DEFAULT_MAX_CACHE_ENTRIES)
}
// ═══════════════════════════════════════════════════════════════════════════
// Cache internals
// ═══════════════════════════════════════════════════════════════════════════
/// Identity of a cached scan: the scanned root plus the walker options that
/// change which entries the scan yields.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct CacheKey {
// Directory the scan started from.
root: PathBuf,
// Whether hidden entries were included in the scan.
include_hidden: bool,
// Whether ignore files (.gitignore etc.) were honored during the scan.
use_gitignore: bool,
}
/// A cached scan result together with the instant used to compute its age.
#[derive(Clone)]
struct CacheEntry {
// Reference instant for TTL checks and oldest-first eviction.
created_at: Instant,
// Every entry collected by the scan.
entries: Vec<GlobMatch>,
}
/// Process-wide scan cache, created lazily on first access.
static FS_CACHE: LazyLock<DashMap<CacheKey, CacheEntry>> = LazyLock::new(DashMap::new);
/// Result of a cache-aware scan, including the age of the cached data.
///
/// Callers compare `cache_age_ms` against the empty-result recheck threshold
/// to decide whether an empty query deserves a forced rescan.
pub struct ScanResult {
/// Scanned filesystem entries.
pub entries: Vec<GlobMatch>,
/// How old the cached data is in milliseconds (0 = freshly scanned).
pub cache_age_ms: u64,
}
/// Evicts the oldest cached scans until the cache holds at most
/// `max_cache_entries()` entries.
///
/// A single call may need to evict more than one entry: concurrent inserts
/// can overshoot the limit by several entries, and the limit itself is read
/// from the environment on every call and may have been lowered.
fn evict_oldest() {
    let max = max_cache_entries();
    while FS_CACHE.len() > max {
        // Clone the key out of the iterator so no map guard is still held
        // when `remove` takes its write lock below.
        let oldest_key = FS_CACHE
            .iter()
            .min_by_key(|entry| entry.value().created_at)
            .map(|entry| entry.key().clone());
        let Some(oldest_key) = oldest_key else {
            // The map was emptied concurrently; nothing left to evict.
            break;
        };
        FS_CACHE.remove(&oldest_key);
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// Path utilities
// ═══════════════════════════════════════════════════════════════════════════
/// Resolve a search path string to a canonical `PathBuf` (must be a directory).
pub fn resolve_search_path(path: &str) -> Result<PathBuf> {
let candidate = PathBuf::from(path);
let root = if candidate.is_absolute() {
candidate
} else {
let cwd = std::env::current_dir()
.map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))?;
cwd.join(candidate)
};
let metadata = std::fs::metadata(&root)
.map_err(|err| Error::from_reason(format!("Path not found: {err}")))?;
if !metadata.is_dir() {
return Err(Error::from_reason(
"Search path must be a directory".to_string(),
));
}
Ok(std::fs::canonicalize(&root).unwrap_or(root))
}
/// Renders `path` relative to `root` as a string.
///
/// When `path` is not under `root`, the full path is rendered instead. On
/// Windows builds, backslashes are replaced with forward slashes; elsewhere
/// the lossy string form is returned as-is.
pub fn normalize_relative_path<'a>(root: &Path, path: &'a Path) -> Cow<'a, str> {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let text = relative.to_string_lossy();
    if cfg!(windows) && text.contains('\\') {
        return Cow::Owned(text.replace('\\', "/"));
    }
    text
}
/// Returns `true` when any component of `path` is exactly `target`.
///
/// Components that are not valid UTF-8 never match.
pub fn contains_component(path: &Path, target: &str) -> bool {
    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|name| name == target)
}
/// Decides whether `path` is excluded from discovery.
///
/// Paths with a `.git` component are always skipped. Paths with a
/// `node_modules` component are skipped unless `mentions_node_modules` is
/// `true`.
pub fn should_skip_path(path: &Path, mentions_node_modules: bool) -> bool {
    contains_component(path, ".git")
        || (!mentions_node_modules && contains_component(path, "node_modules"))
}
/// Classifies `path` without following symlinks and reads its mtime.
///
/// Returns `None` when the entry's metadata cannot be read. The second tuple
/// element is the modification time in milliseconds since the Unix epoch, or
/// `None` when it is unavailable or precedes the epoch.
pub fn classify_file_type(path: &Path) -> Option<(FileType, Option<f64>)> {
    let metadata = std::fs::symlink_metadata(path).ok()?;

    let mtime_ms = match metadata.modified() {
        Ok(modified) => modified
            .duration_since(std::time::UNIX_EPOCH)
            .ok()
            .map(|since_epoch| since_epoch.as_millis() as f64),
        Err(_) => None,
    };

    let kind = metadata.file_type();
    let classified = if kind.is_symlink() {
        FileType::Symlink
    } else if kind.is_dir() {
        FileType::Dir
    } else {
        // Anything that is neither a symlink nor a directory is reported as a file.
        FileType::File
    };
    Some((classified, mtime_ms))
}
// ═══════════════════════════════════════════════════════════════════════════
// Walker + collection
// ═══════════════════════════════════════════════════════════════════════════
/// Creates a [`WalkBuilder`] rooted at `root` with a stable traversal order.
///
/// Hidden entries are yielded only when `include_hidden` is set, symlinks are
/// never followed, and entries are ordered by file path. All ignore sources
/// (`.gitignore`, git exclude, global gitignore, `.ignore`, and parent
/// directories) are switched on or off together by `use_gitignore`.
pub fn build_walker(root: &Path, include_hidden: bool, use_gitignore: bool) -> WalkBuilder {
    let mut walker = WalkBuilder::new(root);
    walker.hidden(!include_hidden);
    walker.follow_links(false);
    walker.sort_by_file_path(|left, right| left.cmp(right));
    walker
        .git_ignore(use_gitignore)
        .git_exclude(use_gitignore)
        .git_global(use_gitignore)
        .ignore(use_gitignore)
        .parents(use_gitignore);
    walker
}
/// Scans filesystem entries under `root` and records normalized relative
/// paths with file metadata.
///
/// `.git` is excluded based on the path *relative to `root`*, so a search
/// root that itself lives under a `.git` directory is still scanned. `.git`
/// directories below the root are pruned in the walker, so their contents are
/// never traversed even when hidden entries are included. `node_modules` is
/// kept here and filtered later per query.
///
/// Entries whose walk result is an error, whose relative path is empty (the
/// root itself), or whose metadata cannot be read are silently skipped.
///
/// # Errors
/// Returns the cancellation error from `ct.heartbeat()` when the deadline has
/// passed.
fn collect_entries(
    root: &Path,
    include_hidden: bool,
    use_gitignore: bool,
    ct: &task::CancelToken,
) -> Result<Vec<GlobMatch>> {
    let mut builder = build_walker(root, include_hidden, use_gitignore);
    // Prune `.git` below the root; depth 0 is the root itself and always passes.
    builder.filter_entry(|entry| {
        entry.depth() == 0 || entry.file_name() != std::ffi::OsStr::new(".git")
    });

    let mut entries = Vec::new();
    for entry in builder.build() {
        ct.heartbeat()?;
        let Ok(entry) = entry else { continue };
        let path = entry.path();
        // Test the root-relative path so components of the root do not count.
        let relative_path = path.strip_prefix(root).unwrap_or(path);
        if should_skip_path(relative_path, true) {
            continue;
        }
        let relative = normalize_relative_path(root, path);
        if relative.is_empty() {
            continue;
        }
        let Some((file_type, mtime)) = classify_file_type(path) else {
            continue;
        };
        entries.push(GlobMatch {
            path: relative.into_owned(),
            file_type,
            mtime,
        });
    }
    Ok(entries)
}
// ═══════════════════════════════════════════════════════════════════════════
// Cache API
// ═══════════════════════════════════════════════════════════════════════════
/// Returns scanned entries using the global TTL cache policy.
///
/// The returned [`ScanResult::cache_age_ms`] lets callers implement
/// empty-result fast recheck: if a query produces zero matches and the cache is
/// older than [`empty_recheck_ms()`], call [`force_rescan`] before returning
/// empty.
///
/// A TTL of `0` bypasses the cache entirely: nothing is read from or written
/// to it.
///
/// # Errors
/// Propagates any error from the underlying scan, including cancellation.
pub fn get_or_scan(
root: &Path,
include_hidden: bool,
use_gitignore: bool,
ct: &task::CancelToken,
) -> Result<ScanResult> {
let ttl = cache_ttl_ms();
if ttl == 0 {
let entries = collect_entries(root, include_hidden, use_gitignore, ct)?;
return Ok(ScanResult {
entries,
cache_age_ms: 0,
});
}
let key = CacheKey {
root: root.to_path_buf(),
include_hidden,
use_gitignore,
};
let now = Instant::now();
if let Some(entry) = FS_CACHE.get(&key) {
let age = now.duration_since(entry.created_at);
if age < Duration::from_millis(ttl) {
// Fresh hit: hand out a copy of the cached entries.
return Ok(ScanResult {
entries: entry.entries.clone(),
cache_age_ms: age.as_millis() as u64,
});
}
// Stale: release the map guard before removing the same key.
drop(entry);
FS_CACHE.remove(&key);
}
let entries = collect_entries(root, include_hidden, use_gitignore, ct)?;
// `now` was captured before the scan started, so the recorded age of this
// entry includes the time the scan itself took.
FS_CACHE.insert(
key,
CacheEntry {
created_at: now,
entries: entries.clone(),
},
);
evict_oldest();
Ok(ScanResult {
entries,
cache_age_ms: 0,
})
}
/// Scans `root` afresh, discarding any cached scan for the same key first.
///
/// With `store` set, the fresh result is written back to the cache (and the
/// eviction policy is applied); otherwise the cache is left without an entry
/// for this key.
///
/// # Errors
/// Propagates any error from the underlying scan, including cancellation. The
/// previous cache entry has already been removed at that point.
pub fn force_rescan(
    root: &Path,
    include_hidden: bool,
    use_gitignore: bool,
    store: bool,
    ct: &task::CancelToken,
) -> Result<Vec<GlobMatch>> {
    let key = CacheKey {
        root: root.to_path_buf(),
        include_hidden,
        use_gitignore,
    };
    FS_CACHE.remove(&key);

    let scanned = collect_entries(root, include_hidden, use_gitignore, ct)?;
    if !store {
        return Ok(scanned);
    }

    let snapshot = CacheEntry {
        created_at: Instant::now(),
        entries: scanned.clone(),
    };
    FS_CACHE.insert(key, snapshot);
    evict_oldest();
    Ok(scanned)
}
// ═══════════════════════════════════════════════════════════════════════════
// Invalidation
// ═══════════════════════════════════════════════════════════════════════════
/// Invalidate cache entries whose root contains `target`.
pub fn invalidate_path(target: &Path) {
let keys_to_remove: Vec<CacheKey> = FS_CACHE
.iter()
.filter(|entry| target.starts_with(&entry.key().root))
.map(|entry| entry.key().clone())
.collect();
for key in keys_to_remove {
FS_CACHE.remove(&key);
}
}
/// Clear the entire scan cache, regardless of root or scan options.
pub fn invalidate_all() {
FS_CACHE.clear();
}
/// Invalidates the filesystem scan cache from JS.
///
/// Without a path, the whole cache is cleared. With a path, the path is made
/// absolute (relative to the current working directory when available) and
/// canonicalized before being passed to [`invalidate_path`]. If the path
/// itself cannot be canonicalized (for example because it was just deleted),
/// its parent is canonicalized and the file name re-appended; if that also
/// fails, the absolute path is used unchanged.
///
/// Intended to be called after agent file mutations (write, edit, rename,
/// delete).
#[napi(js_name = "invalidateFsScanCache")]
pub fn invalidate_fs_scan_cache(path: Option<String>) {
    let Some(raw) = path else {
        invalidate_all();
        return;
    };

    let candidate = PathBuf::from(&raw);
    let absolute = if candidate.is_absolute() {
        candidate
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(candidate),
            Err(_) => PathBuf::from(&raw),
        }
    };

    let target = match std::fs::canonicalize(&absolute) {
        Ok(resolved) => resolved,
        Err(_) => {
            // Fall back to the canonical parent plus the original file name.
            let via_parent = absolute
                .parent()
                .and_then(|parent| std::fs::canonicalize(parent).ok())
                .and_then(|parent| absolute.file_name().map(|name| parent.join(name)));
            via_parent.unwrap_or(absolute)
        }
    };
    invalidate_path(&target);
}

View file

@ -0,0 +1,275 @@
//! Filesystem discovery with glob patterns, ignore semantics, and shared scan
//! caching.
//!
//! # Overview
//! Resolves a search root, obtains scanned entries via [`fs_cache`], applies
//! glob matching plus optional file-type filtering, and optionally streams each
//! accepted match through a callback.
//!
//! The walker always skips `.git`, and skips `node_modules` unless explicitly
//! requested.
//!
//! # Example
//! ```ignore
//! // JS: await native.glob({ pattern: "*.rs", path: "." })
//! ```
use std::path::Path;
use globset::GlobSet;
use napi::{
bindgen_prelude::*,
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use napi_derive::napi;
pub use crate::fs_cache::{FileType, GlobMatch};
use crate::{fs_cache, glob_util, task};
/// Input options for `glob`, including traversal, filtering, and cancellation.
#[napi(object)]
pub struct GlobOptions {
/// Glob pattern to match (e.g., "*.ts"). A blank pattern is treated as "*".
pub pattern: String,
/// Directory to search.
pub path: String,
/// Filter by file type, given as a [`FileType`] value (file, dir, or
/// symlink). Symlinks are matched for file/dir filters based on their
/// target type.
#[napi(js_name = "fileType")]
pub file_type: Option<FileType>,
/// Match simple patterns recursively (`*.ts` -> recursive). Default: true.
pub recursive: Option<bool>,
/// Include hidden files (default: false).
pub hidden: Option<bool>,
/// Maximum number of results to return. Unlimited when omitted.
#[napi(js_name = "maxResults")]
pub max_results: Option<u32>,
/// Respect .gitignore files (default: true).
pub gitignore: Option<bool>,
/// Enable shared filesystem scan cache (default: false).
pub cache: Option<bool>,
/// Sort results by mtime (most recent first) before applying limit.
/// Default: false.
#[napi(js_name = "sortByMtime")]
pub sort_by_mtime: Option<bool>,
/// Whether to include `node_modules` entries. When omitted, they are
/// included only if the pattern itself contains `node_modules`.
#[napi(js_name = "includeNodeModules")]
pub include_node_modules: Option<bool>,
/// Timeout in milliseconds for the operation. No deadline when omitted.
#[napi(js_name = "timeoutMs")]
pub timeout_ms: Option<u32>,
}
/// Result payload returned by a glob operation.
#[napi(object)]
pub struct GlobResult {
/// Matched filesystem entries, after any sorting and truncation.
pub matches: Vec<GlobMatch>,
/// Number of returned matches (`matches.len()`), clamped to `u32::MAX`.
pub total_matches: u32,
}
/// Internal runtime config for a single glob execution.
///
/// Built from [`GlobOptions`] with every default already applied.
struct GlobConfig {
// Resolved search root.
root: std::path::PathBuf,
// Glob pattern as supplied (trimmed, never empty).
pattern: String,
// Whether simple patterns are matched at any depth.
recursive: bool,
// Whether hidden entries are scanned.
include_hidden: bool,
// Optional file-type restriction.
file_type_filter: Option<FileType>,
// Result cap; `usize::MAX` when the caller gave no limit.
max_results: usize,
// Whether ignore files are honored during the scan.
use_gitignore: bool,
// When true, entries under `node_modules` are not skipped.
mentions_node_modules: bool,
// Whether to sort by descending mtime before truncating.
sort_by_mtime: bool,
// Whether the shared scan cache is consulted.
use_cache: bool,
}
/// Follows the entry at `root/relative_path` and reports what it points to.
///
/// Returns `None` when the target's metadata cannot be read (for example a
/// dangling link) or when the target is neither a directory nor a regular
/// file.
fn resolve_symlink_target_type(root: &Path, relative_path: &str) -> Option<FileType> {
    let target = std::fs::metadata(root.join(relative_path)).ok()?;
    match (target.is_dir(), target.is_file()) {
        (true, _) => Some(FileType::Dir),
        (false, true) => Some(FileType::File),
        (false, false) => None,
    }
}
/// Applies the configured file-type filter to `entry`.
///
/// Returns the file type to report for the entry, or `None` when the entry is
/// filtered out. Without a filter, the entry's own type is reported. A symlink
/// passes a file/dir filter when its target has the requested type, and is
/// then reported with that target type.
fn apply_file_type_filter(entry: &GlobMatch, config: &GlobConfig) -> Option<FileType> {
    let wanted = match config.file_type_filter {
        Some(wanted) => wanted,
        None => return Some(entry.file_type),
    };
    if entry.file_type == wanted {
        return Some(entry.file_type);
    }
    if entry.file_type != FileType::Symlink {
        return None;
    }
    // From here on the entry is a symlink and the filter is not `Symlink`.
    match wanted {
        FileType::Symlink => None,
        FileType::File | FileType::Dir => {
            resolve_symlink_target_type(&config.root, &entry.path)
                .filter(|resolved| *resolved == wanted)
        }
    }
}
/// Filter and collect matching entries from a pre-scanned list.
///
/// An entry is accepted when it is not skipped by the `.git`/`node_modules`
/// rule, matches `glob_set`, and passes the file-type filter. Each accepted
/// entry is sent to `on_match` (if any) before being collected.
///
/// Collection stops at `config.max_results` unless `config.sort_by_mtime` is
/// set; in that case every match is collected (and streamed) so the caller can
/// sort before truncating.
///
/// # Errors
/// Returns the cancellation error from `ct.heartbeat()` when the deadline has
/// passed.
fn filter_entries(
entries: &[GlobMatch],
glob_set: &GlobSet,
config: &GlobConfig,
on_match: Option<&ThreadsafeFunction<GlobMatch>>,
ct: &task::CancelToken,
) -> Result<Vec<GlobMatch>> {
let mut matches = Vec::new();
if config.max_results == 0 {
return Ok(matches);
}
for entry in entries {
ct.heartbeat()?;
if fs_cache::should_skip_path(Path::new(&entry.path), config.mentions_node_modules) {
continue;
}
if !glob_set.is_match(&entry.path) {
continue;
}
let Some(effective_file_type) = apply_file_type_filter(entry, config) else {
continue;
};
// Report the effective type (a symlink may be reported as its target type).
let mut matched_entry = entry.clone();
matched_entry.file_type = effective_file_type;
if let Some(callback) = on_match {
// Queued without blocking; the returned status is not inspected.
callback.call(
Ok(matched_entry.clone()),
ThreadsafeFunctionCallMode::NonBlocking,
);
}
matches.push(matched_entry);
if !config.sort_by_mtime && matches.len() >= config.max_results {
break;
}
}
Ok(matches)
}
/// Runs one glob query: compiles the pattern, obtains scanned entries (from
/// the shared cache when enabled), filters them, and optionally sorts by
/// descending mtime before truncating to the result cap.
///
/// With the cache enabled, an empty result from a cached scan at least
/// `empty_recheck_ms()` old triggers one forced rescan before returning.
///
/// # Errors
/// Returns an error for an invalid pattern, a failed or cancelled scan, or a
/// cancelled filter pass.
fn run_glob(
    config: GlobConfig,
    on_match: Option<&ThreadsafeFunction<GlobMatch>>,
    ct: task::CancelToken,
) -> Result<GlobResult> {
    let glob_set = glob_util::compile_glob(&config.pattern, config.recursive)?;
    if config.max_results == 0 {
        return Ok(GlobResult {
            matches: Vec::new(),
            total_matches: 0,
        });
    }

    let mut matches = if !config.use_cache {
        let scanned = fs_cache::force_rescan(
            &config.root,
            config.include_hidden,
            config.use_gitignore,
            false,
            &ct,
        )?;
        filter_entries(&scanned, &glob_set, &config, on_match, &ct)?
    } else {
        let scan =
            fs_cache::get_or_scan(&config.root, config.include_hidden, config.use_gitignore, &ct)?;
        let cached_hits = filter_entries(&scan.entries, &glob_set, &config, on_match, &ct)?;
        // Empty-result recheck: a sufficiently old cached scan that yields
        // nothing is rescanned once before the empty result is trusted.
        let needs_recheck =
            cached_hits.is_empty() && scan.cache_age_ms >= fs_cache::empty_recheck_ms();
        if needs_recheck {
            let rescanned = fs_cache::force_rescan(
                &config.root,
                config.include_hidden,
                config.use_gitignore,
                true,
                &ct,
            )?;
            filter_entries(&rescanned, &glob_set, &config, on_match, &ct)?
        } else {
            cached_hits
        }
    };

    if config.sort_by_mtime {
        // Newest first; entries without an mtime sort as if at the epoch.
        matches.sort_by(|left, right| {
            right
                .mtime
                .unwrap_or(0.0)
                .partial_cmp(&left.mtime.unwrap_or(0.0))
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        matches.truncate(config.max_results);
    }

    let total_matches = u32::try_from(matches.len()).unwrap_or(u32::MAX);
    Ok(GlobResult {
        matches,
        total_matches,
    })
}
/// Find filesystem entries matching a glob pattern.
///
/// Resolves the search root, scans entries, applies glob and optional file-type
/// filters, and optionally streams each accepted match through `on_match`.
///
/// If `sortByMtime` is enabled, all matching entries are collected, sorted by
/// descending mtime, then truncated to `maxResults`.
///
/// `node_modules` entries are included when `includeNodeModules` is `true`
/// **or** the pattern itself contains `node_modules`. An explicit
/// `includeNodeModules: false` therefore no longer suppresses a pattern that
/// names `node_modules` directly.
///
/// A blank pattern is treated as `*`. The search path is resolved on the
/// worker thread, so an invalid path rejects the returned promise.
#[napi(js_name = "glob")]
pub fn glob(
    options: GlobOptions,
    #[napi(ts_arg_type = "((match: GlobMatch) => void) | undefined | null")] on_match: Option<
        ThreadsafeFunction<GlobMatch>,
    >,
) -> task::Async<GlobResult> {
    let GlobOptions {
        pattern,
        path,
        file_type,
        recursive,
        hidden,
        max_results,
        gitignore,
        sort_by_mtime,
        cache,
        include_node_modules,
        timeout_ms,
    } = options;

    let pattern = pattern.trim();
    let pattern = if pattern.is_empty() { "*" } else { pattern };
    let pattern = pattern.to_string();

    let ct = task::CancelToken::new(timeout_ms);

    task::blocking("glob", ct, move |ct| {
        // Either signal is enough to keep `node_modules` entries.
        let mentions_node_modules =
            include_node_modules.unwrap_or(false) || pattern.contains("node_modules");
        run_glob(
            GlobConfig {
                root: fs_cache::resolve_search_path(&path)?,
                include_hidden: hidden.unwrap_or(false),
                file_type_filter: file_type,
                recursive: recursive.unwrap_or(true),
                max_results: max_results.map_or(usize::MAX, |value| value as usize),
                use_gitignore: gitignore.unwrap_or(true),
                mentions_node_modules,
                sort_by_mtime: sort_by_mtime.unwrap_or(false),
                use_cache: cache.unwrap_or(false),
                pattern,
            },
            on_match.as_ref(),
            ct,
        )
    })
}

View file

@ -0,0 +1,109 @@
//! Shared glob-pattern helpers used by [`crate::glob`].
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
use napi::bindgen_prelude::*;
/// Turns a raw glob string into the pattern that is actually compiled.
///
/// Backslashes become forward slashes. When `recursive` is set and the
/// pattern has no `/` and does not already start with `**`, it is prefixed
/// with `**/` so it matches at any depth. Finally, any unclosed `{`
/// alternation groups are closed.
pub fn build_glob_pattern(glob: &str, recursive: bool) -> String {
    let mut pattern = glob.replace('\\', "/");
    let needs_prefix = recursive && !pattern.contains('/') && !pattern.starts_with("**");
    if needs_prefix {
        pattern.insert_str(0, "**/");
    }
    fix_unclosed_braces(pattern)
}
/// Compiles a glob string into a single-pattern [`GlobSet`].
///
/// The string is first normalized by [`build_glob_pattern`]: when
/// `recursive` is true, simple patterns (no path separators, no leading
/// `**`) are prefixed with `**/`. The glob is built with literal separators
/// enabled.
///
/// # Errors
/// Returns an error when the normalized pattern is not a valid glob or the
/// matcher cannot be built.
pub fn compile_glob(glob: &str, recursive: bool) -> Result<GlobSet> {
    let pattern = build_glob_pattern(glob, recursive);
    let compiled = GlobBuilder::new(&pattern)
        .literal_separator(true)
        .build()
        .map_err(|err| Error::from_reason(format!("Invalid glob pattern: {err}")))?;

    let mut set = GlobSetBuilder::new();
    set.add(compiled);
    set.build()
        .map_err(|err| Error::from_reason(format!("Failed to build glob matcher: {err}")))
}
/// Appends one `}` for every `{` that has no matching `}`.
///
/// LLM-generated patterns such as `*.{ts,js` sometimes omit the closing
/// brace; repairing them is friendlier than rejecting the pattern. Only the
/// counts of `{` and `}` are compared; their positions are not considered. A
/// pattern with at least as many `}` as `{` is returned unchanged.
fn fix_unclosed_braces(pattern: String) -> String {
    let (opens, closes) = pattern
        .chars()
        .fold((0usize, 0usize), |(opens, closes), ch| match ch {
            '{' => (opens + 1, closes),
            '}' => (opens, closes + 1),
            _ => (opens, closes),
        });
    let missing = opens.saturating_sub(closes);
    let mut fixed = pattern;
    fixed.push_str(&"}".repeat(missing));
    fixed
}
// Unit tests for pattern normalization (`build_glob_pattern`) and compilation
// (`compile_glob`).
#[cfg(test)]
mod tests {
use super::*;
// --- recursive prefixing ---
#[test]
fn simple_pattern_gets_recursive_prefix() {
assert_eq!(build_glob_pattern("*.ts", true), "**/*.ts");
}
#[test]
fn pattern_with_path_stays_as_is() {
assert_eq!(build_glob_pattern("src/*.ts", true), "src/*.ts");
}
#[test]
fn already_recursive_pattern_unchanged() {
assert_eq!(build_glob_pattern("**/*.rs", true), "**/*.rs");
}
#[test]
fn non_recursive_keeps_simple_pattern() {
assert_eq!(build_glob_pattern("*.ts", false), "*.ts");
}
// --- separator normalization ---
#[test]
fn backslashes_normalized() {
assert_eq!(build_glob_pattern("src\\**\\*.ts", true), "src/**/*.ts");
}
// --- brace repair ---
#[test]
fn unclosed_brace_gets_closed() {
assert_eq!(
build_glob_pattern("*.{ts,tsx,js", true),
"**/*.{ts,tsx,js}"
);
}
#[test]
fn deeply_unclosed_braces_all_closed() {
assert_eq!(build_glob_pattern("{a,{b,c}", true), "**/{a,{b,c}}");
}
#[test]
fn balanced_braces_unchanged() {
assert_eq!(build_glob_pattern("*.{ts,js}", true), "**/*.{ts,js}");
}
// --- compilation ---
#[test]
fn compile_glob_accepts_valid_pattern() {
assert!(compile_glob("*.ts", true).is_ok());
}
#[test]
fn compile_glob_fixes_unclosed_brace() {
assert!(compile_glob("*.{ts,tsx,js", true).is_ok());
}
}

View file

@ -8,5 +8,9 @@
#![allow(clippy::needless_pass_by_value)]
mod fs_cache;
mod glob;
mod glob_util;
mod grep;
mod ps;
mod task;

View file

@ -0,0 +1,107 @@
//! Blocking work scheduling for N-API exports.
//!
//! Runs CPU-bound or blocking Rust work on libuv's thread pool via napi's
//! `Task` trait, with cooperative cancellation support.
//!
//! # Cancellation
//! Pass a `CancelToken` to blocking tasks. Work must check
//! `CancelToken::heartbeat()` periodically to respect cancellation.
use std::time::{Duration, Instant};
use napi::{Env, Error, Result, Task, bindgen_prelude::*};
// ─────────────────────────────────────────────────────────────────────────────
// Cancellation
// ─────────────────────────────────────────────────────────────────────────────
/// Token for cooperative cancellation of blocking work.
///
/// Call `heartbeat()` periodically inside long-running work to check for
/// cancellation requests from timeouts.
///
/// The default token has no deadline and never reports cancellation.
#[derive(Clone, Default)]
pub struct CancelToken {
// Instant after which `heartbeat()` fails; `None` means no timeout.
deadline: Option<Instant>,
}
/// Lets callers pass `()` wherever `impl Into<CancelToken>` is accepted to get
/// a token without a deadline.
impl From<()> for CancelToken {
fn from((): ()) -> Self {
Self::default()
}
}
impl CancelToken {
/// Create a new cancel token from an optional timeout in milliseconds.
pub fn new(timeout_ms: Option<u32>) -> Self {
let mut result = Self::default();
if let Some(timeout_ms) = timeout_ms {
result.deadline = Some(Instant::now() + Duration::from_millis(timeout_ms as u64));
}
result
}
/// Check if cancellation has been requested.
///
/// Returns `Ok(())` if work should continue, or an error if timed out.
pub fn heartbeat(&self) -> Result<()> {
if let Some(deadline) = self.deadline {
if deadline < Instant::now() {
return Err(Error::from_reason("Aborted: Timeout"));
}
}
Ok(())
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Blocking Task - libuv thread pool integration
// ─────────────────────────────────────────────────────────────────────────────
/// Task that runs blocking work on libuv's thread pool.
pub struct Blocking<T>
where
T: Send + 'static,
{
// Token handed to the work closure when it runs.
cancel_token: CancelToken,
// The work itself; taken (left as `None`) the first time the task computes.
work: Option<Box<dyn FnOnce(CancelToken) -> Result<T> + Send>>,
}
impl<T> Task for Blocking<T>
where
T: ToNapiValue + Send + 'static + TypeName,
{
type JsValue = T;
type Output = T;
fn compute(&mut self) -> Result<Self::Output> {
let work = self
.work
.take()
.ok_or_else(|| Error::from_reason("BlockingTask: work already consumed"))?;
work(self.cancel_token.clone())
}
fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
Ok(output)
}
}
/// Shorthand for the `AsyncTask` type returned by [`blocking`].
pub type Async<T> = AsyncTask<Blocking<T>>;
/// Wraps `work` in an `AsyncTask` that runs on libuv's thread pool.
///
/// The closure receives the cancel token and is expected to poll it. `_tag`
/// is accepted for call-site labelling and is currently unused.
///
/// Returns `AsyncTask<Blocking<T>>` which becomes `Promise<T>` on the JS side.
pub fn blocking<T, F>(
    _tag: &'static str,
    cancel_token: impl Into<CancelToken>,
    work: F,
) -> AsyncTask<Blocking<T>>
where
    F: FnOnce(CancelToken) -> Result<T> + Send + 'static,
    T: ToNapiValue + TypeName + Send + 'static,
{
    let cancel_token = cancel_token.into();
    let boxed: Box<dyn FnOnce(CancelToken) -> Result<T> + Send> = Box::new(work);
    let task = Blocking {
        cancel_token,
        work: Some(boxed),
    };
    AsyncTask::new(task)
}

View file

@ -1,14 +1,14 @@
{
"name": "@gsd/native",
"version": "0.1.0",
"description": "Native Rust bindings for GSD \u2014 high-performance grep via N-API",
"description": "Native Rust bindings for GSD — high-performance grep, glob, and process management via N-API",
"type": "module",
"main": "./src/index.ts",
"types": "./src/index.ts",
"scripts": {
"build:native": "node ../../native/scripts/build.js",
"build:native:dev": "node ../../native/scripts/build.js --dev",
"test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs"
"test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs src/__tests__/glob.test.mjs"
},
"exports": {
".": {
@ -22,6 +22,10 @@
"./ps": {
"types": "./src/ps/index.ts",
"import": "./src/ps/index.ts"
},
"./glob": {
"types": "./src/glob/index.ts",
"import": "./src/glob/index.ts"
}
},
"files": [

View file

@ -0,0 +1,237 @@
import { test, describe } from "node:test";
import assert from "node:assert/strict";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import * as fs from "node:fs";
import * as os from "node:os";
// ESM has no __dirname/require; recreate both from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);
// Load the native addon directly
// (four levels up from this test directory, then native/addon).
const addonDir = path.resolve(
__dirname,
"..",
"..",
"..",
"..",
"native",
"addon",
);
// Prefer the platform-specific build, then fall back to the dev build.
const platformTag = `${process.platform}-${process.arch}`;
const candidates = [
path.join(addonDir, `gsd_engine.${platformTag}.node`),
path.join(addonDir, "gsd_engine.dev.node"),
];
let native;
for (const candidate of candidates) {
try {
native = require(candidate);
break;
} catch {
// try next
// (any load failure, not only a missing file, falls through here)
}
}
// Without an addon none of the tests can run; exit with a build hint.
if (!native) {
console.error(
"Native addon not found. Run `npm run build:native -w @gsd/native` first.",
);
process.exit(1);
}
describe("native glob: glob()", () => {
test("finds files matching a pattern", async (t) => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));
fs.writeFileSync(path.join(tmpDir, "file1.ts"), "const a = 1;");
fs.writeFileSync(path.join(tmpDir, "file2.ts"), "const b = 2;");
fs.writeFileSync(path.join(tmpDir, "file3.js"), "const c = 3;");
const result = await native.glob({ pattern: "*.ts", path: tmpDir });
assert.equal(result.totalMatches, 2);
assert.equal(result.matches.length, 2);
const paths = result.matches.map((m) => m.path).sort();
assert.deepEqual(paths, ["file1.ts", "file2.ts"]);
});
test("recursive matching into subdirectories", async (t) => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));
fs.mkdirSync(path.join(tmpDir, "src"));
fs.mkdirSync(path.join(tmpDir, "src", "nested"));
fs.writeFileSync(path.join(tmpDir, "root.ts"), "");
fs.writeFileSync(path.join(tmpDir, "src", "a.ts"), "");
fs.writeFileSync(path.join(tmpDir, "src", "nested", "b.ts"), "");
const result = await native.glob({ pattern: "*.ts", path: tmpDir });
assert.equal(result.totalMatches, 3);
const paths = result.matches.map((m) => m.path).sort();
assert.ok(paths.includes("root.ts"));
assert.ok(paths.includes("src/a.ts"));
assert.ok(paths.includes("src/nested/b.ts"));
});
test("respects maxResults limit", async (t) => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));
for (let i = 0; i < 10; i++) {
fs.writeFileSync(path.join(tmpDir, `file${i}.txt`), "");
}
const result = await native.glob({
pattern: "*.txt",
path: tmpDir,
maxResults: 3,
});
assert.equal(result.matches.length, 3);
assert.equal(result.totalMatches, 3);
});
test("filters by file type (directories only)", async (t) => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true }));
fs.mkdirSync(path.join(tmpDir, "dir1"));
fs.mkdirSync(path.join(tmpDir, "dir2"));
fs.writeFileSync(path.join(tmpDir, "file.txt"), "");
const result = await native.glob({
pattern: "*",
path: tmpDir,
recursive: false,
fileType: 2, // Dir
});
assert.equal(result.totalMatches, 2);
const paths = result.matches.map((m) => m.path).sort();
assert.deepEqual(paths, ["dir1", "dir2"]);
});
test("respects .gitignore", async (t) => {
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
  t.after(() => fs.rmSync(repoRoot, { recursive: true, force: true }));

  // An empty `.git` directory marks the folder as a repository so that
  // the .gitignore rules are honoured.
  fs.mkdirSync(path.join(repoRoot, ".git"));

  // [file name, contents] pairs written into the fake repository.
  const fixtures = [
    [".gitignore", "ignored.txt\n"],
    ["kept.txt", ""],
    ["ignored.txt", ""],
  ];
  for (const [name, contents] of fixtures) {
    fs.writeFileSync(path.join(repoRoot, name), contents);
  }

  const { totalMatches, matches } = await native.glob({
    pattern: "*.txt",
    path: repoRoot,
    gitignore: true,
  });

  // Only the file that is not listed in .gitignore may be reported.
  assert.equal(totalMatches, 1);
  const [survivor] = matches;
  assert.equal(survivor.path, "kept.txt");
});
test("includes gitignored files when gitignore=false", async (t) => {
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
  t.after(() => fs.rmSync(repoRoot, { recursive: true, force: true }));

  // Same fake repository layout as the gitignore-enabled test.
  fs.mkdirSync(path.join(repoRoot, ".git"));
  const fixtures = [
    [".gitignore", "ignored.txt\n"],
    ["kept.txt", ""],
    ["ignored.txt", ""],
  ];
  for (const [name, contents] of fixtures) {
    fs.writeFileSync(path.join(repoRoot, name), contents);
  }

  const { totalMatches } = await native.glob({
    pattern: "*.txt",
    path: repoRoot,
    gitignore: false,
  });

  // With ignore rules switched off both .txt files are reported.
  assert.equal(totalMatches, 2);
});
test("skips node_modules by default", async (t) => {
  const projectDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
  t.after(() => fs.rmSync(projectDir, { recursive: true, force: true }));

  // One dependency file inside node_modules, one application file beside it.
  const depsDir = path.join(projectDir, "node_modules");
  fs.mkdirSync(depsDir);
  fs.writeFileSync(path.join(depsDir, "dep.js"), "");
  fs.writeFileSync(path.join(projectDir, "app.js"), "");

  // gitignore is disabled so the exclusion cannot come from ignore rules.
  const { totalMatches, matches } = await native.glob({
    pattern: "*.js",
    path: projectDir,
    gitignore: false,
  });

  assert.equal(totalMatches, 1);
  const [onlyMatch] = matches;
  assert.equal(onlyMatch.path, "app.js");
});
test("sortByMtime returns most recent first", async (t) => {
  const scratch = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
  t.after(() => fs.rmSync(scratch, { recursive: true, force: true }));

  const olderFile = path.join(scratch, "old.txt");
  fs.writeFileSync(olderFile, "old");

  // Backdate atime and mtime by five seconds so the two files are guaranteed
  // to carry distinct modification times.
  const backdated = new Date(Date.now() - 5000);
  fs.utimesSync(olderFile, backdated, backdated);

  fs.writeFileSync(path.join(scratch, "new.txt"), "new");

  const sorted = await native.glob({
    pattern: "*.txt",
    path: scratch,
    sortByMtime: true,
  });

  assert.equal(sorted.totalMatches, 2);

  // Newest entry is expected first, the backdated one second.
  const [newest, oldest] = sorted.matches;
  assert.equal(newest.path, "new.txt");
  assert.equal(oldest.path, "old.txt");
});
test("errors on non-existent path", async () => {
  // Passed as a thunk so that a synchronous throw is also reported as a rejection.
  const searchMissingDir = () =>
    native.glob({
      pattern: "*.txt",
      path: "/nonexistent/path/that/does/not/exist",
    });

  await assert.rejects(searchMissingDir, /Path not found/);
});
test("returns mtime for each entry", async (t) => {
  const scratch = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-glob-test-"));
  t.after(() => fs.rmSync(scratch, { recursive: true, force: true }));

  fs.writeFileSync(path.join(scratch, "test.txt"), "content");

  const { matches } = await native.glob({ pattern: "*.txt", path: scratch });
  assert.equal(matches.length, 1);

  const { mtime } = matches[0];
  assert.ok(typeof mtime === "number");

  // The file was written moments ago, so its mtime (epoch milliseconds)
  // must be later than one minute before now.
  const cutoff = Date.now() - 60_000;
  assert.ok(mtime > cutoff);
});
});
describe("native glob: invalidateFsScanCache()", () => {
  test("can be called with a path", () => {
    // Smoke test: the call must not throw. The platform temp directory is
    // used instead of a hard-coded "/tmp" so the path is meaningful on every
    // operating system, matching the other tests in this file.
    native.invalidateFsScanCache(os.tmpdir());
  });

  test("can be called without arguments", () => {
    // Smoke test: omitting the path must not throw.
    native.invalidateFsScanCache();
  });
});

View file

@ -0,0 +1,44 @@
/**
* Native glob module using N-API.
*
* Gitignore-respecting filesystem discovery backed by Rust's `ignore` and
* `globset` crates, with an optional TTL-based scan cache for repeated queries.
*/
import { native } from "../native.js";
import type {
GlobMatch,
GlobOptions,
GlobResult,
} from "./types.js";
export type { FileType, GlobMatch, GlobOptions, GlobResult } from "./types.js";
/**
 * Search a directory for filesystem entries whose paths match a glob pattern.
 *
 * .gitignore rules are honoured by default, and `.git` / `node_modules` are
 * skipped unless the pattern explicitly mentions them.
 *
 * This is a thin typed facade: both arguments are handed to the native
 * binding unchanged, and its untyped promise is narrowed to `GlobResult`.
 *
 * @param options - Glob search options (pattern, path, filters, etc.)
 * @param onMatch - Optional streaming callback invoked for each match.
 * @returns Promise resolving to matched entries.
 */
export function glob(
  options: GlobOptions,
  onMatch?: (match: GlobMatch) => void,
): Promise<GlobResult> {
  const pending = native.glob(options, onMatch);
  return pending as Promise<GlobResult>;
}
/**
 * Invalidate the filesystem scan cache.
 *
 * Call after file mutations (write, edit, rename, delete) to ensure
 * subsequent glob queries see fresh data.
 *
 * Thin wrapper: the argument is forwarded unchanged to the native binding's
 * `invalidateFsScanCache`, and nothing is returned.
 *
 * @param path - Specific path to invalidate, or omit to clear all.
 */
export function invalidateFsScanCache(path?: string): void {
native.invalidateFsScanCache(path);
}

View file

@ -0,0 +1,53 @@
/**
 * File type classification for filesystem entries.
 *
 * The numeric values matter to callers: plain JavaScript code passes the raw
 * number (for example `2` for directories) as `GlobOptions.fileType`.
 *
 * NOTE(review): this is a `const enum` and the package entry points re-export
 * it with `export type` — confirm that TypeScript consumers can still refer
 * to members such as `FileType.Dir` as values.
 */
export const enum FileType {
/** Regular file. */
File = 1,
/** Directory. */
Dir = 2,
/** Symbolic link. */
Symlink = 3,
}
/**
 * A single filesystem entry matched by a glob operation.
 *
 * Instances appear in `GlobResult.matches` and are the argument type of the
 * optional `onMatch` callback accepted by `glob`.
 */
export interface GlobMatch {
/** Relative path from the search root, using forward slashes (e.g. "src/nested/b.ts"). */
path: string;
/** Resolved filesystem type for the match. */
fileType: FileType;
/**
 * Modification time in milliseconds since Unix epoch (comparable with `Date.now()`).
 * NOTE(review): typed as nullable — presumably `null` when the timestamp
 * cannot be read; confirm against the native implementation.
 */
mtime: number | null;
}
/**
 * Options for the glob operation.
 *
 * Only `pattern` and `path` are required; every other field is optional and
 * falls back to the default noted on that field.
 */
export interface GlobOptions {
/** Glob pattern to match (e.g., "*.ts"). */
pattern: string;
/** Directory to search; reported match paths are relative to this directory. */
path: string;
/** Filter by file type: File (1), Dir (2), or Symlink (3). */
fileType?: FileType;
/**
 * Match simple patterns recursively by default (default: true).
 * With the default, a simple pattern such as "*.ts" also matches files in
 * nested directories (e.g. "src/nested/b.ts").
 */
recursive?: boolean;
/** Include hidden files (default: false). */
hidden?: boolean;
/** Maximum number of results to return; `GlobResult.totalMatches` reflects the capped count. */
maxResults?: number;
/** Respect .gitignore files (default: true). */
gitignore?: boolean;
/** Enable shared filesystem scan cache (default: false). */
cache?: boolean;
/** Sort results by mtime (most recent first) before applying limit. */
sortByMtime?: boolean;
/** Include node_modules entries (default: false, unless pattern mentions it). */
includeNodeModules?: boolean;
/**
 * Timeout in milliseconds for the operation.
 * NOTE(review): what happens when the timeout elapses (rejection vs. partial
 * result) is not visible from this declaration — confirm in the native module.
 */
timeoutMs?: number;
}
/** Result payload returned by a glob operation (the resolved value of `glob`). */
export interface GlobResult {
/** Matched filesystem entries. */
matches: GlobMatch[];
/**
 * Number of returned matches, i.e. counted after any `maxResults` limit has
 * been applied rather than the number of entries that matched on disk.
 */
totalMatches: number;
}

View file

@ -4,6 +4,7 @@
* Modules:
* - grep: ripgrep-backed regex search (content + filesystem)
* - ps: cross-platform process tree management
* - glob: gitignore-respecting filesystem discovery with scan caching
*/
export { searchContent, grep } from "./grep/index.js";
@ -23,3 +24,11 @@ export {
processGroupId,
killProcessGroup,
} from "./ps/index.js";
export { glob, invalidateFsScanCache } from "./glob/index.js";
export type {
FileType,
GlobMatch,
GlobOptions,
GlobResult,
} from "./glob/index.js";

View file

@ -47,4 +47,9 @@ export const native = loadNative() as {
listDescendants: (pid: number) => number[];
processGroupId: (pid: number) => number | null;
killProcessGroup: (pgid: number, signal: number) => boolean;
glob: (
options: unknown,
onMatch?: ((match: unknown) => void) | undefined | null,
) => Promise<unknown>;
invalidateFsScanCache: (path?: string) => void;
};