//! Shared filesystem scan cache for discovery tools (glob). //! //! Provides a TTL-based cache of scanned directory entries, with: //! - Global policy (no per-call TTL tuning) //! - Explicit invalidation for agent file mutations //! - Empty-result fast recheck to avoid stale negatives //! //! # Policy Configuration (environment overrides) //! - `FS_SCAN_CACHE_TTL_MS` – default `1000` //! - `FS_SCAN_EMPTY_RECHECK_MS` – default `200` //! - `FS_SCAN_CACHE_MAX_ENTRIES` – default `16` use std::{ borrow::Cow, ops::Deref, path::{Path, PathBuf}, sync::{Arc, LazyLock}, time::{Duration, Instant}, }; use dashmap::DashMap; use ignore::WalkBuilder; use napi::bindgen_prelude::*; use napi_derive::napi; use crate::task; // ═══════════════════════════════════════════════════════════════════════════ // Public types (re-exported by glob) // ═══════════════════════════════════════════════════════════════════════════ #[derive(Debug, PartialEq, Eq)] #[napi] pub enum FileType { /// Regular file. File = 1, /// Directory. Dir = 2, /// Symbolic link. Symlink = 3, } /// A single filesystem entry from a directory scan. #[derive(Clone)] #[napi(object)] pub struct GlobMatch { /// Relative path from the search root, using forward slashes. pub path: String, /// Resolved filesystem type for the match. #[napi(js_name = "fileType")] pub file_type: FileType, /// Modification time in milliseconds since Unix epoch (from /// `symlink_metadata`). pub mtime: Option, } // ═══════════════════════════════════════════════════════════════════════════ // Cache policy // ═══════════════════════════════════════════════════════════════════════════ const DEFAULT_CACHE_TTL_MS: u64 = 1_000; const DEFAULT_EMPTY_RECHECK_MS: u64 = 200; const DEFAULT_MAX_CACHE_ENTRIES: usize = 16; fn env_u64(name: &str, default: u64) -> u64 { std::env::var(name) .ok() .and_then(|v| v.parse().ok()) .unwrap_or(default) } fn env_usize(name: &str, default: usize) -> usize { std::env::var(name) .ok() .and_then(|v| v.parse().ok()) .unwrap_or(default) } /// Configured cache TTL in milliseconds. pub fn cache_ttl_ms() -> u64 { env_u64("FS_SCAN_CACHE_TTL_MS", DEFAULT_CACHE_TTL_MS) } /// Configured empty-result recheck threshold in milliseconds. pub fn empty_recheck_ms() -> u64 { env_u64("FS_SCAN_EMPTY_RECHECK_MS", DEFAULT_EMPTY_RECHECK_MS) } fn max_cache_entries() -> usize { env_usize("FS_SCAN_CACHE_MAX_ENTRIES", DEFAULT_MAX_CACHE_ENTRIES) } // ═══════════════════════════════════════════════════════════════════════════ // Cache internals // ═══════════════════════════════════════════════════════════════════════════ #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct CacheKey { root: PathBuf, include_hidden: bool, use_gitignore: bool, } #[derive(Clone)] pub struct SharedGlobEntries(Arc<[GlobMatch]>); impl SharedGlobEntries { fn from_vec(entries: Vec) -> Self { Self(Arc::from(entries)) } } impl Deref for SharedGlobEntries { type Target = [GlobMatch]; fn deref(&self) -> &Self::Target { self.0.as_ref() } } #[derive(Clone)] struct CacheEntry { created_at: Instant, entries: SharedGlobEntries, } static FS_CACHE: LazyLock> = LazyLock::new(DashMap::new); /// Result of a cache-aware scan, including the age of the cached data. pub struct ScanResult { /// Scanned filesystem entries. pub entries: SharedGlobEntries, /// How old the cached data is in milliseconds (0 = freshly scanned). pub cache_age_ms: u64, } fn evict_oldest() { let max = max_cache_entries(); if FS_CACHE.len() > max { if let Some(oldest_key) = FS_CACHE .iter() .min_by_key(|entry| entry.value().created_at) .map(|entry| entry.key().clone()) { FS_CACHE.remove(&oldest_key); } } } // ═══════════════════════════════════════════════════════════════════════════ // Path utilities // ═══════════════════════════════════════════════════════════════════════════ /// Resolve a search path string to a canonical `PathBuf` (must be a directory). pub fn resolve_search_path(path: &str) -> Result { let candidate = PathBuf::from(path); let root = if candidate.is_absolute() { candidate } else { let cwd = std::env::current_dir() .map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))?; cwd.join(candidate) }; let metadata = std::fs::metadata(&root) .map_err(|err| Error::from_reason(format!("Path not found: {err}")))?; if !metadata.is_dir() { return Err(Error::from_reason( "Search path must be a directory".to_string(), )); } Ok(std::fs::canonicalize(&root).unwrap_or(root)) } /// Normalize a filesystem path to a forward-slash relative string. pub fn normalize_relative_path<'a>(root: &Path, path: &'a Path) -> Cow<'a, str> { let relative = path.strip_prefix(root).unwrap_or(path); if cfg!(windows) { let relative = relative.to_string_lossy(); if relative.contains('\\') { Cow::Owned(relative.replace('\\', "/")) } else { relative } } else { relative.to_string_lossy() } } pub fn contains_component(path: &Path, target: &str) -> bool { path.components().any(|component| { component .as_os_str() .to_str() .is_some_and(|value| value == target) }) } pub fn should_skip_path(path: &Path, mentions_node_modules: bool) -> bool { if contains_component(path, ".git") { return true; } if !mentions_node_modules && contains_component(path, "node_modules") { return true; } false } pub fn classify_file_type(path: &Path) -> Option<(FileType, Option)> { let metadata = std::fs::symlink_metadata(path).ok()?; let file_type = metadata.file_type(); let mtime_ms = metadata .modified() .ok() .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) .map(|d| d.as_millis() as f64); if file_type.is_symlink() { Some((FileType::Symlink, mtime_ms)) } else if file_type.is_dir() { Some((FileType::Dir, mtime_ms)) } else { Some((FileType::File, mtime_ms)) } } // ═══════════════════════════════════════════════════════════════════════════ // Walker + collection // ═══════════════════════════════════════════════════════════════════════════ /// Builds a deterministic filesystem walker configured for visibility and /// ignore rules. pub fn build_walker(root: &Path, include_hidden: bool, use_gitignore: bool) -> WalkBuilder { let mut builder = WalkBuilder::new(root); builder .hidden(!include_hidden) .follow_links(false) .sort_by_file_path(|a, b| a.cmp(b)); if use_gitignore { builder .git_ignore(true) .git_exclude(true) .git_global(true) .ignore(true) .parents(true); } else { builder .git_ignore(false) .git_exclude(false) .git_global(false) .ignore(false) .parents(false); } builder } /// Scans filesystem entries and records normalized relative paths with file /// metadata. fn collect_entries( root: &Path, include_hidden: bool, use_gitignore: bool, ct: &task::CancelToken, ) -> Result> { let builder = build_walker(root, include_hidden, use_gitignore); let mut entries = Vec::new(); for entry in builder.build() { ct.heartbeat()?; let Ok(entry) = entry else { continue }; let path = entry.path(); if should_skip_path(path, true) { continue; } let relative = normalize_relative_path(root, path); if relative.is_empty() { continue; } let Some((file_type, mtime)) = classify_file_type(path) else { continue; }; entries.push(GlobMatch { path: relative.into_owned(), file_type, mtime, }); } Ok(entries) } // ═══════════════════════════════════════════════════════════════════════════ // Cache API // ═══════════════════════════════════════════════════════════════════════════ /// Returns scanned entries using the global TTL cache policy. /// /// The returned [`ScanResult::cache_age_ms`] lets callers implement /// empty-result fast recheck: if a query produces zero matches and the cache is /// older than [`empty_recheck_ms()`], call [`force_rescan`] before returning /// empty. pub fn get_or_scan( root: &Path, include_hidden: bool, use_gitignore: bool, ct: &task::CancelToken, ) -> Result { let ttl = cache_ttl_ms(); if ttl == 0 { let entries = SharedGlobEntries::from_vec(collect_entries(root, include_hidden, use_gitignore, ct)?); return Ok(ScanResult { entries, cache_age_ms: 0, }); } let key = CacheKey { root: root.to_path_buf(), include_hidden, use_gitignore, }; let now = Instant::now(); if let Some(entry) = FS_CACHE.get(&key) { let age = now.duration_since(entry.created_at); if age < Duration::from_millis(ttl) { return Ok(ScanResult { entries: entry.entries.clone(), cache_age_ms: age.as_millis() as u64, }); } drop(entry); FS_CACHE.remove(&key); } let entries = SharedGlobEntries::from_vec(collect_entries(root, include_hidden, use_gitignore, ct)?); FS_CACHE.insert( key, CacheEntry { created_at: now, entries: entries.clone(), }, ); evict_oldest(); Ok(ScanResult { entries, cache_age_ms: 0, }) } /// Force a fresh scan, replacing any existing cache entry. /// /// When `store` is false, the fresh scan result is returned without /// repopulating the cache. pub fn force_rescan( root: &Path, include_hidden: bool, use_gitignore: bool, store: bool, ct: &task::CancelToken, ) -> Result { let key = CacheKey { root: root.to_path_buf(), include_hidden, use_gitignore, }; FS_CACHE.remove(&key); let entries = SharedGlobEntries::from_vec(collect_entries(root, include_hidden, use_gitignore, ct)?); if store { let now = Instant::now(); FS_CACHE.insert( key, CacheEntry { created_at: now, entries: entries.clone(), }, ); evict_oldest(); } Ok(entries) } // ═══════════════════════════════════════════════════════════════════════════ // Invalidation // ═══════════════════════════════════════════════════════════════════════════ /// Invalidate cache entries whose root contains `target`. pub fn invalidate_path(target: &Path) { let keys_to_remove: Vec = FS_CACHE .iter() .filter(|entry| target.starts_with(&entry.key().root)) .map(|entry| entry.key().clone()) .collect(); for key in keys_to_remove { FS_CACHE.remove(&key); } } /// Clear the entire scan cache. pub fn invalidate_all() { FS_CACHE.clear(); } /// Invalidate the filesystem scan cache. /// /// When called with a path, removes entries for roots containing that path. /// When called without a path, clears the entire cache. /// /// Intended to be called after agent file mutations (write, edit, rename, /// delete). #[napi(js_name = "invalidateFsScanCache")] pub fn invalidate_fs_scan_cache(path: Option) { match path { Some(p) => { let candidate = PathBuf::from(&p); let absolute = if candidate.is_absolute() { candidate } else if let Ok(cwd) = std::env::current_dir() { cwd.join(candidate) } else { PathBuf::from(&p) }; let target = std::fs::canonicalize(&absolute) .or_else(|_| { absolute .parent() .and_then(|parent| std::fs::canonicalize(parent).ok()) .and_then(|parent| absolute.file_name().map(|name| parent.join(name))) .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::NotFound)) }) .unwrap_or(absolute); invalidate_path(&target); } None => invalidate_all(), } }