From fab9ad390d2b9669a04650fc9316ee969a99626a Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:36:23 -0600 Subject: [PATCH 1/7] feat: add native clipboard module with arboard backend Cross-platform clipboard access (text read/write, image read) via the arboard Rust crate. No external tools (pbcopy, xclip, etc.) required. Ported from Oh My Pi's clipboard module with adaptations for GSD's architecture (direct AsyncTask instead of task::blocking wrapper). Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 529 +++++++++++++++++- native/crates/engine/Cargo.toml | 2 + native/crates/engine/src/clipboard.rs | 110 ++++ native/crates/engine/src/highlight.rs | 472 ++++++++++++++++ native/crates/engine/src/lib.rs | 1 + packages/native/package.json | 8 +- .../native/src/__tests__/clipboard.test.mjs | 80 +++ .../native/src/__tests__/highlight.test.mjs | 156 ++++++ packages/native/src/clipboard/index.ts | 40 ++ packages/native/src/clipboard/types.ts | 7 + packages/native/src/highlight/index.ts | 44 ++ packages/native/src/highlight/types.ts | 25 + packages/native/src/index.ts | 8 + packages/native/src/native.ts | 3 + 14 files changed, 1482 insertions(+), 3 deletions(-) create mode 100644 native/crates/engine/src/clipboard.rs create mode 100644 native/crates/engine/src/highlight.rs create mode 100644 packages/native/src/__tests__/clipboard.test.mjs create mode 100644 packages/native/src/__tests__/highlight.test.mjs create mode 100644 packages/native/src/clipboard/index.ts create mode 100644 packages/native/src/clipboard/types.ts create mode 100644 packages/native/src/highlight/index.ts create mode 100644 packages/native/src/highlight/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index ba8fa03da..748f53e2a 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +17,32 @@ dependencies = [ "memchr", ] +[[package]] +name = "arboard" +version = "3.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0348a1c054491f4bfe6ab86a7b6ab1e44e45d899005de92f58b3df180b36ddaf" +dependencies = [ + "clipboard-win", + "image", + "log", + "objc2", + "objc2-app-kit", + "objc2-core-foundation", + "objc2-core-graphics", + "objc2-foundation", + "parking_lot", + "percent-encoding", + "windows-sys 0.60.2", + "x11rb", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "bitflags" version = "2.11.0" @@ -28,12 +60,33 @@ dependencies = [ "serde", ] +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "clipboard-win" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -43,6 +96,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] 
+name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -68,6 +130,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "ctor" version = "0.2.9" @@ -78,6 +146,16 @@ dependencies = [ "syn", ] +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags", + "objc2", +] + [[package]] name = "either" version = "1.15.0" @@ -102,6 +180,71 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + +[[package]] +name = "fax" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab" +dependencies = [ + "fax_derive", +] + +[[package]] +name = "fax_derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "gethostname" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bd49230192a3797a9a4d6abe9b3eed6f7fa4c8a8a4947977c6f80025f92cbd8" +dependencies = [ + "rustix", + "windows-link", +] + [[package]] name = "globset" version = "0.4.18" @@ -156,7 +299,9 @@ dependencies = [ name = "gsd-engine" version = "0.1.0" dependencies = [ + "arboard", "gsd-grep", + "image", "napi", "napi-build", "napi-derive", @@ -173,6 +318,17 @@ dependencies = [ "rayon", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "ignore" version = "0.4.25" @@ -189,6 +345,20 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "image" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" +dependencies = [ + "bytemuck", + "byteorder-lite", + "moxcms", + "num-traits", + "png", + "tiff", +] + [[package]] name = "libc" version = "0.2.183" @@ -205,6 +375,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" 
+ +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -226,6 +411,26 @@ dependencies = [ "libc", ] +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "moxcms" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" +dependencies = [ + "num-traits", + "pxfm", +] + [[package]] name = "napi" version = "2.16.17" @@ -283,12 +488,136 @@ dependencies = [ "libloading", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-app-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d49e936b501e5c5bf01fda3a9452ff86dc3ea98ad5f283e1455153142d97518c" +dependencies = [ + "bitflags", + "objc2", + "objc2-core-graphics", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", + "dispatch2", + "objc2", +] + +[[package]] +name = 
"objc2-core-graphics" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807" +dependencies = [ + "bitflags", + "dispatch2", + "objc2", + "objc2-core-foundation", + "objc2-io-surface", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags", + "objc2", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-io-surface" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d" +dependencies = [ + "bitflags", + "objc2", + "objc2-core-foundation", +] + [[package]] name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -298,6 +627,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pxfm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + [[package]] name = "quote" version = "1.0.45" @@ -327,6 +668,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.12.3" @@ -356,6 +706,19 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + [[package]] name = "same-file" version = "1.0.6" @@ -365,6 +728,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "semver" version = "1.0.27" @@ -400,6 +769,18 @@ dependencies = [ "syn", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "syn" version = "2.0.117" @@ -411,6 +792,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tiff" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52" +dependencies = [ + "fax", + "flate2", + "half", + "quick-error", + "weezl", + "zune-jpeg", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -433,13 +828,19 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "weezl" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + [[package]] name = "winapi-util" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -448,6 +849,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = 
"0.61.2" @@ -456,3 +866,120 @@ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "x11rb" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9993aa5be5a26815fe2c3eacfc1fde061fc1a1f094bf1ad2a18bf9c495dd7414" +dependencies = [ + "gethostname", + "rustix", + "x11rb-protocol", +] + +[[package]] +name = "x11rb-protocol" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6fc2961e4ef194dcbfe56bb845534d0dc8098940c7e5c012a258bfec6701bd" + +[[package]] +name = "zerocopy" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-jpeg" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec5f41c76397b7da451efd19915684f727d7e1d516384ca6bd0ec43ec94de23c" +dependencies = [ + "zune-core", +] diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index dcd61ef0c..90cb772b4 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -12,6 +12,8 @@ crate-type = ["cdylib"] [dependencies] gsd-grep = { path = "../grep" } +arboard = "3" +image = { version 
= "0.25", default-features = false, features = ["png"] }
 napi = { version = "2", features = ["napi8"] }
 napi-derive = "2"
 
diff --git a/native/crates/engine/src/clipboard.rs b/native/crates/engine/src/clipboard.rs
new file mode 100644
index 000000000..cc376c024
--- /dev/null
+++ b/native/crates/engine/src/clipboard.rs
@@ -0,0 +1,110 @@
+//! Clipboard utilities backed by arboard.
+//!
+//! Provides text copy/read and image read support across Linux, macOS, and Windows.
+//! Text copy runs synchronously so macOS writes execute on the caller thread,
+//! avoiding worker-thread `AppKit` pasteboard warnings in CLI contexts.
+
+use std::io::Cursor;
+
+use arboard::{Clipboard, Error as ClipboardError, ImageData};
+use image::{DynamicImage, ImageFormat, RgbaImage};
+use napi::bindgen_prelude::*;
+use napi::{Env, Error, Result, Task};
+use napi_derive::napi;
+
+/// Clipboard image payload encoded as PNG bytes.
+#[napi(object)]
+pub struct ClipboardImage {
+    /// PNG-encoded image bytes.
+    pub data: Uint8Array,
+    /// MIME type for the encoded image payload (always "image/png" in this module).
+    #[napi(js_name = "mimeType")]
+    pub mime_type: String,
+}
+
+/// Encode raw RGBA clipboard pixels as PNG bytes (buffer pre-sized to the raw pixel size).
+fn encode_png(image: ImageData<'_>) -> Result<Vec<u8>> {
+    let width = u32::try_from(image.width)
+        .map_err(|_| Error::from_reason("Clipboard image width overflow"))?;
+    let height = u32::try_from(image.height)
+        .map_err(|_| Error::from_reason("Clipboard image height overflow"))?;
+    let bytes = image.bytes.into_owned();
+    let buffer = RgbaImage::from_raw(width, height, bytes)
+        .ok_or_else(|| Error::from_reason("Clipboard image buffer size mismatch"))?;
+    let mut output = Vec::with_capacity(width.saturating_mul(height).saturating_mul(4) as usize);
+    DynamicImage::ImageRgba8(buffer)
+        .write_to(&mut Cursor::new(&mut output), ImageFormat::Png)
+        .map_err(|err| Error::from_reason(format!("Failed to encode clipboard image: {err}")))?;
+    Ok(output)
+}
+
+/// Copy plain text to the system clipboard.
+///
+/// Runs synchronously to avoid macOS AppKit pasteboard warnings
+/// when writing from worker threads.
+#[napi(js_name = "copyToClipboard")]
+pub fn copy_to_clipboard(text: String) -> Result<()> {
+    let mut clipboard = Clipboard::new()
+        .map_err(|err| Error::from_reason(format!("Failed to access clipboard: {err}")))?;
+    clipboard
+        .set_text(text)
+        .map_err(|err| Error::from_reason(format!("Failed to copy to clipboard: {err}")))?;
+    Ok(())
+}
+
+/// Read plain text from the system clipboard.
+///
+/// Returns `None` when no text data is available.
+#[napi(js_name = "readTextFromClipboard")]
+pub fn read_text_from_clipboard() -> Result<Option<String>> {
+    let mut clipboard = Clipboard::new()
+        .map_err(|err| Error::from_reason(format!("Failed to access clipboard: {err}")))?;
+    match clipboard.get_text() {
+        Ok(text) => Ok(Some(text)),
+        Err(ClipboardError::ContentNotAvailable) => Ok(None),
+        Err(err) => Err(Error::from_reason(format!(
+            "Failed to read clipboard text: {err}"
+        ))),
+    }
+}
+
+// ── Async image read task ────────────────────────────────────────────
+
+pub(crate) struct ReadImageTask;
+
+impl Task for ReadImageTask {
+    type JsValue = Option<ClipboardImage>;
+    type Output = Option<ClipboardImage>;
+
+    fn compute(&mut self) -> Result<Self::Output> {
+        let mut clipboard = Clipboard::new()
+            .map_err(|err| Error::from_reason(format!("Failed to access clipboard: {err}")))?;
+        match clipboard.get_image() {
+            Ok(image) => {
+                let bytes = encode_png(image)?;
+                Ok(Some(ClipboardImage {
+                    data: Uint8Array::from(bytes),
+                    mime_type: "image/png".to_string(),
+                }))
+            }
+            Err(ClipboardError::ContentNotAvailable) => Ok(None),
+            Err(err) => Err(Error::from_reason(format!(
+                "Failed to read clipboard image: {err}"
+            ))),
+        }
+    }
+
+    fn resolve(&mut self, _env: Env, output: Self::Output) -> Result<Self::JsValue> {
+        Ok(output)
+    }
+}
+
+/// Read an image from the system clipboard.
+///
+/// Returns a Promise that resolves to a `ClipboardImage` (PNG-encoded bytes)
+/// or `null` when no image data is available. Runs on libuv's thread pool
+/// to avoid blocking the main JS thread during PNG encoding.
+#[napi(js_name = "readImageFromClipboard")]
+pub fn read_image_from_clipboard() -> AsyncTask<ReadImageTask> {
+    AsyncTask::new(ReadImageTask)
+}
diff --git a/native/crates/engine/src/highlight.rs b/native/crates/engine/src/highlight.rs
new file mode 100644
index 000000000..e2ba692da
--- /dev/null
+++ b/native/crates/engine/src/highlight.rs
@@ -0,0 +1,472 @@
+//! Syntax highlighting using syntect.
+//!
+//! Provides ANSI-colored output for code blocks. Takes theme colors as input
+//! and maps syntect scopes to 11 semantic categories:
+//! - comment, keyword, function, variable, string, number, type, operator,
+//!   punctuation, inserted, deleted
+
+use std::{cell::RefCell, collections::HashMap, sync::OnceLock};
+
+use napi_derive::napi;
+use syntect::parsing::{ParseState, Scope, ScopeStack, ScopeStackOp, SyntaxReference, SyntaxSet};
+
+static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
+static SCOPE_MATCHERS: OnceLock<ScopeMatchers> = OnceLock::new();
+
+// Thread-local cache for scope -> color index lookups
+thread_local! {
+    static SCOPE_COLOR_CACHE: RefCell<HashMap<Scope, usize>> = RefCell::new(HashMap::with_capacity(256));
+}
+
+fn get_syntax_set() -> &'static SyntaxSet {
+    SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
+}
+
+/// Pre-compiled scope patterns for fast matching.
+struct ScopeMatchers {
+    // Comment (index 0)
+    comment: Scope,
+
+    // String (index 4)
+    string: Scope,
+    constant_character: Scope,
+    meta_string: Scope,
+
+    // Number (index 5)
+    constant_numeric: Scope,
+    constant_integer: Scope,
+    constant: Scope,
+
+    // Keyword (index 1)
+    keyword: Scope,
+    storage_type: Scope,
+    storage_modifier: Scope,
+
+    // Function (index 2)
+    entity_name_function: Scope,
+    support_function: Scope,
+    meta_function_call: Scope,
+    variable_function: Scope,
+
+    // Type (index 6)
+    entity_name_type: Scope,
+    support_type: Scope,
+    support_class: Scope,
+    entity_name_class: Scope,
+    entity_name_struct: Scope,
+    entity_name_enum: Scope,
+    entity_name_interface: Scope,
+    entity_name_trait: Scope,
+
+    // Operator (index 7)
+    keyword_operator: Scope,
+    punctuation_accessor: Scope,
+
+    // Punctuation (index 8)
+    punctuation: Scope,
+
+    // Variable (index 3)
+    variable: Scope,
+    entity_name: Scope,
+    meta_path: Scope,
+
+    // Diff (indices 9, 10; header/range scopes map to keyword index 1)
+    markup_inserted: Scope,
+    markup_deleted: Scope,
+    meta_diff_header: Scope,
+    meta_diff_range: Scope,
+}
+
+impl ScopeMatchers {
+    fn new() -> Self {
+        Self {
+            comment: Scope::new("comment").unwrap(),
+            string: Scope::new("string").unwrap(),
+            constant_character: Scope::new("constant.character").unwrap(),
+            meta_string: Scope::new("meta.string").unwrap(),
+            constant_numeric: Scope::new("constant.numeric").unwrap(),
+            constant_integer: Scope::new("constant.integer").unwrap(),
+            constant: Scope::new("constant").unwrap(),
+            keyword: Scope::new("keyword").unwrap(),
+            storage_type: Scope::new("storage.type").unwrap(),
+            storage_modifier: Scope::new("storage.modifier").unwrap(),
+            entity_name_function: Scope::new("entity.name.function").unwrap(),
+            support_function: Scope::new("support.function").unwrap(),
+            meta_function_call: Scope::new("meta.function-call").unwrap(),
+            variable_function: Scope::new("variable.function").unwrap(),
+            entity_name_type: Scope::new("entity.name.type").unwrap(),
+            support_type: Scope::new("support.type").unwrap(),
+            support_class: Scope::new("support.class").unwrap(),
+            entity_name_class: Scope::new("entity.name.class").unwrap(),
+            entity_name_struct: Scope::new("entity.name.struct").unwrap(),
+            entity_name_enum: Scope::new("entity.name.enum").unwrap(),
+            entity_name_interface: Scope::new("entity.name.interface").unwrap(),
+            entity_name_trait: Scope::new("entity.name.trait").unwrap(),
+            keyword_operator: Scope::new("keyword.operator").unwrap(),
+            punctuation_accessor: Scope::new("punctuation.accessor").unwrap(),
+            punctuation: Scope::new("punctuation").unwrap(),
+            variable: Scope::new("variable").unwrap(),
+            entity_name: Scope::new("entity.name").unwrap(),
+            meta_path: Scope::new("meta.path").unwrap(),
+            markup_inserted: Scope::new("markup.inserted").unwrap(),
+            markup_deleted: Scope::new("markup.deleted").unwrap(),
+            meta_diff_header: Scope::new("meta.diff.header").unwrap(),
+            meta_diff_range: Scope::new("meta.diff.range").unwrap(),
+        }
+    }
+}
+
+fn get_scope_matchers() -> &'static ScopeMatchers {
+    SCOPE_MATCHERS.get_or_init(ScopeMatchers::new)
+}
+
+/// Theme colors for syntax highlighting.
+/// Each color is an ANSI escape sequence (e.g., "\x1b[38;2;255;0;0m").
+#[derive(Debug)]
+#[napi(object)]
+pub struct HighlightColors {
+    /// ANSI color for comments.
+    pub comment: String,
+    /// ANSI color for keywords.
+    pub keyword: String,
+    /// ANSI color for function names.
+    pub function: String,
+    /// ANSI color for variables and identifiers.
+    pub variable: String,
+    /// ANSI color for string literals.
+    pub string: String,
+    /// ANSI color for numeric literals.
+    pub number: String,
+    /// ANSI color for type identifiers.
+    #[napi(js_name = "type")]
+    pub r#type: String,
+    /// ANSI color for operators.
+    pub operator: String,
+    /// ANSI color for punctuation tokens.
+    pub punctuation: String,
+    /// ANSI color for diff inserted lines; inserted text is left uncolored when omitted.
+    #[napi(js_name = "inserted")]
+    pub inserted: Option<String>,
+    /// ANSI color for diff deleted lines; deleted text is left uncolored when omitted.
+    #[napi(js_name = "deleted")]
+    pub deleted: Option<String>,
+}
+
+/// Language alias mappings: (aliases, target syntax name).
+/// Used for languages not in syntect's default set or with non-standard names.
+const LANG_ALIASES: &[(&[&str], &str)] = &[
+    (&["ts", "tsx", "typescript", "js", "jsx", "javascript", "mjs", "cjs"], "JavaScript"),
+    (&["py", "python"], "Python"),
+    (&["rb", "ruby"], "Ruby"),
+    (&["rs", "rust"], "Rust"),
+    (&["go", "golang"], "Go"),
+    (&["java"], "Java"),
+    (&["kt", "kotlin"], "Java"),
+    (&["swift"], "Objective-C"),
+    (&["c", "h"], "C"),
+    (&["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh"], "C++"),
+    (&["cs", "csharp"], "C#"),
+    (&["php"], "PHP"),
+    (&["sh", "bash", "zsh", "shell"], "Bash"),
+    (&["fish"], "Shell-Unix-Generic"),
+    (&["ps1", "powershell"], "PowerShell"),
+    (&["html", "htm"], "HTML"),
+    (&["css"], "CSS"),
+    (&["scss"], "SCSS"),
+    (&["sass"], "Sass"),
+    (&["less"], "LESS"),
+    (&["json"], "JSON"),
+    (&["yaml", "yml"], "YAML"),
+    (&["toml"], "TOML"),
+    (&["xml"], "XML"),
+    (&["md", "markdown"], "Markdown"),
+    (&["sql"], "SQL"),
+    (&["lua"], "Lua"),
+    (&["perl", "pl"], "Perl"),
+    (&["r"], "R"),
+    (&["scala"], "Scala"),
+    (&["clj", "clojure"], "Clojure"),
+    (&["ex", "exs", "elixir"], "Ruby"),
+    (&["erl", "erlang"], "Erlang"),
+    (&["hs", "haskell"], "Haskell"),
+    (&["ml", "ocaml"], "OCaml"),
+    (&["vim"], "VimL"),
+    (&["graphql", "gql"], "GraphQL"),
+    (&["proto", "protobuf"], "Protocol Buffers"),
+    (&["tf", "hcl", "terraform"], "Terraform"),
+    (&["dockerfile", "docker"], "Dockerfile"),
+    (&["makefile", "make"], "Makefile"),
+    (&["cmake"], "CMake"),
+    (&["ini", "cfg", "conf", "config", "properties"], "INI"),
+    (&["diff", "patch"], "Diff"),
+    (&["gitignore", "gitattributes", "gitmodules"], "Git Ignore"),
+];
+
+/// Find syntax name from alias table using case-insensitive comparison.
+#[inline]
+fn find_alias(lang: &str) -> Option<&'static str> {
+    LANG_ALIASES
+        .iter()
+        .find(|(aliases, _)| aliases.iter().any(|a| lang.eq_ignore_ascii_case(a)))
+        .map(|(_, target)| *target)
+}
+
+/// Check if language is in the alias table.
+#[inline]
+fn is_known_alias(lang: &str) -> bool {
+    // Delegate to `find_alias` so both lookups share a single matching rule
+    // (ASCII case-insensitive comparison against every alias).
+    find_alias(lang).is_some()
+}
+
+/// Compute the palette index for a single scope (uncached); `usize::MAX` means no match.
+#[inline]
+fn compute_scope_color(s: Scope) -> usize {
+    let m = get_scope_matchers();
+
+    // Comment (index 0)
+    if m.comment.is_prefix_of(s) {
+        return 0;
+    }
+
+    // Diff inserted (index 9)
+    if m.markup_inserted.is_prefix_of(s) {
+        return 9;
+    }
+
+    // Diff deleted (index 10)
+    if m.markup_deleted.is_prefix_of(s) {
+        return 10;
+    }
+
+    // Diff header/range -> keyword (index 1)
+    if m.meta_diff_header.is_prefix_of(s) || m.meta_diff_range.is_prefix_of(s) {
+        return 1;
+    }
+
+    // String (index 4)
+    if m.string.is_prefix_of(s)
+        || m.constant_character.is_prefix_of(s)
+        || m.meta_string.is_prefix_of(s)
+    {
+        return 4;
+    }
+
+    // Number (index 5)
+    if m.constant_numeric.is_prefix_of(s) || m.constant_integer.is_prefix_of(s) {
+        return 5;
+    }
+
+    // Operator (index 7): must run before the generic `keyword` prefix test below
+    if m.keyword_operator.is_prefix_of(s) || m.punctuation_accessor.is_prefix_of(s) {
+        return 7;
+    }
+
+    // Keyword (index 1)
+    if m.keyword.is_prefix_of(s)
+        || m.storage_type.is_prefix_of(s)
+        || m.storage_modifier.is_prefix_of(s)
+    {
+        return 1;
+    }
+
+    // Function (index 2)
+    if m.entity_name_function.is_prefix_of(s)
+        || m.support_function.is_prefix_of(s)
+        || m.meta_function_call.is_prefix_of(s)
+        || m.variable_function.is_prefix_of(s)
+    {
+        return 2;
+    }
+
+    // Type (index 6)
+    if m.entity_name_type.is_prefix_of(s)
+        || m.support_type.is_prefix_of(s)
+        || m.support_class.is_prefix_of(s)
+        || m.entity_name_class.is_prefix_of(s)
+        || m.entity_name_struct.is_prefix_of(s)
+        || m.entity_name_enum.is_prefix_of(s)
+        || m.entity_name_interface.is_prefix_of(s)
+        || m.entity_name_trait.is_prefix_of(s)
+    {
+        return 6;
+    }
+
+    // Punctuation (index 8)
+    if m.punctuation.is_prefix_of(s) {
+        return 8;
+    }
+
+    // Variable (index 3)
+    if m.variable.is_prefix_of(s) || m.entity_name.is_prefix_of(s) || m.meta_path.is_prefix_of(s) {
+        return 3;
+    }
+
+    // Generic constant -> number (index 5)
+    if m.constant.is_prefix_of(s) {
+        return 5;
+    }
+
+    // No match
+    usize::MAX
+}
+
+/// Determine the semantic color category from a scope stack.
+/// Uses per-scope caching to avoid repeated prefix checks.
+#[inline]
+fn scope_to_color_index(scope: &ScopeStack) -> usize {
+    SCOPE_COLOR_CACHE.with(|cache| {
+        let mut cache = cache.borrow_mut();
+
+        // Walk from innermost to outermost scope
+        for s in scope.as_slice().iter().rev() {
+            let color_idx = *cache.entry(*s).or_insert_with(|| compute_scope_color(*s));
+            if color_idx != usize::MAX {
+                return color_idx;
+            }
+        }
+
+        usize::MAX
+    })
+}
+
+/// Find the appropriate syntax for a language name.
+fn find_syntax<'a>(ss: &'a SyntaxSet, lang: &str) -> Option<&'a SyntaxReference> {
+    // Direct name/token match (syntect APIs are case-insensitive)
+    if let Some(syn) = ss.find_syntax_by_token(lang) {
+        return Some(syn);
+    }
+
+    // Extension-based match
+    if let Some(syn) = ss.find_syntax_by_extension(lang) {
+        return Some(syn);
+    }
+
+    // Alias lookup for languages not in syntect's default set
+    let alias = find_alias(lang)?;
+
+    ss.find_syntax_by_name(alias)
+        .or_else(|| ss.find_syntax_by_token(alias))
+}
+
+/// Highlight code and return ANSI-colored lines.
+///
+/// # Arguments
+/// * `code` - The source code to highlight
+/// * `lang` - Language identifier (e.g., "rust", "typescript", "python")
+/// * `colors` - Theme colors as ANSI escape sequences
+///
+/// # Returns
+/// Highlighted code with ANSI color codes, or the original code if highlighting
+/// fails.
+#[napi(js_name = "highlightCode")] +pub fn highlight_code(code: String, lang: Option, colors: HighlightColors) -> String { + let inserted = colors.inserted.as_deref().unwrap_or(""); + let deleted = colors.deleted.as_deref().unwrap_or(""); + + // Color palette as array for quick indexing + let palette = [ + colors.comment.as_str(), // 0 + colors.keyword.as_str(), // 1 + colors.function.as_str(), // 2 + colors.variable.as_str(), // 3 + colors.string.as_str(), // 4 + colors.number.as_str(), // 5 + colors.r#type.as_str(), // 6 + colors.operator.as_str(), // 7 + colors.punctuation.as_str(), // 8 + inserted, // 9 + deleted, // 10 + ]; + + let ss = get_syntax_set(); + + // Find syntax for the language + let syntax = match &lang { + Some(l) => find_syntax(ss, l), + None => None, + } + .unwrap_or_else(|| ss.find_syntax_plain_text()); + + let mut parse_state = ParseState::new(syntax); + let mut scope_stack = ScopeStack::new(); + let mut result = String::with_capacity(code.len() * 2); + + for line in syntect::util::LinesWithEndings::from(code.as_str()) { + let Ok(ops) = parse_state.parse_line(line, ss) else { + // Parse error - append unhighlighted line and continue + result.push_str(line); + continue; + }; + + let mut prev_end = 0; + for (offset, op) in ops { + let offset = offset.min(line.len()); + + // Output text BEFORE this operation using current scope + if offset > prev_end { + let text = &line[prev_end..offset]; + let color_idx = scope_to_color_index(&scope_stack); + + if color_idx < palette.len() && !palette[color_idx].is_empty() { + result.push_str(palette[color_idx]); + result.push_str(text); + result.push_str("\x1b[39m"); + } else { + result.push_str(text); + } + } + prev_end = offset; + + // Now apply scope operation for NEXT segment + match op { + ScopeStackOp::Push(scope) => { + scope_stack.push(scope); + }, + ScopeStackOp::Pop(count) => { + for _ in 0..count { + scope_stack.pop(); + } + }, + ScopeStackOp::Restore | ScopeStackOp::Clear(_) | ScopeStackOp::Noop 
=> {}, + } + } + + // Output remaining text with current scope + if prev_end < line.len() { + let text = &line[prev_end..]; + let color_idx = scope_to_color_index(&scope_stack); + + if color_idx < palette.len() && !palette[color_idx].is_empty() { + result.push_str(palette[color_idx]); + result.push_str(text); + result.push_str("\x1b[39m"); + } else { + result.push_str(text); + } + } + } + + result +} + +/// Check if a language is supported for highlighting. +/// Returns true if the language has either direct support or a fallback +/// mapping. +#[napi(js_name = "supportsLanguage")] +pub fn supports_language(lang: String) -> bool { + if is_known_alias(&lang) { + return true; + } + + // Fall back to direct syntax lookup + let ss = get_syntax_set(); + find_syntax(ss, &lang).is_some() +} + +/// Get list of supported languages. +#[napi(js_name = "getSupportedLanguages")] +pub fn get_supported_languages() -> Vec { + let ss = get_syntax_set(); + ss.syntaxes().iter().map(|s| s.name.clone()).collect() +} diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 82985849b..8ab224c6c 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -8,4 +8,5 @@ #![allow(clippy::needless_pass_by_value)] +mod clipboard; mod grep; diff --git a/packages/native/package.json b/packages/native/package.json index 84de3dfb3..a195cc0af 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -1,14 +1,14 @@ { "name": "@gsd/native", "version": "0.1.0", - "description": "Native Rust bindings for GSD — high-performance grep via N-API", + "description": "Native Rust bindings for GSD — high-performance grep and clipboard via N-API", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", "scripts": { "build:native": "node ../../native/scripts/build.js", "build:native:dev": "node ../../native/scripts/build.js --dev", - "test": "node --test src/__tests__/grep.test.mjs" + "test": "node --test 
src/__tests__/grep.test.mjs src/__tests__/clipboard.test.mjs" }, "exports": { ".": { @@ -18,6 +18,10 @@ "./grep": { "types": "./src/grep/index.ts", "import": "./src/grep/index.ts" + }, + "./clipboard": { + "types": "./src/clipboard/index.ts", + "import": "./src/clipboard/index.ts" } }, "files": [ diff --git a/packages/native/src/__tests__/clipboard.test.mjs b/packages/native/src/__tests__/clipboard.test.mjs new file mode 100644 index 000000000..cabec6375 --- /dev/null +++ b/packages/native/src/__tests__/clipboard.test.mjs @@ -0,0 +1,80 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +// Load the native addon directly +const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon"); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error("Native addon not found. 
Run build:native first."); + process.exit(1); +} + +describe("native clipboard: copyToClipboard()", () => { + test("copies text without throwing", () => { + assert.doesNotThrow(() => { + native.copyToClipboard("GSD clipboard test"); + }); + }); + + test("accepts empty string", () => { + assert.doesNotThrow(() => { + native.copyToClipboard(""); + }); + }); + + test("accepts unicode text", () => { + assert.doesNotThrow(() => { + native.copyToClipboard("Hello 世界"); + }); + }); +}); + +describe("native clipboard: readTextFromClipboard()", () => { + test("reads back text that was copied", () => { + const testText = `GSD clipboard roundtrip ${Date.now()}`; + native.copyToClipboard(testText); + const result = native.readTextFromClipboard(); + assert.equal(result, testText); + }); + + test("returns a string or null", () => { + const result = native.readTextFromClipboard(); + assert.ok(result === null || typeof result === "string"); + }); +}); + +describe("native clipboard: readImageFromClipboard()", () => { + test("returns a promise", () => { + const result = native.readImageFromClipboard(); + assert.ok(result instanceof Promise); + }); + + test("resolves to ClipboardImage or null", async () => { + const result = await native.readImageFromClipboard(); + if (result !== null) { + assert.ok(result.data instanceof Uint8Array, "data should be Uint8Array"); + assert.equal(result.mimeType, "image/png"); + } + }); +}); diff --git a/packages/native/src/__tests__/highlight.test.mjs b/packages/native/src/__tests__/highlight.test.mjs new file mode 100644 index 000000000..db16dd5be --- /dev/null +++ b/packages/native/src/__tests__/highlight.test.mjs @@ -0,0 +1,156 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = 
createRequire(import.meta.url); + +// Load the native addon directly +const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon"); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error("Native addon not found. Run `npm run build:native -w @gsd/native` first."); + process.exit(1); +} + +const testColors = { + comment: "\x1b[38;2;106;153;85m", + keyword: "\x1b[38;2;197;134;192m", + function: "\x1b[38;2;220;220;170m", + variable: "\x1b[38;2;156;220;254m", + string: "\x1b[38;2;206;145;120m", + number: "\x1b[38;2;181;206;168m", + type: "\x1b[38;2;78;201;176m", + operator: "\x1b[38;2;212;212;212m", + punctuation: "\x1b[38;2;212;212;212m", +}; + +describe("native highlight: highlightCode()", () => { + test("highlights JavaScript code with ANSI colors", () => { + const code = 'const x = 42;\n'; + const result = native.highlightCode(code, "javascript", testColors); + + // Result should contain ANSI escape sequences + assert.ok(result.includes("\x1b["), "should contain ANSI escape codes"); + // Result should contain the original tokens + assert.ok(result.includes("const"), "should contain 'const'"); + assert.ok(result.includes("42"), "should contain '42'"); + // Reset codes should be present + assert.ok(result.includes("\x1b[39m"), "should contain ANSI reset codes"); + }); + + test("returns unhighlighted code for unknown language", () => { + const code = "some random text\n"; + const result = native.highlightCode(code, "nonexistent_lang_xyz", testColors); + + // Plain text syntax should pass through without color codes on plain content + assert.ok(typeof result === "string"); + assert.ok(result.includes("some random text")); + }); + + test("handles null 
language gracefully", () => { + const code = "hello world\n"; + const result = native.highlightCode(code, null, testColors); + + assert.ok(typeof result === "string"); + assert.ok(result.includes("hello world")); + }); + + test("handles empty code", () => { + const result = native.highlightCode("", "javascript", testColors); + assert.equal(result, ""); + }); + + test("handles multiline code", () => { + const code = 'function foo() {\n return "bar";\n}\n'; + const result = native.highlightCode(code, "javascript", testColors); + + assert.ok(result.includes("function")); + assert.ok(result.includes("foo")); + assert.ok(result.includes("return")); + assert.ok(result.includes('"bar"')); + }); + + test("supports optional inserted/deleted colors", () => { + const colorsWithDiff = { + ...testColors, + inserted: "\x1b[38;2;0;255;0m", + deleted: "\x1b[38;2;255;0;0m", + }; + const code = "+added line\n-removed line\n"; + const result = native.highlightCode(code, "diff", colorsWithDiff); + + assert.ok(typeof result === "string"); + assert.ok(result.length > 0); + }); +}); + +describe("native highlight: supportsLanguage()", () => { + test("returns true for known aliases", () => { + assert.ok(native.supportsLanguage("javascript")); + assert.ok(native.supportsLanguage("typescript")); + assert.ok(native.supportsLanguage("python")); + assert.ok(native.supportsLanguage("rust")); + assert.ok(native.supportsLanguage("go")); + assert.ok(native.supportsLanguage("bash")); + }); + + test("returns true case-insensitively", () => { + assert.ok(native.supportsLanguage("JavaScript")); + assert.ok(native.supportsLanguage("PYTHON")); + assert.ok(native.supportsLanguage("Rust")); + }); + + test("returns true for short aliases", () => { + assert.ok(native.supportsLanguage("ts")); + assert.ok(native.supportsLanguage("py")); + assert.ok(native.supportsLanguage("rs")); + assert.ok(native.supportsLanguage("rb")); + assert.ok(native.supportsLanguage("sh")); + }); + + test("returns false for completely 
unknown languages", () => { + assert.equal(native.supportsLanguage("nonexistent_lang_xyz"), false); + }); +}); + +describe("native highlight: getSupportedLanguages()", () => { + test("returns an array of language names", () => { + const langs = native.getSupportedLanguages(); + assert.ok(Array.isArray(langs)); + assert.ok(langs.length > 0, "should have at least one language"); + }); + + test("includes common languages", () => { + const langs = native.getSupportedLanguages(); + // These are syntect default syntax names + assert.ok(langs.includes("JavaScript"), "should include JavaScript"); + assert.ok(langs.includes("Python"), "should include Python"); + assert.ok(langs.includes("Rust"), "should include Rust"); + assert.ok(langs.includes("C"), "should include C"); + }); + + test("returns strings", () => { + const langs = native.getSupportedLanguages(); + for (const lang of langs) { + assert.equal(typeof lang, "string"); + } + }); +}); diff --git a/packages/native/src/clipboard/index.ts b/packages/native/src/clipboard/index.ts new file mode 100644 index 000000000..a363942af --- /dev/null +++ b/packages/native/src/clipboard/index.ts @@ -0,0 +1,40 @@ +/** + * Native clipboard access using N-API. + * + * Cross-platform clipboard read/write backed by the `arboard` Rust crate. + * No external tools (pbcopy, xclip, etc.) required. + */ + +import { native } from "../native.js"; +import type { ClipboardImage } from "./types.js"; + +export type { ClipboardImage }; + +/** + * Copy plain text to the system clipboard. + * + * Runs synchronously to avoid macOS AppKit pasteboard warnings + * when writing from worker threads. + */ +export function copyToClipboard(text: string): void { + native.copyToClipboard(text); +} + +/** + * Read plain text from the system clipboard. + * + * Returns `null` when no text data is available. 
+ */ +export function readTextFromClipboard(): string | null { + return native.readTextFromClipboard() as string | null; +} + +/** + * Read an image from the system clipboard. + * + * Returns a Promise that resolves to a `ClipboardImage` (PNG-encoded bytes) + * or `null` when no image data is available. + */ +export function readImageFromClipboard(): Promise { + return native.readImageFromClipboard() as Promise; +} diff --git a/packages/native/src/clipboard/types.ts b/packages/native/src/clipboard/types.ts new file mode 100644 index 000000000..0ca3f508e --- /dev/null +++ b/packages/native/src/clipboard/types.ts @@ -0,0 +1,7 @@ +/** Clipboard image payload encoded as PNG bytes. */ +export interface ClipboardImage { + /** PNG-encoded image bytes. */ + data: Uint8Array; + /** MIME type for the encoded image payload (always "image/png"). */ + mimeType: string; +} diff --git a/packages/native/src/highlight/index.ts b/packages/native/src/highlight/index.ts new file mode 100644 index 000000000..85d3f1d07 --- /dev/null +++ b/packages/native/src/highlight/index.ts @@ -0,0 +1,44 @@ +/** + * Syntect-based syntax highlighting via N-API. + * + * Provides ANSI-colored output for code blocks using semantic scope matching + * across 11 token categories. + */ + +import { native } from "../native.js"; +import type { HighlightColors } from "./types.js"; + +export type { HighlightColors }; + +/** + * Highlight source code and return ANSI-colored output. + * + * @param code - The source code to highlight + * @param lang - Language identifier (e.g., "rust", "typescript", "python"), or null for plain text + * @param colors - Theme colors as ANSI escape sequences + * @returns Highlighted code with ANSI color codes + */ +export function highlightCode( + code: string, + lang: string | null, + colors: HighlightColors, +): string { + return native.highlightCode(code, lang, colors) as string; +} + +/** + * Check if a language is supported for highlighting. 
+ * + * Returns true if the language has either direct syntect support or a + * fallback alias mapping. + */ +export function supportsLanguage(lang: string): boolean { + return native.supportsLanguage(lang) as boolean; +} + +/** + * Get list of all supported language names from syntect's default syntax set. + */ +export function getSupportedLanguages(): string[] { + return native.getSupportedLanguages() as string[]; +} diff --git a/packages/native/src/highlight/types.ts b/packages/native/src/highlight/types.ts new file mode 100644 index 000000000..deae5267e --- /dev/null +++ b/packages/native/src/highlight/types.ts @@ -0,0 +1,25 @@ +/** Theme colors for syntax highlighting as ANSI escape sequences. */ +export interface HighlightColors { + /** ANSI color for comments. */ + comment: string; + /** ANSI color for keywords. */ + keyword: string; + /** ANSI color for function names. */ + function: string; + /** ANSI color for variables and identifiers. */ + variable: string; + /** ANSI color for string literals. */ + string: string; + /** ANSI color for numeric literals. */ + number: string; + /** ANSI color for type identifiers. */ + type: string; + /** ANSI color for operators. */ + operator: string; + /** ANSI color for punctuation tokens. */ + punctuation: string; + /** ANSI color for diff inserted lines. */ + inserted?: string; + /** ANSI color for diff deleted lines. */ + deleted?: string; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 3c5cfdf83..4e3737609 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -2,9 +2,17 @@ * @gsd/native — High-performance Rust modules exposed via N-API. 
* * Modules: + * - clipboard: native clipboard access (text + image) * - grep: ripgrep-backed regex search (content + filesystem) */ +export { + copyToClipboard, + readTextFromClipboard, + readImageFromClipboard, +} from "./clipboard/index.js"; +export type { ClipboardImage } from "./clipboard/index.js"; + export { searchContent, grep } from "./grep/index.js"; export type { ContextLine, diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 93aa1a09d..613fe3aea 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -43,4 +43,7 @@ function loadNative(): Record { export const native = loadNative() as { search: (content: Buffer | Uint8Array, options: unknown) => unknown; grep: (options: unknown) => unknown; + copyToClipboard: (text: string) => void; + readTextFromClipboard: () => string | null; + readImageFromClipboard: () => Promise; }; From 0b288f389fb57b85caa3dbd8b9d93802a6e37bce Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:40:22 -0600 Subject: [PATCH 2/7] feat: add html-to-markdown native module Port HTML-to-Markdown conversion from Oh My Pi's html module using html-to-markdown-rs. Exposes `htmlToMarkdown()` via N-API with options for content cleaning (strip nav/forms/headers/footers) and image skipping. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 931 ++++++++++++++++++++ native/crates/engine/Cargo.toml | 1 + native/crates/engine/src/html.rs | 44 + native/crates/engine/src/lib.rs | 1 + packages/native/package.json | 6 +- packages/native/src/__tests__/html.test.mjs | 98 +++ packages/native/src/html/index.ts | 24 + packages/native/src/html/types.ts | 7 + packages/native/src/index.ts | 4 + packages/native/src/native.ts | 1 + 10 files changed, 1116 insertions(+), 1 deletion(-) create mode 100644 native/crates/engine/src/html.rs create mode 100644 packages/native/src/__tests__/html.test.mjs create mode 100644 packages/native/src/html/index.ts create mode 100644 packages/native/src/html/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index ba8fa03da..be0931d5b 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -2,6 +2,19 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +24,54 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ast-grep-core" +version = "0.39.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057ae90e7256ebf85f840b1638268df0142c9d19467d500b790631fd301acc27" +dependencies = [ + "bit-set", + "regex", + "thiserror", + "tree-sitter", +] + +[[package]] +name = "astral-tl" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d90933ffb0f97e2fc2e0de21da9d3f20597b804012d199843a6fe7c2810d28f3" +dependencies = [ + "memchr", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.0" @@ -28,12 +89,42 @@ dependencies = [ "serde", ] +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -68,6 +159,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "ctor" version = "0.2.9" @@ -102,6 +199,41 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "globset" version = "0.4.18" @@ -152,11 +284,62 @@ dependencies = [ "memmap2", ] +[[package]] +name = "gsd-ast" +version = "0.1.0" +dependencies = [ + "ast-grep-core", + "globset", + "ignore", + "napi", + "napi-derive", + "phf", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-diff", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-hcl", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + 
"tree-sitter-julia", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-make", + "tree-sitter-md", + "tree-sitter-nix", + "tree-sitter-objc", + "tree-sitter-odin", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-regex", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-solidity", + "tree-sitter-starlark", + "tree-sitter-swift", + "tree-sitter-toml-ng", + "tree-sitter-typescript", + "tree-sitter-verilog", + "tree-sitter-xml", + "tree-sitter-yaml", + "tree-sitter-zig", +] + [[package]] name = "gsd-engine" version = "0.1.0" dependencies = [ "gsd-grep", + "html-to-markdown-rs", "napi", "napi-build", "napi-derive", @@ -173,6 +356,53 @@ dependencies = [ "rayon", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + +[[package]] +name = "html-to-markdown-rs" +version = "2.28.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9377e16af590b764fd98fd176027cf8831c5335f8964f3f643753e38913a4e" +dependencies = [ + "ahash", + "astral-tl", + "base64", + "html-escape", + "html5ever", + "lru", + "once_cell", + "regex", + "thiserror", +] + +[[package]] +name = "html5ever" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1054432bae2f14e0061e33d23402fbaa67a921d319d56adc6bcf887ddad1cbc2" +dependencies = [ + "log", + "markup5ever", +] + [[package]] name = "ignore" version = "0.4.25" @@ -189,6 +419,22 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indexmap" +version = "2.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "libc" version = "0.2.183" @@ -205,12 +451,41 @@ dependencies = [ "windows-link", ] +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "markup5ever" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8983d30f2915feeaaab2d6babdd6bc7e9ed1a00b66b5e6d74df19aa9c0e91862" +dependencies = [ + "log", + "tendril", + "web_atoms", +] + [[package]] name = "memchr" version = "2.8.0" @@ -283,12 +558,100 @@ dependencies = [ "libloading", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.106" @@ -327,6 +690,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.12.3" @@ -365,6 +737,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "semver" version = "1.0.27" @@ -400,6 +778,68 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "string_cache" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "string_cache_codegen" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "syn" version = "2.0.117" @@ -411,6 +851,435 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tendril" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4790fc369d5a530f4b544b094e31388b9b3a37c0f4652ade4505945f5660d24" +dependencies = [ + "new_debug_unreachable", + "utf-8", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" 
+version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-css" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-diff" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfe1e5ca280a65dfe5ba4205c1bcc84edf486464fed315db53dee6da9a335889" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-elixir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66dd064a762ed95bfc29857fa3cb7403bb1e5cb88112de0f6341b7e47284ba40" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" 
+dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "977c51e504548cba13fc27cb5a2edab2124cf6716a1934915d07ab99523b05a4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-hcl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7b2cc3d7121553b84309fab9d11b3ff3d420403eef9ae50f9fd1cd9d9cf012" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-html" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-json" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-sg" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0e175b7530765d1e36ad234a7acaa8b2a3316153f239d724376c7ee5e8d8e98" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-make" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5998dc7cbcbdab19fae8aefef982bf2d6544513d8d2e69cc44aec4c63810104" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-md" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efd398be546456c814598ee56c0f51769a77241511b4a58077815d120afa882" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-odin" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24db210fe9ba2237c71c5030d7b146c7025420ba72dd8013d13cd822c3a8d77a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = 
"tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-regex" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8a59be9f0ac131fd8f062eaaba14882b2fa5a6a7882a20134cb1d60df2e625" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scala" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b4f354028b5fcf1d0c77f1c6d84cd5a579f29a1e43cb61551ec6580e9a99229" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-starlark" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8934f282d085cc4b9ee28aa688aa3fbe8aa3766201c2a6252f411d45b4c3a721" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-toml-ng" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9adc2c898ae49730e857d75be403da3f92bb81d8e37a2f918a08dd10de5ebb1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-verilog" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e7e0360395852f1f6ff5b7b82c72dc6557d181073188df1d60ec469ea69c66" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" 
version = "1.0.24" @@ -423,6 +1292,24 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -433,6 +1320,24 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "web_atoms" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a9779e9f04d2ac1ce317aee707aa2f6b773afba7b931222bff6983843b1576" +dependencies = [ + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", +] + [[package]] name = "winapi-util" version = "0.1.11" @@ -456,3 +1361,29 @@ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] + +[[package]] +name = "zerocopy" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index dcd61ef0c..a6daaca0a 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -12,6 +12,7 @@ crate-type = ["cdylib"] [dependencies] gsd-grep = { path = "../grep" } +html-to-markdown-rs = { version = "2", default-features = false } napi = { version = "2", features = ["napi8"] } napi-derive = "2" diff --git a/native/crates/engine/src/html.rs b/native/crates/engine/src/html.rs new file mode 100644 index 000000000..2cc44c047 --- /dev/null +++ b/native/crates/engine/src/html.rs @@ -0,0 +1,44 @@ +//! HTML to Markdown conversion via N-API. +//! +//! Wraps `html-to-markdown-rs` and exposes it as a JS-callable N-API export. + +use html_to_markdown_rs::{convert, ConversionOptions, PreprocessingOptions, PreprocessingPreset}; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +/// Options for HTML to Markdown conversion. +#[napi(object)] +#[derive(Debug, Default)] +pub struct HtmlToMarkdownOptions { + /// Remove navigation elements, forms, headers, footers. + #[napi(js_name = "cleanContent")] + pub clean_content: Option, + /// Skip images during conversion. + #[napi(js_name = "skipImages")] + pub skip_images: Option, +} + +/// Convert HTML source to Markdown with optional preprocessing. +/// +/// Strips boilerplate (nav, forms, headers, footers) when `cleanContent` is true. +/// Returns the Markdown string. 
+#[napi(js_name = "htmlToMarkdown")] +pub fn html_to_markdown(html: String, options: Option) -> Result { + let options = options.unwrap_or_default(); + let clean_content = options.clean_content.unwrap_or(false); + let skip_images = options.skip_images.unwrap_or(false); + + let conversion_opts = ConversionOptions { + skip_images, + preprocessing: PreprocessingOptions { + enabled: clean_content, + preset: PreprocessingPreset::Aggressive, + remove_navigation: true, + remove_forms: true, + }, + ..Default::default() + }; + + convert(&html, Some(conversion_opts)) + .map_err(|err| Error::from_reason(format!("HTML conversion error: {err}"))) +} diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 82985849b..f4dca80dc 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -9,3 +9,4 @@ #![allow(clippy::needless_pass_by_value)] mod grep; +mod html; diff --git a/packages/native/package.json b/packages/native/package.json index 84de3dfb3..f81b574d6 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -8,7 +8,7 @@ "scripts": { "build:native": "node ../../native/scripts/build.js", "build:native:dev": "node ../../native/scripts/build.js --dev", - "test": "node --test src/__tests__/grep.test.mjs" + "test": "node --test src/__tests__/grep.test.mjs src/__tests__/html.test.mjs" }, "exports": { ".": { @@ -18,6 +18,10 @@ "./grep": { "types": "./src/grep/index.ts", "import": "./src/grep/index.ts" + }, + "./html": { + "types": "./src/html/index.ts", + "import": "./src/html/index.ts" } }, "files": [ diff --git a/packages/native/src/__tests__/html.test.mjs b/packages/native/src/__tests__/html.test.mjs new file mode 100644 index 000000000..31e21c463 --- /dev/null +++ b/packages/native/src/__tests__/html.test.mjs @@ -0,0 +1,98 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import 
{ fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon"); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error("Native addon not found. Run `npm run build:native -w @gsd/native` first."); + process.exit(1); +} + +describe("native html: htmlToMarkdown()", () => { + test("converts basic HTML to markdown", () => { + const html = "

Hello

World

"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("Hello"), "Should contain heading text"); + assert.ok(result.includes("World"), "Should contain paragraph text"); + }); + + test("converts links to markdown links", () => { + const html = '

Visit Example

'; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("[Example]"), "Should contain markdown link text"); + assert.ok(result.includes("(https://example.com)"), "Should contain markdown link URL"); + }); + + test("converts lists to markdown", () => { + const html = "
  • First
  • Second
  • Third
"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("First"), "Should contain first item"); + assert.ok(result.includes("Second"), "Should contain second item"); + assert.ok(result.includes("Third"), "Should contain third item"); + }); + + test("converts bold and italic", () => { + const html = "

bold and italic

"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("**bold**") || result.includes("__bold__"), "Should contain bold"); + assert.ok(result.includes("*italic*") || result.includes("_italic_"), "Should contain italic"); + }); + + test("handles empty HTML", () => { + const result = native.htmlToMarkdown(""); + assert.equal(typeof result, "string"); + }); + + test("handles plain text", () => { + const result = native.htmlToMarkdown("Just plain text"); + assert.ok(result.includes("Just plain text"), "Should preserve plain text"); + }); + + test("accepts skipImages option", () => { + const html = '

Title

Content with photo image

'; + const result = native.htmlToMarkdown(html, { skipImages: true }); + assert.ok(result.includes("Title"), "Should contain heading"); + assert.ok(result.includes("Content"), "Should contain paragraph text"); + }); + + test("accepts cleanContent option", () => { + const html = '

Article

Body text.

Copyright
'; + const result = native.htmlToMarkdown(html, { cleanContent: true }); + assert.ok(result.includes("Article") || result.includes("Body text"), "Should contain main content"); + }); + + test("converts code blocks", () => { + const html = "
const x = 1;
"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("const x = 1;"), "Should contain code content"); + }); + + test("converts complex nested HTML", () => { + const html = '

Section

Text with bold link.

  • Item one
  • Item two
'; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("Section"), "Should contain heading"); + assert.ok(result.includes("example.com"), "Should contain link"); + assert.ok(result.includes("one"), "Should contain list items"); + }); +}); diff --git a/packages/native/src/html/index.ts b/packages/native/src/html/index.ts new file mode 100644 index 000000000..28886b7a2 --- /dev/null +++ b/packages/native/src/html/index.ts @@ -0,0 +1,24 @@ +/** + * HTML to Markdown conversion via native Rust bindings. + * + * Uses `html-to-markdown-rs` under the hood for high-performance + * conversion with optional content cleaning (stripping nav, forms, etc.). + */ + +import { native } from "../native.js"; +import type { HtmlToMarkdownOptions } from "./types.js"; + +export type { HtmlToMarkdownOptions }; + +/** + * Convert an HTML string to Markdown. + * + * When `cleanContent` is true, boilerplate elements (nav, forms, headers, + * footers) are stripped before conversion. + */ +export function htmlToMarkdown( + html: string, + options?: HtmlToMarkdownOptions, +): string { + return native.htmlToMarkdown(html, options ?? {}) as string; +} diff --git a/packages/native/src/html/types.ts b/packages/native/src/html/types.ts new file mode 100644 index 000000000..a8984c7a8 --- /dev/null +++ b/packages/native/src/html/types.ts @@ -0,0 +1,7 @@ +/** Options for HTML to Markdown conversion. */ +export interface HtmlToMarkdownOptions { + /** Remove navigation elements, forms, headers, footers. */ + cleanContent?: boolean; + /** Skip images during conversion. 
*/ + skipImages?: boolean; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 3c5cfdf83..8cbc7f81d 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -3,6 +3,7 @@ * * Modules: * - grep: ripgrep-backed regex search (content + filesystem) + * - html: HTML to Markdown conversion */ export { searchContent, grep } from "./grep/index.js"; @@ -15,3 +16,6 @@ export type { SearchOptions, SearchResult, } from "./grep/index.js"; + +export { htmlToMarkdown } from "./html/index.js"; +export type { HtmlToMarkdownOptions } from "./html/index.js"; diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 93aa1a09d..17a4aa1b2 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -43,4 +43,5 @@ function loadNative(): Record { export const native = loadNative() as { search: (content: Buffer | Uint8Array, options: unknown) => unknown; grep: (options: unknown) => unknown; + htmlToMarkdown: (html: string, options: unknown) => unknown; }; From a74d2061c13be7accf0ff9c892aea61bf6656f5d Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:40:22 -0600 Subject: [PATCH 3/7] feat: add html-to-markdown native module Port HTML-to-Markdown conversion from Oh My Pi's html module using html-to-markdown-rs. Exposes `htmlToMarkdown()` via N-API with options for content cleaning (strip nav/forms/headers/footers) and image skipping. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 931 ++++++++++++++++++++ native/crates/engine/Cargo.toml | 1 + native/crates/engine/src/html.rs | 44 + native/crates/engine/src/lib.rs | 1 + packages/native/package.json | 6 +- packages/native/src/__tests__/html.test.mjs | 98 +++ packages/native/src/html/index.ts | 24 + packages/native/src/html/types.ts | 7 + packages/native/src/index.ts | 4 + packages/native/src/native.ts | 1 + 10 files changed, 1116 insertions(+), 1 deletion(-) create mode 100644 native/crates/engine/src/html.rs create mode 100644 packages/native/src/__tests__/html.test.mjs create mode 100644 packages/native/src/html/index.ts create mode 100644 packages/native/src/html/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index ba8fa03da..be0931d5b 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -2,6 +2,19 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +24,54 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ast-grep-core" +version = "0.39.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057ae90e7256ebf85f840b1638268df0142c9d19467d500b790631fd301acc27" +dependencies = [ + "bit-set", + "regex", + "thiserror", + "tree-sitter", +] + +[[package]] +name = "astral-tl" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d90933ffb0f97e2fc2e0de21da9d3f20597b804012d199843a6fe7c2810d28f3" +dependencies = [ + "memchr", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.0" @@ -28,12 +89,42 @@ dependencies = [ "serde", ] +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -68,6 +159,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "ctor" version = "0.2.9" @@ -102,6 +199,41 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "globset" version = "0.4.18" @@ -152,11 +284,62 @@ dependencies = [ "memmap2", ] +[[package]] +name = "gsd-ast" +version = "0.1.0" +dependencies = [ + "ast-grep-core", + "globset", + "ignore", + "napi", + "napi-derive", + "phf", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-diff", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-hcl", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + 
"tree-sitter-julia", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-make", + "tree-sitter-md", + "tree-sitter-nix", + "tree-sitter-objc", + "tree-sitter-odin", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-regex", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-solidity", + "tree-sitter-starlark", + "tree-sitter-swift", + "tree-sitter-toml-ng", + "tree-sitter-typescript", + "tree-sitter-verilog", + "tree-sitter-xml", + "tree-sitter-yaml", + "tree-sitter-zig", +] + [[package]] name = "gsd-engine" version = "0.1.0" dependencies = [ "gsd-grep", + "html-to-markdown-rs", "napi", "napi-build", "napi-derive", @@ -173,6 +356,53 @@ dependencies = [ "rayon", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + +[[package]] +name = "html-to-markdown-rs" +version = "2.28.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9377e16af590b764fd98fd176027cf8831c5335f8964f3f643753e38913a4e" +dependencies = [ + "ahash", + "astral-tl", + "base64", + "html-escape", + "html5ever", + "lru", + "once_cell", + "regex", + "thiserror", +] + +[[package]] +name = "html5ever" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1054432bae2f14e0061e33d23402fbaa67a921d319d56adc6bcf887ddad1cbc2" +dependencies = [ + "log", + "markup5ever", +] + [[package]] name = "ignore" version = "0.4.25" @@ -189,6 +419,22 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indexmap" +version = "2.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "libc" version = "0.2.183" @@ -205,12 +451,41 @@ dependencies = [ "windows-link", ] +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "markup5ever" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8983d30f2915feeaaab2d6babdd6bc7e9ed1a00b66b5e6d74df19aa9c0e91862" +dependencies = [ + "log", + "tendril", + "web_atoms", +] + [[package]] name = "memchr" version = "2.8.0" @@ -283,12 +558,100 @@ dependencies = [ "libloading", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.106" @@ -327,6 +690,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.12.3" @@ -365,6 +737,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "semver" version = "1.0.27" @@ -400,6 +778,68 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "string_cache" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "string_cache_codegen" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "syn" version = "2.0.117" @@ -411,6 +851,435 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tendril" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4790fc369d5a530f4b544b094e31388b9b3a37c0f4652ade4505945f5660d24" +dependencies = [ + "new_debug_unreachable", + "utf-8", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" 
+version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-css" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-diff" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfe1e5ca280a65dfe5ba4205c1bcc84edf486464fed315db53dee6da9a335889" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-elixir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66dd064a762ed95bfc29857fa3cb7403bb1e5cb88112de0f6341b7e47284ba40" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" 
+dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "977c51e504548cba13fc27cb5a2edab2124cf6716a1934915d07ab99523b05a4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-hcl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7b2cc3d7121553b84309fab9d11b3ff3d420403eef9ae50f9fd1cd9d9cf012" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-html" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-json" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-sg" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0e175b7530765d1e36ad234a7acaa8b2a3316153f239d724376c7ee5e8d8e98" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-make" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5998dc7cbcbdab19fae8aefef982bf2d6544513d8d2e69cc44aec4c63810104" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-md" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efd398be546456c814598ee56c0f51769a77241511b4a58077815d120afa882" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-odin" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24db210fe9ba2237c71c5030d7b146c7025420ba72dd8013d13cd822c3a8d77a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = 
"tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-regex" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8a59be9f0ac131fd8f062eaaba14882b2fa5a6a7882a20134cb1d60df2e625" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scala" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b4f354028b5fcf1d0c77f1c6d84cd5a579f29a1e43cb61551ec6580e9a99229" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-starlark" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8934f282d085cc4b9ee28aa688aa3fbe8aa3766201c2a6252f411d45b4c3a721" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-toml-ng" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9adc2c898ae49730e857d75be403da3f92bb81d8e37a2f918a08dd10de5ebb1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-verilog" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e7e0360395852f1f6ff5b7b82c72dc6557d181073188df1d60ec469ea69c66" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" 
version = "1.0.24" @@ -423,6 +1292,24 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -433,6 +1320,24 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "web_atoms" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a9779e9f04d2ac1ce317aee707aa2f6b773afba7b931222bff6983843b1576" +dependencies = [ + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", +] + [[package]] name = "winapi-util" version = "0.1.11" @@ -456,3 +1361,29 @@ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] + +[[package]] +name = "zerocopy" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index dcd61ef0c..a6daaca0a 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -12,6 +12,7 @@ crate-type = ["cdylib"] [dependencies] gsd-grep = { path = "../grep" } +html-to-markdown-rs = { version = "2", default-features = false } napi = { version = "2", features = ["napi8"] } napi-derive = "2" diff --git a/native/crates/engine/src/html.rs b/native/crates/engine/src/html.rs new file mode 100644 index 000000000..2cc44c047 --- /dev/null +++ b/native/crates/engine/src/html.rs @@ -0,0 +1,44 @@ +//! HTML to Markdown conversion via N-API. +//! +//! Wraps `html-to-markdown-rs` and exposes it as a JS-callable N-API export. + +use html_to_markdown_rs::{convert, ConversionOptions, PreprocessingOptions, PreprocessingPreset}; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +/// Options for HTML to Markdown conversion. +#[napi(object)] +#[derive(Debug, Default)] +pub struct HtmlToMarkdownOptions { + /// Remove navigation elements, forms, headers, footers. + #[napi(js_name = "cleanContent")] + pub clean_content: Option, + /// Skip images during conversion. + #[napi(js_name = "skipImages")] + pub skip_images: Option, +} + +/// Convert HTML source to Markdown with optional preprocessing. +/// +/// Strips boilerplate (nav, forms, headers, footers) when `cleanContent` is true. +/// Returns the Markdown string. 
+#[napi(js_name = "htmlToMarkdown")] +pub fn html_to_markdown(html: String, options: Option) -> Result { + let options = options.unwrap_or_default(); + let clean_content = options.clean_content.unwrap_or(false); + let skip_images = options.skip_images.unwrap_or(false); + + let conversion_opts = ConversionOptions { + skip_images, + preprocessing: PreprocessingOptions { + enabled: clean_content, + preset: PreprocessingPreset::Aggressive, + remove_navigation: true, + remove_forms: true, + }, + ..Default::default() + }; + + convert(&html, Some(conversion_opts)) + .map_err(|err| Error::from_reason(format!("HTML conversion error: {err}"))) +} diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 82985849b..f4dca80dc 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -9,3 +9,4 @@ #![allow(clippy::needless_pass_by_value)] mod grep; +mod html; diff --git a/packages/native/package.json b/packages/native/package.json index 84de3dfb3..f81b574d6 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -8,7 +8,7 @@ "scripts": { "build:native": "node ../../native/scripts/build.js", "build:native:dev": "node ../../native/scripts/build.js --dev", - "test": "node --test src/__tests__/grep.test.mjs" + "test": "node --test src/__tests__/grep.test.mjs src/__tests__/html.test.mjs" }, "exports": { ".": { @@ -18,6 +18,10 @@ "./grep": { "types": "./src/grep/index.ts", "import": "./src/grep/index.ts" + }, + "./html": { + "types": "./src/html/index.ts", + "import": "./src/html/index.ts" } }, "files": [ diff --git a/packages/native/src/__tests__/html.test.mjs b/packages/native/src/__tests__/html.test.mjs new file mode 100644 index 000000000..31e21c463 --- /dev/null +++ b/packages/native/src/__tests__/html.test.mjs @@ -0,0 +1,98 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import 
{ fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon"); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error("Native addon not found. Run `npm run build:native -w @gsd/native` first."); + process.exit(1); +} + +describe("native html: htmlToMarkdown()", () => { + test("converts basic HTML to markdown", () => { + const html = "

Hello

World

"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("Hello"), "Should contain heading text"); + assert.ok(result.includes("World"), "Should contain paragraph text"); + }); + + test("converts links to markdown links", () => { + const html = '

Visit Example

'; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("[Example]"), "Should contain markdown link text"); + assert.ok(result.includes("(https://example.com)"), "Should contain markdown link URL"); + }); + + test("converts lists to markdown", () => { + const html = "
  • First
  • Second
  • Third
"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("First"), "Should contain first item"); + assert.ok(result.includes("Second"), "Should contain second item"); + assert.ok(result.includes("Third"), "Should contain third item"); + }); + + test("converts bold and italic", () => { + const html = "

bold and italic

"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("**bold**") || result.includes("__bold__"), "Should contain bold"); + assert.ok(result.includes("*italic*") || result.includes("_italic_"), "Should contain italic"); + }); + + test("handles empty HTML", () => { + const result = native.htmlToMarkdown(""); + assert.equal(typeof result, "string"); + }); + + test("handles plain text", () => { + const result = native.htmlToMarkdown("Just plain text"); + assert.ok(result.includes("Just plain text"), "Should preserve plain text"); + }); + + test("accepts skipImages option", () => { + const html = '

Title

Content with photo image

'; + const result = native.htmlToMarkdown(html, { skipImages: true }); + assert.ok(result.includes("Title"), "Should contain heading"); + assert.ok(result.includes("Content"), "Should contain paragraph text"); + }); + + test("accepts cleanContent option", () => { + const html = '

Article

Body text.

Copyright
'; + const result = native.htmlToMarkdown(html, { cleanContent: true }); + assert.ok(result.includes("Article") || result.includes("Body text"), "Should contain main content"); + }); + + test("converts code blocks", () => { + const html = "
const x = 1;
"; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("const x = 1;"), "Should contain code content"); + }); + + test("converts complex nested HTML", () => { + const html = '

Section

Text with bold link.

  • Item one
  • Item two
'; + const result = native.htmlToMarkdown(html); + assert.ok(result.includes("Section"), "Should contain heading"); + assert.ok(result.includes("example.com"), "Should contain link"); + assert.ok(result.includes("one"), "Should contain list items"); + }); +}); diff --git a/packages/native/src/html/index.ts b/packages/native/src/html/index.ts new file mode 100644 index 000000000..28886b7a2 --- /dev/null +++ b/packages/native/src/html/index.ts @@ -0,0 +1,24 @@ +/** + * HTML to Markdown conversion via native Rust bindings. + * + * Uses `html-to-markdown-rs` under the hood for high-performance + * conversion with optional content cleaning (stripping nav, forms, etc.). + */ + +import { native } from "../native.js"; +import type { HtmlToMarkdownOptions } from "./types.js"; + +export type { HtmlToMarkdownOptions }; + +/** + * Convert an HTML string to Markdown. + * + * When `cleanContent` is true, boilerplate elements (nav, forms, headers, + * footers) are stripped before conversion. + */ +export function htmlToMarkdown( + html: string, + options?: HtmlToMarkdownOptions, +): string { + return native.htmlToMarkdown(html, options ?? {}) as string; +} diff --git a/packages/native/src/html/types.ts b/packages/native/src/html/types.ts new file mode 100644 index 000000000..a8984c7a8 --- /dev/null +++ b/packages/native/src/html/types.ts @@ -0,0 +1,7 @@ +/** Options for HTML to Markdown conversion. */ +export interface HtmlToMarkdownOptions { + /** Remove navigation elements, forms, headers, footers. */ + cleanContent?: boolean; + /** Skip images during conversion. 
*/ + skipImages?: boolean; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 3c5cfdf83..8cbc7f81d 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -3,6 +3,7 @@ * * Modules: * - grep: ripgrep-backed regex search (content + filesystem) + * - html: HTML to Markdown conversion */ export { searchContent, grep } from "./grep/index.js"; @@ -15,3 +16,6 @@ export type { SearchOptions, SearchResult, } from "./grep/index.js"; + +export { htmlToMarkdown } from "./html/index.js"; +export type { HtmlToMarkdownOptions } from "./html/index.js"; diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 93aa1a09d..17a4aa1b2 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -43,4 +43,5 @@ function loadNative(): Record { export const native = loadNative() as { search: (content: Buffer | Uint8Array, options: unknown) => unknown; grep: (options: unknown) => unknown; + htmlToMarkdown: (html: string, options: unknown) => unknown; }; From b669f9f580b0db6d2081000337bb42f12c9f8d9c Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:42:42 -0600 Subject: [PATCH 4/7] feat: add ANSI-aware text measurement and slicing native module Port Oh My Pi's optimized text utilities to GSD's native engine: - wrapTextWithAnsi: word-wrap preserving ANSI codes across breaks - truncateToWidth: truncate with ellipsis options - sliceWithWidth: column-range extraction - extractSegments: split around overlay regions - sanitizeText: strip ANSI, remove control chars, normalize CR - visibleWidth: display width excluding ANSI sequences Single-pass ANSI scanning, ASCII fast-path, grapheme-aware Unicode width measurement, and zero-copy input via UTF-16 JsString interop. Includes 19 Rust unit tests and 33 Node.js integration tests. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 15 + native/crates/engine/Cargo.toml | 3 + native/crates/engine/src/lib.rs | 1 + native/crates/engine/src/text.rs | 1536 +++++++++++++++++++ packages/native/package.json | 8 +- packages/native/src/__tests__/text.test.mjs | 262 ++++ packages/native/src/index.ts | 12 + packages/native/src/native.ts | 25 + packages/native/src/text/index.ts | 125 ++ packages/native/src/text/types.ts | 29 + 10 files changed, 2014 insertions(+), 2 deletions(-) create mode 100644 native/crates/engine/src/text.rs create mode 100644 packages/native/src/__tests__/text.test.mjs create mode 100644 packages/native/src/text/index.ts create mode 100644 packages/native/src/text/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index ba8fa03da..164bafec7 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -160,6 +160,9 @@ dependencies = [ "napi", "napi-build", "napi-derive", + "smallvec", + "unicode-segmentation", + "unicode-width", ] [[package]] @@ -400,6 +403,12 @@ dependencies = [ "syn", ] +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "syn" version = "2.0.117" @@ -423,6 +432,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index dcd61ef0c..2946238ad 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -14,6 +14,9 @@ crate-type = ["cdylib"] gsd-grep = { path = "../grep" } 
napi = { version = "2", features = ["napi8"] } napi-derive = "2" +smallvec = "1" +unicode-segmentation = "1" +unicode-width = "0.2" [build-dependencies] napi-build = "2" diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 82985849b..0646808ad 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -9,3 +9,4 @@ #![allow(clippy::needless_pass_by_value)] mod grep; +mod text; diff --git a/native/crates/engine/src/text.rs b/native/crates/engine/src/text.rs new file mode 100644 index 000000000..1f080741d --- /dev/null +++ b/native/crates/engine/src/text.rs @@ -0,0 +1,1536 @@ +//! ANSI-aware text measurement and slicing utilities. +//! +//! Optimized for JS string interop (UTF-16). +//! - Single-pass ANSI scanning (no O(n^2) `next_ansi` rescans) +//! - ASCII fast-path (no grapheme segmentation, no UTF-8 conversion) +//! - Non-ASCII uses a reused scratch String for grapheme segmentation +//! - Width checks early-exit +//! - Ellipsis decoded lazily +//! - truncateToWidth returns the original `JsString` when possible + +use std::cell::RefCell; + +use napi::{JsString, bindgen_prelude::*}; +use napi_derive::napi; +use smallvec::{SmallVec, smallvec}; +use unicode_segmentation::UnicodeSegmentation; +use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; + +const DEFAULT_TAB_WIDTH: usize = 3; +const MIN_TAB_WIDTH: usize = 1; +const MAX_TAB_WIDTH: usize = 16; +const ESC: u16 = 0x1b; + +#[inline] +const fn clamp_tab_width(tab_width: Option) -> usize { + let width = match tab_width { + Some(tab_width) => tab_width as usize, + None => DEFAULT_TAB_WIDTH, + }; + if width < MIN_TAB_WIDTH { + MIN_TAB_WIDTH + } else if width > MAX_TAB_WIDTH { + MAX_TAB_WIDTH + } else { + width + } +} + +/// Clamp a u64 to u32::MAX, returning as u32. 
+#[inline] +const fn clamp_u32(v: u64) -> u32 { + if v > u32::MAX as u64 { + u32::MAX + } else { + v as u32 + } +} + +fn utf16_to_string(data: impl AsRef<[u16]>) -> String { + let mut slice = data.as_ref(); + // Strip trailing null terminators (from JsStringUtf16::as_slice()) + while slice.last() == Some(&0) { + slice = &slice[..slice.len() - 1]; + } + String::from_utf16_lossy(slice) +} + +// ============================================================================ +// Results +// ============================================================================ + +#[napi(object)] +pub struct SliceResult { + /// UTF-16 slice containing the selected text. + pub text: String, + /// Visible width of the slice in terminal cells. + pub width: u32, +} + +#[napi(object)] +pub struct ExtractSegmentsResult { + /// UTF-16 content before the overlay region. + pub before: String, + #[napi(js_name = "beforeWidth")] + /// Visible width of the `before` segment. + pub before_width: u32, + /// UTF-16 content after the overlay region. + pub after: String, + #[napi(js_name = "afterWidth")] + /// Visible width of the `after` segment. 
+ pub after_width: u32, +} + +// ============================================================================ +// ANSI State Tracking - Zero Allocation +// ============================================================================ + +const ATTR_BOLD: u16 = 1 << 0; +const ATTR_DIM: u16 = 1 << 1; +const ATTR_ITALIC: u16 = 1 << 2; +const ATTR_UNDERLINE: u16 = 1 << 3; +const ATTR_BLINK: u16 = 1 << 4; +const ATTR_INVERSE: u16 = 1 << 6; +const ATTR_HIDDEN: u16 = 1 << 7; +const ATTR_STRIKE: u16 = 1 << 8; + +type ColorVal = u32; +const COLOR_NONE: ColorVal = 0; + +#[derive(Clone, Copy, Default)] +struct AnsiState { + attrs: u16, + fg: ColorVal, + bg: ColorVal, +} + +impl AnsiState { + #[inline] + const fn new() -> Self { + Self { attrs: 0, fg: COLOR_NONE, bg: COLOR_NONE } + } + + #[inline] + const fn is_empty(&self) -> bool { + self.attrs == 0 && self.fg == COLOR_NONE && self.bg == COLOR_NONE + } + + #[inline] + const fn reset(&mut self) { + *self = Self::new(); + } + + fn apply_sgr_u16(&mut self, params: &[u16]) { + if params.is_empty() { + self.reset(); + return; + } + + let mut i = 0; + while i < params.len() { + let (code, next_i) = parse_sgr_num_u16(params, i); + i = next_i; + + match code { + 0 => self.reset(), + 1 => self.attrs |= ATTR_BOLD, + 2 => self.attrs |= ATTR_DIM, + 3 => self.attrs |= ATTR_ITALIC, + 4 => self.attrs |= ATTR_UNDERLINE, + 5 => self.attrs |= ATTR_BLINK, + 7 => self.attrs |= ATTR_INVERSE, + 8 => self.attrs |= ATTR_HIDDEN, + 9 => self.attrs |= ATTR_STRIKE, + + 21 => self.attrs &= !ATTR_BOLD, + 22 => self.attrs &= !(ATTR_BOLD | ATTR_DIM), + 23 => self.attrs &= !ATTR_ITALIC, + 24 => self.attrs &= !ATTR_UNDERLINE, + 25 => self.attrs &= !ATTR_BLINK, + 27 => self.attrs &= !ATTR_INVERSE, + 28 => self.attrs &= !ATTR_HIDDEN, + 29 => self.attrs &= !ATTR_STRIKE, + + 30..=37 => self.fg = (code - 29) as ColorVal, + 39 => self.fg = COLOR_NONE, + 40..=47 => self.bg = (code - 39) as ColorVal, + 49 => self.bg = COLOR_NONE, + 90..=97 => self.fg = (code - 81) as 
ColorVal, + 100..=107 => self.bg = (code - 91) as ColorVal, + + 38 | 48 => { + let (mode, ni) = parse_sgr_num_u16(params, i); + i = ni; + + let color = match mode { + 5 => { + let (idx, ni) = parse_sgr_num_u16(params, i); + i = ni; + 0x100 | (idx as ColorVal & 0xff) + }, + 2 => { + let (r, ni) = parse_sgr_num_u16(params, i); + let (g, ni) = parse_sgr_num_u16(params, ni); + let (b, ni) = parse_sgr_num_u16(params, ni); + i = ni; + 0x1000000 + | ((r as ColorVal & 0xff) << 16) + | ((g as ColorVal & 0xff) << 8) + | (b as ColorVal & 0xff) + }, + _ => continue, + }; + + if code == 38 { + self.fg = color; + } else { + self.bg = color; + } + }, + + _ => {}, + } + } + } + + fn write_restore_u16(&self, out: &mut Vec) { + if self.is_empty() { + return; + } + + out.extend_from_slice(&[ESC, b'[' as u16]); + let mut first = true; + + macro_rules! push_code { + ($code:expr) => {{ + if !first { + out.push(b';' as u16); + } + first = false; + write_u32_u16(out, $code); + }}; + } + + if self.attrs & ATTR_BOLD != 0 { + push_code!(1); + } + if self.attrs & ATTR_DIM != 0 { + push_code!(2); + } + if self.attrs & ATTR_ITALIC != 0 { + push_code!(3); + } + if self.attrs & ATTR_UNDERLINE != 0 { + push_code!(4); + } + if self.attrs & ATTR_BLINK != 0 { + push_code!(5); + } + if self.attrs & ATTR_INVERSE != 0 { + push_code!(7); + } + if self.attrs & ATTR_HIDDEN != 0 { + push_code!(8); + } + if self.attrs & ATTR_STRIKE != 0 { + push_code!(9); + } + + write_color_u16(out, self.fg, 38, &mut first); + write_color_u16(out, self.bg, 48, &mut first); + + out.push(b'm' as u16); + } +} + +#[inline] +fn write_color_u16(out: &mut Vec, color: ColorVal, base: u32, first: &mut bool) { + if color == COLOR_NONE { + return; + } + + if !*first { + out.push(b';' as u16); + } + *first = false; + + if color < 0x100 { + let code = if color <= 8 { color + 29 } else { color + 81 }; + let code = if base == 48 { code + 10 } else { code }; + write_u32_u16(out, code); + } else if color < 0x1000000 { + write_u32_u16(out, 
base); + out.extend_from_slice(&[b';' as u16, b'5' as u16, b';' as u16]); + write_u32_u16(out, color & 0xff); + } else { + write_u32_u16(out, base); + out.extend_from_slice(&[b';' as u16, b'2' as u16, b';' as u16]); + write_u32_u16(out, (color >> 16) & 0xff); + out.push(b';' as u16); + write_u32_u16(out, (color >> 8) & 0xff); + out.push(b';' as u16); + write_u32_u16(out, color & 0xff); + } +} + +#[inline] +fn parse_sgr_num_u16(params: &[u16], mut i: usize) -> (u32, usize) { + while i < params.len() && params[i] == b';' as u16 { + i += 1; + } + + let mut val: u32 = 0; + while i < params.len() { + let b = params[i]; + if b == b';' as u16 { + i += 1; + break; + } + if (b'0' as u16..=b'9' as u16).contains(&b) { + val = val + .saturating_mul(10) + .saturating_add((b - b'0' as u16) as u32); + } + i += 1; + } + (val, i) +} + +#[inline] +fn write_u32_u16(out: &mut Vec, mut val: u32) { + if val == 0 { + out.push(b'0' as u16); + return; + } + let start = out.len(); + while val > 0 { + out.push(b'0' as u16 + (val % 10) as u16); + val /= 10; + } + out[start..].reverse(); +} + +// ============================================================================ +// ANSI Sequence Detection - UTF-16 +// ============================================================================ + +#[inline] +fn ansi_seq_len_u16(data: &[u16], pos: usize) -> Option { + if pos >= data.len() || data[pos] != ESC { + return None; + } + if pos + 1 >= data.len() { + return None; + } + + match data[pos + 1] { + 0x5b => { + // '[' CSI + for (i, b) in data[pos + 2..].iter().enumerate() { + if (0x40..=0x7e).contains(b) { + return Some(i + 3); + } + } + None + }, + 0x5d => { + // ']' OSC + for (i, &b) in data[pos + 2..].iter().enumerate() { + if b == 0x07 { + return Some(i + 3); + } + if b == ESC && data.get(pos + 2 + i + 1) == Some(&0x5c) { + return Some(i + 4); + } + } + None + }, + 0x50 | 0x58 | 0x5e | 0x5f => { + // 'P' DCS, 'X' SOS, '^' PM, '_' APC (terminated by ST) + for (i, &b) in data[pos + 
2..].iter().enumerate() { + if b == ESC && data.get(pos + 2 + i + 1) == Some(&0x5c) { + return Some(i + 4); + } + } + None + }, + 0x20..=0x2f => { + // ESC + intermediates + final byte + for (i, b) in data[pos + 2..].iter().enumerate() { + if (0x30..=0x7e).contains(b) { + return Some(i + 3); + } + } + None + }, + 0x40..=0x7e => Some(2), + _ => None, + } +} + +#[inline] +fn is_sgr_u16(seq: &[u16]) -> bool { + seq.len() >= 3 && seq[1] == b'[' as u16 && *seq.last().unwrap() == b'm' as u16 +} + +// ============================================================================ +// Grapheme / Width +// ============================================================================ + +#[inline] +const fn ascii_cell_width_u16(u: u16, tab_width: usize) -> usize { + let b = u as u8; + match b { + b'\t' => tab_width, + 0x20..=0x7e => 1, + _ => 0, + } +} + +#[inline] +fn grapheme_width_str(g: &str, tab_width: usize) -> usize { + if g == "\t" { + return tab_width; + } + let mut it = g.chars(); + let Some(c0) = it.next() else { + return 0; + }; + if it.next().is_none() { + return UnicodeWidthChar::width(c0).unwrap_or(0); + } + UnicodeWidthStr::width(g) +} + +thread_local! { + static SCRATCH: RefCell = const { RefCell::new(String::new()) }; +} + +/// Iterate graphemes in a non-ASCII UTF-16 segment. +/// +/// Callback returns `true` to continue, `false` to stop early. 
+#[inline] +fn for_each_grapheme_u16_slow(segment: &[u16], tab_width: usize, mut f: F) -> bool +where + F: FnMut(&[u16], usize) -> bool, +{ + if segment.is_empty() { + return true; + } + + SCRATCH.with_borrow_mut(|scratch| { + scratch.clear(); + scratch.reserve(segment.len()); + + for r in std::char::decode_utf16(segment.iter().copied()) { + scratch.push(r.unwrap_or('\u{FFFD}')); + } + + let mut utf16_pos = 0usize; + for g in scratch.graphemes(true) { + let w = grapheme_width_str(g, tab_width); + + let g_u16_len: usize = g.chars().map(|c| c.len_utf16()).sum(); + let u16_slice = &segment[utf16_pos..utf16_pos + g_u16_len]; + utf16_pos += g_u16_len; + + if !f(u16_slice, w) { + return false; + } + } + + true + }) +} + +/// Visible width, with early-exit if width exceeds `limit`. +fn visible_width_u16_up_to(data: &[u16], limit: usize, tab_width: usize) -> (usize, bool) { + let mut width = 0usize; + let mut i = 0usize; + let len = data.len(); + + while i < len { + if data[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(data, i) { + i += seq_len; + continue; + } + i += 1; + continue; + } + + let start = i; + let mut is_ascii = true; + while i < len && data[i] != ESC { + if data[i] > 0x7f { + is_ascii = false; + } + i += 1; + } + let seg = &data[start..i]; + + if is_ascii { + for &u in seg { + width += ascii_cell_width_u16(u, tab_width); + if width > limit { + return (width, true); + } + } + } else { + let ok = for_each_grapheme_u16_slow(seg, tab_width, |_, w| { + width += w; + width <= limit + }); + if !ok { + return (width, true); + } + } + } + + (width, width > limit) +} + +fn visible_width_u16(data: &[u16], tab_width: usize) -> usize { + visible_width_u16_up_to(data, usize::MAX, tab_width).0 +} + +// ============================================================================ +// wrapTextWithAnsi +// ============================================================================ + +#[inline] +fn write_active_codes(state: &AnsiState, out: &mut Vec) { + if 
!state.is_empty() { + state.write_restore_u16(out); + } +} + +#[inline] +fn write_line_end_reset(state: &AnsiState, out: &mut Vec) { + let has_underline = state.attrs & ATTR_UNDERLINE != 0; + let has_strike = state.attrs & ATTR_STRIKE != 0; + if !has_underline && !has_strike { + return; + } + + out.extend_from_slice(&[ESC, b'[' as u16]); + if has_underline { + out.extend_from_slice(&[b'2' as u16, b'4' as u16]); + if has_strike { + out.push(b';' as u16); + } + } + if has_strike { + out.extend_from_slice(&[b'2' as u16, b'9' as u16]); + } + out.push(b'm' as u16); +} + +fn update_state_from_text(data: &[u16], state: &mut AnsiState) { + let mut i = 0usize; + while i < data.len() { + if data[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(data, i) { + let seq = &data[i..i + seq_len]; + if is_sgr_u16(seq) { + state.apply_sgr_u16(&seq[2..seq_len - 1]); + } + i += seq_len; + continue; + } + } + i += 1; + } +} + +fn token_is_whitespace(token: &[u16]) -> bool { + let mut i = 0usize; + while i < token.len() { + if token[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(token, i) { + i += seq_len; + continue; + } + } + if token[i] != b' ' as u16 { + return false; + } + i += 1; + } + true +} + +fn trim_end_spaces_in_place(line: &mut Vec) { + while let Some(&last) = line.last() { + if last == b' ' as u16 { + line.pop(); + } else { + break; + } + } +} + +fn split_into_tokens_with_ansi(line: &[u16]) -> SmallVec<[Vec; 4]> { + let mut tokens = SmallVec::<[Vec; 4]>::new(); + let mut current = Vec::::new(); + let mut pending_ansi = SmallVec::<[u16; 32]>::new(); + let mut in_whitespace = false; + let mut i = 0usize; + + while i < line.len() { + if line[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(line, i) { + pending_ansi.extend_from_slice(&line[i..i + seq_len]); + i += seq_len; + continue; + } + } + + let ch = line[i]; + let char_is_space = ch == b' ' as u16; + if char_is_space != in_whitespace && !current.is_empty() { + tokens.push(current); + current = Vec::new(); 
+ } + + if !pending_ansi.is_empty() { + current.extend_from_slice(&pending_ansi); + pending_ansi.clear(); + } + + in_whitespace = char_is_space; + current.push(ch); + i += 1; + } + + if !pending_ansi.is_empty() { + current.extend_from_slice(&pending_ansi); + } + + if !current.is_empty() { + tokens.push(current); + } + + tokens +} + +fn break_long_word( + word: &[u16], + width: usize, + tab_width: usize, + state: &mut AnsiState, +) -> SmallVec<[Vec; 4]> { + let mut lines = SmallVec::<[Vec; 4]>::new(); + let mut current_line = Vec::::new(); + write_active_codes(state, &mut current_line); + let mut current_width = 0usize; + let mut i = 0usize; + + while i < word.len() { + if word[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(word, i) { + let seq = &word[i..i + seq_len]; + current_line.extend_from_slice(seq); + if is_sgr_u16(seq) { + state.apply_sgr_u16(&seq[2..seq_len - 1]); + } + i += seq_len; + continue; + } + } + + let start = i; + let mut is_ascii = true; + while i < word.len() && word[i] != ESC { + if word[i] > 0x7f { + is_ascii = false; + } + i += 1; + } + let seg = &word[start..i]; + + if is_ascii { + for &u in seg { + let gw = ascii_cell_width_u16(u, tab_width); + if current_width + gw > width { + write_line_end_reset(state, &mut current_line); + lines.push(current_line); + current_line = Vec::new(); + write_active_codes(state, &mut current_line); + current_width = 0; + } + current_line.push(u); + current_width += gw; + } + } else { + let _ = for_each_grapheme_u16_slow(seg, tab_width, |gu16, gw| { + if current_width + gw > width { + write_line_end_reset(state, &mut current_line); + lines.push(std::mem::take(&mut current_line)); + write_active_codes(state, &mut current_line); + current_width = 0; + } + current_line.extend_from_slice(gu16); + current_width += gw; + true + }); + } + } + + if !current_line.is_empty() { + lines.push(current_line); + } + + lines +} + +fn wrap_single_line(line: &[u16], width: usize, tab_width: usize) -> SmallVec<[Vec; 4]> { + 
if line.is_empty() { + return smallvec![Vec::new()]; + } + + if visible_width_u16(line, tab_width) <= width { + return smallvec![line.to_vec()]; + } + + let tokens = split_into_tokens_with_ansi(line); + let mut wrapped = SmallVec::<[Vec; 4]>::new(); + let mut current_line = Vec::::new(); + let mut current_width = 0usize; + let mut state = AnsiState::new(); + + for token in tokens { + let token_width = visible_width_u16(&token, tab_width); + let is_whitespace = token_is_whitespace(&token); + + if token_width > width && !is_whitespace { + if !current_line.is_empty() { + write_line_end_reset(&state, &mut current_line); + wrapped.push(current_line); + current_line = Vec::new(); + current_width = 0; + } + + let mut broken = break_long_word(&token, width, tab_width, &mut state); + if let Some(last) = broken.pop() { + wrapped.extend(broken); + current_line = last; + current_width = visible_width_u16(¤t_line, tab_width); + } + continue; + } + + let total_needed = current_width + token_width; + if total_needed > width && current_width > 0 { + let mut line_to_wrap = current_line; + trim_end_spaces_in_place(&mut line_to_wrap); + write_line_end_reset(&state, &mut line_to_wrap); + wrapped.push(line_to_wrap); + + current_line = Vec::new(); + write_active_codes(&state, &mut current_line); + if is_whitespace { + current_width = 0; + } else { + current_line.extend_from_slice(&token); + current_width = token_width; + } + } else { + current_line.extend_from_slice(&token); + current_width += token_width; + } + + update_state_from_text(&token, &mut state); + } + + if !current_line.is_empty() { + wrapped.push(current_line); + } + + for line in &mut wrapped { + trim_end_spaces_in_place(line); + } + + if wrapped.is_empty() { + wrapped.push(Vec::new()); + } + + wrapped +} + +fn wrap_text_with_ansi_impl( + text: &[u16], + width: usize, + tab_width: usize, +) -> SmallVec<[Vec; 4]> { + if text.is_empty() { + return smallvec![Vec::new()]; + } + + let mut result = SmallVec::<[Vec; 4]>::new(); + 
let mut state = AnsiState::new(); + let mut line_start = 0usize; + + for i in 0..=text.len() { + if i == text.len() || text[i] == b'\n' as u16 { + let line = &text[line_start..i]; + let mut line_with_prefix: Vec = Vec::new(); + if !result.is_empty() { + write_active_codes(&state, &mut line_with_prefix); + } + line_with_prefix.extend_from_slice(line); + + let wrapped = wrap_single_line(&line_with_prefix, width, tab_width); + result.extend(wrapped); + update_state_from_text(line, &mut state); + line_start = i + 1; + } + } + + if result.is_empty() { + result.push(Vec::new()); + } + + result +} + +/// Wrap text to a visible width, preserving ANSI escape codes across line +/// breaks. +/// +/// Returns UTF-16 lines with active SGR codes carried across line boundaries. +#[napi(js_name = "wrapTextWithAnsi")] +pub fn wrap_text_with_ansi( + text: JsString, + width: u32, + tab_width: Option, +) -> Result> { + let text_u16 = text.into_utf16()?; + let tab_width = clamp_tab_width(tab_width); + let lines = wrap_text_with_ansi_impl(text_u16.as_slice(), width as usize, tab_width); + Ok(lines.into_iter().map(utf16_to_string).collect()) +} + +// ============================================================================ +// truncateToWidth +// ============================================================================ + +/// Truncate text to a visible width, preserving ANSI codes. +/// +/// `ellipsis_kind`: 0 = "\u{2026}", 1 = "...", 2 = "" (omit); pads with +/// spaces when requested. 
+#[napi(js_name = "truncateToWidth")] +pub fn truncate_to_width( + text: JsString, + max_width: u32, + ellipsis_kind: u8, + pad: bool, + tab_width: Option, +) -> Result { + let max_width = max_width as usize; + let tab_width = clamp_tab_width(tab_width); + + let text_u16 = text.into_utf16()?; + let text = text_u16.as_slice(); + + // Fast path: early-exit width check + let (text_w, exceeded) = visible_width_u16_up_to(text, max_width, tab_width); + if !exceeded { + if !pad || text_w == max_width { + return Ok(utf16_to_string(text.to_vec())); + } + + let mut out = Vec::with_capacity(text.len() + (max_width - text_w)); + out.extend_from_slice(text); + out.resize(out.len() + (max_width - text_w), b' ' as u16); + return Ok(utf16_to_string(out)); + } + + const ELLIPSIS_UNICODE: &[u16] = &[0x2026]; + const ELLIPSIS_ASCII: &[u16] = &[0x2e, 0x2e, 0x2e]; + const ELLIPSIS_OMIT: &[u16] = &[]; + + let (ellipsis, ellipsis_w): (&[u16], usize) = match ellipsis_kind { + 0 => (ELLIPSIS_UNICODE, 1), + 1 => (ELLIPSIS_ASCII, 3), + 2 => (ELLIPSIS_OMIT, 0), + _ => (ELLIPSIS_UNICODE, 1), + }; + + let target_w = max_width.saturating_sub(ellipsis_w); + + if target_w == 0 { + let mut out = Vec::with_capacity(ellipsis.len().min(max_width * 2)); + let mut w = 0usize; + let _ = for_each_grapheme_u16_slow(ellipsis, tab_width, |gu16, gw| { + if w + gw > max_width { + return false; + } + out.extend_from_slice(gu16); + w += gw; + true + }); + + if pad && w < max_width { + out.resize(out.len() + (max_width - w), b' ' as u16); + } + return Ok(utf16_to_string(out)); + } + + let mut out = Vec::with_capacity(text.len().min(max_width * 2) + ellipsis.len() + 8); + let mut w = 0usize; + let mut i = 0usize; + let text_len = text.len(); + + let mut saw_sgr = false; + + while i < text_len { + if text[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(text, i) { + let seq = &text[i..i + seq_len]; + out.extend_from_slice(seq); + if is_sgr_u16(seq) { + saw_sgr = true; + } + i += seq_len; + continue; + } + 
out.push(ESC); + i += 1; + continue; + } + + let start = i; + let mut is_ascii = true; + while i < text_len && text[i] != ESC { + if text[i] > 0x7f { + is_ascii = false; + } + i += 1; + } + let seg = &text[start..i]; + + if is_ascii { + for &u in seg { + let gw = ascii_cell_width_u16(u, tab_width); + if w + gw > target_w { + break; + } + out.push(u); + w += gw; + } + if w >= target_w { + break; + } + } else { + let keep_going = for_each_grapheme_u16_slow(seg, tab_width, |gu16, gw| { + if w + gw > target_w { + return false; + } + out.extend_from_slice(gu16); + w += gw; + true + }); + if !keep_going { + break; + } + } + } + + if saw_sgr { + out.extend_from_slice(&[ESC, b'[' as u16, b'0' as u16, b'm' as u16]); + } + out.extend_from_slice(ellipsis); + + if pad { + let out_w = w + ellipsis_w; + if out_w < max_width { + out.resize(out.len() + (max_width - out_w), b' ' as u16); + } + } + + Ok(utf16_to_string(out)) +} + +// ============================================================================ +// sliceWithWidth +// ============================================================================ + +fn slice_with_width_impl( + line: &[u16], + start_col: usize, + length: usize, + strict: bool, + tab_width: usize, +) -> (Vec, usize) { + let end_col = start_col.saturating_add(length); + + let mut out = Vec::with_capacity(length * 2); + let mut out_w = 0usize; + + let mut current_col = 0usize; + let mut i = 0usize; + let line_len = line.len(); + + let mut pending_ansi: SmallVec<[(usize, usize); 4]> = SmallVec::new(); + + while i < line_len && current_col < end_col { + if line[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(line, i) { + if current_col >= start_col { + out.extend_from_slice(&line[i..i + seq_len]); + } else { + pending_ansi.push((i, seq_len)); + } + i += seq_len; + continue; + } + if current_col >= start_col { + out.push(ESC); + } + i += 1; + continue; + } + + let start = i; + let mut is_ascii = true; + while i < line_len && line[i] != ESC { + if line[i] > 
0x7f { + is_ascii = false; + } + i += 1; + } + let seg = &line[start..i]; + + if is_ascii { + for &u in seg { + if current_col >= end_col { + break; + } + let gw = ascii_cell_width_u16(u, tab_width); + let in_range = current_col >= start_col; + let fits = !strict || current_col + gw <= end_col; + + if in_range && fits { + if !pending_ansi.is_empty() { + for &(p, l) in &pending_ansi { + out.extend_from_slice(&line[p..p + l]); + } + pending_ansi.clear(); + } + out.push(u); + out_w += gw; + } + current_col += gw; + } + } else { + let _ = for_each_grapheme_u16_slow(seg, tab_width, |gu16, gw| { + if current_col >= end_col { + return false; + } + + let in_range = current_col >= start_col; + let fits = !strict || current_col + gw <= end_col; + + if in_range && fits { + if !pending_ansi.is_empty() { + for &(p, l) in &pending_ansi { + out.extend_from_slice(&line[p..p + l]); + } + pending_ansi.clear(); + } + out.extend_from_slice(gu16); + out_w += gw; + } + + current_col += gw; + current_col < end_col + }); + } + } + + // Include trailing ANSI sequences (e.g., reset codes) that immediately follow + while i < line.len() { + if line[i] == ESC { + if let Some(len) = ansi_seq_len_u16(line, i) { + out.extend_from_slice(&line[i..i + len]); + i += len; + continue; + } + } + break; + } + + (out, out_w) +} + +/// Slice a range of visible columns from a line. +/// +/// Counts terminal cells, skipping ANSI escapes, and optionally enforces strict +/// width. 
+#[napi(js_name = "sliceWithWidth")] +pub fn slice_with_width( + line: JsString, + start_col: u32, + length: u32, + strict: bool, + tab_width: Option, +) -> Result { + let line_u16 = line.into_utf16()?; + let line = line_u16.as_slice(); + + let tab_width = clamp_tab_width(tab_width); + let (out, w) = + slice_with_width_impl(line, start_col as usize, length as usize, strict, tab_width); + + Ok(SliceResult { text: utf16_to_string(out), width: clamp_u32(w as u64) }) +} + +// ============================================================================ +// extractSegments +// ============================================================================ + +fn extract_segments_impl( + line: &[u16], + before_end: usize, + after_start: usize, + after_len: usize, + strict_after: bool, + tab_width: usize, +) -> (Vec, usize, Vec, usize) { + let after_end = after_start.saturating_add(after_len); + + let mut before = Vec::with_capacity(before_end * 2); + let mut before_w = 0usize; + + let mut after = Vec::with_capacity(after_len * 2); + let mut after_w = 0usize; + + let mut current_col = 0usize; + let mut i = 0usize; + let line_len = line.len(); + + let mut pending_before_ansi: SmallVec<[(usize, usize); 4]> = SmallVec::new(); + + let mut after_started = false; + let mut state = AnsiState::new(); + + let done_col = if after_len == 0 { before_end } else { after_end }; + + while i < line_len && current_col < done_col { + if line[i] == ESC { + if let Some(seq_len) = ansi_seq_len_u16(line, i) { + let seq = &line[i..i + seq_len]; + if is_sgr_u16(seq) { + state.apply_sgr_u16(&seq[2..seq_len - 1]); + } + + if current_col < before_end { + pending_before_ansi.push((i, seq_len)); + } else if current_col >= after_start && current_col < after_end && after_started { + after.extend_from_slice(seq); + } + + i += seq_len; + continue; + } + + if current_col < before_end { + before.push(ESC); + } else if current_col >= after_start && current_col < after_end && after_started { + after.push(ESC); + } 
+ i += 1; + continue; + } + + let start = i; + let mut is_ascii = true; + while i < line_len && line[i] != ESC { + if line[i] > 0x7f { + is_ascii = false; + } + i += 1; + } + let seg = &line[start..i]; + + if is_ascii { + for &u in seg { + if current_col >= done_col { + break; + } + let gw = ascii_cell_width_u16(u, tab_width); + + if current_col < before_end { + if !pending_before_ansi.is_empty() { + for &(p, l) in &pending_before_ansi { + before.extend_from_slice(&line[p..p + l]); + } + pending_before_ansi.clear(); + } + before.push(u); + before_w += gw; + } else if current_col >= after_start && current_col < after_end { + let fits = !strict_after || current_col + gw <= after_end; + if fits { + if !after_started { + state.write_restore_u16(&mut after); + after_started = true; + } + after.push(u); + after_w += gw; + } + } + current_col += gw; + } + } else { + let _ = for_each_grapheme_u16_slow(seg, tab_width, |gu16, gw| { + if current_col >= done_col { + return false; + } + + if current_col < before_end { + if !pending_before_ansi.is_empty() { + for &(p, l) in &pending_before_ansi { + before.extend_from_slice(&line[p..p + l]); + } + pending_before_ansi.clear(); + } + before.extend_from_slice(gu16); + before_w += gw; + } else if current_col >= after_start && current_col < after_end { + let fits = !strict_after || current_col + gw <= after_end; + if fits { + if !after_started { + state.write_restore_u16(&mut after); + after_started = true; + } + after.extend_from_slice(gu16); + after_w += gw; + } + } + + current_col += gw; + true + }); + } + } + + (before, before_w, after, after_w) +} + +/// Extract the before/after slices around an overlay region. +/// +/// Preserves ANSI state so the `after` segment renders correctly after +/// truncation. 
+#[napi(js_name = "extractSegments")] +pub fn extract_segments( + line: JsString, + before_end: u32, + after_start: u32, + after_len: u32, + strict_after: bool, + tab_width: Option, +) -> Result { + let line_u16 = line.into_utf16()?; + let line = line_u16.as_slice(); + + let tab_width = clamp_tab_width(tab_width); + let (before, bw, after, aw) = extract_segments_impl( + line, + before_end as usize, + after_start as usize, + after_len as usize, + strict_after, + tab_width, + ); + + Ok(ExtractSegmentsResult { + before: utf16_to_string(before), + before_width: clamp_u32(bw as u64), + after: utf16_to_string(after), + after_width: clamp_u32(aw as u64), + }) +} + +// ============================================================================ +// sanitizeText +// ============================================================================ + +/// Strip ANSI escape sequences, remove control characters / lone surrogates, +/// and normalize line endings. +#[napi(js_name = "sanitizeText")] +pub fn sanitize_text(text: JsString) -> Result { + let text_u16 = text.into_utf16()?; + let data = text_u16.as_slice(); + + let mut did_change = false; + let mut out: Vec = Vec::new(); + let mut last = 0usize; + let mut i = 0usize; + let len = data.len(); + + while i < len { + let u = data[i]; + + if u == 0x09 || u == 0x0a { + i += 1; + continue; + } + + let mut remove_len = if u == ESC { + ansi_seq_len_u16(data, i).unwrap_or(0) + } else { + 0usize + }; + + if remove_len == 0 { + if u == 0x0d { + remove_len = 1; + } else if u <= 0x1f || u == 0x7f || (0x80..=0x9f).contains(&u) { + remove_len = 1; + } else if (0xd800..=0xdbff).contains(&u) { + if i + 1 < len { + let lo = data[i + 1]; + if (0xdc00..=0xdfff).contains(&lo) { + i += 2; + continue; + } + } + remove_len = 1; + } else if (0xdc00..=0xdfff).contains(&u) { + remove_len = 1; + } + } + + if remove_len == 0 { + i += 1; + continue; + } + + if !did_change { + did_change = true; + out = Vec::with_capacity(len); + } + if last != i { + 
out.extend_from_slice(&data[last..i]); + } + i += remove_len; + last = i; + } + + if !did_change { + return Ok(utf16_to_string(data.to_vec())); + } + if last < len { + out.extend_from_slice(&data[last..]); + } + Ok(utf16_to_string(out)) +} + +// ============================================================================ +// visibleWidth +// ============================================================================ + +/// Calculate visible width of text, excluding ANSI escape sequences. +/// +/// Tabs count as a fixed-width cell. +#[napi(js_name = "visibleWidth")] +pub fn visible_width_napi(text: JsString, tab_width: Option) -> Result { + let text_u16 = text.into_utf16()?; + let tab_width = clamp_tab_width(tab_width); + Ok(clamp_u32(visible_width_u16(text_u16.as_slice(), tab_width) as u64)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn to_u16(s: &str) -> Vec { + s.encode_utf16().collect() + } + + #[test] + fn test_visible_width() { + assert_eq!(visible_width_u16(&to_u16("hello"), DEFAULT_TAB_WIDTH), 5); + assert_eq!( + visible_width_u16(&to_u16("\x1b[31mhello\x1b[0m"), DEFAULT_TAB_WIDTH), + 5 + ); + assert_eq!( + visible_width_u16(&to_u16("\x1b[38;5;196mred\x1b[0m"), DEFAULT_TAB_WIDTH), + 3 + ); + assert_eq!( + visible_width_u16(&to_u16("a\tb"), DEFAULT_TAB_WIDTH), + 1 + DEFAULT_TAB_WIDTH + 1 + ); + } + + #[test] + fn test_visible_width_cjk() { + assert_eq!( + visible_width_u16(&to_u16("\u{4e16}\u{754c}"), DEFAULT_TAB_WIDTH), + 4 + ); + assert_eq!(visible_width_u16(&to_u16("a\u{4e16}b"), DEFAULT_TAB_WIDTH), 4); + } + + #[test] + fn test_visible_width_emoji() { + assert_eq!(visible_width_u16(&to_u16("\u{1f600}"), DEFAULT_TAB_WIDTH), 2); + } + + #[test] + fn test_ansi_detection() { + let data = to_u16("\x1b[31mred\x1b[0m"); + assert_eq!(ansi_seq_len_u16(&data, 0), Some(5)); + assert_eq!(ansi_seq_len_u16(&data, 8), Some(4)); + } + + #[test] + fn test_ansi_detection_osc() { + let data = to_u16("\x1b]0;title\x07rest"); + assert_eq!(ansi_seq_len_u16(&data, 0), 
Some(10)); + } + + #[test] + fn test_slice_basic() { + let data = to_u16("hello world"); + let (out, width) = slice_with_width_impl(&data, 0, 5, false, DEFAULT_TAB_WIDTH); + assert_eq!(String::from_utf16_lossy(&out), "hello"); + assert_eq!(width, 5); + } + + #[test] + fn test_slice_middle() { + let data = to_u16("hello world"); + let (out, width) = slice_with_width_impl(&data, 6, 5, false, DEFAULT_TAB_WIDTH); + assert_eq!(String::from_utf16_lossy(&out), "world"); + assert_eq!(width, 5); + } + + #[test] + fn test_slice_with_ansi() { + let data = to_u16("\x1b[31mhello\x1b[0m world"); + let (out, width) = slice_with_width_impl(&data, 0, 5, false, DEFAULT_TAB_WIDTH); + assert_eq!(String::from_utf16_lossy(&out), "\x1b[31mhello\x1b[0m"); + assert_eq!(width, 5); + } + + #[test] + fn test_early_exit() { + let data = to_u16(&"a]b".repeat(1000)); + let (w, exceeded) = visible_width_u16_up_to(&data, 10, DEFAULT_TAB_WIDTH); + assert!(exceeded); + assert!(w > 10); + } + + #[test] + fn test_wrap_text_basic() { + let data = to_u16("hello world"); + let lines = wrap_text_with_ansi_impl(&data, 5, DEFAULT_TAB_WIDTH); + assert_eq!(lines.len(), 2); + assert_eq!(String::from_utf16_lossy(&lines[0]), "hello"); + assert_eq!(String::from_utf16_lossy(&lines[1]), "world"); + } + + #[test] + fn test_wrap_text_with_ansi_preserves_color() { + let data = to_u16("\x1b[38;2;156;163;176mhello world\x1b[0m"); + let lines = wrap_text_with_ansi_impl(&data, 5, DEFAULT_TAB_WIDTH); + assert_eq!(lines.len(), 2); + let first = String::from_utf16_lossy(&lines[0]); + let second = String::from_utf16_lossy(&lines[1]); + assert!(first.starts_with("\x1b[38;2;156;163;176m")); + assert!(second.starts_with("\x1b[38;2;156;163;176m")); + assert!(second.contains("world")); + } + + #[test] + fn test_wrap_text_with_ansi_resets_strike() { + let data = to_u16( + "\x1b[38;5;196m\x1b[48;5;236m\x1b[9mstrikethrough content wraps\x1b[29m\x1b[0m", + ); + let lines = wrap_text_with_ansi_impl(&data, 12, DEFAULT_TAB_WIDTH); + 
assert!(lines.len() > 1); + + for line in &lines[..lines.len() - 1] { + let line_text = String::from_utf16_lossy(line); + if line_text.contains("\x1b[9m") { + assert!(line_text.ends_with("\x1b[29m")); + assert!(!line_text.ends_with("\x1b[0m")); + } + } + + for line in &lines[1..] { + let line_text = String::from_utf16_lossy(line); + assert!(line_text.contains("38;5;196")); + assert!(line_text.contains("48;5;236")); + } + } + + #[test] + fn test_wrap_text_multiline() { + let data = to_u16("line one\nline two"); + let lines = wrap_text_with_ansi_impl(&data, 20, DEFAULT_TAB_WIDTH); + assert_eq!(lines.len(), 2); + assert_eq!(String::from_utf16_lossy(&lines[0]), "line one"); + assert_eq!(String::from_utf16_lossy(&lines[1]), "line two"); + } + + #[test] + fn test_wrap_text_empty() { + let data = to_u16(""); + let lines = wrap_text_with_ansi_impl(&data, 10, DEFAULT_TAB_WIDTH); + assert_eq!(lines.len(), 1); + assert!(lines[0].is_empty()); + } + + #[test] + fn test_extract_segments_basic() { + let data = to_u16("hello world test"); + let (before, bw, after, aw) = + extract_segments_impl(&data, 5, 6, 5, false, DEFAULT_TAB_WIDTH); + assert_eq!(String::from_utf16_lossy(&before), "hello"); + assert_eq!(bw, 5); + assert_eq!(String::from_utf16_lossy(&after), "world"); + assert_eq!(aw, 5); + } + + #[test] + fn test_ansi_state_sgr_parsing() { + let mut state = AnsiState::new(); + let params = to_u16("1;31"); + state.apply_sgr_u16(&params); + assert!(state.attrs & ATTR_BOLD != 0); + assert_eq!(state.fg, 2); // 31 - 29 = 2 + + let params = to_u16("0"); + state.apply_sgr_u16(&params); + assert!(state.is_empty()); + } + + #[test] + fn test_ansi_state_256_color() { + let mut state = AnsiState::new(); + let params = to_u16("38;5;196"); + state.apply_sgr_u16(&params); + assert_eq!(state.fg, 0x100 | 196); + } + + #[test] + fn test_ansi_state_rgb_color() { + let mut state = AnsiState::new(); + let params = to_u16("38;2;255;128;0"); + state.apply_sgr_u16(&params); + assert_eq!(state.fg, 0x1000000 | (255 << 
16) | (128 << 8) | 0); + } + + #[test] + fn test_clamp_u32_helper() { + assert_eq!(clamp_u32(0), 0); + assert_eq!(clamp_u32(42), 42); + assert_eq!(clamp_u32(u32::MAX as u64), u32::MAX); + assert_eq!(clamp_u32(u32::MAX as u64 + 1), u32::MAX); + } +} diff --git a/packages/native/package.json b/packages/native/package.json index 84de3dfb3..4e8883f28 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -1,14 +1,14 @@ { "name": "@gsd/native", "version": "0.1.0", - "description": "Native Rust bindings for GSD — high-performance grep via N-API", + "description": "Native Rust bindings for GSD — high-performance grep and text utilities via N-API", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", "scripts": { "build:native": "node ../../native/scripts/build.js", "build:native:dev": "node ../../native/scripts/build.js --dev", - "test": "node --test src/__tests__/grep.test.mjs" + "test": "node --test src/__tests__/grep.test.mjs src/__tests__/text.test.mjs" }, "exports": { ".": { @@ -18,6 +18,10 @@ "./grep": { "types": "./src/grep/index.ts", "import": "./src/grep/index.ts" + }, + "./text": { + "types": "./src/text/index.ts", + "import": "./src/text/index.ts" } }, "files": [ diff --git a/packages/native/src/__tests__/text.test.mjs b/packages/native/src/__tests__/text.test.mjs new file mode 100644 index 000000000..1c101a7e6 --- /dev/null +++ b/packages/native/src/__tests__/text.test.mjs @@ -0,0 +1,262 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +// Load the native addon directly +const addonDir = path.resolve( + __dirname, + "..", + "..", + "..", + "..", + "native", + "addon", +); +const platformTag = `${process.platform}-${process.arch}`; +const 
candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error( + "Native addon not found. Run `npm run build:native -w @gsd/native` first.", + ); + process.exit(1); +} + +// ── visibleWidth ─────────────────────────────────────────────────────── + +describe("visibleWidth", () => { + test("plain ASCII text", () => { + assert.equal(native.visibleWidth("hello"), 5); + }); + + test("empty string", () => { + assert.equal(native.visibleWidth(""), 0); + }); + + test("ignores ANSI SGR codes", () => { + assert.equal(native.visibleWidth("\x1b[31mhello\x1b[0m"), 5); + }); + + test("ignores 256-color ANSI", () => { + assert.equal(native.visibleWidth("\x1b[38;5;196mred\x1b[0m"), 3); + }); + + test("ignores RGB ANSI", () => { + assert.equal( + native.visibleWidth("\x1b[38;2;255;128;0morange\x1b[0m"), + 6, + ); + }); + + test("counts tabs with default width", () => { + // default tab width = 3 + assert.equal(native.visibleWidth("a\tb"), 1 + 3 + 1); + }); + + test("counts tabs with custom width", () => { + assert.equal(native.visibleWidth("a\tb", 4), 1 + 4 + 1); + }); + + test("CJK double-width characters", () => { + assert.equal(native.visibleWidth("\u4e16\u754c"), 4); // 世界 + }); + + test("mixed ASCII and CJK", () => { + assert.equal(native.visibleWidth("a\u4e16b"), 4); // a + 2 + 1 + }); +}); + +// ── wrapTextWithAnsi ─────────────────────────────────────────────────── + +describe("wrapTextWithAnsi", () => { + test("wraps plain text at word boundary", () => { + const lines = native.wrapTextWithAnsi("hello world", 5); + assert.equal(lines.length, 2); + assert.equal(lines[0], "hello"); + assert.equal(lines[1], "world"); + }); + + test("no wrap needed", () => { + const lines = native.wrapTextWithAnsi("hi", 10); + assert.equal(lines.length, 1); + 
assert.equal(lines[0], "hi"); + }); + + test("empty string produces one empty line", () => { + const lines = native.wrapTextWithAnsi("", 10); + assert.equal(lines.length, 1); + assert.equal(lines[0], ""); + }); + + test("preserves ANSI color across wrap", () => { + const lines = native.wrapTextWithAnsi( + "\x1b[38;2;156;163;176mhello world\x1b[0m", + 5, + ); + assert.equal(lines.length, 2); + assert.ok(lines[0].startsWith("\x1b[38;2;156;163;176m")); + assert.ok(lines[1].startsWith("\x1b[38;2;156;163;176m")); + assert.ok(lines[1].includes("world")); + }); + + test("handles multiline input (newlines)", () => { + const lines = native.wrapTextWithAnsi("line one\nline two", 20); + assert.equal(lines.length, 2); + assert.equal(lines[0], "line one"); + assert.equal(lines[1], "line two"); + }); + + test("breaks long words", () => { + const lines = native.wrapTextWithAnsi("abcdefghij", 5); + assert.equal(lines.length, 2); + assert.equal(lines[0], "abcde"); + assert.equal(lines[1], "fghij"); + }); +}); + +// ── truncateToWidth ──────────────────────────────────────────────────── + +describe("truncateToWidth", () => { + test("returns original when fits", () => { + const result = native.truncateToWidth("hello", 10, 0, false); + assert.equal(result, "hello"); + }); + + test("truncates with unicode ellipsis", () => { + const result = native.truncateToWidth("hello world", 6, 0, false); + assert.equal(native.visibleWidth(result), 6); + assert.ok(result.includes("\u2026")); + }); + + test("truncates with ASCII ellipsis", () => { + const result = native.truncateToWidth("hello world", 8, 1, false); + assert.ok(result.includes("...")); + }); + + test("truncates with no ellipsis", () => { + const result = native.truncateToWidth("hello world", 5, 2, false); + assert.equal(native.visibleWidth(result), 5); + assert.ok(!result.includes("\u2026")); + assert.ok(!result.includes("...")); + }); + + test("pads to width", () => { + const result = native.truncateToWidth("hi", 10, 0, true); + 
assert.equal(native.visibleWidth(result), 10); + }); + + test("preserves ANSI codes and resets on truncation", () => { + const input = "\x1b[31mhello world\x1b[0m"; + const result = native.truncateToWidth(input, 6, 0, false); + // Should contain the red code and a reset before ellipsis + assert.ok(result.includes("\x1b[31m")); + assert.ok(result.includes("\x1b[0m")); + }); +}); + +// ── sliceWithWidth ───────────────────────────────────────────────────── + +describe("sliceWithWidth", () => { + test("slices from start", () => { + const result = native.sliceWithWidth("hello world", 0, 5, false); + assert.equal(result.text, "hello"); + assert.equal(result.width, 5); + }); + + test("slices from middle", () => { + const result = native.sliceWithWidth("hello world", 6, 5, false); + assert.equal(result.text, "world"); + assert.equal(result.width, 5); + }); + + test("preserves ANSI codes in slice", () => { + const result = native.sliceWithWidth( + "\x1b[31mhello\x1b[0m world", + 0, + 5, + false, + ); + assert.equal(result.text, "\x1b[31mhello\x1b[0m"); + assert.equal(result.width, 5); + }); + + test("empty slice", () => { + const result = native.sliceWithWidth("hello", 0, 0, false); + assert.equal(result.text, ""); + assert.equal(result.width, 0); + }); + + test("beyond string length", () => { + const result = native.sliceWithWidth("hi", 0, 100, false); + assert.equal(result.text, "hi"); + assert.equal(result.width, 2); + }); +}); + +// ── extractSegments ──────────────────────────────────────────────────── + +describe("extractSegments", () => { + test("extracts before and after segments", () => { + const result = native.extractSegments( + "hello world test", + 5, + 6, + 5, + false, + ); + assert.equal(result.before, "hello"); + assert.equal(result.beforeWidth, 5); + assert.equal(result.after, "world"); + assert.equal(result.afterWidth, 5); + }); + + test("handles no after segment", () => { + const result = native.extractSegments("hello world", 5, 0, 0, false); + 
assert.equal(result.before, "hello"); + assert.equal(result.beforeWidth, 5); + assert.equal(result.after, ""); + assert.equal(result.afterWidth, 0); + }); +}); + +// ── sanitizeText ─────────────────────────────────────────────────────── + +describe("sanitizeText", () => { + test("strips ANSI codes", () => { + assert.equal(native.sanitizeText("\x1b[31mhello\x1b[0m"), "hello"); + }); + + test("returns original when clean", () => { + assert.equal(native.sanitizeText("hello"), "hello"); + }); + + test("removes control characters", () => { + assert.equal(native.sanitizeText("he\x01llo"), "hello"); + }); + + test("preserves tabs and newlines", () => { + assert.equal(native.sanitizeText("a\tb\nc"), "a\tb\nc"); + }); + + test("normalizes CR", () => { + assert.equal(native.sanitizeText("hello\r\nworld"), "hello\nworld"); + }); +}); diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 3c5cfdf83..6ef4dc0e9 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -3,6 +3,7 @@ * * Modules: * - grep: ripgrep-backed regex search (content + filesystem) + * - text: ANSI-aware text measurement and slicing */ export { searchContent, grep } from "./grep/index.js"; @@ -15,3 +16,14 @@ export type { SearchOptions, SearchResult, } from "./grep/index.js"; + +export { + wrapTextWithAnsi, + truncateToWidth, + sliceWithWidth, + extractSegments, + sanitizeText, + visibleWidth, + EllipsisKind, +} from "./text/index.js"; +export type { SliceResult, ExtractSegmentsResult } from "./text/index.js"; diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 93aa1a09d..f39aac9f4 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -43,4 +43,29 @@ function loadNative(): Record { export const native = loadNative() as { search: (content: Buffer | Uint8Array, options: unknown) => unknown; grep: (options: unknown) => unknown; + wrapTextWithAnsi: (text: string, width: number, tabWidth?: number) => string[]; 
+ truncateToWidth: ( + text: string, + maxWidth: number, + ellipsisKind: number, + pad: boolean, + tabWidth?: number, + ) => string; + sliceWithWidth: ( + line: string, + startCol: number, + length: number, + strict: boolean, + tabWidth?: number, + ) => unknown; + extractSegments: ( + line: string, + beforeEnd: number, + afterStart: number, + afterLen: number, + strictAfter: boolean, + tabWidth?: number, + ) => unknown; + sanitizeText: (text: string) => string; + visibleWidth: (text: string, tabWidth?: number) => number; }; diff --git a/packages/native/src/text/index.ts b/packages/native/src/text/index.ts new file mode 100644 index 000000000..9c4e5be86 --- /dev/null +++ b/packages/native/src/text/index.ts @@ -0,0 +1,125 @@ +/** + * ANSI-aware text measurement and slicing. + * + * High-performance UTF-16 native implementation with ASCII fast-paths, + * single-pass ANSI scanning, and proper Unicode grapheme cluster support. + */ + +import { native } from "../native.js"; +import type { ExtractSegmentsResult, SliceResult } from "./types.js"; + +export type { ExtractSegmentsResult, SliceResult }; +export { EllipsisKind } from "./types.js"; + +/** + * Word-wrap text to a visible width, preserving ANSI escape codes across + * line breaks. + * + * Active SGR codes (colors, bold, etc.) are carried to continuation lines. + * Underline and strikethrough are reset at line ends and restored on the + * next line. + */ +export function wrapTextWithAnsi( + text: string, + width: number, + tabWidth?: number, +): string[] { + return (native as Record).wrapTextWithAnsi( + text, + width, + tabWidth, + ) as string[]; +} + +/** + * Truncate text to a visible width with an optional ellipsis. + * + * @param text Input string (may contain ANSI codes). + * @param maxWidth Maximum visible width in terminal cells. + * @param ellipsisKind 0 = "\u2026", 1 = "...", 2 = none. + * @param pad When true, pad with spaces to exactly `maxWidth`. 
+ * @param tabWidth Tab stop width (default 3, range 1-16). + */ +export function truncateToWidth( + text: string, + maxWidth: number, + ellipsisKind: number, + pad: boolean, + tabWidth?: number, +): string { + return (native as Record).truncateToWidth( + text, + maxWidth, + ellipsisKind, + pad, + tabWidth, + ) as string; +} + +/** + * Slice a range of visible columns from a line. + * + * Counts terminal cells (skipping ANSI escapes). When `strict` is true, + * wide characters that would exceed the range are excluded. + */ +export function sliceWithWidth( + line: string, + startCol: number, + length: number, + strict: boolean, + tabWidth?: number, +): SliceResult { + return (native as Record).sliceWithWidth( + line, + startCol, + length, + strict, + tabWidth, + ) as SliceResult; +} + +/** + * Extract the before/after segments around an overlay region. + * + * ANSI state is tracked so the `after` segment renders correctly even when + * the overlay truncates styled text. + */ +export function extractSegments( + line: string, + beforeEnd: number, + afterStart: number, + afterLen: number, + strictAfter: boolean, + tabWidth?: number, +): ExtractSegmentsResult { + return (native as Record).extractSegments( + line, + beforeEnd, + afterStart, + afterLen, + strictAfter, + tabWidth, + ) as ExtractSegmentsResult; +} + +/** + * Strip ANSI escape sequences, remove control characters and lone + * surrogates, and normalize line endings (CR removed). + * + * Returns the original string when no changes are needed (zero-copy). + */ +export function sanitizeText(text: string): string { + return (native as Record).sanitizeText(text) as string; +} + +/** + * Calculate visible width of text excluding ANSI escape sequences. + * + * Tabs count as `tabWidth` cells (default 3). 
+ */ +export function visibleWidth(text: string, tabWidth?: number): number { + return (native as Record).visibleWidth( + text, + tabWidth, + ) as number; +} diff --git a/packages/native/src/text/types.ts b/packages/native/src/text/types.ts new file mode 100644 index 000000000..e25e5ca56 --- /dev/null +++ b/packages/native/src/text/types.ts @@ -0,0 +1,29 @@ +/** Result of slicing a line by visible column range. */ +export interface SliceResult { + /** The extracted text (may include ANSI codes). */ + text: string; + /** Visible width of the extracted slice in terminal cells. */ + width: number; +} + +/** Result of extracting before/after segments around an overlay. */ +export interface ExtractSegmentsResult { + /** Text content before the overlay region. */ + before: string; + /** Visible width of the `before` segment. */ + beforeWidth: number; + /** Text content after the overlay region. */ + after: string; + /** Visible width of the `after` segment. */ + afterWidth: number; +} + +/** Ellipsis style for truncation. */ +export enum EllipsisKind { + /** Unicode ellipsis character: \u2026 (width 1) */ + Unicode = 0, + /** ASCII ellipsis: "..." (width 3) */ + Ascii = 1, + /** No ellipsis (hard truncate) */ + None = 2, +} From e05292f772f191e2301fbf5b3fc44a38e2154fde Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:47:27 -0600 Subject: [PATCH 5/7] =?UTF-8?q?feat:=20add=20native=20ast=20module=20?= =?UTF-8?q?=E2=80=94=20AST-aware=20structural=20search=20and=20rewrite=20v?= =?UTF-8?q?ia=20ast-grep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port ast-grep integration from Oh My Pi with 38+ language support via tree-sitter grammars. Exposes `astGrep` (search) and `astEdit` (rewrite) as N-API functions with TypeScript wrappers. 
Key changes: - New `gsd-ast` crate with language definitions, glob utilities, and ast-grep core - Replaces fs_cache/task dependencies with `ignore` crate for file walking - Synchronous API matching the existing grep module pattern - Full TypeScript type declarations in packages/native/src/ast/ Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 618 ++++++++++++++ native/crates/ast/Cargo.toml | 54 ++ native/crates/ast/src/ast.rs | 929 ++++++++++++++++++++++ native/crates/ast/src/glob_util.rs | 54 ++ native/crates/ast/src/language/mod.rs | 437 ++++++++++ native/crates/ast/src/language/parsers.rs | 118 +++ native/crates/ast/src/lib.rs | 10 + native/crates/engine/Cargo.toml | 1 + native/crates/engine/src/ast.rs | 6 + native/crates/engine/src/lib.rs | 1 + packages/native/package.json | 4 + packages/native/src/ast/index.ts | 67 ++ packages/native/src/ast/types.ts | 137 ++++ packages/native/src/index.ts | 11 + packages/native/src/native.ts | 2 + 15 files changed, 2449 insertions(+) create mode 100644 native/crates/ast/Cargo.toml create mode 100644 native/crates/ast/src/ast.rs create mode 100644 native/crates/ast/src/glob_util.rs create mode 100644 native/crates/ast/src/language/mod.rs create mode 100644 native/crates/ast/src/language/parsers.rs create mode 100644 native/crates/ast/src/lib.rs create mode 100644 native/crates/engine/src/ast.rs create mode 100644 packages/native/src/ast/index.ts create mode 100644 packages/native/src/ast/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index 748f53e2a..7fd93005a 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -37,12 +37,39 @@ dependencies = [ "x11rb", ] +[[package]] +name = "ast-grep-core" +version = "0.39.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057ae90e7256ebf85f840b1638268df0142c9d19467d500b790631fd301acc27" +dependencies = [ + "bit-set", + "regex", + "thiserror", + "tree-sitter", +] + [[package]] name = "autocfg" version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.0" @@ -72,6 +99,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -180,6 +217,12 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.14" @@ -196,6 +239,12 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fax" version = "0.2.6" @@ -225,6 +274,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "flate2" version = "1.1.9" @@ -295,6 +350,56 @@ dependencies = [ "memmap2", ] +[[package]] +name = "gsd-ast" +version = "0.1.0" +dependencies = [ + "ast-grep-core", + "globset", + "ignore", + "napi", + "napi-derive", + "phf", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-diff", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-hcl", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + "tree-sitter-julia", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-make", + "tree-sitter-md", + "tree-sitter-nix", + "tree-sitter-objc", + "tree-sitter-odin", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-regex", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-solidity", + "tree-sitter-starlark", + "tree-sitter-swift", + "tree-sitter-toml-ng", + "tree-sitter-typescript", + "tree-sitter-verilog", + "tree-sitter-xml", + "tree-sitter-yaml", + "tree-sitter-zig", +] + [[package]] name = "gsd-engine" version = "0.1.0" @@ -329,6 +434,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "ignore" version = "0.4.25" @@ -359,6 +470,22 @@ dependencies = [ "tiff", ] +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "libc" version = "0.2.183" @@ -605,6 +732,49 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "png" version = "0.18.1" @@ -769,18 +939,50 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "simd-adler32" version = "0.3.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "syn" version = "2.0.117" @@ -792,6 +994,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tiff" version = "0.11.3" @@ -806,6 +1028,396 @@ dependencies = [ "zune-jpeg", ] +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + 
"tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-css" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-diff" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfe1e5ca280a65dfe5ba4205c1bcc84edf486464fed315db53dee6da9a335889" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-elixir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66dd064a762ed95bfc29857fa3cb7403bb1e5cb88112de0f6341b7e47284ba40" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"977c51e504548cba13fc27cb5a2edab2124cf6716a1934915d07ab99523b05a4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-hcl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7b2cc3d7121553b84309fab9d11b3ff3d420403eef9ae50f9fd1cd9d9cf012" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-html" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-json" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-sg" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0e175b7530765d1e36ad234a7acaa8b2a3316153f239d724376c7ee5e8d8e98" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = 
"tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-make" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5998dc7cbcbdab19fae8aefef982bf2d6544513d8d2e69cc44aec4c63810104" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-md" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efd398be546456c814598ee56c0f51769a77241511b4a58077815d120afa882" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-odin" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24db210fe9ba2237c71c5030d7b146c7025420ba72dd8013d13cd822c3a8d77a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + 
"tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-regex" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8a59be9f0ac131fd8f062eaaba14882b2fa5a6a7882a20134cb1d60df2e625" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scala" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b4f354028b5fcf1d0c77f1c6d84cd5a579f29a1e43cb61551ec6580e9a99229" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-starlark" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8934f282d085cc4b9ee28aa688aa3fbe8aa3766201c2a6252f411d45b4c3a721" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-toml-ng" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9adc2c898ae49730e857d75be403da3f92bb81d8e37a2f918a08dd10de5ebb1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-verilog" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e7e0360395852f1f6ff5b7b82c72dc6557d181073188df1d60ec469ea69c66" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -969,6 +1581,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zune-core" version = "0.5.1" diff --git a/native/crates/ast/Cargo.toml b/native/crates/ast/Cargo.toml new file mode 100644 index 000000000..91647fb32 --- /dev/null +++ b/native/crates/ast/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "gsd-ast" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "AST-aware structural search and rewrite via ast-grep for GSD native engine" + +[dependencies] +ast-grep-core = { version = "0.39", default-features = false, features = ["tree-sitter"] } +globset = "0.4" +ignore = "0.4" +napi = { version = "2", features = ["napi8"] } +napi-derive = "2" +phf = { version = "0.13", features = ["macros"] } +tree-sitter = "0.25" +tree-sitter-bash = "0.25" +tree-sitter-c = "0.24" +tree-sitter-c-sharp = "0.23" +tree-sitter-cpp = "0.23" +tree-sitter-css = "0.25" +tree-sitter-diff = "0.1" +tree-sitter-elixir = "0.3" +tree-sitter-go = "0.25" +tree-sitter-haskell = "0.23" +tree-sitter-hcl = "1.1" +tree-sitter-html = "0.23" +tree-sitter-java = "0.23" +tree-sitter-javascript = "0.25" +tree-sitter-json = "0.23" +tree-sitter-julia = "0.23" +tree-sitter-kotlin = { version = "0.4", package = "tree-sitter-kotlin-sg" } +tree-sitter-lua = "0.2" +tree-sitter-make = "1.1" +tree-sitter-md = "0.5" +tree-sitter-nix = "0.3" +tree-sitter-objc = "3.0" +tree-sitter-odin = "1.3" +tree-sitter-php = "0.24" +tree-sitter-python = "0.25" +tree-sitter-regex = "0.25" +tree-sitter-ruby = "0.23" +tree-sitter-rust = "0.24" +tree-sitter-scala = "0.24" +tree-sitter-solidity = "1.2" +tree-sitter-starlark = "1.3" +tree-sitter-swift = "0.7" +tree-sitter-toml-ng = "0.7" +tree-sitter-typescript = "0.23" +tree-sitter-verilog = "1.0" +tree-sitter-xml = "0.7" +tree-sitter-yaml = "0.7" +tree-sitter-zig = "1.1" diff --git a/native/crates/ast/src/ast.rs b/native/crates/ast/src/ast.rs new file mode 100644 index 
000000000..12d5aee17 --- /dev/null +++ b/native/crates/ast/src/ast.rs @@ -0,0 +1,929 @@ +//! AST-aware structural search and rewrite powered by ast-grep. + +use std::{ + collections::{BTreeMap, BTreeSet, HashMap}, + path::{Path, PathBuf}, +}; + +use ast_grep_core::{ + Language, MatchStrictness, matcher::Pattern, source::Edit, tree_sitter::LanguageExt, +}; +use ignore::WalkBuilder; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +use crate::{glob_util, language::SupportLang}; + +const DEFAULT_FIND_LIMIT: u32 = 50; + +#[napi(object)] +pub struct AstFindOptions { + pub patterns: Option>, + pub lang: Option, + pub path: Option, + pub glob: Option, + pub selector: Option, + pub strictness: Option, + pub limit: Option, + pub offset: Option, + #[napi(js_name = "includeMeta")] + pub include_meta: Option, + pub context: Option, +} + +#[napi(object)] +pub struct AstFindMatch { + pub path: String, + pub text: String, + #[napi(js_name = "byteStart")] + pub byte_start: u32, + #[napi(js_name = "byteEnd")] + pub byte_end: u32, + #[napi(js_name = "startLine")] + pub start_line: u32, + #[napi(js_name = "startColumn")] + pub start_column: u32, + #[napi(js_name = "endLine")] + pub end_line: u32, + #[napi(js_name = "endColumn")] + pub end_column: u32, + #[napi(js_name = "metaVariables")] + pub meta_variables: Option>, +} + +#[napi(object)] +pub struct AstFindResult { + pub matches: Vec, + #[napi(js_name = "totalMatches")] + pub total_matches: u32, + #[napi(js_name = "filesWithMatches")] + pub files_with_matches: u32, + #[napi(js_name = "filesSearched")] + pub files_searched: u32, + #[napi(js_name = "limitReached")] + pub limit_reached: bool, + #[napi(js_name = "parseErrors")] + pub parse_errors: Option>, +} + +#[napi(object)] +pub struct AstReplaceOptions { + pub rewrites: Option>, + pub lang: Option, + pub path: Option, + pub glob: Option, + pub selector: Option, + pub strictness: Option, + #[napi(js_name = "dryRun")] + pub dry_run: Option, + #[napi(js_name = 
"maxReplacements")] + pub max_replacements: Option, + #[napi(js_name = "maxFiles")] + pub max_files: Option, + #[napi(js_name = "failOnParseError")] + pub fail_on_parse_error: Option, +} + +#[napi(object)] +pub struct AstReplaceChange { + pub path: String, + pub before: String, + pub after: String, + #[napi(js_name = "byteStart")] + pub byte_start: u32, + #[napi(js_name = "byteEnd")] + pub byte_end: u32, + #[napi(js_name = "deletedLength")] + pub deleted_length: u32, + #[napi(js_name = "startLine")] + pub start_line: u32, + #[napi(js_name = "startColumn")] + pub start_column: u32, + #[napi(js_name = "endLine")] + pub end_line: u32, + #[napi(js_name = "endColumn")] + pub end_column: u32, +} + +#[napi(object)] +pub struct AstReplaceFileChange { + pub path: String, + pub count: u32, +} + +#[napi(object)] +pub struct AstReplaceResult { + pub changes: Vec, + #[napi(js_name = "fileChanges")] + pub file_changes: Vec, + #[napi(js_name = "totalReplacements")] + pub total_replacements: u32, + #[napi(js_name = "filesTouched")] + pub files_touched: u32, + #[napi(js_name = "filesSearched")] + pub files_searched: u32, + pub applied: bool, + #[napi(js_name = "limitReached")] + pub limit_reached: bool, + #[napi(js_name = "parseErrors")] + pub parse_errors: Option>, +} + +struct FileCandidate { + absolute_path: PathBuf, + display_path: String, +} + +struct PendingFileChange { + change: AstReplaceChange, + edit: Edit, +} + +fn to_u32(value: usize) -> u32 { + value.min(u32::MAX as usize) as u32 +} + +/// Single source of truth: every recognised alias (lowercased) -> `SupportLang`. +static LANG_ALIASES: phf::Map<&'static str, SupportLang> = phf::phf_map! 
{ + "bash" => SupportLang::Bash, + "sh" => SupportLang::Bash, + "c" => SupportLang::C, + "cpp" => SupportLang::Cpp, + "c++" => SupportLang::Cpp, + "cc" => SupportLang::Cpp, + "cxx" => SupportLang::Cpp, + "csharp" => SupportLang::CSharp, + "c#" => SupportLang::CSharp, + "cs" => SupportLang::CSharp, + "css" => SupportLang::Css, + "diff" => SupportLang::Diff, + "patch" => SupportLang::Diff, + "elixir" => SupportLang::Elixir, + "ex" => SupportLang::Elixir, + "go" => SupportLang::Go, + "golang" => SupportLang::Go, + "haskell" => SupportLang::Haskell, + "hs" => SupportLang::Haskell, + "hcl" => SupportLang::Hcl, + "tf" => SupportLang::Hcl, + "tfvars" => SupportLang::Hcl, + "terraform" => SupportLang::Hcl, + "html" => SupportLang::Html, + "htm" => SupportLang::Html, + "java" => SupportLang::Java, + "javascript" => SupportLang::JavaScript, + "js" => SupportLang::JavaScript, + "jsx" => SupportLang::JavaScript, + "mjs" => SupportLang::JavaScript, + "cjs" => SupportLang::JavaScript, + "json" => SupportLang::Json, + "julia" => SupportLang::Julia, + "jl" => SupportLang::Julia, + "kotlin" => SupportLang::Kotlin, + "kt" => SupportLang::Kotlin, + "lua" => SupportLang::Lua, + "make" => SupportLang::Make, + "makefile" => SupportLang::Make, + "markdown" => SupportLang::Markdown, + "md" => SupportLang::Markdown, + "mdx" => SupportLang::Markdown, + "nix" => SupportLang::Nix, + "objc" => SupportLang::ObjC, + "objective-c" => SupportLang::ObjC, + "odin" => SupportLang::Odin, + "php" => SupportLang::Php, + "python" => SupportLang::Python, + "py" => SupportLang::Python, + "regex" => SupportLang::Regex, + "ruby" => SupportLang::Ruby, + "rb" => SupportLang::Ruby, + "rust" => SupportLang::Rust, + "rs" => SupportLang::Rust, + "scala" => SupportLang::Scala, + "solidity" => SupportLang::Solidity, + "sol" => SupportLang::Solidity, + "starlark" => SupportLang::Starlark, + "star" => SupportLang::Starlark, + "swift" => SupportLang::Swift, + "toml" => SupportLang::Toml, + "tsx" => SupportLang::Tsx, + 
"typescript" => SupportLang::TypeScript, + "ts" => SupportLang::TypeScript, + "mts" => SupportLang::TypeScript, + "cts" => SupportLang::TypeScript, + "verilog" => SupportLang::Verilog, + "systemverilog" => SupportLang::Verilog, + "sv" => SupportLang::Verilog, + "xml" => SupportLang::Xml, + "xsl" => SupportLang::Xml, + "svg" => SupportLang::Xml, + "yaml" => SupportLang::Yaml, + "yml" => SupportLang::Yaml, + "zig" => SupportLang::Zig, +}; + +fn supported_lang_list() -> String { + let mut keys: Vec<&str> = LANG_ALIASES.keys().copied().collect(); + keys.sort_unstable(); + keys.join(", ") +} + +fn resolve_supported_lang(value: &str) -> Result { + let lower = value.to_ascii_lowercase(); + LANG_ALIASES.get(lower.as_str()).copied().ok_or_else(|| { + Error::from_reason(format!( + "Unsupported language '{value}'. Supported: {}", + supported_lang_list() + )) + }) +} + +fn resolve_language(lang: Option<&str>, file_path: &Path) -> Result { + if let Some(lang) = lang.map(str::trim).filter(|lang| !lang.is_empty()) { + return resolve_supported_lang(lang); + } + SupportLang::from_path(file_path).ok_or_else(|| { + Error::from_reason(format!( + "Unable to infer language from file extension: {}. 
Specify `lang` explicitly.", + file_path.display() + )) + }) +} + +fn is_supported_file(file_path: &Path, explicit_lang: Option<&str>) -> bool { + if explicit_lang.is_some() { + return true; + } + resolve_language(None, file_path).is_ok() +} + +fn infer_single_replace_lang(candidates: &[FileCandidate]) -> Result { + let mut inferred = BTreeSet::new(); + let mut unresolved = Vec::new(); + for candidate in candidates { + match resolve_language(None, &candidate.absolute_path) { + Ok(language) => { + inferred.insert(language.canonical_name().to_string()); + }, + Err(err) => unresolved.push(format!("{}: {}", candidate.display_path, err)), + } + } + if !unresolved.is_empty() { + let details = unresolved + .into_iter() + .map(|entry| format!("- {entry}")) + .collect::>() + .join("\n"); + return Err(Error::from_reason(format!( + "`lang` is required for ast_edit when language cannot be inferred from all \ + files:\n{details}" + ))); + } + if inferred.is_empty() { + return Err(Error::from_reason( + "`lang` is required for ast_edit when no files match path/glob".to_string(), + )); + } + if inferred.len() > 1 { + return Err(Error::from_reason(format!( + "`lang` is required for ast_edit when path/glob resolves to multiple languages: {}", + inferred.into_iter().collect::>().join(", ") + ))); + } + Ok(inferred.into_iter().next().expect("non-empty inferred set")) +} + +fn parse_strictness(value: Option<&str>) -> Result { + let Some(raw) = value.map(str::trim).filter(|v| !v.is_empty()) else { + return Ok(MatchStrictness::Smart); + }; + raw.parse::() + .map_err(|err| Error::from_reason(format!("Invalid strictness '{raw}': {err}"))) +} + +fn normalize_search_path(path: Option) -> Result { + let raw = path.unwrap_or_else(|| ".".to_string()); + let candidate = PathBuf::from(raw.trim()); + let absolute = if candidate.is_absolute() { + candidate + } else { + std::env::current_dir() + .map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))? 
+ .join(candidate) + }; + Ok(std::fs::canonicalize(&absolute).unwrap_or(absolute)) +} + +/// Collect file candidates by walking the directory tree using the `ignore` +/// crate (respects .gitignore, skips hidden files). +fn collect_candidates( + path: Option, + glob: Option<&str>, +) -> Result> { + let search_path = normalize_search_path(path)?; + let metadata = std::fs::metadata(&search_path) + .map_err(|err| Error::from_reason(format!("Path not found: {err}")))?; + + if metadata.is_file() { + let display_path = search_path + .file_name() + .and_then(|name| name.to_str()) + .map_or_else( + || search_path.to_string_lossy().into_owned(), + std::string::ToString::to_string, + ); + return Ok(vec![FileCandidate { absolute_path: search_path, display_path }]); + } + + if !metadata.is_dir() { + return Err(Error::from_reason(format!( + "Search path must be a file or directory: {}", + search_path.display() + ))); + } + + let glob_set = glob_util::try_compile_glob(glob, false)?; + let mentions_node_modules = glob.is_some_and(|value| value.contains("node_modules")); + + let walker = WalkBuilder::new(&search_path) + .hidden(true) + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .build(); + + let mut files = Vec::new(); + for entry in walker { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + + if !entry.file_type().is_some_and(|ft| ft.is_file()) { + continue; + } + + let abs = entry.path().to_path_buf(); + let relative = abs + .strip_prefix(&search_path) + .map(|p| p.to_string_lossy().replace('\\', "/")) + .unwrap_or_else(|_| abs.to_string_lossy().into_owned()); + + if !mentions_node_modules && relative.contains("node_modules") { + continue; + } + + if let Some(ref gs) = glob_set { + if !gs.is_match(&relative) { + continue; + } + } + + files.push(FileCandidate { absolute_path: abs, display_path: relative }); + } + + files.sort_by(|a, b| a.display_path.cmp(&b.display_path)); + Ok(files) +} + +fn compile_pattern( + pattern: &str, + selector: 
Option<&str>, + strictness: &MatchStrictness, + lang: SupportLang, +) -> Result { + let mut compiled = if let Some(selector) = selector.map(str::trim).filter(|s| !s.is_empty()) { + Pattern::contextual(pattern, selector, lang) + } else { + Pattern::try_new(pattern, lang) + } + .map_err(|err| Error::from_reason(format!("Invalid pattern: {err}")))?; + compiled.strictness = strictness.clone(); + Ok(compiled) +} + +fn apply_edits(content: &str, edits: &[Edit]) -> Result { + let mut sorted: Vec<&Edit> = edits.iter().collect(); + sorted.sort_by_key(|edit| edit.position); + let mut prev_end = 0usize; + for edit in &sorted { + if edit.position < prev_end { + return Err(Error::from_reason( + "Overlapping replacements detected; refine pattern to avoid ambiguous edits" + .to_string(), + )); + } + prev_end = edit.position.saturating_add(edit.deleted_length); + } + + let mut output = content.to_string(); + for edit in sorted.into_iter().rev() { + let start = edit.position; + let end = edit.position.saturating_add(edit.deleted_length); + if end > output.len() || start > end { + return Err(Error::from_reason("Computed edit range is out of bounds".to_string())); + } + let replacement = String::from_utf8(edit.inserted_text.clone()).map_err(|err| { + Error::from_reason(format!("Replacement text is not valid UTF-8: {err}")) + })?; + output.replace_range(start..end, &replacement); + } + Ok(output) +} + +fn normalize_pattern_list(patterns: Option>) -> Result> { + let mut normalized = Vec::new(); + let mut seen = BTreeSet::new(); + for raw in patterns.unwrap_or_default() { + let pattern = raw.trim(); + if pattern.is_empty() { + continue; + } + if seen.insert(pattern.to_string()) { + normalized.push(pattern.to_string()); + } + } + if normalized.is_empty() { + return Err(Error::from_reason( + "`patterns` is required and must include at least one non-empty pattern".to_string(), + )); + } + Ok(normalized) +} + +fn normalize_rewrite_map( + rewrites: Option>, +) -> Result> { + let mut 
normalized = Vec::new(); + for (pattern, rewrite) in rewrites.unwrap_or_default() { + if pattern.is_empty() { + return Err(Error::from_reason( + "`rewrites` keys must be non-empty pattern strings".to_string(), + )); + } + normalized.push((pattern, rewrite)); + } + if normalized.is_empty() { + return Err(Error::from_reason( + "`rewrites` is required and must include at least one pattern->rewrite mapping" + .to_string(), + )); + } + normalized.sort_by(|left, right| left.0.cmp(&right.0)); + Ok(normalized) +} + +struct CompiledFindPattern { + pattern: String, + compiled_by_lang: HashMap, + compile_errors_by_lang: HashMap, +} + +struct ResolvedCandidate { + candidate: FileCandidate, + language: Option, + language_error: Option, +} + +fn resolve_candidates_for_find( + candidates: Vec, + lang: Option<&str>, +) -> Result<(Vec, HashMap)> { + let mut resolved = Vec::with_capacity(candidates.len()); + let mut languages = HashMap::new(); + + for candidate in candidates { + match resolve_language(lang, &candidate.absolute_path) { + Ok(language) => { + let key = language.canonical_name().to_string(); + languages.entry(key).or_insert(language); + resolved.push(ResolvedCandidate { + candidate, + language: Some(language), + language_error: None, + }); + }, + Err(err) => { + resolved.push(ResolvedCandidate { + candidate, + language: None, + language_error: Some(err.to_string()), + }); + }, + } + } + + Ok((resolved, languages)) +} + +fn compile_find_patterns( + patterns: &[String], + languages: &HashMap, + selector: Option<&str>, + strictness: &MatchStrictness, +) -> Result> { + let mut compiled = Vec::with_capacity(patterns.len()); + + for pattern in patterns { + let mut compiled_by_lang = HashMap::with_capacity(languages.len()); + let mut compile_errors_by_lang = HashMap::new(); + + for (lang_key, &language) in languages { + match compile_pattern(pattern, selector, strictness, language) { + Ok(compiled_pattern) => { + compiled_by_lang.insert(lang_key.clone(), compiled_pattern); + 
}, + Err(err) => { + compile_errors_by_lang.insert(lang_key.clone(), err.to_string()); + }, + } + } + + compiled.push(CompiledFindPattern { + pattern: pattern.clone(), + compiled_by_lang, + compile_errors_by_lang, + }); + } + + Ok(compiled) +} + +/// Structural code search using ast-grep patterns. +/// +/// Searches files for AST patterns across 38+ languages. +#[napi(js_name = "astGrep")] +pub fn ast_grep(options: AstFindOptions) -> Result { + let AstFindOptions { + patterns, lang, path, glob, selector, strictness, + limit, offset, include_meta, context: _, + } = options; + + let normalized_limit = limit.unwrap_or(DEFAULT_FIND_LIMIT).max(1); + let normalized_offset = offset.unwrap_or(0); + + let patterns = normalize_pattern_list(patterns)?; + let strictness = parse_strictness(strictness.as_deref())?; + let include_meta = include_meta.unwrap_or(false); + let lang_str = lang.as_deref().map(str::trim).filter(|v| !v.is_empty()); + let candidates: Vec<_> = collect_candidates(path, glob.as_deref())? 
+ .into_iter() + .filter(|candidate| is_supported_file(&candidate.absolute_path, lang_str)) + .collect(); + + let (resolved_candidates, languages) = resolve_candidates_for_find(candidates, lang_str)?; + let compiled_patterns = compile_find_patterns(&patterns, &languages, selector.as_deref(), &strictness)?; + let files_searched = to_u32(resolved_candidates.len()); + + let mut all_matches = Vec::new(); + let mut parse_errors = Vec::new(); + let mut total_matches = 0u32; + let mut files_with_matches = BTreeSet::new(); + + for resolved in resolved_candidates { + let ResolvedCandidate { candidate, language, language_error } = resolved; + + if let Some(error) = language_error.as_deref() { + for compiled in &compiled_patterns { + parse_errors.push(format!("{}: {}: {error}", compiled.pattern, candidate.display_path)); + } + continue; + } + + let Some(language) = language else { continue }; + let lang_key = language.canonical_name(); + let source = match std::fs::read_to_string(&candidate.absolute_path) { + Ok(source) => source, + Err(err) => { + for compiled in &compiled_patterns { + parse_errors.push(format!("{}: {}: {err}", compiled.pattern, candidate.display_path)); + } + continue; + }, + }; + + let mut runnable_patterns: Vec<(&str, &Pattern)> = Vec::new(); + for compiled in &compiled_patterns { + if let Some(error) = compiled.compile_errors_by_lang.get(lang_key) { + parse_errors.push(format!("{}: {}: {error}", compiled.pattern, candidate.display_path)); + continue; + } + if let Some(pattern) = compiled.compiled_by_lang.get(lang_key) { + runnable_patterns.push((compiled.pattern.as_str(), pattern)); + } + } + if runnable_patterns.is_empty() { + continue; + } + + let ast = language.ast_grep(source); + if ast.root().dfs().any(|node| node.is_error()) { + parse_errors.push(format!( + "{}: parse error (syntax tree contains error nodes)", + candidate.display_path + )); + } + + for (_, pattern) in runnable_patterns { + for matched in ast.root().find_all(pattern.clone()) { + 
total_matches = total_matches.saturating_add(1); + let range = matched.range(); + let start = matched.start_pos(); + let end = matched.end_pos(); + let meta_variables = if include_meta { + Some(HashMap::::from(matched.get_env().clone())) + } else { + None + }; + all_matches.push(AstFindMatch { + path: candidate.display_path.clone(), + text: matched.text().into_owned(), + byte_start: to_u32(range.start), + byte_end: to_u32(range.end), + start_line: to_u32(start.line().saturating_add(1)), + start_column: to_u32(start.column(matched.get_node()).saturating_add(1)), + end_line: to_u32(end.line().saturating_add(1)), + end_column: to_u32(end.column(matched.get_node()).saturating_add(1)), + meta_variables, + }); + files_with_matches.insert(candidate.display_path.clone()); + } + } + } + + all_matches.sort_by(|left, right| { + left.path.cmp(&right.path) + .then(left.start_line.cmp(&right.start_line)) + .then(left.start_column.cmp(&right.start_column)) + .then(left.end_line.cmp(&right.end_line)) + .then(left.end_column.cmp(&right.end_column)) + .then(left.byte_start.cmp(&right.byte_start)) + .then(left.byte_end.cmp(&right.byte_end)) + }); + + let visible_matches = all_matches.into_iter().skip(normalized_offset as usize).collect::>(); + let limit_reached = visible_matches.len() > normalized_limit as usize; + let matches = visible_matches.into_iter().take(normalized_limit as usize).collect::>(); + + Ok(AstFindResult { + matches, + total_matches, + files_with_matches: to_u32(files_with_matches.len()), + files_searched, + limit_reached, + parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), + }) +} + +/// Structural code rewrite using ast-grep patterns. +/// +/// Applies pattern->replacement rewrites across files. Defaults to dry-run mode. 
+#[napi(js_name = "astEdit")] +pub fn ast_edit(options: AstReplaceOptions) -> Result { + let AstReplaceOptions { + rewrites, lang, path, glob, selector, strictness, + dry_run, max_replacements, max_files, fail_on_parse_error, + } = options; + + let rewrite_rules = normalize_rewrite_map(rewrites)?; + let strictness = parse_strictness(strictness.as_deref())?; + let dry_run = dry_run.unwrap_or(true); + let max_replacements = max_replacements.unwrap_or(u32::MAX).max(1); + let max_files = max_files.unwrap_or(u32::MAX).max(1); + let fail_on_parse_error = fail_on_parse_error.unwrap_or(false); + + let lang_str = lang.as_deref().map(str::trim).filter(|v| !v.is_empty()); + let candidates: Vec<_> = collect_candidates(path, glob.as_deref())? + .into_iter() + .filter(|candidate| is_supported_file(&candidate.absolute_path, lang_str)) + .collect(); + let effective_lang = if let Some(lang) = lang_str { + lang.to_string() + } else { + infer_single_replace_lang(&candidates)? + }; + + let language = resolve_supported_lang(&effective_lang)?; + let mut parse_errors = Vec::new(); + let mut compiled_rules = Vec::new(); + for (pattern, rewrite) in rewrite_rules { + match compile_pattern(&pattern, selector.as_deref(), &strictness, language) { + Ok(compiled) => compiled_rules.push((pattern, rewrite, compiled)), + Err(err) => { + if fail_on_parse_error { return Err(err); } + parse_errors.push(format!("{pattern}: {err}")); + }, + } + } + if compiled_rules.is_empty() { + return Ok(AstReplaceResult { + file_changes: vec![], total_replacements: 0, files_touched: 0, + files_searched: to_u32(candidates.len()), applied: !dry_run, + limit_reached: false, parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), + changes: vec![], + }); + } + + let mut changes = Vec::new(); + let mut file_counts: BTreeMap = BTreeMap::new(); + let mut files_touched = 0u32; + let mut limit_reached = false; + + for candidate in &candidates { + let source = match std::fs::read_to_string(&candidate.absolute_path) 
{ + Ok(source) => source, + Err(err) => { + if fail_on_parse_error { + return Err(Error::from_reason(format!("{}: {err}", candidate.display_path))); + } + parse_errors.push(format!("{}: {err}", candidate.display_path)); + continue; + }, + }; + + let ast = language.ast_grep(&source); + if ast.root().dfs().any(|node| node.is_error()) { + let parse_issue = format!( + "{}: parse error (syntax tree contains error nodes)", + candidate.display_path + ); + if fail_on_parse_error { return Err(Error::from_reason(parse_issue)); } + parse_errors.push(parse_issue); + continue; + } + + let mut file_changes = Vec::new(); + let mut reached_max_replacements = false; + 'patterns: for (_pattern, rewrite, compiled) in &compiled_rules { + for matched in ast.root().find_all(compiled.clone()) { + if changes.len() + file_changes.len() >= max_replacements as usize { + limit_reached = true; + reached_max_replacements = true; + break 'patterns; + } + let edit = matched.replace_by(rewrite.as_str()); + let range = matched.range(); + let start = matched.start_pos(); + let end = matched.end_pos(); + let after = String::from_utf8(edit.inserted_text.clone()).map_err(|err| { + Error::from_reason(format!( + "{}: replacement text is not valid UTF-8: {err}", + candidate.display_path + )) + })?; + file_changes.push(PendingFileChange { + change: AstReplaceChange { + path: candidate.display_path.clone(), + before: matched.text().into_owned(), + after, + byte_start: to_u32(range.start), + byte_end: to_u32(range.end), + deleted_length: to_u32(edit.deleted_length), + start_line: to_u32(start.line().saturating_add(1)), + start_column: to_u32(start.column(matched.get_node()).saturating_add(1)), + end_line: to_u32(end.line().saturating_add(1)), + end_column: to_u32(end.column(matched.get_node()).saturating_add(1)), + }, + edit, + }); + } + } + + if file_changes.is_empty() { + if reached_max_replacements { break; } + continue; + } + if files_touched >= max_files { + limit_reached = true; + break; + } + 
files_touched = files_touched.saturating_add(1); + file_counts.insert(candidate.display_path.clone(), to_u32(file_changes.len())); + + if !dry_run { + let edits: Vec> = file_changes.iter().map(|entry| Edit { + position: entry.edit.position, + deleted_length: entry.edit.deleted_length, + inserted_text: entry.edit.inserted_text.clone(), + }).collect(); + let output = apply_edits(&source, &edits)?; + if output != source { + std::fs::write(&candidate.absolute_path, output).map_err(|err| { + Error::from_reason(format!("Failed to write {}: {err}", candidate.display_path)) + })?; + } + } + + changes.extend(file_changes.into_iter().map(|entry| entry.change)); + if reached_max_replacements { break; } + } + + let file_changes = file_counts.into_iter() + .map(|(path, count)| AstReplaceFileChange { path, count }) + .collect::>(); + + Ok(AstReplaceResult { + file_changes, + total_replacements: to_u32(changes.len()), + files_touched, + files_searched: to_u32(candidates.len()), + applied: !dry_run, + limit_reached, + parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), + changes, + }) +} + +#[cfg(test)] +mod tests { + use std::{fs, path::PathBuf, time::{SystemTime, UNIX_EPOCH}}; + use super::*; + + struct TempTree { root: PathBuf } + impl Drop for TempTree { + fn drop(&mut self) { let _ = fs::remove_dir_all(&self.root); } + } + + fn make_temp_tree() -> TempTree { + let unique = SystemTime::now().duration_since(UNIX_EPOCH) + .expect("system time should be after UNIX_EPOCH").as_nanos(); + let root = std::env::temp_dir().join(format!("gsd-ast-test-{unique}")); + fs::create_dir_all(root.join("nested")).expect("temp nested dir should be created"); + fs::write(root.join("a.ts"), "const a = 1;\n").expect("temp file a.ts should be written"); + fs::write(root.join("nested").join("b.ts"), "const b = 2;\n") + .expect("temp file nested/b.ts should be written"); + TempTree { root } + } + + #[test] + fn resolves_supported_language_aliases() { + 
assert_eq!(resolve_supported_lang("ts").ok(), Some(SupportLang::TypeScript)); + assert_eq!(resolve_supported_lang("jsx").ok(), Some(SupportLang::JavaScript)); + assert_eq!(resolve_supported_lang("rs").ok(), Some(SupportLang::Rust)); + assert_eq!(resolve_supported_lang("kotlin").ok(), Some(SupportLang::Kotlin)); + assert_eq!(resolve_supported_lang("bash").ok(), Some(SupportLang::Bash)); + assert_eq!(resolve_supported_lang("c").ok(), Some(SupportLang::C)); + assert_eq!(resolve_supported_lang("cpp").ok(), Some(SupportLang::Cpp)); + assert!(resolve_supported_lang("brainfuck").is_err()); + } + + #[test] + fn applies_non_overlapping_edits() { + let source = "const answer = 41;"; + let edits = vec![ + Edit:: { position: 6, deleted_length: 6, inserted_text: b"value".to_vec() }, + Edit:: { position: 15, deleted_length: 2, inserted_text: b"42".to_vec() }, + ]; + let output = apply_edits(source, &edits).expect("edits should apply"); + assert_eq!(output, "const value = 42;"); + } + + #[test] + fn rejects_overlapping_edits() { + let source = "abcdef"; + let edits = vec![ + Edit:: { position: 1, deleted_length: 3, inserted_text: b"x".to_vec() }, + Edit:: { position: 2, deleted_length: 1, inserted_text: b"y".to_vec() }, + ]; + assert!(apply_edits(source, &edits).is_err()); + } + + #[test] + fn collect_candidates_finds_files() { + let tree = make_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), None) + .expect("candidate collection should succeed"); + let paths: Vec<_> = candidates.iter().map(|f| f.display_path.as_str()).collect(); + assert!(paths.contains(&"a.ts")); + assert!(paths.contains(&"nested/b.ts")); + } + + #[test] + fn infers_single_replace_lang_for_uniform_candidates() { + let tree = make_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), Some("**/*.ts")) + .expect("candidate collection should succeed"); + let inferred = 
infer_single_replace_lang(&candidates).expect("language should be inferred"); + assert_eq!(inferred, "typescript"); + } + + fn make_mixed_temp_tree() -> TempTree { + let unique = SystemTime::now().duration_since(UNIX_EPOCH) + .expect("system time should be after UNIX_EPOCH").as_nanos(); + let root = std::env::temp_dir().join(format!("gsd-ast-mixed-lang-test-{unique}")); + fs::create_dir_all(&root).expect("temp mixed-lang dir should be created"); + fs::write(root.join("a.ts"), "const a = 1;\n").expect("temp file a.ts should be written"); + fs::write(root.join("b.rs"), "fn main() {}\n").expect("temp file b.rs should be written"); + TempTree { root } + } + + #[test] + fn rejects_mixed_replace_lang_inference() { + let tree = make_mixed_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), None) + .expect("candidate collection should succeed"); + let err = infer_single_replace_lang(&candidates) + .expect_err("mixed language inference should fail"); + assert!(err.to_string().contains("multiple languages")); + } +} diff --git a/native/crates/ast/src/glob_util.rs b/native/crates/ast/src/glob_util.rs new file mode 100644 index 000000000..158454dcb --- /dev/null +++ b/native/crates/ast/src/glob_util.rs @@ -0,0 +1,54 @@ +//! Shared glob-pattern helpers for AST search. + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use napi::bindgen_prelude::*; + +/// Normalize a raw glob string: fix path separators, optionally prepend `**/` +/// for recursive matching, and close any unclosed `{` alternation groups. +pub fn build_glob_pattern(glob: &str, recursive: bool) -> String { + let normalized = glob.replace('\\', "/"); + let pattern = if !recursive || normalized.contains('/') || normalized.starts_with("**") { + normalized + } else { + format!("**/{normalized}") + }; + fix_unclosed_braces(pattern) +} + +/// Compile a glob pattern string into a [`GlobSet`]. 
+pub fn compile_glob(glob: &str, recursive: bool) -> Result { + let mut builder = GlobSetBuilder::new(); + let pattern = build_glob_pattern(glob, recursive); + let glob = GlobBuilder::new(&pattern) + .literal_separator(true) + .build() + .map_err(|err| Error::from_reason(format!("Invalid glob pattern: {err}")))?; + builder.add(glob); + builder + .build() + .map_err(|err| Error::from_reason(format!("Failed to build glob matcher: {err}"))) +} + +/// Like [`compile_glob`], but accepts an `Option<&str>` — returns `Ok(None)` +/// when the input is `None`, empty, or whitespace-only. +pub fn try_compile_glob(glob: Option<&str>, recursive: bool) -> Result> { + let Some(glob) = glob.map(str::trim).filter(|v| !v.is_empty()) else { + return Ok(None); + }; + compile_glob(glob, recursive).map(Some) +} + +/// Close unclosed `{` alternation groups in a glob pattern. +fn fix_unclosed_braces(pattern: String) -> String { + let opens = pattern.chars().filter(|&c| c == '{').count(); + let closes = pattern.chars().filter(|&c| c == '}').count(); + if opens > closes { + let mut fixed = pattern; + for _ in 0..(opens - closes) { + fixed.push('}'); + } + fixed + } else { + pattern + } +} diff --git a/native/crates/ast/src/language/mod.rs b/native/crates/ast/src/language/mod.rs new file mode 100644 index 000000000..01672e134 --- /dev/null +++ b/native/crates/ast/src/language/mod.rs @@ -0,0 +1,437 @@ +//! Language definitions for ast-grep integration. +//! +//! Provides `SupportLang` enum and `Language`/`LanguageExt` impls for 38 +//! languages, each backed by a tree-sitter grammar. + +mod parsers; + +use std::{borrow::Cow, collections::HashMap, fmt, path::Path}; + +use ast_grep_core::{ + Doc, Language, Node, + matcher::{KindMatcher, Pattern, PatternBuilder, PatternError}, + meta_var::MetaVariable, + tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange}, +}; + +/// Implements a stub language (no expando / `pre_process_pattern` needed). 
+/// Use when the language grammar accepts `$VAR` as valid identifiers. +macro_rules! impl_lang { + ($lang:ident, $func:ident) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + parsers::$func().into() + } + } + }; +} + +fn pre_process_pattern(expando: char, query: &str) -> Cow<'_, str> { + let mut ret = Vec::with_capacity(query.len()); + let mut dollar_count = 0; + for c in query.chars() { + if c == '$' { + dollar_count += 1; + continue; + } + let need_replace = matches!(c, 'A'..='Z' | '_') || dollar_count == 3; + let sigil = if need_replace { expando } else { '$' }; + ret.extend(std::iter::repeat_n(sigil, dollar_count)); + dollar_count = 0; + ret.push(c); + } + let sigil = if dollar_count == 3 { expando } else { '$' }; + ret.extend(std::iter::repeat_n(sigil, dollar_count)); + Cow::Owned(ret.into_iter().collect()) +} + +/// Implements a language with `expando_char` / `pre_process_pattern`. +/// Use when the language does NOT accept `$` as a valid identifier character. +macro_rules! 
impl_lang_expando { + ($lang:ident, $func:ident, $char:expr) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn expando_char(&self) -> char { + $char + } + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + parsers::$func().into() + } + } + }; +} + +// ── Customized languages with expando_char ────────────────────────────── + +impl_lang_expando!(C, language_c, '\u{10000}'); +impl_lang_expando!(Cpp, language_cpp, '\u{10000}'); +impl_lang_expando!(CSharp, language_c_sharp, 'µ'); +impl_lang_expando!(Css, language_css, '_'); +impl_lang_expando!(Elixir, language_elixir, 'µ'); +impl_lang_expando!(Go, language_go, 'µ'); +impl_lang_expando!(Haskell, language_haskell, 'µ'); +impl_lang_expando!(Hcl, language_hcl, 'µ'); +impl_lang_expando!(Kotlin, language_kotlin, 'µ'); +impl_lang_expando!(Nix, language_nix, '_'); +impl_lang_expando!(Php, language_php, 'µ'); +impl_lang_expando!(Python, language_python, 'µ'); +impl_lang_expando!(Ruby, language_ruby, 'µ'); +impl_lang_expando!(Rust, language_rust, 'µ'); +impl_lang_expando!(Swift, language_swift, 'µ'); +impl_lang_expando!(Make, language_make, 'µ'); +impl_lang_expando!(ObjC, language_objc, '\u{10000}'); +impl_lang_expando!(Starlark, language_starlark, 'µ'); +impl_lang_expando!(Odin, language_odin, 'µ'); +impl_lang_expando!(Julia, language_julia, 'µ'); +impl_lang_expando!(Verilog, language_verilog, 'µ'); +impl_lang_expando!(Zig, language_zig, 'µ'); + +// ── Stub languages ($ accepted in 
grammar) ────────────────────────────── + +impl_lang!(Bash, language_bash); +impl_lang!(Java, language_java); +impl_lang!(JavaScript, language_javascript); +impl_lang!(Json, language_json); +impl_lang!(Lua, language_lua); +impl_lang!(Scala, language_scala); +impl_lang!(Solidity, language_solidity); +impl_lang!(Tsx, language_tsx); +impl_lang!(TypeScript, language_typescript); +impl_lang!(Yaml, language_yaml); +impl_lang!(Markdown, language_markdown); +impl_lang!(Toml, language_toml); +impl_lang!(Diff, language_diff); +impl_lang!(Xml, language_xml); +impl_lang!(Regex, language_regex); + +// ── Html (custom implementation with injection support) ────────────────── + +#[derive(Clone, Copy, Debug)] +pub struct Html; + +impl Language for Html { + fn expando_char(&self) -> char { + 'z' + } + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } +} + +impl LanguageExt for Html { + fn get_ts_language(&self) -> TSLanguage { + parsers::language_html() + } + + fn injectable_languages(&self) -> Option<&'static [&'static str]> { + Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"]) + } + + fn extract_injections( + &self, + root: Node>, + ) -> HashMap> { + let lang = root.lang(); + let mut map = HashMap::new(); + let matcher = KindMatcher::new("script_element", lang.clone()); + for script in root.find_all(matcher) { + let injected = find_html_lang(&script).unwrap_or_else(|| "js".into()); + let content = script.children().find(|c| c.kind() == "raw_text"); + if let Some(content) = content { + map.entry(injected) + .or_insert_with(Vec::new) + 
.push(node_to_range(&content)); + } + } + let matcher = KindMatcher::new("style_element", lang.clone()); + for style in root.find_all(matcher) { + let injected = find_html_lang(&style).unwrap_or_else(|| "css".into()); + let content = style.children().find(|c| c.kind() == "raw_text"); + if let Some(content) = content { + map.entry(injected) + .or_insert_with(Vec::new) + .push(node_to_range(&content)); + } + } + map + } +} + +fn find_html_lang(node: &Node) -> Option { + let html = node.lang(); + let attr_matcher = KindMatcher::new("attribute", html.clone()); + let name_matcher = KindMatcher::new("attribute_name", html.clone()); + let val_matcher = KindMatcher::new("attribute_value", html.clone()); + node.find_all(attr_matcher).find_map(|attr| { + let name = attr.find(&name_matcher)?; + if name.text() != "lang" { + return None; + } + let val = attr.find(&val_matcher)?; + Some(val.text().to_string()) + }) +} + +fn node_to_range(node: &Node) -> TSRange { + let r = node.range(); + let start = node.start_pos(); + let sp = start.byte_point(); + let sp = tree_sitter::Point::new(sp.0, sp.1); + let end = node.end_pos(); + let ep = end.byte_point(); + let ep = tree_sitter::Point::new(ep.0, ep.1); + TSRange { start_byte: r.start, end_byte: r.end, start_point: sp, end_point: ep } +} + +// ── SupportLang enum ──────────────────────────────────────────────────── + +/// All supported languages for ast-grep structural search/replace. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum SupportLang { + Bash, C, Cpp, CSharp, Css, Diff, Elixir, Go, Haskell, Hcl, Html, + Java, JavaScript, Json, Julia, Kotlin, Lua, Make, Markdown, Nix, + ObjC, Odin, Php, Python, Regex, Ruby, Rust, Scala, Solidity, + Starlark, Swift, Toml, Tsx, TypeScript, Verilog, Xml, Yaml, Zig, +} + +impl SupportLang { + pub const fn all_langs() -> &'static [Self] { + use SupportLang::*; + &[ + Bash, C, Cpp, CSharp, Css, Diff, Elixir, Go, Haskell, Hcl, Html, Java, JavaScript, Json, + Julia, Kotlin, Lua, Make, Markdown, Nix, ObjC, Odin, Php, Python, Regex, Ruby, Rust, + Scala, Solidity, Starlark, Swift, Toml, Tsx, TypeScript, Verilog, Xml, Yaml, Zig, + ] + } + + pub const fn canonical_name(self) -> &'static str { + match self { + Self::Bash => "bash", Self::C => "c", Self::Cpp => "cpp", + Self::CSharp => "csharp", Self::Css => "css", Self::Diff => "diff", + Self::Elixir => "elixir", Self::Go => "go", Self::Haskell => "haskell", + Self::Hcl => "hcl", Self::Html => "html", Self::Java => "java", + Self::JavaScript => "javascript", Self::Json => "json", Self::Julia => "julia", + Self::Kotlin => "kotlin", Self::Lua => "lua", Self::Make => "make", + Self::Markdown => "markdown", Self::Nix => "nix", Self::ObjC => "objc", + Self::Odin => "odin", Self::Php => "php", Self::Python => "python", + Self::Regex => "regex", Self::Ruby => "ruby", Self::Rust => "rust", + Self::Scala => "scala", Self::Solidity => "solidity", Self::Starlark => "starlark", + Self::Swift => "swift", Self::Toml => "toml", Self::Tsx => "tsx", + Self::TypeScript => "typescript", Self::Verilog => "verilog", Self::Xml => "xml", + Self::Yaml => "yaml", Self::Zig => "zig", + } + } +} + +impl fmt::Display for SupportLang { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } +} + +// ── Dispatch macro ────────────────────────────────────────────────────── + +macro_rules! 
execute_lang_method { + ($me:path, $method:ident, $($pname:tt),*) => { + use SupportLang as S; + match $me { + S::Bash => Bash.$method($($pname,)*), + S::C => C.$method($($pname,)*), + S::Cpp => Cpp.$method($($pname,)*), + S::CSharp => CSharp.$method($($pname,)*), + S::Css => Css.$method($($pname,)*), + S::Diff => Diff.$method($($pname,)*), + S::Elixir => Elixir.$method($($pname,)*), + S::Go => Go.$method($($pname,)*), + S::Haskell => Haskell.$method($($pname,)*), + S::Hcl => Hcl.$method($($pname,)*), + S::Html => Html.$method($($pname,)*), + S::Java => Java.$method($($pname,)*), + S::JavaScript => JavaScript.$method($($pname,)*), + S::Json => Json.$method($($pname,)*), + S::Julia => Julia.$method($($pname,)*), + S::Kotlin => Kotlin.$method($($pname,)*), + S::Lua => Lua.$method($($pname,)*), + S::Make => Make.$method($($pname,)*), + S::Markdown => Markdown.$method($($pname,)*), + S::Nix => Nix.$method($($pname,)*), + S::ObjC => ObjC.$method($($pname,)*), + S::Odin => Odin.$method($($pname,)*), + S::Php => Php.$method($($pname,)*), + S::Python => Python.$method($($pname,)*), + S::Regex => Regex.$method($($pname,)*), + S::Ruby => Ruby.$method($($pname,)*), + S::Rust => Rust.$method($($pname,)*), + S::Scala => Scala.$method($($pname,)*), + S::Solidity => Solidity.$method($($pname,)*), + S::Starlark => Starlark.$method($($pname,)*), + S::Swift => Swift.$method($($pname,)*), + S::Toml => Toml.$method($($pname,)*), + S::Tsx => Tsx.$method($($pname,)*), + S::TypeScript => TypeScript.$method($($pname,)*), + S::Verilog => Verilog.$method($($pname,)*), + S::Xml => Xml.$method($($pname,)*), + S::Yaml => Yaml.$method($($pname,)*), + S::Zig => Zig.$method($($pname,)*), + } + }; +} + +macro_rules! impl_lang_method { + ($method:ident, ($($pname:tt: $ptype:ty),*) => $return_type:ty) => { + #[inline] + fn $method(&self, $($pname: $ptype),*) -> $return_type { + execute_lang_method! 
{ self, $method, $($pname),* } + } + }; +} + +impl Language for SupportLang { + impl_lang_method!(kind_to_id, (kind: &str) => u16); + impl_lang_method!(field_to_id, (field: &str) => Option); + impl_lang_method!(meta_var_char, () => char); + impl_lang_method!(expando_char, () => char); + impl_lang_method!(extract_meta_var, (source: &str) => Option); + impl_lang_method!(build_pattern, (builder: &PatternBuilder) => Result); + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + execute_lang_method! { self, pre_process_pattern, query } + } + + fn from_path>(path: P) -> Option { + from_extension(path.as_ref()) + } +} + +impl LanguageExt for SupportLang { + impl_lang_method!(get_ts_language, () => TSLanguage); + impl_lang_method!(injectable_languages, () => Option<&'static [&'static str]>); + + fn extract_injections( + &self, + root: Node>, + ) -> HashMap> { + match self { + Self::Html => Html.extract_injections(root), + _ => HashMap::new(), + } + } +} + +// ── File extension mapping ────────────────────────────────────────────── + +const fn extensions(lang: SupportLang) -> &'static [&'static str] { + use SupportLang::*; + match lang { + Bash => &["bash", "bats", "cgi", "command", "env", "fcgi", "ksh", "sh", "tmux", "tool", "zsh"], + C => &["c", "h"], + Cpp => &["cc", "hpp", "cpp", "c++", "hh", "cxx", "cu", "ino"], + CSharp => &["cs"], + Css => &["css", "scss"], + Diff => &["diff", "patch"], + Elixir => &["ex", "exs"], + Go => &["go"], + Haskell => &["hs"], + Hcl => &["hcl", "tf", "tfvars"], + Html => &["html", "htm", "xhtml"], + Java => &["java"], + JavaScript => &["cjs", "js", "mjs", "jsx"], + Json => &["json"], + Julia => &["jl"], + Kotlin => &["kt", "ktm", "kts"], + Lua => &["lua"], + Make => &["mk", "mak"], + Markdown => &["md", "markdown", "mdx"], + Nix => &["nix"], + ObjC => &["m"], + Odin => &["odin"], + Php => &["php"], + Python => &["py", "py3", "pyi", "bzl"], + Regex => &[], + Ruby => &["rb", "rbw", "gemspec"], + Rust => &["rs"], + Scala => 
&["scala", "sc", "sbt"], + Solidity => &["sol"], + Starlark => &["star", "bzl"], + Swift => &["swift"], + Toml => &["toml"], + Tsx => &["tsx"], + TypeScript => &["ts", "cts", "mts"], + Verilog => &["v", "sv", "svh", "vh"], + Xml => &["xml", "xsl", "xslt", "svg", "plist"], + Yaml => &["yaml", "yml"], + Zig => &["zig"], + } +} + +/// Guess language from file extension. +pub fn from_extension(path: &Path) -> Option { + let ext = path.extension()?.to_str()?; + if ext.is_empty() { + let name = path.file_name()?.to_str()?; + return match name { + "Makefile" | "makefile" | "GNUmakefile" => Some(SupportLang::Make), + _ => None, + }; + } + SupportLang::all_langs() + .iter() + .copied() + .find(|&l| extensions(l).contains(&ext)) +} diff --git a/native/crates/ast/src/language/parsers.rs b/native/crates/ast/src/language/parsers.rs new file mode 100644 index 000000000..5c31b31ca --- /dev/null +++ b/native/crates/ast/src/language/parsers.rs @@ -0,0 +1,118 @@ +//! Tree-sitter parser functions for all supported languages. 
+ +use ast_grep_core::tree_sitter::TSLanguage; + +pub fn language_bash() -> TSLanguage { + tree_sitter_bash::LANGUAGE.into() +} +pub fn language_c() -> TSLanguage { + tree_sitter_c::LANGUAGE.into() +} +pub fn language_cpp() -> TSLanguage { + tree_sitter_cpp::LANGUAGE.into() +} +pub fn language_c_sharp() -> TSLanguage { + tree_sitter_c_sharp::LANGUAGE.into() +} +pub fn language_css() -> TSLanguage { + tree_sitter_css::LANGUAGE.into() +} +pub fn language_diff() -> TSLanguage { + tree_sitter_diff::LANGUAGE.into() +} +pub fn language_elixir() -> TSLanguage { + tree_sitter_elixir::LANGUAGE.into() +} +pub fn language_go() -> TSLanguage { + tree_sitter_go::LANGUAGE.into() +} +pub fn language_haskell() -> TSLanguage { + tree_sitter_haskell::LANGUAGE.into() +} +pub fn language_hcl() -> TSLanguage { + tree_sitter_hcl::LANGUAGE.into() +} +pub fn language_html() -> TSLanguage { + tree_sitter_html::LANGUAGE.into() +} +pub fn language_java() -> TSLanguage { + tree_sitter_java::LANGUAGE.into() +} +pub fn language_javascript() -> TSLanguage { + tree_sitter_javascript::LANGUAGE.into() +} +pub fn language_json() -> TSLanguage { + tree_sitter_json::LANGUAGE.into() +} +pub fn language_julia() -> TSLanguage { + tree_sitter_julia::LANGUAGE.into() +} +pub fn language_kotlin() -> TSLanguage { + tree_sitter_kotlin::LANGUAGE.into() +} +pub fn language_lua() -> TSLanguage { + tree_sitter_lua::LANGUAGE.into() +} +pub fn language_make() -> TSLanguage { + tree_sitter_make::LANGUAGE.into() +} +pub fn language_markdown() -> TSLanguage { + tree_sitter_md::LANGUAGE.into() +} +pub fn language_nix() -> TSLanguage { + tree_sitter_nix::LANGUAGE.into() +} +pub fn language_objc() -> TSLanguage { + tree_sitter_objc::LANGUAGE.into() +} +pub fn language_odin() -> TSLanguage { + tree_sitter_odin::LANGUAGE.into() +} +pub fn language_php() -> TSLanguage { + tree_sitter_php::LANGUAGE_PHP_ONLY.into() +} +pub fn language_python() -> TSLanguage { + tree_sitter_python::LANGUAGE.into() +} +pub fn language_regex() -> 
TSLanguage { + tree_sitter_regex::LANGUAGE.into() +} +pub fn language_ruby() -> TSLanguage { + tree_sitter_ruby::LANGUAGE.into() +} +pub fn language_rust() -> TSLanguage { + tree_sitter_rust::LANGUAGE.into() +} +pub fn language_scala() -> TSLanguage { + tree_sitter_scala::LANGUAGE.into() +} +pub fn language_solidity() -> TSLanguage { + tree_sitter_solidity::LANGUAGE.into() +} +pub fn language_starlark() -> TSLanguage { + tree_sitter_starlark::LANGUAGE.into() +} +pub fn language_swift() -> TSLanguage { + tree_sitter_swift::LANGUAGE.into() +} +pub fn language_toml() -> TSLanguage { + tree_sitter_toml_ng::LANGUAGE.into() +} +pub fn language_tsx() -> TSLanguage { + tree_sitter_typescript::LANGUAGE_TSX.into() +} +pub fn language_typescript() -> TSLanguage { + tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into() +} +pub fn language_verilog() -> TSLanguage { + tree_sitter_verilog::LANGUAGE.into() +} +pub fn language_xml() -> TSLanguage { + tree_sitter_xml::LANGUAGE_XML.into() +} +pub fn language_yaml() -> TSLanguage { + tree_sitter_yaml::LANGUAGE.into() +} +pub fn language_zig() -> TSLanguage { + tree_sitter_zig::LANGUAGE.into() +} diff --git a/native/crates/ast/src/lib.rs b/native/crates/ast/src/lib.rs new file mode 100644 index 000000000..18cc50a2d --- /dev/null +++ b/native/crates/ast/src/lib.rs @@ -0,0 +1,10 @@ +//! AST-aware structural search and rewrite for GSD. +//! +//! Provides `astGrep` (search) and `astEdit` (rewrite) N-API functions +//! powered by ast-grep with tree-sitter grammars for 38+ languages. 
+ +#![allow(clippy::needless_pass_by_value)] + +pub mod ast; +pub mod glob_util; +pub mod language; diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index 90cb772b4..7ac7a8756 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -11,6 +11,7 @@ description = "N-API native addon for GSD — exposes high-performance Rust modu crate-type = ["cdylib"] [dependencies] +gsd-ast = { path = "../ast" } gsd-grep = { path = "../grep" } arboard = "3" image = { version = "0.25", default-features = false, features = ["png"] } diff --git a/native/crates/engine/src/ast.rs b/native/crates/engine/src/ast.rs new file mode 100644 index 000000000..2d2646332 --- /dev/null +++ b/native/crates/engine/src/ast.rs @@ -0,0 +1,6 @@ +//! N-API bindings for the AST module. +//! +//! Forces the linker to include `gsd_ast` so napi-rs ctor registrations +//! for `astGrep` and `astEdit` are linked into the cdylib. + +use gsd_ast as _; diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 8ab224c6c..0f6736e4d 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -8,5 +8,6 @@ #![allow(clippy::needless_pass_by_value)] +mod ast; mod clipboard; mod grep; diff --git a/packages/native/package.json b/packages/native/package.json index a195cc0af..276ca324e 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -22,6 +22,10 @@ "./clipboard": { "types": "./src/clipboard/index.ts", "import": "./src/clipboard/index.ts" + }, + "./ast": { + "types": "./src/ast/index.ts", + "import": "./src/ast/index.ts" } }, "files": [ diff --git a/packages/native/src/ast/index.ts b/packages/native/src/ast/index.ts new file mode 100644 index 000000000..314d6d719 --- /dev/null +++ b/packages/native/src/ast/index.ts @@ -0,0 +1,67 @@ +/** + * AST-aware structural search and rewrite via ast-grep. + * + * Supports 38+ languages with tree-sitter grammars. 
+ */ + +import { native } from "../native.js"; +import type { + AstFindOptions, + AstFindResult, + AstReplaceChange, + AstReplaceFileChange, + AstReplaceOptions, + AstReplaceResult, + AstFindMatch, +} from "./types.js"; + +export type { + AstFindMatch, + AstFindOptions, + AstFindResult, + AstReplaceChange, + AstReplaceFileChange, + AstReplaceOptions, + AstReplaceResult, +}; + +/** + * Structural code search using ast-grep patterns. + * + * Searches files for AST patterns across 38+ languages. Unlike regex, + * patterns match the syntax tree structure, ignoring whitespace and + * formatting differences. + * + * @example + * ```ts + * const result = astGrep({ + * patterns: ["console.log($$$ARGS)"], + * path: "./src", + * lang: "typescript", + * }); + * ``` + */ +export function astGrep(options: AstFindOptions): AstFindResult { + return (native as Record).astGrep(options) as AstFindResult; +} + +/** + * Structural code rewrite using ast-grep patterns. + * + * Applies pattern->replacement rewrites across files. Meta-variables + * ($VAR, $$$ARGS) captured in patterns are substituted in replacements. + * Defaults to dry-run mode -- set `dryRun: false` to write changes. + * + * @example + * ```ts + * const result = astEdit({ + * rewrites: { "console.log($$$ARGS)": "logger.info($$$ARGS)" }, + * path: "./src", + * lang: "typescript", + * dryRun: false, + * }); + * ``` + */ +export function astEdit(options: AstReplaceOptions): AstReplaceResult { + return (native as Record).astEdit(options) as AstReplaceResult; +} diff --git a/packages/native/src/ast/types.ts b/packages/native/src/ast/types.ts new file mode 100644 index 000000000..7a3b88182 --- /dev/null +++ b/packages/native/src/ast/types.ts @@ -0,0 +1,137 @@ +/** Options for structural AST search via ast-grep. */ +export interface AstFindOptions { + /** One or more ast-grep patterns to search for. */ + patterns: string[]; + /** Language to parse files as (e.g. "typescript", "python"). Inferred from extension when omitted. 
*/ + lang?: string; + /** File or directory path to search. Defaults to cwd. */ + path?: string; + /** Glob filter for filenames (e.g. "**/*.ts"). */ + glob?: string; + /** AST node kind selector to narrow pattern scope. */ + selector?: string; + /** Match strictness: "cst", "smart", "ast", "relaxed", "signature". Defaults to "smart". */ + strictness?: string; + /** Maximum number of matches to return. Defaults to 50. */ + limit?: number; + /** Number of matches to skip before returning results. */ + offset?: number; + /** Include meta-variable bindings in results. */ + includeMeta?: boolean; + /** Lines of context around matches (reserved for future use). */ + context?: number; +} + +/** A single structural match from ast-grep search. */ +export interface AstFindMatch { + /** Relative file path. */ + path: string; + /** Matched source text. */ + text: string; + /** Byte offset of match start. */ + byteStart: number; + /** Byte offset of match end. */ + byteEnd: number; + /** 1-indexed start line. */ + startLine: number; + /** 1-indexed start column. */ + startColumn: number; + /** 1-indexed end line. */ + endLine: number; + /** 1-indexed end column. */ + endColumn: number; + /** Meta-variable bindings (when includeMeta is true). */ + metaVariables?: Record; +} + +/** Result of an ast-grep structural search. */ +export interface AstFindResult { + /** Matched nodes (paginated by limit/offset). */ + matches: AstFindMatch[]; + /** Total match count across all files. */ + totalMatches: number; + /** Number of files containing at least one match. */ + filesWithMatches: number; + /** Number of files searched. */ + filesSearched: number; + /** Whether more matches exist beyond the limit. */ + limitReached: boolean; + /** Parse errors encountered (non-fatal). */ + parseErrors?: string[]; +} + +/** Options for structural AST rewrite via ast-grep. */ +export interface AstReplaceOptions { + /** Map of pattern -> replacement. 
Meta-variables ($VAR) in replacements are substituted. */ + rewrites: Record; + /** Language to parse files as. Required when path/glob spans multiple languages. */ + lang?: string; + /** File or directory path. Defaults to cwd. */ + path?: string; + /** Glob filter for filenames. */ + glob?: string; + /** AST node kind selector. */ + selector?: string; + /** Match strictness. Defaults to "smart". */ + strictness?: string; + /** Preview changes without writing files. Defaults to true. */ + dryRun?: boolean; + /** Maximum total replacements. */ + maxReplacements?: number; + /** Maximum files to modify. */ + maxFiles?: number; + /** Fail on parse errors instead of skipping. */ + failOnParseError?: boolean; +} + +/** A single replacement change from ast-grep rewrite. */ +export interface AstReplaceChange { + /** Relative file path. */ + path: string; + /** Original source text. */ + before: string; + /** Replacement text. */ + after: string; + /** Byte offset of change start. */ + byteStart: number; + /** Byte offset of change end. */ + byteEnd: number; + /** Number of bytes deleted. */ + deletedLength: number; + /** 1-indexed start line. */ + startLine: number; + /** 1-indexed start column. */ + startColumn: number; + /** 1-indexed end line. */ + endLine: number; + /** 1-indexed end column. */ + endColumn: number; +} + +/** Per-file change summary. */ +export interface AstReplaceFileChange { + /** Relative file path. */ + path: string; + /** Number of replacements in this file. */ + count: number; +} + +/** Result of an ast-grep structural rewrite. */ +export interface AstReplaceResult { + /** Individual replacement changes. */ + changes: AstReplaceChange[]; + /** Per-file change summaries. */ + fileChanges: AstReplaceFileChange[]; + /** Total number of replacements. */ + totalReplacements: number; + /** Number of files modified. */ + filesTouched: number; + /** Number of files searched. 
*/ + filesSearched: number; + /** Whether changes were written to disk (false when dryRun is true). */ + applied: boolean; + /** Whether limits stopped processing early. */ + limitReached: boolean; + /** Parse errors encountered (non-fatal). */ + parseErrors?: string[]; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 4e3737609..c3ebe2a61 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -23,3 +23,14 @@ export type { SearchOptions, SearchResult, } from "./grep/index.js"; + +export { astGrep, astEdit } from "./ast/index.js"; +export type { + AstFindMatch, + AstFindOptions, + AstFindResult, + AstReplaceChange, + AstReplaceFileChange, + AstReplaceOptions, + AstReplaceResult, +} from "./ast/index.js"; diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 613fe3aea..3596c6124 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -46,4 +46,6 @@ export const native = loadNative() as { copyToClipboard: (text: string) => void; readTextFromClipboard: () => string | null; readImageFromClipboard: () => Promise; + astGrep: (options: unknown) => unknown; + astEdit: (options: unknown) => unknown; }; From 75fe5d3319b74cd47fbaa67f96fb6a559ec0ed93 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:51:49 -0600 Subject: [PATCH 6/7] =?UTF-8?q?feat:=20add=20native=20image=20module=20?= =?UTF-8?q?=E2=80=94=20decode,=20encode,=20and=20resize=20via=20Rust=20ima?= =?UTF-8?q?ge=20crate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port image processing from Oh My Pi's pi-natives crate, adapted for napi-rs v2. Exposes NativeImage class with async parse/encode/resize methods backed by the Rust `image` crate (PNG, JPEG, WebP, GIF support). 
Includes: - task.rs: lightweight async task scheduling for libuv thread pool - image.rs: NativeImage class with SamplingFilter enum - TypeScript types and wrapper (parseImage, ImageFormat, SamplingFilter) - 8 passing tests covering decode, encode, resize, round-trip, error cases Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 32 +++++ native/crates/engine/Cargo.toml | 7 +- native/crates/engine/src/image.rs | 137 +++++++++++++++++++ native/crates/engine/src/lib.rs | 5 +- native/crates/engine/src/task.rs | 89 ++++++++++++ packages/native/package.json | 8 +- packages/native/src/__tests__/image.test.mjs | 137 +++++++++++++++++++ packages/native/src/image/index.ts | 28 ++++ packages/native/src/image/types.ts | 41 ++++++ packages/native/src/index.ts | 5 + packages/native/src/native.ts | 1 + 11 files changed, 485 insertions(+), 5 deletions(-) create mode 100644 native/crates/engine/src/image.rs create mode 100644 native/crates/engine/src/task.rs create mode 100644 packages/native/src/__tests__/image.test.mjs create mode 100644 packages/native/src/image/index.ts create mode 100644 packages/native/src/image/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index 7fd93005a..ce5e0abfc 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -124,6 +124,12 @@ dependencies = [ "error-code", ] +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "convert_case" version = "0.6.0" @@ -300,6 +306,16 @@ dependencies = [ "windows-link", ] +[[package]] +name = "gif" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5df2ba84018d80c213569363bdcd0c64e6933c67fe4c1d60ecf822971a3c35e" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "globset" version = "0.4.18" @@ -405,6 +421,7 @@ name = "gsd-engine" version = "0.1.0" 
dependencies = [ "arboard", + "gsd-ast", "gsd-grep", "image", "napi", @@ -464,10 +481,25 @@ checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" dependencies = [ "bytemuck", "byteorder-lite", + "color_quant", + "gif", + "image-webp", "moxcms", "num-traits", "png", "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3" +dependencies = [ + "byteorder-lite", + "quick-error", ] [[package]] diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index 7ac7a8756..d718e23ed 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -14,7 +14,12 @@ crate-type = ["cdylib"] gsd-ast = { path = "../ast" } gsd-grep = { path = "../grep" } arboard = "3" -image = { version = "0.25", default-features = false, features = ["png"] } +image = { version = "0.25", default-features = false, features = [ + "png", + "jpeg", + "gif", + "webp", +] } napi = { version = "2", features = ["napi8"] } napi-derive = "2" diff --git a/native/crates/engine/src/image.rs b/native/crates/engine/src/image.rs new file mode 100644 index 000000000..22969ef30 --- /dev/null +++ b/native/crates/engine/src/image.rs @@ -0,0 +1,137 @@ +//! Image decode, encode, and resize via N-API. +//! +//! Provides: +//! - Load image from bytes (PNG, JPEG, WebP, GIF) +//! - Get dimensions +//! - Resize with configurable sampling filter +//! - Export as PNG, JPEG, WebP, or GIF + +use std::{io::Cursor, sync::Arc}; + +use image::{ + DynamicImage, ImageFormat, ImageReader, + codecs::{jpeg::JpegEncoder, webp::WebPEncoder}, + imageops::FilterType, +}; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +use crate::task; + +/// Sampling filter for resize operations. +#[napi] +pub enum SamplingFilter { + /// Nearest-neighbor sampling (fast, low quality). 
+ Nearest = 1, + /// Triangle filter (linear interpolation). + Triangle = 2, + /// Catmull-Rom filter with sharper edges. + CatmullRom = 3, + /// Gaussian filter for smoother results. + Gaussian = 4, + /// Lanczos3 filter for high-quality downscaling. + Lanczos3 = 5, +} + +impl From for FilterType { + fn from(filter: SamplingFilter) -> Self { + match filter { + SamplingFilter::Nearest => Self::Nearest, + SamplingFilter::Triangle => Self::Triangle, + SamplingFilter::CatmullRom => Self::CatmullRom, + SamplingFilter::Gaussian => Self::Gaussian, + SamplingFilter::Lanczos3 => Self::Lanczos3, + } + } +} + +/// Image container for native interop. +#[napi] +pub struct NativeImage { + img: Arc, +} + +type ImageTask = task::Async; + +#[napi] +impl NativeImage { + /// Decode encoded image bytes (PNG, JPEG, WebP, GIF) into a NativeImage. + #[napi(js_name = "parse")] + pub fn parse(bytes: Uint8Array) -> ImageTask { + let bytes = bytes.as_ref().to_vec(); + task::blocking("image.decode", (), move |_| -> Result { + let img = decode_image_from_bytes(&bytes)?; + Ok(Self { img: Arc::new(img) }) + }) + } + + /// Image width in pixels. + #[napi(getter, js_name = "width")] + pub fn get_width(&self) -> u32 { + self.img.width() + } + + /// Image height in pixels. + #[napi(getter, js_name = "height")] + pub fn get_height(&self) -> u32 { + self.img.height() + } + + /// Encode to bytes. Format: 0=PNG, 1=JPEG, 2=WebP, 3=GIF. + #[napi(js_name = "encode")] + pub fn encode(&self, format: u8, quality: u8) -> task::Async> { + let img = Arc::clone(&self.img); + task::blocking("image.encode", (), move |_| encode_image(&img, format, quality)) + } + + /// Resize to exact dimensions. Returns a new NativeImage. 
+ #[napi(js_name = "resize")] + pub fn resize(&self, width: u32, height: u32, filter: SamplingFilter) -> ImageTask { + let img = Arc::clone(&self.img); + task::blocking("image.resize", (), move |_| { + Ok(Self { img: Arc::new(img.resize_exact(width, height, filter.into())) }) + }) + } +} + +fn decode_image_from_bytes(bytes: &[u8]) -> Result { + let reader = ImageReader::new(Cursor::new(bytes)) + .with_guessed_format() + .map_err(|e| Error::from_reason(format!("Failed to detect image format: {e}")))?; + reader + .decode() + .map_err(|e| Error::from_reason(format!("Failed to decode image: {e}"))) +} + +fn encode_image(img: &DynamicImage, format: u8, quality: u8) -> Result> { + let (w, h) = (img.width(), img.height()); + match format { + 0 => { + let mut buffer = Vec::with_capacity((w * h * 4) as usize); + img.write_to(&mut Cursor::new(&mut buffer), ImageFormat::Png) + .map_err(|e| Error::from_reason(format!("Failed to encode PNG: {e}")))?; + Ok(buffer) + }, + 1 => { + let mut buffer = Vec::with_capacity((w * h * 3) as usize); + let encoder = JpegEncoder::new_with_quality(&mut buffer, quality); + img.write_with_encoder(encoder) + .map_err(|e| Error::from_reason(format!("Failed to encode JPEG: {e}")))?; + Ok(buffer) + }, + 2 => { + let mut buffer = Vec::with_capacity((w * h * 4) as usize); + let encoder = WebPEncoder::new_lossless(&mut buffer); + img.write_with_encoder(encoder) + .map_err(|e| Error::from_reason(format!("Failed to encode WebP: {e}")))?; + Ok(buffer) + }, + 3 => { + let mut buffer = Vec::with_capacity((w * h) as usize); + img.write_to(&mut Cursor::new(&mut buffer), ImageFormat::Gif) + .map_err(|e| Error::from_reason(format!("Failed to encode GIF: {e}")))?; + Ok(buffer) + }, + _ => Err(Error::from_reason(format!("Invalid image format: {format}"))), + } +} diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 0f6736e4d..1068a34cd 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -1,9 +1,8 
@@ //! N-API addon for GSD. //! //! Exposes high-performance Rust modules to Node.js via napi-rs. -//! Architecture mirrors Oh My Pi's pi-natives crate: //! ```text -//! JS (packages/native) -> N-API -> Rust modules (grep, ...) +//! JS (packages/native) -> N-API -> Rust modules (ast, clipboard, grep, image, ...) //! ``` #![allow(clippy::needless_pass_by_value)] @@ -11,3 +10,5 @@ mod ast; mod clipboard; mod grep; +mod image; +mod task; diff --git a/native/crates/engine/src/task.rs b/native/crates/engine/src/task.rs new file mode 100644 index 000000000..a5a012c63 --- /dev/null +++ b/native/crates/engine/src/task.rs @@ -0,0 +1,89 @@ +//! Blocking work scheduling for N-API exports. +//! +//! Runs CPU-bound or blocking Rust work on libuv's thread pool via napi's +//! `Task` trait, keeping the main JS thread free. + +use std::time::{Duration, Instant}; + +use napi::{Env, Error, Result, Task, bindgen_prelude::*}; + +/// Token for cooperative cancellation of blocking work. +#[derive(Clone, Default)] +pub struct CancelToken { + deadline: Option, +} + +impl From<()> for CancelToken { + fn from((): ()) -> Self { + Self::default() + } +} + +impl CancelToken { + /// Create a new cancel token from an optional timeout in milliseconds. + #[allow(dead_code)] + pub fn new(timeout_ms: Option) -> Self { + Self { + deadline: timeout_ms + .map(|ms| Instant::now() + Duration::from_millis(ms as u64)), + } + } + + /// Check if cancellation has been requested. + #[allow(dead_code)] + pub fn heartbeat(&self) -> Result<()> { + if let Some(deadline) = self.deadline { + if deadline < Instant::now() { + return Err(Error::from_reason("Aborted: Timeout")); + } + } + Ok(()) + } +} + +/// Task that runs blocking work on libuv's thread pool. 
+pub struct Blocking +where + T: Send + 'static, +{ + cancel_token: CancelToken, + work: Option Result + Send>>, +} + +impl Task for Blocking +where + T: ToNapiValue + TypeName + Send + 'static, +{ + type JsValue = T; + type Output = T; + + fn compute(&mut self) -> Result { + let work = self + .work + .take() + .ok_or_else(|| Error::from_reason("BlockingTask: work already consumed"))?; + work(self.cancel_token.clone()) + } + + fn resolve(&mut self, _env: Env, output: Self::Output) -> Result { + Ok(output) + } +} + +pub type Async = AsyncTask>; + +/// Create an `AsyncTask` that runs blocking work on libuv's thread pool. +pub fn blocking( + _tag: &'static str, + cancel_token: impl Into, + work: F, +) -> AsyncTask> +where + F: FnOnce(CancelToken) -> Result + Send + 'static, + T: ToNapiValue + TypeName + Send + 'static, +{ + AsyncTask::new(Blocking { + cancel_token: cancel_token.into(), + work: Some(Box::new(work)), + }) +} diff --git a/packages/native/package.json b/packages/native/package.json index 276ca324e..aa1fc7f30 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -1,14 +1,14 @@ { "name": "@gsd/native", "version": "0.1.0", - "description": "Native Rust bindings for GSD — high-performance grep and clipboard via N-API", + "description": "Native Rust bindings for GSD — high-performance native modules via N-API", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", "scripts": { "build:native": "node ../../native/scripts/build.js", "build:native:dev": "node ../../native/scripts/build.js --dev", - "test": "node --test src/__tests__/grep.test.mjs src/__tests__/clipboard.test.mjs" + "test": "node --test src/__tests__/grep.test.mjs src/__tests__/clipboard.test.mjs src/__tests__/image.test.mjs" }, "exports": { ".": { @@ -26,6 +26,10 @@ "./ast": { "types": "./src/ast/index.ts", "import": "./src/ast/index.ts" + }, + "./image": { + "types": "./src/image/index.ts", + "import": "./src/image/index.ts" } }, "files": [ diff --git 
a/packages/native/src/__tests__/image.test.mjs b/packages/native/src/__tests__/image.test.mjs new file mode 100644 index 000000000..91f297ed6 --- /dev/null +++ b/packages/native/src/__tests__/image.test.mjs @@ -0,0 +1,137 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; +import { deflateSync } from "node:zlib"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon"); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error("Native addon not found. Run 'npm run build:native -w @gsd/native' first."); + process.exit(1); +} + +function crc32(buf) { + let crc = 0xffffffff; + const table = []; + for (let n = 0; n < 256; n++) { + let c = n; + for (let k = 0; k < 8; k++) c = c & 1 ? 
0xedb88320 ^ (c >>> 1) : c >>> 1; + table[n] = c; + } + for (let i = 0; i < buf.length; i++) crc = table[(crc ^ buf[i]) & 0xff] ^ (crc >>> 8); + return (crc ^ 0xffffffff) >>> 0; +} + +function createTestPng() { + const signature = Buffer.from([137, 80, 78, 71, 13, 10, 26, 10]); + const ihdrData = Buffer.alloc(13); + ihdrData.writeUInt32BE(2, 0); + ihdrData.writeUInt32BE(2, 4); + ihdrData[8] = 8; + ihdrData[9] = 2; + const ihdrType = Buffer.from("IHDR"); + const ihdrCrc = Buffer.alloc(4); + ihdrCrc.writeUInt32BE(crc32(Buffer.concat([ihdrType, ihdrData]))); + const ihdr = Buffer.concat([Buffer.from([0, 0, 0, 13]), ihdrType, ihdrData, ihdrCrc]); + + const raw = Buffer.from([ + 0, 255, 0, 0, 255, 0, 0, + 0, 255, 0, 0, 255, 0, 0, + ]); + const compressed = deflateSync(raw); + const idatType = Buffer.from("IDAT"); + const idatLen = Buffer.alloc(4); + idatLen.writeUInt32BE(compressed.length); + const idatCrc = Buffer.alloc(4); + idatCrc.writeUInt32BE(crc32(Buffer.concat([idatType, compressed]))); + const idat = Buffer.concat([idatLen, idatType, compressed, idatCrc]); + + const iendType = Buffer.from("IEND"); + const iendCrc = Buffer.alloc(4); + iendCrc.writeUInt32BE(crc32(iendType)); + const iend = Buffer.concat([Buffer.from([0, 0, 0, 0]), iendType, iendCrc]); + + return Buffer.concat([signature, ihdr, idat, iend]); +} + +const NativeImage = native.NativeImage; + +describe("native image: NativeImage", () => { + test("NativeImage class exists with parse method", () => { + assert.ok(NativeImage, "NativeImage should be exported"); + assert.equal(typeof NativeImage.parse, "function"); + }); + + test("parse decodes PNG with correct dimensions", async () => { + const img = await NativeImage.parse(createTestPng()); + assert.equal(img.width, 2); + assert.equal(img.height, 2); + }); + + test("encode to PNG produces valid PNG", async () => { + const img = await NativeImage.parse(createTestPng()); + const encoded = await img.encode(0, 100); + assert.ok(encoded.length > 0); + 
assert.equal(encoded[0], 0x89); + assert.equal(encoded[1], 0x50); + assert.equal(encoded[2], 0x4e); + assert.equal(encoded[3], 0x47); + }); + + test("encode to JPEG produces valid JPEG", async () => { + const img = await NativeImage.parse(createTestPng()); + const encoded = await img.encode(1, 80); + assert.ok(encoded.length > 0); + assert.equal(encoded[0], 0xff); + assert.equal(encoded[1], 0xd8); + }); + + test("resize returns correct dimensions", async () => { + const img = await NativeImage.parse(createTestPng()); + const resized = await img.resize(10, 20, 5); + assert.equal(resized.width, 10); + assert.equal(resized.height, 20); + }); + + test("resize + encode round-trip", async () => { + const img = await NativeImage.parse(createTestPng()); + const resized = await img.resize(4, 4, 1); + const encoded = await resized.encode(0, 100); + assert.ok(encoded.length > 0); + const reparsed = await NativeImage.parse(new Uint8Array(encoded)); + assert.equal(reparsed.width, 4); + assert.equal(reparsed.height, 4); + }); + + test("rejects invalid image data", async () => { + await assert.rejects( + () => NativeImage.parse(new Uint8Array([0, 1, 2, 3, 4, 5])), + /Failed to (detect|decode) image/, + ); + }); + + test("rejects invalid format number", async () => { + const img = await NativeImage.parse(createTestPng()); + await assert.rejects(() => img.encode(99, 100), /Invalid image format/); + }); +}); diff --git a/packages/native/src/image/index.ts b/packages/native/src/image/index.ts new file mode 100644 index 000000000..d27df47bb --- /dev/null +++ b/packages/native/src/image/index.ts @@ -0,0 +1,28 @@ +/** + * Native image processing module using N-API. + * + * High-performance image decode/encode/resize backed by the Rust `image` crate. 
+ */ + +import { native } from "../native.js"; +import type { NativeImageHandle } from "./types.js"; +import { ImageFormat, SamplingFilter } from "./types.js"; + +export { ImageFormat, SamplingFilter }; +export type { NativeImageHandle }; + +const NativeImageClass = (native as Record) + .NativeImage as NativeImageConstructor; + +interface NativeImageConstructor { + parse(bytes: Uint8Array): Promise; +} + +/** + * Decode image bytes (PNG, JPEG, WebP, GIF) into a NativeImage handle. + * + * Format is auto-detected from the byte content. + */ +export function parseImage(bytes: Uint8Array): Promise { + return NativeImageClass.parse(bytes); +} diff --git a/packages/native/src/image/types.ts b/packages/native/src/image/types.ts new file mode 100644 index 000000000..5a9dbb8b5 --- /dev/null +++ b/packages/native/src/image/types.ts @@ -0,0 +1,41 @@ +/** Sampling filter for resize operations. */ +export enum SamplingFilter { + /** Nearest-neighbor sampling (fast, low quality). */ + Nearest = 1, + /** Triangle filter (linear interpolation). */ + Triangle = 2, + /** Catmull-Rom filter with sharper edges. */ + CatmullRom = 3, + /** Gaussian filter for smoother results. */ + Gaussian = 4, + /** Lanczos3 filter for high-quality downscaling. */ + Lanczos3 = 5, +} + +/** Output image format for encoding. */ +export enum ImageFormat { + /** PNG (lossless, quality ignored). */ + PNG = 0, + /** JPEG (lossy, quality 0-100). */ + JPEG = 1, + /** WebP (lossless, quality ignored). */ + WebP = 2, + /** GIF (quality ignored). */ + GIF = 3, +} + +/** Native image handle returned from parse(). */ +export interface NativeImageHandle { + /** Image width in pixels. */ + readonly width: number; + /** Image height in pixels. */ + readonly height: number; + /** Encode to bytes in the specified format. Returns a Promise. */ + encode(format: number, quality: number): Promise; + /** Resize to the specified dimensions. Returns a new NativeImage Promise. 
*/ + resize( + width: number, + height: number, + filter: SamplingFilter, + ): Promise; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index c3ebe2a61..7d06991ff 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -2,8 +2,10 @@ * @gsd/native — High-performance Rust modules exposed via N-API. * * Modules: + * - ast: AST-aware structural search and rewrite * - clipboard: native clipboard access (text + image) * - grep: ripgrep-backed regex search (content + filesystem) + * - image: decode, encode, and resize images */ export { @@ -34,3 +36,6 @@ export type { AstReplaceOptions, AstReplaceResult, } from "./ast/index.js"; + +export { parseImage, ImageFormat, SamplingFilter } from "./image/index.js"; +export type { NativeImageHandle } from "./image/index.js"; diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 3596c6124..2339e8abf 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -48,4 +48,5 @@ export const native = loadNative() as { readImageFromClipboard: () => Promise; astGrep: (options: unknown) => unknown; astEdit: (options: unknown) => unknown; + NativeImage: unknown; }; From cd444eb0ea6d0211b10ab189797506737a82a67b Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 13:13:43 -0600 Subject: [PATCH 7/7] merge: integrate native fd module (#231) --- native/Cargo.lock | 1 + native/crates/engine/src/fd.rs | 494 ++++++++++++++++++++++ native/crates/engine/src/lib.rs | 3 +- packages/native/package.json | 7 +- packages/native/src/__tests__/fd.test.mjs | 164 +++++++ packages/native/src/fd/index.ts | 35 ++ packages/native/src/fd/types.ts | 31 ++ packages/native/src/index.ts | 8 + packages/native/src/native.ts | 1 + 9 files changed, 741 insertions(+), 3 deletions(-) create mode 100644 native/crates/engine/src/fd.rs create mode 100644 packages/native/src/__tests__/fd.test.mjs create mode 100644 packages/native/src/fd/index.ts create mode 
100644 packages/native/src/fd/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index 164bafec7..befccef43 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -157,6 +157,7 @@ name = "gsd-engine" version = "0.1.0" dependencies = [ "gsd-grep", + "ignore", "napi", "napi-build", "napi-derive", diff --git a/native/crates/engine/src/fd.rs b/native/crates/engine/src/fd.rs new file mode 100644 index 000000000..d792d1a0d --- /dev/null +++ b/native/crates/engine/src/fd.rs @@ -0,0 +1,494 @@ +//! Fuzzy file path discovery for autocomplete and @-mention resolution. +//! +//! Searches for files and directories whose paths match a query string via +//! subsequence scoring. Uses the `ignore` crate for directory walking +//! (respects `.gitignore`, hidden files, etc.). + +use std::path::Path; + +use ignore::WalkBuilder; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +// ═══════════════════════════════════════════════════════════════════════════ +// Public types +// ═══════════════════════════════════════════════════════════════════════════ + +/// Options for fuzzy file path search. +#[napi(object)] +pub struct FuzzyFindOptions { + /// Fuzzy query to match against file paths (case-insensitive). + pub query: String, + /// Directory to search. + pub path: String, + /// Include hidden files (default: false). + pub hidden: Option, + /// Respect .gitignore (default: true). + pub gitignore: Option, + /// Maximum number of matches to return (default: 100). + #[napi(js_name = "maxResults")] + pub max_results: Option, +} + +/// A single match in fuzzy find results. +#[napi(object)] +pub struct FuzzyFindMatch { + /// Relative path from the search root (uses `/` separators). + pub path: String, + /// Whether this entry is a directory. + #[napi(js_name = "isDirectory")] + pub is_directory: bool, + /// Match quality score (higher is better). + pub score: u32, +} + +/// Result of fuzzy file path search. 
+#[napi(object)] +pub struct FuzzyFindResult { + /// Matched entries (up to `maxResults`). + pub matches: Vec, + /// Total number of matches found (may exceed `matches.len()`). + #[napi(js_name = "totalMatches")] + pub total_matches: u32, +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Path utilities +// ═══════════════════════════════════════════════════════════════════════════ + +/// Resolve a search path string to a canonical `PathBuf` (must be a directory). +fn resolve_search_path(path: &str) -> Result { + let candidate = std::path::PathBuf::from(path); + let root = if candidate.is_absolute() { + candidate + } else { + let cwd = std::env::current_dir() + .map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))?; + cwd.join(candidate) + }; + let metadata = std::fs::metadata(&root) + .map_err(|err| Error::from_reason(format!("Path not found: {err}")))?; + if !metadata.is_dir() { + return Err(Error::from_reason( + "Search path must be a directory".to_string(), + )); + } + Ok(std::fs::canonicalize(&root).unwrap_or(root)) +} + +/// Check if a path component matches a target string. +fn contains_component(path: &Path, target: &str) -> bool { + path.components().any(|component| { + component + .as_os_str() + .to_str() + .is_some_and(|value| value == target) + }) +} + +/// Skip `.git` directories and `node_modules`. +fn should_skip_path(path: &Path) -> bool { + contains_component(path, ".git") || contains_component(path, "node_modules") +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Scoring +// ═══════════════════════════════════════════════════════════════════════════ + +/// Strips separators, whitespace, and punctuation for normalized fuzzy comparison. +fn normalize_fuzzy_text(value: &str) -> String { + value + .chars() + .filter(|ch| !ch.is_whitespace() && !matches!(ch, '/' | '\\' | '.' 
| '_' | '-')) + .flat_map(|ch| ch.to_lowercase()) + .collect() +} + +/// Scores a query as a subsequence of `target`. Returns 0 if not a subsequence. +fn fuzzy_subsequence_score(query_chars: &[char], target: &str) -> u32 { + if query_chars.is_empty() { + return 1; + } + let mut query_index = 0usize; + let mut gaps = 0u32; + let mut last_match_index: Option = None; + for (target_index, target_ch) in target.chars().enumerate() { + if query_index >= query_chars.len() { + break; + } + if query_chars[query_index] == target_ch { + if let Some(last_index) = last_match_index { + if target_index > last_index + 1 { + gaps = gaps.saturating_add(1); + } + } + last_match_index = Some(target_index); + query_index += 1; + } + } + if query_index != query_chars.len() { + return 0; + } + let gap_penalty = gaps.saturating_mul(5); + 40u32.saturating_sub(gap_penalty).max(1) +} + +/// Composite path scoring: exact > starts-with > contains > fuzzy subsequence. +fn score_fuzzy_path( + path: &str, + is_directory: bool, + query_lower: &str, + normalized_query: &str, + query_chars: &[char], +) -> u32 { + if query_lower.is_empty() { + return if is_directory { 11 } else { 1 }; + } + + let file_name = Path::new(path) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or(path); + let lower_file_name = file_name.to_lowercase(); + + let mut score = if lower_file_name == query_lower { + 120 + } else if lower_file_name.starts_with(query_lower) { + 100 + } else if lower_file_name.contains(query_lower) { + 80 + } else { + let lower_path = path.to_lowercase(); + if lower_path.contains(query_lower) { + 60 + } else { + let normalized_file_name = normalize_fuzzy_text(file_name); + let file_name_fuzzy = fuzzy_subsequence_score(query_chars, &normalized_file_name); + if file_name_fuzzy > 0 { + 50 + file_name_fuzzy + } else { + let normalized_path = normalize_fuzzy_text(path); + let path_fuzzy = if normalized_path == normalized_query { + 40 + } else { + fuzzy_subsequence_score(query_chars, 
&normalized_path) + }; + if path_fuzzy > 0 { + 30 + path_fuzzy + } else { + 0 + } + } + } + }; + + if is_directory && score > 0 { + score += 10; + } + + score +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Directory walking +// ═══════════════════════════════════════════════════════════════════════════ + +/// File type classification for discovered entries. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum EntryType { + File, + Dir, + Symlink, +} + +/// A filesystem entry discovered during walking. +struct WalkEntry { + /// Relative path from root (forward slashes). + path: String, + /// Entry type. + entry_type: EntryType, +} + +/// Walk a directory tree collecting entries. +fn walk_directory( + root: &Path, + include_hidden: bool, + respect_gitignore: bool, +) -> Vec { + let mut builder = WalkBuilder::new(root); + builder + .hidden(!include_hidden) + .follow_links(false) + .sort_by_file_path(|a, b| a.cmp(b)); + + if respect_gitignore { + builder + .git_ignore(true) + .git_exclude(true) + .git_global(true) + .ignore(true) + .parents(true); + } else { + builder + .git_ignore(false) + .git_exclude(false) + .git_global(false) + .ignore(false) + .parents(false); + } + + let mut entries = Vec::new(); + for entry in builder.build() { + let Ok(entry) = entry else { continue }; + let path = entry.path(); + + if should_skip_path(path) { + continue; + } + + let relative = path.strip_prefix(root).unwrap_or(path); + let relative_str = relative.to_string_lossy(); + if relative_str.is_empty() { + continue; + } + + // Normalize to forward slashes on all platforms. 
+ let relative_str = if cfg!(windows) && relative_str.contains('\\') { + relative_str.replace('\\', "/") + } else { + relative_str.into_owned() + }; + + let Some(metadata) = std::fs::symlink_metadata(path).ok() else { + continue; + }; + let file_type = metadata.file_type(); + let entry_type = if file_type.is_symlink() { + EntryType::Symlink + } else if file_type.is_dir() { + EntryType::Dir + } else { + EntryType::File + }; + + entries.push(WalkEntry { + path: relative_str, + entry_type, + }); + } + entries +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Execution +// ═══════════════════════════════════════════════════════════════════════════ + +/// Saturating cast from u64 to u32. +fn clamp_u32(value: u64) -> u32 { + value.min(u32::MAX as u64) as u32 +} + +/// Fuzzy file path search for autocomplete and @-mention resolution. +/// +/// Searches for files and directories whose paths match the query string. +/// Results are sorted by match quality (higher score = better match). 
+#[napi(js_name = "fuzzyFind")] +pub fn fuzzy_find(options: FuzzyFindOptions) -> Result { + let root = resolve_search_path(&options.path)?; + let include_hidden = options.hidden.unwrap_or(false); + let respect_gitignore = options.gitignore.unwrap_or(true); + let max_results = options.max_results.unwrap_or(100) as usize; + + if max_results == 0 { + return Ok(FuzzyFindResult { + matches: Vec::new(), + total_matches: 0, + }); + } + + let query_lower = options.query.trim().to_lowercase(); + let normalized_query = normalize_fuzzy_text(&query_lower); + let query_chars: Vec = normalized_query.chars().collect(); + + if !query_lower.is_empty() && normalized_query.is_empty() { + return Ok(FuzzyFindResult { + matches: Vec::new(), + total_matches: 0, + }); + } + + let entries = walk_directory(&root, include_hidden, respect_gitignore); + + let mut scored: Vec = Vec::with_capacity(entries.len().min(256)); + for entry in entries { + if entry.entry_type == EntryType::Symlink { + continue; + } + + let is_directory = entry.entry_type == EntryType::Dir; + let score = score_fuzzy_path( + &entry.path, + is_directory, + &query_lower, + &normalized_query, + &query_chars, + ); + if score == 0 { + continue; + } + + let mut path = entry.path; + if is_directory { + path.push('/'); + } + scored.push(FuzzyFindMatch { + path, + is_directory, + score, + }); + } + + scored.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path))); + let total_matches = clamp_u32(scored.len() as u64); + let matches = scored.into_iter().take(max_results).collect(); + + Ok(FuzzyFindResult { + matches, + total_matches, + }) +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Tests +// ═══════════════════════════════════════════════════════════════════════════ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_fuzzy_text() { + assert_eq!(normalize_fuzzy_text("foo/bar.ts"), "foobarts"); + assert_eq!(normalize_fuzzy_text("my_file-name.rs"), 
"myfilenamers"); + assert_eq!(normalize_fuzzy_text("MyFile"), "myfile"); + assert_eq!(normalize_fuzzy_text(""), ""); + } + + #[test] + fn test_fuzzy_subsequence_score_exact() { + let query: Vec = "abc".chars().collect(); + let score = fuzzy_subsequence_score(&query, "abc"); + assert_eq!(score, 40); + } + + #[test] + fn test_fuzzy_subsequence_score_with_gaps() { + let query: Vec = "ac".chars().collect(); + let score = fuzzy_subsequence_score(&query, "abc"); + assert_eq!(score, 35); + } + + #[test] + fn test_fuzzy_subsequence_score_no_match() { + let query: Vec = "xyz".chars().collect(); + let score = fuzzy_subsequence_score(&query, "abc"); + assert_eq!(score, 0); + } + + #[test] + fn test_fuzzy_subsequence_score_empty_query() { + let query: Vec = Vec::new(); + let score = fuzzy_subsequence_score(&query, "abc"); + assert_eq!(score, 1); + } + + #[test] + fn test_score_fuzzy_path_exact_filename() { + let score = score_fuzzy_path( + "src/main.rs", + false, + "main.rs", + "mainrs", + &"mainrs".chars().collect::>(), + ); + assert_eq!(score, 120); + } + + #[test] + fn test_score_fuzzy_path_starts_with() { + let score = score_fuzzy_path( + "src/main.rs", + false, + "main", + "main", + &"main".chars().collect::>(), + ); + assert_eq!(score, 100); + } + + #[test] + fn test_score_fuzzy_path_contains() { + let score = score_fuzzy_path( + "src/my_main.rs", + false, + "main", + "main", + &"main".chars().collect::>(), + ); + assert_eq!(score, 80); + } + + #[test] + fn test_score_fuzzy_path_directory_bonus() { + let file_score = score_fuzzy_path( + "src/main.rs", + false, + "main.rs", + "mainrs", + &"mainrs".chars().collect::>(), + ); + let dir_score = score_fuzzy_path( + "src/main.rs", + true, + "main.rs", + "mainrs", + &"mainrs".chars().collect::>(), + ); + assert_eq!(dir_score, file_score + 10); + } + + #[test] + fn test_score_fuzzy_path_empty_query() { + let file_score = score_fuzzy_path("src/main.rs", false, "", "", &[]); + let dir_score = score_fuzzy_path("src/", true, "", "", 
&[]); + assert_eq!(file_score, 1); + assert_eq!(dir_score, 11); + } + + #[test] + fn test_score_fuzzy_path_no_match() { + let score = score_fuzzy_path( + "src/main.rs", + false, + "xyz", + "xyz", + &"xyz".chars().collect::>(), + ); + assert_eq!(score, 0); + } + + #[test] + fn test_walk_directory_real_fs() { + let root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let entries = walk_directory(&root, false, true); + let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect(); + assert!( + paths.iter().any(|p| p.contains("fd.rs")), + "Should find fd.rs in {paths:?}" + ); + assert!( + paths.iter().any(|p| p.contains("lib.rs")), + "Should find lib.rs in {paths:?}" + ); + } +} diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index fb7d06c62..c1a5667f9 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -8,11 +8,12 @@ #![allow(clippy::needless_pass_by_value)] +mod ast; mod clipboard; +mod fd; mod fs_cache; mod glob; mod glob_util; -mod ast; mod grep; mod highlight; mod html; diff --git a/packages/native/package.json b/packages/native/package.json index 2a15c6f60..e4acd0b59 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -8,7 +8,7 @@ "scripts": { "build:native": "node ../../native/scripts/build.js", "build:native:dev": "node ../../native/scripts/build.js --dev", - "test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs src/__tests__/glob.test.mjs src/__tests__/clipboard.test.mjs src/__tests__/highlight.test.mjs src/__tests__/html.test.mjs src/__tests__/text.test.mjs" + "test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs src/__tests__/glob.test.mjs src/__tests__/clipboard.test.mjs src/__tests__/highlight.test.mjs src/__tests__/html.test.mjs src/__tests__/text.test.mjs src/__tests__/fd.test.mjs" }, "exports": { ".": { @@ -19,7 +19,6 @@ "types": "./src/grep/index.ts", "import": "./src/grep/index.ts" }, -<<<<<<< HEAD 
"./ps": { "types": "./src/ps/index.ts", "import": "./src/ps/index.ts" @@ -43,6 +42,10 @@ "./text": { "types": "./src/text/index.ts", "import": "./src/text/index.ts" + }, + "./fd": { + "types": "./src/fd/index.ts", + "import": "./src/fd/index.ts" } }, "files": [ diff --git a/packages/native/src/__tests__/fd.test.mjs b/packages/native/src/__tests__/fd.test.mjs new file mode 100644 index 000000000..4a478fad8 --- /dev/null +++ b/packages/native/src/__tests__/fd.test.mjs @@ -0,0 +1,164 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { createRequire } from "node:module"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; +import * as fs from "node:fs"; +import * as os from "node:os"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +// Load the native addon directly +const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon"); +const platformTag = `${process.platform}-${process.arch}`; +const candidates = [ + path.join(addonDir, `gsd_engine.${platformTag}.node`), + path.join(addonDir, "gsd_engine.dev.node"), +]; + +let native; +for (const candidate of candidates) { + try { + native = require(candidate); + break; + } catch { + // try next + } +} + +if (!native) { + console.error("Native addon not found. 
Run `npm run build:native -w @gsd/native` first."); + process.exit(1); +} + +describe("native fd: fuzzyFind()", () => { + test("finds files matching a query", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + fs.writeFileSync(path.join(tmpDir, "main.rs"), "fn main() {}"); + fs.writeFileSync(path.join(tmpDir, "lib.rs"), "pub mod lib;"); + fs.writeFileSync(path.join(tmpDir, "utils.ts"), "export {}"); + fs.mkdirSync(path.join(tmpDir, "src")); + fs.writeFileSync(path.join(tmpDir, "src", "helper.rs"), "fn helper() {}"); + + const result = native.fuzzyFind({ query: "main", path: tmpDir }); + + assert.ok(result.matches.length > 0, "Should find at least one match"); + assert.equal(result.matches[0].path, "main.rs"); + assert.equal(result.matches[0].isDirectory, false); + assert.ok(result.matches[0].score > 0); + }); + + test("returns empty results for non-matching query", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + fs.writeFileSync(path.join(tmpDir, "hello.txt"), "hello"); + + const result = native.fuzzyFind({ + query: "zzzznotexist", + path: tmpDir, + }); + + assert.equal(result.matches.length, 0); + assert.equal(result.totalMatches, 0); + }); + + test("respects maxResults limit", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + for (let i = 0; i < 10; i++) { + fs.writeFileSync(path.join(tmpDir, `file${i}.txt`), "content"); + } + + const result = native.fuzzyFind({ + query: "file", + path: tmpDir, + maxResults: 3, + }); + + assert.equal(result.matches.length, 3); + assert.ok(result.totalMatches >= 3); + }); + + test("directories have trailing slash and bonus score", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + 
t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + fs.mkdirSync(path.join(tmpDir, "models")); + fs.writeFileSync(path.join(tmpDir, "models.ts"), "export {}"); + + const result = native.fuzzyFind({ query: "models", path: tmpDir }); + + const dirMatch = result.matches.find((m) => m.isDirectory); + const fileMatch = result.matches.find((m) => !m.isDirectory); + + assert.ok(dirMatch, "Should find a directory match"); + assert.ok(fileMatch, "Should find a file match"); + assert.ok(dirMatch.path.endsWith("/"), "Directory should have trailing slash"); + assert.ok(dirMatch.score > fileMatch.score, "Directory should score higher"); + }); + + test("empty query returns all entries", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + fs.writeFileSync(path.join(tmpDir, "a.txt"), "a"); + fs.writeFileSync(path.join(tmpDir, "b.txt"), "b"); + fs.writeFileSync(path.join(tmpDir, "c.txt"), "c"); + + const result = native.fuzzyFind({ query: "", path: tmpDir }); + + assert.equal(result.matches.length, 3); + }); + + test("errors on non-existent path", () => { + assert.throws( + () => native.fuzzyFind({ query: "test", path: "/nonexistent/path" }), + { message: /Path not found/ }, + ); + }); + + test("fuzzy subsequence matching works", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + fs.writeFileSync(path.join(tmpDir, "MyComponentFile.tsx"), "export {}"); + fs.writeFileSync(path.join(tmpDir, "other.txt"), "other"); + + // "mcf" should fuzzy-match "MyComponentFile" via subsequence + const result = native.fuzzyFind({ query: "mcf", path: tmpDir }); + + assert.ok(result.matches.length > 0, "Fuzzy subsequence should match"); + assert.ok( + result.matches.some((m) => m.path.includes("MyComponentFile")), + "Should find MyComponentFile via fuzzy match", + ); + }); + + 
test("results are sorted by score descending", (t) => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-fd-test-")); + t.after(() => fs.rmSync(tmpDir, { recursive: true, force: true })); + + fs.writeFileSync(path.join(tmpDir, "main.ts"), ""); + fs.writeFileSync(path.join(tmpDir, "my_main.ts"), ""); + fs.mkdirSync(path.join(tmpDir, "src")); + fs.writeFileSync(path.join(tmpDir, "src", "main.rs"), ""); + + const result = native.fuzzyFind({ + query: "main", + path: tmpDir, + maxResults: 100, + }); + + for (let i = 1; i < result.matches.length; i++) { + assert.ok( + result.matches[i - 1].score >= result.matches[i].score, + `Match ${i - 1} (score ${result.matches[i - 1].score}) should be >= match ${i} (score ${result.matches[i].score})`, + ); + } + }); +}); diff --git a/packages/native/src/fd/index.ts b/packages/native/src/fd/index.ts new file mode 100644 index 000000000..3dc413922 --- /dev/null +++ b/packages/native/src/fd/index.ts @@ -0,0 +1,35 @@ +/** + * Native fuzzy file path discovery using N-API. + * + * High-performance fuzzy file search for autocomplete and @-mention resolution. + * Backed by Rust's `ignore` crate for directory walking with subsequence scoring. + */ + +import { native } from "../native.js"; +import type { + FuzzyFindMatch, + FuzzyFindOptions, + FuzzyFindResult, +} from "./types.js"; + +export type { FuzzyFindMatch, FuzzyFindOptions, FuzzyFindResult }; + +/** + * Fuzzy file path search. + * + * Searches for files and directories whose paths match the query string. + * Results are sorted by match quality (higher score = better match). + * + * Scoring tiers (highest to lowest): + * - 120: exact filename match + * - 100: filename starts with query + * - 80: filename contains query + * - 60: full path contains query + * - 50-90: fuzzy subsequence match on filename + * - 30-70: fuzzy subsequence match on full path + * + * Directories receive a +10 score bonus. 
+ */ +export function fuzzyFind(options: FuzzyFindOptions): FuzzyFindResult { + return native.fuzzyFind(options) as FuzzyFindResult; +} diff --git a/packages/native/src/fd/types.ts b/packages/native/src/fd/types.ts new file mode 100644 index 000000000..dacbe7dca --- /dev/null +++ b/packages/native/src/fd/types.ts @@ -0,0 +1,31 @@ +/** Options for fuzzy file path search. */ +export interface FuzzyFindOptions { + /** Fuzzy query to match against file paths (case-insensitive). */ + query: string; + /** Directory to search. */ + path: string; + /** Include hidden files (default: false). */ + hidden?: boolean; + /** Respect .gitignore (default: true). */ + gitignore?: boolean; + /** Maximum number of matches to return (default: 100). */ + maxResults?: number; +} + +/** A single match in fuzzy find results. */ +export interface FuzzyFindMatch { + /** Relative path from the search root (uses `/` separators). Directories have a trailing `/`. */ + path: string; + /** Whether this entry is a directory. */ + isDirectory: boolean; + /** Match quality score (higher is better). */ + score: number; +} + +/** Result of fuzzy file path search. */ +export interface FuzzyFindResult { + /** Matched entries (up to `maxResults`), sorted by score descending. */ + matches: FuzzyFindMatch[]; + /** Total number of matches found (may exceed `matches.length`). 
*/ + totalMatches: number; +} diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index 16d52533e..6196f5719 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -9,6 +9,7 @@ * - highlight: syntect-based syntax highlighting * - html: HTML to Markdown conversion * - text: ANSI-aware text measurement and slicing + * - fd: fuzzy file path discovery for autocomplete and @-mention resolution */ export { @@ -70,3 +71,10 @@ export { EllipsisKind, } from "./text/index.js"; export type { SliceResult, ExtractSegmentsResult } from "./text/index.js"; + +export { fuzzyFind } from "./fd/index.js"; +export type { + FuzzyFindMatch, + FuzzyFindOptions, + FuzzyFindResult, +} from "./fd/index.js"; diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index 92e62f82f..3e127a3ff 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -86,4 +86,5 @@ export const native = loadNative() as { ) => unknown; sanitizeText: (text: string) => string; visibleWidth: (text: string, tabWidth?: number) => number; + fuzzyFind: (options: unknown) => unknown; };