From e05292f772f191e2301fbf5b3fc44a38e2154fde Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 12:47:27 -0600 Subject: [PATCH] =?UTF-8?q?feat:=20add=20native=20ast=20module=20=E2=80=94?= =?UTF-8?q?=20AST-aware=20structural=20search=20and=20rewrite=20via=20ast-?= =?UTF-8?q?grep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port ast-grep integration from Oh My Pi with 38+ language support via tree-sitter grammars. Exposes `astGrep` (search) and `astEdit` (rewrite) as N-API functions with TypeScript wrappers. Key changes: - New `gsd-ast` crate with language definitions, glob utilities, and ast-grep core - Replaces fs_cache/task dependencies with `ignore` crate for file walking - Synchronous API matching the existing grep module pattern - Full TypeScript type declarations in packages/native/src/ast/ Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 618 ++++++++++++++ native/crates/ast/Cargo.toml | 54 ++ native/crates/ast/src/ast.rs | 929 ++++++++++++++++++++++ native/crates/ast/src/glob_util.rs | 54 ++ native/crates/ast/src/language/mod.rs | 437 ++++++++++ native/crates/ast/src/language/parsers.rs | 118 +++ native/crates/ast/src/lib.rs | 10 + native/crates/engine/Cargo.toml | 1 + native/crates/engine/src/ast.rs | 6 + native/crates/engine/src/lib.rs | 1 + packages/native/package.json | 4 + packages/native/src/ast/index.ts | 67 ++ packages/native/src/ast/types.ts | 137 ++++ packages/native/src/index.ts | 11 + packages/native/src/native.ts | 2 + 15 files changed, 2449 insertions(+) create mode 100644 native/crates/ast/Cargo.toml create mode 100644 native/crates/ast/src/ast.rs create mode 100644 native/crates/ast/src/glob_util.rs create mode 100644 native/crates/ast/src/language/mod.rs create mode 100644 native/crates/ast/src/language/parsers.rs create mode 100644 native/crates/ast/src/lib.rs create mode 100644 native/crates/engine/src/ast.rs create mode 100644 
packages/native/src/ast/index.ts create mode 100644 packages/native/src/ast/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index 748f53e2a..7fd93005a 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -37,12 +37,39 @@ dependencies = [ "x11rb", ] +[[package]] +name = "ast-grep-core" +version = "0.39.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057ae90e7256ebf85f840b1638268df0142c9d19467d500b790631fd301acc27" +dependencies = [ + "bit-set", + "regex", + "thiserror", + "tree-sitter", +] + [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.0" @@ -72,6 +99,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -180,6 +217,12 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.14" @@ -196,6 
+239,12 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fax" version = "0.2.6" @@ -225,6 +274,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "flate2" version = "1.1.9" @@ -295,6 +350,56 @@ dependencies = [ "memmap2", ] +[[package]] +name = "gsd-ast" +version = "0.1.0" +dependencies = [ + "ast-grep-core", + "globset", + "ignore", + "napi", + "napi-derive", + "phf", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-diff", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-hcl", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + "tree-sitter-julia", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-make", + "tree-sitter-md", + "tree-sitter-nix", + "tree-sitter-objc", + "tree-sitter-odin", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-regex", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-solidity", + "tree-sitter-starlark", + "tree-sitter-swift", + "tree-sitter-toml-ng", + "tree-sitter-typescript", + "tree-sitter-verilog", + "tree-sitter-xml", + "tree-sitter-yaml", + "tree-sitter-zig", +] + [[package]] name = "gsd-engine" version = "0.1.0" @@ -329,6 +434,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "ignore" version = "0.4.25" @@ -359,6 +470,22 @@ dependencies = [ "tiff", ] +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "libc" version = "0.2.183" @@ -605,6 +732,49 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "png" version = "0.18.1" @@ 
-769,18 +939,50 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "simd-adler32" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "syn" version = "2.0.117" @@ -792,6 +994,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tiff" version = "0.11.3" @@ -806,6 
+1028,396 @@ dependencies = [ "zune-jpeg", ] +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-css" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-diff" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfe1e5ca280a65dfe5ba4205c1bcc84edf486464fed315db53dee6da9a335889" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-elixir" +version = 
"0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66dd064a762ed95bfc29857fa3cb7403bb1e5cb88112de0f6341b7e47284ba40" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "977c51e504548cba13fc27cb5a2edab2124cf6716a1934915d07ab99523b05a4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-hcl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7b2cc3d7121553b84309fab9d11b3ff3d420403eef9ae50f9fd1cd9d9cf012" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-html" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-json" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4" 
+dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-sg" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0e175b7530765d1e36ad234a7acaa8b2a3316153f239d724376c7ee5e8d8e98" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-make" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5998dc7cbcbdab19fae8aefef982bf2d6544513d8d2e69cc44aec4c63810104" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-md" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efd398be546456c814598ee56c0f51769a77241511b4a58077815d120afa882" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-odin" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24db210fe9ba2237c71c5030d7b146c7025420ba72dd8013d13cd822c3a8d77a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-regex" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8a59be9f0ac131fd8f062eaaba14882b2fa5a6a7882a20134cb1d60df2e625" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scala" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b4f354028b5fcf1d0c77f1c6d84cd5a579f29a1e43cb61551ec6580e9a99229" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = 
"tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-starlark" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8934f282d085cc4b9ee28aa688aa3fbe8aa3766201c2a6252f411d45b4c3a721" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-toml-ng" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9adc2c898ae49730e857d75be403da3f92bb81d8e37a2f918a08dd10de5ebb1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-verilog" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e7e0360395852f1f6ff5b7b82c72dc6557d181073188df1d60ec469ea69c66" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -969,6 +1581,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zune-core" version = "0.5.1" diff --git a/native/crates/ast/Cargo.toml b/native/crates/ast/Cargo.toml new file mode 100644 index 000000000..91647fb32 --- /dev/null +++ b/native/crates/ast/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "gsd-ast" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "AST-aware structural search and rewrite via ast-grep for GSD native engine" + +[dependencies] +ast-grep-core = { version = "0.39", default-features = false, features = ["tree-sitter"] } +globset = "0.4" +ignore = "0.4" +napi = { version = "2", features = ["napi8"] } +napi-derive = "2" +phf = { version = "0.13", features = ["macros"] } +tree-sitter = "0.25" +tree-sitter-bash = "0.25" +tree-sitter-c = "0.24" +tree-sitter-c-sharp = "0.23" +tree-sitter-cpp = "0.23" +tree-sitter-css = "0.25" +tree-sitter-diff = "0.1" +tree-sitter-elixir = "0.3" +tree-sitter-go = "0.25" +tree-sitter-haskell = "0.23" +tree-sitter-hcl = "1.1" +tree-sitter-html = "0.23" +tree-sitter-java = "0.23" +tree-sitter-javascript = "0.25" +tree-sitter-json = "0.23" +tree-sitter-julia = "0.23" +tree-sitter-kotlin = { version = "0.4", package = "tree-sitter-kotlin-sg" } +tree-sitter-lua = "0.2" +tree-sitter-make = "1.1" 
+tree-sitter-md = "0.5" +tree-sitter-nix = "0.3" +tree-sitter-objc = "3.0" +tree-sitter-odin = "1.3" +tree-sitter-php = "0.24" +tree-sitter-python = "0.25" +tree-sitter-regex = "0.25" +tree-sitter-ruby = "0.23" +tree-sitter-rust = "0.24" +tree-sitter-scala = "0.24" +tree-sitter-solidity = "1.2" +tree-sitter-starlark = "1.3" +tree-sitter-swift = "0.7" +tree-sitter-toml-ng = "0.7" +tree-sitter-typescript = "0.23" +tree-sitter-verilog = "1.0" +tree-sitter-xml = "0.7" +tree-sitter-yaml = "0.7" +tree-sitter-zig = "1.1" diff --git a/native/crates/ast/src/ast.rs b/native/crates/ast/src/ast.rs new file mode 100644 index 000000000..12d5aee17 --- /dev/null +++ b/native/crates/ast/src/ast.rs @@ -0,0 +1,929 @@ +//! AST-aware structural search and rewrite powered by ast-grep. + +use std::{ + collections::{BTreeMap, BTreeSet, HashMap}, + path::{Path, PathBuf}, +}; + +use ast_grep_core::{ + Language, MatchStrictness, matcher::Pattern, source::Edit, tree_sitter::LanguageExt, +}; +use ignore::WalkBuilder; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +use crate::{glob_util, language::SupportLang}; + +const DEFAULT_FIND_LIMIT: u32 = 50; + +#[napi(object)] +pub struct AstFindOptions { + pub patterns: Option>, + pub lang: Option, + pub path: Option, + pub glob: Option, + pub selector: Option, + pub strictness: Option, + pub limit: Option, + pub offset: Option, + #[napi(js_name = "includeMeta")] + pub include_meta: Option, + pub context: Option, +} + +#[napi(object)] +pub struct AstFindMatch { + pub path: String, + pub text: String, + #[napi(js_name = "byteStart")] + pub byte_start: u32, + #[napi(js_name = "byteEnd")] + pub byte_end: u32, + #[napi(js_name = "startLine")] + pub start_line: u32, + #[napi(js_name = "startColumn")] + pub start_column: u32, + #[napi(js_name = "endLine")] + pub end_line: u32, + #[napi(js_name = "endColumn")] + pub end_column: u32, + #[napi(js_name = "metaVariables")] + pub meta_variables: Option>, +} + +#[napi(object)] +pub struct AstFindResult { 
+ pub matches: Vec, + #[napi(js_name = "totalMatches")] + pub total_matches: u32, + #[napi(js_name = "filesWithMatches")] + pub files_with_matches: u32, + #[napi(js_name = "filesSearched")] + pub files_searched: u32, + #[napi(js_name = "limitReached")] + pub limit_reached: bool, + #[napi(js_name = "parseErrors")] + pub parse_errors: Option>, +} + +#[napi(object)] +pub struct AstReplaceOptions { + pub rewrites: Option>, + pub lang: Option, + pub path: Option, + pub glob: Option, + pub selector: Option, + pub strictness: Option, + #[napi(js_name = "dryRun")] + pub dry_run: Option, + #[napi(js_name = "maxReplacements")] + pub max_replacements: Option, + #[napi(js_name = "maxFiles")] + pub max_files: Option, + #[napi(js_name = "failOnParseError")] + pub fail_on_parse_error: Option, +} + +#[napi(object)] +pub struct AstReplaceChange { + pub path: String, + pub before: String, + pub after: String, + #[napi(js_name = "byteStart")] + pub byte_start: u32, + #[napi(js_name = "byteEnd")] + pub byte_end: u32, + #[napi(js_name = "deletedLength")] + pub deleted_length: u32, + #[napi(js_name = "startLine")] + pub start_line: u32, + #[napi(js_name = "startColumn")] + pub start_column: u32, + #[napi(js_name = "endLine")] + pub end_line: u32, + #[napi(js_name = "endColumn")] + pub end_column: u32, +} + +#[napi(object)] +pub struct AstReplaceFileChange { + pub path: String, + pub count: u32, +} + +#[napi(object)] +pub struct AstReplaceResult { + pub changes: Vec, + #[napi(js_name = "fileChanges")] + pub file_changes: Vec, + #[napi(js_name = "totalReplacements")] + pub total_replacements: u32, + #[napi(js_name = "filesTouched")] + pub files_touched: u32, + #[napi(js_name = "filesSearched")] + pub files_searched: u32, + pub applied: bool, + #[napi(js_name = "limitReached")] + pub limit_reached: bool, + #[napi(js_name = "parseErrors")] + pub parse_errors: Option>, +} + +struct FileCandidate { + absolute_path: PathBuf, + display_path: String, +} + +struct PendingFileChange { + change: 
AstReplaceChange, + edit: Edit, +} + +fn to_u32(value: usize) -> u32 { + value.min(u32::MAX as usize) as u32 +} + +/// Single source of truth: every recognised alias (lowercased) -> `SupportLang`. +static LANG_ALIASES: phf::Map<&'static str, SupportLang> = phf::phf_map! { + "bash" => SupportLang::Bash, + "sh" => SupportLang::Bash, + "c" => SupportLang::C, + "cpp" => SupportLang::Cpp, + "c++" => SupportLang::Cpp, + "cc" => SupportLang::Cpp, + "cxx" => SupportLang::Cpp, + "csharp" => SupportLang::CSharp, + "c#" => SupportLang::CSharp, + "cs" => SupportLang::CSharp, + "css" => SupportLang::Css, + "diff" => SupportLang::Diff, + "patch" => SupportLang::Diff, + "elixir" => SupportLang::Elixir, + "ex" => SupportLang::Elixir, + "go" => SupportLang::Go, + "golang" => SupportLang::Go, + "haskell" => SupportLang::Haskell, + "hs" => SupportLang::Haskell, + "hcl" => SupportLang::Hcl, + "tf" => SupportLang::Hcl, + "tfvars" => SupportLang::Hcl, + "terraform" => SupportLang::Hcl, + "html" => SupportLang::Html, + "htm" => SupportLang::Html, + "java" => SupportLang::Java, + "javascript" => SupportLang::JavaScript, + "js" => SupportLang::JavaScript, + "jsx" => SupportLang::JavaScript, + "mjs" => SupportLang::JavaScript, + "cjs" => SupportLang::JavaScript, + "json" => SupportLang::Json, + "julia" => SupportLang::Julia, + "jl" => SupportLang::Julia, + "kotlin" => SupportLang::Kotlin, + "kt" => SupportLang::Kotlin, + "lua" => SupportLang::Lua, + "make" => SupportLang::Make, + "makefile" => SupportLang::Make, + "markdown" => SupportLang::Markdown, + "md" => SupportLang::Markdown, + "mdx" => SupportLang::Markdown, + "nix" => SupportLang::Nix, + "objc" => SupportLang::ObjC, + "objective-c" => SupportLang::ObjC, + "odin" => SupportLang::Odin, + "php" => SupportLang::Php, + "python" => SupportLang::Python, + "py" => SupportLang::Python, + "regex" => SupportLang::Regex, + "ruby" => SupportLang::Ruby, + "rb" => SupportLang::Ruby, + "rust" => SupportLang::Rust, + "rs" => SupportLang::Rust, + 
"scala" => SupportLang::Scala, + "solidity" => SupportLang::Solidity, + "sol" => SupportLang::Solidity, + "starlark" => SupportLang::Starlark, + "star" => SupportLang::Starlark, + "swift" => SupportLang::Swift, + "toml" => SupportLang::Toml, + "tsx" => SupportLang::Tsx, + "typescript" => SupportLang::TypeScript, + "ts" => SupportLang::TypeScript, + "mts" => SupportLang::TypeScript, + "cts" => SupportLang::TypeScript, + "verilog" => SupportLang::Verilog, + "systemverilog" => SupportLang::Verilog, + "sv" => SupportLang::Verilog, + "xml" => SupportLang::Xml, + "xsl" => SupportLang::Xml, + "svg" => SupportLang::Xml, + "yaml" => SupportLang::Yaml, + "yml" => SupportLang::Yaml, + "zig" => SupportLang::Zig, +}; + +fn supported_lang_list() -> String { + let mut keys: Vec<&str> = LANG_ALIASES.keys().copied().collect(); + keys.sort_unstable(); + keys.join(", ") +} + +fn resolve_supported_lang(value: &str) -> Result { + let lower = value.to_ascii_lowercase(); + LANG_ALIASES.get(lower.as_str()).copied().ok_or_else(|| { + Error::from_reason(format!( + "Unsupported language '{value}'. Supported: {}", + supported_lang_list() + )) + }) +} + +fn resolve_language(lang: Option<&str>, file_path: &Path) -> Result { + if let Some(lang) = lang.map(str::trim).filter(|lang| !lang.is_empty()) { + return resolve_supported_lang(lang); + } + SupportLang::from_path(file_path).ok_or_else(|| { + Error::from_reason(format!( + "Unable to infer language from file extension: {}. 
Specify `lang` explicitly.", + file_path.display() + )) + }) +} + +fn is_supported_file(file_path: &Path, explicit_lang: Option<&str>) -> bool { + if explicit_lang.is_some() { + return true; + } + resolve_language(None, file_path).is_ok() +} + +fn infer_single_replace_lang(candidates: &[FileCandidate]) -> Result { + let mut inferred = BTreeSet::new(); + let mut unresolved = Vec::new(); + for candidate in candidates { + match resolve_language(None, &candidate.absolute_path) { + Ok(language) => { + inferred.insert(language.canonical_name().to_string()); + }, + Err(err) => unresolved.push(format!("{}: {}", candidate.display_path, err)), + } + } + if !unresolved.is_empty() { + let details = unresolved + .into_iter() + .map(|entry| format!("- {entry}")) + .collect::>() + .join("\n"); + return Err(Error::from_reason(format!( + "`lang` is required for ast_edit when language cannot be inferred from all \ + files:\n{details}" + ))); + } + if inferred.is_empty() { + return Err(Error::from_reason( + "`lang` is required for ast_edit when no files match path/glob".to_string(), + )); + } + if inferred.len() > 1 { + return Err(Error::from_reason(format!( + "`lang` is required for ast_edit when path/glob resolves to multiple languages: {}", + inferred.into_iter().collect::>().join(", ") + ))); + } + Ok(inferred.into_iter().next().expect("non-empty inferred set")) +} + +fn parse_strictness(value: Option<&str>) -> Result { + let Some(raw) = value.map(str::trim).filter(|v| !v.is_empty()) else { + return Ok(MatchStrictness::Smart); + }; + raw.parse::() + .map_err(|err| Error::from_reason(format!("Invalid strictness '{raw}': {err}"))) +} + +fn normalize_search_path(path: Option) -> Result { + let raw = path.unwrap_or_else(|| ".".to_string()); + let candidate = PathBuf::from(raw.trim()); + let absolute = if candidate.is_absolute() { + candidate + } else { + std::env::current_dir() + .map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))? 
+ .join(candidate) + }; + Ok(std::fs::canonicalize(&absolute).unwrap_or(absolute)) +} + +/// Collect file candidates by walking the directory tree using the `ignore` +/// crate (respects .gitignore, skips hidden files). +fn collect_candidates( + path: Option, + glob: Option<&str>, +) -> Result> { + let search_path = normalize_search_path(path)?; + let metadata = std::fs::metadata(&search_path) + .map_err(|err| Error::from_reason(format!("Path not found: {err}")))?; + + if metadata.is_file() { + let display_path = search_path + .file_name() + .and_then(|name| name.to_str()) + .map_or_else( + || search_path.to_string_lossy().into_owned(), + std::string::ToString::to_string, + ); + return Ok(vec![FileCandidate { absolute_path: search_path, display_path }]); + } + + if !metadata.is_dir() { + return Err(Error::from_reason(format!( + "Search path must be a file or directory: {}", + search_path.display() + ))); + } + + let glob_set = glob_util::try_compile_glob(glob, false)?; + let mentions_node_modules = glob.is_some_and(|value| value.contains("node_modules")); + + let walker = WalkBuilder::new(&search_path) + .hidden(true) + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .build(); + + let mut files = Vec::new(); + for entry in walker { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + + if !entry.file_type().is_some_and(|ft| ft.is_file()) { + continue; + } + + let abs = entry.path().to_path_buf(); + let relative = abs + .strip_prefix(&search_path) + .map(|p| p.to_string_lossy().replace('\\', "/")) + .unwrap_or_else(|_| abs.to_string_lossy().into_owned()); + + if !mentions_node_modules && relative.contains("node_modules") { + continue; + } + + if let Some(ref gs) = glob_set { + if !gs.is_match(&relative) { + continue; + } + } + + files.push(FileCandidate { absolute_path: abs, display_path: relative }); + } + + files.sort_by(|a, b| a.display_path.cmp(&b.display_path)); + Ok(files) +} + +fn compile_pattern( + pattern: &str, + selector: 
Option<&str>, + strictness: &MatchStrictness, + lang: SupportLang, +) -> Result { + let mut compiled = if let Some(selector) = selector.map(str::trim).filter(|s| !s.is_empty()) { + Pattern::contextual(pattern, selector, lang) + } else { + Pattern::try_new(pattern, lang) + } + .map_err(|err| Error::from_reason(format!("Invalid pattern: {err}")))?; + compiled.strictness = strictness.clone(); + Ok(compiled) +} + +fn apply_edits(content: &str, edits: &[Edit]) -> Result { + let mut sorted: Vec<&Edit> = edits.iter().collect(); + sorted.sort_by_key(|edit| edit.position); + let mut prev_end = 0usize; + for edit in &sorted { + if edit.position < prev_end { + return Err(Error::from_reason( + "Overlapping replacements detected; refine pattern to avoid ambiguous edits" + .to_string(), + )); + } + prev_end = edit.position.saturating_add(edit.deleted_length); + } + + let mut output = content.to_string(); + for edit in sorted.into_iter().rev() { + let start = edit.position; + let end = edit.position.saturating_add(edit.deleted_length); + if end > output.len() || start > end { + return Err(Error::from_reason("Computed edit range is out of bounds".to_string())); + } + let replacement = String::from_utf8(edit.inserted_text.clone()).map_err(|err| { + Error::from_reason(format!("Replacement text is not valid UTF-8: {err}")) + })?; + output.replace_range(start..end, &replacement); + } + Ok(output) +} + +fn normalize_pattern_list(patterns: Option>) -> Result> { + let mut normalized = Vec::new(); + let mut seen = BTreeSet::new(); + for raw in patterns.unwrap_or_default() { + let pattern = raw.trim(); + if pattern.is_empty() { + continue; + } + if seen.insert(pattern.to_string()) { + normalized.push(pattern.to_string()); + } + } + if normalized.is_empty() { + return Err(Error::from_reason( + "`patterns` is required and must include at least one non-empty pattern".to_string(), + )); + } + Ok(normalized) +} + +fn normalize_rewrite_map( + rewrites: Option>, +) -> Result> { + let mut 
normalized = Vec::new(); + for (pattern, rewrite) in rewrites.unwrap_or_default() { + if pattern.is_empty() { + return Err(Error::from_reason( + "`rewrites` keys must be non-empty pattern strings".to_string(), + )); + } + normalized.push((pattern, rewrite)); + } + if normalized.is_empty() { + return Err(Error::from_reason( + "`rewrites` is required and must include at least one pattern->rewrite mapping" + .to_string(), + )); + } + normalized.sort_by(|left, right| left.0.cmp(&right.0)); + Ok(normalized) +} + +struct CompiledFindPattern { + pattern: String, + compiled_by_lang: HashMap, + compile_errors_by_lang: HashMap, +} + +struct ResolvedCandidate { + candidate: FileCandidate, + language: Option, + language_error: Option, +} + +fn resolve_candidates_for_find( + candidates: Vec, + lang: Option<&str>, +) -> Result<(Vec, HashMap)> { + let mut resolved = Vec::with_capacity(candidates.len()); + let mut languages = HashMap::new(); + + for candidate in candidates { + match resolve_language(lang, &candidate.absolute_path) { + Ok(language) => { + let key = language.canonical_name().to_string(); + languages.entry(key).or_insert(language); + resolved.push(ResolvedCandidate { + candidate, + language: Some(language), + language_error: None, + }); + }, + Err(err) => { + resolved.push(ResolvedCandidate { + candidate, + language: None, + language_error: Some(err.to_string()), + }); + }, + } + } + + Ok((resolved, languages)) +} + +fn compile_find_patterns( + patterns: &[String], + languages: &HashMap, + selector: Option<&str>, + strictness: &MatchStrictness, +) -> Result> { + let mut compiled = Vec::with_capacity(patterns.len()); + + for pattern in patterns { + let mut compiled_by_lang = HashMap::with_capacity(languages.len()); + let mut compile_errors_by_lang = HashMap::new(); + + for (lang_key, &language) in languages { + match compile_pattern(pattern, selector, strictness, language) { + Ok(compiled_pattern) => { + compiled_by_lang.insert(lang_key.clone(), compiled_pattern); + 
}, + Err(err) => { + compile_errors_by_lang.insert(lang_key.clone(), err.to_string()); + }, + } + } + + compiled.push(CompiledFindPattern { + pattern: pattern.clone(), + compiled_by_lang, + compile_errors_by_lang, + }); + } + + Ok(compiled) +} + +/// Structural code search using ast-grep patterns. +/// +/// Searches files for AST patterns across 38+ languages. +#[napi(js_name = "astGrep")] +pub fn ast_grep(options: AstFindOptions) -> Result { + let AstFindOptions { + patterns, lang, path, glob, selector, strictness, + limit, offset, include_meta, context: _, + } = options; + + let normalized_limit = limit.unwrap_or(DEFAULT_FIND_LIMIT).max(1); + let normalized_offset = offset.unwrap_or(0); + + let patterns = normalize_pattern_list(patterns)?; + let strictness = parse_strictness(strictness.as_deref())?; + let include_meta = include_meta.unwrap_or(false); + let lang_str = lang.as_deref().map(str::trim).filter(|v| !v.is_empty()); + let candidates: Vec<_> = collect_candidates(path, glob.as_deref())? 
+ .into_iter() + .filter(|candidate| is_supported_file(&candidate.absolute_path, lang_str)) + .collect(); + + let (resolved_candidates, languages) = resolve_candidates_for_find(candidates, lang_str)?; + let compiled_patterns = compile_find_patterns(&patterns, &languages, selector.as_deref(), &strictness)?; + let files_searched = to_u32(resolved_candidates.len()); + + let mut all_matches = Vec::new(); + let mut parse_errors = Vec::new(); + let mut total_matches = 0u32; + let mut files_with_matches = BTreeSet::new(); + + for resolved in resolved_candidates { + let ResolvedCandidate { candidate, language, language_error } = resolved; + + if let Some(error) = language_error.as_deref() { + for compiled in &compiled_patterns { + parse_errors.push(format!("{}: {}: {error}", compiled.pattern, candidate.display_path)); + } + continue; + } + + let Some(language) = language else { continue }; + let lang_key = language.canonical_name(); + let source = match std::fs::read_to_string(&candidate.absolute_path) { + Ok(source) => source, + Err(err) => { + for compiled in &compiled_patterns { + parse_errors.push(format!("{}: {}: {err}", compiled.pattern, candidate.display_path)); + } + continue; + }, + }; + + let mut runnable_patterns: Vec<(&str, &Pattern)> = Vec::new(); + for compiled in &compiled_patterns { + if let Some(error) = compiled.compile_errors_by_lang.get(lang_key) { + parse_errors.push(format!("{}: {}: {error}", compiled.pattern, candidate.display_path)); + continue; + } + if let Some(pattern) = compiled.compiled_by_lang.get(lang_key) { + runnable_patterns.push((compiled.pattern.as_str(), pattern)); + } + } + if runnable_patterns.is_empty() { + continue; + } + + let ast = language.ast_grep(source); + if ast.root().dfs().any(|node| node.is_error()) { + parse_errors.push(format!( + "{}: parse error (syntax tree contains error nodes)", + candidate.display_path + )); + } + + for (_, pattern) in runnable_patterns { + for matched in ast.root().find_all(pattern.clone()) { + 
total_matches = total_matches.saturating_add(1); + let range = matched.range(); + let start = matched.start_pos(); + let end = matched.end_pos(); + let meta_variables = if include_meta { + Some(HashMap::::from(matched.get_env().clone())) + } else { + None + }; + all_matches.push(AstFindMatch { + path: candidate.display_path.clone(), + text: matched.text().into_owned(), + byte_start: to_u32(range.start), + byte_end: to_u32(range.end), + start_line: to_u32(start.line().saturating_add(1)), + start_column: to_u32(start.column(matched.get_node()).saturating_add(1)), + end_line: to_u32(end.line().saturating_add(1)), + end_column: to_u32(end.column(matched.get_node()).saturating_add(1)), + meta_variables, + }); + files_with_matches.insert(candidate.display_path.clone()); + } + } + } + + all_matches.sort_by(|left, right| { + left.path.cmp(&right.path) + .then(left.start_line.cmp(&right.start_line)) + .then(left.start_column.cmp(&right.start_column)) + .then(left.end_line.cmp(&right.end_line)) + .then(left.end_column.cmp(&right.end_column)) + .then(left.byte_start.cmp(&right.byte_start)) + .then(left.byte_end.cmp(&right.byte_end)) + }); + + let visible_matches = all_matches.into_iter().skip(normalized_offset as usize).collect::>(); + let limit_reached = visible_matches.len() > normalized_limit as usize; + let matches = visible_matches.into_iter().take(normalized_limit as usize).collect::>(); + + Ok(AstFindResult { + matches, + total_matches, + files_with_matches: to_u32(files_with_matches.len()), + files_searched, + limit_reached, + parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), + }) +} + +/// Structural code rewrite using ast-grep patterns. +/// +/// Applies pattern->replacement rewrites across files. Defaults to dry-run mode. 
+#[napi(js_name = "astEdit")] +pub fn ast_edit(options: AstReplaceOptions) -> Result { + let AstReplaceOptions { + rewrites, lang, path, glob, selector, strictness, + dry_run, max_replacements, max_files, fail_on_parse_error, + } = options; + + let rewrite_rules = normalize_rewrite_map(rewrites)?; + let strictness = parse_strictness(strictness.as_deref())?; + let dry_run = dry_run.unwrap_or(true); + let max_replacements = max_replacements.unwrap_or(u32::MAX).max(1); + let max_files = max_files.unwrap_or(u32::MAX).max(1); + let fail_on_parse_error = fail_on_parse_error.unwrap_or(false); + + let lang_str = lang.as_deref().map(str::trim).filter(|v| !v.is_empty()); + let candidates: Vec<_> = collect_candidates(path, glob.as_deref())? + .into_iter() + .filter(|candidate| is_supported_file(&candidate.absolute_path, lang_str)) + .collect(); + let effective_lang = if let Some(lang) = lang_str { + lang.to_string() + } else { + infer_single_replace_lang(&candidates)? + }; + + let language = resolve_supported_lang(&effective_lang)?; + let mut parse_errors = Vec::new(); + let mut compiled_rules = Vec::new(); + for (pattern, rewrite) in rewrite_rules { + match compile_pattern(&pattern, selector.as_deref(), &strictness, language) { + Ok(compiled) => compiled_rules.push((pattern, rewrite, compiled)), + Err(err) => { + if fail_on_parse_error { return Err(err); } + parse_errors.push(format!("{pattern}: {err}")); + }, + } + } + if compiled_rules.is_empty() { + return Ok(AstReplaceResult { + file_changes: vec![], total_replacements: 0, files_touched: 0, + files_searched: to_u32(candidates.len()), applied: !dry_run, + limit_reached: false, parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), + changes: vec![], + }); + } + + let mut changes = Vec::new(); + let mut file_counts: BTreeMap = BTreeMap::new(); + let mut files_touched = 0u32; + let mut limit_reached = false; + + for candidate in &candidates { + let source = match std::fs::read_to_string(&candidate.absolute_path) 
{ + Ok(source) => source, + Err(err) => { + if fail_on_parse_error { + return Err(Error::from_reason(format!("{}: {err}", candidate.display_path))); + } + parse_errors.push(format!("{}: {err}", candidate.display_path)); + continue; + }, + }; + + let ast = language.ast_grep(&source); + if ast.root().dfs().any(|node| node.is_error()) { + let parse_issue = format!( + "{}: parse error (syntax tree contains error nodes)", + candidate.display_path + ); + if fail_on_parse_error { return Err(Error::from_reason(parse_issue)); } + parse_errors.push(parse_issue); + continue; + } + + let mut file_changes = Vec::new(); + let mut reached_max_replacements = false; + 'patterns: for (_pattern, rewrite, compiled) in &compiled_rules { + for matched in ast.root().find_all(compiled.clone()) { + if changes.len() + file_changes.len() >= max_replacements as usize { + limit_reached = true; + reached_max_replacements = true; + break 'patterns; + } + let edit = matched.replace_by(rewrite.as_str()); + let range = matched.range(); + let start = matched.start_pos(); + let end = matched.end_pos(); + let after = String::from_utf8(edit.inserted_text.clone()).map_err(|err| { + Error::from_reason(format!( + "{}: replacement text is not valid UTF-8: {err}", + candidate.display_path + )) + })?; + file_changes.push(PendingFileChange { + change: AstReplaceChange { + path: candidate.display_path.clone(), + before: matched.text().into_owned(), + after, + byte_start: to_u32(range.start), + byte_end: to_u32(range.end), + deleted_length: to_u32(edit.deleted_length), + start_line: to_u32(start.line().saturating_add(1)), + start_column: to_u32(start.column(matched.get_node()).saturating_add(1)), + end_line: to_u32(end.line().saturating_add(1)), + end_column: to_u32(end.column(matched.get_node()).saturating_add(1)), + }, + edit, + }); + } + } + + if file_changes.is_empty() { + if reached_max_replacements { break; } + continue; + } + if files_touched >= max_files { + limit_reached = true; + break; + } + 
files_touched = files_touched.saturating_add(1); + file_counts.insert(candidate.display_path.clone(), to_u32(file_changes.len())); + + if !dry_run { + let edits: Vec> = file_changes.iter().map(|entry| Edit { + position: entry.edit.position, + deleted_length: entry.edit.deleted_length, + inserted_text: entry.edit.inserted_text.clone(), + }).collect(); + let output = apply_edits(&source, &edits)?; + if output != source { + std::fs::write(&candidate.absolute_path, output).map_err(|err| { + Error::from_reason(format!("Failed to write {}: {err}", candidate.display_path)) + })?; + } + } + + changes.extend(file_changes.into_iter().map(|entry| entry.change)); + if reached_max_replacements { break; } + } + + let file_changes = file_counts.into_iter() + .map(|(path, count)| AstReplaceFileChange { path, count }) + .collect::>(); + + Ok(AstReplaceResult { + file_changes, + total_replacements: to_u32(changes.len()), + files_touched, + files_searched: to_u32(candidates.len()), + applied: !dry_run, + limit_reached, + parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), + changes, + }) +} + +#[cfg(test)] +mod tests { + use std::{fs, path::PathBuf, time::{SystemTime, UNIX_EPOCH}}; + use super::*; + + struct TempTree { root: PathBuf } + impl Drop for TempTree { + fn drop(&mut self) { let _ = fs::remove_dir_all(&self.root); } + } + + fn make_temp_tree() -> TempTree { + let unique = SystemTime::now().duration_since(UNIX_EPOCH) + .expect("system time should be after UNIX_EPOCH").as_nanos(); + let root = std::env::temp_dir().join(format!("gsd-ast-test-{unique}")); + fs::create_dir_all(root.join("nested")).expect("temp nested dir should be created"); + fs::write(root.join("a.ts"), "const a = 1;\n").expect("temp file a.ts should be written"); + fs::write(root.join("nested").join("b.ts"), "const b = 2;\n") + .expect("temp file nested/b.ts should be written"); + TempTree { root } + } + + #[test] + fn resolves_supported_language_aliases() { + 
assert_eq!(resolve_supported_lang("ts").ok(), Some(SupportLang::TypeScript)); + assert_eq!(resolve_supported_lang("jsx").ok(), Some(SupportLang::JavaScript)); + assert_eq!(resolve_supported_lang("rs").ok(), Some(SupportLang::Rust)); + assert_eq!(resolve_supported_lang("kotlin").ok(), Some(SupportLang::Kotlin)); + assert_eq!(resolve_supported_lang("bash").ok(), Some(SupportLang::Bash)); + assert_eq!(resolve_supported_lang("c").ok(), Some(SupportLang::C)); + assert_eq!(resolve_supported_lang("cpp").ok(), Some(SupportLang::Cpp)); + assert!(resolve_supported_lang("brainfuck").is_err()); + } + + #[test] + fn applies_non_overlapping_edits() { + let source = "const answer = 41;"; + let edits = vec![ + Edit:: { position: 6, deleted_length: 6, inserted_text: b"value".to_vec() }, + Edit:: { position: 15, deleted_length: 2, inserted_text: b"42".to_vec() }, + ]; + let output = apply_edits(source, &edits).expect("edits should apply"); + assert_eq!(output, "const value = 42;"); + } + + #[test] + fn rejects_overlapping_edits() { + let source = "abcdef"; + let edits = vec![ + Edit:: { position: 1, deleted_length: 3, inserted_text: b"x".to_vec() }, + Edit:: { position: 2, deleted_length: 1, inserted_text: b"y".to_vec() }, + ]; + assert!(apply_edits(source, &edits).is_err()); + } + + #[test] + fn collect_candidates_finds_files() { + let tree = make_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), None) + .expect("candidate collection should succeed"); + let paths: Vec<_> = candidates.iter().map(|f| f.display_path.as_str()).collect(); + assert!(paths.contains(&"a.ts")); + assert!(paths.contains(&"nested/b.ts")); + } + + #[test] + fn infers_single_replace_lang_for_uniform_candidates() { + let tree = make_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), Some("**/*.ts")) + .expect("candidate collection should succeed"); + let inferred = 
infer_single_replace_lang(&candidates).expect("language should be inferred"); + assert_eq!(inferred, "typescript"); + } + + fn make_mixed_temp_tree() -> TempTree { + let unique = SystemTime::now().duration_since(UNIX_EPOCH) + .expect("system time should be after UNIX_EPOCH").as_nanos(); + let root = std::env::temp_dir().join(format!("gsd-ast-mixed-lang-test-{unique}")); + fs::create_dir_all(&root).expect("temp mixed-lang dir should be created"); + fs::write(root.join("a.ts"), "const a = 1;\n").expect("temp file a.ts should be written"); + fs::write(root.join("b.rs"), "fn main() {}\n").expect("temp file b.rs should be written"); + TempTree { root } + } + + #[test] + fn rejects_mixed_replace_lang_inference() { + let tree = make_mixed_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), None) + .expect("candidate collection should succeed"); + let err = infer_single_replace_lang(&candidates) + .expect_err("mixed language inference should fail"); + assert!(err.to_string().contains("multiple languages")); + } +} diff --git a/native/crates/ast/src/glob_util.rs b/native/crates/ast/src/glob_util.rs new file mode 100644 index 000000000..158454dcb --- /dev/null +++ b/native/crates/ast/src/glob_util.rs @@ -0,0 +1,54 @@ +//! Shared glob-pattern helpers for AST search. + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use napi::bindgen_prelude::*; + +/// Normalize a raw glob string: fix path separators, optionally prepend `**/` +/// for recursive matching, and close any unclosed `{` alternation groups. +pub fn build_glob_pattern(glob: &str, recursive: bool) -> String { + let normalized = glob.replace('\\', "/"); + let pattern = if !recursive || normalized.contains('/') || normalized.starts_with("**") { + normalized + } else { + format!("**/{normalized}") + }; + fix_unclosed_braces(pattern) +} + +/// Compile a glob pattern string into a [`GlobSet`]. 
/// Append one `}` for every `{` in `pattern` that has no matching `}`.
///
/// Only the totals are compared: when there are at least as many `}` as `{`,
/// the pattern is returned unchanged.
fn fix_unclosed_braces(pattern: String) -> String {
    let (open_count, close_count) =
        pattern
            .chars()
            .fold((0usize, 0usize), |(opened, closed), ch| match ch {
                '{' => (opened + 1, closed),
                '}' => (opened, closed + 1),
                _ => (opened, closed),
            });
    let missing = open_count.saturating_sub(close_count);
    let mut fixed = pattern;
    fixed.push_str(&"}".repeat(missing));
    fixed
}
/// Rewrite the `$` sigils of ast-grep meta variables to `expando`, a
/// character the target grammar accepts inside identifiers.
///
/// A run of `$` is replaced, one `expando` per `$`, when it is followed by
/// `A`-`Z` or `_` (`$VAR`, `$$VAR`, `$$$ARGS`) or when the run is exactly
/// three long (the anonymous `$$$`). Any other run of `$` is left as is.
///
/// Returns the input borrowed when it contains no `$`, so patterns without
/// meta variables cost no allocation.
fn pre_process_pattern(expando: char, query: &str) -> Cow<'_, str> {
    // Fast path: nothing to rewrite.
    if !query.contains('$') {
        return Cow::Borrowed(query);
    }

    let mut out = String::with_capacity(query.len());
    let mut dollar_count = 0usize;
    for c in query.chars() {
        if c == '$' {
            // Defer emitting until we know what follows the run.
            dollar_count += 1;
            continue;
        }
        let need_replace = matches!(c, 'A'..='Z' | '_') || dollar_count == 3;
        let sigil = if need_replace { expando } else { '$' };
        out.extend(std::iter::repeat_n(sigil, dollar_count));
        dollar_count = 0;
        out.push(c);
    }
    // A trailing run is only a meta variable when it is the anonymous `$$$`.
    let sigil = if dollar_count == 3 { expando } else { '$' };
    out.extend(std::iter::repeat_n(sigil, dollar_count));
    Cow::Owned(out)
}
impl_lang_expando { + ($lang:ident, $func:ident, $char:expr) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn expando_char(&self) -> char { + $char + } + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + parsers::$func().into() + } + } + }; +} + +// ── Customized languages with expando_char ────────────────────────────── + +impl_lang_expando!(C, language_c, '\u{10000}'); +impl_lang_expando!(Cpp, language_cpp, '\u{10000}'); +impl_lang_expando!(CSharp, language_c_sharp, 'µ'); +impl_lang_expando!(Css, language_css, '_'); +impl_lang_expando!(Elixir, language_elixir, 'µ'); +impl_lang_expando!(Go, language_go, 'µ'); +impl_lang_expando!(Haskell, language_haskell, 'µ'); +impl_lang_expando!(Hcl, language_hcl, 'µ'); +impl_lang_expando!(Kotlin, language_kotlin, 'µ'); +impl_lang_expando!(Nix, language_nix, '_'); +impl_lang_expando!(Php, language_php, 'µ'); +impl_lang_expando!(Python, language_python, 'µ'); +impl_lang_expando!(Ruby, language_ruby, 'µ'); +impl_lang_expando!(Rust, language_rust, 'µ'); +impl_lang_expando!(Swift, language_swift, 'µ'); +impl_lang_expando!(Make, language_make, 'µ'); +impl_lang_expando!(ObjC, language_objc, '\u{10000}'); +impl_lang_expando!(Starlark, language_starlark, 'µ'); +impl_lang_expando!(Odin, language_odin, 'µ'); +impl_lang_expando!(Julia, language_julia, 'µ'); +impl_lang_expando!(Verilog, language_verilog, 'µ'); +impl_lang_expando!(Zig, language_zig, 'µ'); + +// ── Stub languages ($ accepted in 
grammar) ────────────────────────────── + +impl_lang!(Bash, language_bash); +impl_lang!(Java, language_java); +impl_lang!(JavaScript, language_javascript); +impl_lang!(Json, language_json); +impl_lang!(Lua, language_lua); +impl_lang!(Scala, language_scala); +impl_lang!(Solidity, language_solidity); +impl_lang!(Tsx, language_tsx); +impl_lang!(TypeScript, language_typescript); +impl_lang!(Yaml, language_yaml); +impl_lang!(Markdown, language_markdown); +impl_lang!(Toml, language_toml); +impl_lang!(Diff, language_diff); +impl_lang!(Xml, language_xml); +impl_lang!(Regex, language_regex); + +// ── Html (custom implementation with injection support) ────────────────── + +#[derive(Clone, Copy, Debug)] +pub struct Html; + +impl Language for Html { + fn expando_char(&self) -> char { + 'z' + } + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } +} + +impl LanguageExt for Html { + fn get_ts_language(&self) -> TSLanguage { + parsers::language_html() + } + + fn injectable_languages(&self) -> Option<&'static [&'static str]> { + Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"]) + } + + fn extract_injections( + &self, + root: Node>, + ) -> HashMap> { + let lang = root.lang(); + let mut map = HashMap::new(); + let matcher = KindMatcher::new("script_element", lang.clone()); + for script in root.find_all(matcher) { + let injected = find_html_lang(&script).unwrap_or_else(|| "js".into()); + let content = script.children().find(|c| c.kind() == "raw_text"); + if let Some(content) = content { + map.entry(injected) + .or_insert_with(Vec::new) + 
.push(node_to_range(&content)); + } + } + let matcher = KindMatcher::new("style_element", lang.clone()); + for style in root.find_all(matcher) { + let injected = find_html_lang(&style).unwrap_or_else(|| "css".into()); + let content = style.children().find(|c| c.kind() == "raw_text"); + if let Some(content) = content { + map.entry(injected) + .or_insert_with(Vec::new) + .push(node_to_range(&content)); + } + } + map + } +} + +fn find_html_lang(node: &Node) -> Option { + let html = node.lang(); + let attr_matcher = KindMatcher::new("attribute", html.clone()); + let name_matcher = KindMatcher::new("attribute_name", html.clone()); + let val_matcher = KindMatcher::new("attribute_value", html.clone()); + node.find_all(attr_matcher).find_map(|attr| { + let name = attr.find(&name_matcher)?; + if name.text() != "lang" { + return None; + } + let val = attr.find(&val_matcher)?; + Some(val.text().to_string()) + }) +} + +fn node_to_range(node: &Node) -> TSRange { + let r = node.range(); + let start = node.start_pos(); + let sp = start.byte_point(); + let sp = tree_sitter::Point::new(sp.0, sp.1); + let end = node.end_pos(); + let ep = end.byte_point(); + let ep = tree_sitter::Point::new(ep.0, ep.1); + TSRange { start_byte: r.start, end_byte: r.end, start_point: sp, end_point: ep } +} + +// ── SupportLang enum ──────────────────────────────────────────────────── + +/// All supported languages for ast-grep structural search/replace. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum SupportLang { + Bash, C, Cpp, CSharp, Css, Diff, Elixir, Go, Haskell, Hcl, Html, + Java, JavaScript, Json, Julia, Kotlin, Lua, Make, Markdown, Nix, + ObjC, Odin, Php, Python, Regex, Ruby, Rust, Scala, Solidity, + Starlark, Swift, Toml, Tsx, TypeScript, Verilog, Xml, Yaml, Zig, +} + +impl SupportLang { + pub const fn all_langs() -> &'static [Self] { + use SupportLang::*; + &[ + Bash, C, Cpp, CSharp, Css, Diff, Elixir, Go, Haskell, Hcl, Html, Java, JavaScript, Json, + Julia, Kotlin, Lua, Make, Markdown, Nix, ObjC, Odin, Php, Python, Regex, Ruby, Rust, + Scala, Solidity, Starlark, Swift, Toml, Tsx, TypeScript, Verilog, Xml, Yaml, Zig, + ] + } + + pub const fn canonical_name(self) -> &'static str { + match self { + Self::Bash => "bash", Self::C => "c", Self::Cpp => "cpp", + Self::CSharp => "csharp", Self::Css => "css", Self::Diff => "diff", + Self::Elixir => "elixir", Self::Go => "go", Self::Haskell => "haskell", + Self::Hcl => "hcl", Self::Html => "html", Self::Java => "java", + Self::JavaScript => "javascript", Self::Json => "json", Self::Julia => "julia", + Self::Kotlin => "kotlin", Self::Lua => "lua", Self::Make => "make", + Self::Markdown => "markdown", Self::Nix => "nix", Self::ObjC => "objc", + Self::Odin => "odin", Self::Php => "php", Self::Python => "python", + Self::Regex => "regex", Self::Ruby => "ruby", Self::Rust => "rust", + Self::Scala => "scala", Self::Solidity => "solidity", Self::Starlark => "starlark", + Self::Swift => "swift", Self::Toml => "toml", Self::Tsx => "tsx", + Self::TypeScript => "typescript", Self::Verilog => "verilog", Self::Xml => "xml", + Self::Yaml => "yaml", Self::Zig => "zig", + } + } +} + +impl fmt::Display for SupportLang { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } +} + +// ── Dispatch macro ────────────────────────────────────────────────────── + +macro_rules! 
execute_lang_method { + ($me:path, $method:ident, $($pname:tt),*) => { + use SupportLang as S; + match $me { + S::Bash => Bash.$method($($pname,)*), + S::C => C.$method($($pname,)*), + S::Cpp => Cpp.$method($($pname,)*), + S::CSharp => CSharp.$method($($pname,)*), + S::Css => Css.$method($($pname,)*), + S::Diff => Diff.$method($($pname,)*), + S::Elixir => Elixir.$method($($pname,)*), + S::Go => Go.$method($($pname,)*), + S::Haskell => Haskell.$method($($pname,)*), + S::Hcl => Hcl.$method($($pname,)*), + S::Html => Html.$method($($pname,)*), + S::Java => Java.$method($($pname,)*), + S::JavaScript => JavaScript.$method($($pname,)*), + S::Json => Json.$method($($pname,)*), + S::Julia => Julia.$method($($pname,)*), + S::Kotlin => Kotlin.$method($($pname,)*), + S::Lua => Lua.$method($($pname,)*), + S::Make => Make.$method($($pname,)*), + S::Markdown => Markdown.$method($($pname,)*), + S::Nix => Nix.$method($($pname,)*), + S::ObjC => ObjC.$method($($pname,)*), + S::Odin => Odin.$method($($pname,)*), + S::Php => Php.$method($($pname,)*), + S::Python => Python.$method($($pname,)*), + S::Regex => Regex.$method($($pname,)*), + S::Ruby => Ruby.$method($($pname,)*), + S::Rust => Rust.$method($($pname,)*), + S::Scala => Scala.$method($($pname,)*), + S::Solidity => Solidity.$method($($pname,)*), + S::Starlark => Starlark.$method($($pname,)*), + S::Swift => Swift.$method($($pname,)*), + S::Toml => Toml.$method($($pname,)*), + S::Tsx => Tsx.$method($($pname,)*), + S::TypeScript => TypeScript.$method($($pname,)*), + S::Verilog => Verilog.$method($($pname,)*), + S::Xml => Xml.$method($($pname,)*), + S::Yaml => Yaml.$method($($pname,)*), + S::Zig => Zig.$method($($pname,)*), + } + }; +} + +macro_rules! impl_lang_method { + ($method:ident, ($($pname:tt: $ptype:ty),*) => $return_type:ty) => { + #[inline] + fn $method(&self, $($pname: $ptype),*) -> $return_type { + execute_lang_method! 
{ self, $method, $($pname),* } + } + }; +} + +impl Language for SupportLang { + impl_lang_method!(kind_to_id, (kind: &str) => u16); + impl_lang_method!(field_to_id, (field: &str) => Option); + impl_lang_method!(meta_var_char, () => char); + impl_lang_method!(expando_char, () => char); + impl_lang_method!(extract_meta_var, (source: &str) => Option); + impl_lang_method!(build_pattern, (builder: &PatternBuilder) => Result); + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + execute_lang_method! { self, pre_process_pattern, query } + } + + fn from_path>(path: P) -> Option { + from_extension(path.as_ref()) + } +} + +impl LanguageExt for SupportLang { + impl_lang_method!(get_ts_language, () => TSLanguage); + impl_lang_method!(injectable_languages, () => Option<&'static [&'static str]>); + + fn extract_injections( + &self, + root: Node>, + ) -> HashMap> { + match self { + Self::Html => Html.extract_injections(root), + _ => HashMap::new(), + } + } +} + +// ── File extension mapping ────────────────────────────────────────────── + +const fn extensions(lang: SupportLang) -> &'static [&'static str] { + use SupportLang::*; + match lang { + Bash => &["bash", "bats", "cgi", "command", "env", "fcgi", "ksh", "sh", "tmux", "tool", "zsh"], + C => &["c", "h"], + Cpp => &["cc", "hpp", "cpp", "c++", "hh", "cxx", "cu", "ino"], + CSharp => &["cs"], + Css => &["css", "scss"], + Diff => &["diff", "patch"], + Elixir => &["ex", "exs"], + Go => &["go"], + Haskell => &["hs"], + Hcl => &["hcl", "tf", "tfvars"], + Html => &["html", "htm", "xhtml"], + Java => &["java"], + JavaScript => &["cjs", "js", "mjs", "jsx"], + Json => &["json"], + Julia => &["jl"], + Kotlin => &["kt", "ktm", "kts"], + Lua => &["lua"], + Make => &["mk", "mak"], + Markdown => &["md", "markdown", "mdx"], + Nix => &["nix"], + ObjC => &["m"], + Odin => &["odin"], + Php => &["php"], + Python => &["py", "py3", "pyi", "bzl"], + Regex => &[], + Ruby => &["rb", "rbw", "gemspec"], + Rust => &["rs"], + Scala => 
&["scala", "sc", "sbt"], + Solidity => &["sol"], + Starlark => &["star", "bzl"], + Swift => &["swift"], + Toml => &["toml"], + Tsx => &["tsx"], + TypeScript => &["ts", "cts", "mts"], + Verilog => &["v", "sv", "svh", "vh"], + Xml => &["xml", "xsl", "xslt", "svg", "plist"], + Yaml => &["yaml", "yml"], + Zig => &["zig"], + } +} + +/// Guess language from file extension. +pub fn from_extension(path: &Path) -> Option { + let ext = path.extension()?.to_str()?; + if ext.is_empty() { + let name = path.file_name()?.to_str()?; + return match name { + "Makefile" | "makefile" | "GNUmakefile" => Some(SupportLang::Make), + _ => None, + }; + } + SupportLang::all_langs() + .iter() + .copied() + .find(|&l| extensions(l).contains(&ext)) +} diff --git a/native/crates/ast/src/language/parsers.rs b/native/crates/ast/src/language/parsers.rs new file mode 100644 index 000000000..5c31b31ca --- /dev/null +++ b/native/crates/ast/src/language/parsers.rs @@ -0,0 +1,118 @@ +//! Tree-sitter parser functions for all supported languages. 
+ +use ast_grep_core::tree_sitter::TSLanguage; + +pub fn language_bash() -> TSLanguage { + tree_sitter_bash::LANGUAGE.into() +} +pub fn language_c() -> TSLanguage { + tree_sitter_c::LANGUAGE.into() +} +pub fn language_cpp() -> TSLanguage { + tree_sitter_cpp::LANGUAGE.into() +} +pub fn language_c_sharp() -> TSLanguage { + tree_sitter_c_sharp::LANGUAGE.into() +} +pub fn language_css() -> TSLanguage { + tree_sitter_css::LANGUAGE.into() +} +pub fn language_diff() -> TSLanguage { + tree_sitter_diff::LANGUAGE.into() +} +pub fn language_elixir() -> TSLanguage { + tree_sitter_elixir::LANGUAGE.into() +} +pub fn language_go() -> TSLanguage { + tree_sitter_go::LANGUAGE.into() +} +pub fn language_haskell() -> TSLanguage { + tree_sitter_haskell::LANGUAGE.into() +} +pub fn language_hcl() -> TSLanguage { + tree_sitter_hcl::LANGUAGE.into() +} +pub fn language_html() -> TSLanguage { + tree_sitter_html::LANGUAGE.into() +} +pub fn language_java() -> TSLanguage { + tree_sitter_java::LANGUAGE.into() +} +pub fn language_javascript() -> TSLanguage { + tree_sitter_javascript::LANGUAGE.into() +} +pub fn language_json() -> TSLanguage { + tree_sitter_json::LANGUAGE.into() +} +pub fn language_julia() -> TSLanguage { + tree_sitter_julia::LANGUAGE.into() +} +pub fn language_kotlin() -> TSLanguage { + tree_sitter_kotlin::LANGUAGE.into() +} +pub fn language_lua() -> TSLanguage { + tree_sitter_lua::LANGUAGE.into() +} +pub fn language_make() -> TSLanguage { + tree_sitter_make::LANGUAGE.into() +} +pub fn language_markdown() -> TSLanguage { + tree_sitter_md::LANGUAGE.into() +} +pub fn language_nix() -> TSLanguage { + tree_sitter_nix::LANGUAGE.into() +} +pub fn language_objc() -> TSLanguage { + tree_sitter_objc::LANGUAGE.into() +} +pub fn language_odin() -> TSLanguage { + tree_sitter_odin::LANGUAGE.into() +} +pub fn language_php() -> TSLanguage { + tree_sitter_php::LANGUAGE_PHP_ONLY.into() +} +pub fn language_python() -> TSLanguage { + tree_sitter_python::LANGUAGE.into() +} +pub fn language_regex() -> 
TSLanguage { + tree_sitter_regex::LANGUAGE.into() +} +pub fn language_ruby() -> TSLanguage { + tree_sitter_ruby::LANGUAGE.into() +} +pub fn language_rust() -> TSLanguage { + tree_sitter_rust::LANGUAGE.into() +} +pub fn language_scala() -> TSLanguage { + tree_sitter_scala::LANGUAGE.into() +} +pub fn language_solidity() -> TSLanguage { + tree_sitter_solidity::LANGUAGE.into() +} +pub fn language_starlark() -> TSLanguage { + tree_sitter_starlark::LANGUAGE.into() +} +pub fn language_swift() -> TSLanguage { + tree_sitter_swift::LANGUAGE.into() +} +pub fn language_toml() -> TSLanguage { + tree_sitter_toml_ng::LANGUAGE.into() +} +pub fn language_tsx() -> TSLanguage { + tree_sitter_typescript::LANGUAGE_TSX.into() +} +pub fn language_typescript() -> TSLanguage { + tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into() +} +pub fn language_verilog() -> TSLanguage { + tree_sitter_verilog::LANGUAGE.into() +} +pub fn language_xml() -> TSLanguage { + tree_sitter_xml::LANGUAGE_XML.into() +} +pub fn language_yaml() -> TSLanguage { + tree_sitter_yaml::LANGUAGE.into() +} +pub fn language_zig() -> TSLanguage { + tree_sitter_zig::LANGUAGE.into() +} diff --git a/native/crates/ast/src/lib.rs b/native/crates/ast/src/lib.rs new file mode 100644 index 000000000..18cc50a2d --- /dev/null +++ b/native/crates/ast/src/lib.rs @@ -0,0 +1,10 @@ +//! AST-aware structural search and rewrite for GSD. +//! +//! Provides `astGrep` (search) and `astEdit` (rewrite) N-API functions +//! powered by ast-grep with tree-sitter grammars for 38+ languages. 
+ +#![allow(clippy::needless_pass_by_value)] + +pub mod ast; +pub mod glob_util; +pub mod language; diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index 90cb772b4..7ac7a8756 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -11,6 +11,7 @@ description = "N-API native addon for GSD — exposes high-performance Rust modu crate-type = ["cdylib"] [dependencies] +gsd-ast = { path = "../ast" } gsd-grep = { path = "../grep" } arboard = "3" image = { version = "0.25", default-features = false, features = ["png"] } diff --git a/native/crates/engine/src/ast.rs b/native/crates/engine/src/ast.rs new file mode 100644 index 000000000..2d2646332 --- /dev/null +++ b/native/crates/engine/src/ast.rs @@ -0,0 +1,6 @@ +//! N-API bindings for the AST module. +//! +//! Forces the linker to include `gsd_ast` so napi-rs ctor registrations +//! for `astGrep` and `astEdit` are linked into the cdylib. + +use gsd_ast as _; diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 8ab224c6c..0f6736e4d 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -8,5 +8,6 @@ #![allow(clippy::needless_pass_by_value)] +mod ast; mod clipboard; mod grep; diff --git a/packages/native/package.json b/packages/native/package.json index a195cc0af..276ca324e 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -22,6 +22,10 @@ "./clipboard": { "types": "./src/clipboard/index.ts", "import": "./src/clipboard/index.ts" + }, + "./ast": { + "types": "./src/ast/index.ts", + "import": "./src/ast/index.ts" } }, "files": [ diff --git a/packages/native/src/ast/index.ts b/packages/native/src/ast/index.ts new file mode 100644 index 000000000..314d6d719 --- /dev/null +++ b/packages/native/src/ast/index.ts @@ -0,0 +1,67 @@ +/** + * AST-aware structural search and rewrite via ast-grep. + * + * Supports 38+ languages with tree-sitter grammars. 
/**
 * Structural code search using ast-grep patterns.
 *
 * Searches files for AST patterns across 38+ languages. Unlike regex,
 * patterns match the syntax tree structure, ignoring whitespace and
 * formatting differences.
 *
 * @param options - Patterns to look for plus optional path, language and
 *   paging settings; see {@link AstFindOptions}.
 * @returns The matches and search statistics reported by the native addon.
 *
 * @example
 * ```ts
 * const result = astGrep({
 *   patterns: ["console.log($$$ARGS)"],
 *   path: "./src",
 *   lang: "typescript",
 * });
 * ```
 */
export function astGrep(options: AstFindOptions): AstFindResult {
  // The binding object already declares `astGrep: (options: unknown) => unknown`,
  // so it is called directly instead of re-casting `native` to a loose record.
  // The addon returns `unknown`; narrowing to the documented result shape is
  // the single assertion at this boundary.
  return native.astGrep(options) as AstFindResult;
}

/**
 * Structural code rewrite using ast-grep patterns.
 *
 * Applies pattern->replacement rewrites across files. Meta-variables
 * ($VAR, $$$ARGS) captured in patterns are substituted in replacements.
 * Defaults to dry-run mode -- set `dryRun: false` to write changes.
 *
 * @param options - Rewrite rules plus optional path, language and limit
 *   settings; see {@link AstReplaceOptions}.
 * @returns The individual changes and per-file summaries reported by the
 *   native addon.
 *
 * @example
 * ```ts
 * const result = astEdit({
 *   rewrites: { "console.log($$$ARGS)": "logger.info($$$ARGS)" },
 *   path: "./src",
 *   lang: "typescript",
 *   dryRun: false,
 * });
 * ```
 */
export function astEdit(options: AstReplaceOptions): AstReplaceResult {
  // Same boundary handling as `astGrep`: typed binding call, one result assertion.
  return native.astEdit(options) as AstReplaceResult;
}
/** Options for structural AST search via ast-grep. */
export interface AstFindOptions {
  /** One or more ast-grep patterns to search for. */
  patterns: string[];
  /** Language to parse files as (e.g. "typescript", "python"). Inferred from extension when omitted. */
  lang?: string;
  /** File or directory path to search. Defaults to cwd. */
  path?: string;
  /** Glob filter for filenames (e.g. "**\/*.ts"). */
  glob?: string;
  /** AST node kind selector to narrow pattern scope. */
  selector?: string;
  /** Match strictness: "cst", "smart", "ast", "relaxed", "signature". Defaults to "smart". */
  strictness?: string;
  /** Maximum number of matches to return. Defaults to 50. */
  limit?: number;
  /** Number of matches to skip before returning results. */
  offset?: number;
  /** Include meta-variable bindings in results. */
  includeMeta?: boolean;
  /** Lines of context around matches (reserved for future use). */
  context?: number;
}

/** A single structural match from ast-grep search. */
export interface AstFindMatch {
  /** Relative file path. */
  path: string;
  /** Matched source text. */
  text: string;
  /** Byte offset of match start. */
  byteStart: number;
  /** Byte offset of match end. */
  byteEnd: number;
  /** 1-indexed start line. */
  startLine: number;
  /** 1-indexed start column. */
  startColumn: number;
  /** 1-indexed end line. */
  endLine: number;
  /** 1-indexed end column. */
  endColumn: number;
  /**
   * Meta-variable bindings (when includeMeta is true).
   *
   * NOTE(review): keyed by meta-variable name with the captured text as the
   * value — confirm the exact key format against the native implementation.
   */
  metaVariables?: Record<string, string>;
}

/** Result of an ast-grep structural search. */
export interface AstFindResult {
  /** Matched nodes (paginated by limit/offset). */
  matches: AstFindMatch[];
  /** Total match count across all files. */
  totalMatches: number;
  /** Number of files containing at least one match. */
  filesWithMatches: number;
  /** Number of files searched. */
  filesSearched: number;
  /** Whether more matches exist beyond the limit. */
  limitReached: boolean;
  /** Parse errors encountered (non-fatal). */
  parseErrors?: string[];
}

/** Options for structural AST rewrite via ast-grep. */
export interface AstReplaceOptions {
  /** Map of pattern -> replacement. Meta-variables ($VAR) in replacements are substituted. */
  rewrites: Record<string, string>;
  /** Language to parse files as. Required when path/glob spans multiple languages. */
  lang?: string;
  /** File or directory path. Defaults to cwd. */
  path?: string;
  /** Glob filter for filenames. */
  glob?: string;
  /** AST node kind selector. */
  selector?: string;
  /** Match strictness. Defaults to "smart". */
  strictness?: string;
  /** Preview changes without writing files. Defaults to true. */
  dryRun?: boolean;
  /** Maximum total replacements. */
  maxReplacements?: number;
  /** Maximum files to modify. */
  maxFiles?: number;
  /** Fail on parse errors instead of skipping. */
  failOnParseError?: boolean;
}

/** A single replacement change from ast-grep rewrite. */
export interface AstReplaceChange {
  /** Relative file path. */
  path: string;
  /** Original source text. */
  before: string;
  /** Replacement text. */
  after: string;
  /** Byte offset of change start. */
  byteStart: number;
  /** Byte offset of change end. */
  byteEnd: number;
  /** Number of bytes deleted. */
  deletedLength: number;
  /** 1-indexed start line. */
  startLine: number;
  /** 1-indexed start column. */
  startColumn: number;
  /** 1-indexed end line. */
  endLine: number;
  /** 1-indexed end column. */
  endColumn: number;
}

/** Per-file change summary. */
export interface AstReplaceFileChange {
  /** Relative file path. */
  path: string;
  /** Number of replacements in this file. */
  count: number;
}

/** Result of an ast-grep structural rewrite. */
export interface AstReplaceResult {
  /** Individual replacement changes. */
  changes: AstReplaceChange[];
  /** Per-file change summaries. */
  fileChanges: AstReplaceFileChange[];
  /** Total number of replacements. */
  totalReplacements: number;
  /** Number of files modified. */
  filesTouched: number;
  /** Number of files searched. */
  filesSearched: number;
  /** Whether changes were written to disk (false when dryRun is true). */
  applied: boolean;
  /** Whether limits stopped processing early. */
  limitReached: boolean;
  /** Parse errors encountered (non-fatal). */
  parseErrors?: string[];
}