From df39cea85ee8e1587b1b1fbd8af864f55b9d5403 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 13 Mar 2026 13:01:41 -0600 Subject: [PATCH] feat: add native ast module with ast-grep structural search and rewrite Adds the `gsd-ast` crate providing AST-aware code search (`astGrep`) and rewrite (`astEdit`) via ast-grep with tree-sitter grammars for 38+ languages. Replaces Oh My Pi's fs_cache/task dependencies with the `ignore` crate for .gitignore-respecting file walking. Includes TypeScript type declarations and wrappers in packages/native. Co-Authored-By: Claude Opus 4.6 (1M context) --- native/Cargo.lock | 586 +++++++++++++++++++++- native/crates/ast/Cargo.toml | 54 ++ native/crates/ast/src/ast.rs | 432 ++++++++++++++++ native/crates/ast/src/glob_util.rs | 116 +++++ native/crates/ast/src/language/mod.rs | 511 +++++++++++++++++++ native/crates/ast/src/language/parsers.rs | 118 +++++ native/crates/ast/src/lib.rs | 6 + native/crates/engine/Cargo.toml | 1 + native/crates/engine/src/ast.rs | 2 + native/crates/engine/src/lib.rs | 1 + packages/native/package.json | 6 +- packages/native/src/ast/index.ts | 12 + packages/native/src/ast/types.ts | 75 +++ packages/native/src/index.ts | 6 + packages/native/src/native.ts | 2 + 15 files changed, 1926 insertions(+), 2 deletions(-) create mode 100644 native/crates/ast/Cargo.toml create mode 100644 native/crates/ast/src/ast.rs create mode 100644 native/crates/ast/src/glob_util.rs create mode 100644 native/crates/ast/src/language/mod.rs create mode 100644 native/crates/ast/src/language/parsers.rs create mode 100644 native/crates/ast/src/lib.rs create mode 100644 native/crates/engine/src/ast.rs create mode 100644 packages/native/src/ast/index.ts create mode 100644 packages/native/src/ast/types.ts diff --git a/native/Cargo.lock b/native/Cargo.lock index 3c0803f89..d297ecc07 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -37,6 +37,18 @@ dependencies = [ "x11rb", ] +[[package]] +name = "ast-grep-core" +version = 
"0.39.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057ae90e7256ebf85f840b1638268df0142c9d19467d500b790631fd301acc27" +dependencies = [ + "bit-set", + "regex", + "thiserror", + "tree-sitter", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -96,6 +108,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -178,7 +200,7 @@ checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -218,6 +240,12 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.14" @@ -245,6 +273,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fax" version = "0.2.6" @@ -274,6 +308,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "flate2" version = "1.1.9" @@ -350,6 +390,56 @@ dependencies = [ "memmap2", ] +[[package]] +name = "gsd-ast" +version = "0.1.0" +dependencies 
= [ + "ast-grep-core", + "globset", + "ignore", + "napi", + "napi-derive", + "phf", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-diff", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-hcl", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + "tree-sitter-julia", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-make", + "tree-sitter-md", + "tree-sitter-nix", + "tree-sitter-objc", + "tree-sitter-odin", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-regex", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-solidity", + "tree-sitter-starlark", + "tree-sitter-swift", + "tree-sitter-toml-ng", + "tree-sitter-typescript", + "tree-sitter-verilog", + "tree-sitter-xml", + "tree-sitter-yaml", + "tree-sitter-zig", +] + [[package]] name = "gsd-engine" version = "0.1.0" @@ -357,6 +447,7 @@ dependencies = [ "arboard", "dashmap", "globset", + "gsd-ast", "gsd-grep", "ignore", "image", @@ -395,6 +486,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "ignore" version = "0.4.25" @@ -425,6 +522,22 @@ dependencies = [ "tiff", ] +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "libc" version = "0.2.183" @@ -671,6 +784,49 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "png" version = "0.18.1" @@ -835,18 +991,50 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "simd-adler32" version = "0.3.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "syn" version = "2.0.117" @@ -910,6 +1098,396 @@ dependencies = [ "zune-jpeg", ] +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = 
"0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-css" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-diff" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfe1e5ca280a65dfe5ba4205c1bcc84edf486464fed315db53dee6da9a335889" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-elixir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66dd064a762ed95bfc29857fa3cb7403bb1e5cb88112de0f6341b7e47284ba40" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "977c51e504548cba13fc27cb5a2edab2124cf6716a1934915d07ab99523b05a4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-hcl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7b2cc3d7121553b84309fab9d11b3ff3d420403eef9ae50f9fd1cd9d9cf012" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-html" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee" +dependencies 
= [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-json" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-sg" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0e175b7530765d1e36ad234a7acaa8b2a3316153f239d724376c7ee5e8d8e98" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-lua" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-make" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c5998dc7cbcbdab19fae8aefef982bf2d6544513d8d2e69cc44aec4c63810104" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-md" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efd398be546456c814598ee56c0f51769a77241511b4a58077815d120afa882" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-odin" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24db210fe9ba2237c71c5030d7b146c7025420ba72dd8013d13cd822c3a8d77a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-regex" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8a59be9f0ac131fd8f062eaaba14882b2fa5a6a7882a20134cb1d60df2e625" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" 
+version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scala" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b4f354028b5fcf1d0c77f1c6d84cd5a579f29a1e43cb61551ec6580e9a99229" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-starlark" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8934f282d085cc4b9ee28aa688aa3fbe8aa3766201c2a6252f411d45b4c3a721" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-toml-ng" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9adc2c898ae49730e857d75be403da3f92bb81d8e37a2f918a08dd10de5ebb1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-verilog" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e7e0360395852f1f6ff5b7b82c72dc6557d181073188df1d60ec469ea69c66" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1073,6 +1651,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zune-core" version = "0.5.1" diff --git a/native/crates/ast/Cargo.toml b/native/crates/ast/Cargo.toml new file mode 100644 index 000000000..91647fb32 --- /dev/null +++ b/native/crates/ast/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "gsd-ast" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "AST-aware structural search and rewrite via ast-grep for GSD native engine" + +[dependencies] +ast-grep-core = { 
version = "0.39", default-features = false, features = ["tree-sitter"] } +globset = "0.4" +ignore = "0.4" +napi = { version = "2", features = ["napi8"] } +napi-derive = "2" +phf = { version = "0.13", features = ["macros"] } +tree-sitter = "0.25" +tree-sitter-bash = "0.25" +tree-sitter-c = "0.24" +tree-sitter-c-sharp = "0.23" +tree-sitter-cpp = "0.23" +tree-sitter-css = "0.25" +tree-sitter-diff = "0.1" +tree-sitter-elixir = "0.3" +tree-sitter-go = "0.25" +tree-sitter-haskell = "0.23" +tree-sitter-hcl = "1.1" +tree-sitter-html = "0.23" +tree-sitter-java = "0.23" +tree-sitter-javascript = "0.25" +tree-sitter-json = "0.23" +tree-sitter-julia = "0.23" +tree-sitter-kotlin = { version = "0.4", package = "tree-sitter-kotlin-sg" } +tree-sitter-lua = "0.2" +tree-sitter-make = "1.1" +tree-sitter-md = "0.5" +tree-sitter-nix = "0.3" +tree-sitter-objc = "3.0" +tree-sitter-odin = "1.3" +tree-sitter-php = "0.24" +tree-sitter-python = "0.25" +tree-sitter-regex = "0.25" +tree-sitter-ruby = "0.23" +tree-sitter-rust = "0.24" +tree-sitter-scala = "0.24" +tree-sitter-solidity = "1.2" +tree-sitter-starlark = "1.3" +tree-sitter-swift = "0.7" +tree-sitter-toml-ng = "0.7" +tree-sitter-typescript = "0.23" +tree-sitter-verilog = "1.0" +tree-sitter-xml = "0.7" +tree-sitter-yaml = "0.7" +tree-sitter-zig = "1.1" diff --git a/native/crates/ast/src/ast.rs b/native/crates/ast/src/ast.rs new file mode 100644 index 000000000..8559af9f3 --- /dev/null +++ b/native/crates/ast/src/ast.rs @@ -0,0 +1,432 @@ +//! AST-aware structural search and rewrite powered by ast-grep. 
+ +use std::{collections::{BTreeMap, BTreeSet, HashMap}, path::{Path, PathBuf}}; + +use ast_grep_core::{Language, MatchStrictness, matcher::Pattern, source::Edit, tree_sitter::LanguageExt}; +use ignore::WalkBuilder; +use napi::bindgen_prelude::*; +use napi_derive::napi; + +use crate::{glob_util, language::SupportLang}; + +const DEFAULT_FIND_LIMIT: u32 = 50; + +#[napi(object)] +pub struct AstFindOptions { + pub patterns: Option>, + pub lang: Option, + pub path: Option, + pub glob: Option, + pub selector: Option, + pub strictness: Option, + pub limit: Option, + pub offset: Option, + #[napi(js_name = "includeMeta")] + pub include_meta: Option, + pub context: Option, +} + +#[napi(object)] +pub struct AstFindMatch { + pub path: String, + pub text: String, + #[napi(js_name = "byteStart")] + pub byte_start: u32, + #[napi(js_name = "byteEnd")] + pub byte_end: u32, + #[napi(js_name = "startLine")] + pub start_line: u32, + #[napi(js_name = "startColumn")] + pub start_column: u32, + #[napi(js_name = "endLine")] + pub end_line: u32, + #[napi(js_name = "endColumn")] + pub end_column: u32, + #[napi(js_name = "metaVariables")] + pub meta_variables: Option>, +} + +#[napi(object)] +pub struct AstFindResult { + pub matches: Vec, + #[napi(js_name = "totalMatches")] + pub total_matches: u32, + #[napi(js_name = "filesWithMatches")] + pub files_with_matches: u32, + #[napi(js_name = "filesSearched")] + pub files_searched: u32, + #[napi(js_name = "limitReached")] + pub limit_reached: bool, + #[napi(js_name = "parseErrors")] + pub parse_errors: Option>, +} + +#[napi(object)] +pub struct AstReplaceOptions { + pub rewrites: Option>, + pub lang: Option, + pub path: Option, + pub glob: Option, + pub selector: Option, + pub strictness: Option, + #[napi(js_name = "dryRun")] + pub dry_run: Option, + #[napi(js_name = "maxReplacements")] + pub max_replacements: Option, + #[napi(js_name = "maxFiles")] + pub max_files: Option, + #[napi(js_name = "failOnParseError")] + pub fail_on_parse_error: Option, 
+} + +#[napi(object)] +pub struct AstReplaceChange { + pub path: String, pub before: String, pub after: String, + #[napi(js_name = "byteStart")] + pub byte_start: u32, + #[napi(js_name = "byteEnd")] + pub byte_end: u32, + #[napi(js_name = "deletedLength")] + pub deleted_length: u32, + #[napi(js_name = "startLine")] + pub start_line: u32, + #[napi(js_name = "startColumn")] + pub start_column: u32, + #[napi(js_name = "endLine")] + pub end_line: u32, + #[napi(js_name = "endColumn")] + pub end_column: u32, +} + +#[napi(object)] +pub struct AstReplaceFileChange { pub path: String, pub count: u32 } + +#[napi(object)] +pub struct AstReplaceResult { + pub changes: Vec, + #[napi(js_name = "fileChanges")] + pub file_changes: Vec, + #[napi(js_name = "totalReplacements")] + pub total_replacements: u32, + #[napi(js_name = "filesTouched")] + pub files_touched: u32, + #[napi(js_name = "filesSearched")] + pub files_searched: u32, + pub applied: bool, + #[napi(js_name = "limitReached")] + pub limit_reached: bool, + #[napi(js_name = "parseErrors")] + pub parse_errors: Option>, +} + +struct FileCandidate { absolute_path: PathBuf, display_path: String } +struct PendingFileChange { change: AstReplaceChange, edit: Edit } +fn to_u32(value: usize) -> u32 { value.min(u32::MAX as usize) as u32 } + +static LANG_ALIASES: phf::Map<&'static str, SupportLang> = phf::phf_map! 
{ + "bash" => SupportLang::Bash, "sh" => SupportLang::Bash, + "c" => SupportLang::C, "cpp" => SupportLang::Cpp, "c++" => SupportLang::Cpp, + "cc" => SupportLang::Cpp, "cxx" => SupportLang::Cpp, + "csharp" => SupportLang::CSharp, "c#" => SupportLang::CSharp, "cs" => SupportLang::CSharp, + "css" => SupportLang::Css, "diff" => SupportLang::Diff, "patch" => SupportLang::Diff, + "elixir" => SupportLang::Elixir, "ex" => SupportLang::Elixir, + "go" => SupportLang::Go, "golang" => SupportLang::Go, + "haskell" => SupportLang::Haskell, "hs" => SupportLang::Haskell, + "hcl" => SupportLang::Hcl, "tf" => SupportLang::Hcl, "tfvars" => SupportLang::Hcl, "terraform" => SupportLang::Hcl, + "html" => SupportLang::Html, "htm" => SupportLang::Html, + "java" => SupportLang::Java, + "javascript" => SupportLang::JavaScript, "js" => SupportLang::JavaScript, + "jsx" => SupportLang::JavaScript, "mjs" => SupportLang::JavaScript, "cjs" => SupportLang::JavaScript, + "json" => SupportLang::Json, "julia" => SupportLang::Julia, "jl" => SupportLang::Julia, + "kotlin" => SupportLang::Kotlin, "kt" => SupportLang::Kotlin, + "lua" => SupportLang::Lua, "make" => SupportLang::Make, "makefile" => SupportLang::Make, + "markdown" => SupportLang::Markdown, "md" => SupportLang::Markdown, "mdx" => SupportLang::Markdown, + "nix" => SupportLang::Nix, "objc" => SupportLang::ObjC, "objective-c" => SupportLang::ObjC, + "odin" => SupportLang::Odin, "php" => SupportLang::Php, + "python" => SupportLang::Python, "py" => SupportLang::Python, + "regex" => SupportLang::Regex, "ruby" => SupportLang::Ruby, "rb" => SupportLang::Ruby, + "rust" => SupportLang::Rust, "rs" => SupportLang::Rust, + "scala" => SupportLang::Scala, "solidity" => SupportLang::Solidity, "sol" => SupportLang::Solidity, + "starlark" => SupportLang::Starlark, "star" => SupportLang::Starlark, + "swift" => SupportLang::Swift, "toml" => SupportLang::Toml, "tsx" => SupportLang::Tsx, + "typescript" => SupportLang::TypeScript, "ts" => SupportLang::TypeScript, 
+ "mts" => SupportLang::TypeScript, "cts" => SupportLang::TypeScript, + "verilog" => SupportLang::Verilog, "systemverilog" => SupportLang::Verilog, "sv" => SupportLang::Verilog, + "xml" => SupportLang::Xml, "xsl" => SupportLang::Xml, "svg" => SupportLang::Xml, + "yaml" => SupportLang::Yaml, "yml" => SupportLang::Yaml, "zig" => SupportLang::Zig, +}; + +fn supported_lang_list() -> String { let mut keys: Vec<&str> = LANG_ALIASES.keys().copied().collect(); keys.sort_unstable(); keys.join(", ") } + +fn resolve_supported_lang(value: &str) -> Result { + let lower = value.to_ascii_lowercase(); + LANG_ALIASES.get(lower.as_str()).copied().ok_or_else(|| Error::from_reason(format!("Unsupported language '{value}'. Supported: {}", supported_lang_list()))) +} + +fn resolve_language(lang: Option<&str>, file_path: &Path) -> Result { + if let Some(lang) = lang.map(str::trim).filter(|l| !l.is_empty()) { return resolve_supported_lang(lang); } + SupportLang::from_path(file_path).ok_or_else(|| Error::from_reason(format!("Unable to infer language from file extension: {}. 
Specify `lang` explicitly.", file_path.display()))) +} + +fn is_supported_file(file_path: &Path, explicit_lang: Option<&str>) -> bool { + if explicit_lang.is_some() { return true; } + resolve_language(None, file_path).is_ok() +} + +fn infer_single_replace_lang(candidates: &[FileCandidate]) -> Result { + let mut inferred = BTreeSet::new(); + let mut unresolved = Vec::new(); + for c in candidates { + match resolve_language(None, &c.absolute_path) { + Ok(l) => { inferred.insert(l.canonical_name().to_string()); }, + Err(e) => unresolved.push(format!("{}: {}", c.display_path, e)), + } + } + if !unresolved.is_empty() { return Err(Error::from_reason(format!("`lang` is required for ast_edit when language cannot be inferred from all files:\n{}", unresolved.into_iter().map(|e| format!("- {e}")).collect::>().join("\n")))); } + if inferred.is_empty() { return Err(Error::from_reason("`lang` is required for ast_edit when no files match path/glob".to_string())); } + if inferred.len() > 1 { return Err(Error::from_reason(format!("`lang` is required for ast_edit when path/glob resolves to multiple languages: {}", inferred.into_iter().collect::>().join(", ")))); } + Ok(inferred.into_iter().next().unwrap()) +} + +fn parse_strictness(value: Option<&str>) -> Result { + let Some(raw) = value.map(str::trim).filter(|v| !v.is_empty()) else { return Ok(MatchStrictness::Smart) }; + raw.parse::().map_err(|e| Error::from_reason(format!("Invalid strictness '{raw}': {e}"))) +} + +fn normalize_search_path(path: Option) -> Result { + let raw = path.unwrap_or_else(|| ".".into()); + let candidate = PathBuf::from(raw.trim()); + let absolute = if candidate.is_absolute() { candidate } else { std::env::current_dir().map_err(|e| Error::from_reason(format!("Failed to resolve cwd: {e}")))?.join(candidate) }; + Ok(std::fs::canonicalize(&absolute).unwrap_or(absolute)) +} + +fn collect_candidates(path: Option, glob: Option<&str>) -> Result> { + let search_path = normalize_search_path(path)?; + let metadata = 
std::fs::metadata(&search_path).map_err(|e| Error::from_reason(format!("Path not found: {e}")))?; + if metadata.is_file() { + let display_path = search_path.file_name().and_then(|n| n.to_str()).map_or_else(|| search_path.to_string_lossy().into_owned(), |s| s.to_string()); + return Ok(vec![FileCandidate { absolute_path: search_path, display_path }]); + } + if !metadata.is_dir() { return Err(Error::from_reason(format!("Search path must be a file or directory: {}", search_path.display()))); } + let glob_set = glob_util::try_compile_glob(glob, false)?; + let mentions_node_modules = glob.is_some_and(|v| v.contains("node_modules")); + let walker = WalkBuilder::new(&search_path).hidden(true).git_ignore(true).git_global(true).git_exclude(true).build(); + let mut files = Vec::new(); + for entry in walker { + let entry = match entry { Ok(e) => e, Err(_) => continue }; + if !entry.file_type().is_some_and(|ft| ft.is_file()) { continue; } + let abs = entry.path().to_path_buf(); + let relative = abs.strip_prefix(&search_path).map(|p| p.to_string_lossy().replace('\\', "/")).unwrap_or_else(|_| abs.to_string_lossy().into_owned()); + if !mentions_node_modules && relative.contains("node_modules") { continue; } + if let Some(ref gs) = glob_set { if !gs.is_match(&relative) { continue; } } + files.push(FileCandidate { absolute_path: abs, display_path: relative }); + } + files.sort_by(|a, b| a.display_path.cmp(&b.display_path)); + Ok(files) +} + +fn compile_pattern(pattern: &str, selector: Option<&str>, strictness: &MatchStrictness, lang: SupportLang) -> Result { + let mut compiled = if let Some(sel) = selector.map(str::trim).filter(|s| !s.is_empty()) { Pattern::contextual(pattern, sel, lang) } else { Pattern::try_new(pattern, lang) } + .map_err(|e| Error::from_reason(format!("Invalid pattern: {e}")))?; + compiled.strictness = strictness.clone(); + Ok(compiled) +} + +fn apply_edits(content: &str, edits: &[Edit]) -> Result { + let mut sorted: Vec<&Edit> = edits.iter().collect(); + 
sorted.sort_by_key(|e| e.position); + let mut prev_end = 0usize; + for edit in &sorted { if edit.position < prev_end { return Err(Error::from_reason("Overlapping replacements detected".to_string())); } prev_end = edit.position.saturating_add(edit.deleted_length); } + let mut output = content.to_string(); + for edit in sorted.into_iter().rev() { + let start = edit.position; let end = edit.position.saturating_add(edit.deleted_length); + if end > output.len() || start > end { return Err(Error::from_reason("Computed edit range is out of bounds".to_string())); } + let replacement = String::from_utf8(edit.inserted_text.clone()).map_err(|e| Error::from_reason(format!("Replacement text is not valid UTF-8: {e}")))?; + output.replace_range(start..end, &replacement); + } + Ok(output) +} + +fn normalize_pattern_list(patterns: Option>) -> Result> { + let mut normalized = Vec::new(); let mut seen = BTreeSet::new(); + for raw in patterns.unwrap_or_default() { let p = raw.trim(); if !p.is_empty() && seen.insert(p.to_string()) { normalized.push(p.to_string()); } } + if normalized.is_empty() { return Err(Error::from_reason("`patterns` is required and must include at least one non-empty pattern".to_string())); } + Ok(normalized) +} + +fn normalize_rewrite_map(rewrites: Option>) -> Result> { + let mut normalized = Vec::new(); + for (p, r) in rewrites.unwrap_or_default() { if p.is_empty() { return Err(Error::from_reason("`rewrites` keys must be non-empty".to_string())); } normalized.push((p, r)); } + if normalized.is_empty() { return Err(Error::from_reason("`rewrites` is required".to_string())); } + normalized.sort_by(|l, r| l.0.cmp(&r.0)); Ok(normalized) +} + +struct CompiledFindPattern { pattern: String, compiled_by_lang: HashMap, compile_errors_by_lang: HashMap } +struct ResolvedCandidate { candidate: FileCandidate, language: Option, language_error: Option } + +fn resolve_candidates_for_find(candidates: Vec, lang: Option<&str>) -> Result<(Vec, HashMap)> { + let mut resolved = 
Vec::with_capacity(candidates.len()); let mut languages = HashMap::new(); + for candidate in candidates { + match resolve_language(lang, &candidate.absolute_path) { + Ok(language) => { languages.entry(language.canonical_name().to_string()).or_insert(language); resolved.push(ResolvedCandidate { candidate, language: Some(language), language_error: None }); }, + Err(err) => resolved.push(ResolvedCandidate { candidate, language: None, language_error: Some(err.to_string()) }), + } + } + Ok((resolved, languages)) +} + +fn compile_find_patterns(patterns: &[String], languages: &HashMap, selector: Option<&str>, strictness: &MatchStrictness) -> Result> { + let mut compiled = Vec::with_capacity(patterns.len()); + for pattern in patterns { + let mut by_lang = HashMap::with_capacity(languages.len()); let mut errors = HashMap::new(); + for (key, &lang) in languages { match compile_pattern(pattern, selector, strictness, lang) { Ok(p) => { by_lang.insert(key.clone(), p); }, Err(e) => { errors.insert(key.clone(), e.to_string()); } } } + compiled.push(CompiledFindPattern { pattern: pattern.clone(), compiled_by_lang: by_lang, compile_errors_by_lang: errors }); + } + Ok(compiled) +} + +#[napi(js_name = "astGrep")] +pub fn ast_grep(options: AstFindOptions) -> Result { + let AstFindOptions { patterns, lang, path, glob, selector, strictness, limit, offset, include_meta, context: _ } = options; + let normalized_limit = limit.unwrap_or(DEFAULT_FIND_LIMIT).max(1); + let normalized_offset = offset.unwrap_or(0); + let patterns = normalize_pattern_list(patterns)?; + let strictness = parse_strictness(strictness.as_deref())?; + let include_meta = include_meta.unwrap_or(false); + let lang_str = lang.as_deref().map(str::trim).filter(|v| !v.is_empty()); + let candidates: Vec<_> = collect_candidates(path, glob.as_deref())?.into_iter().filter(|c| is_supported_file(&c.absolute_path, lang_str)).collect(); + let (resolved_candidates, languages) = resolve_candidates_for_find(candidates, lang_str)?; + let 
compiled_patterns = compile_find_patterns(&patterns, &languages, selector.as_deref(), &strictness)?; + let files_searched = to_u32(resolved_candidates.len()); + let mut all_matches = Vec::new(); let mut parse_errors = Vec::new(); let mut total_matches = 0u32; let mut files_with_matches = BTreeSet::new(); + for resolved in resolved_candidates { + let ResolvedCandidate { candidate, language, language_error } = resolved; + if let Some(error) = language_error.as_deref() { for c in &compiled_patterns { parse_errors.push(format!("{}: {}: {error}", c.pattern, candidate.display_path)); } continue; } + let Some(language) = language else { continue }; + let lang_key = language.canonical_name(); + let source = match std::fs::read_to_string(&candidate.absolute_path) { Ok(s) => s, Err(e) => { for c in &compiled_patterns { parse_errors.push(format!("{}: {}: {e}", c.pattern, candidate.display_path)); } continue; } }; + let mut runnable: Vec<(&str, &Pattern)> = Vec::new(); + for c in &compiled_patterns { + if let Some(e) = c.compile_errors_by_lang.get(lang_key) { parse_errors.push(format!("{}: {}: {e}", c.pattern, candidate.display_path)); continue; } + if let Some(p) = c.compiled_by_lang.get(lang_key) { runnable.push((c.pattern.as_str(), p)); } + } + if runnable.is_empty() { continue; } + let ast = language.ast_grep(source); + if ast.root().dfs().any(|node| node.is_error()) { parse_errors.push(format!("{}: parse error (syntax tree contains error nodes)", candidate.display_path)); } + for (_, pattern) in runnable { + for matched in ast.root().find_all(pattern.clone()) { + total_matches = total_matches.saturating_add(1); + let range = matched.range(); let start = matched.start_pos(); let end = matched.end_pos(); + let meta_variables = if include_meta { Some(HashMap::::from(matched.get_env().clone())) } else { None }; + all_matches.push(AstFindMatch { path: candidate.display_path.clone(), text: matched.text().into_owned(), byte_start: to_u32(range.start), byte_end: 
to_u32(range.end), start_line: to_u32(start.line().saturating_add(1)), start_column: to_u32(start.column(matched.get_node()).saturating_add(1)), end_line: to_u32(end.line().saturating_add(1)), end_column: to_u32(end.column(matched.get_node()).saturating_add(1)), meta_variables }); + files_with_matches.insert(candidate.display_path.clone()); + } + } + } + all_matches.sort_by(|l, r| l.path.cmp(&r.path).then(l.start_line.cmp(&r.start_line)).then(l.start_column.cmp(&r.start_column))); + let visible: Vec<_> = all_matches.into_iter().skip(normalized_offset as usize).collect(); + let limit_reached = visible.len() > normalized_limit as usize; + let matches: Vec<_> = visible.into_iter().take(normalized_limit as usize).collect(); + Ok(AstFindResult { matches, total_matches, files_with_matches: to_u32(files_with_matches.len()), files_searched, limit_reached, parse_errors: (!parse_errors.is_empty()).then_some(parse_errors) }) +} + +#[napi(js_name = "astEdit")] +pub fn ast_edit(options: AstReplaceOptions) -> Result { + let AstReplaceOptions { rewrites, lang, path, glob, selector, strictness, dry_run, max_replacements, max_files, fail_on_parse_error } = options; + let rewrite_rules = normalize_rewrite_map(rewrites)?; + let strictness = parse_strictness(strictness.as_deref())?; + let dry_run = dry_run.unwrap_or(true); let max_replacements = max_replacements.unwrap_or(u32::MAX).max(1); let max_files = max_files.unwrap_or(u32::MAX).max(1); let fail_on_parse_error = fail_on_parse_error.unwrap_or(false); + let lang_str = lang.as_deref().map(str::trim).filter(|v| !v.is_empty()); + let candidates: Vec<_> = collect_candidates(path, glob.as_deref())?.into_iter().filter(|c| is_supported_file(&c.absolute_path, lang_str)).collect(); + let effective_lang = if let Some(l) = lang_str { l.to_string() } else { infer_single_replace_lang(&candidates)? 
}; + let language = resolve_supported_lang(&effective_lang)?; + let mut parse_errors = Vec::new(); let mut compiled_rules = Vec::new(); + for (pattern, rewrite) in rewrite_rules { + match compile_pattern(&pattern, selector.as_deref(), &strictness, language) { Ok(c) => compiled_rules.push((pattern, rewrite, c)), Err(e) => { if fail_on_parse_error { return Err(e); } parse_errors.push(format!("{pattern}: {e}")); } } + } + if compiled_rules.is_empty() { return Ok(AstReplaceResult { file_changes: vec![], total_replacements: 0, files_touched: 0, files_searched: to_u32(candidates.len()), applied: !dry_run, limit_reached: false, parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), changes: vec![] }); } + let mut changes = Vec::new(); let mut file_counts: BTreeMap = BTreeMap::new(); let mut files_touched = 0u32; let mut limit_reached = false; + for candidate in &candidates { + let source = match std::fs::read_to_string(&candidate.absolute_path) { Ok(s) => s, Err(e) => { if fail_on_parse_error { return Err(Error::from_reason(format!("{}: {e}", candidate.display_path))); } parse_errors.push(format!("{}: {e}", candidate.display_path)); continue; } }; + let ast = language.ast_grep(&source); + if ast.root().dfs().any(|n| n.is_error()) { let msg = format!("{}: parse error (syntax tree contains error nodes)", candidate.display_path); if fail_on_parse_error { return Err(Error::from_reason(msg)); } parse_errors.push(msg); continue; } + let mut file_changes = Vec::new(); let mut reached_max = false; + 'patterns: for (_pat, rewrite, compiled) in &compiled_rules { + for matched in ast.root().find_all(compiled.clone()) { + if changes.len() + file_changes.len() >= max_replacements as usize { limit_reached = true; reached_max = true; break 'patterns; } + let edit = matched.replace_by(rewrite.as_str()); let range = matched.range(); let start = matched.start_pos(); let end = matched.end_pos(); + let after = String::from_utf8(edit.inserted_text.clone()).map_err(|e| 
Error::from_reason(format!("{}: replacement not valid UTF-8: {e}", candidate.display_path)))?; + file_changes.push(PendingFileChange { change: AstReplaceChange { path: candidate.display_path.clone(), before: matched.text().into_owned(), after, byte_start: to_u32(range.start), byte_end: to_u32(range.end), deleted_length: to_u32(edit.deleted_length), start_line: to_u32(start.line().saturating_add(1)), start_column: to_u32(start.column(matched.get_node()).saturating_add(1)), end_line: to_u32(end.line().saturating_add(1)), end_column: to_u32(end.column(matched.get_node()).saturating_add(1)) }, edit }); + } + } + if file_changes.is_empty() { if reached_max { break; } continue; } + if files_touched >= max_files { limit_reached = true; break; } + files_touched = files_touched.saturating_add(1); + file_counts.insert(candidate.display_path.clone(), to_u32(file_changes.len())); + if !dry_run { + let edits: Vec> = file_changes.iter().map(|e| Edit { position: e.edit.position, deleted_length: e.edit.deleted_length, inserted_text: e.edit.inserted_text.clone() }).collect(); + let output = apply_edits(&source, &edits)?; + if output != source { std::fs::write(&candidate.absolute_path, output).map_err(|e| Error::from_reason(format!("Failed to write {}: {e}", candidate.display_path)))?; } + } + changes.extend(file_changes.into_iter().map(|e| e.change)); + if reached_max { break; } + } + let file_changes: Vec<_> = file_counts.into_iter().map(|(p, c)| AstReplaceFileChange { path: p, count: c }).collect(); + Ok(AstReplaceResult { file_changes, total_replacements: to_u32(changes.len()), files_touched, files_searched: to_u32(candidates.len()), applied: !dry_run, limit_reached, parse_errors: (!parse_errors.is_empty()).then_some(parse_errors), changes }) +} + +#[cfg(test)] +mod tests { + use std::{fs, path::PathBuf, time::{SystemTime, UNIX_EPOCH}}; + use super::*; + struct TempTree { root: PathBuf } + impl Drop for TempTree { fn drop(&mut self) { let _ = fs::remove_dir_all(&self.root); } } 
+ fn make_temp_tree() -> TempTree { + let unique = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); + let root = std::env::temp_dir().join(format!("gsd-ast-test-{unique}")); + fs::create_dir_all(root.join("nested")).unwrap(); + fs::write(root.join("a.ts"), "const a = 1;\n").unwrap(); + fs::write(root.join("nested").join("b.ts"), "const b = 2;\n").unwrap(); + TempTree { root } + } + #[test] + fn resolves_supported_language_aliases() { + assert_eq!(resolve_supported_lang("ts").ok(), Some(SupportLang::TypeScript)); + assert_eq!(resolve_supported_lang("rs").ok(), Some(SupportLang::Rust)); + assert!(resolve_supported_lang("brainfuck").is_err()); + } + #[test] + fn applies_non_overlapping_edits() { + let edits = vec![Edit:: { position: 6, deleted_length: 6, inserted_text: b"value".to_vec() }, Edit:: { position: 15, deleted_length: 2, inserted_text: b"42".to_vec() }]; + assert_eq!(apply_edits("const answer = 41;", &edits).unwrap(), "const value = 42;"); + } + #[test] + fn rejects_overlapping_edits() { + let edits = vec![Edit:: { position: 1, deleted_length: 3, inserted_text: b"x".to_vec() }, Edit:: { position: 2, deleted_length: 1, inserted_text: b"y".to_vec() }]; + assert!(apply_edits("abcdef", &edits).is_err()); + } + #[test] + fn collect_candidates_finds_files() { + let tree = make_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), None).unwrap(); + let paths: Vec<_> = candidates.iter().map(|f| f.display_path.as_str()).collect(); + assert!(paths.contains(&"a.ts") && paths.contains(&"nested/b.ts")); + } + #[test] + fn infers_single_replace_lang() { + let tree = make_temp_tree(); + let candidates = collect_candidates(Some(tree.root.to_string_lossy().into_owned()), Some("**/*.ts")).unwrap(); + assert_eq!(infer_single_replace_lang(&candidates).unwrap(), "typescript"); + } + #[test] + fn rejects_mixed_replace_lang() { + let unique = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); + let root = 
std::env::temp_dir().join(format!("gsd-ast-mixed-{unique}")); + fs::create_dir_all(&root).unwrap(); + fs::write(root.join("a.ts"), "const a = 1;\n").unwrap(); + fs::write(root.join("b.rs"), "fn main() {}\n").unwrap(); + let candidates = collect_candidates(Some(root.to_string_lossy().into_owned()), None).unwrap(); + assert!(infer_single_replace_lang(&candidates).unwrap_err().to_string().contains("multiple languages")); + let _ = fs::remove_dir_all(&root); + } +} diff --git a/native/crates/ast/src/glob_util.rs b/native/crates/ast/src/glob_util.rs new file mode 100644 index 000000000..4ce59b548 --- /dev/null +++ b/native/crates/ast/src/glob_util.rs @@ -0,0 +1,116 @@ +//! Shared glob-pattern helpers used by both [`crate::glob`] and +//! [`crate::grep`]. + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use napi::bindgen_prelude::*; + +/// Normalize a raw glob string: fix path separators, optionally prepend `**/` +/// for recursive matching, and close any unclosed `{` alternation groups. +pub fn build_glob_pattern(glob: &str, recursive: bool) -> String { + let normalized = glob.replace('\\', "/"); + let pattern = if !recursive || normalized.contains('/') || normalized.starts_with("**") { + normalized + } else { + format!("**/{normalized}") + }; + fix_unclosed_braces(pattern) +} + +/// Compile a glob pattern string into a [`GlobSet`]. +/// +/// When `recursive` is true, simple patterns (no path separators, no leading +/// `**`) are automatically prefixed with `**/`. 
+pub fn compile_glob(glob: &str, recursive: bool) -> Result { + let mut builder = GlobSetBuilder::new(); + let pattern = build_glob_pattern(glob, recursive); + let glob = GlobBuilder::new(&pattern) + .literal_separator(true) + .build() + .map_err(|err| Error::from_reason(format!("Invalid glob pattern: {err}")))?; + builder.add(glob); + builder + .build() + .map_err(|err| Error::from_reason(format!("Failed to build glob matcher: {err}"))) +} + +/// Like [`compile_glob`], but accepts an `Option<&str>` — returns `Ok(None)` +/// when the input is `None`, empty, or whitespace-only. +pub fn try_compile_glob(glob: Option<&str>, recursive: bool) -> Result> { + let Some(glob) = glob.map(str::trim).filter(|v| !v.is_empty()) else { + return Ok(None); + }; + compile_glob(glob, recursive).map(Some) +} + +/// Close unclosed `{` alternation groups in a glob pattern. +/// +/// LLMs occasionally produce patterns like `*.{ts,js` without the closing `}`. +/// Rather than failing, we append the missing braces. 
+fn fix_unclosed_braces(pattern: String) -> String { + let opens = pattern.chars().filter(|&c| c == '{').count(); + let closes = pattern.chars().filter(|&c| c == '}').count(); + if opens > closes { + let mut fixed = pattern; + for _ in 0..(opens - closes) { + fixed.push('}'); + } + fixed + } else { + pattern + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple_pattern_gets_recursive_prefix() { + assert_eq!(build_glob_pattern("*.ts", true), "**/*.ts"); + } + + #[test] + fn pattern_with_path_stays_as_is() { + assert_eq!(build_glob_pattern("src/*.ts", true), "src/*.ts"); + } + + #[test] + fn already_recursive_pattern_unchanged() { + assert_eq!(build_glob_pattern("**/*.rs", true), "**/*.rs"); + } + + #[test] + fn non_recursive_keeps_simple_pattern() { + assert_eq!(build_glob_pattern("*.ts", false), "*.ts"); + } + + #[test] + fn backslashes_normalized() { + assert_eq!(build_glob_pattern("src\\**\\*.ts", true), "src/**/*.ts"); + } + + #[test] + fn unclosed_brace_gets_closed() { + assert_eq!(build_glob_pattern("*.{ts,tsx,js", true), "**/*.{ts,tsx,js}"); + } + + #[test] + fn deeply_unclosed_braces_all_closed() { + assert_eq!(build_glob_pattern("{a,{b,c}", true), "**/{a,{b,c}}"); + } + + #[test] + fn balanced_braces_unchanged() { + assert_eq!(build_glob_pattern("*.{ts,js}", true), "**/*.{ts,js}"); + } + + #[test] + fn compile_glob_accepts_valid_pattern() { + assert!(compile_glob("*.ts", true).is_ok()); + } + + #[test] + fn compile_glob_fixes_unclosed_brace() { + assert!(compile_glob("*.{ts,tsx,js", true).is_ok()); + } +} diff --git a/native/crates/ast/src/language/mod.rs b/native/crates/ast/src/language/mod.rs new file mode 100644 index 000000000..51f180665 --- /dev/null +++ b/native/crates/ast/src/language/mod.rs @@ -0,0 +1,511 @@ +//! Vendored and extended language definitions for ast-grep integration. +//! +//! Originally derived from `ast-grep-language` v0.39.9, stripped of +//! serde/ignore machinery, and extended with additional languages. 
+ +mod parsers; + +use std::{borrow::Cow, collections::HashMap, fmt, path::Path}; + +use ast_grep_core::{ + Doc, Language, Node, + matcher::{KindMatcher, Pattern, PatternBuilder, PatternError}, + meta_var::MetaVariable, + tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange}, +}; + +/// Implements a stub language (no expando / `pre_process_pattern` needed). +/// Use when the language grammar accepts `$VAR` as valid identifiers. +macro_rules! impl_lang { + ($lang:ident, $func:ident) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + parsers::$func().into() + } + } + }; +} + +fn pre_process_pattern(expando: char, query: &str) -> Cow<'_, str> { + let mut ret = Vec::with_capacity(query.len()); + let mut dollar_count = 0; + for c in query.chars() { + if c == '$' { + dollar_count += 1; + continue; + } + let need_replace = matches!(c, 'A'..='Z' | '_') || dollar_count == 3; + let sigil = if need_replace { expando } else { '$' }; + ret.extend(std::iter::repeat_n(sigil, dollar_count)); + dollar_count = 0; + ret.push(c); + } + let sigil = if dollar_count == 3 { expando } else { '$' }; + ret.extend(std::iter::repeat_n(sigil, dollar_count)); + Cow::Owned(ret.into_iter().collect()) +} + +/// Implements a language with `expando_char` / `pre_process_pattern`. +/// Use when the language does NOT accept `$` as a valid identifier character. +macro_rules! 
impl_lang_expando { + ($lang:ident, $func:ident, $char:expr) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn expando_char(&self) -> char { + $char + } + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + parsers::$func().into() + } + } + }; +} + +// ── Customized languages with expando_char ────────────────────────────── + +impl_lang_expando!(C, language_c, '𐀀'); +impl_lang_expando!(Cpp, language_cpp, '𐀀'); +impl_lang_expando!(CSharp, language_c_sharp, 'µ'); +impl_lang_expando!(Css, language_css, '_'); +impl_lang_expando!(Elixir, language_elixir, 'µ'); +impl_lang_expando!(Go, language_go, 'µ'); +impl_lang_expando!(Haskell, language_haskell, 'µ'); +impl_lang_expando!(Hcl, language_hcl, 'µ'); +impl_lang_expando!(Kotlin, language_kotlin, 'µ'); +impl_lang_expando!(Nix, language_nix, '_'); +impl_lang_expando!(Php, language_php, 'µ'); +impl_lang_expando!(Python, language_python, 'µ'); +impl_lang_expando!(Ruby, language_ruby, 'µ'); +impl_lang_expando!(Rust, language_rust, 'µ'); +impl_lang_expando!(Swift, language_swift, 'µ'); + +// New expando languages +impl_lang_expando!(Make, language_make, 'µ'); +impl_lang_expando!(ObjC, language_objc, '𐀀'); +impl_lang_expando!(Starlark, language_starlark, 'µ'); +impl_lang_expando!(Odin, language_odin, 'µ'); +impl_lang_expando!(Julia, language_julia, 'µ'); +impl_lang_expando!(Verilog, language_verilog, 'µ'); +impl_lang_expando!(Zig, language_zig, 'µ'); + +// ── Stub languages ($ accepted in 
grammar) ────────────────────────────── + +impl_lang!(Bash, language_bash); +impl_lang!(Java, language_java); +impl_lang!(JavaScript, language_javascript); +impl_lang!(Json, language_json); +impl_lang!(Lua, language_lua); +impl_lang!(Scala, language_scala); +impl_lang!(Solidity, language_solidity); +impl_lang!(Tsx, language_tsx); +impl_lang!(TypeScript, language_typescript); +impl_lang!(Yaml, language_yaml); + +// New stub languages +impl_lang!(Markdown, language_markdown); +impl_lang!(Toml, language_toml); +impl_lang!(Diff, language_diff); +impl_lang!(Xml, language_xml); +impl_lang!(Regex, language_regex); + +// ── Html (custom implementation with injection support) ────────────────── + +#[derive(Clone, Copy, Debug)] +pub struct Html; + +impl Language for Html { + fn expando_char(&self) -> char { + 'z' + } + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language().id_for_node_kind(kind, true) + } + + fn field_to_id(&self, field: &str) -> Option { + self + .get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } +} + +impl LanguageExt for Html { + fn get_ts_language(&self) -> TSLanguage { + parsers::language_html() + } + + fn injectable_languages(&self) -> Option<&'static [&'static str]> { + Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"]) + } + + fn extract_injections( + &self, + root: Node>, + ) -> HashMap> { + let lang = root.lang(); + let mut map = HashMap::new(); + let matcher = KindMatcher::new("script_element", lang.clone()); + for script in root.find_all(matcher) { + let injected = find_html_lang(&script).unwrap_or_else(|| "js".into()); + let content = script.children().find(|c| c.kind() == "raw_text"); + if let Some(content) = content { + map.entry(injected) + 
.or_insert_with(Vec::new) + .push(node_to_range(&content)); + } + } + let matcher = KindMatcher::new("style_element", lang.clone()); + for style in root.find_all(matcher) { + let injected = find_html_lang(&style).unwrap_or_else(|| "css".into()); + let content = style.children().find(|c| c.kind() == "raw_text"); + if let Some(content) = content { + map.entry(injected) + .or_insert_with(Vec::new) + .push(node_to_range(&content)); + } + } + map + } +} + +fn find_html_lang(node: &Node) -> Option { + let html = node.lang(); + let attr_matcher = KindMatcher::new("attribute", html.clone()); + let name_matcher = KindMatcher::new("attribute_name", html.clone()); + let val_matcher = KindMatcher::new("attribute_value", html.clone()); + node.find_all(attr_matcher).find_map(|attr| { + let name = attr.find(&name_matcher)?; + if name.text() != "lang" { + return None; + } + let val = attr.find(&val_matcher)?; + Some(val.text().to_string()) + }) +} + +fn node_to_range(node: &Node) -> TSRange { + let r = node.range(); + let start = node.start_pos(); + let sp = start.byte_point(); + let sp = tree_sitter::Point::new(sp.0, sp.1); + let end = node.end_pos(); + let ep = end.byte_point(); + let ep = tree_sitter::Point::new(ep.0, ep.1); + TSRange { start_byte: r.start, end_byte: r.end, start_point: sp, end_point: ep } +} + +// ── SupportLang enum ──────────────────────────────────────────────────── + +/// All supported languages for ast-grep structural search/replace. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum SupportLang { + Bash, + C, + Cpp, + CSharp, + Css, + Diff, + Elixir, + Go, + Haskell, + Hcl, + Html, + Java, + JavaScript, + Json, + Julia, + Kotlin, + Lua, + Make, + Markdown, + Nix, + ObjC, + Odin, + Php, + Python, + Regex, + Ruby, + Rust, + Scala, + Solidity, + Starlark, + Swift, + Toml, + Tsx, + TypeScript, + Verilog, + Xml, + Yaml, + Zig, +} + +impl SupportLang { + pub const fn all_langs() -> &'static [Self] { + use SupportLang::*; + &[ + Bash, C, Cpp, CSharp, Css, Diff, Elixir, Go, Haskell, Hcl, Html, Java, JavaScript, Json, + Julia, Kotlin, Lua, Make, Markdown, Nix, ObjC, Odin, Php, Python, Regex, Ruby, Rust, + Scala, Solidity, Starlark, Swift, Toml, Tsx, TypeScript, Verilog, Xml, Yaml, Zig, + ] + } + + /// The canonical lowercase name used as a stable key in alias maps, + /// file-type inference results, and error messages. + pub const fn canonical_name(self) -> &'static str { + match self { + Self::Bash => "bash", + Self::C => "c", + Self::Cpp => "cpp", + Self::CSharp => "csharp", + Self::Css => "css", + Self::Diff => "diff", + Self::Elixir => "elixir", + Self::Go => "go", + Self::Haskell => "haskell", + Self::Hcl => "hcl", + Self::Html => "html", + Self::Java => "java", + Self::JavaScript => "javascript", + Self::Json => "json", + Self::Julia => "julia", + Self::Kotlin => "kotlin", + Self::Lua => "lua", + Self::Make => "make", + Self::Markdown => "markdown", + Self::Nix => "nix", + Self::ObjC => "objc", + Self::Odin => "odin", + Self::Php => "php", + Self::Python => "python", + Self::Regex => "regex", + Self::Ruby => "ruby", + Self::Rust => "rust", + Self::Scala => "scala", + Self::Solidity => "solidity", + Self::Starlark => "starlark", + Self::Swift => "swift", + Self::Toml => "toml", + Self::Tsx => "tsx", + Self::TypeScript => "typescript", + Self::Verilog => "verilog", + Self::Xml => "xml", + Self::Yaml => "yaml", + Self::Zig => "zig", + } + } +} + +impl fmt::Display for SupportLang { + fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } +} + +// ── Dispatch macro ────────────────────────────────────────────────────── + +macro_rules! execute_lang_method { + ($me:path, $method:ident, $($pname:tt),*) => { + use SupportLang as S; + match $me { + S::Bash => Bash.$method($($pname,)*), + S::C => C.$method($($pname,)*), + S::Cpp => Cpp.$method($($pname,)*), + S::CSharp => CSharp.$method($($pname,)*), + S::Css => Css.$method($($pname,)*), + S::Diff => Diff.$method($($pname,)*), + S::Elixir => Elixir.$method($($pname,)*), + S::Go => Go.$method($($pname,)*), + S::Haskell => Haskell.$method($($pname,)*), + S::Hcl => Hcl.$method($($pname,)*), + S::Html => Html.$method($($pname,)*), + S::Java => Java.$method($($pname,)*), + S::JavaScript => JavaScript.$method($($pname,)*), + S::Json => Json.$method($($pname,)*), + S::Julia => Julia.$method($($pname,)*), + S::Kotlin => Kotlin.$method($($pname,)*), + S::Lua => Lua.$method($($pname,)*), + S::Make => Make.$method($($pname,)*), + S::Markdown => Markdown.$method($($pname,)*), + S::Nix => Nix.$method($($pname,)*), + S::ObjC => ObjC.$method($($pname,)*), + S::Odin => Odin.$method($($pname,)*), + S::Php => Php.$method($($pname,)*), + S::Python => Python.$method($($pname,)*), + S::Regex => Regex.$method($($pname,)*), + S::Ruby => Ruby.$method($($pname,)*), + S::Rust => Rust.$method($($pname,)*), + S::Scala => Scala.$method($($pname,)*), + S::Solidity => Solidity.$method($($pname,)*), + S::Starlark => Starlark.$method($($pname,)*), + S::Swift => Swift.$method($($pname,)*), + S::Toml => Toml.$method($($pname,)*), + S::Tsx => Tsx.$method($($pname,)*), + S::TypeScript => TypeScript.$method($($pname,)*), + S::Verilog => Verilog.$method($($pname,)*), + S::Xml => Xml.$method($($pname,)*), + S::Yaml => Yaml.$method($($pname,)*), + S::Zig => Zig.$method($($pname,)*), + } + }; +} + +macro_rules! 
impl_lang_method { + ($method:ident, ($($pname:tt: $ptype:ty),*) => $return_type:ty) => { + #[inline] + fn $method(&self, $($pname: $ptype),*) -> $return_type { + execute_lang_method! { self, $method, $($pname),* } + } + }; +} + +impl Language for SupportLang { + impl_lang_method!(kind_to_id, (kind: &str) => u16); + + impl_lang_method!(field_to_id, (field: &str) => Option); + + impl_lang_method!(meta_var_char, () => char); + + impl_lang_method!(expando_char, () => char); + + impl_lang_method!(extract_meta_var, (source: &str) => Option); + + impl_lang_method!(build_pattern, (builder: &PatternBuilder) => Result); + + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + execute_lang_method! { self, pre_process_pattern, query } + } + + fn from_path>(path: P) -> Option { + from_extension(path.as_ref()) + } +} + +impl LanguageExt for SupportLang { + impl_lang_method!(get_ts_language, () => TSLanguage); + + impl_lang_method!(injectable_languages, () => Option<&'static [&'static str]>); + + fn extract_injections( + &self, + root: Node>, + ) -> HashMap> { + match self { + Self::Html => Html.extract_injections(root), + _ => HashMap::new(), + } + } +} + +// ── File extension mapping ────────────────────────────────────────────── + +const fn extensions(lang: SupportLang) -> &'static [&'static str] { + use SupportLang::*; + match lang { + Bash => { + &["bash", "bats", "cgi", "command", "env", "fcgi", "ksh", "sh", "tmux", "tool", "zsh"] + }, + C => &["c", "h"], + Cpp => &["cc", "hpp", "cpp", "c++", "hh", "cxx", "cu", "ino"], + CSharp => &["cs"], + Css => &["css", "scss"], + Diff => &["diff", "patch"], + Elixir => &["ex", "exs"], + Go => &["go"], + Haskell => &["hs"], + Hcl => &["hcl", "tf", "tfvars"], + Html => &["html", "htm", "xhtml"], + Java => &["java"], + JavaScript => &["cjs", "js", "mjs", "jsx"], + Json => &["json"], + Julia => &["jl"], + Kotlin => &["kt", "ktm", "kts"], + Lua => &["lua"], + Make => &["mk", "mak"], + Markdown => &["md", "markdown", 
"mdx"], + Nix => &["nix"], + ObjC => &["m"], + Odin => &["odin"], + Php => &["php"], + Python => &["py", "py3", "pyi", "bzl"], + Regex => &[], // regex has no file extension + Ruby => &["rb", "rbw", "gemspec"], + Rust => &["rs"], + Scala => &["scala", "sc", "sbt"], + Solidity => &["sol"], + Starlark => &["star", "bzl"], + Swift => &["swift"], + Toml => &["toml"], + Tsx => &["tsx"], + TypeScript => &["ts", "cts", "mts"], + Verilog => &["v", "sv", "svh", "vh"], + Xml => &["xml", "xsl", "xslt", "svg", "plist"], + Yaml => &["yaml", "yml"], + Zig => &["zig"], + } +} + +/// Guess language from file extension. +fn from_extension(path: &Path) -> Option { + let ext = path.extension()?.to_str()?; + // Special cases: Makefile has no extension + if ext.is_empty() { + let name = path.file_name()?.to_str()?; + return match name { + "Makefile" | "makefile" | "GNUmakefile" => Some(SupportLang::Make), + _ => None, + }; + } + SupportLang::all_langs() + .iter() + .copied() + .find(|&l| extensions(l).contains(&ext)) +} diff --git a/native/crates/ast/src/language/parsers.rs b/native/crates/ast/src/language/parsers.rs new file mode 100644 index 000000000..5c31b31ca --- /dev/null +++ b/native/crates/ast/src/language/parsers.rs @@ -0,0 +1,118 @@ +//! Tree-sitter parser functions for all supported languages. 
+
+use ast_grep_core::tree_sitter::TSLanguage; // return type of the language_* constructors below
+
+pub fn language_bash() -> TSLanguage { // every constructor below converts a grammar crate's exported constant with .into()
+    tree_sitter_bash::LANGUAGE.into()
+}
+pub fn language_c() -> TSLanguage {
+    tree_sitter_c::LANGUAGE.into()
+}
+pub fn language_cpp() -> TSLanguage {
+    tree_sitter_cpp::LANGUAGE.into()
+}
+pub fn language_c_sharp() -> TSLanguage {
+    tree_sitter_c_sharp::LANGUAGE.into()
+}
+pub fn language_css() -> TSLanguage {
+    tree_sitter_css::LANGUAGE.into()
+}
+pub fn language_diff() -> TSLanguage {
+    tree_sitter_diff::LANGUAGE.into()
+}
+pub fn language_elixir() -> TSLanguage {
+    tree_sitter_elixir::LANGUAGE.into()
+}
+pub fn language_go() -> TSLanguage {
+    tree_sitter_go::LANGUAGE.into()
+}
+pub fn language_haskell() -> TSLanguage {
+    tree_sitter_haskell::LANGUAGE.into()
+}
+pub fn language_hcl() -> TSLanguage {
+    tree_sitter_hcl::LANGUAGE.into()
+}
+pub fn language_html() -> TSLanguage {
+    tree_sitter_html::LANGUAGE.into()
+}
+pub fn language_java() -> TSLanguage {
+    tree_sitter_java::LANGUAGE.into()
+}
+pub fn language_javascript() -> TSLanguage {
+    tree_sitter_javascript::LANGUAGE.into()
+}
+pub fn language_json() -> TSLanguage {
+    tree_sitter_json::LANGUAGE.into()
+}
+pub fn language_julia() -> TSLanguage {
+    tree_sitter_julia::LANGUAGE.into()
+}
+pub fn language_kotlin() -> TSLanguage {
+    tree_sitter_kotlin::LANGUAGE.into()
+}
+pub fn language_lua() -> TSLanguage {
+    tree_sitter_lua::LANGUAGE.into()
+}
+pub fn language_make() -> TSLanguage {
+    tree_sitter_make::LANGUAGE.into()
+}
+pub fn language_markdown() -> TSLanguage { // grammar crate is named tree_sitter_md, not tree_sitter_markdown
+    tree_sitter_md::LANGUAGE.into()
+}
+pub fn language_nix() -> TSLanguage {
+    tree_sitter_nix::LANGUAGE.into()
+}
+pub fn language_objc() -> TSLanguage {
+    tree_sitter_objc::LANGUAGE.into()
+}
+pub fn language_odin() -> TSLanguage {
+    tree_sitter_odin::LANGUAGE.into()
+}
+pub fn language_php() -> TSLanguage { // uses LANGUAGE_PHP_ONLY instead of a plain LANGUAGE constant; NOTE(review): presumably the grammar without embedded HTML, confirm in tree-sitter-php
+    tree_sitter_php::LANGUAGE_PHP_ONLY.into()
+}
+pub fn language_python() -> TSLanguage {
+    tree_sitter_python::LANGUAGE.into()
+}
+pub fn language_regex() -> TSLanguage {
+    tree_sitter_regex::LANGUAGE.into()
+}
+pub fn language_ruby() -> TSLanguage {
+    tree_sitter_ruby::LANGUAGE.into()
+}
+pub fn language_rust() -> TSLanguage {
+    tree_sitter_rust::LANGUAGE.into()
+}
+pub fn language_scala() -> TSLanguage {
+    tree_sitter_scala::LANGUAGE.into()
+}
+pub fn language_solidity() -> TSLanguage {
+    tree_sitter_solidity::LANGUAGE.into()
+}
+pub fn language_starlark() -> TSLanguage {
+    tree_sitter_starlark::LANGUAGE.into()
+}
+pub fn language_swift() -> TSLanguage {
+    tree_sitter_swift::LANGUAGE.into()
+}
+pub fn language_toml() -> TSLanguage { // grammar crate is tree_sitter_toml_ng
+    tree_sitter_toml_ng::LANGUAGE.into()
+}
+pub fn language_tsx() -> TSLanguage { // TSX and TypeScript are separate constants of the same tree_sitter_typescript crate
+    tree_sitter_typescript::LANGUAGE_TSX.into()
+}
+pub fn language_typescript() -> TSLanguage {
+    tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()
+}
+pub fn language_verilog() -> TSLanguage {
+    tree_sitter_verilog::LANGUAGE.into()
+}
+pub fn language_xml() -> TSLanguage { // tree_sitter_xml names its constant LANGUAGE_XML
+    tree_sitter_xml::LANGUAGE_XML.into()
+}
+pub fn language_yaml() -> TSLanguage {
+    tree_sitter_yaml::LANGUAGE.into()
+}
+pub fn language_zig() -> TSLanguage {
+    tree_sitter_zig::LANGUAGE.into()
+}
diff --git a/native/crates/ast/src/lib.rs b/native/crates/ast/src/lib.rs
new file mode 100644
index 000000000..8b4591654
--- /dev/null
+++ b/native/crates/ast/src/lib.rs
@@ -0,0 +1,6 @@
+//! AST-aware structural search and rewrite for GSD.
+#![allow(clippy::needless_pass_by_value)]
+
+pub mod ast;
+pub mod glob_util;
+pub mod language;
diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml
index 992a5811e..82917751e 100644
--- a/native/crates/engine/Cargo.toml
+++ b/native/crates/engine/Cargo.toml
@@ -11,6 +11,7 @@ description = "N-API native addon for GSD — exposes high-performance Rust modu
 crate-type = ["cdylib"]
 
 [dependencies]
+gsd-ast = { path = "../ast" } # local crate at native/crates/ast, created by this patch
 gsd-grep = { path = "../grep" }
 arboard = "3"
 dashmap = "6"
diff --git a/native/crates/engine/src/ast.rs b/native/crates/engine/src/ast.rs
new file mode 100644
index 000000000..78570d939
--- /dev/null
+++ b/native/crates/engine/src/ast.rs
@@ -0,0 +1,2 @@
+//! Forces the linker to include gsd_ast napi registrations.
+use gsd_ast as _; // unnamed import: nothing from gsd_ast is referenced by name here
diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs
index c37ad583b..012a80bd4 100644
--- a/native/crates/engine/src/lib.rs
+++ b/native/crates/engine/src/lib.rs
@@ -12,6 +12,7 @@ mod clipboard;
 mod fs_cache;
 mod glob;
 mod glob_util;
+mod ast; // module body is only `use gsd_ast as _;` (engine/src/ast.rs in this patch)
 mod grep;
 mod highlight;
 mod ps;
diff --git a/packages/native/package.json b/packages/native/package.json
index 2542389c1..adcfb2e48 100644
--- a/packages/native/package.json
+++ b/packages/native/package.json
@@ -8,7 +8,7 @@
   "scripts": {
     "build:native": "node ../../native/scripts/build.js",
     "build:native:dev": "node ../../native/scripts/build.js --dev",
-    "test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs src/__tests__/glob.test.mjs src/__tests__/clipboard.test.mjs"
+    "test": "node --test src/__tests__/grep.test.mjs src/__tests__/ps.test.mjs src/__tests__/glob.test.mjs src/__tests__/clipboard.test.mjs src/__tests__/highlight.test.mjs"
   },
   "exports": {
     ".": {
@@ -30,6 +30,10 @@
     "./clipboard": {
       "types": "./src/clipboard/index.ts",
       "import": "./src/clipboard/index.ts"
+    },
+    "./ast": {
+      "types": "./src/ast/index.ts",
+      "import": "./src/ast/index.ts"
     }
   },
   "files": [
diff --git
a/packages/native/src/ast/index.ts b/packages/native/src/ast/index.ts
new file mode 100644
index 000000000..01084d73d
--- /dev/null
+++ b/packages/native/src/ast/index.ts
@@ -0,0 +1,25 @@
+import { native } from "../native.js";
+import type { AstFindOptions, AstFindResult, AstReplaceOptions, AstReplaceResult, AstFindMatch, AstReplaceChange, AstReplaceFileChange } from "./types.js";
+
+export type { AstFindMatch, AstFindOptions, AstFindResult, AstReplaceChange, AstReplaceFileChange, AstReplaceOptions, AstReplaceResult };
+
+/**
+ * Runs an AST-aware structural search through the native `astGrep` binding.
+ *
+ * @param options - Search options, handed to the native binding unchanged.
+ * @returns The binding's return value, asserted (not validated at runtime) as `AstFindResult`.
+ */
+export function astGrep(options: AstFindOptions): AstFindResult {
+  // native.ts declares `astGrep: (options: unknown) => unknown`, so `native` itself needs no cast.
+  return native.astGrep(options) as AstFindResult;
+}
+
+/**
+ * Runs an AST-aware structural rewrite through the native `astEdit` binding.
+ *
+ * @param options - Rewrite options, handed to the native binding unchanged.
+ * @returns The binding's return value, asserted (not validated at runtime) as `AstReplaceResult`.
+ */
+export function astEdit(options: AstReplaceOptions): AstReplaceResult {
+  return native.astEdit(options) as AstReplaceResult;
+}
diff --git a/packages/native/src/ast/types.ts b/packages/native/src/ast/types.ts
new file mode 100644
index 000000000..ab98047c3
--- /dev/null
+++ b/packages/native/src/ast/types.ts
@@ -0,0 +1,84 @@
+/** Options accepted by `astGrep`. */
+export interface AstFindOptions {
+  patterns: string[];
+  lang?: string;
+  path?: string;
+  glob?: string;
+  selector?: string;
+  strictness?: string;
+  limit?: number;
+  offset?: number;
+  includeMeta?: boolean;
+  context?: number;
+}
+
+/** One entry of `AstFindResult.matches`. */
+export interface AstFindMatch {
+  path: string;
+  text: string;
+  byteStart: number;
+  byteEnd: number;
+  startLine: number;
+  startColumn: number;
+  endLine: number;
+  endColumn: number;
+  // Bare `Record` needs two type arguments. NOTE(review): <string, string> is assumed; confirm against the Rust binding.
+  metaVariables?: Record<string, string>;
+}
+
+/** Value returned by `astGrep`. */
+export interface AstFindResult {
+  matches: AstFindMatch[];
+  totalMatches: number;
+  filesWithMatches: number;
+  filesSearched: number;
+  limitReached: boolean;
+  parseErrors?: string[];
+}
+
+/** Options accepted by `astEdit`. */
+export interface AstReplaceOptions {
+  // NOTE(review): presumably maps a pattern to its replacement text; <string, string> is assumed, confirm against the Rust binding.
+  rewrites: Record<string, string>;
+  lang?: string;
+  path?: string;
+  glob?: string;
+  selector?: string;
+  strictness?: string;
+  dryRun?: boolean;
+  maxReplacements?: number;
+  maxFiles?: number;
+  failOnParseError?: boolean;
+}
+
+/** One entry of `AstReplaceResult.changes`. */
+export interface AstReplaceChange {
+  path: string;
+  before: string;
+  after: string;
+  byteStart: number;
+  byteEnd: number;
+  deletedLength: number;
+  startLine: number;
+  startColumn: number;
+  endLine: number;
+  endColumn: number;
+}
+
+/** One entry of `AstReplaceResult.fileChanges`. */
+export interface AstReplaceFileChange {
+  path: string;
+  count: number;
+}
+
+/** Value returned by `astEdit`. */
+export interface AstReplaceResult {
+  changes: AstReplaceChange[];
+  fileChanges: AstReplaceFileChange[];
+  totalReplacements: number;
+  filesTouched: number;
+  filesSearched: number;
+  applied: boolean;
+  limitReached: boolean;
+  parseErrors?: string[];
+}
diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts
index ed92e3a92..c43f05ab8 100644
--- a/packages/native/src/index.ts
+++ b/packages/native/src/index.ts
@@ -48,3 +48,9 @@ export type {
   GlobOptions,
   GlobResult,
 } from "./glob/index.js";
+
+export { astGrep, astEdit } from "./ast/index.js";
+export type {
+  AstFindMatch, AstFindOptions, AstFindResult,
+  AstReplaceChange, AstReplaceFileChange, AstReplaceOptions, AstReplaceResult,
+} from "./ast/index.js";
diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts
index 6d6958927..039efc10e 100644
--- a/packages/native/src/native.ts
+++ b/packages/native/src/native.ts
@@ -58,4 +58,6 @@ export const native = loadNative() as {
   copyToClipboard: (text: string) => void;
   readTextFromClipboard: () => string | null;
   readImageFromClipboard: () => Promise;
+  astGrep: (options: unknown) => unknown;
+  astEdit: (options: unknown) => unknown;
 };