diff --git a/native/Cargo.lock b/native/Cargo.lock index ee19d016b..7cb16d2d3 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -550,6 +550,7 @@ dependencies = [ "syntect", "unicode-segmentation", "unicode-width", + "xxhash-rust", ] [[package]] @@ -1914,6 +1915,12 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea6fc2961e4ef194dcbfe56bb845534d0dc8098940c7e5c012a258bfec6701bd" +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "zerocopy" version = "0.8.42" diff --git a/native/crates/engine/Cargo.toml b/native/crates/engine/Cargo.toml index dc9ae6957..204eef0b2 100644 --- a/native/crates/engine/Cargo.toml +++ b/native/crates/engine/Cargo.toml @@ -33,6 +33,7 @@ smallvec = "1" syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] } unicode-segmentation = "1" unicode-width = "0.2" +xxhash-rust = { version = "0.8", features = ["xxh32"] } [build-dependencies] napi-build = "2" diff --git a/native/crates/engine/src/lib.rs b/native/crates/engine/src/lib.rs index 7da3ef524..ed5feb445 100644 --- a/native/crates/engine/src/lib.rs +++ b/native/crates/engine/src/lib.rs @@ -25,3 +25,4 @@ mod gsd_parser; mod image; mod json_parse; mod stream_process; +mod xxhash; diff --git a/native/crates/engine/src/xxhash.rs b/native/crates/engine/src/xxhash.rs new file mode 100644 index 000000000..d6092c52e --- /dev/null +++ b/native/crates/engine/src/xxhash.rs @@ -0,0 +1,43 @@ +//! xxHash32 exposed to JS via N-API. +//! +//! Wraps `xxhash_rust::xxh32` to provide a drop-in replacement for the pure-JS +//! xxHash32 used by the hashline edit tool. + +use napi_derive::napi; + +/// Compute xxHash32 of a UTF-8 string with the given seed. +/// +/// Matches the behavior of the pure-JS `xxHash32(input, seed)` in hashline.ts: +/// the input string is converted to UTF-8 bytes and hashed. +#[napi(js_name = "xxHash32")] +pub fn xx_hash32(input: String, seed: u32) -> u32 { + xxhash_rust::xxh32::xxh32(input.as_bytes(), seed) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Reference vectors verified against the pure-JS implementation. + #[test] + fn known_vectors() { + // Empty string, seed 0 + assert_eq!(xx_hash32(String::new(), 0), 0x02CC5D05); + // "hello", seed 0 + assert_eq!(xx_hash32("hello".into(), 0), 0xFB0DA52A); + // "hello", seed 42 + assert_eq!(xx_hash32("hello".into(), 42), 0x0AA8E13E); + } + + #[test] + fn short_and_long_inputs() { + // < 16 bytes (no stripe loop) + let short = xx_hash32("abc".into(), 0); + assert_ne!(short, 0); + + // >= 16 bytes (enters stripe loop) + let long = xx_hash32("abcdefghijklmnop".into(), 0); + assert_ne!(long, 0); + assert_ne!(short, long); + } +} diff --git a/packages/native/package.json b/packages/native/package.json index 374dd7856..bdcd1a42a 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -51,6 +51,10 @@ "./image": { "types": "./dist/image/index.d.ts", "import": "./dist/image/index.js" + }, + "./xxhash": { + "types": "./dist/xxhash/index.d.ts", + "import": "./dist/xxhash/index.js" } }, "files": [ diff --git a/packages/native/src/__tests__/xxhash.test.mjs b/packages/native/src/__tests__/xxhash.test.mjs new file mode 100644 index 000000000..1791b55ce --- /dev/null +++ b/packages/native/src/__tests__/xxhash.test.mjs @@ -0,0 +1,86 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { xxHash32 } from "@gsd/native/xxhash"; + +/** + * Reference values computed from the pure-JS xxHash32 implementation + * that was previously inlined in hashline.ts. + */ + +// Pure-JS reference implementation for generating expected values +const PRIME32_1 = 0x9e3779b1; +const PRIME32_2 = 0x85ebca77; +const PRIME32_3 = 0xc2b2ae3d; +const PRIME32_4 = 0x27d4eb2f; +const PRIME32_5 = 0x165667b1; + +function rotl32(val, bits) { + return ((val << bits) | (val >>> (32 - bits))) >>> 0; +} +function imul32(a, b) { + return Math.imul(a, b) >>> 0; +} +function jsXxHash32(input, seed) { + const buf = Buffer.from(input, "utf-8"); + const len = buf.length; + let h32; + let i = 0; + if (len >= 16) { + let v1 = (seed + PRIME32_1 + PRIME32_2) >>> 0; + let v2 = (seed + PRIME32_2) >>> 0; + let v3 = (seed + 0) >>> 0; + let v4 = (seed - PRIME32_1) >>> 0; + while (i <= len - 16) { + v1 = imul32(rotl32((v1 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1) >>> 0; i += 4; + v2 = imul32(rotl32((v2 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1) >>> 0; i += 4; + v3 = imul32(rotl32((v3 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1) >>> 0; i += 4; + v4 = imul32(rotl32((v4 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1) >>> 0; i += 4; + } + h32 = (rotl32(v1, 1) + rotl32(v2, 7) + rotl32(v3, 12) + rotl32(v4, 18)) >>> 0; + } else { + h32 = (seed + PRIME32_5) >>> 0; + } + h32 = (h32 + len) >>> 0; + while (i <= len - 4) { + h32 = (h32 + imul32(buf.readUInt32LE(i), PRIME32_3)) >>> 0; + h32 = imul32(rotl32(h32, 17), PRIME32_4); + i += 4; + } + while (i < len) { + h32 = (h32 + imul32(buf[i], PRIME32_5)) >>> 0; + h32 = imul32(rotl32(h32, 11), PRIME32_1); + i += 1; + } + h32 = imul32(h32 ^ (h32 >>> 15), PRIME32_2); + h32 = imul32(h32 ^ (h32 >>> 13), PRIME32_3); + h32 = (h32 ^ (h32 >>> 16)) >>> 0; + return h32; +} + +describe("xxHash32 native vs JS compatibility", () => { + const testCases = [ + ["empty string, seed 0", "", 0], + ["short string, seed 0", "hello", 0], + ["short string, seed 42", "hello", 42], + ["medium string, seed 0", "hello world!", 0], + ["long string (>16 bytes)", "abcdefghijklmnopqrstuvwxyz", 0], + ["whitespace only", " ", 0], + ["punctuation", "{}();", 0], + ["unicode", "\u{4e16}\u{754c}\u{1f600}", 0], + ["empty with nonzero seed", "", 7], + ["typical code line", " const x = 42;", 0], + ["typical code line with seed", " const x = 42;", 3], + ]; + + for (const [label, input, seed] of testCases) { + it(`matches JS reference: ${label}`, () => { + const expected = jsXxHash32(input, seed); + const actual = xxHash32(input, seed); + assert.equal( + actual, + expected, + `Mismatch for "${input}" seed=${seed}: native=${actual.toString(16)} js=${expected.toString(16)}` + ); + }); + } +}); diff --git a/packages/native/src/index.ts b/packages/native/src/index.ts index c156f5018..e66228c9e 100644 --- a/packages/native/src/index.ts +++ b/packages/native/src/index.ts @@ -91,6 +91,8 @@ export type { export { parseImage, ImageFormat, SamplingFilter } from "./image/index.js"; export type { NativeImageHandle } from "./image/index.js"; +export { xxHash32 } from "./xxhash/index.js"; + export { ttsrCompileRules, ttsrCheckBuffer, ttsrFreeRules } from "./ttsr/index.js"; export type { TtsrHandle, TtsrRuleInput } from "./ttsr/index.js"; export { diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index f6d0e9a99..2463b9114 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -135,4 +135,5 @@ export const native = loadNative() as { parseJson: (text: string) => unknown; parsePartialJson: (text: string) => unknown; parseStreamingJson: (text: string) => unknown; + xxHash32: (input: string, seed: number) => number; }; diff --git a/packages/native/src/xxhash/index.ts b/packages/native/src/xxhash/index.ts new file mode 100644 index 000000000..1aac2adfe --- /dev/null +++ b/packages/native/src/xxhash/index.ts @@ -0,0 +1,18 @@ +/** + * Native xxHash32 — Rust implementation via napi-rs. + * + * Hashes the UTF-8 representation of the input string with the given seed. + */ + +import { native } from "../native.js"; + +/** + * Compute xxHash32 of a UTF-8 string. + * + * @param input The string to hash (encoded as UTF-8 internally). + * @param seed 32-bit seed value. + * @returns 32-bit unsigned hash. + */ +export function xxHash32(input: string, seed: number): number { + return native.xxHash32(input, seed); +} diff --git a/packages/pi-coding-agent/src/core/tools/hashline.ts b/packages/pi-coding-agent/src/core/tools/hashline.ts index 990f9ee8e..7f3908e8c 100644 --- a/packages/pi-coding-agent/src/core/tools/hashline.ts +++ b/packages/pi-coding-agent/src/core/tools/hashline.ts @@ -15,76 +15,7 @@ * Adapted from Oh My Pi's hashline implementation for Node.js (no Bun dependency). */ -// ═══════════════════════════════════════════════════════════════════════════ -// xxHash32 — pure JS implementation (no native dependencies) -// ═══════════════════════════════════════════════════════════════════════════ - -const PRIME32_1 = 0x9e3779b1; -const PRIME32_2 = 0x85ebca77; -const PRIME32_3 = 0xc2b2ae3d; -const PRIME32_4 = 0x27d4eb2f; -const PRIME32_5 = 0x165667b1; - -function rotl32(val: number, bits: number): number { - return ((val << bits) | (val >>> (32 - bits))) >>> 0; -} - -function imul32(a: number, b: number): number { - return Math.imul(a, b) >>> 0; -} - -/** - * Pure JS xxHash32 operating on a UTF-8 encoded string. - * Matches Bun.hash.xxHash32(str, seed) behavior. - */ -function xxHash32(input: string, seed: number): number { - const buf = Buffer.from(input, "utf-8"); - const len = buf.length; - let h32: number; - let i = 0; - - if (len >= 16) { - let v1 = (seed + PRIME32_1 + PRIME32_2) >>> 0; - let v2 = (seed + PRIME32_2) >>> 0; - let v3 = (seed + 0) >>> 0; - let v4 = (seed - PRIME32_1) >>> 0; - - while (i <= len - 16) { - v1 = (imul32(rotl32((v1 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0; - i += 4; - v2 = (imul32(rotl32((v2 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0; - i += 4; - v3 = (imul32(rotl32((v3 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0; - i += 4; - v4 = (imul32(rotl32((v4 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0; - i += 4; - } - - h32 = (rotl32(v1, 1) + rotl32(v2, 7) + rotl32(v3, 12) + rotl32(v4, 18)) >>> 0; - } else { - h32 = (seed + PRIME32_5) >>> 0; - } - - h32 = (h32 + len) >>> 0; - - while (i <= len - 4) { - h32 = (h32 + imul32(buf.readUInt32LE(i), PRIME32_3)) >>> 0; - h32 = imul32(rotl32(h32, 17), PRIME32_4); - i += 4; - } - - while (i < len) { - h32 = (h32 + imul32(buf[i], PRIME32_5)) >>> 0; - h32 = imul32(rotl32(h32, 11), PRIME32_1); - i += 1; - } - - h32 = imul32(h32 ^ (h32 >>> 15), PRIME32_2); - h32 = imul32(h32 ^ (h32 >>> 13), PRIME32_3); - h32 = (h32 ^ (h32 >>> 16)) >>> 0; - - return h32; -} +import { xxHash32 } from "@gsd/native/xxhash"; // ═══════════════════════════════════════════════════════════════════════════ // Hash Computation