feat: native Rust xxHash32 for hashline (#272)
* feat: replace pure-JS xxHash32 with native Rust implementation via napi

  The hashline edit tool calls xxHash32 on every line of every file read/edit. Moving this to a native Rust implementation (xxhash-rust crate) eliminates JS overhead for this hot path.

  Hash output is identical -- verified by tests comparing native vs JS reference across 11 input vectors including empty strings, short/long inputs, unicode, and seeded variants.

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: use typed native interface and remove version-drag comment in xxhash wrapper

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
8ac5a82409
commit
b730ed87d0
10 changed files with 164 additions and 70 deletions
7
native/Cargo.lock
generated
7
native/Cargo.lock
generated
|
|
@ -550,6 +550,7 @@ dependencies = [
|
|||
"syntect",
|
||||
"unicode-segmentation",
|
||||
"unicode-width",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1914,6 +1915,12 @@ version = "0.13.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea6fc2961e4ef194dcbfe56bb845534d0dc8098940c7e5c012a258bfec6701bd"
|
||||
|
||||
[[package]]
|
||||
name = "xxhash-rust"
|
||||
version = "0.8.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.42"
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ smallvec = "1"
|
|||
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }
|
||||
unicode-segmentation = "1"
|
||||
unicode-width = "0.2"
|
||||
xxhash-rust = { version = "0.8", features = ["xxh32"] }
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2"
|
||||
|
|
|
|||
|
|
@ -25,3 +25,4 @@ mod gsd_parser;
|
|||
mod image;
|
||||
mod json_parse;
|
||||
mod stream_process;
|
||||
mod xxhash;
|
||||
|
|
|
|||
43
native/crates/engine/src/xxhash.rs
Normal file
43
native/crates/engine/src/xxhash.rs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
//! xxHash32 exposed to JS via N-API.
|
||||
//!
|
||||
//! Wraps `xxhash_rust::xxh32` to provide a drop-in replacement for the pure-JS
|
||||
//! xxHash32 used by the hashline edit tool.
|
||||
|
||||
use napi_derive::napi;
|
||||
|
||||
/// Compute xxHash32 of a UTF-8 string with the given seed.
|
||||
///
|
||||
/// Matches the behavior of the pure-JS `xxHash32(input, seed)` in hashline.ts:
|
||||
/// the input string is converted to UTF-8 bytes and hashed.
|
||||
#[napi(js_name = "xxHash32")]
|
||||
pub fn xx_hash32(input: String, seed: u32) -> u32 {
|
||||
xxhash_rust::xxh32::xxh32(input.as_bytes(), seed)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed `(input, seed, digest)` triples, verified against the pure-JS
    /// implementation.
    #[test]
    fn known_vectors() {
        const VECTORS: [(&str, u32, u32); 3] = [
            // Empty string, seed 0
            ("", 0, 0x02CC5D05),
            // "hello", seed 0
            ("hello", 0, 0xFB0DA52A),
            // "hello", seed 42
            ("hello", 42, 0x0AA8E13E),
        ];

        for &(input, seed, expected) in &VECTORS {
            assert_eq!(xx_hash32(input.to_owned(), seed), expected);
        }
    }

    /// Smoke-checks both sides of the 16-byte boundary: inputs shorter than
    /// 16 bytes skip the stripe loop, inputs of 16 bytes or more enter it.
    #[test]
    fn short_and_long_inputs() {
        let below_stripe = xx_hash32(String::from("abc"), 0);
        let at_stripe = xx_hash32(String::from("abcdefghijklmnop"), 0);

        assert_ne!(below_stripe, 0);
        assert_ne!(at_stripe, 0);
        assert_ne!(below_stripe, at_stripe);
    }
}
|
||||
|
|
@ -51,6 +51,10 @@
|
|||
"./image": {
|
||||
"types": "./dist/image/index.d.ts",
|
||||
"import": "./dist/image/index.js"
|
||||
},
|
||||
"./xxhash": {
|
||||
"types": "./dist/xxhash/index.d.ts",
|
||||
"import": "./dist/xxhash/index.js"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
|
|
|
|||
86
packages/native/src/__tests__/xxhash.test.mjs
Normal file
86
packages/native/src/__tests__/xxhash.test.mjs
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { xxHash32 } from "@gsd/native/xxhash";
|
||||
|
||||
/**
|
||||
* Expected values are computed at runtime by the pure-JS xxHash32 reference
|
||||
* implementation below, a copy of the one previously inlined in hashline.ts.
|
||||
*/
|
||||
|
||||
// Pure-JS reference implementation for generating expected values
|
||||
const PRIME32_1 = 0x9e3779b1;
|
||||
const PRIME32_2 = 0x85ebca77;
|
||||
const PRIME32_3 = 0xc2b2ae3d;
|
||||
const PRIME32_4 = 0x27d4eb2f;
|
||||
const PRIME32_5 = 0x165667b1;
|
||||
|
||||
/**
 * Rotate a 32-bit value left by `bits` positions.
 * The result is normalised to an unsigned 32-bit number.
 */
function rotl32(val, bits) {
  const shiftedOut = val >>> (32 - bits);
  const shiftedIn = val << bits;
  return (shiftedIn | shiftedOut) >>> 0;
}
|
||||
/**
 * Multiply two values with 32-bit wrap-around (`Math.imul`) and return the
 * product as an unsigned 32-bit number.
 */
function imul32(a, b) {
  const wrapped = Math.imul(a, b);
  return wrapped >>> 0;
}
|
||||
/**
 * Pure-JS xxHash32 reference used to produce the expected values for the
 * native binding.
 *
 * @param input String to hash; it is encoded to UTF-8 bytes first.
 * @param seed  Seed value mixed into the initial state.
 * @returns Unsigned 32-bit hash.
 */
function jsXxHash32(input, seed) {
  const bytes = Buffer.from(input, "utf-8");
  const total = bytes.length;
  let pos = 0;
  let acc;

  if (total < 16) {
    // Short input: no 16-byte stripes, start straight from the seed.
    acc = (seed + PRIME32_5) >>> 0;
  } else {
    // Four accumulator lanes, each consuming one little-endian word per stripe.
    const lanes = [
      (seed + PRIME32_1 + PRIME32_2) >>> 0,
      (seed + PRIME32_2) >>> 0,
      (seed + 0) >>> 0,
      (seed - PRIME32_1) >>> 0,
    ];

    for (; pos + 16 <= total; pos += 16) {
      for (let lane = 0; lane < 4; lane += 1) {
        const word = bytes.readUInt32LE(pos + 4 * lane);
        const mixed = (lanes[lane] + imul32(word, PRIME32_2)) >>> 0;
        lanes[lane] = imul32(rotl32(mixed, 13), PRIME32_1) >>> 0;
      }
    }

    acc =
      (rotl32(lanes[0], 1) +
        rotl32(lanes[1], 7) +
        rotl32(lanes[2], 12) +
        rotl32(lanes[3], 18)) >>>
      0;
  }

  acc = (acc + total) >>> 0;

  // Remaining whole 4-byte words.
  for (; pos + 4 <= total; pos += 4) {
    acc = (acc + imul32(bytes.readUInt32LE(pos), PRIME32_3)) >>> 0;
    acc = imul32(rotl32(acc, 17), PRIME32_4);
  }

  // Remaining single bytes.
  for (; pos < total; pos += 1) {
    acc = (acc + imul32(bytes[pos], PRIME32_5)) >>> 0;
    acc = imul32(rotl32(acc, 11), PRIME32_1);
  }

  // Final mixing of the accumulator bits.
  acc = imul32(acc ^ (acc >>> 15), PRIME32_2);
  acc = imul32(acc ^ (acc >>> 13), PRIME32_3);
  return (acc ^ (acc >>> 16)) >>> 0;
}
|
||||
|
||||
describe("xxHash32 native vs JS compatibility", () => {
  // Each case: label shown in the test name, input string, seed.
  const cases = [
    { label: "empty string, seed 0", input: "", seed: 0 },
    { label: "short string, seed 0", input: "hello", seed: 0 },
    { label: "short string, seed 42", input: "hello", seed: 42 },
    { label: "medium string, seed 0", input: "hello world!", seed: 0 },
    { label: "long string (>16 bytes)", input: "abcdefghijklmnopqrstuvwxyz", seed: 0 },
    { label: "whitespace only", input: " ", seed: 0 },
    { label: "punctuation", input: "{}();", seed: 0 },
    { label: "unicode", input: "\u{4e16}\u{754c}\u{1f600}", seed: 0 },
    { label: "empty with nonzero seed", input: "", seed: 7 },
    { label: "typical code line", input: " const x = 42;", seed: 0 },
    { label: "typical code line with seed", input: " const x = 42;", seed: 3 },
  ];

  // Register one test per case; the native result must equal the JS reference.
  cases.forEach(({ label, input, seed }) => {
    it(`matches JS reference: ${label}`, () => {
      const expected = jsXxHash32(input, seed);
      const actual = xxHash32(input, seed);
      const detail = `Mismatch for "${input}" seed=${seed}: native=${actual.toString(16)} js=${expected.toString(16)}`;
      assert.equal(actual, expected, detail);
    });
  });
});
|
||||
|
|
@ -91,6 +91,8 @@ export type {
|
|||
export { parseImage, ImageFormat, SamplingFilter } from "./image/index.js";
|
||||
export type { NativeImageHandle } from "./image/index.js";
|
||||
|
||||
export { xxHash32 } from "./xxhash/index.js";
|
||||
|
||||
export { ttsrCompileRules, ttsrCheckBuffer, ttsrFreeRules } from "./ttsr/index.js";
|
||||
export type { TtsrHandle, TtsrRuleInput } from "./ttsr/index.js";
|
||||
export {
|
||||
|
|
|
|||
|
|
@ -135,4 +135,5 @@ export const native = loadNative() as {
|
|||
parseJson: (text: string) => unknown;
|
||||
parsePartialJson: (text: string) => unknown;
|
||||
parseStreamingJson: (text: string) => unknown;
|
||||
xxHash32: (input: string, seed: number) => number;
|
||||
};
|
||||
|
|
|
|||
18
packages/native/src/xxhash/index.ts
Normal file
18
packages/native/src/xxhash/index.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
/**
|
||||
* Native xxHash32 — Rust implementation via napi-rs.
|
||||
*
|
||||
* Hashes the UTF-8 representation of the input string with the given seed.
|
||||
*/
|
||||
|
||||
import { native } from "../native.js";
|
||||
|
||||
/**
 * Hash a string with the native xxHash32 binding.
 *
 * @param input The string to hash (encoded as UTF-8 internally).
 * @param seed  32-bit seed value.
 * @returns 32-bit unsigned hash.
 */
export function xxHash32(input: string, seed: number): number {
  const digest = native.xxHash32(input, seed);
  return digest;
}
|
||||
|
|
@ -15,76 +15,7 @@
|
|||
* Adapted from Oh My Pi's hashline implementation for Node.js (no Bun dependency).
|
||||
*/
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// xxHash32 — pure JS implementation (no native dependencies)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
const PRIME32_1 = 0x9e3779b1;
|
||||
const PRIME32_2 = 0x85ebca77;
|
||||
const PRIME32_3 = 0xc2b2ae3d;
|
||||
const PRIME32_4 = 0x27d4eb2f;
|
||||
const PRIME32_5 = 0x165667b1;
|
||||
|
||||
function rotl32(val: number, bits: number): number {
|
||||
return ((val << bits) | (val >>> (32 - bits))) >>> 0;
|
||||
}
|
||||
|
||||
function imul32(a: number, b: number): number {
|
||||
return Math.imul(a, b) >>> 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pure JS xxHash32 operating on a UTF-8 encoded string.
|
||||
* Matches Bun.hash.xxHash32(str, seed) behavior.
|
||||
*/
|
||||
function xxHash32(input: string, seed: number): number {
|
||||
const buf = Buffer.from(input, "utf-8");
|
||||
const len = buf.length;
|
||||
let h32: number;
|
||||
let i = 0;
|
||||
|
||||
if (len >= 16) {
|
||||
let v1 = (seed + PRIME32_1 + PRIME32_2) >>> 0;
|
||||
let v2 = (seed + PRIME32_2) >>> 0;
|
||||
let v3 = (seed + 0) >>> 0;
|
||||
let v4 = (seed - PRIME32_1) >>> 0;
|
||||
|
||||
while (i <= len - 16) {
|
||||
v1 = (imul32(rotl32((v1 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0;
|
||||
i += 4;
|
||||
v2 = (imul32(rotl32((v2 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0;
|
||||
i += 4;
|
||||
v3 = (imul32(rotl32((v3 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0;
|
||||
i += 4;
|
||||
v4 = (imul32(rotl32((v4 + imul32(buf.readUInt32LE(i), PRIME32_2)) >>> 0, 13), PRIME32_1)) >>> 0;
|
||||
i += 4;
|
||||
}
|
||||
|
||||
h32 = (rotl32(v1, 1) + rotl32(v2, 7) + rotl32(v3, 12) + rotl32(v4, 18)) >>> 0;
|
||||
} else {
|
||||
h32 = (seed + PRIME32_5) >>> 0;
|
||||
}
|
||||
|
||||
h32 = (h32 + len) >>> 0;
|
||||
|
||||
while (i <= len - 4) {
|
||||
h32 = (h32 + imul32(buf.readUInt32LE(i), PRIME32_3)) >>> 0;
|
||||
h32 = imul32(rotl32(h32, 17), PRIME32_4);
|
||||
i += 4;
|
||||
}
|
||||
|
||||
while (i < len) {
|
||||
h32 = (h32 + imul32(buf[i], PRIME32_5)) >>> 0;
|
||||
h32 = imul32(rotl32(h32, 11), PRIME32_1);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
h32 = imul32(h32 ^ (h32 >>> 15), PRIME32_2);
|
||||
h32 = imul32(h32 ^ (h32 >>> 13), PRIME32_3);
|
||||
h32 = (h32 ^ (h32 >>> 16)) >>> 0;
|
||||
|
||||
return h32;
|
||||
}
|
||||
import { xxHash32 } from "@gsd/native/xxhash";
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Hash Computation
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue