feat: native Rust xxHash32 for hashline (#272)

* feat: replace pure-JS xxHash32 with native Rust implementation via napi

The hashline edit tool calls xxHash32 on every line of every file read/edit.
Moving this to a native Rust implementation (xxhash-rust crate) eliminates
JS overhead for this hot path. Hash output is identical -- verified by tests
comparing native vs JS reference across 11 input vectors including empty
strings, short/long inputs, unicode, and seeded variants.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: use typed native interface and remove version-drag comment in xxhash wrapper

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-13 16:46:08 -06:00 committed by GitHub
parent 8ac5a82409
commit b730ed87d0
10 changed files with 164 additions and 70 deletions

7
native/Cargo.lock generated
View file

@ -550,6 +550,7 @@ dependencies = [
"syntect",
"unicode-segmentation",
"unicode-width",
"xxhash-rust",
]
[[package]]
@ -1914,6 +1915,12 @@ version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6fc2961e4ef194dcbfe56bb845534d0dc8098940c7e5c012a258bfec6701bd"
[[package]]
name = "xxhash-rust"
version = "0.8.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
[[package]]
name = "zerocopy"
version = "0.8.42"

View file

@ -33,6 +33,7 @@ smallvec = "1"
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }
unicode-segmentation = "1"
unicode-width = "0.2"
xxhash-rust = { version = "0.8", features = ["xxh32"] }
[build-dependencies]
napi-build = "2"

View file

@ -25,3 +25,4 @@ mod gsd_parser;
mod image;
mod json_parse;
mod stream_process;
mod xxhash;

View file

@ -0,0 +1,43 @@
//! xxHash32 exposed to JS via N-API.
//!
//! Wraps `xxhash_rust::xxh32` to provide a drop-in replacement for the pure-JS
//! xxHash32 used by the hashline edit tool.
use napi_derive::napi;
/// Hash a string with xxHash32 using the supplied seed.
///
/// Exported to JavaScript under the name `xxHash32`. The string's UTF-8 bytes
/// are fed to `xxhash_rust::xxh32::xxh32`, mirroring the pure-JS
/// `xxHash32(input, seed)` in hashline.ts, which hashes the UTF-8 encoding
/// of its input as well.
#[napi(js_name = "xxHash32")]
pub fn xx_hash32(input: String, seed: u32) -> u32 {
    // A Rust `String` is always valid UTF-8, so its byte view is the hash input.
    let bytes = input.as_bytes();
    xxhash_rust::xxh32::xxh32(bytes, seed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference vectors verified against the pure-JS implementation.
    #[test]
    fn known_vectors() {
        // Each entry is (input, seed, expected hash).
        let vectors: [(&str, u32, u32); 3] = [
            ("", 0, 0x02CC5D05),
            ("hello", 0, 0xFB0DA52A),
            ("hello", 42, 0x0AA8E13E),
        ];

        for &(input, seed, expected) in vectors.iter() {
            assert_eq!(
                xx_hash32(input.to_owned(), seed),
                expected,
                "input={:?} seed={}",
                input,
                seed
            );
        }
    }

    /// Exercises one input below and one input at the 16-byte boundary.
    #[test]
    fn short_and_long_inputs() {
        // 3 bytes: shorter than 16, so the stripe loop is not entered.
        let short_hash = xx_hash32(String::from("abc"), 0);
        // Exactly 16 bytes: long enough to enter the stripe loop.
        let long_hash = xx_hash32(String::from("abcdefghijklmnop"), 0);

        assert_ne!(short_hash, 0);
        assert_ne!(long_hash, 0);
        assert_ne!(short_hash, long_hash);
    }
}

View file

@ -51,6 +51,10 @@
"./image": {
"types": "./dist/image/index.d.ts",
"import": "./dist/image/index.js"
},
"./xxhash": {
"types": "./dist/xxhash/index.d.ts",
"import": "./dist/xxhash/index.js"
}
},
"files": [

View file

@ -0,0 +1,86 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { xxHash32 } from "@gsd/native/xxhash";
/**
 * Pure-JS reference implementation of xxHash32, used to compute the expected
 * values that the native binding is compared against. It mirrors the
 * implementation that was previously inlined in hashline.ts.
 */
// Multiplier constants consumed by the reference jsXxHash32 below.
const PRIME32_1 = 0x9e3779b1;
const PRIME32_2 = 0x85ebca77;
const PRIME32_3 = 0xc2b2ae3d;
const PRIME32_4 = 0x27d4eb2f;
const PRIME32_5 = 0x165667b1;
/**
 * Rotate a 32-bit value left by `bits` positions.
 *
 * The result is normalized to an unsigned 32-bit integer.
 */
function rotl32(val, bits) {
  const shiftedOut = val >>> (32 - bits);
  const shiftedUp = val << bits;
  return (shiftedUp | shiftedOut) >>> 0;
}
/**
 * Multiply two values with 32-bit wrap-around semantics (`Math.imul`) and
 * return the product as an unsigned 32-bit integer.
 */
function imul32(a, b) {
  const signedProduct = Math.imul(a, b);
  return signedProduct >>> 0;
}
/**
 * Reference xxHash32 over the UTF-8 encoding of `input`.
 *
 * Inputs of 16 bytes or more are first consumed in 16-byte stripes across
 * four accumulator lanes; the remaining bytes are folded in 4 bytes at a
 * time, then 1 byte at a time, before the final avalanche mix.
 *
 * @param input String to hash; it is encoded to UTF-8 before hashing.
 * @param seed  Seed value mixed into the initial accumulator state.
 * @returns The hash as an unsigned 32-bit integer.
 */
function jsXxHash32(input, seed) {
  const bytes = Buffer.from(input, "utf-8");
  const total = bytes.length;

  // One lane round: fold a little-endian 32-bit word into an accumulator.
  const mixLane = (lane, word) =>
    imul32(rotl32((lane + imul32(word, PRIME32_2)) >>> 0, 13), PRIME32_1) >>> 0;

  let pos = 0;
  let acc;

  if (total < 16) {
    // Short input: no stripes, start from the seed alone.
    acc = (seed + PRIME32_5) >>> 0;
  } else {
    let laneA = (seed + PRIME32_1 + PRIME32_2) >>> 0;
    let laneB = (seed + PRIME32_2) >>> 0;
    let laneC = (seed + 0) >>> 0;
    let laneD = (seed - PRIME32_1) >>> 0;

    // Consume every complete 16-byte stripe, one word per lane.
    for (; pos <= total - 16; pos += 16) {
      laneA = mixLane(laneA, bytes.readUInt32LE(pos));
      laneB = mixLane(laneB, bytes.readUInt32LE(pos + 4));
      laneC = mixLane(laneC, bytes.readUInt32LE(pos + 8));
      laneD = mixLane(laneD, bytes.readUInt32LE(pos + 12));
    }

    acc = (rotl32(laneA, 1) + rotl32(laneB, 7) + rotl32(laneC, 12) + rotl32(laneD, 18)) >>> 0;
  }

  // The total byte length is part of the hash state.
  acc = (acc + total) >>> 0;

  // Remaining whole 32-bit words.
  for (; pos <= total - 4; pos += 4) {
    acc = (acc + imul32(bytes.readUInt32LE(pos), PRIME32_3)) >>> 0;
    acc = imul32(rotl32(acc, 17), PRIME32_4);
  }

  // Remaining single bytes.
  for (; pos < total; pos += 1) {
    acc = (acc + imul32(bytes[pos], PRIME32_5)) >>> 0;
    acc = imul32(rotl32(acc, 11), PRIME32_1);
  }

  // Final avalanche.
  acc = imul32(acc ^ (acc >>> 15), PRIME32_2);
  acc = imul32(acc ^ (acc >>> 13), PRIME32_3);
  return (acc ^ (acc >>> 16)) >>> 0;
}
// Compares the native xxHash32 binding against the pure-JS reference for a
// fixed set of inputs; one test case is registered per vector.
describe("xxHash32 native vs JS compatibility", () => {
  // Each vector is [label, input, seed].
  const vectors = [
    ["empty string, seed 0", "", 0],
    ["short string, seed 0", "hello", 0],
    ["short string, seed 42", "hello", 42],
    ["medium string, seed 0", "hello world!", 0],
    ["long string (>16 bytes)", "abcdefghijklmnopqrstuvwxyz", 0],
    ["whitespace only", " ", 0],
    ["punctuation", "{}();", 0],
    ["unicode", "\u{4e16}\u{754c}\u{1f600}", 0],
    ["empty with nonzero seed", "", 7],
    ["typical code line", " const x = 42;", 0],
    ["typical code line with seed", " const x = 42;", 3],
  ];

  vectors.forEach(([label, input, seed]) => {
    it(`matches JS reference: ${label}`, () => {
      const expected = jsXxHash32(input, seed);
      const actual = xxHash32(input, seed);
      // Report both hashes in hex so a mismatch is easy to diagnose.
      const message = `Mismatch for "${input}" seed=${seed}: native=${actual.toString(16)} js=${expected.toString(16)}`;
      assert.equal(actual, expected, message);
    });
  });
});

View file

@ -91,6 +91,8 @@ export type {
export { parseImage, ImageFormat, SamplingFilter } from "./image/index.js";
export type { NativeImageHandle } from "./image/index.js";
export { xxHash32 } from "./xxhash/index.js";
export { ttsrCompileRules, ttsrCheckBuffer, ttsrFreeRules } from "./ttsr/index.js";
export type { TtsrHandle, TtsrRuleInput } from "./ttsr/index.js";
export {

View file

@ -135,4 +135,5 @@ export const native = loadNative() as {
parseJson: (text: string) => unknown;
parsePartialJson: (text: string) => unknown;
parseStreamingJson: (text: string) => unknown;
xxHash32: (input: string, seed: number) => number;
};

View file

@ -0,0 +1,18 @@
/**
* Native xxHash32 Rust implementation via napi-rs.
*
* Hashes the UTF-8 representation of the input string with the given seed.
*/
import { native } from "../native.js";
/**
 * Hash a string with xxHash32 by delegating to the native binding.
 *
 * @param input The string to hash (encoded as UTF-8 internally).
 * @param seed 32-bit seed value.
 * @returns 32-bit unsigned hash.
 */
export function xxHash32(input: string, seed: number): number {
  const digest = native.xxHash32(input, seed);
  return digest;
}

View file

@ -15,76 +15,7 @@
* Adapted from Oh My Pi's hashline implementation for Node.js (no Bun dependency).
*/
// ═══════════════════════════════════════════════════════════════════════════
// xxHash32 — pure JS implementation (no native dependencies)
// ═══════════════════════════════════════════════════════════════════════════
// Multiplier constants used by the xxHash32 lane rounds, tail mixing and
// final avalanche implemented in this file.
const PRIME32_1 = 0x9e3779b1;
const PRIME32_2 = 0x85ebca77;
const PRIME32_3 = 0xc2b2ae3d;
const PRIME32_4 = 0x27d4eb2f;
const PRIME32_5 = 0x165667b1;
/**
 * Rotate `val` left by `bits` within 32 bits.
 *
 * @returns The rotated value as an unsigned 32-bit integer.
 */
function rotl32(val: number, bits: number): number {
  const wrapped = val >>> (32 - bits);
  const shifted = val << bits;
  return (shifted | wrapped) >>> 0;
}
/**
 * 32-bit wrapping multiplication (`Math.imul`) with the result normalized
 * to an unsigned 32-bit integer.
 */
function imul32(a: number, b: number): number {
  const wrappedProduct = Math.imul(a, b);
  return wrappedProduct >>> 0;
}
/**
 * xxHash32 in pure JS, computed over the UTF-8 encoding of `input`.
 * Matches Bun.hash.xxHash32(str, seed) behavior.
 *
 * @param input String to hash; it is encoded to UTF-8 before hashing.
 * @param seed Seed value mixed into the initial accumulator state.
 * @returns The hash as an unsigned 32-bit integer.
 */
function xxHash32(input: string, seed: number): number {
  const bytes = Buffer.from(input, "utf-8");
  const total = bytes.length;

  let offset = 0;
  let hash: number;

  if (total < 16) {
    // Short input: no 16-byte stripes, start from the seed alone.
    hash = (seed + PRIME32_5) >>> 0;
  } else {
    // Fold the little-endian word at `offset` into one lane, then advance.
    const consume = (lane: number): number => {
      const word = bytes.readUInt32LE(offset);
      offset += 4;
      return imul32(rotl32((lane + imul32(word, PRIME32_2)) >>> 0, 13), PRIME32_1) >>> 0;
    };

    let lane1 = (seed + PRIME32_1 + PRIME32_2) >>> 0;
    let lane2 = (seed + PRIME32_2) >>> 0;
    let lane3 = (seed + 0) >>> 0;
    let lane4 = (seed - PRIME32_1) >>> 0;

    // Each iteration consumes one complete 16-byte stripe, one word per lane.
    const lastStripeStart = total - 16;
    while (offset <= lastStripeStart) {
      lane1 = consume(lane1);
      lane2 = consume(lane2);
      lane3 = consume(lane3);
      lane4 = consume(lane4);
    }

    hash = (rotl32(lane1, 1) + rotl32(lane2, 7) + rotl32(lane3, 12) + rotl32(lane4, 18)) >>> 0;
  }

  // The total byte length is part of the hash state.
  hash = (hash + total) >>> 0;

  // Remaining whole 32-bit words.
  for (; offset <= total - 4; offset += 4) {
    hash = (hash + imul32(bytes.readUInt32LE(offset), PRIME32_3)) >>> 0;
    hash = imul32(rotl32(hash, 17), PRIME32_4);
  }

  // Remaining single bytes.
  for (; offset < total; offset += 1) {
    hash = (hash + imul32(bytes[offset], PRIME32_5)) >>> 0;
    hash = imul32(rotl32(hash, 11), PRIME32_1);
  }

  // Final avalanche.
  hash = imul32(hash ^ (hash >>> 15), PRIME32_2);
  hash = imul32(hash ^ (hash >>> 13), PRIME32_3);
  return (hash ^ (hash >>> 16)) >>> 0;
}
import { xxHash32 } from "@gsd/native/xxhash";
// ═══════════════════════════════════════════════════════════════════════════
// Hash Computation