feat: add ANSI-aware text measurement and slicing native module

Port Oh My Pi's optimized text utilities to GSD's native engine:
- wrapTextWithAnsi: word-wrap preserving ANSI codes across breaks
- truncateToWidth: truncate with ellipsis options
- sliceWithWidth: column-range extraction
- extractSegments: split around overlay regions
- sanitizeText: strip ANSI, remove control chars, normalize CR
- visibleWidth: display width excluding ANSI sequences

Single-pass ANSI scanning, ASCII fast-path, grapheme-aware Unicode
width measurement, and zero-copy input via UTF-16 JsString interop.

Includes 19 Rust unit tests and 33 Node.js integration tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Lex Christopherson 2026-03-13 12:42:42 -06:00
parent 0d390688e3
commit b669f9f580
10 changed files with 2014 additions and 2 deletions

15
native/Cargo.lock generated
View file

@ -160,6 +160,9 @@ dependencies = [
"napi",
"napi-build",
"napi-derive",
"smallvec",
"unicode-segmentation",
"unicode-width",
]
[[package]]
@ -400,6 +403,12 @@ dependencies = [
"syn",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "syn"
version = "2.0.117"
@ -423,6 +432,12 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "walkdir"
version = "2.5.0"

View file

@ -14,6 +14,9 @@ crate-type = ["cdylib"]
gsd-grep = { path = "../grep" }
napi = { version = "2", features = ["napi8"] }
napi-derive = "2"
smallvec = "1"
unicode-segmentation = "1"
unicode-width = "0.2"
[build-dependencies]
napi-build = "2"

View file

@ -9,3 +9,4 @@
#![allow(clippy::needless_pass_by_value)]
mod grep;
mod text;

File diff suppressed because it is too large Load diff

View file

@ -1,14 +1,14 @@
{
"name": "@gsd/native",
"version": "0.1.0",
"description": "Native Rust bindings for GSD — high-performance grep via N-API",
"description": "Native Rust bindings for GSD — high-performance grep and text utilities via N-API",
"type": "module",
"main": "./src/index.ts",
"types": "./src/index.ts",
"scripts": {
"build:native": "node ../../native/scripts/build.js",
"build:native:dev": "node ../../native/scripts/build.js --dev",
"test": "node --test src/__tests__/grep.test.mjs"
"test": "node --test src/__tests__/grep.test.mjs src/__tests__/text.test.mjs"
},
"exports": {
".": {
@ -18,6 +18,10 @@
"./grep": {
"types": "./src/grep/index.ts",
"import": "./src/grep/index.ts"
},
"./text": {
"types": "./src/text/index.ts",
"import": "./src/text/index.ts"
}
},
"files": [

View file

@ -0,0 +1,262 @@
import { test, describe } from "node:test";
import assert from "node:assert/strict";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
// ESM has no __dirname/require; rebuild both from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// Load the native addon directly from <four levels up>/native/addon.
const addonDir = path.resolve(
  __dirname,
  "..",
  "..",
  "..",
  "..",
  "native",
  "addon",
);
const platformTag = `${process.platform}-${process.arch}`;

// Tried in order: the platform-specific build first, then the dev build.
const candidates = [
  path.join(addonDir, `gsd_engine.${platformTag}.node`),
  path.join(addonDir, "gsd_engine.dev.node"),
];

let native;
// One entry per candidate that failed, so a binary that exists but cannot be
// loaded is not silently reported as merely "not found".
const loadFailures = [];
for (const candidate of candidates) {
  try {
    native = require(candidate);
    break;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    loadFailures.push(`  ${candidate}: ${reason}`);
  }
}

if (!native) {
  console.error(
    "Native addon not found. Run `npm run build:native -w @gsd/native` first.",
  );
  console.error(`Tried:\n${loadFailures.join("\n")}`);
  process.exit(1);
}
// ── visibleWidth ───────────────────────────────────────────────────────
describe("visibleWidth", () => {
  // One row per test: the arguments handed to native.visibleWidth and the
  // number of terminal cells the call must report.
  const cases = [
    { name: "plain ASCII text", args: ["hello"], expected: 5 },
    { name: "empty string", args: [""], expected: 0 },
    { name: "ignores ANSI SGR codes", args: ["\x1b[31mhello\x1b[0m"], expected: 5 },
    { name: "ignores 256-color ANSI", args: ["\x1b[38;5;196mred\x1b[0m"], expected: 3 },
    {
      name: "ignores RGB ANSI",
      args: ["\x1b[38;2;255;128;0morange\x1b[0m"],
      expected: 6,
    },
    // No tabWidth argument: the tab is expected to count as 3 cells.
    { name: "counts tabs with default width", args: ["a\tb"], expected: 1 + 3 + 1 },
    { name: "counts tabs with custom width", args: ["a\tb", 4], expected: 1 + 4 + 1 },
    // 世界 — two cells per character.
    { name: "CJK double-width characters", args: ["\u4e16\u754c"], expected: 4 },
    // a (1) + 世 (2) + b (1)
    { name: "mixed ASCII and CJK", args: ["a\u4e16b"], expected: 4 },
  ];

  for (const { name, args, expected } of cases) {
    test(name, () => {
      assert.equal(native.visibleWidth(...args), expected);
    });
  }
});
// ── wrapTextWithAnsi ───────────────────────────────────────────────────
describe("wrapTextWithAnsi", () => {
  test("wraps plain text at word boundary", () => {
    const wrapped = native.wrapTextWithAnsi("hello world", 5);
    assert.equal(wrapped.length, 2);
    const [head, tail] = wrapped;
    assert.equal(head, "hello");
    assert.equal(tail, "world");
  });

  test("no wrap needed", () => {
    const wrapped = native.wrapTextWithAnsi("hi", 10);
    assert.equal(wrapped.length, 1);
    const [only] = wrapped;
    assert.equal(only, "hi");
  });

  test("empty string produces one empty line", () => {
    const wrapped = native.wrapTextWithAnsi("", 10);
    assert.equal(wrapped.length, 1);
    const [only] = wrapped;
    assert.equal(only, "");
  });

  test("preserves ANSI color across wrap", () => {
    // 24-bit foreground color that must be re-emitted on the continuation line.
    const colorOn = "\x1b[38;2;156;163;176m";
    const wrapped = native.wrapTextWithAnsi(colorOn + "hello world\x1b[0m", 5);
    assert.equal(wrapped.length, 2);
    const [head, tail] = wrapped;
    assert.ok(head.startsWith(colorOn));
    assert.ok(tail.startsWith(colorOn));
    assert.ok(tail.includes("world"));
  });

  test("handles multiline input (newlines)", () => {
    const wrapped = native.wrapTextWithAnsi("line one\nline two", 20);
    assert.equal(wrapped.length, 2);
    const [head, tail] = wrapped;
    assert.equal(head, "line one");
    assert.equal(tail, "line two");
  });

  test("breaks long words", () => {
    // A single 10-character word has no break opportunity, so it is split
    // at the width limit.
    const wrapped = native.wrapTextWithAnsi("abcdefghij", 5);
    assert.equal(wrapped.length, 2);
    const [head, tail] = wrapped;
    assert.equal(head, "abcde");
    assert.equal(tail, "fghij");
  });
});
// ── truncateToWidth ────────────────────────────────────────────────────
describe("truncateToWidth", () => {
  // Numeric ellipsis codes accepted by the native binding.
  const UNICODE_ELLIPSIS = 0;
  const ASCII_ELLIPSIS = 1;
  const NO_ELLIPSIS = 2;
  // The markers the truncated output is searched for.
  const UNICODE_MARK = "\u2026";
  const ASCII_MARK = "...";

  test("returns original when fits", () => {
    const out = native.truncateToWidth("hello", 10, UNICODE_ELLIPSIS, false);
    assert.equal(out, "hello");
  });

  test("truncates with unicode ellipsis", () => {
    const out = native.truncateToWidth("hello world", 6, UNICODE_ELLIPSIS, false);
    assert.equal(native.visibleWidth(out), 6);
    assert.ok(out.includes(UNICODE_MARK));
  });

  test("truncates with ASCII ellipsis", () => {
    const out = native.truncateToWidth("hello world", 8, ASCII_ELLIPSIS, false);
    assert.ok(out.includes(ASCII_MARK));
  });

  test("truncates with no ellipsis", () => {
    const out = native.truncateToWidth("hello world", 5, NO_ELLIPSIS, false);
    assert.equal(native.visibleWidth(out), 5);
    assert.ok(!out.includes(UNICODE_MARK));
    assert.ok(!out.includes(ASCII_MARK));
  });

  test("pads to width", () => {
    const out = native.truncateToWidth("hi", 10, UNICODE_ELLIPSIS, true);
    assert.equal(native.visibleWidth(out), 10);
  });

  test("preserves ANSI codes and resets on truncation", () => {
    const styled = "\x1b[31mhello world\x1b[0m";
    const out = native.truncateToWidth(styled, 6, UNICODE_ELLIPSIS, false);
    // The red code must survive, and a reset must be present in the output.
    assert.ok(out.includes("\x1b[31m"));
    assert.ok(out.includes("\x1b[0m"));
  });
});
// ── sliceWithWidth ─────────────────────────────────────────────────────
describe("sliceWithWidth", () => {
  test("slices from start", () => {
    const { text, width } = native.sliceWithWidth("hello world", 0, 5, false);
    assert.equal(text, "hello");
    assert.equal(width, 5);
  });

  test("slices from middle", () => {
    const { text, width } = native.sliceWithWidth("hello world", 6, 5, false);
    assert.equal(text, "world");
    assert.equal(width, 5);
  });

  test("preserves ANSI codes in slice", () => {
    // Escape sequences occupy no columns, so columns 0-4 cover "hello" plus
    // the codes wrapped around it.
    const styled = "\x1b[31mhello\x1b[0m world";
    const { text, width } = native.sliceWithWidth(styled, 0, 5, false);
    assert.equal(text, "\x1b[31mhello\x1b[0m");
    assert.equal(width, 5);
  });

  test("empty slice", () => {
    const { text, width } = native.sliceWithWidth("hello", 0, 0, false);
    assert.equal(text, "");
    assert.equal(width, 0);
  });

  test("beyond string length", () => {
    // Asking for more columns than exist returns what is there.
    const { text, width } = native.sliceWithWidth("hi", 0, 100, false);
    assert.equal(text, "hi");
    assert.equal(width, 2);
  });
});
// ── extractSegments ────────────────────────────────────────────────────
describe("extractSegments", () => {
  test("extracts before and after segments", () => {
    // before = columns [0, 5); after = 5 columns starting at column 6.
    const { before, beforeWidth, after, afterWidth } = native.extractSegments(
      "hello world test",
      5,
      6,
      5,
      false,
    );
    assert.equal(before, "hello");
    assert.equal(beforeWidth, 5);
    assert.equal(after, "world");
    assert.equal(afterWidth, 5);
  });

  test("handles no after segment", () => {
    // A zero-length after range yields an empty segment of width 0.
    const { before, beforeWidth, after, afterWidth } = native.extractSegments(
      "hello world",
      5,
      0,
      0,
      false,
    );
    assert.equal(before, "hello");
    assert.equal(beforeWidth, 5);
    assert.equal(after, "");
    assert.equal(afterWidth, 0);
  });
});
// ── sanitizeText ───────────────────────────────────────────────────────
describe("sanitizeText", () => {
  // One row per test: raw input and the exact string sanitizeText must return.
  const cases = [
    { name: "strips ANSI codes", input: "\x1b[31mhello\x1b[0m", expected: "hello" },
    { name: "returns original when clean", input: "hello", expected: "hello" },
    { name: "removes control characters", input: "he\x01llo", expected: "hello" },
    { name: "preserves tabs and newlines", input: "a\tb\nc", expected: "a\tb\nc" },
    { name: "normalizes CR", input: "hello\r\nworld", expected: "hello\nworld" },
  ];

  for (const { name, input, expected } of cases) {
    test(name, () => {
      assert.equal(native.sanitizeText(input), expected);
    });
  }
});

View file

@ -3,6 +3,7 @@
*
* Modules:
* - grep: ripgrep-backed regex search (content + filesystem)
* - text: ANSI-aware text measurement and slicing
*/
export { searchContent, grep } from "./grep/index.js";
@ -15,3 +16,14 @@ export type {
SearchOptions,
SearchResult,
} from "./grep/index.js";
export {
wrapTextWithAnsi,
truncateToWidth,
sliceWithWidth,
extractSegments,
sanitizeText,
visibleWidth,
EllipsisKind,
} from "./text/index.js";
export type { SliceResult, ExtractSegmentsResult } from "./text/index.js";

View file

@ -43,4 +43,29 @@ function loadNative(): Record<string, unknown> {
/**
 * Typed view of the loaded native addon.
 *
 * The object type below is asserted, not checked: it describes the functions
 * the addon is expected to export. Members returning `unknown` leave it to
 * callers to narrow the result to a concrete shape.
 */
export const native = loadNative() as {
// ── grep bindings (options and results are untyped at this layer) ──
search: (content: Buffer | Uint8Array, options: unknown) => unknown;
grep: (options: unknown) => unknown;
// ── ANSI-aware text bindings; `tabWidth` is optional wherever it appears ──
// Word-wrap `text` to `width`, returning one string per output line.
wrapTextWithAnsi: (text: string, width: number, tabWidth?: number) => string[];
// Truncate `text` to `maxWidth`; `ellipsisKind` is a numeric code and `pad`
// requests padding out to `maxWidth`.
truncateToWidth: (
text: string,
maxWidth: number,
ellipsisKind: number,
pad: boolean,
tabWidth?: number,
) => string;
// Extract `length` columns starting at `startCol`; result shape is not
// declared here (presumably `{ text, width }` — confirm against callers).
sliceWithWidth: (
line: string,
startCol: number,
length: number,
strict: boolean,
tabWidth?: number,
) => unknown;
// Split `line` around an overlay region; result shape is not declared here
// (presumably before/after text plus widths — confirm against callers).
extractSegments: (
line: string,
beforeEnd: number,
afterStart: number,
afterLen: number,
strictAfter: boolean,
tabWidth?: number,
) => unknown;
// Return a cleaned copy of `text`.
sanitizeText: (text: string) => string;
// Return the visible width of `text`.
visibleWidth: (text: string, tabWidth?: number) => number;
};

View file

@ -0,0 +1,125 @@
/**
* ANSI-aware text measurement and slicing.
*
* High-performance UTF-16 native implementation with ASCII fast-paths,
* single-pass ANSI scanning, and proper Unicode grapheme cluster support.
*/
import { native } from "../native.js";
import type { ExtractSegmentsResult, SliceResult } from "./types.js";
export type { ExtractSegmentsResult, SliceResult };
export { EllipsisKind } from "./types.js";
/**
 * Word-wrap text to a visible width, preserving ANSI escape codes across
 * line breaks.
 *
 * Active SGR codes (colors, bold, etc.) are carried to continuation lines.
 * Underline and strikethrough are reset at line ends and restored on the
 * next line.
 *
 * @param text     Input string (may contain ANSI codes and newlines).
 * @param width    Maximum visible width of each output line, in terminal cells.
 * @param tabWidth Cells counted per tab character. Forwarded unchanged, so the
 *                 native default applies when omitted.
 * @returns The wrapped lines, in order.
 */
export function wrapTextWithAnsi(
  text: string,
  width: number,
  tabWidth?: number,
): string[] {
  // `native` already declares this binding's signature; calling it directly
  // (rather than through a `Record<string, Function>` cast) keeps both the
  // arguments and the return value type-checked.
  return native.wrapTextWithAnsi(text, width, tabWidth);
}
/**
 * Truncate text to a visible width with an optional ellipsis.
 *
 * @param text         Input string (may contain ANSI codes).
 * @param maxWidth     Maximum visible width in terminal cells.
 * @param ellipsisKind 0 = "\u2026", 1 = "...", 2 = none.
 * @param pad          When true, pad with spaces to exactly `maxWidth`.
 * @param tabWidth     Cells counted per tab character (default 3, range 1-16).
 *                     This is a fixed per-tab width, not a tab-stop interval.
 * @returns The truncated (and optionally padded) string.
 */
export function truncateToWidth(
  text: string,
  maxWidth: number,
  ellipsisKind: number,
  pad: boolean,
  tabWidth?: number,
): string {
  // Call the typed binding directly so the compiler checks the argument list;
  // the previous `Record<string, Function>` cast disabled that check.
  return native.truncateToWidth(text, maxWidth, ellipsisKind, pad, tabWidth);
}
/**
 * Slice a range of visible columns from a line.
 *
 * Counts terminal cells (skipping ANSI escapes). When `strict` is true,
 * wide characters that would exceed the range are excluded.
 *
 * @param line     Input line (may contain ANSI codes).
 * @param startCol First visible column of the slice (0-based).
 * @param length   Number of visible columns to extract.
 * @param strict   Exclude wide characters that would overflow the range.
 * @param tabWidth Cells counted per tab character. Forwarded unchanged, so the
 *                 native default applies when omitted.
 * @returns The extracted text together with its visible width.
 */
export function sliceWithWidth(
  line: string,
  startCol: number,
  length: number,
  strict: boolean,
  tabWidth?: number,
): SliceResult {
  // The binding is declared as returning `unknown`, so only the result is
  // asserted here; the arguments stay checked against the declared signature.
  return native.sliceWithWidth(
    line,
    startCol,
    length,
    strict,
    tabWidth,
  ) as SliceResult;
}
/**
 * Extract the before/after segments around an overlay region.
 *
 * ANSI state is tracked so the `after` segment renders correctly even when
 * the overlay truncates styled text.
 *
 * @param line        Input line (may contain ANSI codes).
 * @param beforeEnd   Visible column at which the `before` segment ends.
 * @param afterStart  Visible column at which the `after` segment starts.
 * @param afterLen    Number of visible columns to take for `after`.
 * @param strictAfter Strictness flag applied to the `after` segment
 *                    (presumably the same wide-character rule as
 *                    `sliceWithWidth`'s `strict` — confirm in the native code).
 * @param tabWidth    Cells counted per tab character. Forwarded unchanged, so
 *                    the native default applies when omitted.
 * @returns Both segments together with their visible widths.
 */
export function extractSegments(
  line: string,
  beforeEnd: number,
  afterStart: number,
  afterLen: number,
  strictAfter: boolean,
  tabWidth?: number,
): ExtractSegmentsResult {
  // The binding is declared as returning `unknown`, so only the result is
  // asserted here; the arguments stay checked against the declared signature.
  return native.extractSegments(
    line,
    beforeEnd,
    afterStart,
    afterLen,
    strictAfter,
    tabWidth,
  ) as ExtractSegmentsResult;
}
/**
 * Strip ANSI escape sequences, remove control characters and lone
 * surrogates, and normalize line endings (CR removed).
 *
 * Tabs and newlines are kept. Returns the original string when no changes
 * are needed (zero-copy).
 *
 * @param text Input string to clean.
 * @returns The sanitized string.
 */
export function sanitizeText(text: string): string {
  // Call the typed binding directly; no cast is needed for a
  // `(text: string) => string` member.
  return native.sanitizeText(text);
}
/**
 * Calculate visible width of text excluding ANSI escape sequences.
 *
 * Tabs count as `tabWidth` cells (default 3).
 *
 * @param text     Input string (may contain ANSI codes).
 * @param tabWidth Cells counted per tab character. Forwarded unchanged, so the
 *                 native default applies when omitted.
 * @returns The width of `text` in terminal cells.
 */
export function visibleWidth(text: string, tabWidth?: number): number {
  // Call the typed binding directly so arguments and result stay checked;
  // the previous `Record<string, Function>` cast discarded that signature.
  return native.visibleWidth(text, tabWidth);
}

View file

@ -0,0 +1,29 @@
/**
 * Result of slicing a line by visible column range.
 *
 * `width` is reported alongside `text` because the text may carry ANSI
 * escape codes, so its string length is not its on-screen width.
 */
export interface SliceResult {
/** The extracted text (may include ANSI codes). */
text: string;
/** Visible width of the extracted slice in terminal cells. */
width: number;
}
/**
 * Result of extracting before/after segments around an overlay.
 *
 * Each segment is paired with its visible width in terminal cells; an absent
 * segment is represented as an empty string with width 0.
 */
export interface ExtractSegmentsResult {
/** Text content before the overlay region. */
before: string;
/** Visible width of the `before` segment. */
beforeWidth: number;
/** Text content after the overlay region. */
after: string;
/** Visible width of the `after` segment. */
afterWidth: number;
}
/**
 * Ellipsis style for truncation.
 *
 * The explicit numeric values line up with the `ellipsisKind` codes
 * documented for `truncateToWidth` (0, 1, 2), so they should not be
 * renumbered.
 */
export enum EllipsisKind {
/** Unicode ellipsis character: \u2026 (width 1) */
Unicode = 0,
/** ASCII ellipsis: "..." (width 3) */
Ascii = 1,
/** No ellipsis (hard truncate) */
None = 2,
}