feat: add ANSI-aware text measurement and slicing native module
Port Oh My Pi's optimized text utilities to GSD's native engine:

- wrapTextWithAnsi: word-wrap preserving ANSI codes across breaks
- truncateToWidth: truncate with ellipsis options
- sliceWithWidth: column-range extraction
- extractSegments: split around overlay regions
- sanitizeText: strip ANSI, remove control chars, normalize CR
- visibleWidth: display width excluding ANSI sequences

Single-pass ANSI scanning, ASCII fast-path, grapheme-aware Unicode width measurement, and zero-copy input via UTF-16 JsString interop.

Includes 19 Rust unit tests and 33 Node.js integration tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0d390688e3
commit
b669f9f580
10 changed files with 2014 additions and 2 deletions
15
native/Cargo.lock
generated
15
native/Cargo.lock
generated
|
|
@ -160,6 +160,9 @@ dependencies = [
|
|||
"napi",
|
||||
"napi-build",
|
||||
"napi-derive",
|
||||
"smallvec",
|
||||
"unicode-segmentation",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -400,6 +403,12 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
|
|
@ -423,6 +432,12 @@ version = "1.12.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
|
|
|
|||
|
|
@ -14,6 +14,9 @@ crate-type = ["cdylib"]
|
|||
gsd-grep = { path = "../grep" }
|
||||
napi = { version = "2", features = ["napi8"] }
|
||||
napi-derive = "2"
|
||||
smallvec = "1"
|
||||
unicode-segmentation = "1"
|
||||
unicode-width = "0.2"
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2"
|
||||
|
|
|
|||
|
|
@ -9,3 +9,4 @@
|
|||
#![allow(clippy::needless_pass_by_value)]
|
||||
|
||||
mod grep;
|
||||
mod text;
|
||||
|
|
|
|||
1536
native/crates/engine/src/text.rs
Normal file
1536
native/crates/engine/src/text.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,14 +1,14 @@
|
|||
{
|
||||
"name": "@gsd/native",
|
||||
"version": "0.1.0",
|
||||
"description": "Native Rust bindings for GSD — high-performance grep via N-API",
|
||||
"description": "Native Rust bindings for GSD — high-performance grep and text utilities via N-API",
|
||||
"type": "module",
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts",
|
||||
"scripts": {
|
||||
"build:native": "node ../../native/scripts/build.js",
|
||||
"build:native:dev": "node ../../native/scripts/build.js --dev",
|
||||
"test": "node --test src/__tests__/grep.test.mjs"
|
||||
"test": "node --test src/__tests__/grep.test.mjs src/__tests__/text.test.mjs"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
|
|
@ -18,6 +18,10 @@
|
|||
"./grep": {
|
||||
"types": "./src/grep/index.ts",
|
||||
"import": "./src/grep/index.ts"
|
||||
},
|
||||
"./text": {
|
||||
"types": "./src/text/index.ts",
|
||||
"import": "./src/text/index.ts"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
|
|
|
|||
262
packages/native/src/__tests__/text.test.mjs
Normal file
262
packages/native/src/__tests__/text.test.mjs
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
import { test, describe } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { createRequire } from "node:module";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// Load the native addon directly (bypassing the TypeScript wrappers) from the
// `native/addon` directory four levels above this test file.
const addonDir = path.resolve(
  __dirname,
  "..",
  "..",
  "..",
  "..",
  "native",
  "addon",
);
const platformTag = `${process.platform}-${process.arch}`;

// Candidates are tried in order: the platform-specific build first, then the
// dev build.
const candidates = [
  path.join(addonDir, `gsd_engine.${platformTag}.node`),
  path.join(addonDir, "gsd_engine.dev.node"),
];

let native;
// Remember why each candidate failed so that a binary which exists but cannot
// be loaded is not silently reported as merely "not found".
const loadFailures = [];
for (const candidate of candidates) {
  try {
    native = require(candidate);
    break;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    loadFailures.push(`  ${candidate}: ${reason}`);
    // try next
  }
}

if (!native) {
  console.error(
    "Native addon not found. Run `npm run build:native -w @gsd/native` first.",
  );
  for (const failure of loadFailures) {
    console.error(failure);
  }
  process.exit(1);
}
|
||||
|
||||
// ── visibleWidth ───────────────────────────────────────────────────────
|
||||
|
||||
describe("visibleWidth", () => {
  // Each row: [test title, arguments for native.visibleWidth, expected cells].
  const cases = [
    ["plain ASCII text", ["hello"], 5],
    ["empty string", [""], 0],
    ["ignores ANSI SGR codes", ["\x1b[31mhello\x1b[0m"], 5],
    ["ignores 256-color ANSI", ["\x1b[38;5;196mred\x1b[0m"], 3],
    ["ignores RGB ANSI", ["\x1b[38;2;255;128;0morange\x1b[0m"], 6],
    // default tab width = 3
    ["counts tabs with default width", ["a\tb"], 1 + 3 + 1],
    ["counts tabs with custom width", ["a\tb", 4], 1 + 4 + 1],
    ["CJK double-width characters", ["\u4e16\u754c"], 4], // 世界
    ["mixed ASCII and CJK", ["a\u4e16b"], 4], // a + 2 + 1
  ];

  for (const [title, args, expected] of cases) {
    test(title, () => {
      assert.equal(native.visibleWidth(...args), expected);
    });
  }
});
|
||||
|
||||
// ── wrapTextWithAnsi ───────────────────────────────────────────────────
|
||||
|
||||
describe("wrapTextWithAnsi", () => {
  test("wraps plain text at word boundary", () => {
    const wrapped = native.wrapTextWithAnsi("hello world", 5);
    const [first, second] = wrapped;
    assert.equal(wrapped.length, 2);
    assert.equal(first, "hello");
    assert.equal(second, "world");
  });

  test("no wrap needed", () => {
    const wrapped = native.wrapTextWithAnsi("hi", 10);
    assert.equal(wrapped.length, 1);
    assert.equal(wrapped[0], "hi");
  });

  test("empty string produces one empty line", () => {
    const wrapped = native.wrapTextWithAnsi("", 10);
    assert.equal(wrapped.length, 1);
    assert.equal(wrapped[0], "");
  });

  test("preserves ANSI color across wrap", () => {
    // The opening RGB colour code must be present on both output lines.
    const color = "\x1b[38;2;156;163;176m";
    const wrapped = native.wrapTextWithAnsi(`${color}hello world\x1b[0m`, 5);
    assert.equal(wrapped.length, 2);
    assert.ok(wrapped[0].startsWith(color));
    assert.ok(wrapped[1].startsWith(color));
    assert.ok(wrapped[1].includes("world"));
  });

  test("handles multiline input (newlines)", () => {
    const wrapped = native.wrapTextWithAnsi("line one\nline two", 20);
    const [first, second] = wrapped;
    assert.equal(wrapped.length, 2);
    assert.equal(first, "line one");
    assert.equal(second, "line two");
  });

  test("breaks long words", () => {
    const wrapped = native.wrapTextWithAnsi("abcdefghij", 5);
    const [head, tail] = wrapped;
    assert.equal(wrapped.length, 2);
    assert.equal(head, "abcde");
    assert.equal(tail, "fghij");
  });
});
|
||||
|
||||
// ── truncateToWidth ────────────────────────────────────────────────────
|
||||
|
||||
describe("truncateToWidth", () => {
  // Call shape: truncateToWidth(text, maxWidth, ellipsisKind, pad), where
  // ellipsisKind is 0 = "\u2026", 1 = "...", 2 = none.

  test("returns original when fits", () => {
    assert.equal(native.truncateToWidth("hello", 10, 0, false), "hello");
  });

  test("truncates with unicode ellipsis", () => {
    const clipped = native.truncateToWidth("hello world", 6, 0, false);
    assert.equal(native.visibleWidth(clipped), 6);
    assert.ok(clipped.includes("\u2026"));
  });

  test("truncates with ASCII ellipsis", () => {
    const clipped = native.truncateToWidth("hello world", 8, 1, false);
    assert.ok(clipped.includes("..."));
  });

  test("truncates with no ellipsis", () => {
    const clipped = native.truncateToWidth("hello world", 5, 2, false);
    assert.equal(native.visibleWidth(clipped), 5);
    for (const marker of ["\u2026", "..."]) {
      assert.ok(!clipped.includes(marker));
    }
  });

  test("pads to width", () => {
    const padded = native.truncateToWidth("hi", 10, 0, true);
    assert.equal(native.visibleWidth(padded), 10);
  });

  test("preserves ANSI codes and resets on truncation", () => {
    const clipped = native.truncateToWidth(
      "\x1b[31mhello world\x1b[0m",
      6,
      0,
      false,
    );
    // Should contain the red code and a reset before ellipsis
    for (const code of ["\x1b[31m", "\x1b[0m"]) {
      assert.ok(clipped.includes(code));
    }
  });
});
|
||||
|
||||
// ── sliceWithWidth ─────────────────────────────────────────────────────
|
||||
|
||||
describe("sliceWithWidth", () => {
  // Call shape: sliceWithWidth(line, startCol, length, strict) -> { text, width }.

  test("slices from start", () => {
    const { text, width } = native.sliceWithWidth("hello world", 0, 5, false);
    assert.equal(text, "hello");
    assert.equal(width, 5);
  });

  test("slices from middle", () => {
    const { text, width } = native.sliceWithWidth("hello world", 6, 5, false);
    assert.equal(text, "world");
    assert.equal(width, 5);
  });

  test("preserves ANSI codes in slice", () => {
    const styled = "\x1b[31mhello\x1b[0m world";
    const { text, width } = native.sliceWithWidth(styled, 0, 5, false);
    assert.equal(text, "\x1b[31mhello\x1b[0m");
    assert.equal(width, 5);
  });

  test("empty slice", () => {
    const { text, width } = native.sliceWithWidth("hello", 0, 0, false);
    assert.equal(text, "");
    assert.equal(width, 0);
  });

  test("beyond string length", () => {
    const { text, width } = native.sliceWithWidth("hi", 0, 100, false);
    assert.equal(text, "hi");
    assert.equal(width, 2);
  });
});
|
||||
|
||||
// ── extractSegments ────────────────────────────────────────────────────
|
||||
|
||||
describe("extractSegments", () => {
  // Call shape: extractSegments(line, beforeEnd, afterStart, afterLen, strictAfter).

  test("extracts before and after segments", () => {
    const segments = native.extractSegments("hello world test", 5, 6, 5, false);
    const { before, beforeWidth, after, afterWidth } = segments;
    assert.equal(before, "hello");
    assert.equal(beforeWidth, 5);
    assert.equal(after, "world");
    assert.equal(afterWidth, 5);
  });

  test("handles no after segment", () => {
    const segments = native.extractSegments("hello world", 5, 0, 0, false);
    const { before, beforeWidth, after, afterWidth } = segments;
    assert.equal(before, "hello");
    assert.equal(beforeWidth, 5);
    assert.equal(after, "");
    assert.equal(afterWidth, 0);
  });
});
|
||||
|
||||
// ── sanitizeText ───────────────────────────────────────────────────────
|
||||
|
||||
describe("sanitizeText", () => {
  // Each row: [test title, raw input, expected sanitized output].
  const cases = [
    ["strips ANSI codes", "\x1b[31mhello\x1b[0m", "hello"],
    ["returns original when clean", "hello", "hello"],
    ["removes control characters", "he\x01llo", "hello"],
    ["preserves tabs and newlines", "a\tb\nc", "a\tb\nc"],
    ["normalizes CR", "hello\r\nworld", "hello\nworld"],
  ];

  for (const [title, raw, expected] of cases) {
    test(title, () => {
      assert.equal(native.sanitizeText(raw), expected);
    });
  }
});
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
*
|
||||
* Modules:
|
||||
* - grep: ripgrep-backed regex search (content + filesystem)
|
||||
* - text: ANSI-aware text measurement and slicing
|
||||
*/
|
||||
|
||||
export { searchContent, grep } from "./grep/index.js";
|
||||
|
|
@ -15,3 +16,14 @@ export type {
|
|||
SearchOptions,
|
||||
SearchResult,
|
||||
} from "./grep/index.js";
|
||||
|
||||
export {
|
||||
wrapTextWithAnsi,
|
||||
truncateToWidth,
|
||||
sliceWithWidth,
|
||||
extractSegments,
|
||||
sanitizeText,
|
||||
visibleWidth,
|
||||
EllipsisKind,
|
||||
} from "./text/index.js";
|
||||
export type { SliceResult, ExtractSegmentsResult } from "./text/index.js";
|
||||
|
|
|
|||
|
|
@ -43,4 +43,29 @@ function loadNative(): Record<string, unknown> {
|
|||
/**
 * Typed view of the loaded native addon.
 *
 * The addon is loaded as an untyped record, so this assertion is the single
 * place where the N-API surface is described. The grep entry points keep
 * opaque (`unknown`) options and results; the text utilities are typed
 * precisely, including the object shapes returned by `sliceWithWidth` and
 * `extractSegments`.
 */
export const native = loadNative() as {
  search: (content: Buffer | Uint8Array, options: unknown) => unknown;
  grep: (options: unknown) => unknown;
  wrapTextWithAnsi: (
    text: string,
    width: number,
    tabWidth?: number,
  ) => string[];
  truncateToWidth: (
    text: string,
    maxWidth: number,
    ellipsisKind: number,
    pad: boolean,
    tabWidth?: number,
  ) => string;
  // Returns the extracted text together with its visible width.
  sliceWithWidth: (
    line: string,
    startCol: number,
    length: number,
    strict: boolean,
    tabWidth?: number,
  ) => { text: string; width: number };
  // Returns the segments on either side of an overlay plus their widths.
  extractSegments: (
    line: string,
    beforeEnd: number,
    afterStart: number,
    afterLen: number,
    strictAfter: boolean,
    tabWidth?: number,
  ) => {
    before: string;
    beforeWidth: number;
    after: string;
    afterWidth: number;
  };
  sanitizeText: (text: string) => string;
  visibleWidth: (text: string, tabWidth?: number) => number;
};
|
||||
|
|
|
|||
125
packages/native/src/text/index.ts
Normal file
125
packages/native/src/text/index.ts
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
/**
|
||||
* ANSI-aware text measurement and slicing.
|
||||
*
|
||||
* High-performance UTF-16 native implementation with ASCII fast-paths,
|
||||
* single-pass ANSI scanning, and proper Unicode grapheme cluster support.
|
||||
*/
|
||||
|
||||
import { native } from "../native.js";
|
||||
import type { ExtractSegmentsResult, SliceResult } from "./types.js";
|
||||
|
||||
export type { ExtractSegmentsResult, SliceResult };
|
||||
export { EllipsisKind } from "./types.js";
|
||||
|
||||
/**
 * Word-wrap text to a visible width, preserving ANSI escape codes across
 * line breaks.
 *
 * Active SGR codes (colors, bold, etc.) are carried to continuation lines.
 * Underline and strikethrough are reset at line ends and restored on the
 * next line.
 *
 * @param text Input string (may contain ANSI codes).
 * @param width Visible width to wrap at.
 * @param tabWidth Optional tab width, forwarded unchanged to the native call.
 * @returns The wrapped lines, one array entry per output line.
 */
export function wrapTextWithAnsi(
  text: string,
  width: number,
  tabWidth?: number,
): string[] {
  // The binding is already typed, so it is called directly: arguments and the
  // return value stay compiler-checked instead of going through `Function`.
  return native.wrapTextWithAnsi(text, width, tabWidth);
}
|
||||
|
||||
/**
 * Truncate text to a visible width with an optional ellipsis.
 *
 * @param text Input string (may contain ANSI codes).
 * @param maxWidth Maximum visible width in terminal cells.
 * @param ellipsisKind 0 = "\u2026", 1 = "...", 2 = none.
 * @param pad When true, pad with spaces to exactly `maxWidth`.
 * @param tabWidth Tab stop width (default 3, range 1-16).
 * @returns The truncated (and optionally padded) string.
 */
export function truncateToWidth(
  text: string,
  maxWidth: number,
  ellipsisKind: number,
  pad: boolean,
  tabWidth?: number,
): string {
  // The binding is already typed, so it is called directly: arguments and the
  // return value stay compiler-checked instead of going through `Function`.
  return native.truncateToWidth(text, maxWidth, ellipsisKind, pad, tabWidth);
}
|
||||
|
||||
/**
 * Slice a range of visible columns from a line.
 *
 * Counts terminal cells (skipping ANSI escapes). When `strict` is true,
 * wide characters that would exceed the range are excluded.
 *
 * @param line Input line (may contain ANSI codes).
 * @param startCol First visible column of the slice.
 * @param length Number of visible columns to take.
 * @param strict Exclude wide characters that would overflow the range.
 * @param tabWidth Optional tab width, forwarded unchanged to the native call.
 * @returns The sliced text together with its visible width.
 */
export function sliceWithWidth(
  line: string,
  startCol: number,
  length: number,
  strict: boolean,
  tabWidth?: number,
): SliceResult {
  // Call the typed binding directly so the arguments are compiler-checked;
  // the single cast pins the result to the public `SliceResult` shape.
  return native.sliceWithWidth(
    line,
    startCol,
    length,
    strict,
    tabWidth,
  ) as SliceResult;
}
|
||||
|
||||
/**
 * Extract the before/after segments around an overlay region.
 *
 * ANSI state is tracked so the `after` segment renders correctly even when
 * the overlay truncates styled text.
 *
 * @param line Input line (may contain ANSI codes).
 * @param beforeEnd Visible column at which the `before` segment ends.
 * @param afterStart Visible column at which the `after` segment starts.
 * @param afterLen Visible width requested for the `after` segment.
 * @param strictAfter Strict-mode flag applied to the `after` segment.
 * @param tabWidth Optional tab width, forwarded unchanged to the native call.
 * @returns Both segments together with their visible widths.
 */
export function extractSegments(
  line: string,
  beforeEnd: number,
  afterStart: number,
  afterLen: number,
  strictAfter: boolean,
  tabWidth?: number,
): ExtractSegmentsResult {
  // Call the typed binding directly so the arguments are compiler-checked;
  // the single cast pins the result to the public `ExtractSegmentsResult`.
  return native.extractSegments(
    line,
    beforeEnd,
    afterStart,
    afterLen,
    strictAfter,
    tabWidth,
  ) as ExtractSegmentsResult;
}
|
||||
|
||||
/**
 * Strip ANSI escape sequences, remove control characters and lone
 * surrogates, and normalize line endings (CR removed).
 *
 * Returns the original string when no changes are needed (zero-copy).
 *
 * @param text Input string to clean.
 * @returns The sanitized string.
 */
export function sanitizeText(text: string): string {
  // The binding is already typed, so no `Function` cast or return assertion
  // is needed.
  return native.sanitizeText(text);
}
|
||||
|
||||
/**
 * Calculate visible width of text excluding ANSI escape sequences.
 *
 * Tabs count as `tabWidth` cells (default 3).
 *
 * @param text Input string (may contain ANSI codes).
 * @param tabWidth Number of cells a tab occupies.
 * @returns The visible width in terminal cells.
 */
export function visibleWidth(text: string, tabWidth?: number): number {
  // The binding is already typed, so no `Function` cast or return assertion
  // is needed.
  return native.visibleWidth(text, tabWidth);
}
|
||||
29
packages/native/src/text/types.ts
Normal file
29
packages/native/src/text/types.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
/**
 * Outcome of slicing a line by a range of visible columns.
 */
export interface SliceResult {
  /** Extracted text; ANSI codes may be included. */
  text: string;
  /** Visible width of the extracted slice, measured in terminal cells. */
  width: number;
}
|
||||
|
||||
/**
 * Outcome of splitting a line into the segments before and after an overlay.
 */
export interface ExtractSegmentsResult {
  /** Text that precedes the overlay region. */
  before: string;
  /** Visible width of `before`. */
  beforeWidth: number;
  /** Text that follows the overlay region. */
  after: string;
  /** Visible width of `after`. */
  afterWidth: number;
}
|
||||
|
||||
/**
 * Selects which ellipsis, if any, is used when text is truncated.
 */
export enum EllipsisKind {
  /** Unicode ellipsis character: \u2026 (width 1) */
  Unicode = 0,
  /** ASCII ellipsis: "..." (width 3) */
  Ascii = 1,
  /** No ellipsis (hard truncate) */
  None = 2,
}
|
||||
Loading…
Add table
Reference in a new issue