feat: add ANSI-aware text measurement and slicing native module

Port Oh My Pi's optimized text utilities to GSD's native engine:

- wrapTextWithAnsi: word-wrap preserving ANSI codes across breaks
- truncateToWidth: truncate with ellipsis options
- sliceWithWidth: column-range extraction
- extractSegments: split around overlay regions
- sanitizeText: strip ANSI, remove control chars, normalize CR
- visibleWidth: display width excluding ANSI sequences

Single-pass ANSI scanning, ASCII fast-path, grapheme-aware Unicode width
measurement, and zero-copy input via UTF-16 JsString interop.

Includes 19 Rust unit tests and 33 Node.js integration tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-13 12:42:42 -06:00 · 2026-03-13 12:42:42 -06:00 · b669f9f580
commit b669f9f580
parent 0d390688e3
10 changed files with 2014 additions and 2 deletions
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -160,6 +160,9 @@ dependencies = [
 "napi",
 "napi-build",
 "napi-derive",
+ "smallvec",
+ "unicode-segmentation",
+ "unicode-width",
 ]

 [[package]]
@@ -400,6 +403,12 @@ dependencies = [
 "syn",
 ]

+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
 [[package]]
 name = "syn"
 version = "2.0.117"
@@ -423,6 +432,12 @@ version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"

+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
--- a/native/crates/engine/Cargo.toml
+++ b/native/crates/engine/Cargo.toml
@@ -14,6 +14,9 @@ crate-type = ["cdylib"]
 gsd-grep = { path = "../grep" }
 napi = { version = "2", features = ["napi8"] }
 napi-derive = "2"
+smallvec = "1"
+unicode-segmentation = "1"
+unicode-width = "0.2"

 [build-dependencies]
 napi-build = "2"
--- a/native/crates/engine/src/lib.rs
+++ b/native/crates/engine/src/lib.rs
@@ -9,3 +9,4 @@
 #![allow(clippy::needless_pass_by_value)]

 mod grep;
+mod text;
--- a/native/crates/engine/src/text.rs
+++ b/native/crates/engine/src/text.rs
--- a/packages/native/package.json
+++ b/packages/native/package.json
@@ -1,14 +1,14 @@
 {
  "name": "@gsd/native",
  "version": "0.1.0",
-  "description": "Native Rust bindings for GSD — high-performance grep via N-API",
+  "description": "Native Rust bindings for GSD — high-performance grep and text utilities via N-API",
  "type": "module",
  "main": "./src/index.ts",
  "types": "./src/index.ts",
  "scripts": {
    "build:native": "node ../../native/scripts/build.js",
    "build:native:dev": "node ../../native/scripts/build.js --dev",
-    "test": "node --test src/__tests__/grep.test.mjs"
+    "test": "node --test src/__tests__/grep.test.mjs src/__tests__/text.test.mjs"
  },
  "exports": {
    ".": {
@@ -18,6 +18,10 @@
    "./grep": {
      "types": "./src/grep/index.ts",
      "import": "./src/grep/index.ts"
+    },
+    "./text": {
+      "types": "./src/text/index.ts",
+      "import": "./src/text/index.ts"
    }
  },
  "files": [
--- a/packages/native/src/__tests__/text.test.mjs
+++ b/packages/native/src/__tests__/text.test.mjs
@@ -0,0 +1,262 @@
+import { test, describe } from "node:test";
+import assert from "node:assert/strict";
+import { createRequire } from "node:module";
+import * as path from "node:path";
+import { fileURLToPath } from "node:url";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const require = createRequire(import.meta.url);
+
+// Load the native addon directly
+const addonDir = path.resolve(
+  __dirname,
+  "..",
+  "..",
+  "..",
+  "..",
+  "native",
+  "addon",
+);
+const platformTag = `${process.platform}-${process.arch}`;
+const candidates = [
+  path.join(addonDir, `gsd_engine.${platformTag}.node`),
+  path.join(addonDir, "gsd_engine.dev.node"),
+];
+
+let native;
+for (const candidate of candidates) {
+  try {
+    native = require(candidate);
+    break;
+  } catch {
+    // try next
+  }
+}
+
+if (!native) {
+  console.error(
+    "Native addon not found. Run `npm run build:native -w @gsd/native` first.",
+  );
+  process.exit(1);
+}
+
+// ── visibleWidth ───────────────────────────────────────────────────────
+
+describe("visibleWidth", () => { // widths are in terminal cells; escape sequences add nothing
+  test("plain ASCII text", () => {
+    assert.equal(native.visibleWidth("hello"), 5);
+  });
+
+  test("empty string", () => {
+    assert.equal(native.visibleWidth(""), 0);
+  });
+
+  test("ignores ANSI SGR codes", () => {
+    assert.equal(native.visibleWidth("\x1b[31mhello\x1b[0m"), 5); // only "hello" is visible
+  });
+
+  test("ignores 256-color ANSI", () => {
+    assert.equal(native.visibleWidth("\x1b[38;5;196mred\x1b[0m"), 3);
+  });
+
+  test("ignores RGB ANSI", () => {
+    assert.equal(
+      native.visibleWidth("\x1b[38;2;255;128;0morange\x1b[0m"),
+      6,
+    );
+  });
+
+  test("counts tabs with default width", () => {
+    // default tab width = 3
+    assert.equal(native.visibleWidth("a\tb"), 1 + 3 + 1);
+  });
+
+  test("counts tabs with custom width", () => {
+    assert.equal(native.visibleWidth("a\tb", 4), 1 + 4 + 1); // 2nd argument is the tab width
+  });
+
+  test("CJK double-width characters", () => {
+    assert.equal(native.visibleWidth("\u4e16\u754c"), 4); // 世界
+  });
+
+  test("mixed ASCII and CJK", () => {
+    assert.equal(native.visibleWidth("a\u4e16b"), 4); // a + 2 + 1
+  });
+});
+
+// ── wrapTextWithAnsi ───────────────────────────────────────────────────
+
+describe("wrapTextWithAnsi", () => { // result is an array with one entry per output line
+  test("wraps plain text at word boundary", () => {
+    const lines = native.wrapTextWithAnsi("hello world", 5); // the separating space is dropped at the break
+    assert.equal(lines.length, 2);
+    assert.equal(lines[0], "hello");
+    assert.equal(lines[1], "world");
+  });
+
+  test("no wrap needed", () => {
+    const lines = native.wrapTextWithAnsi("hi", 10);
+    assert.equal(lines.length, 1);
+    assert.equal(lines[0], "hi");
+  });
+
+  test("empty string produces one empty line", () => {
+    const lines = native.wrapTextWithAnsi("", 10);
+    assert.equal(lines.length, 1);
+    assert.equal(lines[0], "");
+  });
+
+  test("preserves ANSI color across wrap", () => {
+    const lines = native.wrapTextWithAnsi(
+      "\x1b[38;2;156;163;176mhello world\x1b[0m",
+      5,
+    );
+    assert.equal(lines.length, 2);
+    assert.ok(lines[0].startsWith("\x1b[38;2;156;163;176m"));
+    assert.ok(lines[1].startsWith("\x1b[38;2;156;163;176m")); // color is re-emitted on the continuation line
+    assert.ok(lines[1].includes("world"));
+  });
+
+  test("handles multiline input (newlines)", () => {
+    const lines = native.wrapTextWithAnsi("line one\nline two", 20);
+    assert.equal(lines.length, 2);
+    assert.equal(lines[0], "line one");
+    assert.equal(lines[1], "line two");
+  });
+
+  test("breaks long words", () => {
+    const lines = native.wrapTextWithAnsi("abcdefghij", 5); // no space to break at, so the word is split at the width
+    assert.equal(lines.length, 2);
+    assert.equal(lines[0], "abcde");
+    assert.equal(lines[1], "fghij");
+  });
+});
+
+// ── truncateToWidth ────────────────────────────────────────────────────
+
+describe("truncateToWidth", () => { // args: (text, maxWidth, ellipsisKind, pad[, tabWidth])
+  test("returns original when fits", () => {
+    const result = native.truncateToWidth("hello", 10, 0, false);
+    assert.equal(result, "hello");
+  });
+
+  test("truncates with unicode ellipsis", () => {
+    const result = native.truncateToWidth("hello world", 6, 0, false); // ellipsisKind 0 = "\u2026"
+    assert.equal(native.visibleWidth(result), 6);
+    assert.ok(result.includes("\u2026"));
+  });
+
+  test("truncates with ASCII ellipsis", () => {
+    const result = native.truncateToWidth("hello world", 8, 1, false); // ellipsisKind 1 = "..."
+    assert.ok(result.includes("..."));
+  });
+
+  test("truncates with no ellipsis", () => {
+    const result = native.truncateToWidth("hello world", 5, 2, false); // ellipsisKind 2 = none
+    assert.equal(native.visibleWidth(result), 5);
+    assert.ok(!result.includes("\u2026"));
+    assert.ok(!result.includes("..."));
+  });
+
+  test("pads to width", () => {
+    const result = native.truncateToWidth("hi", 10, 0, true); // pad = true
+    assert.equal(native.visibleWidth(result), 10);
+  });
+
+  test("preserves ANSI codes and resets on truncation", () => {
+    const input = "\x1b[31mhello world\x1b[0m";
+    const result = native.truncateToWidth(input, 6, 0, false);
+    // Should contain the red code and a reset before ellipsis
+    assert.ok(result.includes("\x1b[31m"));
+    assert.ok(result.includes("\x1b[0m"));
+  });
+});
+
+// ── sliceWithWidth ─────────────────────────────────────────────────────
+
+describe("sliceWithWidth", () => { // args: (line, startCol, length, strict[, tabWidth]); result has text and width
+  test("slices from start", () => {
+    const result = native.sliceWithWidth("hello world", 0, 5, false);
+    assert.equal(result.text, "hello");
+    assert.equal(result.width, 5);
+  });
+
+  test("slices from middle", () => {
+    const result = native.sliceWithWidth("hello world", 6, 5, false); // column 6 is the "w" (columns are 0-based)
+    assert.equal(result.text, "world");
+    assert.equal(result.width, 5);
+  });
+
+  test("preserves ANSI codes in slice", () => {
+    const result = native.sliceWithWidth(
+      "\x1b[31mhello\x1b[0m world",
+      0,
+      5,
+      false,
+    );
+    assert.equal(result.text, "\x1b[31mhello\x1b[0m"); // escapes around the kept cells stay in the slice
+    assert.equal(result.width, 5);
+  });
+
+  test("empty slice", () => {
+    const result = native.sliceWithWidth("hello", 0, 0, false);
+    assert.equal(result.text, "");
+    assert.equal(result.width, 0);
+  });
+
+  test("beyond string length", () => {
+    const result = native.sliceWithWidth("hi", 0, 100, false); // width reports what was actually taken
+    assert.equal(result.text, "hi");
+    assert.equal(result.width, 2);
+  });
+});
+
+// ── extractSegments ────────────────────────────────────────────────────
+
+describe("extractSegments", () => { // args: (line, beforeEnd, afterStart, afterLen, strictAfter[, tabWidth])
+  test("extracts before and after segments", () => {
+    const result = native.extractSegments(
+      "hello world test",
+      5, // beforeEnd
+      6, // afterStart
+      5, // afterLen
+      false, // strictAfter
+    );
+    assert.equal(result.before, "hello");
+    assert.equal(result.beforeWidth, 5);
+    assert.equal(result.after, "world");
+    assert.equal(result.afterWidth, 5);
+  });
+
+  test("handles no after segment", () => {
+    const result = native.extractSegments("hello world", 5, 0, 0, false); // afterLen 0: nothing requested after the overlay
+    assert.equal(result.before, "hello");
+    assert.equal(result.beforeWidth, 5);
+    assert.equal(result.after, "");
+    assert.equal(result.afterWidth, 0);
+  });
+});
+
+// ── sanitizeText ───────────────────────────────────────────────────────
+
+describe("sanitizeText", () => { // strips escapes and control characters; tab and newline survive
+  test("strips ANSI codes", () => {
+    assert.equal(native.sanitizeText("\x1b[31mhello\x1b[0m"), "hello");
+  });
+
+  test("returns original when clean", () => {
+    assert.equal(native.sanitizeText("hello"), "hello");
+  });
+
+  test("removes control characters", () => {
+    assert.equal(native.sanitizeText("he\x01llo"), "hello");
+  });
+
+  test("preserves tabs and newlines", () => {
+    assert.equal(native.sanitizeText("a\tb\nc"), "a\tb\nc");
+  });
+
+  test("normalizes CR", () => {
+    assert.equal(native.sanitizeText("hello\r\nworld"), "hello\nworld"); // the CR of the CRLF pair is dropped
+  });
+});
--- a/packages/native/src/index.ts
+++ b/packages/native/src/index.ts
@@ -3,6 +3,7 @@
 *
 * Modules:
 * - grep: ripgrep-backed regex search (content + filesystem)
+ * - text: ANSI-aware text measurement and slicing
 */

 export { searchContent, grep } from "./grep/index.js";
@@ -15,3 +16,14 @@ export type {
  SearchOptions,
  SearchResult,
 } from "./grep/index.js";
+
+export {
+  wrapTextWithAnsi,
+  truncateToWidth,
+  sliceWithWidth,
+  extractSegments,
+  sanitizeText,
+  visibleWidth,
+  EllipsisKind,
+} from "./text/index.js";
+export type { SliceResult, ExtractSegmentsResult } from "./text/index.js";
--- a/packages/native/src/native.ts
+++ b/packages/native/src/native.ts
@@ -43,4 +43,29 @@ function loadNative(): Record<string, unknown> {
 export const native = loadNative() as { // typed view of the record returned by loadNative()
  search: (content: Buffer | Uint8Array, options: unknown) => unknown;
  grep: (options: unknown) => unknown;
+  wrapTextWithAnsi: (text: string, width: number, tabWidth?: number) => string[];
+  truncateToWidth: (
+    text: string,
+    maxWidth: number,
+    ellipsisKind: number, // 0 = "\u2026", 1 = "...", 2 = none
+    pad: boolean,
+    tabWidth?: number,
+  ) => string;
+  sliceWithWidth: (
+    line: string,
+    startCol: number,
+    length: number,
+    strict: boolean,
+    tabWidth?: number,
+  ) => unknown; // the text/index.ts wrapper casts this to SliceResult
+  extractSegments: (
+    line: string,
+    beforeEnd: number,
+    afterStart: number,
+    afterLen: number,
+    strictAfter: boolean,
+    tabWidth?: number,
+  ) => unknown; // the text/index.ts wrapper casts this to ExtractSegmentsResult
+  sanitizeText: (text: string) => string;
+  visibleWidth: (text: string, tabWidth?: number) => number;
 };
--- a/packages/native/src/text/index.ts
+++ b/packages/native/src/text/index.ts
@@ -0,0 +1,125 @@
+/**
+ * ANSI-aware text measurement and slicing.
+ *
+ * High-performance UTF-16 native implementation with ASCII fast-paths,
+ * single-pass ANSI scanning, and proper Unicode grapheme cluster support.
+ */
+
+import { native } from "../native.js";
+import type { ExtractSegmentsResult, SliceResult } from "./types.js";
+
+export type { ExtractSegmentsResult, SliceResult };
+export { EllipsisKind } from "./types.js";
+
+/**
+ * Word-wrap text to a visible width, preserving ANSI escape codes across
+ * line breaks. Active SGR codes (colors, bold, etc.) are carried to
+ * continuation lines; underline and strikethrough are reset at line ends
+ * and restored on the next line.
+ *
+ * @param text     Input string (may contain ANSI codes and newlines).
+ * @param width    Maximum visible width of each line, in terminal cells.
+ * @param tabWidth Cells per tab character; omit to use the native default.
+ * @returns The wrapped lines, one array entry per line; an empty input
+ *          yields a single empty line.
+ */
+export function wrapTextWithAnsi(
+  text: string,
+  width: number,
+  tabWidth?: number,
+): string[] {
+  return native.wrapTextWithAnsi(text, width, tabWidth);
+}
+
+/**
+ * Truncate text to a visible width with an optional ellipsis.
+ *
+ * Text that already fits is returned unchanged (or space-padded when `pad`
+ * is true); otherwise it is cut and the selected ellipsis is added.
+ *
+ * @param text       Input string (may contain ANSI codes).
+ * @param maxWidth   Maximum visible width in terminal cells.
+ * @param ellipsisKind  0 = "\u2026", 1 = "...", 2 = none (the numeric
+ *                   values of `EllipsisKind`).
+ * @param pad        When true, pad with spaces to exactly `maxWidth`.
+ * @param tabWidth   Tab stop width (default 3, range 1-16).
+ * @returns The resulting string; ANSI codes in the kept text are preserved
+ *          and a reset is emitted on truncation.
+ */
+export function truncateToWidth(
+  text: string,
+  maxWidth: number,
+  ellipsisKind: number,
+  pad: boolean,
+  tabWidth?: number,
+): string {
+  return native.truncateToWidth(text, maxWidth, ellipsisKind, pad, tabWidth);
+}
+
+/**
+ * Slice a range of visible columns from a line, counting terminal cells
+ * and skipping ANSI escapes.
+ *
+ * @param line     Input line (may contain ANSI codes).
+ * @param startCol First visible column of the slice (0-based).
+ * @param length   Number of visible columns to take.
+ * @param strict   When true, exclude wide characters exceeding the range.
+ * @param tabWidth Cells per tab character; omit to use the native default.
+ * @returns The extracted text and its visible width in terminal cells.
+ */
+export function sliceWithWidth(
+  line: string,
+  startCol: number,
+  length: number,
+  strict: boolean,
+  tabWidth?: number,
+): SliceResult {
+  const slice = native.sliceWithWidth(line, startCol, length, strict, tabWidth);
+  // The binding is declared as returning `unknown`; narrow it at the boundary.
+  return slice as SliceResult;
+}
+
+/**
+ * Extract the before/after segments around an overlay region.
+ *
+ * ANSI state is tracked so the `after` segment renders correctly even when
+ * the overlay truncates styled text.
+ *
+ * @param line        Input line (may contain ANSI codes).
+ * @param beforeEnd   Visible column at which the `before` segment ends.
+ * @param afterStart  Visible column at which the `after` segment starts.
+ * @param afterLen    Visible width to take for the `after` segment.
+ * @param strictAfter Strictness applied to the `after` slice; presumably the
+ *                    same wide-character rule as `sliceWithWidth`'s `strict`
+ *                    (TODO confirm against the native implementation).
+ * @param tabWidth    Cells per tab character; omit to use the native default.
+ * @returns Both segments together with their visible widths.
+ */
+export function extractSegments(
+  line: string,
+  beforeEnd: number,
+  afterStart: number,
+  afterLen: number,
+  strictAfter: boolean,
+  tabWidth?: number,
+): ExtractSegmentsResult {
+  // The binding is declared as returning `unknown`; narrow it at the boundary.
+  return native.extractSegments(
+    line,
+    beforeEnd,
+    afterStart,
+    afterLen,
+    strictAfter,
+    tabWidth,
+  ) as ExtractSegmentsResult;
+}
+
+/**
+ * Strip ANSI escape sequences, remove control characters (tab and newline
+ * are kept) and lone surrogates, and normalize line endings (CR removed).
+ *
+ * @returns The cleaned text; the original string (zero-copy) when unchanged.
+ */
+export function sanitizeText(text: string): string {
+  return native.sanitizeText(text);
+}
+
+/**
+ * Calculate visible width of text excluding ANSI escape sequences.
+ * Tabs count as `tabWidth` cells; wide (e.g. CJK) characters count as 2.
+ *
+ * @param text     Input string (may contain ANSI codes).
+ * @param tabWidth Cells counted per tab character (default 3).
+ * @returns The width in terminal cells.
+ */
+export function visibleWidth(text: string, tabWidth?: number): number {
+  return native.visibleWidth(text, tabWidth);
+}
--- a/packages/native/src/text/types.ts
+++ b/packages/native/src/text/types.ts
@@ -0,0 +1,29 @@
+/** Result of slicing a line by visible column range (returned by `sliceWithWidth`). */
+export interface SliceResult {
+  /** The extracted text (may include ANSI codes). */
+  text: string;
+  /** Visible width of the extracted slice in terminal cells. */
+  width: number;
+}
+
+/** Result of extracting before/after segments around an overlay (returned by `extractSegments`). */
+export interface ExtractSegmentsResult {
+  /** Text content before the overlay region. */
+  before: string;
+  /** Visible width of the `before` segment, in terminal cells. */
+  beforeWidth: number;
+  /** Text content after the overlay region. */
+  after: string;
+  /** Visible width of the `after` segment, in terminal cells. */
+  afterWidth: number;
+}
+
+/** Ellipsis style for truncation; the numeric values are what `truncateToWidth` takes as `ellipsisKind`. */
+export enum EllipsisKind {
+  /** Unicode ellipsis character: \u2026 (width 1) */
+  Unicode = 0,
+  /** ASCII ellipsis: "..." (width 3) */
+  Ascii = 1,
+  /** No ellipsis (hard truncate) */
+  None = 2,
+}