feat: add syntect-based syntax highlighting module to native engine (#227)

Port the highlight module from Oh My Pi's pi-natives crate. Provides
ANSI-colored syntax highlighting with scope-based semantic token matching
across 11 categories (comment, keyword, function, variable, string, number,
type, operator, punctuation, inserted, deleted).

Exposed N-API functions:
- highlightCode(code, lang, colors) -> ANSI-highlighted string
- supportsLanguage(lang) -> boolean
- getSupportedLanguages() -> string[]

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-13 12:47:02 -06:00 committed by GitHub
parent c36c8bd0b0
commit d64575cd3c
8 changed files with 710 additions and 0 deletions

View file

@ -17,6 +17,7 @@ globset = "0.4"
ignore = "0.4"
napi = { version = "2", features = ["napi8"] }
napi-derive = "2"
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "regex-fancy"] }
[build-dependencies]
napi-build = "2"

View file

@ -0,0 +1,472 @@
//! Syntax highlighting using syntect.
//!
//! Provides ANSI-colored output for code blocks. Takes theme colors as input
//! and maps syntect scopes to 11 semantic categories:
//! - comment, keyword, function, variable, string, number, type, operator,
//! punctuation, inserted, deleted
use std::{cell::RefCell, collections::HashMap, sync::OnceLock};
use napi_derive::napi;
use syntect::parsing::{ParseState, Scope, ScopeStack, ScopeStackOp, SyntaxReference, SyntaxSet};
/// Process-wide syntax definitions; populated on first use by `get_syntax_set`.
static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
/// Process-wide parsed scope prefixes; populated on first use by `get_scope_matchers`.
static SCOPE_MATCHERS: OnceLock<ScopeMatchers> = OnceLock::new();
// Per-thread memo of scope -> palette index, filled lazily by
// `scope_to_color_index`. A stored `usize::MAX` means "this scope maps to no
// category", so negative results are cached as well.
thread_local! {
static SCOPE_COLOR_CACHE: RefCell<HashMap<Scope, usize>> = RefCell::new(HashMap::with_capacity(256));
}
/// Returns the shared [`SyntaxSet`], initialising it on the first call via
/// `SyntaxSet::load_defaults_newlines`.
///
/// The "newlines" variant is paired with the newline-terminated lines that
/// `highlight_code` feeds to the parser.
fn get_syntax_set() -> &'static SyntaxSet {
SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
}
/// Pre-parsed scope prefixes used to classify a syntect scope into one of the
/// semantic palette categories.
///
/// Fields are grouped by the palette index they map to. The grouping here is
/// purely organisational: the order in which prefixes are tested (and therefore
/// which category wins when several prefixes match) is decided by
/// `compute_scope_color`, not by field order.
struct ScopeMatchers {
// Comment (index 0)
comment: Scope,
// String (index 4)
string: Scope,
constant_character: Scope,
meta_string: Scope,
// Number (index 5); the bare `constant` prefix is the last-resort fallback
constant_numeric: Scope,
constant_integer: Scope,
constant: Scope,
// Keyword (index 1)
keyword: Scope,
storage_type: Scope,
storage_modifier: Scope,
// Function (index 2)
entity_name_function: Scope,
support_function: Scope,
meta_function_call: Scope,
variable_function: Scope,
// Type (index 6)
entity_name_type: Scope,
support_type: Scope,
support_class: Scope,
entity_name_class: Scope,
entity_name_struct: Scope,
entity_name_enum: Scope,
entity_name_interface: Scope,
entity_name_trait: Scope,
// Operator (index 7)
keyword_operator: Scope,
punctuation_accessor: Scope,
// Punctuation (index 8)
punctuation: Scope,
// Variable (index 3)
variable: Scope,
entity_name: Scope,
meta_path: Scope,
// Diff: inserted (index 9), deleted (index 10); header/range map to keyword (index 1)
markup_inserted: Scope,
markup_deleted: Scope,
meta_diff_header: Scope,
meta_diff_range: Scope,
}
impl ScopeMatchers {
fn new() -> Self {
Self {
comment: Scope::new("comment").unwrap(),
string: Scope::new("string").unwrap(),
constant_character: Scope::new("constant.character").unwrap(),
meta_string: Scope::new("meta.string").unwrap(),
constant_numeric: Scope::new("constant.numeric").unwrap(),
constant_integer: Scope::new("constant.integer").unwrap(),
constant: Scope::new("constant").unwrap(),
keyword: Scope::new("keyword").unwrap(),
storage_type: Scope::new("storage.type").unwrap(),
storage_modifier: Scope::new("storage.modifier").unwrap(),
entity_name_function: Scope::new("entity.name.function").unwrap(),
support_function: Scope::new("support.function").unwrap(),
meta_function_call: Scope::new("meta.function-call").unwrap(),
variable_function: Scope::new("variable.function").unwrap(),
entity_name_type: Scope::new("entity.name.type").unwrap(),
support_type: Scope::new("support.type").unwrap(),
support_class: Scope::new("support.class").unwrap(),
entity_name_class: Scope::new("entity.name.class").unwrap(),
entity_name_struct: Scope::new("entity.name.struct").unwrap(),
entity_name_enum: Scope::new("entity.name.enum").unwrap(),
entity_name_interface: Scope::new("entity.name.interface").unwrap(),
entity_name_trait: Scope::new("entity.name.trait").unwrap(),
keyword_operator: Scope::new("keyword.operator").unwrap(),
punctuation_accessor: Scope::new("punctuation.accessor").unwrap(),
punctuation: Scope::new("punctuation").unwrap(),
variable: Scope::new("variable").unwrap(),
entity_name: Scope::new("entity.name").unwrap(),
meta_path: Scope::new("meta.path").unwrap(),
markup_inserted: Scope::new("markup.inserted").unwrap(),
markup_deleted: Scope::new("markup.deleted").unwrap(),
meta_diff_header: Scope::new("meta.diff.header").unwrap(),
meta_diff_range: Scope::new("meta.diff.range").unwrap(),
}
}
}
/// Returns the shared [`ScopeMatchers`], building it with `ScopeMatchers::new`
/// on the first call.
fn get_scope_matchers() -> &'static ScopeMatchers {
SCOPE_MATCHERS.get_or_init(ScopeMatchers::new)
}
/// Theme colors for syntax highlighting.
///
/// Each color is an ANSI escape sequence (e.g., "\x1b[38;2;255;0;0m") that
/// `highlight_code` writes verbatim in front of a matching segment. An empty
/// string disables coloring for that category: the segment is emitted without
/// any escape codes.
#[derive(Debug)]
#[napi(object)]
pub struct HighlightColors {
/// ANSI color for comments.
pub comment: String,
/// ANSI color for keywords.
pub keyword: String,
/// ANSI color for function names.
pub function: String,
/// ANSI color for variables and identifiers.
pub variable: String,
/// ANSI color for string literals.
pub string: String,
/// ANSI color for numeric literals.
pub number: String,
/// ANSI color for type identifiers.
// `type` is a Rust keyword, hence the raw identifier plus explicit JS name.
#[napi(js_name = "type")]
pub r#type: String,
/// ANSI color for operators.
pub operator: String,
/// ANSI color for punctuation tokens.
pub punctuation: String,
/// ANSI color for diff inserted lines. Optional; when omitted, inserted
/// lines are left uncolored.
#[napi(js_name = "inserted")]
pub inserted: Option<String>,
/// ANSI color for diff deleted lines. Optional; when omitted, deleted
/// lines are left uncolored.
#[napi(js_name = "deleted")]
pub deleted: Option<String>,
}
/// Language alias mappings: (aliases, target syntax name).
///
/// Used for languages not in syntect's default set or with non-standard names.
/// Aliases are compared case-insensitively (see `find_alias`). The target is
/// looked up in the loaded syntax set by `find_syntax`; if the set contains no
/// syntax with that name or token, the lookup yields `None` and callers fall
/// back accordingly.
///
/// Several entries deliberately point at a *different* language's grammar as
/// an approximation; those are marked below.
const LANG_ALIASES: &[(&[&str], &str)] = &[
// Approximation: TypeScript/TSX share the JavaScript grammar.
(&["ts", "tsx", "typescript", "js", "jsx", "javascript", "mjs", "cjs"], "JavaScript"),
(&["py", "python"], "Python"),
(&["rb", "ruby"], "Ruby"),
(&["rs", "rust"], "Rust"),
(&["go", "golang"], "Go"),
(&["java"], "Java"),
// Approximation: Kotlin is highlighted with the Java grammar.
(&["kt", "kotlin"], "Java"),
// Approximation: Swift is highlighted with the Objective-C grammar.
(&["swift"], "Objective-C"),
(&["c", "h"], "C"),
(&["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh"], "C++"),
(&["cs", "csharp"], "C#"),
(&["php"], "PHP"),
(&["sh", "bash", "zsh", "shell"], "Bash"),
(&["fish"], "Shell-Unix-Generic"),
(&["ps1", "powershell"], "PowerShell"),
(&["html", "htm"], "HTML"),
(&["css"], "CSS"),
(&["scss"], "SCSS"),
(&["sass"], "Sass"),
(&["less"], "LESS"),
(&["json"], "JSON"),
(&["yaml", "yml"], "YAML"),
(&["toml"], "TOML"),
(&["xml"], "XML"),
(&["md", "markdown"], "Markdown"),
(&["sql"], "SQL"),
(&["lua"], "Lua"),
(&["perl", "pl"], "Perl"),
(&["r"], "R"),
(&["scala"], "Scala"),
(&["clj", "clojure"], "Clojure"),
// Approximation: Elixir is highlighted with the Ruby grammar.
(&["ex", "exs", "elixir"], "Ruby"),
(&["erl", "erlang"], "Erlang"),
(&["hs", "haskell"], "Haskell"),
(&["ml", "ocaml"], "OCaml"),
(&["vim"], "VimL"),
(&["graphql", "gql"], "GraphQL"),
(&["proto", "protobuf"], "Protocol Buffers"),
(&["tf", "hcl", "terraform"], "Terraform"),
(&["dockerfile", "docker"], "Dockerfile"),
(&["makefile", "make"], "Makefile"),
(&["cmake"], "CMake"),
(&["ini", "cfg", "conf", "config", "properties"], "INI"),
(&["diff", "patch"], "Diff"),
(&["gitignore", "gitattributes", "gitmodules"], "Git Ignore"),
];
/// Resolve a language identifier to its target syntax name via `LANG_ALIASES`.
///
/// Matching ignores ASCII case. Returns the target of the first table row that
/// lists `lang` among its aliases, or `None` when no row does.
#[inline]
fn find_alias(lang: &str) -> Option<&'static str> {
    for (names, syntax_name) in LANG_ALIASES {
        if names.iter().any(|name| name.eq_ignore_ascii_case(lang)) {
            return Some(*syntax_name);
        }
    }
    None
}
/// Report whether `lang` appears (ignoring ASCII case) among the aliases of
/// any `LANG_ALIASES` row.
///
/// Only the alias table is consulted; the syntax set is not checked here.
#[inline]
fn is_known_alias(lang: &str) -> bool {
    LANG_ALIASES
        .iter()
        .flat_map(|(names, _)| names.iter())
        .any(|name| name.eq_ignore_ascii_case(lang))
}
/// Compute the palette index for a single scope (uncached).
///
/// Rules are evaluated top to bottom and the first match wins, so a specific
/// prefix must be tested before any broader prefix that also covers it:
/// - `keyword.operator` (operator) is tested before `keyword`; otherwise every
///   operator scope would be classified as a keyword and the operator rule
///   could never fire for it.
/// - `punctuation.accessor` (operator) is tested before `punctuation`.
/// - `entity.name.function` / `entity.name.<type-like>` are tested before the
///   generic `entity.name` (variable).
/// - `constant.character` (string) and `constant.numeric` / `constant.integer`
///   (number) are tested before the generic `constant` fallback.
///
/// # Returns
/// The palette index (0..=10) of the matching category, or `usize::MAX` when
/// the scope belongs to no category.
#[inline]
fn compute_scope_color(s: Scope) -> usize {
    let m = get_scope_matchers();
    let matches_any = |prefixes: &[Scope]| prefixes.iter().any(|p| p.is_prefix_of(s));

    if matches_any(&[m.comment]) {
        // Comment
        0
    } else if matches_any(&[m.markup_inserted]) {
        // Diff inserted
        9
    } else if matches_any(&[m.markup_deleted]) {
        // Diff deleted
        10
    } else if matches_any(&[m.meta_diff_header, m.meta_diff_range]) {
        // Diff header/range are rendered with the keyword color
        1
    } else if matches_any(&[m.string, m.constant_character, m.meta_string]) {
        // String
        4
    } else if matches_any(&[m.constant_numeric, m.constant_integer]) {
        // Number
        5
    } else if matches_any(&[m.keyword_operator, m.punctuation_accessor]) {
        // Operator: must precede the `keyword` and `punctuation` rules, whose
        // prefixes also cover these scopes.
        7
    } else if matches_any(&[m.keyword, m.storage_type, m.storage_modifier]) {
        // Keyword
        1
    } else if matches_any(&[
        m.entity_name_function,
        m.support_function,
        m.meta_function_call,
        m.variable_function,
    ]) {
        // Function
        2
    } else if matches_any(&[
        m.entity_name_type,
        m.support_type,
        m.support_class,
        m.entity_name_class,
        m.entity_name_struct,
        m.entity_name_enum,
        m.entity_name_interface,
        m.entity_name_trait,
    ]) {
        // Type
        6
    } else if matches_any(&[m.punctuation]) {
        // Punctuation
        8
    } else if matches_any(&[m.variable, m.entity_name, m.meta_path]) {
        // Variable
        3
    } else if matches_any(&[m.constant]) {
        // Generic constant falls back to the number color
        5
    } else {
        // No match
        usize::MAX
    }
}
/// Map a scope stack to a palette index.
///
/// Scopes are examined from the innermost (top of stack) outwards, and the
/// first one that belongs to a category decides the result. Each scope's
/// classification is memoised in the thread-local `SCOPE_COLOR_CACHE`, so
/// `compute_scope_color` runs at most once per distinct scope per thread.
///
/// Returns `usize::MAX` when no scope on the stack maps to a category.
#[inline]
fn scope_to_color_index(scope: &ScopeStack) -> usize {
    SCOPE_COLOR_CACHE.with(|cell| {
        let mut memo = cell.borrow_mut();
        scope
            .as_slice()
            .iter()
            .rev()
            .map(|&candidate| {
                *memo
                    .entry(candidate)
                    .or_insert_with(|| compute_scope_color(candidate))
            })
            // Lazy: stops classifying as soon as one scope yields a category.
            .find(|&idx| idx != usize::MAX)
            .unwrap_or(usize::MAX)
    })
}
/// Resolve a language identifier to a syntax definition in `ss`.
///
/// Lookups are attempted in this order, returning the first hit:
/// 1. `lang` as a syntect token,
/// 2. `lang` as a file extension,
/// 3. the `LANG_ALIASES` target for `lang`, first by syntax name and then as
///    a token.
///
/// Returns `None` when every lookup misses, including when `lang` has an
/// alias whose target is not present in `ss`.
fn find_syntax<'a>(ss: &'a SyntaxSet, lang: &str) -> Option<&'a SyntaxReference> {
    ss.find_syntax_by_token(lang)
        .or_else(|| ss.find_syntax_by_extension(lang))
        .or_else(|| {
            // Alias lookup for languages not in syntect's default set
            let target = find_alias(lang)?;
            ss.find_syntax_by_name(target)
                .or_else(|| ss.find_syntax_by_token(target))
        })
}
/// Highlight code and return ANSI-colored lines.
///
/// # Arguments
/// * `code` - The source code to highlight
/// * `lang` - Language identifier (e.g., "rust", "typescript", "python")
/// * `colors` - Theme colors as ANSI escape sequences
///
/// # Returns
/// Highlighted code with ANSI color codes, or the original code if highlighting
/// fails.
#[napi(js_name = "highlightCode")]
pub fn highlight_code(code: String, lang: Option<String>, colors: HighlightColors) -> String {
let inserted = colors.inserted.as_deref().unwrap_or("");
let deleted = colors.deleted.as_deref().unwrap_or("");
// Color palette as array for quick indexing
let palette = [
colors.comment.as_str(), // 0
colors.keyword.as_str(), // 1
colors.function.as_str(), // 2
colors.variable.as_str(), // 3
colors.string.as_str(), // 4
colors.number.as_str(), // 5
colors.r#type.as_str(), // 6
colors.operator.as_str(), // 7
colors.punctuation.as_str(), // 8
inserted, // 9
deleted, // 10
];
let ss = get_syntax_set();
// Find syntax for the language
let syntax = match &lang {
Some(l) => find_syntax(ss, l),
None => None,
}
.unwrap_or_else(|| ss.find_syntax_plain_text());
let mut parse_state = ParseState::new(syntax);
let mut scope_stack = ScopeStack::new();
let mut result = String::with_capacity(code.len() * 2);
for line in syntect::util::LinesWithEndings::from(code.as_str()) {
let Ok(ops) = parse_state.parse_line(line, ss) else {
// Parse error - append unhighlighted line and continue
result.push_str(line);
continue;
};
let mut prev_end = 0;
for (offset, op) in ops {
let offset = offset.min(line.len());
// Output text BEFORE this operation using current scope
if offset > prev_end {
let text = &line[prev_end..offset];
let color_idx = scope_to_color_index(&scope_stack);
if color_idx < palette.len() && !palette[color_idx].is_empty() {
result.push_str(palette[color_idx]);
result.push_str(text);
result.push_str("\x1b[39m");
} else {
result.push_str(text);
}
}
prev_end = offset;
// Now apply scope operation for NEXT segment
match op {
ScopeStackOp::Push(scope) => {
scope_stack.push(scope);
},
ScopeStackOp::Pop(count) => {
for _ in 0..count {
scope_stack.pop();
}
},
ScopeStackOp::Restore | ScopeStackOp::Clear(_) | ScopeStackOp::Noop => {},
}
}
// Output remaining text with current scope
if prev_end < line.len() {
let text = &line[prev_end..];
let color_idx = scope_to_color_index(&scope_stack);
if color_idx < palette.len() && !palette[color_idx].is_empty() {
result.push_str(palette[color_idx]);
result.push_str(text);
result.push_str("\x1b[39m");
} else {
result.push_str(text);
}
}
}
result
}
/// Check if a language is supported for highlighting.
///
/// Returns `true` when `lang` is listed in the alias table, or otherwise when
/// `find_syntax` resolves it against the loaded syntax set. An alias-table hit
/// short-circuits: the syntax set is not consulted in that case.
#[napi(js_name = "supportsLanguage")]
pub fn supports_language(lang: String) -> bool {
    is_known_alias(&lang) || find_syntax(get_syntax_set(), &lang).is_some()
}
/// Get the names of all syntaxes in the loaded syntax set.
///
/// These are the syntax definitions' own names (in syntax-set order), not the
/// short aliases accepted by `highlight_code`.
#[napi(js_name = "getSupportedLanguages")]
pub fn get_supported_languages() -> Vec<String> {
    get_syntax_set()
        .syntaxes()
        .iter()
        .map(|syntax| syntax.name.clone())
        .collect()
}

View file

@ -12,5 +12,6 @@ mod fs_cache;
mod glob;
mod glob_util;
mod grep;
mod highlight;
mod ps;
mod task;

View file

@ -0,0 +1,156 @@
import { test, describe } from "node:test";
import assert from "node:assert/strict";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);

// Load the native addon directly. Candidates are tried in order: the
// platform-specific build first, then the generic dev build.
const addonDir = path.resolve(__dirname, "..", "..", "..", "..", "native", "addon");
const platformTag = `${process.platform}-${process.arch}`;
const candidates = [
  path.join(addonDir, `gsd_engine.${platformTag}.node`),
  path.join(addonDir, "gsd_engine.dev.node"),
];

let native;
// Keep each failure so that an addon which exists but cannot be loaded is
// distinguishable from one that is simply missing.
const loadErrors = [];
for (const candidate of candidates) {
  try {
    native = require(candidate);
    break;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    loadErrors.push(`${candidate}: ${reason}`);
  }
}

if (!native) {
  console.error("Native addon not found. Run `npm run build:native -w @gsd/native` first.");
  for (const detail of loadErrors) {
    console.error(`  ${detail}`);
  }
  process.exit(1);
}
// Palette passed to highlightCode() in most tests: one 24-bit foreground
// escape sequence per required category. The optional `inserted`/`deleted`
// entries are intentionally omitted here and added only by the diff test.
const testColors = {
comment: "\x1b[38;2;106;153;85m",
keyword: "\x1b[38;2;197;134;192m",
function: "\x1b[38;2;220;220;170m",
variable: "\x1b[38;2;156;220;254m",
string: "\x1b[38;2;206;145;120m",
number: "\x1b[38;2;181;206;168m",
type: "\x1b[38;2;78;201;176m",
operator: "\x1b[38;2;212;212;212m",
punctuation: "\x1b[38;2;212;212;212m",
};
describe("native highlight: highlightCode()", () => {
  // Every case calls the addon as (code, lang, colors) and inspects the string.
  const highlight = (source, lang, colors = testColors) =>
    native.highlightCode(source, lang, colors);

  test("highlights JavaScript code with ANSI colors", () => {
    const output = highlight("const x = 42;\n", "javascript");

    // Escape sequences must have been injected...
    assert.ok(output.includes("\x1b["), "should contain ANSI escape codes");
    // ...without losing any of the original tokens...
    for (const token of ["const", "42"]) {
      assert.ok(output.includes(token), `should contain '${token}'`);
    }
    // ...and each colored run must be closed by a foreground reset.
    assert.ok(output.includes("\x1b[39m"), "should contain ANSI reset codes");
  });

  test("returns unhighlighted code for unknown language", () => {
    const output = highlight("some random text\n", "nonexistent_lang_xyz");

    // An unresolvable language falls back to plain text; the content survives.
    assert.ok(typeof output === "string");
    assert.ok(output.includes("some random text"));
  });

  test("handles null language gracefully", () => {
    const output = highlight("hello world\n", null);

    assert.ok(typeof output === "string");
    assert.ok(output.includes("hello world"));
  });

  test("handles empty code", () => {
    assert.equal(highlight("", "javascript"), "");
  });

  test("handles multiline code", () => {
    const output = highlight('function foo() {\n return "bar";\n}\n', "javascript");

    for (const fragment of ["function", "foo", "return", '"bar"']) {
      assert.ok(output.includes(fragment));
    }
  });

  test("supports optional inserted/deleted colors", () => {
    const diffPalette = {
      ...testColors,
      inserted: "\x1b[38;2;0;255;0m",
      deleted: "\x1b[38;2;255;0;0m",
    };
    const output = highlight("+added line\n-removed line\n", "diff", diffPalette);

    assert.ok(typeof output === "string");
    assert.ok(output.length > 0);
  });
});
describe("native highlight: supportsLanguage()", () => {
  // Assert that every identifier in `langs` is reported as supported.
  const expectSupported = (langs) => {
    for (const lang of langs) {
      assert.ok(native.supportsLanguage(lang));
    }
  };

  test("returns true for known aliases", () => {
    expectSupported(["javascript", "typescript", "python", "rust", "go", "bash"]);
  });

  test("returns true case-insensitively", () => {
    expectSupported(["JavaScript", "PYTHON", "Rust"]);
  });

  test("returns true for short aliases", () => {
    expectSupported(["ts", "py", "rs", "rb", "sh"]);
  });

  test("returns false for completely unknown languages", () => {
    const supported = native.supportsLanguage("nonexistent_lang_xyz");
    assert.equal(supported, false);
  });
});
describe("native highlight: getSupportedLanguages()", () => {
  test("returns an array of language names", () => {
    const names = native.getSupportedLanguages();

    assert.ok(Array.isArray(names));
    assert.ok(names.length > 0, "should have at least one language");
  });

  test("includes common languages", () => {
    const names = native.getSupportedLanguages();

    // These are syntect default syntax names
    for (const expected of ["JavaScript", "Python", "Rust", "C"]) {
      assert.ok(names.includes(expected), `should include ${expected}`);
    }
  });

  test("returns strings", () => {
    native.getSupportedLanguages().forEach((name) => {
      assert.equal(typeof name, "string");
    });
  });
});

View file

@ -0,0 +1,44 @@
/**
* Syntect-based syntax highlighting via N-API.
*
* Provides ANSI-colored output for code blocks using semantic scope matching
* across 11 token categories.
*/
import { native } from "../native.js";
import type { HighlightColors } from "./types.js";
export type { HighlightColors };
/**
 * Highlight source code and return ANSI-colored output.
 *
 * Thin typed wrapper over the native `highlightCode` binding; the arguments
 * are forwarded unchanged and the untyped result is cast to `string`.
 *
 * @param code - The source code to highlight
 * @param lang - Language identifier (e.g., "rust", "typescript", "python"), or null for plain text.
 *   An identifier the native side cannot resolve is also treated as plain text.
 * @param colors - Theme colors as ANSI escape sequences; an empty string leaves that category uncolored
 * @returns Highlighted code with ANSI color codes
 */
export function highlightCode(
code: string,
lang: string | null,
colors: HighlightColors,
): string {
return native.highlightCode(code, lang, colors) as string;
}
/**
 * Check if a language is supported for highlighting.
 *
 * Returns true if the language has either direct syntect support or a
 * fallback alias mapping. Thin typed wrapper over the native
 * `supportsLanguage` binding.
 *
 * @param lang - Language identifier, full name or short alias (e.g., "python", "py")
 * @returns Whether the native highlighter recognizes `lang`
 */
export function supportsLanguage(lang: string): boolean {
return native.supportsLanguage(lang) as boolean;
}
/**
 * Get list of all supported language names from syntect's default syntax set.
 *
 * The entries are syntax names (e.g., "JavaScript", "Python"), not the short
 * aliases that `highlightCode` also accepts.
 *
 * @returns Syntax names reported by the native `getSupportedLanguages` binding
 */
export function getSupportedLanguages(): string[] {
return native.getSupportedLanguages() as string[];
}

View file

@ -0,0 +1,25 @@
/**
 * Theme colors for syntax highlighting as ANSI escape sequences.
 *
 * Mirrors the native `HighlightColors` object: nine required categories plus
 * two optional diff categories. Each value is written verbatim before a
 * matching code segment (e.g., "\x1b[38;2;255;0;0m").
 */
export interface HighlightColors {
/** ANSI color for comments. */
comment: string;
/** ANSI color for keywords. */
keyword: string;
/** ANSI color for function names. */
function: string;
/** ANSI color for variables and identifiers. */
variable: string;
/** ANSI color for string literals. */
string: string;
/** ANSI color for numeric literals. */
number: string;
/** ANSI color for type identifiers. */
type: string;
/** ANSI color for operators. */
operator: string;
/** ANSI color for punctuation tokens. */
punctuation: string;
/** ANSI color for diff inserted lines. Optional; omitted means uncolored. */
inserted?: string;
/** ANSI color for diff deleted lines. Optional; omitted means uncolored. */
deleted?: string;
}

View file

@ -5,8 +5,16 @@
* - grep: ripgrep-backed regex search (content + filesystem)
* - ps: cross-platform process tree management
* - glob: gitignore-respecting filesystem discovery with scan caching
* - highlight: syntect-based syntax highlighting
*/
export {
highlightCode,
supportsLanguage,
getSupportedLanguages,
} from "./highlight/index.js";
export type { HighlightColors } from "./highlight/index.js";
export { searchContent, grep } from "./grep/index.js";
export type {
ContextLine,

View file

@ -52,4 +52,7 @@ export const native = loadNative() as {
onMatch?: ((match: unknown) => void) | undefined | null,
) => Promise<unknown>;
invalidateFsScanCache: (path?: string) => void;
highlightCode: (code: string, lang: string | null, colors: unknown) => unknown;
supportsLanguage: (lang: string) => unknown;
getSupportedLanguages: () => unknown;
};