364 lines
11 KiB
Rust
364 lines
11 KiB
Rust
//! Line-boundary-aware output truncation.
|
|
//!
|
|
//! Truncates tool output (bash, grep, file reads) at line boundaries,
|
|
//! counting by UTF-8 bytes. Three modes:
|
|
//! - **head**: keep the first N bytes worth of complete lines
|
|
//! - **tail**: keep the last N bytes worth of complete lines
|
|
//! - **both**: split budget between head and tail with an elision marker
|
|
|
|
use napi_derive::napi;
|
|
|
|
#[napi(object)]
|
|
pub struct TruncateResult {
|
|
/// The truncated (or original) text.
|
|
pub text: String,
|
|
/// Whether any truncation occurred.
|
|
pub truncated: bool,
|
|
/// Total number of lines in the original input.
|
|
pub original_lines: u32,
|
|
/// Number of complete lines kept in the output.
|
|
pub kept_lines: u32,
|
|
}
|
|
|
|
#[napi(object)]
|
|
pub struct TruncateOutputResult {
|
|
/// The truncated (or original) text.
|
|
pub text: String,
|
|
/// Whether any truncation occurred.
|
|
pub truncated: bool,
|
|
/// Human-readable truncation summary (e.g. "Kept 50 of 1200 lines").
|
|
pub message: Option<String>,
|
|
}
|
|
|
|
/// Keep the first `max_bytes` worth of complete lines.
|
|
///
|
|
/// Returns the original text unchanged when it fits. When truncation is
|
|
/// required, the output ends at the last newline boundary that fits within
|
|
/// the byte budget. UTF-8 boundaries are respected because we split on `\n`
|
|
/// which is always a single byte.
|
|
#[napi(js_name = "truncateTail")]
|
|
pub fn truncate_tail(text: String, max_bytes: u32) -> TruncateResult {
|
|
let max = max_bytes as usize;
|
|
let total_bytes = text.len();
|
|
|
|
// Fast path: fits entirely
|
|
if total_bytes <= max {
|
|
let line_count = memchr::memchr_iter(b'\n', text.as_bytes()).count()
|
|
+ if text.is_empty() || text.ends_with('\n') { 0 } else { 1 };
|
|
return TruncateResult {
|
|
text,
|
|
truncated: false,
|
|
original_lines: line_count as u32,
|
|
kept_lines: line_count as u32,
|
|
};
|
|
}
|
|
|
|
let bytes = text.as_bytes();
|
|
let original_lines = count_lines(bytes);
|
|
|
|
// Find the last newline at or before max_bytes
|
|
let cut = find_last_newline_before(bytes, max);
|
|
|
|
if cut == 0 {
|
|
// First line alone exceeds the budget — keep nothing
|
|
return TruncateResult {
|
|
text: String::new(),
|
|
truncated: true,
|
|
original_lines,
|
|
kept_lines: 0,
|
|
};
|
|
}
|
|
|
|
let kept = &bytes[..cut];
|
|
let kept_lines = count_lines(kept);
|
|
|
|
TruncateResult {
|
|
text: std::str::from_utf8(kept).expect("split at newline boundary preserves UTF-8").to_owned(),
|
|
truncated: true,
|
|
original_lines,
|
|
kept_lines,
|
|
}
|
|
}
|
|
|
|
/// Keep the last `max_bytes` worth of complete lines.
|
|
///
|
|
/// The output starts at the first line boundary after skipping enough bytes
|
|
/// from the front. UTF-8 boundaries are respected because we only split on
|
|
/// `\n`.
|
|
#[napi(js_name = "truncateHead")]
|
|
pub fn truncate_head(text: String, max_bytes: u32) -> TruncateResult {
|
|
let max = max_bytes as usize;
|
|
let total_bytes = text.len();
|
|
|
|
// Fast path
|
|
if total_bytes <= max {
|
|
let line_count = memchr::memchr_iter(b'\n', text.as_bytes()).count()
|
|
+ if text.is_empty() || text.ends_with('\n') { 0 } else { 1 };
|
|
return TruncateResult {
|
|
text,
|
|
truncated: false,
|
|
original_lines: line_count as u32,
|
|
kept_lines: line_count as u32,
|
|
};
|
|
}
|
|
|
|
let bytes = text.as_bytes();
|
|
let original_lines = count_lines(bytes);
|
|
|
|
// We need to keep the last `max` bytes. Find the first newline at or
|
|
// after (total_bytes - max) so we start on a line boundary.
|
|
let skip_to = total_bytes - max;
|
|
let start = find_first_newline_after(bytes, skip_to);
|
|
|
|
if start >= total_bytes {
|
|
// Last line alone exceeds the budget — keep nothing
|
|
return TruncateResult {
|
|
text: String::new(),
|
|
truncated: true,
|
|
original_lines,
|
|
kept_lines: 0,
|
|
};
|
|
}
|
|
|
|
let kept = &bytes[start..];
|
|
let kept_lines = count_lines(kept);
|
|
|
|
TruncateResult {
|
|
text: std::str::from_utf8(kept).expect("split at newline boundary preserves UTF-8").to_owned(),
|
|
truncated: true,
|
|
original_lines,
|
|
kept_lines,
|
|
}
|
|
}
|
|
|
|
/// Main entry point: truncate tool output with head/tail/both modes.
|
|
///
|
|
/// Modes:
|
|
/// - `"tail"` (default): keep the beginning (head truncation removes tail)
|
|
/// - `"head"`: keep the end (tail truncation removes head)
|
|
/// - `"both"`: keep beginning and end, elide the middle
|
|
#[napi(js_name = "truncateOutput")]
|
|
pub fn truncate_output(
|
|
text: String,
|
|
max_bytes: u32,
|
|
mode: Option<String>,
|
|
) -> TruncateOutputResult {
|
|
let max = max_bytes as usize;
|
|
|
|
if text.len() <= max {
|
|
return TruncateOutputResult {
|
|
text,
|
|
truncated: false,
|
|
message: None,
|
|
};
|
|
}
|
|
|
|
let mode_str = mode.as_deref().unwrap_or("tail");
|
|
let original_lines = count_lines(text.as_bytes());
|
|
|
|
match mode_str {
|
|
"head" => {
|
|
let total_bytes = text.len();
|
|
let r = truncate_head(text, max_bytes);
|
|
let removed = total_bytes - r.text.len();
|
|
let msg = format!(
|
|
"Kept last {} of {} lines ({} bytes truncated from start)",
|
|
r.kept_lines, r.original_lines, removed
|
|
);
|
|
TruncateOutputResult {
|
|
text: r.text,
|
|
truncated: true,
|
|
message: Some(msg),
|
|
}
|
|
}
|
|
"both" => {
|
|
let half = max / 2;
|
|
let head_result = truncate_tail(text.clone(), half as u32);
|
|
let tail_result = truncate_head(text, (max - half) as u32);
|
|
|
|
let marker = format!(
|
|
"\n\n... [{} lines elided] ...\n\n",
|
|
original_lines
|
|
.saturating_sub(head_result.kept_lines)
|
|
.saturating_sub(tail_result.kept_lines)
|
|
);
|
|
let combined = format!("{}{}{}", head_result.text, marker, tail_result.text);
|
|
let kept = head_result.kept_lines + tail_result.kept_lines;
|
|
let msg = format!(
|
|
"Kept {} of {} lines (head {} + tail {})",
|
|
kept, original_lines, head_result.kept_lines, tail_result.kept_lines
|
|
);
|
|
TruncateOutputResult {
|
|
text: combined,
|
|
truncated: true,
|
|
message: Some(msg),
|
|
}
|
|
}
|
|
_ => {
|
|
// "tail" — keep the beginning
|
|
let total_bytes = text.len();
|
|
let r = truncate_tail(text, max_bytes);
|
|
let removed = total_bytes - r.text.len();
|
|
let msg = format!(
|
|
"Kept first {} of {} lines ({} bytes truncated from end)",
|
|
r.kept_lines, r.original_lines, removed
|
|
);
|
|
TruncateOutputResult {
|
|
text: r.text,
|
|
truncated: true,
|
|
message: Some(msg),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── helpers ──────────────────────────────────────────────────────────────
|
|
|
|
/// Count lines in a byte slice. A trailing newline does not add an extra line.
|
|
#[inline]
|
|
fn count_lines(bytes: &[u8]) -> u32 {
|
|
if bytes.is_empty() {
|
|
return 0;
|
|
}
|
|
let newlines = memchr::memchr_iter(b'\n', bytes).count() as u32;
|
|
if bytes.last() == Some(&b'\n') {
|
|
newlines
|
|
} else {
|
|
newlines + 1
|
|
}
|
|
}
|
|
|
|
/// Find the byte position just past the last `\n` that is at or before `limit`.
|
|
/// Returns 0 if no newline exists before `limit`.
|
|
#[inline]
|
|
fn find_last_newline_before(bytes: &[u8], limit: usize) -> usize {
|
|
let search_end = limit.min(bytes.len());
|
|
// Search backwards for \n
|
|
match memchr::memrchr(b'\n', &bytes[..search_end]) {
|
|
Some(pos) => pos + 1, // include the newline
|
|
None => 0,
|
|
}
|
|
}
|
|
|
|
/// Find the byte position just past the first `\n` at or after `pos`.
|
|
/// Returns `bytes.len()` if no newline is found.
|
|
#[inline]
|
|
fn find_first_newline_after(bytes: &[u8], pos: usize) -> usize {
|
|
let start = pos.min(bytes.len());
|
|
match memchr::memchr(b'\n', &bytes[start..]) {
|
|
Some(offset) => start + offset + 1, // skip past the newline
|
|
None => bytes.len(),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_no_truncation_needed() {
|
|
let r = truncate_tail("hello\nworld\n".into(), 100);
|
|
assert!(!r.truncated);
|
|
assert_eq!(r.original_lines, 2);
|
|
assert_eq!(r.kept_lines, 2);
|
|
assert_eq!(r.text, "hello\nworld\n");
|
|
}
|
|
|
|
#[test]
|
|
fn test_tail_truncation_ascii() {
|
|
// "hello\nworld\n" = 12 bytes, limit to 7 -> keep "hello\n"
|
|
let r = truncate_tail("hello\nworld\n".into(), 7);
|
|
assert!(r.truncated);
|
|
assert_eq!(r.text, "hello\n");
|
|
assert_eq!(r.kept_lines, 1);
|
|
assert_eq!(r.original_lines, 2);
|
|
}
|
|
|
|
#[test]
|
|
fn test_head_truncation_ascii() {
|
|
let r = truncate_head("hello\nworld\n".into(), 7);
|
|
assert!(r.truncated);
|
|
assert_eq!(r.text, "world\n");
|
|
assert_eq!(r.kept_lines, 1);
|
|
}
|
|
|
|
#[test]
|
|
fn test_utf8_multibyte() {
|
|
// "cafe\u{0301}\n" = "café\n" where é is e + combining accent (3 bytes for the combining char)
|
|
// Actually let's use a simpler case: "日本\n" = 7 bytes (3+3+1)
|
|
let input = "日本\nworld\n".to_string();
|
|
assert_eq!(input.len(), 13); // 3+3+1+5+1
|
|
let r = truncate_tail(input.clone(), 8);
|
|
assert!(r.truncated);
|
|
assert_eq!(r.text, "日本\n");
|
|
assert_eq!(r.kept_lines, 1);
|
|
}
|
|
|
|
#[test]
|
|
fn test_empty_input() {
|
|
let r = truncate_tail(String::new(), 100);
|
|
assert!(!r.truncated);
|
|
assert_eq!(r.original_lines, 0);
|
|
assert_eq!(r.kept_lines, 0);
|
|
|
|
let r2 = truncate_head(String::new(), 100);
|
|
assert!(!r2.truncated);
|
|
}
|
|
|
|
#[test]
|
|
fn test_exact_boundary() {
|
|
let input = "abc\ndef\n".to_string(); // 8 bytes
|
|
let r = truncate_tail(input.clone(), 8);
|
|
assert!(!r.truncated);
|
|
assert_eq!(r.text, "abc\ndef\n");
|
|
}
|
|
|
|
#[test]
|
|
fn test_single_line_exceeding_limit() {
|
|
let r = truncate_tail("this_is_a_very_long_line".into(), 5);
|
|
assert!(r.truncated);
|
|
assert_eq!(r.text, "");
|
|
assert_eq!(r.kept_lines, 0);
|
|
}
|
|
|
|
#[test]
|
|
fn test_head_single_line_exceeding() {
|
|
let r = truncate_head("this_is_a_very_long_line".into(), 5);
|
|
assert!(r.truncated);
|
|
assert_eq!(r.text, "");
|
|
assert_eq!(r.kept_lines, 0);
|
|
}
|
|
|
|
#[test]
|
|
fn test_truncate_output_both_mode() {
|
|
let mut lines = Vec::new();
|
|
for i in 0..100 {
|
|
lines.push(format!("line {i}"));
|
|
}
|
|
let input = lines.join("\n") + "\n";
|
|
let r = truncate_output(input, 200, Some("both".into()));
|
|
assert!(r.truncated);
|
|
assert!(r.message.is_some());
|
|
assert!(r.text.contains("... ["));
|
|
}
|
|
|
|
#[test]
|
|
fn test_count_lines() {
|
|
assert_eq!(count_lines(b""), 0);
|
|
assert_eq!(count_lines(b"a"), 1);
|
|
assert_eq!(count_lines(b"a\n"), 1);
|
|
assert_eq!(count_lines(b"a\nb"), 2);
|
|
assert_eq!(count_lines(b"a\nb\n"), 2);
|
|
}
|
|
|
|
#[test]
|
|
fn test_utf8_emoji() {
|
|
// Each emoji is 4 bytes
|
|
let input = "😀\n😂\n🎉\n".to_string();
|
|
assert_eq!(input.len(), 15); // 4+1+4+1+4+1
|
|
let r = truncate_tail(input, 6);
|
|
assert!(r.truncated);
|
|
assert_eq!(r.text, "😀\n");
|
|
assert_eq!(r.kept_lines, 1);
|
|
}
|
|
}
|