perf(read): stream lines when offset/limit provided to avoid loading entire file

When offset or limit are specified, use Node.js readline streaming instead of
loading the entire file into memory. This fixes the truncation issue for large
files (>50KB) where the read tool would return truncated content even when
requesting a small slice.

- Add readLinesStreamed() for memory-efficient line reading
- Add countLines() for total line count without full read
- Use streaming path when offset !== undefined || limit !== undefined
- Keep existing full-file read path when no offset/limit specified
- Add tests for streaming behavior with large files

Fixes the long-standing issue where reading large files like src/headless.ts
(~50KB) with offset/limit would still hit truncation limits.
This commit is contained in:
Mikael Hugo 2026-05-04 15:20:16 +02:00
parent 362d766680
commit ccdd3027ab
2 changed files with 237 additions and 60 deletions

View file

@ -1,8 +1,10 @@
import type { AgentTool } from "@singularity-forge/pi-agent-core";
import type { ImageContent, TextContent } from "@singularity-forge/pi-ai";
import { type Static, Type } from "@sinclair/typebox";
import { createReadStream } from "fs";
import { constants } from "fs";
import { access as fsAccess, readFile as fsReadFile } from "fs/promises";
import { createInterface } from "readline";
import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js";
import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js";
import { resolveReadPath } from "./path-utils.js";
@ -46,6 +48,99 @@ export interface ReadToolOptions {
operations?: ReadOperations;
}
/**
* Read specific lines from a file using streaming (memory-efficient).
* Only reads the requested lines without loading the entire file.
*
* @param absolutePath - Path to the file
* @param startLine - 1-indexed line to start from
* @param lineCount - Maximum number of lines to read
* @param signal - Optional abort signal
* @returns Object with lines array and total line count (if reached)
*/
async function readLinesStreamed(
absolutePath: string,
startLine: number,
lineCount: number,
signal?: AbortSignal,
): Promise<{ lines: string[]; totalLines: number }> {
const stream = createReadStream(absolutePath, { encoding: "utf-8" });
const rl = createInterface({ input: stream });
const lines: string[] = [];
let currentLine = 0;
let totalLines = 0;
try {
for await (const line of rl) {
if (signal?.aborted) {
throw new Error("Operation aborted");
}
currentLine++;
// Skip lines before startLine
if (currentLine < startLine) {
continue;
}
// Collect lines within the requested range
if (currentLine >= startLine && lines.length < lineCount) {
lines.push(line);
}
// Stop if we've collected enough lines
if (lines.length >= lineCount) {
// We need to count remaining lines for the "total lines" info
// But we can stop reading and just count
totalLines = currentLine;
break;
}
}
// If we didn't break early, currentLine is the total
if (totalLines === 0) {
totalLines = currentLine;
}
} finally {
stream.destroy();
rl.close();
}
return { lines, totalLines };
}
/**
* Count total lines in a file efficiently.
*
* @param absolutePath - Path to the file
* @param signal - Optional abort signal
* @returns Total number of lines
*/
async function countLines(
absolutePath: string,
signal?: AbortSignal,
): Promise<number> {
const stream = createReadStream(absolutePath, { encoding: "utf-8" });
const rl = createInterface({ input: stream });
let count = 0;
try {
for await (const _line of rl) {
if (signal?.aborted) {
throw new Error("Operation aborted");
}
count++;
}
} finally {
stream.destroy();
rl.close();
}
return count;
}
export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool<typeof readSchema> {
const autoResizeImages = options?.autoResizeImages ?? true;
const ops = options?.operations ?? defaultReadOperations;
@ -127,74 +222,89 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo
}
} else {
// Read as text
const buffer = await ops.readFile(absolutePath);
const textContent = buffer.toString("utf-8");
const allLines = textContent.split("\n");
const totalFileLines = allLines.length;
// Apply offset if specified (1-indexed to 0-indexed)
let startLine = offset ? Math.max(0, offset - 1) : 0;
// Clamp offset to file bounds instead of throwing (#3007).
// When an agent requests offset:30 on a 13-line file, return
// the last line with a notice rather than an error that
// propagates as invalid JSON downstream.
let offsetClamped = false;
if (startLine >= allLines.length) {
startLine = Math.max(0, allLines.length - 1);
offsetClamped = true;
}
const startLineDisplay = startLine + 1; // For display (1-indexed)
// If limit is specified by user, use it; otherwise we'll let truncateHead decide
let selectedContent: string;
let userLimitedLines: number | undefined;
if (limit !== undefined) {
const endLine = Math.min(startLine + limit, allLines.length);
selectedContent = allLines.slice(startLine, endLine).join("\n");
userLimitedLines = endLine - startLine;
} else {
selectedContent = allLines.slice(startLine).join("\n");
}
// Apply truncation (respects both line and byte limits)
const truncation = truncateHead(selectedContent);
let outputText: string;
let details: ReadToolDetails | undefined;
if (truncation.firstLineExceedsLimit) {
// First line at offset exceeds 30KB - tell model to use bash
const firstLineSize = formatSize(Buffer.byteLength(allLines[startLine], "utf-8"));
outputText = `[Line ${startLineDisplay} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${startLineDisplay}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`;
details = { truncation };
} else if (truncation.truncated) {
// Truncation occurred - build actionable notice
const endLineDisplay = startLineDisplay + truncation.outputLines - 1;
const nextOffset = endLineDisplay + 1;
// Use streaming when offset or limit are specified (memory-efficient for large files)
if (offset !== undefined || limit !== undefined) {
const startLine = offset ? Math.max(1, offset) : 1;
const lineCount = limit ?? DEFAULT_MAX_LINES;
outputText = truncation.content;
// First, count total lines for the notice
const totalFileLines = await countLines(absolutePath, signal);
if (truncation.truncatedBy === "lines") {
outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines}. Use offset=${nextOffset} to continue.]`;
} else {
outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue.]`;
// Clamp offset to file bounds (#3007)
let effectiveStartLine = startLine;
let offsetClamped = false;
if (effectiveStartLine > totalFileLines) {
effectiveStartLine = Math.max(1, totalFileLines);
offsetClamped = true;
}
details = { truncation };
} else if (userLimitedLines !== undefined && startLine + userLimitedLines < allLines.length) {
// User specified limit, there's more content, but no truncation
const remaining = allLines.length - (startLine + userLimitedLines);
const nextOffset = startLine + userLimitedLines + 1;
outputText = truncation.content;
outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue.]`;
// Stream only the requested lines
const { lines } = await readLinesStreamed(absolutePath, effectiveStartLine, lineCount, signal);
const selectedContent = lines.join("\n");
// Apply truncation to the selected content
const truncation = truncateHead(selectedContent);
if (truncation.firstLineExceedsLimit) {
const firstLineSize = formatSize(Buffer.byteLength(lines[0] ?? "", "utf-8"));
outputText = `[Line ${effectiveStartLine} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${effectiveStartLine}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`;
details = { truncation };
} else if (truncation.truncated) {
const endLineDisplay = effectiveStartLine + truncation.outputLines - 1;
const nextOffset = endLineDisplay + 1;
outputText = truncation.content;
if (truncation.truncatedBy === "lines") {
outputText += `\n\n[Showing lines ${effectiveStartLine}-${endLineDisplay} of ${totalFileLines}. Use offset=${nextOffset} to continue.]`;
} else {
outputText += `\n\n[Showing lines ${effectiveStartLine}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue.]`;
}
details = { truncation };
} else if (lines.length >= lineCount && effectiveStartLine + lines.length - 1 < totalFileLines) {
// User limit reached, more content available
const nextOffset = effectiveStartLine + lines.length;
const remaining = totalFileLines - (effectiveStartLine + lines.length - 1);
outputText = truncation.content;
outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue.]`;
} else {
outputText = truncation.content;
}
if (offsetClamped) {
outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${effectiveStartLine}.]\n\n${outputText}`;
}
} else {
// No truncation, no user limit exceeded
outputText = truncation.content;
}
// No offset/limit - read entire file (existing behavior)
const buffer = await ops.readFile(absolutePath);
const textContent = buffer.toString("utf-8");
const allLines = textContent.split("\n");
const totalFileLines = allLines.length;
// Prepend clamp notice so the agent knows offset was adjusted
if (offsetClamped) {
outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`;
const truncation = truncateHead(textContent);
if (truncation.firstLineExceedsLimit) {
outputText = `[File exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: head -c ${DEFAULT_MAX_BYTES} ${path}]`;
details = { truncation };
} else if (truncation.truncated) {
const endLineDisplay = truncation.outputLines;
const nextOffset = endLineDisplay + 1;
outputText = truncation.content;
if (truncation.truncatedBy === "lines") {
outputText += `\n\n[Showing lines 1-${endLineDisplay} of ${totalFileLines}. Use offset=${nextOffset} to continue.]`;
} else {
outputText += `\n\n[Showing lines 1-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue.]`;
}
details = { truncation };
} else {
outputText = truncation.content;
}
}
content = [{ type: "text", text: outputText }];

View file

@ -118,3 +118,70 @@ test("read tool: clamped offset includes notice about adjustment", async (t) =>
`should indicate offset was clamped, got: ${text.slice(0, 200)}`,
);
});
test("read tool: streaming with offset and limit on large file reads only requested lines", async (t) => {
const { dir, cleanup } = makeTmpDir();
afterEach(cleanup);
// Create a 1000-line file
writeLines(dir, "large-file.txt", 1000);
const readTool = createReadTool(dir);
const result = await readTool.execute("test-call", {
path: "large-file.txt",
offset: 100,
limit: 10,
});
const text = (result.content[0] as any).text as string;
// Should include lines 100-109
assert.ok(text.includes("Line 100:"), "should include line 100");
assert.ok(text.includes("Line 109:"), "should include line 109");
// Should NOT include line 1 or line 200
assert.ok(!text.includes("Line 1:"), "should NOT include line 1");
assert.ok(!text.includes("Line 200:"), "should NOT include line 200");
// Should include continuation notice
assert.ok(
text.includes("more lines") || text.includes("offset="),
"should include continuation notice",
);
});
test("read tool: streaming with limit only reads first N lines", async (t) => {
const { dir, cleanup } = makeTmpDir();
afterEach(cleanup);
writeLines(dir, "medium-file.txt", 500);
const readTool = createReadTool(dir);
const result = await readTool.execute("test-call", {
path: "medium-file.txt",
limit: 5,
});
const text = (result.content[0] as any).text as string;
assert.ok(text.includes("Line 1:"), "should include line 1");
assert.ok(text.includes("Line 5:"), "should include line 5");
assert.ok(!text.includes("Line 6:"), "should NOT include line 6");
});
test("read tool: streaming with offset only reads from offset to end", async (t) => {
const { dir, cleanup } = makeTmpDir();
afterEach(cleanup);
writeLines(dir, "offset-only.txt", 100);
const readTool = createReadTool(dir);
const result = await readTool.execute("test-call", {
path: "offset-only.txt",
offset: 95,
});
const text = (result.content[0] as any).text as string;
assert.ok(text.includes("Line 95:"), "should include line 95");
assert.ok(text.includes("Line 100:"), "should include line 100");
assert.ok(!text.includes("Line 94:"), "should NOT include line 94");
});