diff --git a/packages/pi-coding-agent/src/core/tools/read.ts b/packages/pi-coding-agent/src/core/tools/read.ts index 2f6907dba..bc171bee3 100644 --- a/packages/pi-coding-agent/src/core/tools/read.ts +++ b/packages/pi-coding-agent/src/core/tools/read.ts @@ -1,8 +1,10 @@ import type { AgentTool } from "@singularity-forge/pi-agent-core"; import type { ImageContent, TextContent } from "@singularity-forge/pi-ai"; import { type Static, Type } from "@sinclair/typebox"; +import { createReadStream } from "fs"; import { constants } from "fs"; import { access as fsAccess, readFile as fsReadFile } from "fs/promises"; +import { createInterface } from "readline"; import { formatDimensionNote, resizeImage } from "../../utils/image-resize.js"; import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime.js"; import { resolveReadPath } from "./path-utils.js"; @@ -46,6 +48,99 @@ export interface ReadToolOptions { operations?: ReadOperations; } +/** + * Read specific lines from a file using streaming (memory-efficient). + * Only reads the requested lines without loading the entire file. 
+ * + * @param absolutePath - Path to the file + * @param startLine - 1-indexed line to start from + * @param lineCount - Maximum number of lines to read + * @param signal - Optional abort signal + * @returns Object with lines array and total line count (if reached) + */ +async function readLinesStreamed( + absolutePath: string, + startLine: number, + lineCount: number, + signal?: AbortSignal, +): Promise<{ lines: string[]; totalLines: number }> { + const stream = createReadStream(absolutePath, { encoding: "utf-8" }); + const rl = createInterface({ input: stream }); + + const lines: string[] = []; + let currentLine = 0; + let totalLines = 0; + + try { + for await (const line of rl) { + if (signal?.aborted) { + throw new Error("Operation aborted"); + } + + currentLine++; + + // Skip lines before startLine + if (currentLine < startLine) { + continue; + } + + // Collect lines within the requested range + if (currentLine >= startLine && lines.length < lineCount) { + lines.push(line); + } + + // Stop if we've collected enough lines + if (lines.length >= lineCount) { + // We need to count remaining lines for the "total lines" info + // But we can stop reading and just count + totalLines = currentLine; + break; + } + } + + // If we didn't break early, currentLine is the total + if (totalLines === 0) { + totalLines = currentLine; + } + } finally { + stream.destroy(); + rl.close(); + } + + return { lines, totalLines }; +} + +/** + * Count total lines in a file efficiently. 
+ * + * @param absolutePath - Path to the file + * @param signal - Optional abort signal + * @returns Total number of lines + */ +async function countLines( + absolutePath: string, + signal?: AbortSignal, +): Promise<number> { + const stream = createReadStream(absolutePath, { encoding: "utf-8" }); + const rl = createInterface({ input: stream }); + + let count = 0; + + try { + for await (const _line of rl) { + if (signal?.aborted) { + throw new Error("Operation aborted"); + } + count++; + } + } finally { + stream.destroy(); + rl.close(); + } + + return count; +} + export function createReadTool(cwd: string, options?: ReadToolOptions): AgentTool { const autoResizeImages = options?.autoResizeImages ?? true; const ops = options?.operations ?? defaultReadOperations; @@ -127,74 +222,89 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo } } else { // Read as text - const buffer = await ops.readFile(absolutePath); - const textContent = buffer.toString("utf-8"); - const allLines = textContent.split("\n"); - const totalFileLines = allLines.length; - - // Apply offset if specified (1-indexed to 0-indexed) - let startLine = offset ? Math.max(0, offset - 1) : 0; - - // Clamp offset to file bounds instead of throwing (#3007). - // When an agent requests offset:30 on a 13-line file, return - // the last line with a notice rather than an error that - // propagates as invalid JSON downstream. 
- let offsetClamped = false; - if (startLine >= allLines.length) { - startLine = Math.max(0, allLines.length - 1); - offsetClamped = true; - } - const startLineDisplay = startLine + 1; // For display (1-indexed) - - // If limit is specified by user, use it; otherwise we'll let truncateHead decide - let selectedContent: string; - let userLimitedLines: number | undefined; - if (limit !== undefined) { - const endLine = Math.min(startLine + limit, allLines.length); - selectedContent = allLines.slice(startLine, endLine).join("\n"); - userLimitedLines = endLine - startLine; - } else { - selectedContent = allLines.slice(startLine).join("\n"); - } - - // Apply truncation (respects both line and byte limits) - const truncation = truncateHead(selectedContent); - let outputText: string; + let details: ReadToolDetails | undefined; - if (truncation.firstLineExceedsLimit) { - // First line at offset exceeds 30KB - tell model to use bash - const firstLineSize = formatSize(Buffer.byteLength(allLines[startLine], "utf-8")); - outputText = `[Line ${startLineDisplay} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${startLineDisplay}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`; - details = { truncation }; - } else if (truncation.truncated) { - // Truncation occurred - build actionable notice - const endLineDisplay = startLineDisplay + truncation.outputLines - 1; - const nextOffset = endLineDisplay + 1; + // Use streaming when offset or limit are specified (memory-efficient for large files) + if (offset !== undefined || limit !== undefined) { + const startLine = offset ? Math.max(1, offset) : 1; + const lineCount = limit ?? DEFAULT_MAX_LINES; - outputText = truncation.content; + // First, count total lines for the notice + const totalFileLines = await countLines(absolutePath, signal); - if (truncation.truncatedBy === "lines") { - outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines}. 
Use offset=${nextOffset} to continue.]`; - } else { - outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue.]`; + // Clamp offset to file bounds (#3007) + let effectiveStartLine = startLine; + let offsetClamped = false; + if (effectiveStartLine > totalFileLines) { + effectiveStartLine = Math.max(1, totalFileLines); + offsetClamped = true; } - details = { truncation }; - } else if (userLimitedLines !== undefined && startLine + userLimitedLines < allLines.length) { - // User specified limit, there's more content, but no truncation - const remaining = allLines.length - (startLine + userLimitedLines); - const nextOffset = startLine + userLimitedLines + 1; - outputText = truncation.content; - outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue.]`; + // Stream only the requested lines + const { lines } = await readLinesStreamed(absolutePath, effectiveStartLine, lineCount, signal); + const selectedContent = lines.join("\n"); + + // Apply truncation to the selected content + const truncation = truncateHead(selectedContent); + + if (truncation.firstLineExceedsLimit) { + const firstLineSize = formatSize(Buffer.byteLength(lines[0] ?? "", "utf-8")); + outputText = `[Line ${effectiveStartLine} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${effectiveStartLine}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`; + details = { truncation }; + } else if (truncation.truncated) { + const endLineDisplay = effectiveStartLine + truncation.outputLines - 1; + const nextOffset = endLineDisplay + 1; + + outputText = truncation.content; + + if (truncation.truncatedBy === "lines") { + outputText += `\n\n[Showing lines ${effectiveStartLine}-${endLineDisplay} of ${totalFileLines}. 
Use offset=${nextOffset} to continue.]`; + } else { + outputText += `\n\n[Showing lines ${effectiveStartLine}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue.]`; + } + details = { truncation }; + } else if (lines.length >= lineCount && effectiveStartLine + lines.length - 1 < totalFileLines) { + // User limit reached, more content available + const nextOffset = effectiveStartLine + lines.length; + const remaining = totalFileLines - (effectiveStartLine + lines.length - 1); + + outputText = truncation.content; + outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue.]`; + } else { + outputText = truncation.content; + } + + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${effectiveStartLine}.]\n\n${outputText}`; + } } else { - // No truncation, no user limit exceeded - outputText = truncation.content; - } + // No offset/limit - read entire file (existing behavior) + const buffer = await ops.readFile(absolutePath); + const textContent = buffer.toString("utf-8"); + const allLines = textContent.split("\n"); + const totalFileLines = allLines.length; - // Prepend clamp notice so the agent knows offset was adjusted - if (offsetClamped) { - outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + const truncation = truncateHead(textContent); + + if (truncation.firstLineExceedsLimit) { + outputText = `[File exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: head -c ${DEFAULT_MAX_BYTES} ${path}]`; + details = { truncation }; + } else if (truncation.truncated) { + const endLineDisplay = truncation.outputLines; + const nextOffset = endLineDisplay + 1; + + outputText = truncation.content; + + if (truncation.truncatedBy === "lines") { + outputText += `\n\n[Showing lines 1-${endLineDisplay} of ${totalFileLines}. 
Use offset=${nextOffset} to continue.]`; + } else { + outputText += `\n\n[Showing lines 1-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue.]`; + } + details = { truncation }; + } else { + outputText = truncation.content; + } } content = [{ type: "text", text: outputText }]; diff --git a/src/tests/read-tool-offset-clamp.test.ts b/src/tests/read-tool-offset-clamp.test.ts index 4c07c15ab..5746211ed 100644 --- a/src/tests/read-tool-offset-clamp.test.ts +++ b/src/tests/read-tool-offset-clamp.test.ts @@ -118,3 +118,70 @@ test("read tool: clamped offset includes notice about adjustment", async (t) => `should indicate offset was clamped, got: ${text.slice(0, 200)}`, ); }); + +test("read tool: streaming with offset and limit on large file reads only requested lines", async (t) => { + const { dir, cleanup } = makeTmpDir(); + afterEach(cleanup); + + // Create a 1000-line file + writeLines(dir, "large-file.txt", 1000); + + const readTool = createReadTool(dir); + const result = await readTool.execute("test-call", { + path: "large-file.txt", + offset: 100, + limit: 10, + }); + + const text = (result.content[0] as any).text as string; + + // Should include lines 100-109 + assert.ok(text.includes("Line 100:"), "should include line 100"); + assert.ok(text.includes("Line 109:"), "should include line 109"); + + // Should NOT include line 1 or line 200 + assert.ok(!text.includes("Line 1:"), "should NOT include line 1"); + assert.ok(!text.includes("Line 200:"), "should NOT include line 200"); + + // Should include continuation notice + assert.ok( + text.includes("more lines") || text.includes("offset="), + "should include continuation notice", + ); +}); + +test("read tool: streaming with limit only reads first N lines", async (t) => { + const { dir, cleanup } = makeTmpDir(); + afterEach(cleanup); + writeLines(dir, "medium-file.txt", 500); + + const readTool = createReadTool(dir); + const result = await 
readTool.execute("test-call", { + path: "medium-file.txt", + limit: 5, + }); + + const text = (result.content[0] as any).text as string; + + assert.ok(text.includes("Line 1:"), "should include line 1"); + assert.ok(text.includes("Line 5:"), "should include line 5"); + assert.ok(!text.includes("Line 6:"), "should NOT include line 6"); +}); + +test("read tool: streaming with offset only reads from offset to end", async (t) => { + const { dir, cleanup } = makeTmpDir(); + afterEach(cleanup); + writeLines(dir, "offset-only.txt", 100); + + const readTool = createReadTool(dir); + const result = await readTool.execute("test-call", { + path: "offset-only.txt", + offset: 95, + }); + + const text = (result.content[0] as any).text as string; + + assert.ok(text.includes("Line 95:"), "should include line 95"); + assert.ok(text.includes("Line 100:"), "should include line 100"); + assert.ok(!text.includes("Line 94:"), "should NOT include line 94"); +});