* fix: avoid native hangs in gsd auto paths * fix: use .js extension in edit-diff.test.ts import for tsc compatibility * fix: prevent OOM on large file diffs and implement context-line windowing - Add size guard (MAX_DP_CELLS=4M) to buildLineDiff that falls back to a linear-time prefix/suffix matching algorithm for large files, preventing the O(n*m) DP table from causing OOM crashes - Implement contextLines parameter in generateDiffString so only lines within N lines of a change are rendered (with "..." separators), matching unified diff behavior — the parameter was previously accepted but ignored - Add tests for both context windowing and large-file fallback --------- Co-authored-by: TÂCHES <afromanguy@me.com>
This commit is contained in:
parent
132ae92944
commit
d35ae683f1
5 changed files with 353 additions and 17 deletions
85
packages/pi-coding-agent/src/core/tools/edit-diff.test.ts
Normal file
85
packages/pi-coding-agent/src/core/tools/edit-diff.test.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import {
|
||||
computeEditDiff,
|
||||
fuzzyFindText,
|
||||
generateDiffString,
|
||||
normalizeForFuzzyMatch,
|
||||
} from "./edit-diff.js";
|
||||
|
||||
describe("edit-diff", () => {
|
||||
it("normalizes quotes, dashes, spaces, and trailing whitespace", () => {
|
||||
const input = "“hello”\u00A0world — test \nnext\t\t\n";
|
||||
assert.equal(normalizeForFuzzyMatch(input), "\"hello\" world - test\nnext\n");
|
||||
});
|
||||
|
||||
it("falls back to fuzzy matching when unicode punctuation differs", () => {
|
||||
const result = fuzzyFindText("const title = “Hello”;\n", "const title = \"Hello\";\n");
|
||||
assert.equal(result.found, true);
|
||||
assert.equal(result.usedFuzzyMatch, true);
|
||||
assert.equal(result.contentForReplacement, "const title = \"Hello\";\n");
|
||||
});
|
||||
|
||||
it("renders numbered diffs with the first changed line", () => {
|
||||
const result = generateDiffString("line 1\nline 2\nline 3\n", "line 1\nline two\nline 3\n");
|
||||
assert.equal(result.firstChangedLine, 2);
|
||||
assert.match(result.diff, /-2 line 2/);
|
||||
assert.match(result.diff, /\+2 line two/);
|
||||
});
|
||||
|
||||
it("respects contextLines and inserts separators for distant changes", () => {
|
||||
const lines = Array.from({ length: 20 }, (_, i) => `line ${i + 1}`);
|
||||
const oldContent = lines.join("\n") + "\n";
|
||||
const modified = [...lines];
|
||||
modified[1] = "changed 2"; // line 2
|
||||
modified[17] = "changed 18"; // line 18
|
||||
const newContent = modified.join("\n") + "\n";
|
||||
|
||||
const result = generateDiffString(oldContent, newContent, 2);
|
||||
// Should contain separator between the two distant change regions
|
||||
assert.match(result.diff, /\.\.\./);
|
||||
// Should NOT contain lines far from changes (e.g. line 10)
|
||||
assert.doesNotMatch(result.diff, /line 10/);
|
||||
// Should contain the changed lines
|
||||
assert.match(result.diff, /changed 2/);
|
||||
assert.match(result.diff, /changed 18/);
|
||||
});
|
||||
|
||||
it("handles large files without OOM by falling back to linear diff", () => {
|
||||
// Create files large enough to exceed the DP threshold
|
||||
const lineCount = 3000;
|
||||
const oldLines = Array.from({ length: lineCount }, (_, i) => `line ${i}`);
|
||||
const newLines = [...oldLines];
|
||||
newLines[1500] = "CHANGED";
|
||||
const result = generateDiffString(oldLines.join("\n") + "\n", newLines.join("\n") + "\n");
|
||||
assert.ok(result.firstChangedLine !== undefined);
|
||||
assert.match(result.diff, /CHANGED/);
|
||||
});
|
||||
|
||||
it("computes diffs for preview without native helpers", async () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), "edit-diff-test-"));
|
||||
try {
|
||||
const file = join(dir, "sample.ts");
|
||||
writeFileSync(file, "const title = “Hello”;\n", "utf-8");
|
||||
|
||||
const result = await computeEditDiff(
|
||||
file,
|
||||
"const title = \"Hello\";\n",
|
||||
"const title = \"Hi\";\n",
|
||||
dir,
|
||||
);
|
||||
|
||||
assert.ok(!("error" in result), "expected a diff result");
|
||||
if (!("error" in result)) {
|
||||
assert.equal(result.firstChangedLine, 1);
|
||||
assert.match(result.diff, /\+1 const title = "Hi";/);
|
||||
}
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
@ -2,15 +2,11 @@
|
|||
* Shared diff computation utilities for the edit tool.
|
||||
* Used by both edit.ts (for execution) and tool-execution.ts (for preview rendering).
|
||||
*
|
||||
* Hot-path functions (fuzzyFindText, normalizeForFuzzyMatch, generateDiffString)
|
||||
* delegate to the native Rust engine for performance on large files.
|
||||
* These helpers intentionally stay in JavaScript. Issue #453 showed that
|
||||
* post-tool preview paths must not depend on the native addon because a native
|
||||
* hang there can wedge the entire interactive session after a successful tool run.
|
||||
*/
|
||||
|
||||
import {
|
||||
fuzzyFindText as nativeFuzzyFindText,
|
||||
generateDiff as nativeGenerateDiff,
|
||||
normalizeForFuzzyMatch as nativeNormalizeForFuzzyMatch,
|
||||
} from "@gsd/native";
|
||||
import { constants } from "fs";
|
||||
import { access, readFile } from "fs/promises";
|
||||
import { resolveToCwd } from "./path-utils.js";
|
||||
|
|
@ -32,14 +28,23 @@ export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string
|
|||
}
|
||||
|
||||
/**
|
||||
* Normalize text for fuzzy matching (native Rust implementation).
|
||||
* Normalize text for fuzzy matching.
|
||||
* - Strip trailing whitespace from each line
|
||||
* - Normalize smart quotes to ASCII equivalents
|
||||
* - Normalize Unicode dashes/hyphens to ASCII hyphen
|
||||
* - Normalize special Unicode spaces to regular space
|
||||
*/
|
||||
export function normalizeForFuzzyMatch(text: string): string {
|
||||
return nativeNormalizeForFuzzyMatch(text);
|
||||
return text
|
||||
.replace(/\r\n/g, "\n")
|
||||
.replace(/\r/g, "\n")
|
||||
.replace(/[“”]/g, '"')
|
||||
.replace(/[‘’]/g, "'")
|
||||
.replace(/[‐‑‒–—−]/g, "-")
|
||||
.replace(/[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, " ")
|
||||
.split("\n")
|
||||
.map((line) => line.replace(/[ \t]+$/g, ""))
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
export interface FuzzyMatchResult {
|
||||
|
|
@ -59,14 +64,44 @@ export interface FuzzyMatchResult {
|
|||
}
|
||||
|
||||
/**
|
||||
* Find oldText in content, trying exact match first, then fuzzy match
|
||||
* (native Rust implementation).
|
||||
* Find oldText in content, trying exact match first, then fuzzy match.
|
||||
*
|
||||
* When fuzzy matching is used, the returned contentForReplacement is the
|
||||
* fuzzy-normalized version of the content.
|
||||
*/
|
||||
export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult {
|
||||
return nativeFuzzyFindText(content, oldText);
|
||||
const exactIndex = content.indexOf(oldText);
|
||||
if (exactIndex !== -1) {
|
||||
return {
|
||||
found: true,
|
||||
index: exactIndex,
|
||||
matchLength: oldText.length,
|
||||
usedFuzzyMatch: false,
|
||||
contentForReplacement: content,
|
||||
};
|
||||
}
|
||||
|
||||
const normalizedContent = normalizeForFuzzyMatch(content);
|
||||
const normalizedOldText = normalizeForFuzzyMatch(oldText);
|
||||
const fuzzyIndex = normalizedContent.indexOf(normalizedOldText);
|
||||
|
||||
if (fuzzyIndex === -1) {
|
||||
return {
|
||||
found: false,
|
||||
index: -1,
|
||||
matchLength: 0,
|
||||
usedFuzzyMatch: false,
|
||||
contentForReplacement: content,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
found: true,
|
||||
index: fuzzyIndex,
|
||||
matchLength: normalizedOldText.length,
|
||||
usedFuzzyMatch: true,
|
||||
contentForReplacement: normalizedContent,
|
||||
};
|
||||
}
|
||||
|
||||
/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */
|
||||
|
|
@ -75,20 +110,81 @@ export function stripBom(content: string): { bom: string; text: string } {
|
|||
}
|
||||
|
||||
/**
|
||||
* Generate a unified diff string with line numbers and context
|
||||
* (native Rust implementation using Myers' algorithm via the `similar` crate).
|
||||
* Generate a unified diff string with line numbers and context.
|
||||
*
|
||||
* Returns both the diff string and the first changed line number (in the new file).
|
||||
* Only lines within `contextLines` of a change are included (like unified diff).
|
||||
*/
|
||||
export function generateDiffString(
|
||||
oldContent: string,
|
||||
newContent: string,
|
||||
contextLines = 4,
|
||||
): { diff: string; firstChangedLine: number | undefined } {
|
||||
const result = nativeGenerateDiff(oldContent, newContent, contextLines);
|
||||
const ops = buildLineDiff(oldContent, newContent);
|
||||
let firstChangedLine: number | undefined;
|
||||
|
||||
// First pass: assign line numbers and find changed indices
|
||||
const annotated: { op: LineDiffOp; oldLine: number; newLine: number }[] = [];
|
||||
let oldLine = 1;
|
||||
let newLine = 1;
|
||||
const changedIndices: number[] = [];
|
||||
|
||||
for (let idx = 0; idx < ops.length; idx++) {
|
||||
const op = ops[idx];
|
||||
annotated.push({ op, oldLine, newLine });
|
||||
|
||||
if (op.type !== "context") {
|
||||
changedIndices.push(idx);
|
||||
if (firstChangedLine === undefined) {
|
||||
firstChangedLine = newLine;
|
||||
}
|
||||
}
|
||||
|
||||
if (op.type === "remove") {
|
||||
oldLine += 1;
|
||||
} else if (op.type === "add") {
|
||||
newLine += 1;
|
||||
} else {
|
||||
oldLine += 1;
|
||||
newLine += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Build set of indices to include (changes + surrounding context)
|
||||
const includeSet = new Set<number>();
|
||||
for (const ci of changedIndices) {
|
||||
for (let k = Math.max(0, ci - contextLines); k <= Math.min(ops.length - 1, ci + contextLines); k++) {
|
||||
includeSet.add(k);
|
||||
}
|
||||
}
|
||||
|
||||
const maxLine = Math.max(oldLine - 1, newLine - 1, 1);
|
||||
const lineNumberWidth = String(maxLine).length;
|
||||
const rendered: string[] = [];
|
||||
let lastIncluded = -1;
|
||||
|
||||
for (let idx = 0; idx < annotated.length; idx++) {
|
||||
if (!includeSet.has(idx)) continue;
|
||||
|
||||
// Insert separator when there's a gap between included regions
|
||||
if (lastIncluded !== -1 && idx > lastIncluded + 1) {
|
||||
rendered.push("...");
|
||||
}
|
||||
lastIncluded = idx;
|
||||
|
||||
const { op, oldLine: ol, newLine: nl } = annotated[idx];
|
||||
if (op.type === "context") {
|
||||
rendered.push(` ${String(nl).padStart(lineNumberWidth, " ")} ${op.line}`);
|
||||
} else if (op.type === "remove") {
|
||||
rendered.push(`-${String(ol).padStart(lineNumberWidth, " ")} ${op.line}`);
|
||||
} else {
|
||||
rendered.push(`+${String(nl).padStart(lineNumberWidth, " ")} ${op.line}`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
diff: result.diff,
|
||||
firstChangedLine: result.firstChangedLine ?? undefined,
|
||||
diff: rendered.join("\n"),
|
||||
firstChangedLine,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -101,6 +197,138 @@ export interface EditDiffError {
|
|||
error: string;
|
||||
}
|
||||
|
||||
type LineDiffOp =
|
||||
| { type: "context"; line: string }
|
||||
| { type: "remove"; line: string }
|
||||
| { type: "add"; line: string };
|
||||
|
||||
function splitLines(text: string): string[] {
|
||||
const lines = text.split("\n");
|
||||
if (lines.length > 0 && lines.at(-1) === "") {
|
||||
lines.pop();
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maximum number of cells (oldLines * newLines) before we switch from the
|
||||
* full LCS DP algorithm to a simpler linear-scan diff. This prevents OOM
|
||||
* on large files (e.g. 10k lines would need a 100M-cell matrix).
|
||||
*/
|
||||
const MAX_DP_CELLS = 4_000_000; // ~32 MB for 64-bit numbers
|
||||
|
||||
function buildLineDiff(oldContent: string, newContent: string): LineDiffOp[] {
|
||||
const oldLines = splitLines(oldContent);
|
||||
const newLines = splitLines(newContent);
|
||||
|
||||
const cells = (oldLines.length + 1) * (newLines.length + 1);
|
||||
if (cells > MAX_DP_CELLS) {
|
||||
return buildLineDiffLinear(oldLines, newLines);
|
||||
}
|
||||
|
||||
return buildLineDiffLCS(oldLines, newLines);
|
||||
}
|
||||
|
||||
/**
|
||||
* Full LCS-based diff using O(n*m) DP table. Produces optimal diffs but
|
||||
* is only safe for files where n*m <= MAX_DP_CELLS.
|
||||
*/
|
||||
function buildLineDiffLCS(oldLines: string[], newLines: string[]): LineDiffOp[] {
|
||||
const dp: number[][] = Array.from({ length: oldLines.length + 1 }, () =>
|
||||
Array<number>(newLines.length + 1).fill(0),
|
||||
);
|
||||
|
||||
for (let i = oldLines.length - 1; i >= 0; i--) {
|
||||
for (let j = newLines.length - 1; j >= 0; j--) {
|
||||
if (oldLines[i] === newLines[j]) {
|
||||
dp[i][j] = dp[i + 1][j + 1] + 1;
|
||||
} else {
|
||||
dp[i][j] = Math.max(dp[i + 1][j], dp[i][j + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ops: LineDiffOp[] = [];
|
||||
let i = 0;
|
||||
let j = 0;
|
||||
|
||||
while (i < oldLines.length && j < newLines.length) {
|
||||
if (oldLines[i] === newLines[j]) {
|
||||
ops.push({ type: "context", line: oldLines[i] });
|
||||
i += 1;
|
||||
j += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dp[i + 1][j] >= dp[i][j + 1]) {
|
||||
ops.push({ type: "remove", line: oldLines[i] });
|
||||
i += 1;
|
||||
} else {
|
||||
ops.push({ type: "add", line: newLines[j] });
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
|
||||
while (i < oldLines.length) {
|
||||
ops.push({ type: "remove", line: oldLines[i] });
|
||||
i += 1;
|
||||
}
|
||||
|
||||
while (j < newLines.length) {
|
||||
ops.push({ type: "add", line: newLines[j] });
|
||||
j += 1;
|
||||
}
|
||||
|
||||
return ops;
|
||||
}
|
||||
|
||||
/**
|
||||
* Linear-time fallback diff for large files. Matches common prefix/suffix,
|
||||
* then treats the remaining middle as a bulk remove+add. Not optimal but
|
||||
* O(n+m) in both time and space.
|
||||
*/
|
||||
function buildLineDiffLinear(oldLines: string[], newLines: string[]): LineDiffOp[] {
|
||||
const ops: LineDiffOp[] = [];
|
||||
|
||||
// Match common prefix
|
||||
let prefixLen = 0;
|
||||
const minLen = Math.min(oldLines.length, newLines.length);
|
||||
while (prefixLen < minLen && oldLines[prefixLen] === newLines[prefixLen]) {
|
||||
prefixLen++;
|
||||
}
|
||||
|
||||
// Match common suffix (not overlapping with prefix)
|
||||
let suffixLen = 0;
|
||||
while (
|
||||
suffixLen < minLen - prefixLen &&
|
||||
oldLines[oldLines.length - 1 - suffixLen] === newLines[newLines.length - 1 - suffixLen]
|
||||
) {
|
||||
suffixLen++;
|
||||
}
|
||||
|
||||
// Emit prefix context
|
||||
for (let i = 0; i < prefixLen; i++) {
|
||||
ops.push({ type: "context", line: oldLines[i] });
|
||||
}
|
||||
|
||||
// Emit removed lines from the middle
|
||||
for (let i = prefixLen; i < oldLines.length - suffixLen; i++) {
|
||||
ops.push({ type: "remove", line: oldLines[i] });
|
||||
}
|
||||
|
||||
// Emit added lines from the middle
|
||||
for (let j = prefixLen; j < newLines.length - suffixLen; j++) {
|
||||
ops.push({ type: "add", line: newLines[j] });
|
||||
}
|
||||
|
||||
// Emit suffix context
|
||||
for (let i = oldLines.length - suffixLen; i < oldLines.length; i++) {
|
||||
ops.push({ type: "context", line: oldLines[i] });
|
||||
}
|
||||
|
||||
return ops;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the diff for an edit operation without applying it.
|
||||
* Used for preview rendering in the TUI before the tool executes.
|
||||
|
|
|
|||
|
|
@ -11,6 +11,11 @@ import {
|
|||
} from "@gsd/native";
|
||||
import { getCustomThemesDir, getThemesDir } from "../../../config.js";
|
||||
|
||||
// Issue #453: native preview highlighting can wedge the entire interactive
|
||||
// session after a successful file tool. Keep the safer plain-text path as the
|
||||
// default and allow native highlighting only as an explicit opt-in.
|
||||
const NATIVE_TUI_HIGHLIGHT_ENABLED = process.env.GSD_ENABLE_NATIVE_TUI_HIGHLIGHT === "1";
|
||||
|
||||
// ============================================================================
|
||||
// Types & Schema
|
||||
// ============================================================================
|
||||
|
|
@ -955,6 +960,10 @@ function getHighlightColors(t: Theme): HighlightColors {
|
|||
* Returns array of highlighted lines.
|
||||
*/
|
||||
export function highlightCode(code: string, lang?: string): string[] {
|
||||
if (!NATIVE_TUI_HIGHLIGHT_ENABLED) {
|
||||
return code.split("\n");
|
||||
}
|
||||
|
||||
const validLang = lang && supportsLanguage(lang) ? lang : null;
|
||||
try {
|
||||
return nativeHighlightCode(code, validLang, getHighlightColors(theme)).split("\n");
|
||||
|
|
@ -1051,6 +1060,10 @@ export function getMarkdownTheme(): MarkdownTheme {
|
|||
underline: (text: string) => theme.underline(text),
|
||||
strikethrough: (text: string) => chalk.strikethrough(text),
|
||||
highlightCode: (code: string, lang?: string): string[] => {
|
||||
if (!NATIVE_TUI_HIGHLIGHT_ENABLED) {
|
||||
return code.split("\n").map((line) => theme.fg("mdCodeBlock", line));
|
||||
}
|
||||
|
||||
const validLang = lang && supportsLanguage(lang) ? lang : null;
|
||||
try {
|
||||
return nativeHighlightCode(code, validLang, getHighlightColors(theme)).split("\n");
|
||||
|
|
|
|||
|
|
@ -17,6 +17,10 @@ const GIT_NO_PROMPT_ENV = {
|
|||
GIT_SVN_ID: "",
|
||||
};
|
||||
|
||||
// Issue #453: keep auto-mode bookkeeping on the stable git CLI path unless a
|
||||
// caller explicitly opts into the native helper.
|
||||
const NATIVE_GSD_GIT_ENABLED = process.env.GSD_ENABLE_NATIVE_GSD_GIT === "1";
|
||||
|
||||
// ─── Native Module Types ──────────────────────────────────────────────────
|
||||
|
||||
interface GitDiffStat {
|
||||
|
|
@ -116,6 +120,7 @@ let loadAttempted = false;
|
|||
function loadNative(): typeof nativeModule {
|
||||
if (loadAttempted) return nativeModule;
|
||||
loadAttempted = true;
|
||||
if (!NATIVE_GSD_GIT_ENABLED) return nativeModule;
|
||||
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@
|
|||
|
||||
import type { Roadmap, BoundaryMapEntry, RoadmapSliceEntry, RiskLevel } from './types.js';
|
||||
|
||||
// Issue #453: auto-mode post-turn reconciliation must stay on the stable JS path
|
||||
// unless the native parser is explicitly requested.
|
||||
const NATIVE_GSD_PARSER_ENABLED = process.env.GSD_ENABLE_NATIVE_GSD_PARSER === "1";
|
||||
|
||||
let nativeModule: {
|
||||
parseFrontmatter: (content: string) => { metadata: string; body: string };
|
||||
extractSection: (content: string, heading: string, level?: number) => { content: string; found: boolean };
|
||||
|
|
@ -29,6 +33,7 @@ let loadAttempted = false;
|
|||
function loadNative(): typeof nativeModule {
|
||||
if (loadAttempted) return nativeModule;
|
||||
loadAttempted = true;
|
||||
if (!NATIVE_GSD_PARSER_ENABLED) return nativeModule;
|
||||
|
||||
try {
|
||||
// Dynamic import to avoid hard dependency - fails gracefully if native module not built
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue