From b0f880689bfe3c15141bfc7c1a3f9c4b4b904a4e Mon Sep 17 00:00:00 2001 From: Flux Labs Date: Mon, 16 Mar 2026 07:57:39 -0500 Subject: [PATCH] fix: prevent heap OOM during long-running auto-mode sessions (#611) (#613) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple sources of unbounded memory growth caused V8 to OOM after ~50 minutes of auto-mode operation: 1. activity-log.ts: saveActivityLog serialized ALL session entries into a single string for SHA1 dedup, allocating hundreds of MB per unit cycle. Now uses streaming writes (writeSync per entry) and a lightweight fingerprint (entry count + last 3 entries hash) instead of full-content hashing. 2. activity-log.ts: activityLogState Map was never cleared between sessions, accumulating lastSnapshotKeyByUnit entries indefinitely. Added clearActivityLogState() export, called from stopAuto(). 3. auto.ts: completedUnits array grew unbounded for dashboard display. Now capped at 200 entries and cleared on stopAuto(). 4. paths.ts: dirEntryCache and dirListCache Maps grew without bounds between clearPathCache() calls. Added DIR_CACHE_MAX (200) eviction — when cache exceeds limit, it's cleared before adding new entries. Closes #611 --- src/resources/extensions/gsd/activity-log.ts | 44 ++++++++-- src/resources/extensions/gsd/auto.ts | 8 +- src/resources/extensions/gsd/paths.ts | 7 ++ .../gsd/tests/memory-leak-guards.test.ts | 87 +++++++++++++++++++ 4 files changed, 138 insertions(+), 8 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/memory-leak-guards.test.ts diff --git a/src/resources/extensions/gsd/activity-log.ts b/src/resources/extensions/gsd/activity-log.ts index fd235d121..aa69192c6 100644 --- a/src/resources/extensions/gsd/activity-log.ts +++ b/src/resources/extensions/gsd/activity-log.ts @@ -8,7 +8,7 @@ * Diagnostic extraction is handled by session-forensics.ts. */ -import { writeFileSync, mkdirSync, readdirSync, unlinkSync, statSync, openSync, closeSync, constants } from "node:fs"; +import { writeFileSync, writeSync, mkdirSync, readdirSync, unlinkSync, statSync, openSync, closeSync, constants } from "node:fs"; import { createHash } from "node:crypto"; import { join } from "node:path"; @@ -23,6 +23,15 @@ interface ActivityLogState { const activityLogState = new Map(); +/** + * Clear accumulated activity log state (#611). + * Call when auto-mode stops to prevent unbounded memory growth + * from lastSnapshotKeyByUnit maps accumulating across units. + */ +export function clearActivityLogState(): void { + activityLogState.clear(); +} + function scanNextSequence(activityDir: string): number { let maxSeq = 0; try { @@ -46,9 +55,21 @@ function getActivityState(activityDir: string): ActivityLogState { return state; } -function snapshotKey(unitType: string, unitId: string, content: string): string { - const digest = createHash("sha1").update(content).digest("hex"); - return `${unitType}\0${unitId}\0${digest}`; +/** + * Build a lightweight dedup key from session entries without serializing + * the entire content to a string (#611). Uses entry count + hash of + * the last few entries as a fingerprint instead of hashing megabytes. + */ +function snapshotKey(unitType: string, unitId: string, entries: unknown[]): string { + const hash = createHash("sha1"); + hash.update(`${unitType}\0${unitId}\0${entries.length}\0`); + // Hash only the last 3 entries as a fingerprint — if the session grew, + // the count change alone detects it; if content changed, the tail hash catches it. + const tail = entries.slice(-3); + for (const entry of tail) { + hash.update(JSON.stringify(entry)); + } + return hash.digest("hex"); } function nextActivityFilePath( @@ -91,14 +112,23 @@ export function saveActivityLog( mkdirSync(activityDir, { recursive: true }); const safeUnitId = unitId.replace(/\//g, "-"); - const content = `${entries.map(entry => JSON.stringify(entry)).join("\n")}\n`; const state = getActivityState(activityDir); const unitKey = `${unitType}\0${safeUnitId}`; - const key = snapshotKey(unitType, safeUnitId, content); + // Use lightweight fingerprint instead of serializing all entries (#611) + const key = snapshotKey(unitType, safeUnitId, entries); if (state.lastSnapshotKeyByUnit.get(unitKey) === key) return; const filePath = nextActivityFilePath(activityDir, state, unitType, safeUnitId); - writeFileSync(filePath, content, "utf-8"); + // Stream entries to disk line-by-line instead of building one massive string (#611). + // For large sessions, the single-string approach allocated hundreds of MB. + const fd = openSync(filePath, "w"); + try { + for (const entry of entries) { + writeSync(fd, JSON.stringify(entry) + "\n"); + } + } finally { + closeSync(fd); + } state.nextSeq += 1; state.lastSnapshotKeyByUnit.set(unitKey, key); } catch (e) { diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 0e919b110..07575ce81 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -29,7 +29,7 @@ import { buildMilestoneFileName, buildSliceFileName, buildTaskFileName, } from "./paths.js"; import { invalidateAllCaches } from "./cache.js"; -import { saveActivityLog } from "./activity-log.js"; +import { saveActivityLog, clearActivityLogState } from "./activity-log.js"; import { synthesizeCrashRecovery, getDeepDiagnostic } from "./session-forensics.js"; import { writeLock, clearLock, readCrashLock, formatCrashInfo, isLockProcessAlive } from "./crash-recovery.js"; import { @@ -485,7 +485,9 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi currentUnit = null; currentMilestoneId = null; originalBasePath = ""; + completedUnits = []; clearSliceProgressCache(); + clearActivityLogState(); pendingCrashRecovery = null; _handlingAgentEnd = false; ctx?.ui.setStatus("gsd-auto", undefined); @@ -1784,6 +1786,10 @@ async function dispatchNextUnit( startedAt: currentUnit.startedAt, finishedAt: Date.now(), }); + // Cap to last 200 entries to prevent unbounded growth (#611) + if (completedUnits.length > 200) { + completedUnits = completedUnits.slice(-200); + } clearUnitRuntimeRecord(basePath, currentUnit.type, currentUnit.id); unitDispatchCount.delete(`${currentUnit.type}/${currentUnit.id}`); unitRecoveryCount.delete(`${currentUnit.type}/${currentUnit.id}`); diff --git a/src/resources/extensions/gsd/paths.ts b/src/resources/extensions/gsd/paths.ts index b90c463fa..6e7458db6 100644 --- a/src/resources/extensions/gsd/paths.ts +++ b/src/resources/extensions/gsd/paths.ts @@ -15,6 +15,9 @@ import { nativeScanGsdTree, type GsdTreeEntry } from "./native-parser-bridge.js" // ─── Directory Listing Cache ────────────────────────────────────────────────── +/** Max entries before eviction. Prevents unbounded growth in long sessions (#611). */ +const DIR_CACHE_MAX = 200; + const dirEntryCache = new Map(); const dirListCache = new Map(); @@ -85,6 +88,7 @@ function cachedReaddirWithTypes(dirPath: string): Dirent[] { d.isSocket = () => false; return d; }); + if (dirEntryCache.size >= DIR_CACHE_MAX) dirEntryCache.clear(); dirEntryCache.set(dirPath, dirents); return dirents; } @@ -92,6 +96,7 @@ function cachedReaddirWithTypes(dirPath: string): Dirent[] { } const entries = readdirSync(dirPath, { withFileTypes: true }); + if (dirEntryCache.size >= DIR_CACHE_MAX) dirEntryCache.clear(); dirEntryCache.set(dirPath, entries); return entries; } @@ -107,6 +112,7 @@ function cachedReaddir(dirPath: string): string[] { const treeEntries = nativeTreeCache.get(key); if (treeEntries) { const names = treeEntries.map(e => e.name); + if (dirListCache.size >= DIR_CACHE_MAX) dirListCache.clear(); dirListCache.set(dirPath, names); return names; } @@ -114,6 +120,7 @@ function cachedReaddir(dirPath: string): string[] { } const entries = readdirSync(dirPath); + if (dirListCache.size >= DIR_CACHE_MAX) dirListCache.clear(); dirListCache.set(dirPath, entries); return entries; } diff --git a/src/resources/extensions/gsd/tests/memory-leak-guards.test.ts b/src/resources/extensions/gsd/tests/memory-leak-guards.test.ts new file mode 100644 index 000000000..305d1fc50 --- /dev/null +++ b/src/resources/extensions/gsd/tests/memory-leak-guards.test.ts @@ -0,0 +1,87 @@ +/** + * memory-leak-guards.test.ts — Tests for #611 memory leak fixes. + * + * Verifies that module-level state accumulators are properly bounded + * and cleared to prevent OOM during long-running auto-mode sessions. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync, existsSync, readdirSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { saveActivityLog, clearActivityLogState } from "../activity-log.ts"; +import { clearPathCache } from "../paths.ts"; +import type { ExtensionContext } from "@gsd/pi-coding-agent"; + +function createCtx(entries: unknown[]) { + return { sessionManager: { getEntries: () => entries } } as unknown as ExtensionContext; +} + +// ─── activity-log: clearActivityLogState ───────────────────────────────────── + +test("clearActivityLogState resets dedup state so identical saves write again", () => { + clearActivityLogState(); + const baseDir = mkdtempSync(join(tmpdir(), "gsd-memleak-test-")); + try { + const entries = [{ role: "assistant", content: "test entry" }]; + const ctx = createCtx(entries); + + // First save + saveActivityLog(ctx, baseDir, "execute-task", "M001/S01/T01"); + + const actDir = join(baseDir, ".gsd", "activity"); + assert.equal(readdirSync(actDir).length, 1, "first save creates one file"); + + // Same content, same unit — deduped + saveActivityLog(ctx, baseDir, "execute-task", "M001/S01/T01"); + assert.equal(readdirSync(actDir).length, 1, "dedup prevents duplicate write"); + + // Clear state + clearActivityLogState(); + + // Same content again — after clear, writes again (fresh state) + saveActivityLog(ctx, baseDir, "execute-task", "M001/S01/T01"); + assert.equal(readdirSync(actDir).length, 2, "after clear, dedup state is reset"); + } finally { + rmSync(baseDir, { recursive: true, force: true }); + } +}); + +// ─── activity-log: streaming JSONL write ──────────────────────────────────── + +test("saveActivityLog writes valid JSONL via streaming", () => { + clearActivityLogState(); + const baseDir = mkdtempSync(join(tmpdir(), "gsd-memleak-jsonl-")); + try { + const entries = [ + { type: "message", message: { role: "user", content: "hello" } }, + { type: "message", message: { role: "assistant", content: "world" } }, + { type: "message", message: { role: "user", content: "test" } }, + ]; + const ctx = createCtx(entries); + + saveActivityLog(ctx, baseDir, "execute-task", "M002/S01/T01"); + + const actDir = join(baseDir, ".gsd", "activity"); + const files = readdirSync(actDir); + assert.equal(files.length, 1, "one file written"); + + const content = readFileSync(join(actDir, files[0]), "utf-8"); + const lines = content.trim().split("\n"); + assert.equal(lines.length, 3, "three JSONL lines"); + + for (const line of lines) { + assert.doesNotThrow(() => JSON.parse(line), `line is valid JSON`); + } + } finally { + rmSync(baseDir, { recursive: true, force: true }); + } +}); + +// ─── paths.ts: directory cache bounds ─────────────────────────────────────── + +test("clearPathCache does not throw", () => { + assert.doesNotThrow(() => clearPathCache(), "clearPathCache should not throw"); +});