fix: cap recovery/retry prompt injection to prevent V8 OOM (#139)

The crash loop: stale state → unit redispatched → activity log grows →
retry diagnostic reads full log → prompt grows → replaceAll on huge
string → V8 heap exhaustion. Cap both the read path (10MB JSONL parse
limit) and the injection path (50K char prompt cap) to break the cycle.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Lex Christopherson 2026-03-12 21:12:47 -06:00
parent db2a409d7d
commit af2061bbe1
2 changed files with 29 additions and 8 deletions

View file

@@ -1374,14 +1374,22 @@ async function dispatchNextUnit(
// On crash recovery, prepend the full recovery briefing
// On retry (stuck detection), prepend deep diagnostic from last attempt
// Cap injected content to prevent unbounded prompt growth → OOM
const MAX_RECOVERY_CHARS = 50_000;
let finalPrompt = prompt;
if (pendingCrashRecovery) {
finalPrompt = `${pendingCrashRecovery}\n\n---\n\n${finalPrompt}`;
const capped = pendingCrashRecovery.length > MAX_RECOVERY_CHARS
? pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...recovery briefing truncated to prevent memory exhaustion]"
: pendingCrashRecovery;
finalPrompt = `${capped}\n\n---\n\n${finalPrompt}`;
pendingCrashRecovery = null;
} else if ((unitDispatchCount.get(`${unitType}/${unitId}`) ?? 0) > 1) {
const diagnostic = getDeepDiagnostic(basePath);
if (diagnostic) {
finalPrompt = `**RETRY — your previous attempt did not produce the required artifact.**\n\nDiagnostic from previous attempt:\n${diagnostic}\n\nFix whatever went wrong and make sure you write the required file this time.\n\n---\n\n${finalPrompt}`;
const cappedDiag = diagnostic.length > MAX_RECOVERY_CHARS
? diagnostic.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...diagnostic truncated to prevent memory exhaustion]"
: diagnostic;
finalPrompt = `**RETRY — your previous attempt did not produce the required artifact.**\n\nDiagnostic from previous attempt:\n${cappedDiag}\n\nFix whatever went wrong and make sure you write the required file this time.\n\n---\n\n${finalPrompt}`;
}
}

View file

@@ -18,7 +18,7 @@
* - Tool results: { role: "toolResult", toolCallId: "toolu_...", toolName: "bash", isError: bool, content: ... }
*/
import { readFileSync, readdirSync, existsSync } from "node:fs";
import { readFileSync, readdirSync, existsSync, statSync } from "node:fs";
import { execSync } from "node:child_process";
import { basename, join } from "node:path";
@@ -62,8 +62,16 @@ export interface RecoveryBriefing {
// ─── JSONL Parsing ────────────────────────────────────────────────────────────
/** Max bytes to parse from a JSONL source. Prevents V8 OOM on bloated activity logs. */
const MAX_JSONL_BYTES = 10 * 1024 * 1024; // 10 MB
function parseJSONL(raw: string): unknown[] {
return raw.trim().split("\n").map(line => {
// If the file is enormous, only parse the tail (most recent entries).
// This prevents the OOM crash path: large file → split → map → parse → OOM.
const source = raw.length > MAX_JSONL_BYTES
? raw.slice(-MAX_JSONL_BYTES)
: raw;
return source.trim().split("\n").map(line => {
try { return JSON.parse(line); }
catch { return null; }
}).filter(Boolean) as unknown[];
@@ -239,10 +247,15 @@ export function synthesizeCrashRecovery(
// Primary source: surviving pi session file
if (sessionFile && existsSync(sessionFile)) {
const raw = readFileSync(sessionFile, "utf-8");
const allEntries = parseJSONL(raw);
const sessionEntries = extractLastSession(allEntries);
trace = extractTrace(sessionEntries);
const stat = statSync(sessionFile, { throwIfNoEntry: false });
const fileSize = stat?.size ?? 0;
// Skip files that would blow up memory; fall back to activity log
if (fileSize <= MAX_JSONL_BYTES * 2) {
const raw = readFileSync(sessionFile, "utf-8");
const allEntries = parseJSONL(raw);
const sessionEntries = extractLastSession(allEntries);
trace = extractTrace(sessionEntries);
}
}
// Fallback: last GSD activity log