From 0184498a4433be0799601f378fe1ca1fdaa02c0d Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Mon, 16 Mar 2026 22:30:15 -0600 Subject: [PATCH] fix: prevent phantom skip loop from stale crash recovery context (#790) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a crash lock references a unit from a fully-completed milestone, crash recovery injects stale context that fights the skip-loop breaker, creating an infinite evict/repair cycle with selfHealRuntimeRecords. Fix 1: Validate recovered unit's milestone before synthesizing recovery context. If the milestone has a SUMMARY file (complete), discard the stale recovery context and clear the lock without injection. Fix 2: Skip-loop breaker cross-checks whether the evicted unit belongs to a completed milestone. If so, the eviction is counterproductive — clear the skip counter and re-dispatch from fresh state instead. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/auto.ts | 80 +++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 8e95668b2..aea1be1c7 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -889,23 +889,37 @@ export async function startAuto( ); return; } - // Stale lock from a dead process — synthesize crash recovery context. - const activityDir = join(gsdRoot(base), "activity"); - const recovery = synthesizeCrashRecovery( - base, crashLock.unitType, crashLock.unitId, - crashLock.sessionFile, activityDir, - ); - if (recovery && recovery.trace.toolCallCount > 0) { - pendingCrashRecovery = recovery.prompt; + // Stale lock from a dead process — validate before synthesizing recovery context. + // If the recovered unit belongs to a fully-completed milestone (SUMMARY exists), + // discard recovery context to prevent phantom skip loops (#790). 
+ const recoveredMid = crashLock.unitId.split("/")[0]; + const milestoneAlreadyComplete = recoveredMid + ? !!resolveMilestoneFile(base, recoveredMid, "SUMMARY") + : false; + + if (milestoneAlreadyComplete) { ctx.ui.notify( - `${formatCrashInfo(crashLock)}\nRecovered ${recovery.trace.toolCallCount} tool calls from crashed session. Resuming with full context.`, - "warning", + `Crash recovery: discarding stale context for ${crashLock.unitId} — milestone ${recoveredMid} is already complete.`, + "info", ); } else { - ctx.ui.notify( - `${formatCrashInfo(crashLock)}\nNo session data recovered. Resuming from disk state.`, - "warning", + const activityDir = join(gsdRoot(base), "activity"); + const recovery = synthesizeCrashRecovery( + base, crashLock.unitType, crashLock.unitId, + crashLock.sessionFile, activityDir, ); + if (recovery && recovery.trace.toolCallCount > 0) { + pendingCrashRecovery = recovery.prompt; + ctx.ui.notify( + `${formatCrashInfo(crashLock)}\nRecovered ${recovery.trace.toolCallCount} tool calls from crashed session. Resuming with full context.`, + "warning", + ); + } else { + ctx.ui.notify( + `${formatCrashInfo(crashLock)}\nNo session data recovered. Resuming from disk state.`, + "warning", + ); + } } clearLock(base); } @@ -2418,6 +2432,28 @@ async function dispatchNextUnit( const skipCount = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1; unitConsecutiveSkips.set(idempotencyKey, skipCount); if (skipCount > MAX_CONSECUTIVE_SKIPS) { + // Cross-check: verify the unit's milestone is still active (#790). + // If the unit's milestone is already complete, this is a phantom skip + // loop from stale crash recovery context — don't evict. + const skippedMid = unitId.split("/")[0]; + const skippedMilestoneComplete = skippedMid + ? !!resolveMilestoneFile(basePath, skippedMid, "SUMMARY") + : false; + if (skippedMilestoneComplete) { + // Milestone is complete — evicting this key would fight self-heal. + // Clear skip counter and re-dispatch from fresh state. 
+ unitConsecutiveSkips.delete(idempotencyKey); + invalidateStateCache(); + ctx.ui.notify( + `Phantom skip loop cleared: ${unitType} ${unitId} belongs to completed milestone ${skippedMid}. Re-dispatching from fresh state.`, + "info", + ); + _skipDepth++; + await new Promise(r => setTimeout(r, 50)); + await dispatchNextUnit(ctx, pi); + _skipDepth = Math.max(0, _skipDepth - 1); + return; + } unitConsecutiveSkips.delete(idempotencyKey); completedKeySet.delete(idempotencyKey); removePersistedKey(basePath, idempotencyKey); @@ -2465,6 +2501,24 @@ async function dispatchNextUnit( const skipCount2 = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1; unitConsecutiveSkips.set(idempotencyKey, skipCount2); if (skipCount2 > MAX_CONSECUTIVE_SKIPS) { + // Cross-check: verify the unit's milestone is still active (#790). + const skippedMid2 = unitId.split("/")[0]; + const skippedMilestoneComplete2 = skippedMid2 + ? !!resolveMilestoneFile(basePath, skippedMid2, "SUMMARY") + : false; + if (skippedMilestoneComplete2) { + unitConsecutiveSkips.delete(idempotencyKey); + invalidateStateCache(); + ctx.ui.notify( + `Phantom skip loop cleared: ${unitType} ${unitId} belongs to completed milestone ${skippedMid2}. Re-dispatching from fresh state.`, + "info", + ); + _skipDepth++; + await new Promise(r => setTimeout(r, 50)); + await dispatchNextUnit(ctx, pi); + _skipDepth = Math.max(0, _skipDepth - 1); + return; + } unitConsecutiveSkips.delete(idempotencyKey); completedKeySet.delete(idempotencyKey); removePersistedKey(basePath, idempotencyKey);