From 4c3fafd6a676d67094f3279ec8bfabaf5d80310e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=82CHES?= Date: Sat, 21 Mar 2026 09:33:02 -0600 Subject: [PATCH] fix: closeout unit on pause and heal runtime records on resume (#1625) (#1773) pauseAuto now calls closeoutUnit() and clearUnitRuntimeRecord() for the current unit before setting s.active = false, preventing stale "dispatched" runtime records from accumulating on disk. The resume path in startAuto now calls selfHealRuntimeRecords() before entering autoLoop to clean any stale records that survived from prior sessions (e.g. if clearUnitRuntimeRecord failed silently during pause). Closes #1625 Co-authored-by: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/auto.ts | 25 ++++++++++++++ .../gsd/tests/auto-recovery.test.ts | 33 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 4df1bcaf4..ac5bd5241 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -85,6 +85,7 @@ import { } from "./auto-observability.js"; import { closeoutUnit } from "./auto-unit-closeout.js"; import { recoverTimedOutUnit } from "./auto-timeout-recovery.js"; +import { selfHealRuntimeRecords } from "./auto-recovery.js"; import { selectAndApplyModel } from "./auto-model-selection.js"; import { syncProjectRootToWorktree, @@ -743,6 +744,21 @@ export async function pauseAuto( // Non-fatal — resume will still work via full bootstrap, just without worktree context } + // Close out the current unit so its runtime record doesn't stay at "dispatched" + if (s.currentUnit && ctx) { + try { + await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt); + } catch { + // Non-fatal — best-effort closeout on pause + } + try { + clearUnitRuntimeRecord(s.basePath, s.currentUnit.type, s.currentUnit.id); + } catch { + // Non-fatal + } + s.currentUnit = null; + } + if (lockBase()) { releaseSessionLock(lockBase()); clearLock(lockBase()); @@ -1020,6 +1036,15 @@ export async function startAuto( } invalidateAllCaches(); + // Clean stale runtime records left from the paused session + try { + await selfHealRuntimeRecords(s.basePath, ctx); + } catch (e) { + debugLog("resume-self-heal-runtime-failed", { + error: e instanceof Error ? e.message : String(e), + }); + } + if (s.pausedSessionFile) { const activityDir = join(gsdRoot(s.basePath), "activity"); const recovery = synthesizeCrashRecovery( diff --git a/src/resources/extensions/gsd/tests/auto-recovery.test.ts b/src/resources/extensions/gsd/tests/auto-recovery.test.ts index 45f0a485d..ae2ffe24f 100644 --- a/src/resources/extensions/gsd/tests/auto-recovery.test.ts +++ b/src/resources/extensions/gsd/tests/auto-recovery.test.ts @@ -434,6 +434,39 @@ test("selfHealRuntimeRecords clears stale dispatched records (#769)", async () = } }); +// ─── #1625: selfHealRuntimeRecords on resume clears paused-session leftovers ── + +test("selfHealRuntimeRecords clears recently-paused dispatched records on resume (#1625)", async () => { + // When pauseAuto closes out a unit but clearUnitRuntimeRecord silently fails + // (e.g. permission error), selfHealRuntimeRecords on resume should still + // clean up stale dispatched records that are >1h old. + const base = makeTmpBase(); + try { + const { writeUnitRuntimeRecord, readUnitRuntimeRecord } = await import("../unit-runtime.ts"); + + // Simulate a record left behind after a pause — aged >1h to be considered stale + writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", Date.now() - 3700_000, { + phase: "dispatched", + }); + + const before = readUnitRuntimeRecord(base, "execute-task", "M001/S01/T01"); + assert.ok(before, "dispatched record should exist before resume heal"); + assert.equal(before!.phase, "dispatched"); + + const notifications: string[] = []; + const mockCtx = { + ui: { notify: (msg: string) => { notifications.push(msg); } }, + } as any; + + await selfHealRuntimeRecords(base, mockCtx); + + const after = readUnitRuntimeRecord(base, "execute-task", "M001/S01/T01"); + assert.equal(after, null, "stale dispatched record should be cleared on resume (#1625)"); + } finally { + cleanup(base); + } +}); + // ─── #793: invalidateAllCaches unblocks skip-loop ───────────────────────── // When the skip-loop breaker fires, it must call invalidateAllCaches() (not // just invalidateStateCache()) to clear path/parse caches that deriveState