fix: closeout unit on pause and heal runtime records on resume (#1625) (#1773)

pauseAuto now calls closeoutUnit() and clearUnitRuntimeRecord() for the
current unit before setting s.active = false, preventing stale
"dispatched" runtime records from accumulating on disk.

The resume path in startAuto now calls selfHealRuntimeRecords() before
entering autoLoop to clean up any stale records that survived from prior
sessions (e.g., if clearUnitRuntimeRecord failed silently during pause).

Closes #1625

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-21 09:33:02 -06:00 committed by GitHub
parent b609c3b30b
commit 4c3fafd6a6
2 changed files with 58 additions and 0 deletions

View file

@ -85,6 +85,7 @@ import {
} from "./auto-observability.js";
import { closeoutUnit } from "./auto-unit-closeout.js";
import { recoverTimedOutUnit } from "./auto-timeout-recovery.js";
import { selfHealRuntimeRecords } from "./auto-recovery.js";
import { selectAndApplyModel } from "./auto-model-selection.js";
import {
syncProjectRootToWorktree,
@ -743,6 +744,21 @@ export async function pauseAuto(
// Non-fatal — resume will still work via full bootstrap, just without worktree context
}
// Close out the current unit so its runtime record doesn't stay at "dispatched"
if (s.currentUnit && ctx) {
try {
await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt);
} catch {
// Non-fatal — best-effort closeout on pause
}
try {
clearUnitRuntimeRecord(s.basePath, s.currentUnit.type, s.currentUnit.id);
} catch {
// Non-fatal
}
s.currentUnit = null;
}
if (lockBase()) {
releaseSessionLock(lockBase());
clearLock(lockBase());
@ -1020,6 +1036,15 @@ export async function startAuto(
}
invalidateAllCaches();
// Clean stale runtime records left from the paused session
try {
await selfHealRuntimeRecords(s.basePath, ctx);
} catch (e) {
debugLog("resume-self-heal-runtime-failed", {
error: e instanceof Error ? e.message : String(e),
});
}
if (s.pausedSessionFile) {
const activityDir = join(gsdRoot(s.basePath), "activity");
const recovery = synthesizeCrashRecovery(

View file

@ -434,6 +434,39 @@ test("selfHealRuntimeRecords clears stale dispatched records (#769)", async () =
}
});
// ─── #1625: selfHealRuntimeRecords on resume clears paused-session leftovers ──
test("selfHealRuntimeRecords clears recently-paused dispatched records on resume (#1625)", async () => {
  // Scenario: a pause left a "dispatched" runtime record on disk (for example
  // because clearUnitRuntimeRecord failed without surfacing an error). Running
  // selfHealRuntimeRecords — as the resume path does — must remove the record
  // once it is old enough (>1h) to count as stale.
  const base = makeTmpBase();
  try {
    const {
      writeUnitRuntimeRecord: writeRecord,
      readUnitRuntimeRecord: readRecord,
    } = await import("../unit-runtime.ts");

    const unitType = "execute-task";
    const unitId = "M001/S01/T01";

    // Seed the leftover record with a timestamp 3700 s in the past, i.e. just
    // over one hour old, so it qualifies as stale.
    const agedTimestamp = Date.now() - 3700_000;
    writeRecord(base, unitType, unitId, agedTimestamp, { phase: "dispatched" });

    // Sanity check: the record is on disk and still marked "dispatched".
    const seeded = readRecord(base, unitType, unitId);
    assert.ok(seeded, "dispatched record should exist before resume heal");
    assert.equal(seeded!.phase, "dispatched");

    // Minimal context stub: only ui.notify is provided; messages are captured
    // but not asserted on.
    const notifications: string[] = [];
    const mockCtx = {
      ui: {
        notify(msg: string) {
          notifications.push(msg);
        },
      },
    } as any;

    await selfHealRuntimeRecords(base, mockCtx);

    // After healing, reading the same unit must yield no record at all.
    const healed = readRecord(base, unitType, unitId);
    assert.equal(healed, null, "stale dispatched record should be cleared on resume (#1625)");
  } finally {
    // Always remove the temp directory, even when an assertion fails.
    cleanup(base);
  }
});
// ─── #793: invalidateAllCaches unblocks skip-loop ─────────────────────────
// When the skip-loop breaker fires, it must call invalidateAllCaches() (not
// just invalidateStateCache()) to clear path/parse caches that deriveState