fix: write crash lock after newSession so it records correct session path (#1757)

The crash lock was written with the session file path from before
runUnit() called newSession(), causing crash recovery to look up the
previous unit's session file instead of the current one. This meant
recovery reported "No session data recovered" even when 261KB of session
data was on disk.

Split the lock write into two phases: a preliminary lock (unit info only,
no session path) before runUnit for crash identification, then a full
lock update with the correct session file path after runUnit returns.

Closes #1710

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-21 09:04:50 -06:00 committed by GitHub
parent 1fb59ecb71
commit 0483363a33
2 changed files with 130 additions and 10 deletions

View file

@ -923,21 +923,13 @@ export async function runUnitPhase(
pauseAuto: deps.pauseAuto,
});
// Session + send + await
const sessionFile = deps.getSessionFile(ctx);
deps.updateSessionLock(
deps.lockBase(),
unitType,
unitId,
s.completedUnits.length,
sessionFile,
);
// Write preliminary lock (no session path yet — runUnit creates a new session).
// Crash recovery can still identify the in-flight unit from this lock.
deps.writeLock(
deps.lockBase(),
unitType,
unitId,
s.completedUnits.length,
sessionFile,
);
debugLog("autoLoop", {
@ -962,6 +954,23 @@ export async function runUnitPhase(
status: unitResult.status,
});
// Now that runUnit has called newSession(), the session file path is correct.
const sessionFile = deps.getSessionFile(ctx);
deps.updateSessionLock(
deps.lockBase(),
unitType,
unitId,
s.completedUnits.length,
sessionFile,
);
deps.writeLock(
deps.lockBase(),
unitType,
unitId,
s.completedUnits.length,
sessionFile,
);
// Tag the most recent window entry with error info for stuck detection
if (unitResult.status === "error" || unitResult.status === "cancelled") {
const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1];

View file

@ -664,6 +664,117 @@ test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", a
);
});
test("crash lock records session file from AFTER newSession, not before (#1710)", async (t) => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
// Simulate newSession changing the session file path.
// newSession() in runUnit changes the underlying session, so getSessionFile
// returns a different path after newSession completes.
let currentSessionFile = "/tmp/old-session.json";
ctx.sessionManager = {
getSessionFile: () => currentSessionFile,
};
const pi = makeMockPi();
let loopCount = 0;
const s = makeLoopSession({
cmdCtx: {
newSession: () => {
// When newSession completes, the session file changes
currentSessionFile = "/tmp/new-session-after-newSession.json";
return Promise.resolve({ cancelled: false });
},
getContextUsage: () => ({ percent: 10, tokens: 1000, limit: 10000 }),
},
});
// Track all writeLock calls with their sessionFile argument
const writeLockCalls: { sessionFile: string | undefined }[] = [];
const updateSessionLockCalls: { sessionFile: string | undefined }[] = [];
const deps = makeMockDeps({
deriveState: async () => {
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
resolveDispatch: async () => {
deps.callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do the thing",
};
},
writeLock: (_base: string, _ut: string, _uid: string, _count: number, sessionFile?: string) => {
writeLockCalls.push({ sessionFile });
},
updateSessionLock: (_base: string, _ut: string, _uid: string, _count: number, sessionFile?: string) => {
updateSessionLockCalls.push({ sessionFile });
},
getSessionFile: (ctxArg: any) => {
return ctxArg.sessionManager?.getSessionFile() ?? "";
},
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
loopCount++;
if (loopCount >= 1) {
s.active = false;
}
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Give the loop time to reach runUnit's await
await new Promise((r) => setTimeout(r, 50));
// Resolve the unit's agent_end
resolveAgentEnd(makeEvent());
await loopPromise;
// The preliminary lock (before runUnit) should have NO session file
assert.ok(
writeLockCalls.length >= 2,
`expected at least 2 writeLock calls, got ${writeLockCalls.length}`,
);
assert.strictEqual(
writeLockCalls[0].sessionFile,
undefined,
"preliminary lock before runUnit should have no session file",
);
// The post-runUnit lock should have the NEW session file path
assert.strictEqual(
writeLockCalls[1].sessionFile,
"/tmp/new-session-after-newSession.json",
"post-runUnit lock should record the session file created by newSession",
);
// updateSessionLock should also have the new session file
assert.ok(
updateSessionLockCalls.length >= 1,
"updateSessionLock should have been called at least once",
);
assert.strictEqual(
updateSessionLockCalls[0].sessionFile,
"/tmp/new-session-after-newSession.json",
"updateSessionLock should record the session file created by newSession",
);
});
test("autoLoop handles verification retry by continuing loop", async (t) => {
_resetPendingResolve();