fix: write crash lock after newSession so it records correct session path (#1757)

The crash lock was written with the session file path from before runUnit() called newSession(), causing crash recovery to look up the previous unit's session file instead of the current one. This meant recovery reported "No session data recovered" even when 261KB of session data was on disk. Split the lock write into two phases: a preliminary lock (unit info only, no session path) before runUnit for crash identification, then a full lock update with the correct session file path after runUnit returns. Closes #1710 Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 09:04:50 -06:00 · 2026-03-21 09:04:50 -06:00 · 0483363a33
commit 0483363a33
parent 1fb59ecb71
2 changed files with 130 additions and 10 deletions
--- a/src/resources/extensions/gsd/auto/phases.ts
+++ b/src/resources/extensions/gsd/auto/phases.ts
@ -923,21 +923,13 @@ export async function runUnitPhase(
    pauseAuto: deps.pauseAuto,
  });

-  // Session + send + await
-  const sessionFile = deps.getSessionFile(ctx);
-  deps.updateSessionLock(
-    deps.lockBase(),
-    unitType,
-    unitId,
-    s.completedUnits.length,
-    sessionFile,
-  );
+  // Write preliminary lock (no session path yet — runUnit creates a new session).
+  // Crash recovery can still identify the in-flight unit from this lock.
  deps.writeLock(
    deps.lockBase(),
    unitType,
    unitId,
    s.completedUnits.length,
-    sessionFile,
  );

  debugLog("autoLoop", {
@ -962,6 +954,23 @@ export async function runUnitPhase(
    status: unitResult.status,
  });

+  // Now that runUnit has called newSession(), the session file path is correct.
+  const sessionFile = deps.getSessionFile(ctx);
+  deps.updateSessionLock(
+    deps.lockBase(),
+    unitType,
+    unitId,
+    s.completedUnits.length,
+    sessionFile,
+  );
+  deps.writeLock(
+    deps.lockBase(),
+    unitType,
+    unitId,
+    s.completedUnits.length,
+    sessionFile,
+  );
+
  // Tag the most recent window entry with error info for stuck detection
  if (unitResult.status === "error" || unitResult.status === "cancelled") {
    const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1];
--- a/src/resources/extensions/gsd/tests/auto-loop.test.ts
+++ b/src/resources/extensions/gsd/tests/auto-loop.test.ts
@ -664,6 +664,117 @@ test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", a
  );
 });

+test("crash lock records session file from AFTER newSession, not before (#1710)", async (t) => {
+  _resetPendingResolve();
+
+  const ctx = makeMockCtx();
+  ctx.ui.setStatus = () => {};
+
+  // Simulate newSession changing the session file path.
+  // newSession() in runUnit changes the underlying session, so getSessionFile
+  // returns a different path after newSession completes.
+  let currentSessionFile = "/tmp/old-session.json";
+  ctx.sessionManager = {
+    getSessionFile: () => currentSessionFile,
+  };
+  const pi = makeMockPi();
+
+  let loopCount = 0;
+  const s = makeLoopSession({
+    cmdCtx: {
+      newSession: () => {
+        // When newSession completes, the session file changes
+        currentSessionFile = "/tmp/new-session-after-newSession.json";
+        return Promise.resolve({ cancelled: false });
+      },
+      getContextUsage: () => ({ percent: 10, tokens: 1000, limit: 10000 }),
+    },
+  });
+
+  // Track all writeLock calls with their sessionFile argument
+  const writeLockCalls: { sessionFile: string | undefined }[] = [];
+  const updateSessionLockCalls: { sessionFile: string | undefined }[] = [];
+
+  const deps = makeMockDeps({
+    deriveState: async () => {
+      deps.callLog.push("deriveState");
+      return {
+        phase: "executing",
+        activeMilestone: { id: "M001", title: "Test", status: "active" },
+        activeSlice: { id: "S01", title: "Slice 1" },
+        activeTask: { id: "T01" },
+        registry: [{ id: "M001", status: "active" }],
+        blockers: [],
+      } as any;
+    },
+    resolveDispatch: async () => {
+      deps.callLog.push("resolveDispatch");
+      return {
+        action: "dispatch" as const,
+        unitType: "execute-task",
+        unitId: "M001/S01/T01",
+        prompt: "do the thing",
+      };
+    },
+    writeLock: (_base: string, _ut: string, _uid: string, _count: number, sessionFile?: string) => {
+      writeLockCalls.push({ sessionFile });
+    },
+    updateSessionLock: (_base: string, _ut: string, _uid: string, _count: number, sessionFile?: string) => {
+      updateSessionLockCalls.push({ sessionFile });
+    },
+    getSessionFile: (ctxArg: any) => {
+      return ctxArg.sessionManager?.getSessionFile() ?? "";
+    },
+    postUnitPostVerification: async () => {
+      deps.callLog.push("postUnitPostVerification");
+      loopCount++;
+      if (loopCount >= 1) {
+        s.active = false;
+      }
+      return "continue" as const;
+    },
+  });
+
+  const loopPromise = autoLoop(ctx, pi, s, deps);
+
+  // Give the loop time to reach runUnit's await
+  await new Promise((r) => setTimeout(r, 50));
+
+  // Resolve the unit's agent_end
+  resolveAgentEnd(makeEvent());
+
+  await loopPromise;
+
+  // The preliminary lock (before runUnit) should have NO session file
+  assert.ok(
+    writeLockCalls.length >= 2,
+    `expected at least 2 writeLock calls, got ${writeLockCalls.length}`,
+  );
+  assert.strictEqual(
+    writeLockCalls[0].sessionFile,
+    undefined,
+    "preliminary lock before runUnit should have no session file",
+  );
+
+  // The post-runUnit lock should have the NEW session file path
+  assert.strictEqual(
+    writeLockCalls[1].sessionFile,
+    "/tmp/new-session-after-newSession.json",
+    "post-runUnit lock should record the session file created by newSession",
+  );
+
+  // updateSessionLock should also have the new session file
+  assert.ok(
+    updateSessionLockCalls.length >= 1,
+    "updateSessionLock should have been called at least once",
+  );
+  assert.strictEqual(
+    updateSessionLockCalls[0].sessionFile,
+    "/tmp/new-session-after-newSession.json",
+    "updateSessionLock should record the session file created by newSession",
+  );
+});
+
 test("autoLoop handles verification retry by continuing loop", async (t) => {
  _resetPendingResolve();