diff --git a/src/resources/extensions/gsd/auto-loop.ts b/src/resources/extensions/gsd/auto-loop.ts index 7ce847d20..b2e5800ea 100644 --- a/src/resources/extensions/gsd/auto-loop.ts +++ b/src/resources/extensions/gsd/auto-loop.ts @@ -15,6 +15,7 @@ import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent"; import type { AutoSession } from "./auto/session.js"; import { NEW_SESSION_TIMEOUT_MS } from "./auto/session.js"; import type { GSDPreferences } from "./preferences.js"; +import type { SessionLockStatus } from "./session-lock.js"; import type { GSDState } from "./types.js"; import type { CloseoutOptions } from "./auto-unit-closeout.js"; import type { PostUnitContext } from "./auto-post-unit.js"; @@ -307,7 +308,7 @@ export interface LoopDeps { checkResourcesStale: (version: string | null) => string | null; // Session lock - validateSessionLock: (basePath: string) => boolean; + validateSessionLock: (basePath: string) => SessionLockStatus; updateSessionLock: ( basePath: string, unitType: string, @@ -315,7 +316,10 @@ export interface LoopDeps { completedUnits: number, sessionFile?: string, ) => void; - handleLostSessionLock: (ctx?: ExtensionContext) => void; + handleLostSessionLock: ( + ctx?: ExtensionContext, + lockStatus?: SessionLockStatus, + ) => void; // Milestone transition functions sendDesktopNotification: ( @@ -559,10 +563,24 @@ export async function autoLoop( try { // ── Blanket try/catch: one bad iteration must not kill the session - if (deps.lockBase() && !deps.validateSessionLock(deps.lockBase())) { - deps.handleLostSessionLock(ctx); - debugLog("autoLoop", { phase: "exit", reason: "session-lock-lost" }); - break; + const sessionLockBase = deps.lockBase(); + if (sessionLockBase) { + const lockStatus = deps.validateSessionLock(sessionLockBase); + if (!lockStatus.valid) { + debugLog("autoLoop", { + phase: "session-lock-invalid", + reason: lockStatus.failureReason ?? "unknown", + existingPid: lockStatus.existingPid, + expectedPid: lockStatus.expectedPid, + }); + deps.handleLostSessionLock(ctx, lockStatus); + debugLog("autoLoop", { + phase: "exit", + reason: "session-lock-lost", + detail: lockStatus.failureReason ?? "unknown", + }); + break; + } } // ── Phase 1: Pre-dispatch ─────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index c3de44263..191dc8e21 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -47,10 +47,11 @@ import { } from "./crash-recovery.js"; import { acquireSessionLock, - validateSessionLock, + getSessionLockStatus, releaseSessionLock, updateSessionLock, } from "./session-lock.js"; +import type { SessionLockStatus } from "./session-lock.js"; import { clearUnitRuntimeRecord, inspectExecuteTaskDurability, @@ -461,15 +462,33 @@ function buildSnapshotOpts( }; } -function handleLostSessionLock(ctx?: ExtensionContext): void { - debugLog("session-lock-lost", { lockBase: lockBase() }); +function handleLostSessionLock( + ctx?: ExtensionContext, + lockStatus?: SessionLockStatus, +): void { + debugLog("session-lock-lost", { + lockBase: lockBase(), + reason: lockStatus?.failureReason, + existingPid: lockStatus?.existingPid, + expectedPid: lockStatus?.expectedPid, + }); s.active = false; s.paused = false; clearUnitTimeout(); deregisterSigtermHandler(); clearCmuxSidebar(loadEffectiveGSDPreferences()?.preferences); + const message = + lockStatus?.failureReason === "pid-mismatch" + ? lockStatus.existingPid + ? `Session lock moved to PID ${lockStatus.existingPid} — another GSD process appears to have taken over. Stopping gracefully.` + : "Session lock moved to a different process — another GSD process appears to have taken over. Stopping gracefully." + : lockStatus?.failureReason === "missing-metadata" + ? "Session lock metadata disappeared, so ownership could not be confirmed. Stopping gracefully." + : lockStatus?.failureReason === "compromised" + ? "Session lock was compromised or invalidated during heartbeat checks; takeover was not confirmed. Stopping gracefully." + : "Session lock lost. Stopping gracefully."; ctx?.ui.notify( - "Session lock lost — another GSD process appears to have taken over. Stopping gracefully.", + message, "error", ); ctx?.ui.setStatus("gsd-auto", undefined); @@ -736,7 +755,7 @@ function buildLoopDeps(): LoopDeps { checkResourcesStale, // Session lock - validateSessionLock, + validateSessionLock: getSessionLockStatus, updateSessionLock, handleLostSessionLock, diff --git a/src/resources/extensions/gsd/session-lock.ts b/src/resources/extensions/gsd/session-lock.ts index 2275ff29d..b2b722388 100644 --- a/src/resources/extensions/gsd/session-lock.ts +++ b/src/resources/extensions/gsd/session-lock.ts @@ -40,6 +40,19 @@ export type SessionLockResult = | { acquired: true } | { acquired: false; reason: string; existingPid?: number }; +export type SessionLockFailureReason = + | "compromised" + | "missing-metadata" + | "pid-mismatch"; + +export interface SessionLockStatus { + valid: boolean; + failureReason?: SessionLockFailureReason; + existingPid?: number; + expectedPid?: number; + recovered?: boolean; +} + // ─── Module State ─────────────────────────────────────────────────────────── /** Release function from proper-lockfile — calling it releases the OS lock. */ @@ -368,7 +381,7 @@ export function updateSessionLock( * * This is called periodically during the dispatch loop. */ -export function validateSessionLock(basePath: string): boolean { +export function getSessionLockStatus(basePath: string): SessionLockStatus { // Lock was compromised by proper-lockfile (mtime drift from sleep, stall, etc.) if (_lockCompromised) { // Recovery gate (#1512): Before declaring the lock lost, check if the lock @@ -385,18 +398,23 @@ export function validateSessionLock(basePath: string): boolean { process.stderr.write( `[gsd] Lock recovered after onCompromised — lock file PID matched, re-acquired.\n`, ); - return true; + return { valid: true, recovered: true }; } } catch { // Re-acquisition failed — fall through to return false } } - return false; + return { + valid: false, + failureReason: "compromised", + existingPid: existing?.pid, + expectedPid: process.pid, + }; } // If we have an OS-level lock, we're still the owner if (_releaseFunction && _lockedPath === basePath) { - return true; + return { valid: true }; } // Fallback: check the lock file PID @@ -404,10 +422,27 @@ export function validateSessionLock(basePath: string): boolean { const existing = readExistingLockData(lp); if (!existing) { // Lock file was deleted — we lost ownership - return false; + return { + valid: false, + failureReason: "missing-metadata", + expectedPid: process.pid, + }; } - return existing.pid === process.pid; + if (existing.pid !== process.pid) { + return { + valid: false, + failureReason: "pid-mismatch", + existingPid: existing.pid, + expectedPid: process.pid, + }; + } + + return { valid: true }; +} + +export function validateSessionLock(basePath: string): boolean { + return getSessionLockStatus(basePath).valid; } /** diff --git a/src/resources/extensions/gsd/tests/auto-loop.test.ts b/src/resources/extensions/gsd/tests/auto-loop.test.ts index f5dfe4db3..dc34f1952 100644 --- a/src/resources/extensions/gsd/tests/auto-loop.test.ts +++ b/src/resources/extensions/gsd/tests/auto-loop.test.ts @@ -14,6 +14,7 @@ import { type AgentEndEvent, type LoopDeps, } from "../auto-loop.js"; +import type { SessionLockStatus } from "../session-lock.js"; // ─── Helpers ───────────────────────────────────────────────────────────────── @@ -341,7 +342,7 @@ function makeMockDeps( preDispatchHealthGate: async () => ({ proceed: true, fixesApplied: [] }), syncProjectRootToWorktree: () => {}, checkResourcesStale: () => null, - validateSessionLock: () => true, + validateSessionLock: () => ({ valid: true } as SessionLockStatus), updateSessionLock: () => { callLog.push("updateSessionLock"); }, @@ -532,6 +533,41 @@ test("autoLoop exits on terminal complete state", async (t) => { ); }); +test("autoLoop passes structured session-lock failure details to the handler", async () => { + _resetPendingResolve(); + + const ctx = makeMockCtx(); + ctx.ui.setStatus = () => {}; + const pi = makeMockPi(); + const s = makeLoopSession(); + let observedLockStatus: SessionLockStatus | undefined; + + const deps = makeMockDeps({ + validateSessionLock: () => + ({ + valid: false, + failureReason: "compromised", + expectedPid: process.pid, + }) as SessionLockStatus, + handleLostSessionLock: (_ctx, lockStatus) => { + observedLockStatus = lockStatus; + deps.callLog.push("handleLostSessionLock"); + }, + }); + + await autoLoop(ctx, pi, s, deps); + + assert.deepEqual(observedLockStatus, { + valid: false, + failureReason: "compromised", + expectedPid: process.pid, + }); + assert.ok( + !deps.callLog.includes("resolveDispatch"), + "should stop before dispatch after lock validation fails", + ); +}); + test("autoLoop exits on terminal blocked state", async (t) => { _resetPendingResolve(); diff --git a/src/resources/extensions/gsd/tests/session-lock-regression.test.ts b/src/resources/extensions/gsd/tests/session-lock-regression.test.ts index f955ebf83..22bc3d397 100644 --- a/src/resources/extensions/gsd/tests/session-lock-regression.test.ts +++ b/src/resources/extensions/gsd/tests/session-lock-regression.test.ts @@ -17,6 +17,7 @@ import { tmpdir } from 'node:os'; import { acquireSessionLock, + getSessionLockStatus, validateSessionLock, releaseSessionLock, readSessionLockData, @@ -201,6 +202,50 @@ async function main(): Promise { } } + // ─── 7b. getSessionLockStatus with missing metadata → reason surfaced ── + console.log('\n=== 7b. missing lock metadata → structured reason ==='); + { + const base = mkdtempSync(join(tmpdir(), 'gsd-session-lock-')); + mkdirSync(join(base, '.gsd'), { recursive: true }); + + try { + const status = getSessionLockStatus(base); + assertEq(status.valid, false, 'missing lock metadata is invalid'); + assertEq(status.failureReason, 'missing-metadata', 'missing metadata reason is surfaced'); + assertEq(status.expectedPid, process.pid, 'expected PID is included'); + } finally { + rmSync(base, { recursive: true, force: true }); + } + } + + // ─── 7c. getSessionLockStatus with foreign PID → reason surfaced ─────── + console.log('\n=== 7c. foreign PID in lock file → structured reason ==='); + { + const base = mkdtempSync(join(tmpdir(), 'gsd-session-lock-')); + mkdirSync(join(base, '.gsd'), { recursive: true }); + + try { + const foreignPid = process.pid + 1000; + const lockFile = join(gsdRoot(base), 'auto.lock'); + writeFileSync(lockFile, JSON.stringify({ + pid: foreignPid, + startedAt: new Date().toISOString(), + unitType: 'execute-task', + unitId: 'M001/S01/T01', + unitStartedAt: new Date().toISOString(), + completedUnits: 0, + }, null, 2)); + + const status = getSessionLockStatus(base); + assertEq(status.valid, false, 'foreign PID lock is invalid'); + assertEq(status.failureReason, 'pid-mismatch', 'PID mismatch reason is surfaced'); + assertEq(status.existingPid, foreignPid, 'existing PID is included'); + assertEq(status.expectedPid, process.pid, 'expected PID is included'); + } finally { + rmSync(base, { recursive: true, force: true }); + } + } + // ─── 8. Acquire after release is possible ───────────────────────────── console.log('\n=== 8. acquire after release → re-acquirable ==='); {