diff --git a/src/resources/extensions/gsd/session-lock.ts b/src/resources/extensions/gsd/session-lock.ts index 7c0a0d6ce..f0f3d2562 100644 --- a/src/resources/extensions/gsd/session-lock.ts +++ b/src/resources/extensions/gsd/session-lock.ts @@ -167,6 +167,56 @@ function ensureExitHandler(_gsdDir: string): void { }); } +// ─── Lock Acquisition Helpers ─────────────────────────────────────────────── + +/** + * Create the onCompromised callback for proper-lockfile. + * + * proper-lockfile fires onCompromised when it detects mtime drift (system sleep, + * event loop stall, etc.). The default handler throws inside setTimeout — an + * uncaught exception that crashes or corrupts process state. + * + * False-positive suppression (#1362): If we're still within the stale window + * (30 min since acquisition), the mtime mismatch is from an event loop stall + * during a long LLM call — not a real takeover. Log and continue. + * + * PID ownership check (#1578): Past the stale window, check if the lock file + * still contains our PID before declaring compromise. Retry reads tolerate + * transient filesystem hiccups (NFS/CIFS latency, APFS snapshots, etc.) (#2324). + */ +function createLockCompromisedHandler(lockFilePath: string): () => void { + return () => { + const elapsed = Date.now() - _lockAcquiredAt; + if (elapsed < 1_800_000) { + process.stderr.write( + `[gsd] Lock heartbeat caught up after ${Math.round(elapsed / 1000)}s — long LLM call, no action needed.\n`, + ); + return; + } + const existing = readExistingLockDataWithRetry(lockFilePath); + if (existing && existing.pid === process.pid) { + process.stderr.write( + `[gsd] Lock heartbeat mismatch after ${Math.round(elapsed / 1000)}s — lock file still owned by PID ${process.pid}, treating as false positive.\n`, + ); + return; + } + _lockCompromised = true; + _releaseFunction = null; + }; +} + +/** + * Assign module-level lock state after a successful lock acquisition. + */ +function assignLockState(basePath: string, release: () => void, lockFilePath: string): void { + _releaseFunction = release; + _lockedPath = basePath; + _lockPid = process.pid; + _lockCompromised = false; + _lockAcquiredAt = Date.now(); + _snapshotLockPath = lockFilePath; +} + // ─── Public API ───────────────────────────────────────────────────────────── /** @@ -226,43 +276,10 @@ export function acquireSessionLock(basePath: string): SessionLockResult { realpath: false, stale: 1_800_000, // 30 minutes — safe for laptop sleep / long event loop stalls update: 10_000, // Update lock mtime every 10s to prove liveness - onCompromised: () => { - // proper-lockfile detected mtime drift (system sleep, event loop stall, etc.). - // Default handler throws inside setTimeout — an uncaught exception that crashes - // or corrupts process state. - // - // False-positive suppression (#1362): If we're still within the stale window - // (30 min since acquisition), the mtime mismatch is from an event loop stall - // during a long LLM call — not a real takeover. Log and continue. - const elapsed = Date.now() - _lockAcquiredAt; - if (elapsed < 1_800_000) { - process.stderr.write( - `[gsd] Lock heartbeat caught up after ${Math.round(elapsed / 1000)}s — long LLM call, no action needed.\n`, - ); - return; // Suppress false positive - } - // Past the stale window — check if the lock file still belongs to us before - // declaring compromise (#1578). Retry reads to tolerate transient filesystem - // hiccups (NFS/CIFS latency, APFS snapshots, etc.) (#2324). - const existing = readExistingLockDataWithRetry(lp); - if (existing && existing.pid === process.pid) { - process.stderr.write( - `[gsd] Lock heartbeat mismatch after ${Math.round(elapsed / 1000)}s — lock file still owned by PID ${process.pid}, treating as false positive.\n`, - ); - return; // Our PID still owns the lock file — no real takeover - } - // Lock file is gone or owned by another PID after retries — real compromise - _lockCompromised = true; - _releaseFunction = null; - }, + onCompromised: createLockCompromisedHandler(lp), }); - _releaseFunction = release; - _lockedPath = basePath; - _lockPid = process.pid; - _lockCompromised = false; - _lockAcquiredAt = Date.now(); - _snapshotLockPath = lp; // Snapshot the resolved path for consistent access (#1363) + assignLockState(basePath, release, lp); // Safety net: clean up lock dir on process exit if _releaseFunction // wasn't called (e.g., normal exit after clean completion) (#1245). @@ -290,36 +307,9 @@ export function acquireSessionLock(basePath: string): SessionLockResult { realpath: false, stale: 1_800_000, // 30 minutes — match primary lock settings update: 10_000, - onCompromised: () => { - // Same false-positive suppression as the primary lock (#1512). - // Without this, the retry path fires _lockCompromised unconditionally - // on benign mtime drift (laptop sleep, heavy LLM event loop stalls). - const elapsed = Date.now() - _lockAcquiredAt; - if (elapsed < 1_800_000) { - process.stderr.write( - `[gsd] Lock heartbeat caught up after ${Math.round(elapsed / 1000)}s — long LLM call, no action needed.\n`, - ); - return; - } - // Check PID ownership before declaring compromise (#1578). - // Retry reads to tolerate transient filesystem hiccups (#2324). - const existing = readExistingLockDataWithRetry(lp); - if (existing && existing.pid === process.pid) { - process.stderr.write( - `[gsd] Lock heartbeat mismatch after ${Math.round(elapsed / 1000)}s — lock file still owned by PID ${process.pid}, treating as false positive.\n`, - ); - return; - } - _lockCompromised = true; - _releaseFunction = null; - }, + onCompromised: createLockCompromisedHandler(lp), }); - _releaseFunction = release; - _lockedPath = basePath; - _lockPid = process.pid; - _lockCompromised = false; - _lockAcquiredAt = Date.now(); - _snapshotLockPath = lp; // Snapshot for retry path too (#1363) + assignLockState(basePath, release, lp); // Safety net — uses centralized handler to avoid double-registration ensureExitHandler(gsdDir);