From 8f8a9db7cb03bb72be3276e5fa1d610843a0e76b Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 08:43:33 -0600 Subject: [PATCH] fix(auto): stale lock detection, SIGTERM handler, live-session guard (#362) --- src/resources/extensions/gsd/auto.ts | 54 +++++++++++++++++-- .../extensions/gsd/crash-recovery.ts | 20 +++++++ 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 3c70d5aab..e22ec2f2f 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -32,7 +32,7 @@ import { } from "./paths.js"; import { saveActivityLog } from "./activity-log.js"; import { synthesizeCrashRecovery, getDeepDiagnostic } from "./session-forensics.js"; -import { writeLock, clearLock, readCrashLock, formatCrashInfo } from "./crash-recovery.js"; +import { writeLock, clearLock, readCrashLock, formatCrashInfo, isLockProcessAlive } from "./crash-recovery.js"; import { clearUnitRuntimeRecord, formatExecuteTaskRecoveryStatus, @@ -164,6 +164,32 @@ let unitTimeoutHandle: ReturnType | null = null; let wrapupWarningHandle: ReturnType | null = null; let idleWatchdogHandle: ReturnType | null = null; +/** SIGTERM handler registered while auto-mode is active — cleared on stop/pause. */ +let _sigtermHandler: (() => void) | null = null; + +/** + * Register a SIGTERM handler that clears the lock file and exits cleanly. + * Captures the active base path at registration time so the handler + * always references the correct path even if the module variable changes. + * Removes any previously registered handler before installing the new one. 
/**
 * Install a SIGTERM listener that clears the lock for `currentBasePath` and
 * then exits the process with status 0.
 *
 * The base path is captured in the listener's closure at registration time, so
 * the listener keeps using that path even if module-level state changes later.
 * Any listener installed by an earlier call is removed first, so at most one
 * listener from this module is attached at a time.
 *
 * @param currentBasePath - Base path handed to `clearLock` when SIGTERM arrives.
 */
function registerSigtermHandler(currentBasePath: string): void {
  // Drop the listener from a previous registration before installing a new one.
  if (_sigtermHandler) process.off("SIGTERM", _sigtermHandler);

  _sigtermHandler = () => {
    try {
      clearLock(currentBasePath);
    } finally {
      // Terminate even if lock cleanup throws: a failed cleanup must not turn
      // SIGTERM into an uncaught exception or leave the process running.
      process.exit(0);
    }
  };

  process.on("SIGTERM", _sigtermHandler);
}

/**
 * Remove the SIGTERM listener installed by `registerSigtermHandler`, if any,
 * and forget it. Calling this when no listener is registered is a no-op.
 */
function deregisterSigtermHandler(): void {
  if (!_sigtermHandler) return;
  process.off("SIGTERM", _sigtermHandler);
  _sigtermHandler = null;
}
: "Auto-mode resumed.", "info"); @@ -525,8 +560,16 @@ export async function startAuto( // Check for crash from previous session const crashLock = readCrashLock(base); if (crashLock) { - // Synthesize a rich recovery briefing from the surviving pi session file - // (pi writes entries incrementally, so it contains every tool call up to the crash) + if (isLockProcessAlive(crashLock)) { + // The lock belongs to a process that is still running — not a crash. + // Warn the user and abort to avoid two concurrent auto-mode sessions. + ctx.ui.notify( + `Another auto-mode session (PID ${crashLock.pid}) appears to be running.\nStop it with \`kill ${crashLock.pid}\` before starting a new session.`, + "error", + ); + return; + } + // Stale lock from a dead process — synthesize crash recovery context. const activityDir = join(gsdRoot(base), "activity"); const recovery = synthesizeCrashRecovery( base, crashLock.unitType, crashLock.unitId, @@ -586,6 +629,9 @@ export async function startAuto( originalModelId = ctx.model?.id ?? null; originalModelProvider = ctx.model?.provider ?? null; + // Register a SIGTERM handler so `kill ` cleans up the lock and exits. + registerSigtermHandler(base); + // Capture the integration branch — records the branch the user was on when // auto-mode started. Slice branches will merge back to this branch instead // of the repo's default (main/master). Idempotent when the branch is the diff --git a/src/resources/extensions/gsd/crash-recovery.ts b/src/resources/extensions/gsd/crash-recovery.ts index ae80031fb..bb9bd6d6c 100644 --- a/src/resources/extensions/gsd/crash-recovery.ts +++ b/src/resources/extensions/gsd/crash-recovery.ts @@ -73,6 +73,26 @@ export function readCrashLock(basePath: string): LockData | null { } } +/** + * Check whether the process that wrote the lock is still running. + * Uses `process.kill(pid, 0)` which sends no signal but checks liveness. + * Returns false if the PID matches our own (recycled PID from a prior run). 
/**
 * Report whether the process recorded in a lock is still running.
 *
 * Liveness is probed with `process.kill(pid, 0)`: signal `0` delivers nothing
 * to the target and only performs the existence/permission check.
 *
 * @param lock - Lock record whose `pid` field identifies the owning process.
 * @returns `true` when the probe succeeds or fails with `EPERM` (the process
 *   exists but may not be signalled by us); `false` when `pid` is not a
 *   positive integer, when it equals this process's own PID (a PID recycled
 *   from a prior run), or when the probe fails for any other reason,
 *   including `ESRCH` (no such process).
 */
export function isLockProcessAlive(lock: LockData): boolean {
  const pid = lock.pid;
  if (!Number.isInteger(pid) || pid <= 0) return false;

  // A lock carrying our own PID cannot belong to another live session.
  if (pid === process.pid) return false;

  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    // Narrow the caught value instead of asserting its type: only an object
    // carrying a `code` property can be an errno-style error.
    const code =
      typeof err === "object" && err !== null && "code" in err
        ? (err as { code?: unknown }).code
        : undefined;
    // EPERM: process exists but we lack permission — alive.
    // Anything else (notably ESRCH): treat as dead, i.e. a stale lock.
    return code === "EPERM";
  }
}