diff --git a/src/resources/extensions/sf/auto.js b/src/resources/extensions/sf/auto.js
index 63f7600c7..f08f30347 100644
--- a/src/resources/extensions/sf/auto.js
+++ b/src/resources/extensions/sf/auto.js
@@ -1547,11 +1547,28 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
       interruptedAssessment ?? (await assessInterruptedSession(base));
     if (freshStartAssessment.classification === "running") {
       const pid = freshStartAssessment.lock?.pid;
+      // #wiggums: verify PID liveness before reporting. A crashed prior session
+      // leaves a stale lock; telling the operator to kill a dead PID is misleading.
+      // Signal 0 only probes for existence, and EPERM still means the PID exists.
+      // A dead or own PID gets a stale-lock warning instead of the blocking error.
+      const pidAlive =
+        pid && Number.isInteger(pid) && pid > 0 && pid !== process.pid
+          ? (() => {
+              try {
+                process.kill(pid, 0);
+                return true;
+              } catch (err) {
+                return err?.code === "EPERM";
+              }
+            })()
+          : false;
       ctx.ui.notify(
-        pid
-          ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
-          : "Another autonomous mode session appears to be running.",
-        "error",
+        pid && !pidAlive
+          ? `Stale autonomous mode lock from dead PID ${pid} — will be auto-recovered on next session-lock acquisition.`
+          : pid
+            ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
+            : "Another autonomous mode session appears to be running.",
+        pid && !pidAlive ? "warning" : "error",
         {
           noticeKind: NOTICE_KIND.BLOCKING_NOTICE,
           dedupe_key: "auto-start-concurrent-session-blocked",
diff --git a/src/resources/extensions/sf/interrupted-session.js b/src/resources/extensions/sf/interrupted-session.js
index 27f33d438..58bf00771 100644
--- a/src/resources/extensions/sf/interrupted-session.js
+++ b/src/resources/extensions/sf/interrupted-session.js
@@ -164,8 +164,28 @@ export function formatInterruptedSessionSummary(assessment) {
   }
   return ["Paused autonomous mode session detected."];
 }
+// #wiggums: verify the PID is actually alive before telling the operator to
+// kill it. A crashed prior session leaves its pid in the lock, and reporting
+// "kill PID" for a dead process is confusing, so it is labelled a stale lock.
+// Returns true if the signal-0 probe succeeds or fails with EPERM; false for
+// a non-integer, non-positive, or own PID and for any other probe error.
+function isPidAlive(pid) {
+  if (!Number.isInteger(pid) || pid <= 0) return false;
+  if (pid === process.pid) return false;
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    if (err?.code === "EPERM") return true;
+    return false;
+  }
+}
+
 export function formatInterruptedSessionRunningMessage(assessment) {
   const pid = assessment.lock?.pid;
+  if (pid && !isPidAlive(pid)) {
+    return `Stale autonomous mode lock from dead PID ${pid}. The previous session crashed without releasing the lock — it will be auto-recovered on the next session-lock acquisition (#sf-stale-lock-auto-recover).`;
+  }
   return pid
     ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
     : "Another autonomous mode session appears to be running.";