From 24d2b375627e6d5d9c369ea59a35491050c4e810 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 17 May 2026 04:20:00 +0200 Subject: [PATCH] fix(wiggums): verify PID liveness before "Another session running" message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two sites told operator to "kill PID X" without checking X was alive: - interrupted-session.js:formatInterruptedSessionRunningMessage - auto.js autonomous-start blocking notification Both report stale locks from crashed prior sessions as if a live session exists, confusing operator and blocking restart. Session-lock.js already has auto-recovery for stale-PID locks; these two surfaces just needed matching liveness checks to label dead-PID locks correctly. Now: dead-PID → "Stale lock from dead PID X — will be auto-recovered" alive-PID → original "kill X" message Catches one of the 14 Ralph-Wiggum-obvious patterns surfaced this session. Reduces operator confusion + dovetails with R055 (M038/S05) when stale-lock auto-recovery becomes a core-loop detector. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/resources/extensions/sf/auto.js | 25 ++++++++++++++++--- .../extensions/sf/interrupted-session.js | 20 +++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/sf/auto.js b/src/resources/extensions/sf/auto.js index 63f7600c7..f08f30347 100644 --- a/src/resources/extensions/sf/auto.js +++ b/src/resources/extensions/sf/auto.js @@ -1547,11 +1547,28 @@ export async function startAuto(ctx, pi, base, verboseMode, options) { interruptedAssessment ?? (await assessInterruptedSession(base)); if (freshStartAssessment.classification === "running") { const pid = freshStartAssessment.lock?.pid; + // #wiggums: verify PID liveness before reporting. A crashed prior + // session leaves a stale lock; telling the operator to kill a dead + // PID is misleading. 
If dead, label as stale + let session-lock + // auto-recover on the next acquisition attempt. + const pidAlive = + pid && Number.isInteger(pid) && pid > 0 && pid !== process.pid + ? (() => { + try { + process.kill(pid, 0); + return true; + } catch (err) { + return err?.code === "EPERM"; + } + })() + : false; ctx.ui.notify( - pid - ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.` - : "Another autonomous mode session appears to be running.", - "error", + pid && !pidAlive + ? `Stale autonomous mode lock from dead PID ${pid} — will be auto-recovered on next session-lock acquisition.` + : pid + ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.` + : "Another autonomous mode session appears to be running.", + pid && !pidAlive ? "warning" : "error", { noticeKind: NOTICE_KIND.BLOCKING_NOTICE, dedupe_key: "auto-start-concurrent-session-blocked", diff --git a/src/resources/extensions/sf/interrupted-session.js b/src/resources/extensions/sf/interrupted-session.js index 27f33d438..58bf00771 100644 --- a/src/resources/extensions/sf/interrupted-session.js +++ b/src/resources/extensions/sf/interrupted-session.js @@ -164,8 +164,28 @@ export function formatInterruptedSessionSummary(assessment) { } return ["Paused autonomous mode session detected."]; } +// #wiggums: verify the PID is actually alive before telling the operator +// to kill it. Without this check, a crashed prior session leaves the lock +// pid behind and we tell the operator "kill <pid>" which is +// confusing + makes the operator think SF is buggy. If the PID isn't +// alive, label the lock as stale rather than reporting a live session. 
+function isPidAlive(pid) { + if (!Number.isInteger(pid) || pid <= 0) return false; + if (pid === process.pid) return false; + try { + process.kill(pid, 0); + return true; + } catch (err) { + if (err?.code === "EPERM") return true; + return false; + } +} + export function formatInterruptedSessionRunningMessage(assessment) { const pid = assessment.lock?.pid; + if (pid && !isPidAlive(pid)) { + return `Stale autonomous mode lock from dead PID ${pid}. The previous session crashed without releasing the lock — it will be auto-recovered on the next session-lock acquisition (#sf-stale-lock-auto-recover).`; + } return pid ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.` : "Another autonomous mode session appears to be running.";