fix(wiggums): verify PID liveness before "Another session running" message

Two sites told operator to "kill PID X" without checking X was alive:
- interrupted-session.js:formatInterruptedSessionRunningMessage
- auto.js autonomous-start blocking notification

Both reported stale locks from crashed prior sessions as if a live session
existed, confusing the operator and blocking restart. session-lock.js already
has auto-recovery for stale-PID locks; these two surfaces just needed
matching liveness checks to label dead-PID locks correctly.

Now: dead-PID → "Stale lock from dead PID X — will be auto-recovered"
     alive-PID → original "kill X" message

Catches one of the 14 Ralph-Wiggum-obvious patterns surfaced this
session. Reduces operator confusion + dovetails with R055 (M038/S05)
when stale-lock auto-recovery becomes a core-loop detector.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-17 04:20:00 +02:00
parent 96d03b33bc
commit 24d2b37562
2 changed files with 41 additions and 4 deletions

View file

@ -1547,11 +1547,28 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
interruptedAssessment ?? (await assessInterruptedSession(base));
if (freshStartAssessment.classification === "running") {
const pid = freshStartAssessment.lock?.pid;
// #wiggums: verify PID liveness before reporting. A crashed prior
// session leaves a stale lock; telling the operator to kill a dead
// PID is misleading. If dead, label as stale + let session-lock
// auto-recover on the next acquisition attempt.
const pidAlive =
pid && Number.isInteger(pid) && pid > 0 && pid !== process.pid
? (() => {
try {
process.kill(pid, 0);
return true;
} catch (err) {
return err?.code === "EPERM";
}
})()
: false;
ctx.ui.notify(
pid
? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
: "Another autonomous mode session appears to be running.",
"error",
pid && !pidAlive
? `Stale autonomous mode lock from dead PID ${pid} — will be auto-recovered on next session-lock acquisition.`
: pid
? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
: "Another autonomous mode session appears to be running.",
pid && !pidAlive ? "warning" : "error",
{
noticeKind: NOTICE_KIND.BLOCKING_NOTICE,
dedupe_key: "auto-start-concurrent-session-blocked",

View file

@ -164,8 +164,28 @@ export function formatInterruptedSessionSummary(assessment) {
}
return ["Paused autonomous mode session detected."];
}
// #wiggums: liveness probe for the PID recorded in a session lock.
// Returns true only when `pid` is a positive integer, is not this process,
// and probing it with signal 0 either succeeds or fails with EPERM (the
// process exists but we are not permitted to signal it). Everything else —
// non-integer or non-positive input, our own PID, or any other probe
// failure — yields false, so the caller can label the lock as stale
// instead of telling the operator to kill a PID that is already gone.
function isPidAlive(pid) {
  const isProbeCandidate =
    Number.isInteger(pid) && pid > 0 && pid !== process.pid;
  if (!isProbeCandidate) {
    return false;
  }
  try {
    // Signal 0 only tests for existence; no signal is actually delivered.
    process.kill(pid, 0);
  } catch (probeError) {
    // EPERM still counts as alive; any other failure counts as not alive.
    return probeError?.code === "EPERM";
  }
  return true;
}
export function formatInterruptedSessionRunningMessage(assessment) {
const pid = assessment.lock?.pid;
if (pid && !isPidAlive(pid)) {
return `Stale autonomous mode lock from dead PID ${pid}. The previous session crashed without releasing the lock — it will be auto-recovered on the next session-lock acquisition (#sf-stale-lock-auto-recover).`;
}
return pid
? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
: "Another autonomous mode session appears to be running.";