diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts
index 131272345..81939a4ae 100644
--- a/src/resources/extensions/gsd/auto.ts
+++ b/src/resources/extensions/gsd/auto.ts
@@ -414,6 +414,13 @@ export function stopAutoRemote(projectRoot: string): {
   const lock = readCrashLock(projectRoot);
   if (!lock) return { found: false };
 
+  // Never SIGTERM ourselves — a stale lock with our own PID is not a remote
+  // session, it is leftover from a prior loop exit in this process. (#2730)
+  if (lock.pid === process.pid) {
+    clearLock(projectRoot);
+    return { found: false };
+  }
+
   if (!isLockProcessAlive(lock)) {
     // Stale lock — clean it up
     clearLock(projectRoot);
@@ -445,6 +452,10 @@ export function checkRemoteAutoSession(projectRoot: string): {
   const lock = readCrashLock(projectRoot);
   if (!lock) return { running: false };
 
+  // Our own PID is not a "remote" session — it is a stale lock left by this
+  // process (e.g. after step-mode exit without full cleanup). (#2730)
+  if (lock.pid === process.pid) return { running: false };
+
   if (!isLockProcessAlive(lock)) {
     // Stale lock from a dead process — not a live remote session
     return { running: false };
@@ -548,6 +559,23 @@ function cleanupAfterLoopExit(ctx: ExtensionContext): void {
   s.active = false;
   clearUnitTimeout();
 
+  // Clear crash lock and release session lock so the next `/gsd next` does
+  // not see a stale lock with the current PID and treat it as a "remote"
+  // session (which would cause it to SIGTERM itself). (#2730)
+  try {
+    const base = lockBase();
+    if (base) {
+      try {
+        clearLock(base);
+      } finally {
+        // Release the session lock even when clearing the crash lock throws.
+        releaseSessionLock(base);
+      }
+    }
+  } catch {
+    /* best-effort — mirror stopAuto cleanup */
+  }
+
   ctx.ui.setStatus("gsd-auto", undefined);
   ctx.ui.setWidget("gsd-progress", undefined);
   ctx.ui.setFooter(undefined);
diff --git a/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts b/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts
new file mode 100644
index 000000000..08f1c8f29
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts
@@ -0,0 +1,94 @@
+// Regression tests for #2730: a crash lock that carries this process's own PID
+// must not be reported as a remote auto session and must not be signalled.
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdirSync, mkdtempSync, writeFileSync, existsSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+import { writeLock, readCrashLock, clearLock } from "../crash-recovery.ts";
+import { checkRemoteAutoSession, stopAutoRemote } from "../auto.ts";
+
+/**
+ * Create a fresh temporary project directory containing an empty `.gsd/`
+ * folder. Returns its path; each test removes it again via `t.after`.
+ */
+function makeTmpProject(): string {
+  const dir = mkdtempSync(join(tmpdir(), "gsd-stale-lock-test-"));
+  mkdirSync(join(dir, ".gsd"), { recursive: true });
+  return dir;
+}
+
+// ─── checkRemoteAutoSession: own-PID filtering (#2730) ───────────────────
+
+test("#2730: checkRemoteAutoSession returns { running: false } when lock PID matches current process", (t) => {
+  const dir = makeTmpProject();
+  t.after(() => rmSync(dir, { recursive: true, force: true }));
+
+  // Write a lock with the current process PID — simulates a stale lock
+  // left behind after step-mode exit without full cleanup.
+  writeLock(dir, "execute-task", "M001/S01/T01");
+
+  const lock = readCrashLock(dir);
+  assert.ok(lock, "lock file should exist");
+  assert.equal(lock!.pid, process.pid, "lock should have our PID");
+
+  const result = checkRemoteAutoSession(dir);
+  assert.equal(result.running, false, "own PID must not be treated as a remote session");
+});
+
+test("#2730: checkRemoteAutoSession still detects a genuine remote session (different PID)", (t) => {
+  const dir = makeTmpProject();
+  t.after(() => rmSync(dir, { recursive: true, force: true }));
+
+  // Use parent PID — guaranteed alive, guaranteed not our PID.
+  const remotePid = process.ppid;
+  const lockData = {
+    pid: remotePid,
+    startedAt: new Date().toISOString(),
+    unitType: "execute-task",
+    unitId: "M001/S01/T02",
+    unitStartedAt: new Date().toISOString(),
+  };
+  writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2));
+
+  const result = checkRemoteAutoSession(dir);
+  assert.equal(result.running, true, "different live PID should be detected as running");
+  assert.equal(result.pid, remotePid);
+});
+
+// ─── stopAutoRemote: self-kill prevention (#2730) ────────────────────────
+
+test("#2730: stopAutoRemote does not send SIGTERM when lock PID matches current process", (t) => {
+  const dir = makeTmpProject();
+  t.after(() => rmSync(dir, { recursive: true, force: true }));
+
+  // Write a lock with our own PID
+  writeLock(dir, "execute-task", "M001/S01/T01");
+
+  const result = stopAutoRemote(dir);
+  assert.equal(result.found, false, "own PID must not be signalled");
+
+  // The lock should be cleared as part of the self-detection cleanup
+  assert.ok(!existsSync(join(dir, ".gsd", "auto.lock")), "stale self-lock should be cleared");
+});
+
+test("#2730: stopAutoRemote clears stale lock from dead remote process without error", (t) => {
+  const dir = makeTmpProject();
+  t.after(() => rmSync(dir, { recursive: true, force: true }));
+
+  // Simulate a stale lock from a process that no longer exists
+  const lockData = {
+    pid: 9999999,
+    startedAt: "2026-03-01T00:00:00Z",
+    unitType: "plan-slice",
+    unitId: "M001/S02",
+    unitStartedAt: "2026-03-01T00:05:00Z",
+  };
+  writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2));
+
+  const result = stopAutoRemote(dir);
+  assert.equal(result.found, false, "dead remote PID should not be reported as found");
+  assert.ok(!existsSync(join(dir, ".gsd", "auto.lock")), "stale lock should be cleaned up");
+});