fix: prevent gsd next from self-killing via stale crash lock (#2784)
* fix: prevent gsd next from self-killing via stale crash lock
cleanupAfterLoopExit() did not clear the crash lock or session lock.
On the next `/gsd next`, checkRemoteAutoSession() read the stale lock
with the current PID, reported it as a "remote" session, and
stopAutoRemote() sent SIGTERM to the current process.
Three fixes:
1. cleanupAfterLoopExit() now clears crash lock and releases session lock
2. checkRemoteAutoSession() returns { running: false } for own PID
3. stopAutoRemote() guards against self-kill for own PID
Closes #2730
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
* test: add regression tests for stale lock self-kill prevention
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
474d097c34
commit
4fe4dbf456
2 changed files with 108 additions and 0 deletions
|
|
@ -414,6 +414,13 @@ export function stopAutoRemote(projectRoot: string): {
|
|||
const lock = readCrashLock(projectRoot);
|
||||
if (!lock) return { found: false };
|
||||
|
||||
// Never SIGTERM ourselves — a stale lock with our own PID is not a remote
|
||||
// session, it is leftover from a prior loop exit in this process. (#2730)
|
||||
if (lock.pid === process.pid) {
|
||||
clearLock(projectRoot);
|
||||
return { found: false };
|
||||
}
|
||||
|
||||
if (!isLockProcessAlive(lock)) {
|
||||
// Stale lock — clean it up
|
||||
clearLock(projectRoot);
|
||||
|
|
@ -445,6 +452,10 @@ export function checkRemoteAutoSession(projectRoot: string): {
|
|||
const lock = readCrashLock(projectRoot);
|
||||
if (!lock) return { running: false };
|
||||
|
||||
// Our own PID is not a "remote" session — it is a stale lock left by this
|
||||
// process (e.g. after step-mode exit without full cleanup). (#2730)
|
||||
if (lock.pid === process.pid) return { running: false };
|
||||
|
||||
if (!isLockProcessAlive(lock)) {
|
||||
// Stale lock from a dead process — not a live remote session
|
||||
return { running: false };
|
||||
|
|
@ -548,6 +559,16 @@ function cleanupAfterLoopExit(ctx: ExtensionContext): void {
|
|||
s.active = false;
|
||||
clearUnitTimeout();
|
||||
|
||||
// Clear crash lock and release session lock so the next `/gsd next` does
|
||||
// not see a stale lock with the current PID and treat it as a "remote"
|
||||
// session (which would cause it to SIGTERM itself). (#2730)
|
||||
try {
|
||||
if (lockBase()) clearLock(lockBase());
|
||||
if (lockBase()) releaseSessionLock(lockBase());
|
||||
} catch {
|
||||
/* best-effort — mirror stopAuto cleanup */
|
||||
}
|
||||
|
||||
ctx.ui.setStatus("gsd-auto", undefined);
|
||||
ctx.ui.setWidget("gsd-progress", undefined);
|
||||
ctx.ui.setFooter(undefined);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, writeFileSync, existsSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
import { writeLock, readCrashLock, clearLock } from "../crash-recovery.ts";
|
||||
import { checkRemoteAutoSession, stopAutoRemote } from "../auto.ts";
|
||||
|
||||
function makeTmpProject(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), "gsd-stale-lock-test-"));
|
||||
mkdirSync(join(dir, ".gsd"), { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
// ─── checkRemoteAutoSession: own-PID filtering (#2730) ───────────────────
|
||||
|
||||
test("#2730: checkRemoteAutoSession returns { running: false } when lock PID matches current process", (t) => {
|
||||
const dir = makeTmpProject();
|
||||
t.after(() => rmSync(dir, { recursive: true, force: true }));
|
||||
|
||||
// Write a lock with the current process PID — simulates a stale lock
|
||||
// left behind after step-mode exit without full cleanup.
|
||||
writeLock(dir, "execute-task", "M001/S01/T01");
|
||||
|
||||
const lock = readCrashLock(dir);
|
||||
assert.ok(lock, "lock file should exist");
|
||||
assert.equal(lock!.pid, process.pid, "lock should have our PID");
|
||||
|
||||
const result = checkRemoteAutoSession(dir);
|
||||
assert.equal(result.running, false, "own PID must not be treated as a remote session");
|
||||
});
|
||||
|
||||
test("#2730: checkRemoteAutoSession still detects a genuine remote session (different PID)", (t) => {
|
||||
const dir = makeTmpProject();
|
||||
t.after(() => rmSync(dir, { recursive: true, force: true }));
|
||||
|
||||
// Use parent PID — guaranteed alive, guaranteed not our PID.
|
||||
const remotePid = process.ppid;
|
||||
const lockData = {
|
||||
pid: remotePid,
|
||||
startedAt: new Date().toISOString(),
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T02",
|
||||
unitStartedAt: new Date().toISOString(),
|
||||
};
|
||||
writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2));
|
||||
|
||||
const result = checkRemoteAutoSession(dir);
|
||||
assert.equal(result.running, true, "different live PID should be detected as running");
|
||||
assert.equal(result.pid, remotePid);
|
||||
});
|
||||
|
||||
// ─── stopAutoRemote: self-kill prevention (#2730) ────────────────────────
|
||||
|
||||
test("#2730: stopAutoRemote does not send SIGTERM when lock PID matches current process", (t) => {
|
||||
const dir = makeTmpProject();
|
||||
t.after(() => rmSync(dir, { recursive: true, force: true }));
|
||||
|
||||
// Write a lock with our own PID
|
||||
writeLock(dir, "execute-task", "M001/S01/T01");
|
||||
|
||||
const result = stopAutoRemote(dir);
|
||||
assert.equal(result.found, false, "own PID must not be signalled");
|
||||
|
||||
// The lock should be cleared as part of the self-detection cleanup
|
||||
assert.ok(!existsSync(join(dir, ".gsd", "auto.lock")), "stale self-lock should be cleared");
|
||||
});
|
||||
|
||||
test("#2730: stopAutoRemote clears stale lock from dead remote process without error", (t) => {
|
||||
const dir = makeTmpProject();
|
||||
t.after(() => rmSync(dir, { recursive: true, force: true }));
|
||||
|
||||
// Simulate a stale lock from a process that no longer exists
|
||||
const lockData = {
|
||||
pid: 9999999,
|
||||
startedAt: "2026-03-01T00:00:00Z",
|
||||
unitType: "plan-slice",
|
||||
unitId: "M001/S02",
|
||||
unitStartedAt: "2026-03-01T00:05:00Z",
|
||||
};
|
||||
writeFileSync(join(dir, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2));
|
||||
|
||||
const result = stopAutoRemote(dir);
|
||||
assert.equal(result.found, false, "dead remote PID should not be reported as found");
|
||||
assert.ok(!existsSync(join(dir, ".gsd", "auto.lock")), "stale lock should be cleaned up");
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue