fix: allow stopping auto-mode from a different terminal (#586)
* fix: allow stopping auto-mode from a different terminal (#584) Auto-mode lock file was written to the worktree path instead of the project root, making it invisible to other processes. Additionally, /gsd stop only checked in-memory state which is process-local. - Add lockBase() helper to always write auto.lock at project root - Add stopAutoRemote() for cross-process stop via SIGTERM - Update /gsd stop to fall back to lock-file-based remote stop * fix: handle Windows SIGTERM behavior in stop-auto-remote test On Windows, SIGTERM is not interceptable by Node.js processes — the process exits with code 1 rather than running the SIGTERM handler. Accept either exit code on Windows while still asserting clean exit (0) on Unix platforms.
This commit is contained in:
parent
5866bb0b27
commit
4c283192bd
3 changed files with 183 additions and 10 deletions
|
|
@ -293,6 +293,41 @@ export function isAutoPaused(): boolean {
|
|||
return paused;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the base path to use for the auto.lock file.
|
||||
* Always uses the original project root (not the worktree) so that
|
||||
* a second terminal can discover and stop a running auto-mode session.
|
||||
*/
|
||||
function lockBase(): string {
|
||||
return originalBasePath || basePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to stop a running auto-mode session from a different process.
|
||||
* Reads the lock file at the project root, checks if the PID is alive,
|
||||
* and sends SIGTERM to gracefully stop it.
|
||||
*
|
||||
* Returns true if a remote session was found and signaled, false otherwise.
|
||||
*/
|
||||
export function stopAutoRemote(projectRoot: string): { found: boolean; pid?: number; error?: string } {
|
||||
const lock = readCrashLock(projectRoot);
|
||||
if (!lock) return { found: false };
|
||||
|
||||
if (!isLockProcessAlive(lock)) {
|
||||
// Stale lock — clean it up
|
||||
clearLock(projectRoot);
|
||||
return { found: false };
|
||||
}
|
||||
|
||||
// Send SIGTERM — the auto-mode process has a handler that clears the lock and exits
|
||||
try {
|
||||
process.kill(lock.pid, "SIGTERM");
|
||||
return { found: true, pid: lock.pid };
|
||||
} catch (err) {
|
||||
return { found: false, error: (err as Error).message };
|
||||
}
|
||||
}
|
||||
|
||||
export function isStepMode(): boolean {
|
||||
return stepMode;
|
||||
}
|
||||
|
|
@ -371,7 +406,7 @@ function startDispatchGapWatchdog(ctx: ExtensionContext, pi: ExtensionAPI): void
|
|||
export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promise<void> {
|
||||
if (!active && !paused) return;
|
||||
clearUnitTimeout();
|
||||
if (basePath) clearLock(basePath);
|
||||
if (lockBase()) clearLock(lockBase());
|
||||
clearSkillSnapshot();
|
||||
_dispatching = false;
|
||||
_skipDepth = 0;
|
||||
|
|
@ -454,7 +489,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
|
|||
export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Promise<void> {
|
||||
if (!active) return;
|
||||
clearUnitTimeout();
|
||||
if (basePath) clearLock(basePath);
|
||||
if (lockBase()) clearLock(lockBase());
|
||||
|
||||
// Remove SIGTERM handler registered at auto-mode start
|
||||
deregisterSigtermHandler();
|
||||
|
|
@ -527,8 +562,8 @@ export async function startAuto(
|
|||
}
|
||||
}
|
||||
|
||||
// Re-register SIGTERM handler for the resumed session
|
||||
registerSigtermHandler(basePath);
|
||||
// Re-register SIGTERM handler for the resumed session (use original base for lock)
|
||||
registerSigtermHandler(lockBase());
|
||||
|
||||
ctx.ui.setStatus("gsd-auto", stepMode ? "next" : "auto");
|
||||
ctx.ui.setFooter(hideFooter);
|
||||
|
|
@ -699,8 +734,8 @@ export async function startAuto(
|
|||
gitService = new GitServiceImpl(basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
|
||||
ctx.ui.notify(`Created auto-worktree at ${wtPath}`, "info");
|
||||
}
|
||||
// Re-register SIGTERM handler with the new basePath
|
||||
registerSigtermHandler(basePath);
|
||||
// Re-register SIGTERM handler with the original basePath (lock lives there)
|
||||
registerSigtermHandler(originalBasePath);
|
||||
} catch (err) {
|
||||
// Worktree creation is non-fatal — continue in the project root.
|
||||
ctx.ui.notify(
|
||||
|
|
@ -956,7 +991,7 @@ export async function handleAgentEnd(
|
|||
return;
|
||||
}
|
||||
const sessionFile = ctx.sessionManager.getSessionFile();
|
||||
writeLock(basePath, hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile);
|
||||
writeLock(lockBase(), hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile);
|
||||
// Persist hook state so cycle counts survive crashes
|
||||
persistHookState(basePath);
|
||||
|
||||
|
|
@ -1762,7 +1797,7 @@ async function dispatchNextUnit(
|
|||
// Pi appends entries incrementally via appendFileSync, so on crash the
|
||||
// session file survives with every tool call up to the crash point.
|
||||
const sessionFile = ctx.sessionManager.getSessionFile();
|
||||
writeLock(basePath, unitType, unitId, completedUnits.length, sessionFile);
|
||||
writeLock(lockBase(), unitType, unitId, completedUnits.length, sessionFile);
|
||||
|
||||
// On crash recovery, prepend the full recovery briefing
|
||||
// On retry (stuck detection), prepend deep diagnostic from last attempt
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import { fileURLToPath } from "node:url";
|
|||
import { deriveState } from "./state.js";
|
||||
import { GSDDashboardOverlay } from "./dashboard-overlay.js";
|
||||
import { showQueue, showDiscuss } from "./guided-flow.js";
|
||||
import { startAuto, stopAuto, pauseAuto, isAutoActive, isAutoPaused, isStepMode } from "./auto.js";
|
||||
import { startAuto, stopAuto, pauseAuto, isAutoActive, isAutoPaused, isStepMode, stopAutoRemote } from "./auto.js";
|
||||
import {
|
||||
getGlobalGSDPreferencesPath,
|
||||
getLegacyGlobalGSDPreferencesPath,
|
||||
|
|
@ -178,7 +178,15 @@ export function registerGSDCommand(pi: ExtensionAPI): void {
|
|||
|
||||
if (trimmed === "stop") {
|
||||
if (!isAutoActive() && !isAutoPaused()) {
|
||||
ctx.ui.notify("Auto-mode is not running.", "info");
|
||||
// Not running in this process — check for a remote auto-mode session
|
||||
const result = stopAutoRemote(process.cwd());
|
||||
if (result.found) {
|
||||
ctx.ui.notify(`Sent stop signal to auto-mode session (PID ${result.pid}). It will shut down gracefully.`, "info");
|
||||
} else if (result.error) {
|
||||
ctx.ui.notify(`Failed to stop remote auto-mode: ${result.error}`, "error");
|
||||
} else {
|
||||
ctx.ui.notify("Auto-mode is not running.", "info");
|
||||
}
|
||||
return;
|
||||
}
|
||||
await stopAuto(ctx, pi);
|
||||
|
|
|
|||
130
src/resources/extensions/gsd/tests/stop-auto-remote.test.ts
Normal file
130
src/resources/extensions/gsd/tests/stop-auto-remote.test.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { fork } from "node:child_process";
|
||||
|
||||
import { writeFileSync } from "node:fs";
|
||||
import {
|
||||
writeLock,
|
||||
readCrashLock,
|
||||
clearLock,
|
||||
isLockProcessAlive,
|
||||
} from "../crash-recovery.ts";
|
||||
import { stopAutoRemote } from "../auto.ts";
|
||||
|
||||
function makeTmpBase(): string {
|
||||
const base = join(tmpdir(), `gsd-test-${randomUUID()}`);
|
||||
mkdirSync(join(base, ".gsd"), { recursive: true });
|
||||
return base;
|
||||
}
|
||||
|
||||
function cleanup(base: string): void {
|
||||
try { rmSync(base, { recursive: true, force: true }); } catch { /* */ }
|
||||
}
|
||||
|
||||
// ─── stopAutoRemote ──────────────────────────────────────────────────────
|
||||
|
||||
test("stopAutoRemote returns found:false when no lock file exists", () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
const result = stopAutoRemote(base);
|
||||
assert.equal(result.found, false);
|
||||
assert.equal(result.pid, undefined);
|
||||
assert.equal(result.error, undefined);
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
test("stopAutoRemote cleans up stale lock (dead PID) and returns found:false", () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
// Write a lock with a PID that doesn't exist
|
||||
writeLock(base, "execute-task", "M001/S01/T01", 3);
|
||||
// Overwrite PID to a dead one
|
||||
const lock = readCrashLock(base)!;
|
||||
const staleData = { ...lock, pid: 999999999 };
|
||||
writeFileSync(join(base, ".gsd", "auto.lock"), JSON.stringify(staleData, null, 2), "utf-8");
|
||||
|
||||
const result = stopAutoRemote(base);
|
||||
assert.equal(result.found, false, "stale lock should not be found as running");
|
||||
|
||||
// Lock should be cleaned up
|
||||
assert.equal(readCrashLock(base), null, "stale lock should be removed");
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
test("stopAutoRemote sends SIGTERM to a live process and returns found:true", async () => {
|
||||
const base = makeTmpBase();
|
||||
|
||||
// Spawn a child process that sleeps, acting as a fake auto-mode session
|
||||
const child = fork(
|
||||
"-e",
|
||||
["process.on('SIGTERM', () => process.exit(0)); setTimeout(() => process.exit(1), 30000);"],
|
||||
{ stdio: "ignore", detached: false },
|
||||
);
|
||||
|
||||
try {
|
||||
// Wait for child to be ready
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Write lock with child's PID
|
||||
const lockData = {
|
||||
pid: child.pid,
|
||||
startedAt: new Date().toISOString(),
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
unitStartedAt: new Date().toISOString(),
|
||||
completedUnits: 0,
|
||||
};
|
||||
writeFileSync(join(base, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2), "utf-8");
|
||||
|
||||
const result = stopAutoRemote(base);
|
||||
assert.equal(result.found, true, "should find running auto-mode");
|
||||
assert.equal(result.pid, child.pid, "should return the PID");
|
||||
|
||||
// Wait for child to exit (it should receive SIGTERM)
|
||||
const exitCode = await new Promise<number | null>((resolve) => {
|
||||
child.on("exit", (code) => resolve(code));
|
||||
setTimeout(() => resolve(null), 5000);
|
||||
});
|
||||
// On Windows, SIGTERM is not interceptable — the process exits with code 1
|
||||
// rather than running the handler. Accept either clean exit (0) or forced (1).
|
||||
assert.ok(exitCode !== null, "child should have exited after SIGTERM");
|
||||
if (process.platform !== "win32") {
|
||||
assert.equal(exitCode, 0, "child should have exited cleanly via SIGTERM");
|
||||
}
|
||||
} finally {
|
||||
try { child.kill("SIGKILL"); } catch { /* already dead */ }
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Lock path: original project root vs worktree ────────────────────────
|
||||
|
||||
test("lock file should be discoverable at project root, not worktree path", () => {
|
||||
const projectRoot = makeTmpBase();
|
||||
const worktreePath = join(projectRoot, ".gsd", "worktrees", "M001");
|
||||
mkdirSync(join(worktreePath, ".gsd"), { recursive: true });
|
||||
|
||||
try {
|
||||
// Simulate: auto-mode writes lock to project root (the fix)
|
||||
writeLock(projectRoot, "execute-task", "M001/S01/T01", 0);
|
||||
|
||||
// Second terminal checks project root — should find the lock
|
||||
const lock = readCrashLock(projectRoot);
|
||||
assert.ok(lock, "lock should be found at project root");
|
||||
assert.equal(lock!.unitType, "execute-task");
|
||||
|
||||
// Worktree path should NOT have a lock
|
||||
const worktreeLock = readCrashLock(worktreePath);
|
||||
assert.equal(worktreeLock, null, "lock should NOT exist at worktree path");
|
||||
} finally {
|
||||
cleanup(projectRoot);
|
||||
}
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue