fix: allow stopping auto-mode from a different terminal (#586)

* fix: allow stopping auto-mode from a different terminal (#584)

Auto-mode lock file was written to the worktree path instead of the
project root, making it invisible to other processes. Additionally,
/gsd stop only checked in-memory state which is process-local.

- Add lockBase() helper to always write auto.lock at project root
- Add stopAutoRemote() for cross-process stop via SIGTERM
- Update /gsd stop to fall back to lock-file-based remote stop

* fix: handle Windows SIGTERM behavior in stop-auto-remote test

On Windows, SIGTERM is not interceptable by Node.js processes — the
process exits with code 1 rather than running the SIGTERM handler.
Accept either exit code on Windows while still asserting clean exit (0)
on Unix platforms.
This commit is contained in:
Flux Labs 2026-03-15 23:19:04 -05:00 committed by GitHub
parent 5866bb0b27
commit 4c283192bd
3 changed files with 183 additions and 10 deletions

View file

@ -293,6 +293,41 @@ export function isAutoPaused(): boolean {
return paused;
}
/**
* Return the base path to use for the auto.lock file.
* Always uses the original project root (not the worktree) so that
* a second terminal can discover and stop a running auto-mode session.
*/
function lockBase(): string {
return originalBasePath || basePath;
}
/**
* Attempt to stop a running auto-mode session from a different process.
* Reads the lock file at the project root, checks if the PID is alive,
* and sends SIGTERM to gracefully stop it.
*
* Returns true if a remote session was found and signaled, false otherwise.
*/
export function stopAutoRemote(projectRoot: string): { found: boolean; pid?: number; error?: string } {
const lock = readCrashLock(projectRoot);
if (!lock) return { found: false };
if (!isLockProcessAlive(lock)) {
// Stale lock — clean it up
clearLock(projectRoot);
return { found: false };
}
// Send SIGTERM — the auto-mode process has a handler that clears the lock and exits
try {
process.kill(lock.pid, "SIGTERM");
return { found: true, pid: lock.pid };
} catch (err) {
return { found: false, error: (err as Error).message };
}
}
export function isStepMode(): boolean {
return stepMode;
}
@ -371,7 +406,7 @@ function startDispatchGapWatchdog(ctx: ExtensionContext, pi: ExtensionAPI): void
export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promise<void> {
if (!active && !paused) return;
clearUnitTimeout();
if (basePath) clearLock(basePath);
if (lockBase()) clearLock(lockBase());
clearSkillSnapshot();
_dispatching = false;
_skipDepth = 0;
@ -454,7 +489,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Promise<void> {
if (!active) return;
clearUnitTimeout();
if (basePath) clearLock(basePath);
if (lockBase()) clearLock(lockBase());
// Remove SIGTERM handler registered at auto-mode start
deregisterSigtermHandler();
@ -527,8 +562,8 @@ export async function startAuto(
}
}
// Re-register SIGTERM handler for the resumed session
registerSigtermHandler(basePath);
// Re-register SIGTERM handler for the resumed session (use original base for lock)
registerSigtermHandler(lockBase());
ctx.ui.setStatus("gsd-auto", stepMode ? "next" : "auto");
ctx.ui.setFooter(hideFooter);
@ -699,8 +734,8 @@ export async function startAuto(
gitService = new GitServiceImpl(basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
ctx.ui.notify(`Created auto-worktree at ${wtPath}`, "info");
}
// Re-register SIGTERM handler with the new basePath
registerSigtermHandler(basePath);
// Re-register SIGTERM handler with the original basePath (lock lives there)
registerSigtermHandler(originalBasePath);
} catch (err) {
// Worktree creation is non-fatal — continue in the project root.
ctx.ui.notify(
@ -956,7 +991,7 @@ export async function handleAgentEnd(
return;
}
const sessionFile = ctx.sessionManager.getSessionFile();
writeLock(basePath, hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile);
writeLock(lockBase(), hookUnit.unitType, hookUnit.unitId, completedUnits.length, sessionFile);
// Persist hook state so cycle counts survive crashes
persistHookState(basePath);
@ -1762,7 +1797,7 @@ async function dispatchNextUnit(
// Pi appends entries incrementally via appendFileSync, so on crash the
// session file survives with every tool call up to the crash point.
const sessionFile = ctx.sessionManager.getSessionFile();
writeLock(basePath, unitType, unitId, completedUnits.length, sessionFile);
writeLock(lockBase(), unitType, unitId, completedUnits.length, sessionFile);
// On crash recovery, prepend the full recovery briefing
// On retry (stuck detection), prepend deep diagnostic from last attempt

View file

@ -12,7 +12,7 @@ import { fileURLToPath } from "node:url";
import { deriveState } from "./state.js";
import { GSDDashboardOverlay } from "./dashboard-overlay.js";
import { showQueue, showDiscuss } from "./guided-flow.js";
import { startAuto, stopAuto, pauseAuto, isAutoActive, isAutoPaused, isStepMode } from "./auto.js";
import { startAuto, stopAuto, pauseAuto, isAutoActive, isAutoPaused, isStepMode, stopAutoRemote } from "./auto.js";
import {
getGlobalGSDPreferencesPath,
getLegacyGlobalGSDPreferencesPath,
@ -178,7 +178,15 @@ export function registerGSDCommand(pi: ExtensionAPI): void {
if (trimmed === "stop") {
if (!isAutoActive() && !isAutoPaused()) {
ctx.ui.notify("Auto-mode is not running.", "info");
// Not running in this process — check for a remote auto-mode session
const result = stopAutoRemote(process.cwd());
if (result.found) {
ctx.ui.notify(`Sent stop signal to auto-mode session (PID ${result.pid}). It will shut down gracefully.`, "info");
} else if (result.error) {
ctx.ui.notify(`Failed to stop remote auto-mode: ${result.error}`, "error");
} else {
ctx.ui.notify("Auto-mode is not running.", "info");
}
return;
}
await stopAuto(ctx, pi);

View file

@ -0,0 +1,130 @@
import test from "node:test";
import assert from "node:assert/strict";
import { mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { randomUUID } from "node:crypto";
import { fork } from "node:child_process";
import { writeFileSync } from "node:fs";
import {
writeLock,
readCrashLock,
clearLock,
isLockProcessAlive,
} from "../crash-recovery.ts";
import { stopAutoRemote } from "../auto.ts";
function makeTmpBase(): string {
const base = join(tmpdir(), `gsd-test-${randomUUID()}`);
mkdirSync(join(base, ".gsd"), { recursive: true });
return base;
}
function cleanup(base: string): void {
try { rmSync(base, { recursive: true, force: true }); } catch { /* */ }
}
// ─── stopAutoRemote ──────────────────────────────────────────────────────
test("stopAutoRemote returns found:false when no lock file exists", () => {
const base = makeTmpBase();
try {
const result = stopAutoRemote(base);
assert.equal(result.found, false);
assert.equal(result.pid, undefined);
assert.equal(result.error, undefined);
} finally {
cleanup(base);
}
});
test("stopAutoRemote cleans up stale lock (dead PID) and returns found:false", () => {
const base = makeTmpBase();
try {
// Write a lock with a PID that doesn't exist
writeLock(base, "execute-task", "M001/S01/T01", 3);
// Overwrite PID to a dead one
const lock = readCrashLock(base)!;
const staleData = { ...lock, pid: 999999999 };
writeFileSync(join(base, ".gsd", "auto.lock"), JSON.stringify(staleData, null, 2), "utf-8");
const result = stopAutoRemote(base);
assert.equal(result.found, false, "stale lock should not be found as running");
// Lock should be cleaned up
assert.equal(readCrashLock(base), null, "stale lock should be removed");
} finally {
cleanup(base);
}
});
test("stopAutoRemote sends SIGTERM to a live process and returns found:true", async () => {
const base = makeTmpBase();
// Spawn a child process that sleeps, acting as a fake auto-mode session
const child = fork(
"-e",
["process.on('SIGTERM', () => process.exit(0)); setTimeout(() => process.exit(1), 30000);"],
{ stdio: "ignore", detached: false },
);
try {
// Wait for child to be ready
await new Promise((resolve) => setTimeout(resolve, 200));
// Write lock with child's PID
const lockData = {
pid: child.pid,
startedAt: new Date().toISOString(),
unitType: "execute-task",
unitId: "M001/S01/T01",
unitStartedAt: new Date().toISOString(),
completedUnits: 0,
};
writeFileSync(join(base, ".gsd", "auto.lock"), JSON.stringify(lockData, null, 2), "utf-8");
const result = stopAutoRemote(base);
assert.equal(result.found, true, "should find running auto-mode");
assert.equal(result.pid, child.pid, "should return the PID");
// Wait for child to exit (it should receive SIGTERM)
const exitCode = await new Promise<number | null>((resolve) => {
child.on("exit", (code) => resolve(code));
setTimeout(() => resolve(null), 5000);
});
// On Windows, SIGTERM is not interceptable — the process exits with code 1
// rather than running the handler. Accept either clean exit (0) or forced (1).
assert.ok(exitCode !== null, "child should have exited after SIGTERM");
if (process.platform !== "win32") {
assert.equal(exitCode, 0, "child should have exited cleanly via SIGTERM");
}
} finally {
try { child.kill("SIGKILL"); } catch { /* already dead */ }
cleanup(base);
}
});
// ─── Lock path: original project root vs worktree ────────────────────────
test("lock file should be discoverable at project root, not worktree path", () => {
const projectRoot = makeTmpBase();
const worktreePath = join(projectRoot, ".gsd", "worktrees", "M001");
mkdirSync(join(worktreePath, ".gsd"), { recursive: true });
try {
// Simulate: auto-mode writes lock to project root (the fix)
writeLock(projectRoot, "execute-task", "M001/S01/T01", 0);
// Second terminal checks project root — should find the lock
const lock = readCrashLock(projectRoot);
assert.ok(lock, "lock should be found at project root");
assert.equal(lock!.unitType, "execute-task");
// Worktree path should NOT have a lock
const worktreeLock = readCrashLock(worktreePath);
assert.equal(worktreeLock, null, "lock should NOT exist at worktree path");
} finally {
cleanup(projectRoot);
}
});