diff --git a/src/tests/integration/web-mode-cli.test.ts b/src/tests/integration/web-mode-cli.test.ts
index 8be245774..9d1642bb8 100644
--- a/src/tests/integration/web-mode-cli.test.ts
+++ b/src/tests/integration/web-mode-cli.test.ts
@@ -143,6 +143,7 @@ test("launchWebMode prefers the packaged standalone host and opens the resolved
 
 	let writtenPid: { path: string; pid: number } | undefined;
 	const pidFilePath = join(tmp, "web-server.pid");
+	const registryPath = join(tmp, "web-instances.json");
 
 	afterEach(() => {
 		rmSync(tmp, { recursive: true, force: true });
@@ -181,6 +182,7 @@ test("launchWebMode prefers the packaged standalone host and opens the resolved
 			openedUrl = url;
 		},
 		pidFilePath,
+		registryPath,
 		writePidFile: (path, pid) => {
 			writtenPid = { path, pid };
 			webMode.writePidFile(path, pid);
@@ -667,6 +669,107 @@ test("unregisterInstance removes a single entry", (_t) => {
 	assert.equal(registry[resolve("/tmp/project-b")]?.pid, 1002);
 });
 
+test("signalWebServerProcessTree uses process-group signal on Unix", () => {
+	const calls: Array<{ pid: number; signal: NodeJS.Signals | 0 }> = [];
+	const result = webMode.signalWebServerProcessTree(12345, "SIGTERM", {
+		platform: "linux",
+		kill: ((pid: number, signal?: NodeJS.Signals | 0) => {
+			calls.push({ pid, signal: signal ?? 0 });
+			return true;
+		}) as typeof process.kill,
+	});
+
+	assert.equal(result, "killed");
+	assert.deepEqual(calls, [{ pid: -12345, signal: "SIGTERM" }]);
+});
+
+test("signalWebServerProcessTree falls back to direct PID when group signal fails", () => {
+	const calls: Array<{ pid: number; signal: NodeJS.Signals | 0 }> = [];
+	const result = webMode.signalWebServerProcessTree(12345, "SIGTERM", {
+		platform: "linux",
+		kill: ((pid: number, signal?: NodeJS.Signals | 0) => {
+			calls.push({ pid, signal: signal ?? 0 });
+			if (pid < 0) {
+				const error = new Error("no process group") as NodeJS.ErrnoException;
+				error.code = "ESRCH";
+				throw error;
+			}
+			return true;
+		}) as typeof process.kill,
+	});
+
+	assert.equal(result, "killed");
+	assert.deepEqual(calls, [
+		{ pid: -12345, signal: "SIGTERM" },
+		{ pid: 12345, signal: "SIGTERM" },
+	]);
+});
+
+test("terminateWebServerProcessTree returns already-dead for missing PID", () => {
+	const result = webMode.terminateWebServerProcessTree(99999, {
+		kill: ((pid: number, signal?: NodeJS.Signals | 0) => {
+			assert.equal(pid, 99999);
+			assert.equal(signal, 0);
+			const error = new Error("missing") as NodeJS.ErrnoException;
+			error.code = "ESRCH";
+			throw error;
+		}) as typeof process.kill,
+	});
+
+	assert.equal(result, "already-dead");
+});
+
+test("terminateWebServerProcessTree refuses reserved PIDs without probing", () => {
+	const probes: string[] = [];
+	const result = webMode.terminateWebServerProcessTree(1, {
+		platform: "linux",
+		kill: (() => {
+			probes.push("kill");
+			return true;
+		}) as typeof process.kill,
+		spawnSync: (() => {
+			probes.push("spawnSync");
+			return { status: 1, stdout: "" };
+		}) as never,
+	});
+
+	assert.deepEqual(result, { error: "invalid pid 1" });
+	assert.deepEqual(probes, []);
+});
+
+test("terminateWebServerProcessTree signals descendants from a portable ps snapshot", () => {
+	const signals: Array<{ pid: number; signal: NodeJS.Signals | 0 }> = [];
+	const psArgs: string[][] = [];
+	let rootAlive = true;
+	const result = webMode.terminateWebServerProcessTree(5000100, {
+		platform: "darwin",
+		sleepSync: () => {},
+		spawnSync: ((_command: string, args: string[]) => {
+			psArgs.push(args);
+			return { status: 0, stdout: "5000200 5000100\n5000300 5000200\n5000400 1\n" };
+		}) as never,
+		kill: ((pid: number, signal?: NodeJS.Signals | 0) => {
+			if (signal === 0) {
+				if (rootAlive) return true;
+				const error = new Error("missing") as NodeJS.ErrnoException;
+				error.code = "ESRCH";
+				throw error;
+			}
+			signals.push({ pid, signal: signal ?? 0 });
+			if (pid === -5000100) rootAlive = false;
+			return true;
+		}) as typeof process.kill,
+	});
+
+	assert.equal(result, "killed");
+	assert.deepEqual(psArgs[0], ["-A", "-o", "pid=", "-o", "ppid="]);
+	assert.deepEqual(signals, [
+		{ pid: -5000200, signal: "SIGTERM" },
+		{ pid: -5000300, signal: "SIGTERM" },
+		{ pid: -5000100, signal: "SIGTERM" },
+	]);
+});
+
 test("stopWebMode with projectCwd reports not-found when not in registry", () => {
 	let stderrOutput = "";
 
diff --git a/src/web-mode.ts b/src/web-mode.ts
index b8a7700b4..d1bdb221d 100644
--- a/src/web-mode.ts
+++ b/src/web-mode.ts
@@ -4,6 +4,7 @@ import {
 	execSync,
 	type SpawnOptions,
 	spawn,
+	spawnSync,
 } from "node:child_process";
 import { randomBytes } from "node:crypto";
 import {
@@ -131,6 +132,13 @@ export interface WebModeDeps {
 	registryPath?: string;
 }
 
+interface ProcessTreeSignalDeps {
+	kill?: typeof process.kill;
+	spawnSync?: typeof spawnSync;
+	platform?: NodeJS.Platform;
+	sleepSync?: (ms: number) => void;
+}
+
 export interface WebModeStopResult {
 	ok: boolean;
 	reason?: string;
@@ -194,20 +202,193 @@ export function unregisterInstance(
 	writeInstanceRegistry(registry, registryPath);
 }
 
-function killPid(pid: number): "killed" | "already-dead" | { error: string } {
+/** True when `error` is the ESRCH errno for a PID that no longer exists. */
+function isAlreadyDeadError(error: unknown): boolean {
+	return (
+		error instanceof Error &&
+		"code" in error &&
+		(error as NodeJS.ErrnoException).code === "ESRCH"
+	);
+}
+
+/**
+ * True when `pid` is safe to signal as a web server root.
+ *
+ * PIDs 0 and 1 and negative values address the caller's own process group,
+ * init, or arbitrary process groups, so a corrupt registry row must never
+ * reach `kill` or the descendant walk with them.
+ */
+function isSignalablePid(pid: number): boolean {
+	return Number.isInteger(pid) && pid > 1;
+}
+
+/**
+ * Probe `pid` with signal 0. Any failure other than ESRCH (for example EPERM)
+ * is treated as "still running" because the process exists but is not ours.
+ */
+function pidExists(
+	pid: number,
+	kill: typeof process.kill = process.kill,
+): boolean {
 	try {
-		process.kill(pid, "SIGTERM");
+		kill(pid, 0);
+		return true;
+	} catch (error) {
+		return !isAlreadyDeadError(error);
+	}
+}
+
+/** Block the current thread for `ms` milliseconds without spinning the CPU. */
+function defaultSleepSync(ms: number): void {
+	const buffer = new SharedArrayBuffer(4);
+	Atomics.wait(new Int32Array(buffer), 0, 0, ms);
+}
+
+/**
+ * List the direct children of `pid` from a process-table snapshot.
+ *
+ * Only POSIX `ps` options are used (`-A` plus one `-o name=` per column) so
+ * the lookup works with procps on Linux and BSD `ps` on macOS, which has no
+ * `--ppid` filter. Returns an empty list on Windows or on any failure; the
+ * caller still signals the root process group in that case.
+ */
+function listChildPids(
+	pid: number,
+	deps: ProcessTreeSignalDeps = {},
+): number[] {
+	if ((deps.platform ?? process.platform) === "win32") return [];
+	try {
+		const result = (deps.spawnSync ?? spawnSync)(
+			"ps",
+			["-A", "-o", "pid=", "-o", "ppid="],
+			{
+				encoding: "utf8",
+				timeout: 1000,
+			},
+		);
+		if (result.status !== 0 || !result.stdout) return [];
+		const children: number[] = [];
+		for (const line of String(result.stdout).split("\n")) {
+			const [childField, parentField] = line.trim().split(/\s+/);
+			const childPid = Number.parseInt(childField ?? "", 10);
+			const parentPid = Number.parseInt(parentField ?? "", 10);
+			if (parentPid === pid && Number.isInteger(childPid) && childPid > 0) {
+				children.push(childPid);
+			}
+		}
+		return children;
+	} catch {
+		return [];
+	}
+}
+
+/**
+ * Collect every descendant of `rootPid` breadth-first.
+ *
+ * The root itself is excluded, and so is the current process: when the stop
+ * command runs from a shell hosted by the server it must survive long enough
+ * to finish signalling the tree.
+ */
+function collectDescendantPids(
+	rootPid: number,
+	deps: ProcessTreeSignalDeps = {},
+): number[] {
+	const seen = new Set<number>([rootPid, process.pid]);
+	const queue = [rootPid];
+	for (let index = 0; index < queue.length; index += 1) {
+		const parentPid = queue[index];
+		if (parentPid === undefined) continue;
+		for (const childPid of listChildPids(parentPid, deps)) {
+			if (seen.has(childPid)) continue;
+			seen.add(childPid);
+			queue.push(childPid);
+		}
+	}
+	// queue[0] is the root; everything after it was discovered as a descendant.
+	return queue.slice(1);
+}
+
+/**
+ * Signal the SF web server process tree by process group when possible.
+ *
+ * Purpose: detached Next standalone hosts are process-group leaders; signalling
+ * the group stops both the `next-server` parent and its bridge `sf` child.
+ *
+ * Consumer: stopWebMode, cleanupStaleInstance, and tests for server lifecycle.
+ */
+export function signalWebServerProcessTree(
+	pid: number,
+	signal: NodeJS.Signals,
+	deps: ProcessTreeSignalDeps = {},
+): "killed" | "already-dead" | { error: string } {
+	const kill = deps.kill ?? process.kill;
+	const platform = deps.platform ?? process.platform;
+	if (!isSignalablePid(pid)) {
+		return { error: `invalid pid ${pid}` };
+	}
+
+	if (platform !== "win32") {
+		try {
+			kill(-pid, signal);
+			return "killed";
+		} catch {
+			// Fall through: the process may not be a process-group leader.
+		}
+	}
+
+	try {
+		kill(pid, signal);
 		return "killed";
 	} catch (error) {
-		const isAlreadyDead =
-			error instanceof Error &&
-			"code" in error &&
-			(error as NodeJS.ErrnoException).code === "ESRCH";
-		if (isAlreadyDead) return "already-dead";
+		if (isAlreadyDeadError(error)) return "already-dead";
 		return { error: error instanceof Error ? error.message : String(error) };
 	}
 }
 
+/**
+ * Terminate the SF web server process tree and wait for root PID exit.
+ *
+ * Purpose: avoid stale registry rows and orphaned bridge children after
+ * `sf server stop`, especially when the fixed port must be immediately reusable.
+ *
+ * Consumer: stopWebMode and stale-instance cleanup before launch.
+ *
+ * Reserved or malformed PIDs are rejected before any probe so a corrupt
+ * registry row (for example pid 1) cannot fan SIGTERM out to every descendant
+ * of init.
+ */
+export function terminateWebServerProcessTree(
+	pid: number,
+	deps: ProcessTreeSignalDeps = {},
+): "killed" | "force-killed" | "already-dead" | { error: string } {
+	if (!isSignalablePid(pid)) {
+		return { error: `invalid pid ${pid}` };
+	}
+	const kill = deps.kill ?? process.kill;
+	if (!pidExists(pid, kill)) return "already-dead";
+
+	const descendants = collectDescendantPids(pid, deps);
+	for (const childPid of descendants) {
+		signalWebServerProcessTree(childPid, "SIGTERM", deps);
+	}
+	const termResult = signalWebServerProcessTree(pid, "SIGTERM", deps);
+	if (typeof termResult === "object") return termResult;
+
+	const sleep = deps.sleepSync ?? defaultSleepSync;
+	const deadline = Date.now() + 1500;
+	while (Date.now() < deadline) {
+		if (!pidExists(pid, kill)) return "killed";
+		sleep(50);
+	}
+
+	for (const childPid of descendants) {
+		signalWebServerProcessTree(childPid, "SIGKILL", deps);
+	}
+	const killResult = signalWebServerProcessTree(pid, "SIGKILL", deps);
+	if (typeof killResult === "object") return killResult;
+	return "force-killed";
+}
+
 export function writePidFile(filePath: string, pid: number): void {
 	writeFileSync(filePath, String(pid), "utf8");
 }
@@ -256,8 +437,8 @@ export function stopWebMode(
 		}
 		let stopped = 0;
 		for (const [cwd, entry] of entries) {
-			const result = killPid(entry.pid);
-			if (result === "killed") {
+			const result = terminateWebServerProcessTree(entry.pid);
+			if (result === "killed" || result === "force-killed") {
 				stderr.write(
 					`[forge] Stopped SF server for ${cwd} (pid=${entry.pid})\n`,
 				);
@@ -300,9 +481,9 @@ export function stopWebMode(
 		stderr.write(`[forge] No SF server running for ${resolvedCwd}\n`);
 		return { ok: false, reason: "not-found" };
 	}
-	const result = killPid(entry.pid);
+	const result = terminateWebServerProcessTree(entry.pid);
 	unregisterInstance(resolvedCwd);
-	if (result === "killed") {
+	if (result === "killed" || result === "force-killed") {
 		stderr.write(
 			`[forge] Stopped SF server for ${resolvedCwd} (pid=${entry.pid})\n`,
 		);
@@ -343,9 +524,9 @@ function stopLegacyPidFile(
 
 	stderr.write(`[forge] Stopping SF server (pid=${pid})…\n`);
 
-	const result = killPid(pid);
+	const result = terminateWebServerProcessTree(pid);
 	deletePid(pidFilePath);
-	if (result === "killed") {
+	if (result === "killed" || result === "force-killed") {
 		stderr.write(`[forge] Web server stopped.\n`);
 		return { ok: true };
 	} else if (result === "already-dead") {
@@ -657,8 +838,8 @@ function cleanupStaleInstance(
 	stderr.write(
 		`[forge] Cleaning up stale SF server for ${key} (pid=${stale.pid}, port=${stale.port})…\n`,
 	);
-	const result = killPid(stale.pid);
-	if (result === "killed") {
+	const result = terminateWebServerProcessTree(stale.pid);
+	if (result === "killed" || result === "force-killed") {
 		stderr.write(`[forge] Killed stale SF server (pid=${stale.pid}).\n`);
 	} else if (result === "already-dead") {
 		stderr.write(