fix(sf): harden server pid lifecycle

This commit is contained in:
Mikael Hugo 2026-05-17 19:00:21 +02:00
parent 3568972059
commit f7b262f33a
2 changed files with 203 additions and 15 deletions

View file

@ -143,6 +143,7 @@ test("launchWebMode prefers the packaged standalone host and opens the resolved
let writtenPid: { path: string; pid: number } | undefined;
const pidFilePath = join(tmp, "web-server.pid");
const registryPath = join(tmp, "web-instances.json");
afterEach(() => {
rmSync(tmp, { recursive: true, force: true });
@ -181,6 +182,7 @@ test("launchWebMode prefers the packaged standalone host and opens the resolved
openedUrl = url;
},
pidFilePath,
registryPath,
writePidFile: (path, pid) => {
writtenPid = { path, pid };
webMode.writePidFile(path, pid);
@ -667,6 +669,56 @@ test("unregisterInstance removes a single entry", (_t) => {
assert.equal(registry[resolve("/tmp/project-b")]?.pid, 1002);
});
test("signalWebServerProcessTree uses process-group signal on Unix", () => {
  // Record every (pid, signal) pair handed to the injected kill().
  const recorded: Array<{ pid: number; signal: NodeJS.Signals | 0 }> = [];
  const recordingKill = ((
    targetPid: number,
    sentSignal?: NodeJS.Signals | 0,
  ) => {
    recorded.push({ pid: targetPid, signal: sentSignal ?? 0 });
    return true;
  }) as typeof process.kill;

  const outcome = webMode.signalWebServerProcessTree(12345, "SIGTERM", {
    kill: recordingKill,
    platform: "linux",
  });

  assert.equal(outcome, "killed");
  // Exactly one call, addressed to the negated PID (the process group).
  assert.deepEqual(recorded, [{ pid: -12345, signal: "SIGTERM" }]);
});
test("signalWebServerProcessTree falls back to direct PID when group signal fails", () => {
  const recorded: Array<{ pid: number; signal: NodeJS.Signals | 0 }> = [];

  // Fake kill(): any negative (group) target fails with ESRCH, while a
  // positive (direct) target succeeds.
  const groupRejectingKill = ((
    targetPid: number,
    sentSignal?: NodeJS.Signals | 0,
  ) => {
    recorded.push({ pid: targetPid, signal: sentSignal ?? 0 });
    if (targetPid >= 0) return true;
    const failure = new Error("no process group") as NodeJS.ErrnoException;
    failure.code = "ESRCH";
    throw failure;
  }) as typeof process.kill;

  const outcome = webMode.signalWebServerProcessTree(12345, "SIGTERM", {
    kill: groupRejectingKill,
    platform: "linux",
  });

  assert.equal(outcome, "killed");
  // The group attempt comes first, then the direct-PID retry.
  assert.deepEqual(recorded, [
    { pid: -12345, signal: "SIGTERM" },
    { pid: 12345, signal: "SIGTERM" },
  ]);
});
test("terminateWebServerProcessTree returns already-dead for missing PID", () => {
  // Fake kill(): expects only the signal-0 probe for PID 99999 and answers
  // it with ESRCH, i.e. "no such process".
  const missingProcessKill = ((
    targetPid: number,
    sentSignal?: NodeJS.Signals | 0,
  ) => {
    assert.equal(targetPid, 99999);
    assert.equal(sentSignal, 0);
    const failure = new Error("missing") as NodeJS.ErrnoException;
    failure.code = "ESRCH";
    throw failure;
  }) as typeof process.kill;

  const outcome = webMode.terminateWebServerProcessTree(99999, {
    kill: missingProcessKill,
  });

  assert.equal(outcome, "already-dead");
});
test("stopWebMode with projectCwd reports not-found when not in registry", () => {
let stderrOutput = "";

View file

@ -4,6 +4,7 @@ import {
execSync,
type SpawnOptions,
spawn,
spawnSync,
} from "node:child_process";
import { randomBytes } from "node:crypto";
import {
@ -131,6 +132,13 @@ export interface WebModeDeps {
registryPath?: string;
}
/**
 * Optional overrides for the process-tree signalling helpers.
 *
 * Every field may be omitted; the helpers then fall back to the corresponding
 * Node built-in (or to `defaultSleepSync` for `sleepSync`).
 */
interface ProcessTreeSignalDeps {
/** Used in place of `process.kill` for both existence probes and signals. */
kill?: typeof process.kill;
/** Used in place of `node:child_process` `spawnSync` when listing child PIDs. */
spawnSync?: typeof spawnSync;
/** Used in place of `process.platform` when choosing platform-specific behavior. */
platform?: NodeJS.Platform;
/** Blocking sleep used between exit polls; receives the delay in milliseconds. */
sleepSync?: (ms: number) => void;
}
export interface WebModeStopResult {
ok: boolean;
reason?: string;
@ -194,20 +202,148 @@ export function unregisterInstance(
writeInstanceRegistry(registry, registryPath);
}
function killPid(pid: number): "killed" | "already-dead" | { error: string } {
/**
 * Tell whether a thrown value is an `Error` carrying the `ESRCH` code.
 *
 * @param error - Whatever was caught; may be any value.
 * @returns `true` only for `Error` instances that have a `code` property
 * equal to `"ESRCH"`.
 */
function isAlreadyDeadError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  if (!("code" in error)) return false;
  const { code } = error as NodeJS.ErrnoException;
  return code === "ESRCH";
}
/**
 * Report whether a process with the given PID currently exists.
 *
 * The check is a pure probe: it sends signal `0` through the supplied `kill`
 * function and never delivers a real signal to the target.
 *
 * @param pid - PID to probe.
 * @param kill - Signal function used for the probe; defaults to `process.kill`.
 * @returns `false` only when the probe fails with `ESRCH`. Any other failure
 * is reported as `true`, because it does not prove the process is gone.
 */
function pidExists(
  pid: number,
  kill: typeof process.kill = process.kill,
): boolean {
  try {
    kill(pid, 0);
    return true;
  } catch (error) {
    // Only ESRCH proves absence; every other error keeps the PID "alive".
    return !isAlreadyDeadError(error);
  }
}
/**
 * Block the current thread for roughly `ms` milliseconds.
 *
 * Waits on a private `SharedArrayBuffer` that nothing ever notifies, so the
 * call returns only when the timeout elapses.
 *
 * @param ms - Delay in milliseconds. Non-finite or non-positive values return
 * immediately.
 */
function defaultSleepSync(ms: number): void {
  // Atomics.wait treats a NaN timeout as Infinity; with no notifier that
  // would hang the thread forever, so reject non-finite delays up front.
  if (!Number.isFinite(ms) || ms <= 0) return;
  const buffer = new SharedArrayBuffer(4);
  Atomics.wait(new Int32Array(buffer), 0, 0, ms);
}
/**
 * List the direct child PIDs of a process.
 *
 * Linux keeps the procps form `ps -o pid= --ppid <pid>`. `--ppid` is a GNU
 * extension that BSD/macOS `ps` rejects, so other Unix platforms use
 * `pgrep -P <pid>`, which prints one child PID per line.
 *
 * @param pid - Parent PID whose children are wanted.
 * @param deps - Optional `platform` / `spawnSync` overrides.
 * @returns Child PIDs in the order printed by the tool. An empty array is
 * returned on Windows, for a non-positive or non-integer `pid`, and whenever
 * the lookup fails, exits non-zero, or prints nothing.
 */
function listChildPids(
  pid: number,
  deps: ProcessTreeSignalDeps = {},
): number[] {
  const platform = deps.platform ?? process.platform;
  if (platform === "win32") return [];
  // Do not spawn a tool for a PID that can never name a real process.
  if (!Number.isInteger(pid) || pid <= 0) return [];

  const isLinux = platform === "linux";
  const command = isLinux ? "ps" : "pgrep";
  const args = isLinux
    ? ["-o", "pid=", "--ppid", String(pid)]
    : ["-P", String(pid)];

  try {
    const result = (deps.spawnSync ?? spawnSync)(command, args, {
      encoding: "utf8",
      timeout: 1000,
    });
    // Both tools exit non-zero when there are no matching children.
    if (result.status !== 0 || !result.stdout) return [];
    return String(result.stdout)
      .split(/\s+/)
      .map((value) => Number.parseInt(value, 10))
      .filter((value) => Number.isFinite(value) && value > 0);
  } catch {
    // Best effort: a missing tool or spawn failure means "no known children".
    return [];
  }
}
/**
 * Gather every PID reachable from `rootPid` through parent-to-child links.
 *
 * Walks the tree breadth-first, asking `listChildPids` for each visited PID.
 *
 * @param rootPid - PID at the top of the tree; never included in the result.
 * @param deps - Overrides forwarded unchanged to `listChildPids`.
 * @returns Descendant PIDs in discovery order, without duplicates.
 */
function collectDescendantPids(
  rootPid: number,
  deps: ProcessTreeSignalDeps = {},
): number[] {
  const visited = new Set<number>();
  const pending: number[] = [rootPid];

  // `pending` grows while it is scanned; the cursor plays the role of a
  // queue head.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const current = pending[cursor];
    if (!current || visited.has(current)) continue;
    visited.add(current);
    const unvisited = listChildPids(current, deps).filter(
      (candidate) => !visited.has(candidate),
    );
    pending.push(...unvisited);
  }

  visited.delete(rootPid);
  return Array.from(visited);
}
/**
 * Signal the SF web server process tree by process group when possible.
 *
 * Purpose: detached Next standalone hosts are process-group leaders; signalling
 * the group stops both the `next-server` parent and its bridge `sf` child.
 *
 * Consumer: stopWebMode, cleanupStaleInstance, and tests for server lifecycle.
 *
 * @param pid - Root PID of the server; must be an integer greater than 1.
 * @param signal - Signal to deliver.
 * @param deps - Optional `kill` / `platform` overrides.
 * @returns `"killed"` when either the group or the direct signal was accepted,
 * `"already-dead"` when the direct signal failed with `ESRCH`, otherwise
 * `{ error }` carrying the failure message (or the invalid-pid message).
 */
export function signalWebServerProcessTree(
  pid: number,
  signal: NodeJS.Signals,
  deps: ProcessTreeSignalDeps = {},
): "killed" | "already-dead" | { error: string } {
  const kill = deps.kill ?? process.kill;
  const platform = deps.platform ?? process.platform;

  // The PID is negated below, so 0 and 1 must never get through: kill(0) and
  // kill(-1) address whole sets of processes rather than one server.
  // Fractional and non-finite values are not PIDs at all.
  if (!Number.isInteger(pid) || pid <= 1) {
    return { error: `invalid pid ${pid}` };
  }

  if (platform !== "win32") {
    try {
      kill(-pid, signal);
      return "killed";
    } catch {
      // Fall through: the process may not be a process-group leader.
    }
  }

  try {
    kill(pid, signal);
    return "killed";
  } catch (error) {
    if (isAlreadyDeadError(error)) return "already-dead";
    return { error: error instanceof Error ? error.message : String(error) };
  }
}
/**
 * Terminate the SF web server process tree and wait for root PID exit.
 *
 * Purpose: avoid stale registry rows and orphaned bridge children after
 * `sf server stop`, especially when the fixed port must be immediately reusable.
 *
 * Consumer: stopWebMode and stale-instance cleanup before launch.
 *
 * Sequence: probe the root, SIGTERM every descendant and then the root, poll
 * the root for up to 1500 ms in 50 ms steps, and escalate to SIGKILL if the
 * root is still present.
 *
 * @param pid - Root PID of the server; must be an integer greater than 1.
 * @param deps - Optional `kill` / `spawnSync` / `platform` / `sleepSync` overrides.
 * @returns `"already-dead"` when the root is absent at the initial probe,
 * `"killed"` when it disappears within the grace period after SIGTERM,
 * `"force-killed"` when SIGKILL had to be sent (exit is not re-checked
 * afterwards), or `{ error }` for an invalid PID or a failed root signal.
 */
export function terminateWebServerProcessTree(
  pid: number,
  deps: ProcessTreeSignalDeps = {},
): "killed" | "force-killed" | "already-dead" | { error: string } {
  // Validate before touching anything. A corrupt entry such as pid 1 would
  // otherwise have its descendants collected and signalled, and only then be
  // rejected by signalWebServerProcessTree.
  if (!Number.isInteger(pid) || pid <= 1) {
    return { error: `invalid pid ${pid}` };
  }

  const kill = deps.kill ?? process.kill;
  if (!pidExists(pid, kill)) return "already-dead";

  const gracePeriodMs = 1500;
  const pollIntervalMs = 50;

  // Snapshot the tree first: children may be re-parented once the root exits.
  const descendants = collectDescendantPids(pid, deps);

  // Descendant signalling is best effort; only the root's result is reported.
  for (const childPid of descendants) {
    signalWebServerProcessTree(childPid, "SIGTERM", deps);
  }
  const termResult = signalWebServerProcessTree(pid, "SIGTERM", deps);
  if (typeof termResult === "object") return termResult;

  const sleep = deps.sleepSync ?? defaultSleepSync;
  const deadline = Date.now() + gracePeriodMs;
  while (Date.now() < deadline) {
    if (!pidExists(pid, kill)) return "killed";
    sleep(pollIntervalMs);
  }

  // Grace period expired: escalate for the same set of processes.
  for (const childPid of descendants) {
    signalWebServerProcessTree(childPid, "SIGKILL", deps);
  }
  const killResult = signalWebServerProcessTree(pid, "SIGKILL", deps);
  if (typeof killResult === "object") return killResult;
  return "force-killed";
}
/**
 * Store a PID in a file as plain decimal text.
 *
 * @param filePath - Destination file; written with `writeFileSync`.
 * @param pid - PID to record; written without a trailing newline.
 */
export function writePidFile(filePath: string, pid: number): void {
  const contents = `${pid}`;
  writeFileSync(filePath, contents, { encoding: "utf8" });
}
@ -256,8 +392,8 @@ export function stopWebMode(
}
let stopped = 0;
for (const [cwd, entry] of entries) {
const result = killPid(entry.pid);
if (result === "killed") {
const result = terminateWebServerProcessTree(entry.pid);
if (result === "killed" || result === "force-killed") {
stderr.write(
`[forge] Stopped SF server for ${cwd} (pid=${entry.pid})\n`,
);
@ -300,9 +436,9 @@ export function stopWebMode(
stderr.write(`[forge] No SF server running for ${resolvedCwd}\n`);
return { ok: false, reason: "not-found" };
}
const result = killPid(entry.pid);
const result = terminateWebServerProcessTree(entry.pid);
unregisterInstance(resolvedCwd);
if (result === "killed") {
if (result === "killed" || result === "force-killed") {
stderr.write(
`[forge] Stopped SF server for ${resolvedCwd} (pid=${entry.pid})\n`,
);
@ -343,9 +479,9 @@ function stopLegacyPidFile(
stderr.write(`[forge] Stopping SF server (pid=${pid})…\n`);
const result = killPid(pid);
const result = terminateWebServerProcessTree(pid);
deletePid(pidFilePath);
if (result === "killed") {
if (result === "killed" || result === "force-killed") {
stderr.write(`[forge] Web server stopped.\n`);
return { ok: true };
} else if (result === "already-dead") {
@ -657,8 +793,8 @@ function cleanupStaleInstance(
stderr.write(
`[forge] Cleaning up stale SF server for ${key} (pid=${stale.pid}, port=${stale.port})…\n`,
);
const result = killPid(stale.pid);
if (result === "killed") {
const result = terminateWebServerProcessTree(stale.pid);
if (result === "killed" || result === "force-killed") {
stderr.write(`[forge] Killed stale SF server (pid=${stale.pid}).\n`);
} else if (result === "already-dead") {
stderr.write(