From 615c6845b23c4239536d9c78804cbff923bfbf29 Mon Sep 17 00:00:00 2001
From: Tom Boucher
Date: Sun, 22 Mar 2026 19:06:29 -0400
Subject: [PATCH] fix(web): kill stale server process before launch to prevent EADDRINUSE (#1934) (#2034)

When `gsd --web` exits uncleanly (terminal closed, crash), the spawned
server process survives as an orphan bound to port 3000. On re-launch,
the new server gets EADDRINUSE and the 3-minute boot-ready poll hangs.

Add `cleanupStaleInstance()` that checks the instance registry for a
previous entry matching the same cwd and kills its process before
reserving a port. This makes re-launches succeed immediately instead of
timing out after 180 seconds.

Fixes #1934

Co-authored-by: Claude Opus 4.6 (1M context)
---
 src/tests/web-mode-cli.test.ts | 118 +++++++++++++++++++++++++++++++++
 src/web-mode.ts                |  33 ++++++++-
 2 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/src/tests/web-mode-cli.test.ts b/src/tests/web-mode-cli.test.ts
index e6b8ae802..179bd6566 100644
--- a/src/tests/web-mode-cli.test.ts
+++ b/src/tests/web-mode-cli.test.ts
@@ -668,3 +668,121 @@ test('resolveContextAwareCwd returns cwd unchanged when outside dev root', () =>
     rmSync(tmp, { recursive: true, force: true })
   }
 })
+
+// ─── Stale instance cleanup tests ─────────────────────────────────────
+
+test('launchWebMode kills stale instance for same cwd before spawning', async () => {
+  const tmp = mkdtempSync(join(tmpdir(), 'gsd-web-stale-'))
+  const standaloneRoot = join(tmp, 'dist', 'web', 'standalone')
+  const serverPath = join(standaloneRoot, 'server.js')
+  mkdirSync(standaloneRoot, { recursive: true })
+  writeFileSync(serverPath, 'console.log("stub")\n')
+
+  const registryPath = join(tmp, 'web-instances.json')
+  const pidFilePath = join(tmp, 'web-server.pid')
+  const cwd = '/tmp/stale-project'
+
+  // Pre-register a stale instance for the same cwd
+  webMode.registerInstance(cwd, { pid: 77777, port: 3000, url: 'http://127.0.0.1:3000' }, registryPath)
+
+  let stderrOutput = ''
+  let spawnCalled = false
+
+  try {
+    const status = await webMode.launchWebMode(
+      {
+        cwd,
+        projectSessionsDir: '/tmp/.gsd/sessions/stale',
+        agentDir: '/tmp/.gsd/agent',
+        packageRoot: tmp,
+      },
+      {
+        initResources: () => {},
+        resolvePort: async () => 45200,
+        execPath: '/custom/node',
+        env: { TEST_ENV: '1' },
+        spawn: (command, args, options) => {
+          spawnCalled = true
+          return {
+            pid: 88888,
+            once: () => undefined,
+            unref: () => {},
+          } as any
+        },
+        waitForBootReady: async () => undefined,
+        openBrowser: () => {},
+        pidFilePath,
+        writePidFile: webMode.writePidFile,
+        registryPath,
+        stderr: {
+          write(chunk: string) {
+            stderrOutput += chunk
+            return true
+          },
+        },
+      },
+    )
+
+    assert.equal(status.ok, true)
+    assert.equal(spawnCalled, true)
+    // Stale instance for same cwd should have been cleaned up
+    assert.match(stderrOutput, /Cleaning up stale/)
+    // New instance should be registered
+    const registry = webMode.readInstanceRegistry(registryPath)
+    assert.equal(registry[resolve(cwd)]?.pid, 88888)
+  } finally {
+    rmSync(tmp, { recursive: true, force: true })
+  }
+})
+
+test('launchWebMode does not log cleanup when no stale instance exists', async () => {
+  const tmp = mkdtempSync(join(tmpdir(), 'gsd-web-no-stale-'))
+  const standaloneRoot = join(tmp, 'dist', 'web', 'standalone')
+  const serverPath = join(standaloneRoot, 'server.js')
+  mkdirSync(standaloneRoot, { recursive: true })
+  writeFileSync(serverPath, 'console.log("stub")\n')
+
+  const registryPath = join(tmp, 'web-instances.json')
+  const pidFilePath = join(tmp, 'web-server.pid')
+
+  let stderrOutput = ''
+
+  try {
+    const status = await webMode.launchWebMode(
+      {
+        cwd: '/tmp/clean-project',
+        projectSessionsDir: '/tmp/.gsd/sessions/clean',
+        agentDir: '/tmp/.gsd/agent',
+        packageRoot: tmp,
+      },
+      {
+        initResources: () => {},
+        resolvePort: async () => 45201,
+        execPath: '/custom/node',
+        env: { TEST_ENV: '1' },
+        spawn: () => ({
+          pid: 88889,
+          once: () => undefined,
+          unref: () => {},
+        } as any),
+        waitForBootReady: async () => undefined,
+        openBrowser: () => {},
+        pidFilePath,
+        writePidFile: webMode.writePidFile,
+        registryPath,
+        stderr: {
+          write(chunk: string) {
+            stderrOutput += chunk
+            return true
+          },
+        },
+      },
+    )
+
+    assert.equal(status.ok, true)
+    // No cleanup message when no stale instance exists
+    assert.equal(stderrOutput.includes('Cleaning up stale'), false)
+  } finally {
+    rmSync(tmp, { recursive: true, force: true })
+  }
+})
diff --git a/src/web-mode.ts b/src/web-mode.ts
index 2f6b3e2ad..08696bcf1 100644
--- a/src/web-mode.ts
+++ b/src/web-mode.ts
@@ -102,6 +102,8 @@ export interface WebModeDeps {
   writePidFile?: (path: string, pid: number) => void
   readPidFile?: (path: string) => number | null
   deletePidFile?: (path: string) => void
+  /** Path to the multi-instance registry JSON (for testing). */
+  registryPath?: string
 }
 
 export interface WebModeStopResult {
@@ -514,6 +516,30 @@ async function waitForBootReady(url: string, timeoutMs = 180_000, stderr?: Writa
   throw new Error(lastError ?? 'timed out waiting for boot readiness')
 }
 
+/**
+ * If a previous web server instance is registered for the same `cwd`, attempt
+ * to kill it and remove its registry entry so the new launch can bind the port
+ * cleanly. This handles the "orphan process" scenario where a prior `gsd --web`
+ * was terminated without clean shutdown (e.g. terminal closed).
+ */
+function cleanupStaleInstance(cwd: string, stderr: WritableLike, registryPath?: string): void {
+  const registry = readInstanceRegistry(registryPath)
+  const key = resolve(cwd)
+  const stale = registry[key]
+  if (!stale) return
+
+  stderr.write(`[gsd] Cleaning up stale web server for ${key} (pid=${stale.pid}, port=${stale.port})…\n`)
+  const result = killPid(stale.pid)
+  if (result === 'killed') {
+    stderr.write(`[gsd] Killed stale web server (pid=${stale.pid}).\n`)
+  } else if (result === 'already-dead') {
+    stderr.write(`[gsd] Stale web server was already stopped (pid=${stale.pid}) — clearing entry.\n`)
+  } else {
+    stderr.write(`[gsd] Could not kill stale web server (pid=${stale.pid}): ${result.error}\n`)
+  }
+  unregisterInstance(cwd, registryPath)
+}
+
 export async function launchWebMode(
   options: WebModeLaunchOptions,
   deps: WebModeDeps = {},
@@ -546,6 +572,11 @@ export async function launchWebMode(
 
   stderr.write(`[gsd] Starting web mode…\n`)
 
+  // Kill any stale server instance for this project before reserving a port.
+  // This prevents EADDRINUSE when the previous `gsd --web` was terminated
+  // without a clean shutdown (e.g. terminal closed, crash).
+  cleanupStaleInstance(options.cwd, stderr, deps.registryPath)
+
   const port = options.port ?? await (deps.resolvePort ?? reserveWebPort)(host)
   const authToken = randomBytes(32).toString('hex')
   const url = `http://${host}:${port}`
@@ -654,7 +685,7 @@ export async function launchWebMode(
       const pidFilePath = deps.pidFilePath ?? defaultWebPidFilePath
       ;(deps.writePidFile ?? writePidFile)(pidFilePath, pid)
       // Register in multi-instance registry
-      registerInstance(options.cwd, { pid, port, url })
+      registerInstance(options.cwd, { pid, port, url }, deps.registryPath)
     }
     ;(deps.openBrowser ?? openBrowser)(`${url}/#token=${authToken}`)
   } catch (error) {