fix(web): kill stale server process before launch to prevent EADDRINUSE (#1934) (#2034)

When `gsd --web` exits uncleanly (terminal closed, crash), the spawned
server process survives as an orphan bound to port 3000. On re-launch,
the new server fails with EADDRINUSE and the launcher sits in the
3-minute boot-ready poll until it times out.

Add `cleanupStaleInstance()` that checks the instance registry for a
previous entry matching the same cwd and kills its process before
reserving a port. This makes re-launches succeed immediately instead
of timing out after 180 seconds.

Fixes #1934

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Tom Boucher 2026-03-22 19:06:29 -04:00 committed by GitHub
parent a7cf125970
commit 615c6845b2
2 changed files with 150 additions and 1 deletions

View file

@ -668,3 +668,121 @@ test('resolveContextAwareCwd returns cwd unchanged when outside dev root', () =>
rmSync(tmp, { recursive: true, force: true })
}
})
// ─── Stale instance cleanup tests ─────────────────────────────────────
/**
 * A launch for a cwd that already has a registry entry must report the stale
 * entry on stderr ("Cleaning up stale…"), still spawn the new server, and
 * leave the registry pointing at the newly spawned pid for that cwd.
 */
test('launchWebMode kills stale instance for same cwd before spawning', async () => {
  const tmp = mkdtempSync(join(tmpdir(), 'gsd-web-stale-'))
  // launchWebMode attempts to kill whatever pid is registered for this cwd.
  // Use the largest 32-bit pid, which no supported OS hands out, so the test
  // can never signal an unrelated live process on the machine running it.
  const stalePid = 2147483647
  const freshPid = 88888
  const cwd = '/tmp/stale-project'
  let stderrOutput = ''
  let spawnCalled = false
  try {
    // Fixture setup lives inside `try` so the temp dir is removed even if a
    // setup step throws.
    const standaloneRoot = join(tmp, 'dist', 'web', 'standalone')
    const serverPath = join(standaloneRoot, 'server.js')
    mkdirSync(standaloneRoot, { recursive: true })
    writeFileSync(serverPath, 'console.log("stub")\n')
    const registryPath = join(tmp, 'web-instances.json')
    const pidFilePath = join(tmp, 'web-server.pid')
    // Pre-register a stale instance for the same cwd
    webMode.registerInstance(cwd, { pid: stalePid, port: 3000, url: 'http://127.0.0.1:3000' }, registryPath)
    const status = await webMode.launchWebMode(
      {
        cwd,
        projectSessionsDir: '/tmp/.gsd/sessions/stale',
        agentDir: '/tmp/.gsd/agent',
        packageRoot: tmp,
      },
      {
        initResources: () => {},
        resolvePort: async () => 45200,
        execPath: '/custom/node',
        env: { TEST_ENV: '1' },
        // Stub child process: only records that a spawn was requested.
        spawn: () => {
          spawnCalled = true
          return {
            pid: freshPid,
            once: () => undefined,
            unref: () => {},
          } as any
        },
        waitForBootReady: async () => undefined,
        openBrowser: () => {},
        pidFilePath,
        writePidFile: webMode.writePidFile,
        registryPath,
        stderr: {
          write(chunk: string) {
            stderrOutput += chunk
            return true
          },
        },
      },
    )
    assert.equal(status.ok, true)
    assert.equal(spawnCalled, true)
    // Stale instance for same cwd should have been cleaned up
    assert.match(stderrOutput, /Cleaning up stale/)
    // New instance should be registered
    const registry = webMode.readInstanceRegistry(registryPath)
    assert.equal(registry[resolve(cwd)]?.pid, freshPid)
  } finally {
    rmSync(tmp, { recursive: true, force: true })
  }
})
/**
 * With a fresh (empty) registry there is nothing to clean up, so a successful
 * launch must not emit the "Cleaning up stale" message on stderr.
 */
test('launchWebMode does not log cleanup when no stale instance exists', async () => {
  const sandbox = mkdtempSync(join(tmpdir(), 'gsd-web-no-stale-'))
  const standaloneDir = join(sandbox, 'dist', 'web', 'standalone')
  mkdirSync(standaloneDir, { recursive: true })
  writeFileSync(join(standaloneDir, 'server.js'), 'console.log("stub")\n')

  let stderrOutput = ''
  const launchOptions = {
    cwd: '/tmp/clean-project',
    projectSessionsDir: '/tmp/.gsd/sessions/clean',
    agentDir: '/tmp/.gsd/agent',
    packageRoot: sandbox,
  }
  const stubbedDeps = {
    initResources: () => {},
    resolvePort: async () => 45201,
    execPath: '/custom/node',
    env: { TEST_ENV: '1' },
    // Minimal child-process stand-in; nothing is actually spawned.
    spawn: () => ({
      pid: 88889,
      once: () => undefined,
      unref: () => {},
    } as any),
    waitForBootReady: async () => undefined,
    openBrowser: () => {},
    pidFilePath: join(sandbox, 'web-server.pid'),
    writePidFile: webMode.writePidFile,
    // Registry file is never created beforehand, so no entry exists for cwd.
    registryPath: join(sandbox, 'web-instances.json'),
    stderr: {
      write(chunk: string) {
        stderrOutput += chunk
        return true
      },
    },
  }

  try {
    const status = await webMode.launchWebMode(launchOptions, stubbedDeps)
    assert.equal(status.ok, true)
    // No cleanup message when no stale instance exists
    const loggedCleanup = stderrOutput.includes('Cleaning up stale')
    assert.equal(loggedCleanup, false)
  } finally {
    rmSync(sandbox, { recursive: true, force: true })
  }
})

View file

@ -102,6 +102,8 @@ export interface WebModeDeps {
writePidFile?: (path: string, pid: number) => void
readPidFile?: (path: string) => number | null
deletePidFile?: (path: string) => void
/** Path to the multi-instance registry JSON (for testing). */
registryPath?: string
}
export interface WebModeStopResult {
@ -514,6 +516,30 @@ async function waitForBootReady(url: string, timeoutMs = 180_000, stderr?: Writa
throw new Error(lastError ?? 'timed out waiting for boot readiness')
}
/**
 * Clears out a leftover web server registered for the same `cwd`.
 *
 * Looks up the registry entry keyed by the resolved `cwd`. When one exists,
 * announces the cleanup on `stderr`, asks `killPid` to stop the recorded pid,
 * reports the outcome, and then removes the entry — whatever the outcome was.
 * When no entry exists the function returns without writing anything.
 *
 * @param cwd Project directory whose previous instance should be cleaned up.
 * @param stderr Sink for the progress and outcome messages.
 * @param registryPath Optional registry file override, forwarded to the
 *   registry helpers.
 */
function cleanupStaleInstance(cwd: string, stderr: WritableLike, registryPath?: string): void {
  const projectKey = resolve(cwd)
  const previous = readInstanceRegistry(registryPath)[projectKey]
  if (!previous) return

  const { pid, port } = previous
  stderr.write(`[gsd] Cleaning up stale web server for ${projectKey} (pid=${pid}, port=${port})…\n`)

  const outcome = killPid(pid)
  switch (outcome) {
    case 'killed':
      stderr.write(`[gsd] Killed stale web server (pid=${pid}).\n`)
      break
    case 'already-dead':
      stderr.write(`[gsd] Stale web server was already stopped (pid=${pid}) — clearing entry.\n`)
      break
    default:
      // Any other result carries an `error` describing why the kill failed.
      stderr.write(`[gsd] Could not kill stale web server (pid=${pid}): ${outcome.error}\n`)
  }

  // The entry is dropped in every case so the upcoming launch can re-register.
  unregisterInstance(cwd, registryPath)
}
export async function launchWebMode(
options: WebModeLaunchOptions,
deps: WebModeDeps = {},
@ -546,6 +572,11 @@ export async function launchWebMode(
stderr.write(`[gsd] Starting web mode…\n`)
// Kill any stale server instance for this project before reserving a port.
// This prevents EADDRINUSE when the previous `gsd --web` was terminated
// without a clean shutdown (e.g. terminal closed, crash).
cleanupStaleInstance(options.cwd, stderr, deps.registryPath)
const port = options.port ?? await (deps.resolvePort ?? reserveWebPort)(host)
const authToken = randomBytes(32).toString('hex')
const url = `http://${host}:${port}`
@ -654,7 +685,7 @@ export async function launchWebMode(
const pidFilePath = deps.pidFilePath ?? defaultWebPidFilePath
;(deps.writePidFile ?? writePidFile)(pidFilePath, pid)
// Register in multi-instance registry
registerInstance(options.cwd, { pid, port, url })
registerInstance(options.cwd, { pid, port, url }, deps.registryPath)
}
;(deps.openBrowser ?? openBrowser)(`${url}/#token=${authToken}`)
} catch (error) {