From 01a6294b23ceb44db1f8276ef12d6061847482bf Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Tue, 17 Mar 2026 11:35:33 -0400 Subject: [PATCH] feat: auto-restart headless mode on crash with exponential backoff (#886) (#897) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the headless child process crashes or errors out, auto-restart with exponential backoff (5s, 10s, 15s... up to 30s) instead of exiting immediately. This enables overnight 'fire and forget' runs. - --max-restarts N (default 3, 0 to disable): controls restart budget - Only restarts on crashes (exit code !== 0), not on success or blocked - SIGINT/SIGTERM bypasses restart (user intent to stop) - Restart count shown in summary output - Backoff prevents rapid crash loops from burning API credits The inner loop function (runHeadlessOnce) returns exit status instead of calling process.exit, letting the outer loop decide whether to restart or terminate. This is the first step toward the 'absolute autonomy' goal described in #886 — process-level resilience for long-running sessions. --- src/headless.ts | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/headless.ts b/src/headless.ts index 78e6934bf..531fcbcf3 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -33,6 +33,7 @@ export interface HeadlessOptions { contextText?: string // inline text auto?: boolean // chain into auto-mode after milestone creation verbose?: boolean // show tool calls in output + maxRestarts?: number // auto-restart on crash (default 3, 0 to disable) } interface ExtensionUIRequest { @@ -92,6 +93,12 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { options.auto = true } else if (arg === '--verbose') { options.verbose = true + } else if (arg === '--max-restarts' && i + 1 < args.length) { + options.maxRestarts = parseInt(args[++i], 10) + if (Number.isNaN(options.maxRestarts) || options.maxRestarts < 0) { + process.stderr.write('[headless] Error: --max-restarts must be a non-negative integer\n') + process.exit(1) + } } } else if (!positionalStarted) { positionalStarted = true @@ -279,6 +286,37 @@ function bootstrapGsdProject(basePath: string): void { } export async function runHeadless(options: HeadlessOptions): Promise { + const maxRestarts = options.maxRestarts ?? 3 + let restartCount = 0 + + while (true) { + const result = await runHeadlessOnce(options, restartCount) + + // Success or blocked — exit normally + if (result.exitCode === 0 || result.exitCode === 2) { + process.exit(result.exitCode) + } + + // Crash/error — check if we should restart + if (restartCount >= maxRestarts) { + process.stderr.write(`[headless] Max restarts (${maxRestarts}) reached. Exiting.\n`) + process.exit(result.exitCode) + } + + // Don't restart if SIGINT/SIGTERM was received + if (result.interrupted) { + process.exit(result.exitCode) + } + + restartCount++ + const backoffMs = Math.min(5000 * restartCount, 30_000) + process.stderr.write(`[headless] Restarting in ${(backoffMs / 1000).toFixed(0)}s (attempt ${restartCount}/${maxRestarts})...\n`) + await new Promise(resolve => setTimeout(resolve, backoffMs)) + } +} + +async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): Promise<{ exitCode: number; interrupted: boolean }> { + let interrupted = false const startTime = Date.now() const isNewMilestone = options.command === 'new-milestone' @@ -452,6 +490,7 @@ export async function runHeadless(options: HeadlessOptions): Promise { // Signal handling const signalHandler = () => { process.stderr.write('\n[headless] Interrupted, stopping child process...\n') + interrupted = true exitCode = 1 client.stop().finally(() => { clearTimeout(timeoutTimer) @@ -553,6 +592,9 @@ export async function runHeadless(options: HeadlessOptions): Promise { process.stderr.write(`[headless] Status: ${status}\n`) process.stderr.write(`[headless] Duration: ${duration}s\n`) process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`) + if (restartCount > 0) { + process.stderr.write(`[headless] Restarts: ${restartCount}\n`) + } // On failure, print last 5 events for diagnostics if (exitCode !== 0) { @@ -565,5 +607,5 @@ export async function runHeadless(options: HeadlessOptions): Promise { } } - process.exit(exitCode) + return { exitCode, interrupted } }