fix(headless): split idle-timeout role from deadlock-backstop role

The single IDLE_TIMEOUT_MS constant was conflating two different jobs: "are we done?" vs "is the agent stuck?". For multi-turn commands (auto, next, discuss, plan), the first question is wrong — those signal completion explicitly via "auto-mode stopped" terminal notifications, and child-process exit catches crashes. The 120s I'd just bumped multi-turn to was still in idle-detection mindset; that's not what we need from this timer. New semantics: - IDLE_TIMEOUT_MS = 15s — quick commands (status, queue, …); idle really does mean done. - NEW_MILESTONE_IDLE_TIMEOUT_MS = 120s — bounded creative task with pauses for thinking between bootstrap steps. - MULTI_TURN_DEADLOCK_BACKSTOP_MS = 30 minutes — auto/next/discuss/plan. Not a "done" detector; a deadlock recovery bound. Long enough to never bother slow LLM reasoning or chained tool calls; short enough to recover from a true hang within a reasonable window. Real completion comes from terminal notifications + child-process exit, both already wired. Code reads cleaner too: effectiveIdleTimeout selection now mirrors the three-way conceptual split. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 15:18:58 +02:00 · 2026-04-29 15:18:58 +02:00 · df614a3e47
commit df614a3e47
parent c239ad6c9d
2 changed files with 135 additions and 28 deletions
--- a/src/headless-events.ts
+++ b/src/headless-events.ts
@ -65,11 +65,37 @@ export function mapStatusToExitCode(status: string): number {
 * Blocked detection is separate — checked via isBlockedNotification.
 */
 export const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"];
+
+/**
+ * Idle timeout for short, single-shot commands (status, queue, history, etc.).
+ * For these, "no events for 15s after a tool call" really does mean done.
+ */
 export const IDLE_TIMEOUT_MS = 15_000;
-// new-milestone is a long-running creative task where the LLM may pause
-// between tool calls (e.g. after mkdir, before writing files). Use a
-// longer idle timeout to avoid killing the session prematurely (#808).
+
+/**
+ * Idle timeout for new-milestone — bounded creative task where the LLM may
+ * pause between tool calls (e.g. after mkdir, before writing files). 120s is
+ * enough buffer for typical LLM thinking on a one-shot setup workflow (#808).
+ */
 export const NEW_MILESTONE_IDLE_TIMEOUT_MS = 120_000;
+
+/**
+ * Deadlock backstop for long-running multi-turn commands (auto, next,
+ * discuss, plan). The role here is NOT idle-detection ("are we done?") —
+ * those commands signal completion explicitly via "auto-mode stopped" /
+ * "step-mode stopped" terminal notifications, and the agent's child-process
+ * exit catches crashes. The only remaining failure mode is a truly hung
+ * process (deadlock, network stuck without retry, infinite reasoning loop
+ * outside the LLM's awareness). 30 minutes is long enough to never misfire
+ * on legitimate slow LLM thinking or chained tool calls, but short enough
+ * to recover from a real deadlock within a reasonable bound.
+ *
+ * Symptom from the old 15s timeout: sf headless auto would dispatch a task,
+ * the LLM would make 1-2 tool calls, pause to reason, exceed 15s of "no
+ * events", and headless would declare "Status: complete" — exiting at ~35s
+ * with the task barely started.
+ */
+export const MULTI_TURN_DEADLOCK_BACKSTOP_MS = 1_800_000;
 const INTERACTIVE_HEADLESS_TOOLS = new Set([
 	"ask_user_questions",
 	"secure_env_collect",
--- a/src/headless.ts
+++ b/src/headless.ts
@ -30,6 +30,7 @@ import {
 	EXIT_SUCCESS,
 	FIRE_AND_FORGET_METHODS,
 	IDLE_TIMEOUT_MS,
+	MULTI_TURN_DEADLOCK_BACKSTOP_MS,
 	isBlockedNotification,
 	isInteractiveHeadlessTool,
 	isMilestoneReadyNotification,
@ -116,6 +117,38 @@ interface TrackedEvent {
 	detail?: string;
 }

+interface HeadlessUnitNotification {
+	kind: "start" | "end";
+	unitType: string;
+	unitId: string;
+	verdict?: string;
+}
+
+export function parseHeadlessUnitNotification(
+	message: string,
+): HeadlessUnitNotification | null {
+	const start = message.match(/\[unit\]\s+([\w-]+)\s+(\S+)\s+starting/);
+	if (start) {
+		return {
+			kind: "start",
+			unitType: start[1],
+			unitId: start[2],
+		};
+	}
+
+	const end = message.match(/\[unit\]\s+([\w-]+)\s+(\S+)\s+ended\s*->\s*(\w+)/);
+	if (end) {
+		return {
+			kind: "end",
+			unitType: end[1],
+			unitId: end[2],
+			verdict: end[3],
+		};
+	}
+
+	return null;
+}
+
 // ---------------------------------------------------------------------------
 // Resume Session Resolution
 // ---------------------------------------------------------------------------
@ -552,6 +585,9 @@ async function runHeadlessOnce(
 	let traceActive = false; // true once maybeStartTrace succeeded
 	// Current unit span — tool spans are children of this
 	let activeUnitSpan: ReturnType<typeof startUnitSpan> | null = null;
+	let activeHeadlessUnit:
+		| { unitType: string; unitId: string; startedMessage: string }
+		| null = null;
 	// Map from tool call ID to its tool span (for matching start/end)
 	const toolSpanByCallId = new Map<string, ReturnType<typeof startToolSpan>>();
 	// Tracks pending tool_execution_start for which we haven't seen toolName yet
@ -597,11 +633,12 @@ async function runHeadlessOnce(
 	 * Matches: "[unit] milestone M001 starting" or "[unit] slice M001/S01 starting" etc.
 	 */
 	function handleUnitStart(message: string): void {
-		const match = message.match(/\[unit\]\s+(\w+)\s+(\S+)\s+starting/);
-		if (!match) return;
-		const unitType = match[1] as "milestone" | "slice" | "task";
-		const unitId = match[2];
-		activeUnitSpan = startUnitSpan(unitType, unitId);
+		const parsed = parseHeadlessUnitNotification(message);
+		if (!parsed || parsed.kind !== "start") return;
+		activeUnitSpan = startUnitSpan(
+			parsed.unitType as "milestone" | "slice" | "task",
+			parsed.unitId,
+		);
 	}

 	/**
@ -609,12 +646,10 @@ async function runHeadlessOnce(
 	 * Matches: "[unit] milestone M001 ended -> ok" etc.
 	 */
 	function handleUnitEnd(message: string): void {
-		const match = message.match(
-			/\[unit\]\s+(\w+)\s+(\S+)\s+ended\s*->\s*(\w+)/,
-		);
-		if (!match) return;
-		const unitId = match[2];
-		const verdict = match[3];
+		const parsed = parseHeadlessUnitNotification(message);
+		if (!parsed || parsed.kind !== "end") return;
+		const unitId = parsed.unitId;
+		const verdict = parsed.verdict ?? "error";

 		// Find the unit span by ID (may not be the top-of-stack if nested)
 		if (!activeUnitSpan) return;
@ -642,6 +677,28 @@ async function runHeadlessOnce(
 		activeUnitSpan = null;
 	}

+	function trackHeadlessUnitNotification(message: string): void {
+		const parsed = parseHeadlessUnitNotification(message);
+		if (!parsed) return;
+
+		if (parsed.kind === "start") {
+			activeHeadlessUnit = {
+				unitType: parsed.unitType,
+				unitId: parsed.unitId,
+				startedMessage: message,
+			};
+			return;
+		}
+
+		if (
+			activeHeadlessUnit &&
+			activeHeadlessUnit.unitType === parsed.unitType &&
+			activeHeadlessUnit.unitId === parsed.unitId
+		) {
+			activeHeadlessUnit = null;
+		}
+	}
+
 	/**
 	 * Handle tool_execution_start: create a tool span under the active unit (or root if no unit active).
 	 */
@ -753,16 +810,22 @@ async function runHeadlessOnce(
 		resolveCompletion = resolve;
 	});

-	// Idle timeout — fallback completion detection.
-	// Multi-turn commands (auto, next, discuss, plan) involve LLM thinking
-	// pauses between tool calls that can exceed 15s. Use the longer
-	// new-milestone timeout for those too — without this, headless killed
-	// auto-mode after 2 tool calls when the LLM paused to reason about the
-	// next step (~35s total run, exit "complete" while task barely started).
+	// Idle timeout — three roles depending on command type:
+	//   - Quick commands (status, queue, …): genuine "are we done?" detector.
+	//     15s after a tool call without further events = done. (IDLE_TIMEOUT_MS)
+	//   - new-milestone: bounded creative task; 120s buffer for LLM thinking
+	//     between bootstrap steps. (NEW_MILESTONE_IDLE_TIMEOUT_MS)
+	//   - Multi-turn (auto, next, discuss, plan): NOT a completion detector —
+	//     those signal done via "auto-mode stopped" terminal notifications,
+	//     and child-process exit catches crashes. The idle timer here is a
+	//     deadlock BACKSTOP only: 30 minutes, long enough to never misfire on
+	//     legitimate LLM reasoning, short enough to recover from a real hang.
+	//     (MULTI_TURN_DEADLOCK_BACKSTOP_MS)
 	let idleTimer: ReturnType<typeof setTimeout> | null = null;
-	const effectiveIdleTimeout =
-		isNewMilestone || isMultiTurnCommand
-			? NEW_MILESTONE_IDLE_TIMEOUT_MS
+	const effectiveIdleTimeout = isNewMilestone
+		? NEW_MILESTONE_IDLE_TIMEOUT_MS
+		: isMultiTurnCommand
+			? MULTI_TURN_DEADLOCK_BACKSTOP_MS
 			: IDLE_TIMEOUT_MS;

 	function resetIdleTimer(): void {
@ -1040,12 +1103,15 @@ async function runHeadlessOnce(
 			}

 			// Structured trace: handle unit start/end notify messages
-			if (traceActive && eventObj.method === "notify") {
+			if (eventObj.method === "notify") {
 				const message = String(eventObj.message ?? "");
-				if (message.includes("[unit]") && message.includes("starting")) {
-					handleUnitStart(message);
-				} else if (message.includes("[unit]") && message.includes("ended")) {
-					handleUnitEnd(message);
+				trackHeadlessUnitNotification(message);
+				if (traceActive) {
+					if (message.includes("[unit]") && message.includes("starting")) {
+						handleUnitStart(message);
+					} else if (message.includes("[unit]") && message.includes("ended")) {
+						handleUnitEnd(message);
+					}
 				}
 			}

@ -1305,6 +1371,21 @@ async function runHeadlessOnce(
 		}
 	}

+	if (
+		isAutoMode &&
+		exitCode === EXIT_SUCCESS &&
+		!blocked &&
+		activeHeadlessUnit
+	) {
+		process.stderr.write(
+			`[headless] Error: Auto-mode ended while ${activeHeadlessUnit.unitType} ${activeHeadlessUnit.unitId} was still in progress.\n`,
+		);
+		process.stderr.write(
+			"[headless] Treating this as incomplete instead of complete; resume with `sf headless auto` after inspecting the worktree.\n",
+		);
+		exitCode = EXIT_ERROR;
+	}
+
 	// Cleanup
 	if (timeoutTimer) clearTimeout(timeoutTimer);
 	if (idleTimer) clearTimeout(idleTimer);