diff --git a/src/headless-events.ts b/src/headless-events.ts index 8a2681cc4..213553b02 100644 --- a/src/headless-events.ts +++ b/src/headless-events.ts @@ -65,11 +65,37 @@ export function mapStatusToExitCode(status: string): number { * Blocked detection is separate — checked via isBlockedNotification. */ export const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"]; + +/** + * Idle timeout for short, single-shot commands (status, queue, history, etc.). + * For these, "no events for 15s after a tool call" really does mean done. + */ export const IDLE_TIMEOUT_MS = 15_000; -// new-milestone is a long-running creative task where the LLM may pause -// between tool calls (e.g. after mkdir, before writing files). Use a -// longer idle timeout to avoid killing the session prematurely (#808). + +/** + * Idle timeout for new-milestone — bounded creative task where the LLM may + * pause between tool calls (e.g. after mkdir, before writing files). 120s is + * enough buffer for typical LLM thinking on a one-shot setup workflow (#808). + */ export const NEW_MILESTONE_IDLE_TIMEOUT_MS = 120_000; + +/** + * Deadlock backstop for long-running multi-turn commands (auto, next, + * discuss, plan). The role here is NOT idle-detection ("are we done?") — + * those commands signal completion explicitly via "auto-mode stopped" / + * "step-mode stopped" terminal notifications, and the agent's child-process + * exit catches crashes. The only remaining failure mode is a truly hung + * process (deadlock, network stuck without retry, infinite reasoning loop + * outside the LLM's awareness). 30 minutes is long enough to never misfire + * on legitimate slow LLM thinking or chained tool calls, but short enough + * to recover from a real deadlock within a reasonable bound. + * + * Symptom from the old 15s timeout: sf headless auto would dispatch a task, + * the LLM would make 1-2 tool calls, pause to reason, exceed 15s of "no + * events", and headless would declare "Status: complete" — exiting at ~35s + * with the task barely started. + */ +export const MULTI_TURN_DEADLOCK_BACKSTOP_MS = 1_800_000; const INTERACTIVE_HEADLESS_TOOLS = new Set([ "ask_user_questions", "secure_env_collect", diff --git a/src/headless.ts b/src/headless.ts index e27de3f91..e52602996 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -30,6 +30,7 @@ import { EXIT_SUCCESS, FIRE_AND_FORGET_METHODS, IDLE_TIMEOUT_MS, + MULTI_TURN_DEADLOCK_BACKSTOP_MS, isBlockedNotification, isInteractiveHeadlessTool, isMilestoneReadyNotification, @@ -116,6 +117,38 @@ interface TrackedEvent { detail?: string; } +interface HeadlessUnitNotification { + kind: "start" | "end"; + unitType: string; + unitId: string; + verdict?: string; +} + +export function parseHeadlessUnitNotification( + message: string, +): HeadlessUnitNotification | null { + const start = message.match(/\[unit\]\s+([\w-]+)\s+(\S+)\s+starting/); + if (start) { + return { + kind: "start", + unitType: start[1], + unitId: start[2], + }; + } + + const end = message.match(/\[unit\]\s+([\w-]+)\s+(\S+)\s+ended\s*->\s*(\w+)/); + if (end) { + return { + kind: "end", + unitType: end[1], + unitId: end[2], + verdict: end[3], + }; + } + + return null; +} + // --------------------------------------------------------------------------- // Resume Session Resolution // --------------------------------------------------------------------------- @@ -552,6 +585,9 @@ async function runHeadlessOnce( let traceActive = false; // true once maybeStartTrace succeeded // Current unit span — tool spans are children of this let activeUnitSpan: ReturnType | null = null; + let activeHeadlessUnit: + | { unitType: string; unitId: string; startedMessage: string } + | null = null; // Map from tool call ID to its tool span (for matching start/end) const toolSpanByCallId = new Map>(); // Tracks pending tool_execution_start for which we haven't seen toolName yet @@ -597,11 +633,12 @@ async function runHeadlessOnce( * Matches: "[unit] milestone M001 starting" or "[unit] slice M001/S01 starting" etc. */ function handleUnitStart(message: string): void { - const match = message.match(/\[unit\]\s+(\w+)\s+(\S+)\s+starting/); - if (!match) return; - const unitType = match[1] as "milestone" | "slice" | "task"; - const unitId = match[2]; - activeUnitSpan = startUnitSpan(unitType, unitId); + const parsed = parseHeadlessUnitNotification(message); + if (!parsed || parsed.kind !== "start") return; + activeUnitSpan = startUnitSpan( + parsed.unitType as "milestone" | "slice" | "task", + parsed.unitId, + ); } /** @@ -609,12 +646,10 @@ async function runHeadlessOnce( * Matches: "[unit] milestone M001 ended -> ok" etc. */ function handleUnitEnd(message: string): void { - const match = message.match( - /\[unit\]\s+(\w+)\s+(\S+)\s+ended\s*->\s*(\w+)/, - ); - if (!match) return; - const unitId = match[2]; - const verdict = match[3]; + const parsed = parseHeadlessUnitNotification(message); + if (!parsed || parsed.kind !== "end") return; + const unitId = parsed.unitId; + const verdict = parsed.verdict ?? "error"; // Find the unit span by ID (may not be the top-of-stack if nested) if (!activeUnitSpan) return; @@ -642,6 +677,28 @@ async function runHeadlessOnce( activeUnitSpan = null; } + function trackHeadlessUnitNotification(message: string): void { + const parsed = parseHeadlessUnitNotification(message); + if (!parsed) return; + + if (parsed.kind === "start") { + activeHeadlessUnit = { + unitType: parsed.unitType, + unitId: parsed.unitId, + startedMessage: message, + }; + return; + } + + if ( + activeHeadlessUnit && + activeHeadlessUnit.unitType === parsed.unitType && + activeHeadlessUnit.unitId === parsed.unitId + ) { + activeHeadlessUnit = null; + } + } + /** * Handle tool_execution_start: create a tool span under the active unit (or root if no unit active). */ @@ -753,16 +810,22 @@ async function runHeadlessOnce( resolveCompletion = resolve; }); - // Idle timeout — fallback completion detection. - // Multi-turn commands (auto, next, discuss, plan) involve LLM thinking - // pauses between tool calls that can exceed 15s. Use the longer - // new-milestone timeout for those too — without this, headless killed - // auto-mode after 2 tool calls when the LLM paused to reason about the - // next step (~35s total run, exit "complete" while task barely started). + // Idle timeout — three roles depending on command type: + // - Quick commands (status, queue, …): genuine "are we done?" detector. + // 15s after a tool call without further events = done. (IDLE_TIMEOUT_MS) + // - new-milestone: bounded creative task; 120s buffer for LLM thinking + // between bootstrap steps. (NEW_MILESTONE_IDLE_TIMEOUT_MS) + // - Multi-turn (auto, next, discuss, plan): NOT a completion detector — + // those signal done via "auto-mode stopped" terminal notifications, + // and child-process exit catches crashes. The idle timer here is a + // deadlock BACKSTOP only: 30 minutes, long enough to never misfire on + // legitimate LLM reasoning, short enough to recover from a real hang. + // (MULTI_TURN_DEADLOCK_BACKSTOP_MS) let idleTimer: ReturnType | null = null; - const effectiveIdleTimeout = - isNewMilestone || isMultiTurnCommand - ? NEW_MILESTONE_IDLE_TIMEOUT_MS + const effectiveIdleTimeout = isNewMilestone + ? NEW_MILESTONE_IDLE_TIMEOUT_MS + : isMultiTurnCommand + ? MULTI_TURN_DEADLOCK_BACKSTOP_MS : IDLE_TIMEOUT_MS; function resetIdleTimer(): void { @@ -1040,12 +1103,15 @@ async function runHeadlessOnce( } // Structured trace: handle unit start/end notify messages - if (traceActive && eventObj.method === "notify") { + if (eventObj.method === "notify") { const message = String(eventObj.message ?? ""); - if (message.includes("[unit]") && message.includes("starting")) { - handleUnitStart(message); - } else if (message.includes("[unit]") && message.includes("ended")) { - handleUnitEnd(message); + trackHeadlessUnitNotification(message); + if (traceActive) { + if (message.includes("[unit]") && message.includes("starting")) { + handleUnitStart(message); + } else if (message.includes("[unit]") && message.includes("ended")) { + handleUnitEnd(message); + } } } @@ -1305,6 +1371,21 @@ async function runHeadlessOnce( } } + if ( + isAutoMode && + exitCode === EXIT_SUCCESS && + !blocked && + activeHeadlessUnit + ) { + process.stderr.write( + `[headless] Error: Auto-mode ended while ${activeHeadlessUnit.unitType} ${activeHeadlessUnit.unitId} was still in progress.\n`, + ); + process.stderr.write( + "[headless] Treating this as incomplete instead of complete; resume with `sf headless auto` after inspecting the worktree.\n", + ); + exitCode = EXIT_ERROR; + } + // Cleanup if (timeoutTimer) clearTimeout(timeoutTimer); if (idleTimer) clearTimeout(idleTimer);