fix(headless): split idle-timeout role from deadlock-backstop role
The single IDLE_TIMEOUT_MS constant was conflating two different jobs: "are we done?" vs "is the agent stuck?". For multi-turn commands (auto, next, discuss, plan), the first question is wrong — those signal completion explicitly via "auto-mode stopped" terminal notifications, and child-process exit catches crashes. The 120s I'd just bumped multi-turn to was still in idle-detection mindset; that's not what we need from this timer. New semantics: - IDLE_TIMEOUT_MS = 15s — quick commands (status, queue, …); idle really does mean done. - NEW_MILESTONE_IDLE_TIMEOUT_MS = 120s — bounded creative task with pauses for thinking between bootstrap steps. - MULTI_TURN_DEADLOCK_BACKSTOP_MS = 30 minutes — auto/next/discuss/plan. Not a "done" detector; a deadlock recovery bound. Long enough to never bother slow LLM reasoning or chained tool calls; short enough to recover from a true hang within a reasonable window. Real completion comes from terminal notifications + child-process exit, both already wired. Code reads cleaner too: effectiveIdleTimeout selection now mirrors the three-way conceptual split. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
c239ad6c9d
commit
df614a3e47
2 changed files with 135 additions and 28 deletions
|
|
@ -65,11 +65,37 @@ export function mapStatusToExitCode(status: string): number {
|
|||
* Blocked detection is separate — checked via isBlockedNotification.
|
||||
*/
|
||||
export const TERMINAL_PREFIXES = ["auto-mode stopped", "step-mode stopped"];
|
||||
|
||||
/**
|
||||
* Idle timeout for short, single-shot commands (status, queue, history, etc.).
|
||||
* For these, "no events for 15s after a tool call" really does mean done.
|
||||
*/
|
||||
export const IDLE_TIMEOUT_MS = 15_000;
|
||||
// new-milestone is a long-running creative task where the LLM may pause
|
||||
// between tool calls (e.g. after mkdir, before writing files). Use a
|
||||
// longer idle timeout to avoid killing the session prematurely (#808).
|
||||
|
||||
/**
|
||||
* Idle timeout for new-milestone — bounded creative task where the LLM may
|
||||
* pause between tool calls (e.g. after mkdir, before writing files). 120s is
|
||||
* enough buffer for typical LLM thinking on a one-shot setup workflow (#808).
|
||||
*/
|
||||
export const NEW_MILESTONE_IDLE_TIMEOUT_MS = 120_000;
|
||||
|
||||
/**
|
||||
* Deadlock backstop for long-running multi-turn commands (auto, next,
|
||||
* discuss, plan). The role here is NOT idle-detection ("are we done?") —
|
||||
* those commands signal completion explicitly via "auto-mode stopped" /
|
||||
* "step-mode stopped" terminal notifications, and the agent's child-process
|
||||
* exit catches crashes. The only remaining failure mode is a truly hung
|
||||
* process (deadlock, network stuck without retry, infinite reasoning loop
|
||||
* outside the LLM's awareness). 30 minutes is long enough to never misfire
|
||||
* on legitimate slow LLM thinking or chained tool calls, but short enough
|
||||
* to recover from a real deadlock within a reasonable bound.
|
||||
*
|
||||
* Symptom from the old 15s timeout: sf headless auto would dispatch a task,
|
||||
* the LLM would make 1-2 tool calls, pause to reason, exceed 15s of "no
|
||||
* events", and headless would declare "Status: complete" — exiting at ~35s
|
||||
* with the task barely started.
|
||||
*/
|
||||
export const MULTI_TURN_DEADLOCK_BACKSTOP_MS = 1_800_000;
|
||||
const INTERACTIVE_HEADLESS_TOOLS = new Set([
|
||||
"ask_user_questions",
|
||||
"secure_env_collect",
|
||||
|
|
|
|||
131
src/headless.ts
131
src/headless.ts
|
|
@ -30,6 +30,7 @@ import {
|
|||
EXIT_SUCCESS,
|
||||
FIRE_AND_FORGET_METHODS,
|
||||
IDLE_TIMEOUT_MS,
|
||||
MULTI_TURN_DEADLOCK_BACKSTOP_MS,
|
||||
isBlockedNotification,
|
||||
isInteractiveHeadlessTool,
|
||||
isMilestoneReadyNotification,
|
||||
|
|
@ -116,6 +117,38 @@ interface TrackedEvent {
|
|||
detail?: string;
|
||||
}
|
||||
|
||||
interface HeadlessUnitNotification {
|
||||
kind: "start" | "end";
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
verdict?: string;
|
||||
}
|
||||
|
||||
export function parseHeadlessUnitNotification(
|
||||
message: string,
|
||||
): HeadlessUnitNotification | null {
|
||||
const start = message.match(/\[unit\]\s+([\w-]+)\s+(\S+)\s+starting/);
|
||||
if (start) {
|
||||
return {
|
||||
kind: "start",
|
||||
unitType: start[1],
|
||||
unitId: start[2],
|
||||
};
|
||||
}
|
||||
|
||||
const end = message.match(/\[unit\]\s+([\w-]+)\s+(\S+)\s+ended\s*->\s*(\w+)/);
|
||||
if (end) {
|
||||
return {
|
||||
kind: "end",
|
||||
unitType: end[1],
|
||||
unitId: end[2],
|
||||
verdict: end[3],
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Resume Session Resolution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -552,6 +585,9 @@ async function runHeadlessOnce(
|
|||
let traceActive = false; // true once maybeStartTrace succeeded
|
||||
// Current unit span — tool spans are children of this
|
||||
let activeUnitSpan: ReturnType<typeof startUnitSpan> | null = null;
|
||||
let activeHeadlessUnit:
|
||||
| { unitType: string; unitId: string; startedMessage: string }
|
||||
| null = null;
|
||||
// Map from tool call ID to its tool span (for matching start/end)
|
||||
const toolSpanByCallId = new Map<string, ReturnType<typeof startToolSpan>>();
|
||||
// Tracks pending tool_execution_start for which we haven't seen toolName yet
|
||||
|
|
@ -597,11 +633,12 @@ async function runHeadlessOnce(
|
|||
* Matches: "[unit] milestone M001 starting" or "[unit] slice M001/S01 starting" etc.
|
||||
*/
|
||||
function handleUnitStart(message: string): void {
|
||||
const match = message.match(/\[unit\]\s+(\w+)\s+(\S+)\s+starting/);
|
||||
if (!match) return;
|
||||
const unitType = match[1] as "milestone" | "slice" | "task";
|
||||
const unitId = match[2];
|
||||
activeUnitSpan = startUnitSpan(unitType, unitId);
|
||||
const parsed = parseHeadlessUnitNotification(message);
|
||||
if (!parsed || parsed.kind !== "start") return;
|
||||
activeUnitSpan = startUnitSpan(
|
||||
parsed.unitType as "milestone" | "slice" | "task",
|
||||
parsed.unitId,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -609,12 +646,10 @@ async function runHeadlessOnce(
|
|||
* Matches: "[unit] milestone M001 ended -> ok" etc.
|
||||
*/
|
||||
function handleUnitEnd(message: string): void {
|
||||
const match = message.match(
|
||||
/\[unit\]\s+(\w+)\s+(\S+)\s+ended\s*->\s*(\w+)/,
|
||||
);
|
||||
if (!match) return;
|
||||
const unitId = match[2];
|
||||
const verdict = match[3];
|
||||
const parsed = parseHeadlessUnitNotification(message);
|
||||
if (!parsed || parsed.kind !== "end") return;
|
||||
const unitId = parsed.unitId;
|
||||
const verdict = parsed.verdict ?? "error";
|
||||
|
||||
// Find the unit span by ID (may not be the top-of-stack if nested)
|
||||
if (!activeUnitSpan) return;
|
||||
|
|
@ -642,6 +677,28 @@ async function runHeadlessOnce(
|
|||
activeUnitSpan = null;
|
||||
}
|
||||
|
||||
function trackHeadlessUnitNotification(message: string): void {
|
||||
const parsed = parseHeadlessUnitNotification(message);
|
||||
if (!parsed) return;
|
||||
|
||||
if (parsed.kind === "start") {
|
||||
activeHeadlessUnit = {
|
||||
unitType: parsed.unitType,
|
||||
unitId: parsed.unitId,
|
||||
startedMessage: message,
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
activeHeadlessUnit &&
|
||||
activeHeadlessUnit.unitType === parsed.unitType &&
|
||||
activeHeadlessUnit.unitId === parsed.unitId
|
||||
) {
|
||||
activeHeadlessUnit = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle tool_execution_start: create a tool span under the active unit (or root if no unit active).
|
||||
*/
|
||||
|
|
@ -753,16 +810,22 @@ async function runHeadlessOnce(
|
|||
resolveCompletion = resolve;
|
||||
});
|
||||
|
||||
// Idle timeout — fallback completion detection.
|
||||
// Multi-turn commands (auto, next, discuss, plan) involve LLM thinking
|
||||
// pauses between tool calls that can exceed 15s. Use the longer
|
||||
// new-milestone timeout for those too — without this, headless killed
|
||||
// auto-mode after 2 tool calls when the LLM paused to reason about the
|
||||
// next step (~35s total run, exit "complete" while task barely started).
|
||||
// Idle timeout — three roles depending on command type:
|
||||
// - Quick commands (status, queue, …): genuine "are we done?" detector.
|
||||
// 15s after a tool call without further events = done. (IDLE_TIMEOUT_MS)
|
||||
// - new-milestone: bounded creative task; 120s buffer for LLM thinking
|
||||
// between bootstrap steps. (NEW_MILESTONE_IDLE_TIMEOUT_MS)
|
||||
// - Multi-turn (auto, next, discuss, plan): NOT a completion detector —
|
||||
// those signal done via "auto-mode stopped" terminal notifications,
|
||||
// and child-process exit catches crashes. The idle timer here is a
|
||||
// deadlock BACKSTOP only: 30 minutes, long enough to never misfire on
|
||||
// legitimate LLM reasoning, short enough to recover from a real hang.
|
||||
// (MULTI_TURN_DEADLOCK_BACKSTOP_MS)
|
||||
let idleTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
const effectiveIdleTimeout =
|
||||
isNewMilestone || isMultiTurnCommand
|
||||
? NEW_MILESTONE_IDLE_TIMEOUT_MS
|
||||
const effectiveIdleTimeout = isNewMilestone
|
||||
? NEW_MILESTONE_IDLE_TIMEOUT_MS
|
||||
: isMultiTurnCommand
|
||||
? MULTI_TURN_DEADLOCK_BACKSTOP_MS
|
||||
: IDLE_TIMEOUT_MS;
|
||||
|
||||
function resetIdleTimer(): void {
|
||||
|
|
@ -1040,12 +1103,15 @@ async function runHeadlessOnce(
|
|||
}
|
||||
|
||||
// Structured trace: handle unit start/end notify messages
|
||||
if (traceActive && eventObj.method === "notify") {
|
||||
if (eventObj.method === "notify") {
|
||||
const message = String(eventObj.message ?? "");
|
||||
if (message.includes("[unit]") && message.includes("starting")) {
|
||||
handleUnitStart(message);
|
||||
} else if (message.includes("[unit]") && message.includes("ended")) {
|
||||
handleUnitEnd(message);
|
||||
trackHeadlessUnitNotification(message);
|
||||
if (traceActive) {
|
||||
if (message.includes("[unit]") && message.includes("starting")) {
|
||||
handleUnitStart(message);
|
||||
} else if (message.includes("[unit]") && message.includes("ended")) {
|
||||
handleUnitEnd(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1305,6 +1371,21 @@ async function runHeadlessOnce(
|
|||
}
|
||||
}
|
||||
|
||||
if (
|
||||
isAutoMode &&
|
||||
exitCode === EXIT_SUCCESS &&
|
||||
!blocked &&
|
||||
activeHeadlessUnit
|
||||
) {
|
||||
process.stderr.write(
|
||||
`[headless] Error: Auto-mode ended while ${activeHeadlessUnit.unitType} ${activeHeadlessUnit.unitId} was still in progress.\n`,
|
||||
);
|
||||
process.stderr.write(
|
||||
"[headless] Treating this as incomplete instead of complete; resume with `sf headless auto` after inspecting the worktree.\n",
|
||||
);
|
||||
exitCode = EXIT_ERROR;
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
if (timeoutTimer) clearTimeout(timeoutTimer);
|
||||
if (idleTimer) clearTimeout(idleTimer);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue