diff --git a/packages/coding-agent/src/core/subagent-runner.ts b/packages/coding-agent/src/core/subagent-runner.ts index 8ce6da0db..42a496346 100644 --- a/packages/coding-agent/src/core/subagent-runner.ts +++ b/packages/coding-agent/src/core/subagent-runner.ts @@ -130,6 +130,26 @@ export async function runSubagent( const timeoutMs = options?.timeoutMs ?? DEFAULT_SUBAGENT_TIMEOUT_MS; const noOutputTimeoutMs = options?.noOutputTimeoutMs ?? 0; + // Per-phase timing diagnostics for sf-mp8e02m1-zpk903: the prompt-never-sent + // bug surfaces here as "session initialized but no LLM call". Each phase + // logs start/end so when an iteration silently hangs, the log identifies + // which step is stuck. Also wraps each await in a soft watchdog that emits + // a "STUCK in phase X" warning after 10s — no abort, just visibility so + // the auto-supervisor can correlate with idle-halt errors. + const phaseStartedAt = (): number => Date.now(); + const logPhase = (phase: string, startedAt: number, extra?: string) => { + const ms = Date.now() - startedAt; + const tail = extra ? ` ${extra}` : ""; + process.stderr.write(`[subagent:${name}] phase=${phase} ${ms}ms${tail}\n`); + }; + const phaseWatchdog = (phase: string, startedAt: number): NodeJS.Timeout => + setTimeout(() => { + const ms = Date.now() - startedAt; + process.stderr.write( + `[subagent:${name}] STUCK phase=${phase} ${ms}ms (no completion signal — see sf-mp8e02m1-zpk903 for context)\n`, + ); + }, 10_000); + // Build an isolated resource loader with the caller's system prompt appended. const agentDir = getAgentDir(); const settingsManager = SettingsManager.create(cwd, agentDir); @@ -139,10 +159,16 @@ export async function runSubagent( settingsManager, appendSystemPrompt: config.systemPrompt, }); + let phaseAt = phaseStartedAt(); + let watchdog = phaseWatchdog("resourceLoader.reload", phaseAt); await resourceLoader.reload(); + clearTimeout(watchdog); + logPhase("resourceLoader.reload", phaseAt); // Create a fresh in-memory session so it never touches the user's session files. // Tool filtering is applied below via setActiveToolsByName after session creation. + phaseAt = phaseStartedAt(); + watchdog = phaseWatchdog("createAgentSession", phaseAt); const { session, modelFallbackMessage } = await createAgentSession({ cwd, agentDir, @@ -151,6 +177,8 @@ export async function runSubagent( settingsManager, persistModelChanges: false, }); + clearTimeout(watchdog); + logPhase("createAgentSession", phaseAt); if (modelFallbackMessage) { // Non-fatal: log but continue with whatever model was resolved. @@ -202,6 +230,8 @@ export async function runSubagent( if (config.permissionLevel) { process.env.SF_PERMISSION_LEVEL = config.permissionLevel; } + phaseAt = phaseStartedAt(); + watchdog = phaseWatchdog("bindExtensions", phaseAt); try { await session.bindExtensions({ uiContext: createSubagentUIContext(), @@ -212,6 +242,12 @@ export async function runSubagent( }, runLifecycle: Boolean(config.permissionLevel), }); + clearTimeout(watchdog); + logPhase( + "bindExtensions", + phaseAt, + `runLifecycle=${Boolean(config.permissionLevel)}`, + ); } finally { if (config.permissionLevel) { if (previousPermissionLevel === undefined) { @@ -281,6 +317,18 @@ export async function runSubagent( const promptTask = `Task: ${task}`; + // Final diagnostic point: about to call session.prompt. If this never logs + // "phase=session.prompt-returned" and the subagent hangs, the LLM dispatch + // path itself is the silent abort seam (likely a runExtensionHooks=false + // path A/D bypass via slash/sf-route prefix expansion, or a model-dispatch + // hang). Combined with the no-output watchdog below, operator gets a clear + // "which phase" diagnostic for any future hang report. + process.stderr.write( + `[subagent:${name}] phase=session.prompt-entered taskLen=${promptTask.length} timeoutMs=${timeoutMs} noOutputMs=${noOutputTimeoutMs}\n`, + ); + const promptEnteredAt = phaseStartedAt(); + const promptWatchdog = phaseWatchdog("session.prompt", promptEnteredAt); + try { if (timeoutMs === 0 && noOutputTimeoutMs === 0 && !options?.signal) { // Fast path: no watchdog, no cancellation. @@ -288,6 +336,8 @@ export async function runSubagent( runExtensionHooks: false, }); await promptPromise; + clearTimeout(promptWatchdog); + logPhase("session.prompt-returned", promptEnteredAt); cleanup(); return { ok: true, output: extractFinalOutput(), exitCode: 0 }; } @@ -364,11 +414,18 @@ export async function runSubagent( const promptPromise = session.prompt(promptTask, { runExtensionHooks: false, }); - competitors.unshift(promptPromise.then(() => ({}) as RaceResult)); + competitors.unshift( + promptPromise.then(() => { + clearTimeout(promptWatchdog); + logPhase("session.prompt-returned", promptEnteredAt); + return {} as RaceResult; + }), + ); const result = await Promise.race(competitors); cleanup(); + clearTimeout(promptWatchdog); if (result.timedOut) { return {