diff --git a/packages/coding-agent/src/core/subagent-runner.ts b/packages/coding-agent/src/core/subagent-runner.ts index 8d47f708b..598144f2e 100644 --- a/packages/coding-agent/src/core/subagent-runner.ts +++ b/packages/coding-agent/src/core/subagent-runner.ts @@ -87,6 +87,13 @@ function createSubagentUIContext(): ExtensionUIContext { export interface RunSubagentOptions { signal?: AbortSignal; timeoutMs?: number; + /** + * Abort when the subagent produces no session events for this long. + * This is separate from timeoutMs: a long-running worker may keep making + * useful progress, but a silent model call should fail fast so callers can + * retry or route to a different model. + */ + noOutputTimeoutMs?: number; /** * Called for each agent session event (forwarded from session.subscribe). * Use this to drive live UI updates without polling. @@ -111,6 +118,7 @@ export async function runSubagent( const name = config.name ?? "subagent"; const cwd = config.cwd ?? process.cwd(); const timeoutMs = options?.timeoutMs ?? DEFAULT_SUBAGENT_TIMEOUT_MS; + const noOutputTimeoutMs = options?.noOutputTimeoutMs ?? 0; // Build an isolated resource loader with the caller's system prompt appended. const agentDir = getAgentDir(); @@ -246,12 +254,18 @@ export async function runSubagent( }; let timer: ReturnType | undefined; + let noOutputTimer: ReturnType | undefined; + let noOutputTimedOut = false; const cleanup = (): void => { if (timer) { clearTimeout(timer); timer = undefined; } + if (noOutputTimer) { + clearTimeout(noOutputTimer); + noOutputTimer = undefined; + } unsubscribe(); }; @@ -270,7 +284,12 @@ export async function runSubagent( } // Build race competitors. 
- type RaceResult = { timedOut?: true; cancelled?: true; error?: unknown }; + type RaceResult = { + timedOut?: true; + noOutputTimedOut?: true; + cancelled?: true; + error?: unknown; + }; const competitors: Promise[] = [ promptPromise.then(() => ({}) as RaceResult), ]; @@ -286,6 +305,30 @@ export async function runSubagent( ); } + if (noOutputTimeoutMs > 0) { + competitors.push( + new Promise((resolve) => { + const armNoOutputTimer = () => { + if (noOutputTimer) clearTimeout(noOutputTimer); + noOutputTimer = setTimeout(() => { + noOutputTimedOut = true; + void session.abort().catch(() => {}); + resolve({ noOutputTimedOut: true }); + }, noOutputTimeoutMs); + }; + armNoOutputTimer(); + const previousOnEvent = options?.onEvent; + options = { + ...options, + onEvent: (event) => { + if (!noOutputTimedOut) armNoOutputTimer(); + previousOnEvent?.(event); + }, + }; + }), + ); + } + if (options?.signal) { const sig = options.signal; if (sig.aborted) { @@ -322,6 +365,15 @@ export async function runSubagent( }; } + if (result.noOutputTimedOut) { + return { + ok: false, + output: extractFinalOutput(), + stderr: `${name} produced no output for ${noOutputTimeoutMs}ms`, + exitCode: 124, + }; + } + if (result.cancelled) { return { ok: false, diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js index 6d955c5aa..0f85a52a7 100644 --- a/src/resources/extensions/sf/auto/run-unit.js +++ b/src/resources/extensions/sf/auto/run-unit.js @@ -288,6 +288,16 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { 30_000, (supervisor.hard_timeout_minutes ?? 8) * 60 * 1000, ); + const configuredNoOutputTimeoutMs = Number( + process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS ?? "", + ); + const noOutputTimeoutMs = Math.min( + timeoutMs, + Number.isFinite(configuredNoOutputTimeoutMs) && + configuredNoOutputTimeoutMs > 0 + ? 
Math.floor(configuredNoOutputTimeoutMs) + : 180_000, + ); // ── Event collector: capture real tool calls and completion signal ────────── // The worker agent emits events as it runs. We intercept "toolcall_end" @@ -358,6 +368,7 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { try { swarmResult = await swarmDispatchAndWait(basePath, envelope, { timeoutMs, + noOutputTimeoutMs, onEvent, }); } catch (err) { diff --git a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs index 66719d959..4a8b206b4 100644 --- a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs +++ b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs @@ -118,11 +118,14 @@ function makeS(basePath = "/tmp/test-project") { // ─── Save / restore env ─────────────────────────────────────────────────────── let origEnv; +let origNoOutputEnv; beforeEach(() => { vi.clearAllMocks(); origEnv = process.env.SF_AUTONOMOUS_VIA_SWARM; + origNoOutputEnv = process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS; delete process.env.SF_AUTONOMOUS_VIA_SWARM; + delete process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS; // Default implementation for the happy-path tests: return a deterministic reply. 
mockSwarmDispatchAndWait.mockImplementation( @@ -143,6 +146,11 @@ afterEach(() => { } else { process.env.SF_AUTONOMOUS_VIA_SWARM = origEnv; } + if (origNoOutputEnv === undefined) { + delete process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS; + } else { + process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS = origNoOutputEnv; + } }); // ─── Flag ON — happy path ───────────────────────────────────────────────────── @@ -199,6 +207,23 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => { expect(envelope.priority).toBe(7); expect(envelope.executorPermissionLevel).toBe("low"); expect(opts.timeoutMs).toBeGreaterThan(0); + expect(opts.noOutputTimeoutMs).toBe(180_000); + expect(opts.noOutputTimeoutMs).toBeLessThanOrEqual(opts.timeoutMs); + }); + + test("uses configured no-output timeout without exceeding hard timeout", async () => { + process.env.SF_AUTONOMOUS_VIA_SWARM = "1"; + process.env.SF_SWARM_NO_OUTPUT_TIMEOUT_MS = "45000"; + + const ctx = makeCtx("/proj"); + const pi = makePi(); + const s = makeS("/proj"); + + await runUnit(ctx, pi, s, "execute-task", "unit-timeout", "do work", {}); + + const [, , opts] = mockSwarmDispatchAndWait.mock.calls[0]; + expect(opts.noOutputTimeoutMs).toBe(45_000); + expect(opts.noOutputTimeoutMs).toBeLessThanOrEqual(opts.timeoutMs); }); // ─── Round 7: executor system prompt + tools on the envelope ───────────── diff --git a/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs b/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs index 30393db59..d44a31bc9 100644 --- a/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs +++ b/src/resources/extensions/sf/tests/swarm-dispatch-and-wait.test.mjs @@ -539,6 +539,48 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar expect(capturedOpts.permissionLevel).toBe("low"); }); + test("noOutputTimeoutMs option is forwarded to runAgentTurn", async () => { + const { runAgentTurn } = await import("../uok/agent-runner.js"); + + let 
capturedOpts = null; + runAgentTurn.mockImplementationOnce(async (agent, opts = {}) => { + capturedOpts = opts; + const { onlyMessageId } = opts; + if (onlyMessageId) agent._inbox.refresh(); + const all = agent.receive(false); + const target = all.find((m) => m.id === onlyMessageId && !m.read); + const messages = target ? [target] : []; + if (messages.length === 0) return { turnsProcessed: 0, response: null }; + for (const msg of messages) agent.markRead(msg.id); + const lastMsg = messages[messages.length - 1]; + const replyId = agent._bus.send( + `agent:${agent.identity.name}`, + lastMsg.from, + MOCK_REPLY_TEXT, + { replyTo: lastMsg.id, type: "response" }, + ); + return { turnsProcessed: 1, response: MOCK_REPLY_TEXT, replyId }; + }); + + const root = makeProject(); + const layer = new SwarmDispatchLayer(root); + + await layer.dispatchAndWait( + { + unitId: "task-no-output-timeout", + unitType: "execute-task", + workMode: "build", + payload: "edit files", + priority: 5, + scope: "scope-timeout", + }, + { noOutputTimeoutMs: 45_000 }, + ); + + expect(capturedOpts).not.toBeNull(); + expect(capturedOpts.noOutputTimeoutMs).toBe(45_000); + }); + test("envelope without executorSystemPrompt does not forward systemPromptOverride", async () => { // Envelopes without the optional fields must not pass undefined opts to runAgentTurn. 
const { runAgentTurn } = await import("../uok/agent-runner.js"); diff --git a/src/resources/extensions/sf/uok/agent-runner.js b/src/resources/extensions/sf/uok/agent-runner.js index 306f22e9e..7382f3765 100644 --- a/src/resources/extensions/sf/uok/agent-runner.js +++ b/src/resources/extensions/sf/uok/agent-runner.js @@ -18,6 +18,7 @@ import { runSubagent } from "@singularity-forge/coding-agent"; const DEFAULT_MAX_CONTEXT_TURNS = 10; const DEFAULT_MAX_TURNS_PER_RUN = 5; const DEFAULT_RUNNER_TIMEOUT_MS = 120_000; +const DEFAULT_NO_OUTPUT_TIMEOUT_MS = 180_000; const DEFAULT_POLL_INTERVAL_MS = 1_000; /** @@ -62,6 +63,7 @@ function buildAgentPrompt(agent, messages) { * @param {number} [timeoutMs] * @param {object} [opts] * @param {Function} [opts.onEvent] Optional event callback forwarded to runSubagent. + * @param {number} [opts.noOutputTimeoutMs] Abort a silent model call after this long. * @param {string} [opts.systemPromptOverride] Override the default swarm-agent system prompt. * When set (e.g. from envelope.executorSystemPrompt), this replaces the generic * "persistent agent in a swarm" prompt so the worker receives the full autonomous @@ -78,8 +80,13 @@ async function runHeadlessPrompt( timeoutMs = DEFAULT_RUNNER_TIMEOUT_MS, opts = {}, ) { - const { onEvent, systemPromptOverride, toolsOverride, permissionLevel } = - opts; + const { + onEvent, + noOutputTimeoutMs = DEFAULT_NO_OUTPUT_TIMEOUT_MS, + systemPromptOverride, + toolsOverride, + permissionLevel, + } = opts; const result = await runSubagent( { systemPrompt: @@ -93,12 +100,14 @@ async function runHeadlessPrompt( : {}), }, prompt, - { timeoutMs, ...(onEvent ? { onEvent } : {}) }, + { timeoutMs, noOutputTimeoutMs, ...(onEvent ? { onEvent } : {}) }, ); if (!result.ok) { if (result.exitCode === 124) { - throw new Error(`Agent runner timed out after ${timeoutMs}ms`); + throw new Error( + result.stderr ?? 
`Agent runner timed out after ${timeoutMs}ms`, + ); } throw new Error( `sf headless failed: ${result.stderr || result.output || "unknown error"}`, @@ -122,6 +131,7 @@ async function runHeadlessPrompt( * specified messageId exactly, and legitimately queued messages from other * senders remain unread and available for the next turn. * @param {Function} [opts.onEvent] Optional event callback forwarded to runHeadlessPrompt. + * @param {number} [opts.noOutputTimeoutMs] Abort a silent model call after this long. * @param {string} [opts.systemPromptOverride] Override the worker's system prompt. * Forwarded to runHeadlessPrompt so executor-specific contracts (e.g. the autonomous * checkpoint requirement) reach the LLM session unchanged. @@ -136,6 +146,7 @@ export async function runAgentTurn(agent, opts = {}) { const { maxContextTurns = DEFAULT_MAX_CONTEXT_TURNS, timeoutMs = DEFAULT_RUNNER_TIMEOUT_MS, + noOutputTimeoutMs = DEFAULT_NO_OUTPUT_TIMEOUT_MS, onlyMessageId, onEvent, systemPromptOverride, @@ -186,6 +197,7 @@ export async function runAgentTurn(agent, opts = {}) { try { response = await runHeadlessPrompt(agent._basePath, prompt, timeoutMs, { onEvent, + noOutputTimeoutMs, ...(systemPromptOverride ? { systemPromptOverride } : {}), ...(toolsOverride ? { toolsOverride } : {}), ...(permissionLevel ? { permissionLevel } : {}), diff --git a/src/resources/extensions/sf/uok/swarm-dispatch.js b/src/resources/extensions/sf/uok/swarm-dispatch.js index 125a47628..6fddebf34 100644 --- a/src/resources/extensions/sf/uok/swarm-dispatch.js +++ b/src/resources/extensions/sf/uok/swarm-dispatch.js @@ -288,12 +288,13 @@ export class SwarmDispatchLayer { * @param {DispatchEnvelope} envelope * @param {object} [options={}] * @param {number} [options.timeoutMs=480000] Hard cap for the agent's turn. + * @param {number} [options.noOutputTimeoutMs] Cap for silent model response time. 
* @param {AbortSignal} [options.signal] * @param {Function} [options.onEvent] Optional event callback forwarded to runAgentTurn. * @returns {Promise} */ async dispatchAndWait(envelope, options = {}) { - const { timeoutMs = 480_000, signal, onEvent } = options; + const { timeoutMs = 480_000, noOutputTimeoutMs, signal, onEvent } = options; // A2A path: no synchronous wait support yet — return nulled reply fields. if (process.env.SF_A2A_ENABLED) { @@ -335,6 +336,8 @@ export class SwarmDispatchLayer { try { turnResult = await runAgentTurn(agent, { timeoutMs, + // Forward any explicitly provided value, including 0 (which disables the no-output watchdog); runAgentTurn's default applies only when the option is omitted. + ...(noOutputTimeoutMs !== undefined ? { noOutputTimeoutMs } : {}), signal, onlyMessageId: dispatchResult.messageId, ...(onEvent ? { onEvent } : {}),