From 68a999ebde3fb20c1129e4c8bfe3bce7214cfbcd Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Tue, 17 Mar 2026 21:49:39 -0500 Subject: [PATCH] fix: prevent summarizing phase stall by retrying dropped agent_end events (#1072) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When handleAgentEnd dispatches a sub-unit (via hooks, triage, or quick-task early-dispatch paths) and that unit completes before handleAgentEnd returns, the resulting agent_end event is silently dropped by the reentrancy guard. This leaves auto-mode active but permanently stalled — no unit running, no watchdog set, process at high CPU doing nothing. Add a pendingAgentEndRetry flag to AutoSession that the reentrancy guard sets when it drops an agent_end event. The finally block in handleAgentEnd checks this flag and schedules a deferred retry via setImmediate, ensuring the completed unit's agent_end is always processed. --- src/resources/extensions/gsd/auto.ts | 27 ++++- src/resources/extensions/gsd/auto/session.ts | 2 + .../gsd/tests/agent-end-retry.test.ts | 107 ++++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 src/resources/extensions/gsd/tests/agent-end-retry.test.ts diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index c830badd9..13d62be42 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -1163,7 +1163,15 @@ export async function handleAgentEnd( pi: ExtensionAPI, ): Promise { if (!s.active || !s.cmdCtx) return; - if (s.handlingAgentEnd) return; + if (s.handlingAgentEnd) { + // Another agent_end arrived while we're still processing the previous one. + // This happens when a unit dispatched inside handleAgentEnd (e.g. via hooks, + // triage, or quick-task early-dispatch paths) completes before the outer + // handleAgentEnd returns. Queue a retry so the completed unit's agent_end + // is not silently dropped (#1072). + s.pendingAgentEndRetry = true; + return; + } s.handlingAgentEnd = true; try { @@ -1888,6 +1896,23 @@ export async function handleAgentEnd( } finally { s.handlingAgentEnd = false; + + // If an agent_end event was dropped by the reentrancy guard while we were + // processing, re-enter handleAgentEnd on the next microtask. This prevents + // the summarizing phase stall (#1072) where a unit dispatched inside + // handleAgentEnd (hooks, triage, quick-task) completes before we return, + // and its agent_end is silently dropped — leaving auto-mode active but + // permanently stalled with no unit running and no watchdog set. + if (s.pendingAgentEndRetry) { + s.pendingAgentEndRetry = false; + setImmediate(() => { + handleAgentEnd(ctx, pi).catch((err) => { + const msg = err instanceof Error ? err.message : String(err); + ctx.ui.notify(`Deferred agent_end retry failed: ${msg}`, "error"); + pauseAuto(ctx, pi).catch(() => {}); + }); + }); + } } } diff --git a/src/resources/extensions/gsd/auto/session.ts b/src/resources/extensions/gsd/auto/session.ts index d60ba03ae..71642afff 100644 --- a/src/resources/extensions/gsd/auto/session.ts +++ b/src/resources/extensions/gsd/auto/session.ts @@ -112,6 +112,7 @@ export class AutoSession { // ── Guards ─────────────────────────────────────────────────────────────── handlingAgentEnd = false; + pendingAgentEndRetry = false; dispatching = false; skipDepth = 0; readonly recentlyEvictedKeys = new Set(); @@ -198,6 +199,7 @@ export class AutoSession { // Guards this.handlingAgentEnd = false; + this.pendingAgentEndRetry = false; this.dispatching = false; this.skipDepth = 0; this.recentlyEvictedKeys.clear(); diff --git a/src/resources/extensions/gsd/tests/agent-end-retry.test.ts b/src/resources/extensions/gsd/tests/agent-end-retry.test.ts new file mode 100644 index 000000000..85704d62c --- /dev/null +++ b/src/resources/extensions/gsd/tests/agent-end-retry.test.ts @@ -0,0 +1,107 @@ +/** + * agent-end-retry.test.ts — Verifies the deferred agent_end retry mechanism (#1072). + * + * When handleAgentEnd is already running and a second agent_end event fires + * (e.g. a hook/triage/quick-task unit dispatched inside handleAgentEnd completes + * before it returns), the reentrancy guard must not silently drop the event. + * Instead, it should queue a retry via pendingAgentEndRetry so the completed + * unit's agent_end is processed after the current handler finishes. + * + * Without this, auto-mode can stall permanently in the "summarizing" phase + * with no unit running and no watchdog set. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const AUTO_TS_PATH = join(__dirname, "..", "auto.ts"); +const SESSION_TS_PATH = join(__dirname, "..", "auto", "session.ts"); + +function getAutoTsSource(): string { + return readFileSync(AUTO_TS_PATH, "utf-8"); +} + +function getSessionTsSource(): string { + return readFileSync(SESSION_TS_PATH, "utf-8"); +} + +// ── AutoSession must declare pendingAgentEndRetry ──────────────────────────── + +test("AutoSession declares pendingAgentEndRetry field", () => { + const source = getSessionTsSource(); + assert.ok( + source.includes("pendingAgentEndRetry"), + "AutoSession (auto/session.ts) must declare pendingAgentEndRetry field for deferred retry", + ); +}); + +test("AutoSession resets pendingAgentEndRetry in reset()", () => { + const source = getSessionTsSource(); + // Find the reset() method — it's declared as "reset(): void {" + const resetIdx = source.indexOf("reset(): void"); + assert.ok(resetIdx > -1, "AutoSession must have a reset() method"); + const resetBlock = source.slice(resetIdx, resetIdx + 3000); + assert.ok( + resetBlock.includes("pendingAgentEndRetry"), + "reset() must clear pendingAgentEndRetry", + ); +}); + +// ── handleAgentEnd reentrancy guard must queue retry ───────────────────────── + +test("handleAgentEnd sets pendingAgentEndRetry when reentrant", () => { + const source = getAutoTsSource(); + // Find the handleAgentEnd function + const fnIdx = source.indexOf("export async function handleAgentEnd"); + assert.ok(fnIdx > -1, "handleAgentEnd must exist in auto.ts"); + + // The reentrancy guard section (within ~500 chars of the function start) + const guardBlock = source.slice(fnIdx, fnIdx + 800); + assert.ok( + guardBlock.includes("s.handlingAgentEnd"), + "handleAgentEnd must check s.handlingAgentEnd", + ); + assert.ok( + guardBlock.includes("pendingAgentEndRetry = true"), + "reentrancy guard must set pendingAgentEndRetry = true instead of silently dropping (#1072)", + ); +}); + +// ── finally block must process pendingAgentEndRetry ────────────────────────── + +test("handleAgentEnd finally block retries if pendingAgentEndRetry is set", () => { + const source = getAutoTsSource(); + const fnIdx = source.indexOf("export async function handleAgentEnd"); + assert.ok(fnIdx > -1, "handleAgentEnd must exist"); + + // Find the finally block within handleAgentEnd (search for the closing pattern) + const fnBlock = source.slice(fnIdx, source.indexOf("\n// ─── ", fnIdx + 100)); + assert.ok( + fnBlock.includes("pendingAgentEndRetry"), + "handleAgentEnd finally block must check pendingAgentEndRetry", + ); + assert.ok( + fnBlock.includes("setImmediate"), + "deferred retry must use setImmediate to avoid stack overflow (#1072)", + ); + assert.ok( + fnBlock.includes("handleAgentEnd(ctx, pi)"), + "deferred retry must call handleAgentEnd recursively (#1072)", + ); +}); + +// ── Regression: reentrancy guard must NOT silently return ───────────────────── + +test("reentrancy guard references issue #1072", () => { + const source = getAutoTsSource(); + const fnIdx = source.indexOf("export async function handleAgentEnd"); + const guardBlock = source.slice(fnIdx, fnIdx + 800); + assert.ok( + guardBlock.includes("1072"), + "reentrancy guard comment must reference #1072 for traceability", + ); +});