fix: prevent summarizing phase stall by retrying dropped agent_end events (#1072) (#1074)

When handleAgentEnd dispatches a sub-unit (via hooks, triage, or quick-task
early-dispatch paths) and that unit completes before handleAgentEnd returns,
the resulting agent_end event is silently dropped by the reentrancy guard.
This leaves auto-mode active but permanently stalled — no unit running, no
watchdog set, process at high CPU doing nothing.

Add a pendingAgentEndRetry flag to AutoSession that the reentrancy guard sets
when it drops an agent_end event. The finally block in handleAgentEnd checks
this flag and schedules a deferred retry via setImmediate, ensuring the
completed unit's agent_end is always processed.
This commit is contained in:
Jeremy McSpadden 2026-03-17 23:01:58 -05:00 committed by GitHub
parent 38b79d75a7
commit b2befe3628
3 changed files with 135 additions and 1 deletions

View file

@ -1172,7 +1172,15 @@ export async function handleAgentEnd(
pi: ExtensionAPI,
): Promise<void> {
if (!s.active || !s.cmdCtx) return;
if (s.handlingAgentEnd) return;
if (s.handlingAgentEnd) {
// Another agent_end arrived while we're still processing the previous one.
// This happens when a unit dispatched inside handleAgentEnd (e.g. via hooks,
// triage, or quick-task early-dispatch paths) completes before the outer
// handleAgentEnd returns. Queue a retry so the completed unit's agent_end
// is not silently dropped (#1072).
s.pendingAgentEndRetry = true;
return;
}
s.handlingAgentEnd = true;
try {
@ -1901,6 +1909,23 @@ export async function handleAgentEnd(
} finally {
s.handlingAgentEnd = false;
// If an agent_end event was dropped by the reentrancy guard while we were
// processing, re-enter handleAgentEnd on the next microtask. This prevents
// the summarizing phase stall (#1072) where a unit dispatched inside
// handleAgentEnd (hooks, triage, quick-task) completes before we return,
// and its agent_end is silently dropped — leaving auto-mode active but
// permanently stalled with no unit running and no watchdog set.
if (s.pendingAgentEndRetry) {
s.pendingAgentEndRetry = false;
setImmediate(() => {
handleAgentEnd(ctx, pi).catch((err) => {
const msg = err instanceof Error ? err.message : String(err);
ctx.ui.notify(`Deferred agent_end retry failed: ${msg}`, "error");
pauseAuto(ctx, pi).catch(() => {});
});
});
}
}
}

View file

@ -112,6 +112,7 @@ export class AutoSession {
// ── Guards ───────────────────────────────────────────────────────────────
handlingAgentEnd = false;
pendingAgentEndRetry = false;
dispatching = false;
skipDepth = 0;
readonly recentlyEvictedKeys = new Set<string>();
@ -198,6 +199,7 @@ export class AutoSession {
// Guards
this.handlingAgentEnd = false;
this.pendingAgentEndRetry = false;
this.dispatching = false;
this.skipDepth = 0;
this.recentlyEvictedKeys.clear();

View file

@ -0,0 +1,107 @@
/**
* agent-end-retry.test.ts Verifies the deferred agent_end retry mechanism (#1072).
*
* When handleAgentEnd is already running and a second agent_end event fires
* (e.g. a hook/triage/quick-task unit dispatched inside handleAgentEnd completes
* before it returns), the reentrancy guard must not silently drop the event.
* Instead, it should queue a retry via pendingAgentEndRetry so the completed
* unit's agent_end is processed after the current handler finishes.
*
* Without this, auto-mode can stall permanently in the "summarizing" phase
* with no unit running and no watchdog set.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
const __dirname = dirname(fileURLToPath(import.meta.url));
const AUTO_TS_PATH = join(__dirname, "..", "auto.ts");
const SESSION_TS_PATH = join(__dirname, "..", "auto", "session.ts");
function getAutoTsSource(): string {
return readFileSync(AUTO_TS_PATH, "utf-8");
}
function getSessionTsSource(): string {
return readFileSync(SESSION_TS_PATH, "utf-8");
}
// ── AutoSession must declare pendingAgentEndRetry ────────────────────────────
test("AutoSession declares pendingAgentEndRetry field", () => {
const source = getSessionTsSource();
assert.ok(
source.includes("pendingAgentEndRetry"),
"AutoSession (auto/session.ts) must declare pendingAgentEndRetry field for deferred retry",
);
});
test("AutoSession resets pendingAgentEndRetry in reset()", () => {
const source = getSessionTsSource();
// Find the reset() method — it's declared as "reset(): void {"
const resetIdx = source.indexOf("reset(): void");
assert.ok(resetIdx > -1, "AutoSession must have a reset() method");
const resetBlock = source.slice(resetIdx, resetIdx + 3000);
assert.ok(
resetBlock.includes("pendingAgentEndRetry"),
"reset() must clear pendingAgentEndRetry",
);
});
// ── handleAgentEnd reentrancy guard must queue retry ─────────────────────────
test("handleAgentEnd sets pendingAgentEndRetry when reentrant", () => {
const source = getAutoTsSource();
// Find the handleAgentEnd function
const fnIdx = source.indexOf("export async function handleAgentEnd");
assert.ok(fnIdx > -1, "handleAgentEnd must exist in auto.ts");
// The reentrancy guard section (within ~500 chars of the function start)
const guardBlock = source.slice(fnIdx, fnIdx + 800);
assert.ok(
guardBlock.includes("s.handlingAgentEnd"),
"handleAgentEnd must check s.handlingAgentEnd",
);
assert.ok(
guardBlock.includes("pendingAgentEndRetry = true"),
"reentrancy guard must set pendingAgentEndRetry = true instead of silently dropping (#1072)",
);
});
// ── finally block must process pendingAgentEndRetry ──────────────────────────
test("handleAgentEnd finally block retries if pendingAgentEndRetry is set", () => {
const source = getAutoTsSource();
const fnIdx = source.indexOf("export async function handleAgentEnd");
assert.ok(fnIdx > -1, "handleAgentEnd must exist");
// Find the finally block within handleAgentEnd (search for the closing pattern)
const fnBlock = source.slice(fnIdx, source.indexOf("\n// ─── ", fnIdx + 100));
assert.ok(
fnBlock.includes("pendingAgentEndRetry"),
"handleAgentEnd finally block must check pendingAgentEndRetry",
);
assert.ok(
fnBlock.includes("setImmediate"),
"deferred retry must use setImmediate to avoid stack overflow (#1072)",
);
assert.ok(
fnBlock.includes("handleAgentEnd(ctx, pi)"),
"deferred retry must call handleAgentEnd recursively (#1072)",
);
});
// ── Regression: reentrancy guard must NOT silently return ─────────────────────
test("reentrancy guard references issue #1072", () => {
const source = getAutoTsSource();
const fnIdx = source.indexOf("export async function handleAgentEnd");
const guardBlock = source.slice(fnIdx, fnIdx + 800);
assert.ok(
guardBlock.includes("1072"),
"reentrancy guard comment must reference #1072 for traceability",
);
});