fix: prevent summarizing phase stall by retrying dropped agent_end events (#1072)
When handleAgentEnd dispatches a sub-unit (via hooks, triage, or quick-task early-dispatch paths) and that unit completes before handleAgentEnd returns, the resulting agent_end event is silently dropped by the reentrancy guard. This leaves auto-mode active but permanently stalled — no unit running, no watchdog set, process at high CPU doing nothing. Add a pendingAgentEndRetry flag to AutoSession that the reentrancy guard sets when it drops an agent_end event. The finally block in handleAgentEnd checks this flag and schedules a deferred retry via setImmediate, ensuring the completed unit's agent_end is always processed.
This commit is contained in:
parent
94be09482f
commit
68a999ebde
3 changed files with 135 additions and 1 deletions
|
|
@ -1163,7 +1163,15 @@ export async function handleAgentEnd(
|
|||
pi: ExtensionAPI,
|
||||
): Promise<void> {
|
||||
if (!s.active || !s.cmdCtx) return;
|
||||
if (s.handlingAgentEnd) return;
|
||||
if (s.handlingAgentEnd) {
|
||||
// Another agent_end arrived while we're still processing the previous one.
|
||||
// This happens when a unit dispatched inside handleAgentEnd (e.g. via hooks,
|
||||
// triage, or quick-task early-dispatch paths) completes before the outer
|
||||
// handleAgentEnd returns. Queue a retry so the completed unit's agent_end
|
||||
// is not silently dropped (#1072).
|
||||
s.pendingAgentEndRetry = true;
|
||||
return;
|
||||
}
|
||||
s.handlingAgentEnd = true;
|
||||
|
||||
try {
|
||||
|
|
@ -1888,6 +1896,23 @@ export async function handleAgentEnd(
|
|||
|
||||
} finally {
|
||||
s.handlingAgentEnd = false;
|
||||
|
||||
// If an agent_end event was dropped by the reentrancy guard while we were
|
||||
// processing, re-enter handleAgentEnd on the next microtask. This prevents
|
||||
// the summarizing phase stall (#1072) where a unit dispatched inside
|
||||
// handleAgentEnd (hooks, triage, quick-task) completes before we return,
|
||||
// and its agent_end is silently dropped — leaving auto-mode active but
|
||||
// permanently stalled with no unit running and no watchdog set.
|
||||
if (s.pendingAgentEndRetry) {
|
||||
s.pendingAgentEndRetry = false;
|
||||
setImmediate(() => {
|
||||
handleAgentEnd(ctx, pi).catch((err) => {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
ctx.ui.notify(`Deferred agent_end retry failed: ${msg}`, "error");
|
||||
pauseAuto(ctx, pi).catch(() => {});
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ export class AutoSession {
|
|||
|
||||
// ── Guards ───────────────────────────────────────────────────────────────
|
||||
handlingAgentEnd = false;
|
||||
pendingAgentEndRetry = false;
|
||||
dispatching = false;
|
||||
skipDepth = 0;
|
||||
readonly recentlyEvictedKeys = new Set<string>();
|
||||
|
|
@ -198,6 +199,7 @@ export class AutoSession {
|
|||
|
||||
// Guards
|
||||
this.handlingAgentEnd = false;
|
||||
this.pendingAgentEndRetry = false;
|
||||
this.dispatching = false;
|
||||
this.skipDepth = 0;
|
||||
this.recentlyEvictedKeys.clear();
|
||||
|
|
|
|||
107
src/resources/extensions/gsd/tests/agent-end-retry.test.ts
Normal file
107
src/resources/extensions/gsd/tests/agent-end-retry.test.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/**
|
||||
* agent-end-retry.test.ts — Verifies the deferred agent_end retry mechanism (#1072).
|
||||
*
|
||||
* When handleAgentEnd is already running and a second agent_end event fires
|
||||
* (e.g. a hook/triage/quick-task unit dispatched inside handleAgentEnd completes
|
||||
* before it returns), the reentrancy guard must not silently drop the event.
|
||||
* Instead, it should queue a retry via pendingAgentEndRetry so the completed
|
||||
* unit's agent_end is processed after the current handler finishes.
|
||||
*
|
||||
* Without this, auto-mode can stall permanently in the "summarizing" phase
|
||||
* with no unit running and no watchdog set.
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const AUTO_TS_PATH = join(__dirname, "..", "auto.ts");
|
||||
const SESSION_TS_PATH = join(__dirname, "..", "auto", "session.ts");
|
||||
|
||||
function getAutoTsSource(): string {
|
||||
return readFileSync(AUTO_TS_PATH, "utf-8");
|
||||
}
|
||||
|
||||
function getSessionTsSource(): string {
|
||||
return readFileSync(SESSION_TS_PATH, "utf-8");
|
||||
}
|
||||
|
||||
// ── AutoSession must declare pendingAgentEndRetry ────────────────────────────
|
||||
|
||||
test("AutoSession declares pendingAgentEndRetry field", () => {
|
||||
const source = getSessionTsSource();
|
||||
assert.ok(
|
||||
source.includes("pendingAgentEndRetry"),
|
||||
"AutoSession (auto/session.ts) must declare pendingAgentEndRetry field for deferred retry",
|
||||
);
|
||||
});
|
||||
|
||||
test("AutoSession resets pendingAgentEndRetry in reset()", () => {
|
||||
const source = getSessionTsSource();
|
||||
// Find the reset() method — it's declared as "reset(): void {"
|
||||
const resetIdx = source.indexOf("reset(): void");
|
||||
assert.ok(resetIdx > -1, "AutoSession must have a reset() method");
|
||||
const resetBlock = source.slice(resetIdx, resetIdx + 3000);
|
||||
assert.ok(
|
||||
resetBlock.includes("pendingAgentEndRetry"),
|
||||
"reset() must clear pendingAgentEndRetry",
|
||||
);
|
||||
});
|
||||
|
||||
// ── handleAgentEnd reentrancy guard must queue retry ─────────────────────────
|
||||
|
||||
test("handleAgentEnd sets pendingAgentEndRetry when reentrant", () => {
|
||||
const source = getAutoTsSource();
|
||||
// Find the handleAgentEnd function
|
||||
const fnIdx = source.indexOf("export async function handleAgentEnd");
|
||||
assert.ok(fnIdx > -1, "handleAgentEnd must exist in auto.ts");
|
||||
|
||||
// The reentrancy guard section (within ~500 chars of the function start)
|
||||
const guardBlock = source.slice(fnIdx, fnIdx + 800);
|
||||
assert.ok(
|
||||
guardBlock.includes("s.handlingAgentEnd"),
|
||||
"handleAgentEnd must check s.handlingAgentEnd",
|
||||
);
|
||||
assert.ok(
|
||||
guardBlock.includes("pendingAgentEndRetry = true"),
|
||||
"reentrancy guard must set pendingAgentEndRetry = true instead of silently dropping (#1072)",
|
||||
);
|
||||
});
|
||||
|
||||
// ── finally block must process pendingAgentEndRetry ──────────────────────────
|
||||
|
||||
test("handleAgentEnd finally block retries if pendingAgentEndRetry is set", () => {
|
||||
const source = getAutoTsSource();
|
||||
const fnIdx = source.indexOf("export async function handleAgentEnd");
|
||||
assert.ok(fnIdx > -1, "handleAgentEnd must exist");
|
||||
|
||||
// Find the finally block within handleAgentEnd (search for the closing pattern)
|
||||
const fnBlock = source.slice(fnIdx, source.indexOf("\n// ─── ", fnIdx + 100));
|
||||
assert.ok(
|
||||
fnBlock.includes("pendingAgentEndRetry"),
|
||||
"handleAgentEnd finally block must check pendingAgentEndRetry",
|
||||
);
|
||||
assert.ok(
|
||||
fnBlock.includes("setImmediate"),
|
||||
"deferred retry must use setImmediate to avoid stack overflow (#1072)",
|
||||
);
|
||||
assert.ok(
|
||||
fnBlock.includes("handleAgentEnd(ctx, pi)"),
|
||||
"deferred retry must call handleAgentEnd recursively (#1072)",
|
||||
);
|
||||
});
|
||||
|
||||
// ── Regression: reentrancy guard must NOT silently return ─────────────────────
|
||||
|
||||
test("reentrancy guard references issue #1072", () => {
|
||||
const source = getAutoTsSource();
|
||||
const fnIdx = source.indexOf("export async function handleAgentEnd");
|
||||
const guardBlock = source.slice(fnIdx, fnIdx + 800);
|
||||
assert.ok(
|
||||
guardBlock.includes("1072"),
|
||||
"reentrancy guard comment must reference #1072 for traceability",
|
||||
);
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue