fix(gsd): add escalation and unit-detach guards to finalize timeout handlers
Address adversarial review findings: 1. Timed-out pre/post verification continues running in background and can mutate s.currentUnit for the wrong unit. Fix: null out s.currentUnit on timeout so late async completions are harmless (all side effects in postUnitPreVerification guard on s.currentUnit). 2. Finalize timeouts were treated as successful iterations, resetting consecutiveErrors and enabling silent infinite churn. Fix: add consecutiveFinalizeTimeouts counter to LoopState, increment on each timeout, hard-stop auto-mode after MAX_FINALIZE_TIMEOUTS (3) consecutive timeouts. Reset to 0 on successful finalize. Both fixes apply symmetrically to pre and post verification timeouts. Refs #3757
This commit is contained in:
parent
32a7af0513
commit
0ca76f6813
4 changed files with 114 additions and 5 deletions
|
|
@ -46,7 +46,7 @@ export async function autoLoop(
|
|||
): Promise<void> {
|
||||
debugLog("autoLoop", { phase: "enter" });
|
||||
let iteration = 0;
|
||||
const loopState: LoopState = { recentUnits: [], stuckRecoveryAttempts: 0 };
|
||||
const loopState: LoopState = { recentUnits: [], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 };
|
||||
let consecutiveErrors = 0;
|
||||
const recentErrorMessages: string[] = [];
|
||||
|
||||
|
|
@ -247,7 +247,7 @@ export async function autoLoop(
|
|||
|
||||
// ── Phase 5: Finalize ───────────────────────────────────────────────
|
||||
|
||||
const finalizeResult = await runFinalize(ic, iterData, sidecarItem);
|
||||
const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
|
||||
if (finalizeResult.action === "break") break;
|
||||
if (finalizeResult.action === "continue") continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import type { PostUnitContext, PreVerificationOpts } from "../auto-post-unit.js"
|
|||
import {
|
||||
MAX_RECOVERY_CHARS,
|
||||
BUDGET_THRESHOLDS,
|
||||
MAX_FINALIZE_TIMEOUTS,
|
||||
type PhaseResult,
|
||||
type IterationContext,
|
||||
type LoopState,
|
||||
|
|
@ -1427,6 +1428,7 @@ export async function runUnitPhase(
|
|||
export async function runFinalize(
|
||||
ic: IterationContext,
|
||||
iterData: IterationData,
|
||||
loopState: LoopState,
|
||||
sidecarItem?: SidecarItem,
|
||||
): Promise<PhaseResult> {
|
||||
const { ctx, pi, s, deps } = ic;
|
||||
|
|
@ -1453,12 +1455,19 @@ export async function runFinalize(
|
|||
// Timeout guard: if postUnitPreVerification hangs (e.g., safety harness
|
||||
// deadlock, browser teardown hang, worktree sync stall), force-continue
|
||||
// after timeout so the auto-loop is not permanently frozen (#3757).
|
||||
//
|
||||
// On timeout, null out s.currentUnit so the timed-out task's late async
|
||||
// mutations are harmless — postUnitPreVerification guards all side effects
|
||||
// behind `if (s.currentUnit)`. The next iteration sets a fresh currentUnit.
|
||||
// Sidecar items use lightweight pre-verification opts
|
||||
const preVerificationOpts: PreVerificationOpts | undefined = sidecarItem
|
||||
? sidecarItem.kind === "hook"
|
||||
? { skipSettleDelay: true, skipWorktreeSync: true }
|
||||
: { skipSettleDelay: true }
|
||||
: undefined;
|
||||
const preUnitSnapshot = s.currentUnit
|
||||
? { type: s.currentUnit.type, id: s.currentUnit.id, startedAt: s.currentUnit.startedAt }
|
||||
: null;
|
||||
const preResultGuard = await withTimeout(
|
||||
deps.postUnitPreVerification(postUnitCtx, preVerificationOpts),
|
||||
FINALIZE_PRE_TIMEOUT_MS,
|
||||
|
|
@ -1466,14 +1475,29 @@ export async function runFinalize(
|
|||
);
|
||||
|
||||
if (preResultGuard.timedOut) {
|
||||
// Detach session from the timed-out unit so late async completions
|
||||
// cannot mutate state for the next unit (#3757).
|
||||
s.currentUnit = null;
|
||||
loopState.consecutiveFinalizeTimeouts++;
|
||||
debugLog("autoLoop", {
|
||||
phase: "pre-verification-timeout",
|
||||
iteration: ic.iteration,
|
||||
unitType: iterData.unitType,
|
||||
unitId: iterData.unitId,
|
||||
consecutiveTimeouts: loopState.consecutiveFinalizeTimeouts,
|
||||
});
|
||||
|
||||
if (loopState.consecutiveFinalizeTimeouts >= MAX_FINALIZE_TIMEOUTS) {
|
||||
ctx.ui.notify(
|
||||
`postUnitPreVerification timed out ${loopState.consecutiveFinalizeTimeouts} consecutive times — stopping auto-mode to prevent budget waste`,
|
||||
"error",
|
||||
);
|
||||
await deps.stopAuto(ctx, pi, `${loopState.consecutiveFinalizeTimeouts} consecutive finalize timeouts`);
|
||||
return { action: "break", reason: "finalize-timeout-escalation" };
|
||||
}
|
||||
|
||||
ctx.ui.notify(
|
||||
`postUnitPreVerification timed out after ${FINALIZE_PRE_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} — continuing to next iteration`,
|
||||
`postUnitPreVerification timed out after ${FINALIZE_PRE_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} (${loopState.consecutiveFinalizeTimeouts}/${MAX_FINALIZE_TIMEOUTS}) — continuing to next iteration`,
|
||||
"warning",
|
||||
);
|
||||
return { action: "next", data: undefined as void };
|
||||
|
|
@ -1548,14 +1572,29 @@ export async function runFinalize(
|
|||
);
|
||||
|
||||
if (postResultGuard.timedOut) {
|
||||
// Detach session from the timed-out unit so late async completions
|
||||
// cannot mutate state for the next unit (#3757).
|
||||
s.currentUnit = null;
|
||||
loopState.consecutiveFinalizeTimeouts++;
|
||||
debugLog("autoLoop", {
|
||||
phase: "post-verification-timeout",
|
||||
iteration: ic.iteration,
|
||||
unitType: iterData.unitType,
|
||||
unitId: iterData.unitId,
|
||||
consecutiveTimeouts: loopState.consecutiveFinalizeTimeouts,
|
||||
});
|
||||
|
||||
if (loopState.consecutiveFinalizeTimeouts >= MAX_FINALIZE_TIMEOUTS) {
|
||||
ctx.ui.notify(
|
||||
`postUnitPostVerification timed out ${loopState.consecutiveFinalizeTimeouts} consecutive times — stopping auto-mode to prevent budget waste`,
|
||||
"error",
|
||||
);
|
||||
await deps.stopAuto(ctx, pi, `${loopState.consecutiveFinalizeTimeouts} consecutive finalize timeouts`);
|
||||
return { action: "break", reason: "finalize-timeout-escalation" };
|
||||
}
|
||||
|
||||
ctx.ui.notify(
|
||||
`postUnitPostVerification timed out after ${FINALIZE_POST_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} — continuing to next iteration`,
|
||||
`postUnitPostVerification timed out after ${FINALIZE_POST_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} (${loopState.consecutiveFinalizeTimeouts}/${MAX_FINALIZE_TIMEOUTS}) — continuing to next iteration`,
|
||||
"warning",
|
||||
);
|
||||
return { action: "next", data: undefined as void };
|
||||
|
|
@ -1577,6 +1616,9 @@ export async function runFinalize(
|
|||
return { action: "break", reason: "step-wizard" };
|
||||
}
|
||||
|
||||
// Both pre and post verification completed without timeout — reset counter
|
||||
loopState.consecutiveFinalizeTimeouts = 0;
|
||||
|
||||
return { action: "next", data: undefined as void };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -91,8 +91,13 @@ export interface IterationContext {
|
|||
export interface LoopState {
|
||||
recentUnits: Array<{ key: string; error?: string }>;
|
||||
stuckRecoveryAttempts: number;
|
||||
/** Consecutive finalize timeout count — stops auto-mode after threshold. */
|
||||
consecutiveFinalizeTimeouts: number;
|
||||
}
|
||||
|
||||
/** Max consecutive finalize timeouts before hard-stopping auto-mode. */
|
||||
export const MAX_FINALIZE_TIMEOUTS = 3;
|
||||
|
||||
export interface PreDispatchData {
|
||||
state: GSDState;
|
||||
mid: string;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import {
|
|||
FINALIZE_PRE_TIMEOUT_MS,
|
||||
FINALIZE_POST_TIMEOUT_MS,
|
||||
} from "../auto/finalize-timeout.ts";
|
||||
import { MAX_FINALIZE_TIMEOUTS } from "../auto/types.ts";
|
||||
|
||||
const { assertTrue, assertEq, report } = createTestContext();
|
||||
|
||||
|
|
@ -148,7 +149,7 @@ const { assertTrue, assertEq, report } = createTestContext();
|
|||
const fnIdx = phasesSource.indexOf("export async function runFinalize(");
|
||||
assertTrue(fnIdx > 0, "runFinalize function should exist in phases.ts");
|
||||
|
||||
const fnBody = phasesSource.slice(fnIdx, fnIdx + 5000);
|
||||
const fnBody = phasesSource.slice(fnIdx, fnIdx + 8000);
|
||||
|
||||
// postUnitPreVerification must be wrapped in withTimeout
|
||||
const preTimeoutIdx = fnBody.indexOf("withTimeout(");
|
||||
|
|
@ -176,4 +177,65 @@ const { assertTrue, assertEq, report } = createTestContext();
|
|||
);
|
||||
}
|
||||
|
||||
// ═══ Test: MAX_FINALIZE_TIMEOUTS is defined and reasonable ═══════════════════
|
||||
|
||||
{
|
||||
console.log("\n=== #3757: MAX_FINALIZE_TIMEOUTS is defined and reasonable ===");
|
||||
|
||||
assertTrue(
|
||||
typeof MAX_FINALIZE_TIMEOUTS === "number",
|
||||
"MAX_FINALIZE_TIMEOUTS should be a number",
|
||||
);
|
||||
assertTrue(
|
||||
MAX_FINALIZE_TIMEOUTS >= 2,
|
||||
`threshold should be >= 2 (got ${MAX_FINALIZE_TIMEOUTS})`,
|
||||
);
|
||||
assertTrue(
|
||||
MAX_FINALIZE_TIMEOUTS <= 10,
|
||||
`threshold should be <= 10 (got ${MAX_FINALIZE_TIMEOUTS})`,
|
||||
);
|
||||
}
|
||||
|
||||
// ═══ Test: timeout handlers escalate after consecutive timeouts ══════════════
|
||||
|
||||
{
|
||||
console.log("\n=== #3757: timeout handlers escalate and detach currentUnit ===");
|
||||
|
||||
const { readFileSync } = await import("node:fs");
|
||||
const phasesSource = readFileSync(
|
||||
new URL("../auto/phases.ts", import.meta.url),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
const fnIdx = phasesSource.indexOf("export async function runFinalize(");
|
||||
const fnBody = phasesSource.slice(fnIdx, fnIdx + 8000);
|
||||
|
||||
// Both timeout handlers should increment consecutiveFinalizeTimeouts
|
||||
const incrementCount = (fnBody.match(/consecutiveFinalizeTimeouts\+\+/g) || []).length;
|
||||
assertTrue(
|
||||
incrementCount >= 2,
|
||||
`should increment consecutiveFinalizeTimeouts in both pre and post handlers (found ${incrementCount})`,
|
||||
);
|
||||
|
||||
// Both timeout handlers should check MAX_FINALIZE_TIMEOUTS for escalation
|
||||
const escalationCount = (fnBody.match(/MAX_FINALIZE_TIMEOUTS/g) || []).length;
|
||||
assertTrue(
|
||||
escalationCount >= 2,
|
||||
`should check MAX_FINALIZE_TIMEOUTS in both handlers (found ${escalationCount})`,
|
||||
);
|
||||
|
||||
// Both timeout handlers should null out s.currentUnit to prevent late mutations
|
||||
const detachCount = (fnBody.match(/s\.currentUnit\s*=\s*null/g) || []).length;
|
||||
assertTrue(
|
||||
detachCount >= 2,
|
||||
`should detach s.currentUnit in both timeout handlers (found ${detachCount})`,
|
||||
);
|
||||
|
||||
// Successful finalize should reset the counter
|
||||
assertTrue(
|
||||
fnBody.includes("consecutiveFinalizeTimeouts = 0"),
|
||||
"should reset consecutiveFinalizeTimeouts on successful finalize",
|
||||
);
|
||||
}
|
||||
|
||||
report();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue