fix(gsd): add timeout guard around postUnitPreVerification to prevent auto-loop hang

postUnitPostVerification already has a 60s timeout guard (#2344) but
postUnitPreVerification was called with bare await — if any async
operation inside it never resolves (browser teardown, worktree sync,
safety harness validation), the auto-loop freezes permanently with no
error, notification, or recovery.

Wrap postUnitPreVerification in the same withTimeout() pattern with a
dedicated FINALIZE_PRE_TIMEOUT_MS constant. On timeout, log a warning
and force-continue to the next iteration.

Closes #3757
This commit is contained in:
Jeremy 2026-04-07 20:57:30 -05:00
parent 94f9b311d3
commit 32a7af0513
3 changed files with 91 additions and 2 deletions

View file

@ -7,6 +7,9 @@
* Leaf module no imports from auto/ to avoid circular dependencies.
*/
/** Timeout for postUnitPreVerification in runFinalize (ms). */
export const FINALIZE_PRE_TIMEOUT_MS = 60_000;
/** Timeout for postUnitPostVerification in runFinalize (ms). */
export const FINALIZE_POST_TIMEOUT_MS = 60_000;

View file

@ -33,7 +33,7 @@ import { gsdRoot } from "../paths.js";
import { atomicWriteSync } from "../atomic-write.js";
import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js";
import { writeUnitRuntimeRecord } from "../unit-runtime.js";
import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
import { withTimeout, FINALIZE_PRE_TIMEOUT_MS, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
@ -1450,13 +1450,36 @@ export async function runFinalize(
};
// Pre-verification processing (commit, doctor, state rebuild, etc.)
// Timeout guard: if postUnitPreVerification hangs (e.g., safety harness
// deadlock, browser teardown hang, worktree sync stall), force-continue
// after timeout so the auto-loop is not permanently frozen (#3757).
// Sidecar items use lightweight pre-verification opts
const preVerificationOpts: PreVerificationOpts | undefined = sidecarItem
? sidecarItem.kind === "hook"
? { skipSettleDelay: true, skipWorktreeSync: true }
: { skipSettleDelay: true }
: undefined;
const preResult = await deps.postUnitPreVerification(postUnitCtx, preVerificationOpts);
const preResultGuard = await withTimeout(
deps.postUnitPreVerification(postUnitCtx, preVerificationOpts),
FINALIZE_PRE_TIMEOUT_MS,
"postUnitPreVerification",
);
if (preResultGuard.timedOut) {
debugLog("autoLoop", {
phase: "pre-verification-timeout",
iteration: ic.iteration,
unitType: iterData.unitType,
unitId: iterData.unitId,
});
ctx.ui.notify(
`postUnitPreVerification timed out after ${FINALIZE_PRE_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} — continuing to next iteration`,
"warning",
);
return { action: "next", data: undefined as void };
}
const preResult = preResultGuard.value;
if (preResult === "dispatched") {
debugLog("autoLoop", {
phase: "exit",

View file

@ -19,6 +19,7 @@
import { createTestContext } from "./test-helpers.ts";
import {
withTimeout,
FINALIZE_PRE_TIMEOUT_MS,
FINALIZE_POST_TIMEOUT_MS,
} from "../auto/finalize-timeout.ts";
@ -78,6 +79,25 @@ const { assertTrue, assertEq, report } = createTestContext();
assertTrue(caught, "rejection should propagate");
}
// ═══ Test: FINALIZE_PRE_TIMEOUT_MS is defined and reasonable ═════════════════
{
console.log("\n=== #3757: pre-verification timeout constant is defined and reasonable ===");
assertTrue(
typeof FINALIZE_PRE_TIMEOUT_MS === "number",
"FINALIZE_PRE_TIMEOUT_MS should be a number",
);
assertTrue(
FINALIZE_PRE_TIMEOUT_MS >= 30_000,
`pre timeout should be >= 30s (got ${FINALIZE_PRE_TIMEOUT_MS}ms)`,
);
assertTrue(
FINALIZE_PRE_TIMEOUT_MS <= 120_000,
`pre timeout should be <= 120s (got ${FINALIZE_PRE_TIMEOUT_MS}ms)`,
);
}
// ═══ Test: FINALIZE_POST_TIMEOUT_MS is defined and reasonable ═════════════════
{
@ -113,4 +133,47 @@ const { assertTrue, assertEq, report } = createTestContext();
assertEq(result.timedOut, false, "should not time out");
}
// ═══ Test: runFinalize wraps BOTH pre and post verification with withTimeout ═
{
console.log("\n=== #3757: runFinalize wraps preVerification with timeout guard ===");
const { readFileSync } = await import("node:fs");
const phasesSource = readFileSync(
new URL("../auto/phases.ts", import.meta.url),
"utf-8",
);
// Find the runFinalize function body
const fnIdx = phasesSource.indexOf("export async function runFinalize(");
assertTrue(fnIdx > 0, "runFinalize function should exist in phases.ts");
const fnBody = phasesSource.slice(fnIdx, fnIdx + 5000);
// postUnitPreVerification must be wrapped in withTimeout
const preTimeoutIdx = fnBody.indexOf("withTimeout(");
assertTrue(preTimeoutIdx > 0, "withTimeout should appear in runFinalize");
const preVerIdx = fnBody.indexOf("postUnitPreVerification");
assertTrue(preVerIdx > 0, "postUnitPreVerification should appear in runFinalize");
// The first withTimeout should wrap postUnitPreVerification (not postUnitPostVerification)
const firstWithTimeout = fnBody.slice(preTimeoutIdx, preTimeoutIdx + 200);
assertTrue(
firstWithTimeout.includes("postUnitPreVerification"),
"first withTimeout in runFinalize should wrap postUnitPreVerification",
);
// postUnitPostVerification must also be wrapped
const postVerIdx = fnBody.indexOf("postUnitPostVerification");
assertTrue(postVerIdx > 0, "postUnitPostVerification should appear in runFinalize");
// Count withTimeout occurrences — should be at least 2 (pre + post)
const timeoutCount = (fnBody.match(/withTimeout\(/g) || []).length;
assertTrue(
timeoutCount >= 2,
`runFinalize should have at least 2 withTimeout guards (found ${timeoutCount})`,
);
}
report();