fix(gsd): add timeout guard around postUnitPreVerification to prevent auto-loop hang
postUnitPostVerification already has a 60s timeout guard (#2344) but postUnitPreVerification was called with bare await — if any async operation inside it never resolves (browser teardown, worktree sync, safety harness validation), the auto-loop freezes permanently with no error, notification, or recovery. Wrap postUnitPreVerification in the same withTimeout() pattern with a dedicated FINALIZE_PRE_TIMEOUT_MS constant. On timeout, log a warning and force-continue to the next iteration. Closes #3757
This commit is contained in:
parent
94f9b311d3
commit
32a7af0513
3 changed files with 91 additions and 2 deletions
|
|
@ -7,6 +7,9 @@
|
|||
* Leaf module — no imports from auto/ to avoid circular dependencies.
|
||||
*/
|
||||
|
||||
/** Timeout for postUnitPreVerification in runFinalize (ms). */
|
||||
export const FINALIZE_PRE_TIMEOUT_MS = 60_000;
|
||||
|
||||
/** Timeout for postUnitPostVerification in runFinalize (ms). */
|
||||
export const FINALIZE_POST_TIMEOUT_MS = 60_000;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import { gsdRoot } from "../paths.js";
|
|||
import { atomicWriteSync } from "../atomic-write.js";
|
||||
import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js";
|
||||
import { writeUnitRuntimeRecord } from "../unit-runtime.js";
|
||||
import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
|
||||
import { withTimeout, FINALIZE_PRE_TIMEOUT_MS, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
|
||||
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
||||
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
||||
import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
|
||||
|
|
@ -1450,13 +1450,36 @@ export async function runFinalize(
|
|||
};
|
||||
|
||||
// Pre-verification processing (commit, doctor, state rebuild, etc.)
|
||||
// Timeout guard: if postUnitPreVerification hangs (e.g., safety harness
|
||||
// deadlock, browser teardown hang, worktree sync stall), force-continue
|
||||
// after timeout so the auto-loop is not permanently frozen (#3757).
|
||||
// Sidecar items use lightweight pre-verification opts
|
||||
const preVerificationOpts: PreVerificationOpts | undefined = sidecarItem
|
||||
? sidecarItem.kind === "hook"
|
||||
? { skipSettleDelay: true, skipWorktreeSync: true }
|
||||
: { skipSettleDelay: true }
|
||||
: undefined;
|
||||
const preResult = await deps.postUnitPreVerification(postUnitCtx, preVerificationOpts);
|
||||
const preResultGuard = await withTimeout(
|
||||
deps.postUnitPreVerification(postUnitCtx, preVerificationOpts),
|
||||
FINALIZE_PRE_TIMEOUT_MS,
|
||||
"postUnitPreVerification",
|
||||
);
|
||||
|
||||
if (preResultGuard.timedOut) {
|
||||
debugLog("autoLoop", {
|
||||
phase: "pre-verification-timeout",
|
||||
iteration: ic.iteration,
|
||||
unitType: iterData.unitType,
|
||||
unitId: iterData.unitId,
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`postUnitPreVerification timed out after ${FINALIZE_PRE_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} — continuing to next iteration`,
|
||||
"warning",
|
||||
);
|
||||
return { action: "next", data: undefined as void };
|
||||
}
|
||||
|
||||
const preResult = preResultGuard.value;
|
||||
if (preResult === "dispatched") {
|
||||
debugLog("autoLoop", {
|
||||
phase: "exit",
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
import { createTestContext } from "./test-helpers.ts";
|
||||
import {
|
||||
withTimeout,
|
||||
FINALIZE_PRE_TIMEOUT_MS,
|
||||
FINALIZE_POST_TIMEOUT_MS,
|
||||
} from "../auto/finalize-timeout.ts";
|
||||
|
||||
|
|
@ -78,6 +79,25 @@ const { assertTrue, assertEq, report } = createTestContext();
|
|||
assertTrue(caught, "rejection should propagate");
|
||||
}
|
||||
|
||||
// ═══ Test: FINALIZE_PRE_TIMEOUT_MS is defined and reasonable ═════════════════
|
||||
|
||||
{
|
||||
console.log("\n=== #3757: pre-verification timeout constant is defined and reasonable ===");
|
||||
|
||||
assertTrue(
|
||||
typeof FINALIZE_PRE_TIMEOUT_MS === "number",
|
||||
"FINALIZE_PRE_TIMEOUT_MS should be a number",
|
||||
);
|
||||
assertTrue(
|
||||
FINALIZE_PRE_TIMEOUT_MS >= 30_000,
|
||||
`pre timeout should be >= 30s (got ${FINALIZE_PRE_TIMEOUT_MS}ms)`,
|
||||
);
|
||||
assertTrue(
|
||||
FINALIZE_PRE_TIMEOUT_MS <= 120_000,
|
||||
`pre timeout should be <= 120s (got ${FINALIZE_PRE_TIMEOUT_MS}ms)`,
|
||||
);
|
||||
}
|
||||
|
||||
// ═══ Test: FINALIZE_POST_TIMEOUT_MS is defined and reasonable ═════════════════
|
||||
|
||||
{
|
||||
|
|
@ -113,4 +133,47 @@ const { assertTrue, assertEq, report } = createTestContext();
|
|||
assertEq(result.timedOut, false, "should not time out");
|
||||
}
|
||||
|
||||
// ═══ Test: runFinalize wraps BOTH pre and post verification with withTimeout ═
|
||||
|
||||
{
|
||||
console.log("\n=== #3757: runFinalize wraps preVerification with timeout guard ===");
|
||||
|
||||
const { readFileSync } = await import("node:fs");
|
||||
const phasesSource = readFileSync(
|
||||
new URL("../auto/phases.ts", import.meta.url),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
// Find the runFinalize function body
|
||||
const fnIdx = phasesSource.indexOf("export async function runFinalize(");
|
||||
assertTrue(fnIdx > 0, "runFinalize function should exist in phases.ts");
|
||||
|
||||
const fnBody = phasesSource.slice(fnIdx, fnIdx + 5000);
|
||||
|
||||
// postUnitPreVerification must be wrapped in withTimeout
|
||||
const preTimeoutIdx = fnBody.indexOf("withTimeout(");
|
||||
assertTrue(preTimeoutIdx > 0, "withTimeout should appear in runFinalize");
|
||||
|
||||
const preVerIdx = fnBody.indexOf("postUnitPreVerification");
|
||||
assertTrue(preVerIdx > 0, "postUnitPreVerification should appear in runFinalize");
|
||||
|
||||
// The first withTimeout should wrap postUnitPreVerification (not postUnitPostVerification)
|
||||
const firstWithTimeout = fnBody.slice(preTimeoutIdx, preTimeoutIdx + 200);
|
||||
assertTrue(
|
||||
firstWithTimeout.includes("postUnitPreVerification"),
|
||||
"first withTimeout in runFinalize should wrap postUnitPreVerification",
|
||||
);
|
||||
|
||||
// postUnitPostVerification must also be wrapped
|
||||
const postVerIdx = fnBody.indexOf("postUnitPostVerification");
|
||||
assertTrue(postVerIdx > 0, "postUnitPostVerification should appear in runFinalize");
|
||||
|
||||
// Count withTimeout occurrences — should be at least 2 (pre + post)
|
||||
const timeoutCount = (fnBody.match(/withTimeout\(/g) || []).length;
|
||||
assertTrue(
|
||||
timeoutCount >= 2,
|
||||
`runFinalize should have at least 2 withTimeout guards (found ${timeoutCount})`,
|
||||
);
|
||||
}
|
||||
|
||||
report();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue