fix: skip verification retry on spawn infra errors (ETIMEDOUT, ENOENT) (#1340)

This commit is contained in:
deseltrus 2026-03-19 14:39:13 +01:00 committed by GitHub
parent d7bf3d4e72
commit e6ceb8dfe8
4 changed files with 112 additions and 4 deletions

View file

@ -175,7 +175,23 @@ export async function runPostUnitVerification(
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
return "continue";
} else if (result.discoverySource === "package-json") {
}
// Check if all failures are infra errors (ETIMEDOUT, ENOENT, etc.).
// Infra errors are transient OS-level problems the agent cannot fix —
// retrying the entire task is wasteful and creates phantom failures.
const failedChecks = result.checks.filter(c => c.exitCode !== 0);
const allInfraErrors = failedChecks.length > 0 && failedChecks.every(c => c.infraError === true);
if (allInfraErrors) {
const infraNames = failedChecks.map(f => f.command).join(", ");
ctx.ui.notify(`Verification gate: infra error (${infraNames}) — skipping retry, not a code issue`, "warning");
process.stderr.write(`verification-gate: all ${failedChecks.length} failure(s) are infra errors — treating as transient, no retry\n`);
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
return "continue";
}
if (result.discoverySource === "package-json") {
// Auto-discovered checks from package.json may fail on pre-existing errors
// that the current task didn't introduce. Don't trigger the retry loop —
// log a warning and let the task proceed (#1186).
@ -189,7 +205,9 @@ export async function runPostUnitVerification(
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
return "continue";
} else if (autoFixEnabled && attempt + 1 <= maxRetries) {
}
if (autoFixEnabled && attempt + 1 <= maxRetries) {
const nextAttempt = attempt + 1;
s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
s.pendingVerificationRetry = {

View file

@ -261,6 +261,71 @@ test("verification-gate: each check has durationMs", () => {
}
});
// ─── Infra Error Tagging Tests ───────────────────────────────────────────────
test("verification-gate: spawnSync ETIMEDOUT → infraError: true on the check", () => {
const tmp = makeTempDir("vg-etimedout");
try {
// Use a short timeout against a long sleep to guarantee ETIMEDOUT
const result = runVerificationGate({
basePath: tmp,
unitId: "T01",
cwd: tmp,
preferenceCommands: ["sleep 60"],
commandTimeoutMs: 200,
});
assert.equal(result.passed, false);
assert.equal(result.checks.length, 1);
assert.ok(result.checks[0].exitCode !== 0, "should have non-zero exit code");
assert.equal(result.checks[0].infraError, true, "ETIMEDOUT should be tagged as infraError");
} finally {
rmSync(tmp, { recursive: true, force: true, maxRetries: 3, retryDelay: 100 });
}
});
test("verification-gate: real command failure does NOT have infraError", () => {
const tmp = makeTempDir("vg-real-fail");
try {
const result = runVerificationGate({
basePath: tmp,
unitId: "T01",
cwd: tmp,
// Cross-platform: node with --eval flag and no shell-sensitive characters
preferenceCommands: ["node --eval \"process.exitCode=1\""],
});
assert.equal(result.passed, false);
assert.equal(result.checks.length, 1);
assert.equal(result.checks[0].exitCode, 1);
assert.equal(result.checks[0].infraError, undefined, "real failure should not be tagged as infraError");
} finally {
rmSync(tmp, { recursive: true, force: true, maxRetries: 3, retryDelay: 100 });
}
});
test("verification-gate: mixed infra + real failure — only infra check is tagged", () => {
const tmp = makeTempDir("vg-mixed-infra");
try {
// Use a timeout that kills "sleep 60" but lets "node --eval" complete (~80ms).
// The gate applies the same timeout to each command sequentially.
const result = runVerificationGate({
basePath: tmp,
unitId: "T01",
cwd: tmp,
preferenceCommands: ["sleep 60", "node --eval \"process.exitCode=2\""],
commandTimeoutMs: 500,
});
assert.equal(result.passed, false);
assert.equal(result.checks.length, 2);
// First check: ETIMEDOUT → infraError
assert.equal(result.checks[0].infraError, true, "timed-out command should be infraError");
// Second check: real exit 2 → no infraError
assert.equal(result.checks[1].exitCode, 2);
assert.equal(result.checks[1].infraError, undefined, "real failure should not be infraError");
} finally {
rmSync(tmp, { recursive: true, force: true, maxRetries: 3, retryDelay: 100 });
}
});
// ─── Preference Validation Tests ─────────────────────────────────────────────
test("verification-gate: validatePreferences accepts valid verification keys", () => {

View file

@ -56,6 +56,10 @@ export interface VerificationCheck {
stderr: string;
durationMs: number;
blocking: boolean; // true for preference/task-plan sources, false for package-json (advisory only)
/** True when the failure was a spawn/infra error (ETIMEDOUT, ENOENT, ENOMEM)
* rather than the command itself failing. Infra errors are transient and
* should not trigger auto-fix retries the agent cannot fix the OS. */
infraError?: boolean;
}
/** A runtime error captured from bg-shell processes or browser console */

View file

@ -232,13 +232,20 @@ export interface RunVerificationGateOptions {
commandTimeoutMs?: number;
}
/** Error codes from spawnSync that indicate infrastructure/OS-level failures
* rather than the command itself failing. These are transient the agent
* cannot fix them, so they should not trigger auto-fix retries. */
const INFRA_ERROR_CODES = new Set(["ETIMEDOUT", "ENOENT", "ENOMEM", "EMFILE", "ENFILE", "EAGAIN"]);
/**
* Run the verification gate: discover commands, execute each via spawnSync,
* and return a structured result.
*
* - All commands run sequentially regardless of individual pass/fail.
* - `passed` is true when every command exits 0 (or no commands are discovered).
* - `passed` is true when every blocking command exits 0 (or no commands are discovered).
* - stdout/stderr per command are truncated to 10 KB.
* - Spawn/infra errors (ETIMEDOUT, ENOENT, etc.) are tagged with `infraError: true`
* so the retry logic can distinguish "the OS couldn't run this" from "the tests failed".
*/
export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult {
const timestamp = Date.now();
@ -279,12 +286,26 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi
let stderr: string;
if (result.error) {
// Command not found or spawn failure
// Spawn infrastructure failure — OS-level, not a test failure.
// Tag with infraError so the retry logic can skip auto-fix attempts.
const errCode = (result.error as NodeJS.ErrnoException).code;
const isInfra = !!errCode && INFRA_ERROR_CODES.has(errCode);
exitCode = 127;
stderr = truncate(
(result.stderr || "") + "\n" + (result.error as Error).message,
MAX_OUTPUT_BYTES,
);
checks.push({
command,
exitCode,
stdout: truncate(result.stdout, MAX_OUTPUT_BYTES),
stderr,
durationMs,
blocking,
...(isInfra ? { infraError: true } : {}),
});
continue;
} else {
// status is null when killed by signal — treat as failure
exitCode = result.status ?? 1;