From e6ceb8dfe8fe363abdd4f01ede7ddc6e0967f334 Mon Sep 17 00:00:00 2001 From: deseltrus <101901449+deseltrus@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:39:13 +0100 Subject: [PATCH] fix: skip verification retry on spawn infra errors (ETIMEDOUT, ENOENT) (#1340) --- .../extensions/gsd/auto-verification.ts | 22 ++++++- .../gsd/tests/verification-gate.test.ts | 65 +++++++++++++++++++ src/resources/extensions/gsd/types.ts | 4 ++ .../extensions/gsd/verification-gate.ts | 25 ++++++- 4 files changed, 112 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/gsd/auto-verification.ts b/src/resources/extensions/gsd/auto-verification.ts index e4dd26d85..6f794bc66 100644 --- a/src/resources/extensions/gsd/auto-verification.ts +++ b/src/resources/extensions/gsd/auto-verification.ts @@ -175,7 +175,23 @@ export async function runPostUnitVerification( s.verificationRetryCount.delete(s.currentUnit.id); s.pendingVerificationRetry = null; return "continue"; - } else if (result.discoverySource === "package-json") { + } + + // Check if all failures are infra errors (ETIMEDOUT, ENOENT, etc.). + // Infra errors are transient OS-level problems the agent cannot fix — + // retrying the entire task is wasteful and creates phantom failures. + const failedChecks = result.checks.filter(c => c.exitCode !== 0); + const allInfraErrors = failedChecks.length > 0 && failedChecks.every(c => c.infraError === true); + if (allInfraErrors) { + const infraNames = failedChecks.map(f => f.command).join(", "); + ctx.ui.notify(`Verification gate: infra error (${infraNames}) — skipping retry, not a code issue`, "warning"); + process.stderr.write(`verification-gate: all ${failedChecks.length} failure(s) are infra errors — treating as transient, no retry\n`); + s.verificationRetryCount.delete(s.currentUnit.id); + s.pendingVerificationRetry = null; + return "continue"; + } + + if (result.discoverySource === "package-json") { // Auto-discovered checks from package.json may fail on pre-existing errors // that the current task didn't introduce. Don't trigger the retry loop — // log a warning and let the task proceed (#1186). @@ -189,7 +205,9 @@ export async function runPostUnitVerification( s.verificationRetryCount.delete(s.currentUnit.id); s.pendingVerificationRetry = null; return "continue"; - } else if (autoFixEnabled && attempt + 1 <= maxRetries) { + } + + if (autoFixEnabled && attempt + 1 <= maxRetries) { const nextAttempt = attempt + 1; s.verificationRetryCount.set(s.currentUnit.id, nextAttempt); s.pendingVerificationRetry = { diff --git a/src/resources/extensions/gsd/tests/verification-gate.test.ts b/src/resources/extensions/gsd/tests/verification-gate.test.ts index 4bc4aa0a1..c1c89aa4e 100644 --- a/src/resources/extensions/gsd/tests/verification-gate.test.ts +++ b/src/resources/extensions/gsd/tests/verification-gate.test.ts @@ -261,6 +261,71 @@ test("verification-gate: each check has durationMs", () => { } }); +// ─── Infra Error Tagging Tests ─────────────────────────────────────────────── + +test("verification-gate: spawnSync ETIMEDOUT → infraError: true on the check", () => { + const tmp = makeTempDir("vg-etimedout"); + try { + // Use a short timeout against a long sleep to guarantee ETIMEDOUT + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["sleep 60"], + commandTimeoutMs: 200, + }); + assert.equal(result.passed, false); + assert.equal(result.checks.length, 1); + assert.ok(result.checks[0].exitCode !== 0, "should have non-zero exit code"); + assert.equal(result.checks[0].infraError, true, "ETIMEDOUT should be tagged as infraError"); + } finally { + rmSync(tmp, { recursive: true, force: true, maxRetries: 3, retryDelay: 100 }); + } +}); + +test("verification-gate: real command failure does NOT have infraError", () => { + const tmp = makeTempDir("vg-real-fail"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + // Cross-platform: node with --eval flag and no shell-sensitive characters + preferenceCommands: ["node --eval \"process.exitCode=1\""], + }); + assert.equal(result.passed, false); + assert.equal(result.checks.length, 1); + assert.equal(result.checks[0].exitCode, 1); + assert.equal(result.checks[0].infraError, undefined, "real failure should not be tagged as infraError"); + } finally { + rmSync(tmp, { recursive: true, force: true, maxRetries: 3, retryDelay: 100 }); + } +}); + +test("verification-gate: mixed infra + real failure — only infra check is tagged", () => { + const tmp = makeTempDir("vg-mixed-infra"); + try { + // Use a timeout that kills "sleep 60" but lets "node --eval" complete (~80ms). + // The gate applies the same timeout to each command sequentially. + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["sleep 60", "node --eval \"process.exitCode=2\""], + commandTimeoutMs: 500, + }); + assert.equal(result.passed, false); + assert.equal(result.checks.length, 2); + // First check: ETIMEDOUT → infraError + assert.equal(result.checks[0].infraError, true, "timed-out command should be infraError"); + // Second check: real exit 2 → no infraError + assert.equal(result.checks[1].exitCode, 2); + assert.equal(result.checks[1].infraError, undefined, "real failure should not be infraError"); + } finally { + rmSync(tmp, { recursive: true, force: true, maxRetries: 3, retryDelay: 100 }); + } +}); + // ─── Preference Validation Tests ───────────────────────────────────────────── test("verification-gate: validatePreferences accepts valid verification keys", () => { diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index d8105bd46..ce7fe9cf4 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -56,6 +56,10 @@ export interface VerificationCheck { stderr: string; durationMs: number; blocking: boolean; // true for preference/task-plan sources, false for package-json (advisory only) + /** True when the failure was a spawn/infra error (ETIMEDOUT, ENOENT, ENOMEM) + * rather than the command itself failing. Infra errors are transient and + * should not trigger auto-fix retries — the agent cannot fix the OS. */ + infraError?: boolean; } /** A runtime error captured from bg-shell processes or browser console */ diff --git a/src/resources/extensions/gsd/verification-gate.ts b/src/resources/extensions/gsd/verification-gate.ts index 3a5ac64a7..04665d97b 100644 --- a/src/resources/extensions/gsd/verification-gate.ts +++ b/src/resources/extensions/gsd/verification-gate.ts @@ -232,13 +232,20 @@ export interface RunVerificationGateOptions { commandTimeoutMs?: number; } +/** Error codes from spawnSync that indicate infrastructure/OS-level failures + * rather than the command itself failing. These are transient — the agent + * cannot fix them, so they should not trigger auto-fix retries. */ +const INFRA_ERROR_CODES = new Set(["ETIMEDOUT", "ENOENT", "ENOMEM", "EMFILE", "ENFILE", "EAGAIN"]); + /** * Run the verification gate: discover commands, execute each via spawnSync, * and return a structured result. * * - All commands run sequentially regardless of individual pass/fail. - * - `passed` is true when every command exits 0 (or no commands are discovered). + * - `passed` is true when every blocking command exits 0 (or no commands are discovered). * - stdout/stderr per command are truncated to 10 KB. + * - Spawn/infra errors (ETIMEDOUT, ENOENT, etc.) are tagged with `infraError: true` + * so the retry logic can distinguish "the OS couldn't run this" from "the tests failed". */ export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult { const timestamp = Date.now(); @@ -279,12 +286,26 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi let stderr: string; if (result.error) { - // Command not found or spawn failure + // Spawn infrastructure failure — OS-level, not a test failure. + // Tag with infraError so the retry logic can skip auto-fix attempts. + const errCode = (result.error as NodeJS.ErrnoException).code; + const isInfra = !!errCode && INFRA_ERROR_CODES.has(errCode); exitCode = 127; stderr = truncate( (result.stderr || "") + "\n" + (result.error as Error).message, MAX_OUTPUT_BYTES, ); + + checks.push({ + command, + exitCode, + stdout: truncate(result.stdout, MAX_OUTPUT_BYTES), + stderr, + durationMs, + blocking, + ...(isInfra ? { infraError: true } : {}), + }); + continue; } else { // status is null when killed by signal — treat as failure exitCode = result.status ?? 1;