From 58903093cdeff01029d98f735efc2e98306fb7a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=82CHES?= Date: Wed, 18 Mar 2026 10:13:28 -0600 Subject: [PATCH] fix: non-blocking verification gate for auto-discovered commands (#1177) * fix: make package-json discovered verification commands non-blocking (advisory only) Auto-discovered commands from package.json scripts (typecheck, lint, test) are advisory: their failures are logged as warnings but do not block the gate or trigger retries. Only explicitly configured preference commands and task-plan verify commands remain blocking. Co-Authored-By: Claude Opus 4.6 (1M context) * fix: add missing blocking field to verification-evidence test fixtures The previous commit added `blocking: boolean` to VerificationCheck but only updated verification-gate.test.ts. The evidence test file had 26 VerificationCheck literals missing the new required field. Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- .../extensions/gsd/auto-verification.ts | 34 ++++- .../gsd/tests/verification-evidence.test.ts | 50 +++--- .../gsd/tests/verification-gate.test.ts | 143 +++++++++++++++++- src/resources/extensions/gsd/types.ts | 1 + .../extensions/gsd/verification-evidence.ts | 2 + .../extensions/gsd/verification-gate.ts | 15 +- 6 files changed, 205 insertions(+), 40 deletions(-) diff --git a/src/resources/extensions/gsd/auto-verification.ts b/src/resources/extensions/gsd/auto-verification.ts index fc9a524ed..08aea1d60 100644 --- a/src/resources/extensions/gsd/auto-verification.ts +++ b/src/resources/extensions/gsd/auto-verification.ts @@ -105,19 +105,39 @@ export async function runPostUnitVerification( const completionKey = `${s.currentUnit.type}/${s.currentUnit.id}`; if (result.checks.length > 0) { - const passCount = result.checks.filter(c => c.exitCode === 0).length; - const total = result.checks.length; + const blockingChecks = result.checks.filter(c => c.blocking); + const advisoryChecks = result.checks.filter(c => !c.blocking); + const blockingPassCount = blockingChecks.filter(c => c.exitCode === 0).length; + const advisoryFailCount = advisoryChecks.filter(c => c.exitCode !== 0).length; + if (result.passed) { - ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`); + let msg = blockingChecks.length > 0 + ? `Verification gate: ${blockingPassCount}/${blockingChecks.length} blocking checks passed` + : `Verification gate: passed (no blocking checks)`; + if (advisoryFailCount > 0) { + msg += ` (${advisoryFailCount} advisory warning${advisoryFailCount > 1 ? "s" : ""})`; + } + ctx.ui.notify(msg); + // Log advisory warnings to stderr for visibility + if (advisoryFailCount > 0) { + const advisoryFailures = advisoryChecks.filter(c => c.exitCode !== 0); + process.stderr.write(`verification-gate: ${advisoryFailCount} advisory (non-blocking) failure(s)\n`); + for (const f of advisoryFailures) { + process.stderr.write(` [advisory] ${f.command} exited ${f.exitCode}\n`); + } + } } else { - const failures = result.checks.filter(c => c.exitCode !== 0); - const failNames = failures.map(f => f.command).join(", "); + const blockingFailures = blockingChecks.filter(c => c.exitCode !== 0); + const failNames = blockingFailures.map(f => f.command).join(", "); ctx.ui.notify(`Verification gate: FAILED — ${failNames}`); - process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`); - for (const f of failures) { + process.stderr.write(`verification-gate: ${blockingFailures.length}/${blockingChecks.length} blocking checks failed\n`); + for (const f of blockingFailures) { process.stderr.write(` ${f.command} exited ${f.exitCode}\n`); if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`); } + if (advisoryFailCount > 0) { + process.stderr.write(`verification-gate: ${advisoryFailCount} additional advisory (non-blocking) failure(s)\n`); + } } } diff --git a/src/resources/extensions/gsd/tests/verification-evidence.test.ts b/src/resources/extensions/gsd/tests/verification-evidence.test.ts index a02590a85..aa992807f 100644 --- a/src/resources/extensions/gsd/tests/verification-evidence.test.ts +++ b/src/resources/extensions/gsd/tests/verification-evidence.test.ts @@ -58,6 +58,7 @@ test("verification-evidence: writeVerificationJSON writes correct JSON shape", ( stdout: "all good", stderr: "", durationMs: 2340, + blocking: true, }, ], }); @@ -105,9 +106,9 @@ test("verification-evidence: writeVerificationJSON maps exitCode to verdict corr const result = makeResult({ passed: false, checks: [ - { command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, - { command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200 }, - { command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300 }, + { command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true }, + { command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200, blocking: true }, + { command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300, blocking: true }, ], }); @@ -133,6 +134,7 @@ test("verification-evidence: writeVerificationJSON excludes stdout/stderr from o stdout: "hello\n", stderr: "some warning", durationMs: 50, + blocking: true, }, ], }); @@ -181,8 +183,8 @@ test("verification-evidence: writeVerificationJSON uses optional unitId when pro test("verification-evidence: formatEvidenceTable returns markdown table with correct columns", () => { const result = makeResult({ checks: [ - { command: "npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 }, - { command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100 }, + { command: "npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340, blocking: true }, + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100, blocking: true }, ], }); @@ -214,9 +216,9 @@ test("verification-evidence: formatEvidenceTable returns no-checks message for e test("verification-evidence: formatEvidenceTable formats duration as seconds with 1 decimal", () => { const result = makeResult({ checks: [ - { command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150 }, - { command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 }, - { command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0 }, + { command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150, blocking: true }, + { command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340, blocking: true }, + { command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0, blocking: true }, ], }); @@ -230,8 +232,8 @@ test("verification-evidence: formatEvidenceTable uses ✅/❌ emoji for pass/fai const result = makeResult({ passed: false, checks: [ - { command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, - { command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200 }, + { command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true }, + { command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200, blocking: true }, ], }); @@ -335,8 +337,8 @@ test("verification-evidence: integration — VerificationResult → JSON → tab const result = makeResult({ passed: false, checks: [ - { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 }, - { command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200 }, + { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500, blocking: true }, + { command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200, blocking: true }, ], discoverySource: "package-json", }); @@ -390,7 +392,7 @@ test("verification-evidence: writeVerificationJSON with retryAttempt and maxRetr const result = makeResult({ passed: false, checks: [ - { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300 }, + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300, blocking: true }, ], }); @@ -415,7 +417,7 @@ test("verification-evidence: writeVerificationJSON without retry params omits re const result = makeResult({ passed: true, checks: [ - { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true }, ], }); @@ -441,7 +443,7 @@ test("verification-evidence: writeVerificationJSON includes runtimeErrors when p const result = makeResult({ passed: false, checks: [ - { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true }, ], runtimeErrors: [ { source: "bg-shell", severity: "crash", message: "Server crashed", blocking: true }, @@ -473,7 +475,7 @@ test("verification-evidence: writeVerificationJSON omits runtimeErrors when abse const result = makeResult({ passed: true, checks: [ - { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 }, + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50, blocking: true }, ], }); @@ -512,7 +514,7 @@ test("verification-evidence: formatEvidenceTable appends runtime errors section" const result = makeResult({ passed: false, checks: [ - { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true }, ], runtimeErrors: [ { source: "bg-shell", severity: "crash", message: "Server crashed with SIGKILL", blocking: true }, @@ -537,7 +539,7 @@ test("verification-evidence: formatEvidenceTable omits runtime errors section wh const result = makeResult({ passed: true, checks: [ - { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 }, + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200, blocking: true }, ], }); @@ -552,7 +554,7 @@ test("verification-evidence: formatEvidenceTable truncates runtime error message const result = makeResult({ passed: false, checks: [ - { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true }, ], runtimeErrors: [ { source: "bg-shell", severity: "error", message: longMessage, blocking: false }, @@ -598,7 +600,7 @@ test("verification-evidence: writeVerificationJSON includes auditWarnings when p const result = makeResult({ passed: true, checks: [ - { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true }, ], auditWarnings: SAMPLE_AUDIT_WARNINGS, }); @@ -627,7 +629,7 @@ test("verification-evidence: writeVerificationJSON omits auditWarnings when abse const result = makeResult({ passed: true, checks: [ - { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 }, + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50, blocking: true }, ], }); @@ -666,7 +668,7 @@ test("verification-evidence: formatEvidenceTable appends audit warnings section" const result = makeResult({ passed: true, checks: [ - { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true }, ], auditWarnings: SAMPLE_AUDIT_WARNINGS, }); @@ -689,7 +691,7 @@ test("verification-evidence: formatEvidenceTable omits audit warnings section wh const result = makeResult({ passed: true, checks: [ - { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 }, + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200, blocking: true }, ], }); @@ -705,7 +707,7 @@ test("verification-evidence: integration — VerificationResult with auditWarnin const result = makeResult({ passed: true, checks: [ - { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 }, + { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500, blocking: true }, ], auditWarnings: [ { diff --git a/src/resources/extensions/gsd/tests/verification-gate.test.ts b/src/resources/extensions/gsd/tests/verification-gate.test.ts index 2b6b90929..4bc4aa0a1 100644 --- a/src/resources/extensions/gsd/tests/verification-gate.test.ts +++ b/src/resources/extensions/gsd/tests/verification-gate.test.ts @@ -581,7 +581,7 @@ test("formatFailureContext: formats a single failure with command, exit code, st const result: import("../types.ts").VerificationResult = { passed: false, checks: [ - { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500 }, + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500, blocking: true }, ], discoverySource: "preference", timestamp: Date.now(), @@ -598,9 +598,9 @@ test("formatFailureContext: formats multiple failures", () => { const result: import("../types.ts").VerificationResult = { passed: false, checks: [ - { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100 }, - { command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200 }, - { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50 }, + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100, blocking: true }, + { command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200, blocking: true }, + { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50, blocking: true }, ], discoverySource: "preference", timestamp: Date.now(), @@ -619,7 +619,7 @@ test("formatFailureContext: truncates stderr longer than 2000 chars", () => { const result: import("../types.ts").VerificationResult = { passed: false, checks: [ - { command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100 }, + { command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100, blocking: true }, ], discoverySource: "preference", timestamp: Date.now(), @@ -634,8 +634,8 @@ test("formatFailureContext: returns empty string when all checks pass", () => { const result: import("../types.ts").VerificationResult = { passed: true, checks: [ - { command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, - { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200 }, + { command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true }, + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200, blocking: true }, ], discoverySource: "preference", timestamp: Date.now(), @@ -663,6 +663,7 @@ test("formatFailureContext: caps total output at 10,000 chars", () => { stdout: "", stderr: "e".repeat(1000), // 1000 chars each, 20 * ~1050 (with formatting) > 10,000 durationMs: 100, + blocking: true, }); } const result: import("../types.ts").VerificationResult = { @@ -1077,3 +1078,131 @@ test("dependency-audit: subdirectory package.json does not trigger audit", () => assert.equal(npmAuditCalled, false, "subdirectory dependency files should not trigger audit"); assert.deepStrictEqual(result, []); }); + +// ─── Non-Blocking Discovery Tests ──────────────────────────────────────────── + +test("non-blocking: package-json discovered commands failing → result.passed is still true", () => { + const tmp = makeTempDir("vg-nb-pkg-fail"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ scripts: { lint: "eslint .", test: "vitest" } }), + ); + // These commands will fail because eslint/vitest don't exist in the temp dir + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + // No preference commands — discovery falls through to package.json + }); + assert.equal(result.discoverySource, "package-json"); + assert.ok(result.checks.length > 0, "should have discovered package.json checks"); + assert.equal(result.passed, true, "package-json failures should not block the gate"); + for (const check of result.checks) { + assert.equal(check.blocking, false, "package-json checks should be non-blocking"); + } + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("non-blocking: preference commands failing → result.passed is false", () => { + const tmp = makeTempDir("vg-nb-pref-fail"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["sh -c 'exit 1'"], + }); + assert.equal(result.discoverySource, "preference"); + assert.equal(result.passed, false, "preference failures should block the gate"); + assert.equal(result.checks[0].blocking, true, "preference checks should be blocking"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("non-blocking: task-plan commands failing → result.passed is false", () => { + const tmp = makeTempDir("vg-nb-tp-fail"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + taskPlanVerify: "sh -c 'exit 1'", + }); + assert.equal(result.discoverySource, "task-plan"); + assert.equal(result.passed, false, "task-plan failures should block the gate"); + assert.equal(result.checks[0].blocking, true, "task-plan checks should be blocking"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("non-blocking: blocking field is set correctly based on discovery source", () => { + const tmp = makeTempDir("vg-nb-field"); + try { + // preference → blocking + const prefResult = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["echo ok"], + }); + assert.equal(prefResult.checks[0].blocking, true); + + // task-plan → blocking + const tpResult = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + taskPlanVerify: "echo ok", + }); + assert.equal(tpResult.checks[0].blocking, true); + + // package-json → non-blocking + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ scripts: { test: "echo ok" } }), + ); + const pkgResult = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + }); + assert.equal(pkgResult.checks[0].blocking, false); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("non-blocking: formatFailureContext only includes blocking failures", () => { + const result: import("../types.ts").VerificationResult = { + passed: true, + checks: [ + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint warning", durationMs: 100, blocking: false }, + { command: "npm run test", exitCode: 1, stdout: "", stderr: "test error", durationMs: 200, blocking: true }, + { command: "npm run typecheck", exitCode: 1, stdout: "", stderr: "type error", durationMs: 50, blocking: false }, + ], + discoverySource: "preference", + timestamp: Date.now(), + }; + const output = formatFailureContext(result); + assert.ok(output.includes("`npm run test`"), "should include blocking failure"); + assert.ok(!output.includes("npm run lint"), "should not include non-blocking failure"); + assert.ok(!output.includes("npm run typecheck"), "should not include non-blocking failure"); +}); + +test("non-blocking: formatFailureContext returns empty when only non-blocking failures exist", () => { + const result: import("../types.ts").VerificationResult = { + passed: true, + checks: [ + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint warning", durationMs: 100, blocking: false }, + { command: "npm run test", exitCode: 1, stdout: "", stderr: "test warning", durationMs: 200, blocking: false }, + ], + discoverySource: "package-json", + timestamp: Date.now(), + }; + assert.equal(formatFailureContext(result), "", "should return empty when only non-blocking failures"); +}); diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index 1e6110087..bf57ebd0e 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -55,6 +55,7 @@ export interface VerificationCheck { stdout: string; stderr: string; durationMs: number; + blocking: boolean; // true for preference/task-plan sources, false for package-json (advisory only) } /** A runtime error captured from bg-shell processes or browser console */ diff --git a/src/resources/extensions/gsd/verification-evidence.ts b/src/resources/extensions/gsd/verification-evidence.ts index 0918b40f1..0d801013a 100644 --- a/src/resources/extensions/gsd/verification-evidence.ts +++ b/src/resources/extensions/gsd/verification-evidence.ts @@ -20,6 +20,7 @@ export interface EvidenceCheckJSON { exitCode: number; durationMs: number; verdict: "pass" | "fail"; + blocking: boolean; } export interface RuntimeErrorJSON { @@ -80,6 +81,7 @@ export function writeVerificationJSON( exitCode: check.exitCode, durationMs: check.durationMs, verdict: check.exitCode === 0 ? "pass" : "fail", + blocking: check.blocking, })), ...(retryAttempt !== undefined ? { retryAttempt } : {}), ...(maxRetries !== undefined ? { maxRetries } : {}), diff --git a/src/resources/extensions/gsd/verification-gate.ts b/src/resources/extensions/gsd/verification-gate.ts index adfba6da8..3a5ac64a7 100644 --- a/src/resources/extensions/gsd/verification-gate.ts +++ b/src/resources/extensions/gsd/verification-gate.ts @@ -112,7 +112,9 @@ const MAX_FAILURE_CONTEXT_CHARS = 10_000; * Returns an empty string when all checks pass or the checks array is empty. */ export function formatFailureContext(result: VerificationResult): string { - const failures = result.checks.filter((c) => c.exitCode !== 0); + // Only include blocking failures in retry context — non-blocking (advisory) failures + // should not be injected into retry prompts to avoid noise pollution. + const failures = result.checks.filter((c) => c.exitCode !== 0 && c.blocking); if (failures.length === 0) return ""; const blocks: string[] = []; @@ -256,6 +258,10 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi }; } + // Commands from preference and task-plan sources are blocking; + // package-json discovered commands are advisory (non-blocking). + const blocking = source === "preference" || source === "task-plan"; + const checks: VerificationCheck[] = []; for (const command of commands) { @@ -291,11 +297,16 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi stdout: truncate(result.stdout, MAX_OUTPUT_BYTES), stderr, durationMs, + blocking, }); } + // Gate passes if all blocking checks pass (non-blocking failures are advisory) + const blockingChecks = checks.filter(c => c.blocking); + const passed = blockingChecks.length === 0 || blockingChecks.every(c => c.exitCode === 0); + return { - passed: checks.every(c => c.exitCode === 0), + passed, checks, discoverySource: source, timestamp,