fix: non-blocking verification gate for auto-discovered commands (#1177)

* fix: make package-json discovered verification commands non-blocking (advisory only)

Auto-discovered commands from package.json scripts (typecheck, lint, test) are
advisory: their failures are logged as warnings but do not block the gate or
trigger retries. Only explicitly configured preference commands and task-plan
verify commands remain blocking.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: add missing blocking field to verification-evidence test fixtures

The previous commit added `blocking: boolean` to VerificationCheck but
only updated verification-gate.test.ts. The evidence test file had 26
VerificationCheck literals missing the new required field.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-18 10:13:28 -06:00 committed by GitHub
parent 8d04ec19fd
commit 58903093cd
6 changed files with 205 additions and 40 deletions

View file

@ -105,19 +105,39 @@ export async function runPostUnitVerification(
const completionKey = `${s.currentUnit.type}/${s.currentUnit.id}`;
if (result.checks.length > 0) {
const passCount = result.checks.filter(c => c.exitCode === 0).length;
const total = result.checks.length;
const blockingChecks = result.checks.filter(c => c.blocking);
const advisoryChecks = result.checks.filter(c => !c.blocking);
const blockingPassCount = blockingChecks.filter(c => c.exitCode === 0).length;
const advisoryFailCount = advisoryChecks.filter(c => c.exitCode !== 0).length;
if (result.passed) {
ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
let msg = blockingChecks.length > 0
? `Verification gate: ${blockingPassCount}/${blockingChecks.length} blocking checks passed`
: `Verification gate: passed (no blocking checks)`;
if (advisoryFailCount > 0) {
msg += ` (${advisoryFailCount} advisory warning${advisoryFailCount > 1 ? "s" : ""})`;
}
ctx.ui.notify(msg);
// Log advisory warnings to stderr for visibility
if (advisoryFailCount > 0) {
const advisoryFailures = advisoryChecks.filter(c => c.exitCode !== 0);
process.stderr.write(`verification-gate: ${advisoryFailCount} advisory (non-blocking) failure(s)\n`);
for (const f of advisoryFailures) {
process.stderr.write(` [advisory] ${f.command} exited ${f.exitCode}\n`);
}
}
} else {
const failures = result.checks.filter(c => c.exitCode !== 0);
const failNames = failures.map(f => f.command).join(", ");
const blockingFailures = blockingChecks.filter(c => c.exitCode !== 0);
const failNames = blockingFailures.map(f => f.command).join(", ");
ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
for (const f of failures) {
process.stderr.write(`verification-gate: ${blockingFailures.length}/${blockingChecks.length} blocking checks failed\n`);
for (const f of blockingFailures) {
process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
}
if (advisoryFailCount > 0) {
process.stderr.write(`verification-gate: ${advisoryFailCount} additional advisory (non-blocking) failure(s)\n`);
}
}
}

View file

@ -58,6 +58,7 @@ test("verification-evidence: writeVerificationJSON writes correct JSON shape", (
stdout: "all good",
stderr: "",
durationMs: 2340,
blocking: true,
},
],
});
@ -105,9 +106,9 @@ test("verification-evidence: writeVerificationJSON maps exitCode to verdict corr
const result = makeResult({
passed: false,
checks: [
{ command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
{ command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200 },
{ command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300 },
{ command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true },
{ command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200, blocking: true },
{ command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300, blocking: true },
],
});
@ -133,6 +134,7 @@ test("verification-evidence: writeVerificationJSON excludes stdout/stderr from o
stdout: "hello\n",
stderr: "some warning",
durationMs: 50,
blocking: true,
},
],
});
@ -181,8 +183,8 @@ test("verification-evidence: writeVerificationJSON uses optional unitId when pro
test("verification-evidence: formatEvidenceTable returns markdown table with correct columns", () => {
const result = makeResult({
checks: [
{ command: "npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 },
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100 },
{ command: "npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340, blocking: true },
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100, blocking: true },
],
});
@ -214,9 +216,9 @@ test("verification-evidence: formatEvidenceTable returns no-checks message for e
test("verification-evidence: formatEvidenceTable formats duration as seconds with 1 decimal", () => {
const result = makeResult({
checks: [
{ command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150 },
{ command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 },
{ command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0 },
{ command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150, blocking: true },
{ command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340, blocking: true },
{ command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0, blocking: true },
],
});
@ -230,8 +232,8 @@ test("verification-evidence: formatEvidenceTable uses ✅/❌ emoji for pass/fai
const result = makeResult({
passed: false,
checks: [
{ command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
{ command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200 },
{ command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true },
{ command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200, blocking: true },
],
});
@ -335,8 +337,8 @@ test("verification-evidence: integration — VerificationResult → JSON → tab
const result = makeResult({
passed: false,
checks: [
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 },
{ command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200 },
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500, blocking: true },
{ command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200, blocking: true },
],
discoverySource: "package-json",
});
@ -390,7 +392,7 @@ test("verification-evidence: writeVerificationJSON with retryAttempt and maxRetr
const result = makeResult({
passed: false,
checks: [
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300 },
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300, blocking: true },
],
});
@ -415,7 +417,7 @@ test("verification-evidence: writeVerificationJSON without retry params omits re
const result = makeResult({
passed: true,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true },
],
});
@ -441,7 +443,7 @@ test("verification-evidence: writeVerificationJSON includes runtimeErrors when p
const result = makeResult({
passed: false,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true },
],
runtimeErrors: [
{ source: "bg-shell", severity: "crash", message: "Server crashed", blocking: true },
@ -473,7 +475,7 @@ test("verification-evidence: writeVerificationJSON omits runtimeErrors when abse
const result = makeResult({
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 },
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50, blocking: true },
],
});
@ -512,7 +514,7 @@ test("verification-evidence: formatEvidenceTable appends runtime errors section"
const result = makeResult({
passed: false,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true },
],
runtimeErrors: [
{ source: "bg-shell", severity: "crash", message: "Server crashed with SIGKILL", blocking: true },
@ -537,7 +539,7 @@ test("verification-evidence: formatEvidenceTable omits runtime errors section wh
const result = makeResult({
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 },
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200, blocking: true },
],
});
@ -552,7 +554,7 @@ test("verification-evidence: formatEvidenceTable truncates runtime error message
const result = makeResult({
passed: false,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true },
],
runtimeErrors: [
{ source: "bg-shell", severity: "error", message: longMessage, blocking: false },
@ -598,7 +600,7 @@ test("verification-evidence: writeVerificationJSON includes auditWarnings when p
const result = makeResult({
passed: true,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true },
],
auditWarnings: SAMPLE_AUDIT_WARNINGS,
});
@ -627,7 +629,7 @@ test("verification-evidence: writeVerificationJSON omits auditWarnings when abse
const result = makeResult({
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 },
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50, blocking: true },
],
});
@ -666,7 +668,7 @@ test("verification-evidence: formatEvidenceTable appends audit warnings section"
const result = makeResult({
passed: true,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100, blocking: true },
],
auditWarnings: SAMPLE_AUDIT_WARNINGS,
});
@ -689,7 +691,7 @@ test("verification-evidence: formatEvidenceTable omits audit warnings section wh
const result = makeResult({
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 },
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200, blocking: true },
],
});
@ -705,7 +707,7 @@ test("verification-evidence: integration — VerificationResult with auditWarnin
const result = makeResult({
passed: true,
checks: [
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 },
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500, blocking: true },
],
auditWarnings: [
{

View file

@ -581,7 +581,7 @@ test("formatFailureContext: formats a single failure with command, exit code, st
const result: import("../types.ts").VerificationResult = {
passed: false,
checks: [
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500 },
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500, blocking: true },
],
discoverySource: "preference",
timestamp: Date.now(),
@ -598,9 +598,9 @@ test("formatFailureContext: formats multiple failures", () => {
const result: import("../types.ts").VerificationResult = {
passed: false,
checks: [
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100 },
{ command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200 },
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50 },
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100, blocking: true },
{ command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200, blocking: true },
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50, blocking: true },
],
discoverySource: "preference",
timestamp: Date.now(),
@ -619,7 +619,7 @@ test("formatFailureContext: truncates stderr longer than 2000 chars", () => {
const result: import("../types.ts").VerificationResult = {
passed: false,
checks: [
{ command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100 },
{ command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100, blocking: true },
],
discoverySource: "preference",
timestamp: Date.now(),
@ -634,8 +634,8 @@ test("formatFailureContext: returns empty string when all checks pass", () => {
const result: import("../types.ts").VerificationResult = {
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200 },
{ command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100, blocking: true },
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200, blocking: true },
],
discoverySource: "preference",
timestamp: Date.now(),
@ -663,6 +663,7 @@ test("formatFailureContext: caps total output at 10,000 chars", () => {
stdout: "",
stderr: "e".repeat(1000), // 1000 chars each, 20 * ~1050 (with formatting) > 10,000
durationMs: 100,
blocking: true,
});
}
const result: import("../types.ts").VerificationResult = {
@ -1077,3 +1078,131 @@ test("dependency-audit: subdirectory package.json does not trigger audit", () =>
assert.equal(npmAuditCalled, false, "subdirectory dependency files should not trigger audit");
assert.deepStrictEqual(result, []);
});
// ─── Non-Blocking Discovery Tests ────────────────────────────────────────────
test("non-blocking: package-json discovered commands failing → result.passed is still true", () => {
  const dir = makeTempDir("vg-nb-pkg-fail");
  try {
    // Seed a package.json whose scripts reference tools that are absent from
    // the temp dir, so every auto-discovered check is guaranteed to fail.
    const manifest = JSON.stringify({ scripts: { lint: "eslint .", test: "vitest" } });
    writeFileSync(join(dir, "package.json"), manifest);
    // No preference commands supplied — discovery falls through to package.json.
    const gate = runVerificationGate({ basePath: dir, unitId: "T01", cwd: dir });
    assert.equal(gate.discoverySource, "package-json");
    assert.ok(gate.checks.length > 0, "should have discovered package.json checks");
    assert.equal(gate.passed, true, "package-json failures should not block the gate");
    for (const c of gate.checks) {
      assert.equal(c.blocking, false, "package-json checks should be non-blocking");
    }
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("non-blocking: preference commands failing → result.passed is false", () => {
  const dir = makeTempDir("vg-nb-pref-fail");
  try {
    // An explicitly configured preference command that always exits non-zero.
    const gate = runVerificationGate({
      basePath: dir,
      unitId: "T01",
      cwd: dir,
      preferenceCommands: ["sh -c 'exit 1'"],
    });
    assert.equal(gate.discoverySource, "preference");
    assert.equal(gate.passed, false, "preference failures should block the gate");
    assert.equal(gate.checks[0].blocking, true, "preference checks should be blocking");
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("non-blocking: task-plan commands failing → result.passed is false", () => {
  const dir = makeTempDir("vg-nb-tp-fail");
  try {
    // A task-plan verify command that always exits non-zero.
    const gate = runVerificationGate({
      basePath: dir,
      unitId: "T01",
      cwd: dir,
      taskPlanVerify: "sh -c 'exit 1'",
    });
    assert.equal(gate.discoverySource, "task-plan");
    assert.equal(gate.passed, false, "task-plan failures should block the gate");
    assert.equal(gate.checks[0].blocking, true, "task-plan checks should be blocking");
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("non-blocking: blocking field is set correctly based on discovery source", () => {
  const dir = makeTempDir("vg-nb-field");
  try {
    const base = { basePath: dir, unitId: "T01", cwd: dir };
    // Explicit preference commands → blocking checks.
    const fromPreference = runVerificationGate({ ...base, preferenceCommands: ["echo ok"] });
    assert.equal(fromPreference.checks[0].blocking, true);
    // Task-plan verify command → blocking checks.
    const fromTaskPlan = runVerificationGate({ ...base, taskPlanVerify: "echo ok" });
    assert.equal(fromTaskPlan.checks[0].blocking, true);
    // Auto-discovered package.json scripts → advisory (non-blocking) checks.
    writeFileSync(
      join(dir, "package.json"),
      JSON.stringify({ scripts: { test: "echo ok" } }),
    );
    const fromPackageJson = runVerificationGate({ ...base });
    assert.equal(fromPackageJson.checks[0].blocking, false);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("non-blocking: formatFailureContext only includes blocking failures", () => {
  // Two advisory failures bracket a single blocking one; only the blocking
  // failure may surface in the generated retry context.
  const verification: import("../types.ts").VerificationResult = {
    passed: true,
    checks: [
      { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint warning", durationMs: 100, blocking: false },
      { command: "npm run test", exitCode: 1, stdout: "", stderr: "test error", durationMs: 200, blocking: true },
      { command: "npm run typecheck", exitCode: 1, stdout: "", stderr: "type error", durationMs: 50, blocking: false },
    ],
    discoverySource: "preference",
    timestamp: Date.now(),
  };
  const rendered = formatFailureContext(verification);
  assert.ok(rendered.includes("`npm run test`"), "should include blocking failure");
  assert.ok(!rendered.includes("npm run lint"), "should not include non-blocking failure");
  assert.ok(!rendered.includes("npm run typecheck"), "should not include non-blocking failure");
});
test("non-blocking: formatFailureContext returns empty when only non-blocking failures exist", () => {
  // Every failure here is advisory, so no retry context should be produced.
  const verification: import("../types.ts").VerificationResult = {
    passed: true,
    checks: [
      { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint warning", durationMs: 100, blocking: false },
      { command: "npm run test", exitCode: 1, stdout: "", stderr: "test warning", durationMs: 200, blocking: false },
    ],
    discoverySource: "package-json",
    timestamp: Date.now(),
  };
  assert.equal(formatFailureContext(verification), "", "should return empty when only non-blocking failures");
});

View file

@ -55,6 +55,7 @@ export interface VerificationCheck {
stdout: string;
stderr: string;
durationMs: number;
blocking: boolean; // true for preference/task-plan sources, false for package-json (advisory only)
}
/** A runtime error captured from bg-shell processes or browser console */

View file

@ -20,6 +20,7 @@ export interface EvidenceCheckJSON {
exitCode: number;
durationMs: number;
verdict: "pass" | "fail";
blocking: boolean;
}
export interface RuntimeErrorJSON {
@ -80,6 +81,7 @@ export function writeVerificationJSON(
exitCode: check.exitCode,
durationMs: check.durationMs,
verdict: check.exitCode === 0 ? "pass" : "fail",
blocking: check.blocking,
})),
...(retryAttempt !== undefined ? { retryAttempt } : {}),
...(maxRetries !== undefined ? { maxRetries } : {}),

View file

@ -112,7 +112,9 @@ const MAX_FAILURE_CONTEXT_CHARS = 10_000;
* Returns an empty string when all checks pass or the checks array is empty.
*/
export function formatFailureContext(result: VerificationResult): string {
const failures = result.checks.filter((c) => c.exitCode !== 0);
// Only include blocking failures in retry context — non-blocking (advisory) failures
// should not be injected into retry prompts to avoid noise pollution.
const failures = result.checks.filter((c) => c.exitCode !== 0 && c.blocking);
if (failures.length === 0) return "";
const blocks: string[] = [];
@ -256,6 +258,10 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi
};
}
// Commands from preference and task-plan sources are blocking;
// package-json discovered commands are advisory (non-blocking).
const blocking = source === "preference" || source === "task-plan";
const checks: VerificationCheck[] = [];
for (const command of commands) {
@ -291,11 +297,16 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi
stdout: truncate(result.stdout, MAX_OUTPUT_BYTES),
stderr,
durationMs,
blocking,
});
}
// Gate passes if all blocking checks pass (non-blocking failures are advisory)
const blockingChecks = checks.filter(c => c.blocking);
const passed = blockingChecks.length === 0 || blockingChecks.every(c => c.exitCode === 0);
return {
passed: checks.every(c => c.exitCode === 0),
passed,
checks,
discoverySource: source,
timestamp,