sf: fail-closed verification gate + deferred-commit infrastructure

Fix 2: verification gate no longer passes when no commands are configured. Empty-commands result now returns passed=false, skipped=true. Updated verification-gate.test.ts; added skipped-result guard in auto-verification.ts that warns and continues (not a hard failure). Fix 3: split auto-verification.ts try/catch into two zones. Zone 1 (gate machinery: prefs load, task lookup, runVerificationGate, captureRuntimeErrors, runDependencyAudit) catches → pauseAuto + return "pause". Zone 2 (ancillary: evidence writes, UOK gate, notifications) catches → logWarning + return "continue". Added verification-fail- closed.test.ts with 11 structural tests. Fix 1 (infrastructure): added stageOnly() + commitStaged() to GitServiceImpl, added stagedPendingCommit flag to AutoSession (cleared in reset()), marked the runTurnGitAction call site in postUnitPreVerification with TODO(fix-1-deferral) for the final wiring. Fix 4: timeout handler in runFinalize now captures hadStagedPending and hadCommitted before nulling currentUnit. Clears stagedPendingCommit to prevent orphaned deferred commits. Emits a diagnostic warning for each case so operators know whether staged-but-uncommitted changes will be absorbed or whether a commit landed before verification was skipped. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 19:32:47 +02:00 · 2026-04-19 19:32:47 +02:00 · 315c2c49ca
commit 315c2c49ca
parent c940ebc16f
9 changed files with 353 additions and 13 deletions
--- a/src/resources/extensions/sf/auto-post-unit.ts
+++ b/src/resources/extensions/sf/auto-post-unit.ts
@ -428,6 +428,14 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
          unitId: s.currentUnit.id,
        });
      } else {
+        // TODO(fix-1-deferral): For execute-task units when turnAction === "commit",
+        // replace the full runTurnGitAction commit with a stage-only step here and
+        // set s.stagedPendingCommit = true.  Then in postUnitPostVerification, at the
+        // top of the function (before DB writes), check s.stagedPendingCommit and call
+        // s.gitService?.commitStaged(commitMessage) to perform the deferred commit after
+        // verification has passed.  This ensures changes are captured in the index before
+        // verification but the git history object is only created once verification passes.
+        // Fix 4 (phases.ts timeout handler) already handles the s.stagedPendingCommit flag.
        const gitResult = runTurnGitAction({
          basePath: s.basePath,
          action: turnAction,
--- a/src/resources/extensions/sf/auto-verification.ts
+++ b/src/resources/extensions/sf/auto-verification.ts
@ -16,6 +16,7 @@ import { resolveSliceFile, resolveSlicePath, resolveMilestoneFile } from "./path
 import { parseUnitId } from "./unit-id.js";
 import { isDbAvailable, getTask, getSliceTasks, getMilestoneSlices, type TaskRow } from "./sf-db.js";
 import { loadEffectiveSFPreferences } from "./preferences.js";
+import type { SFPreferences } from "./preferences-types.js";
 import { extractVerdict } from "./verdict-parser.js";
 import { isClosedStatus } from "./status-guards.js";
 import { loadFile } from "./files.js";
@ -28,7 +29,7 @@ import {
  runDependencyAudit,
 } from "./verification-gate.js";
 import { writeVerificationJSON, type PostExecutionCheckJSON, type EvidenceJSON } from "./verification-evidence.js";
-import { logWarning } from "./workflow-logger.js";
+import { logWarning, logError } from "./workflow-logger.js";
 import { runPostExecutionChecks, type PostExecutionResult } from "./post-execution-checks.js";
 import type { AutoSession } from "./auto/session.js";
 import type { VerificationResult as VerificationGateResult } from "./types.js";
@ -213,13 +214,22 @@ export async function runPostUnitVerification(
    return "continue";
  }

+  // ── Zone 1: Gate machinery (outer try) ──────────────────────────────────
+  // Failures here indicate broken infrastructure — pause for human review.
+  let prefs: SFPreferences | undefined;
+  let uokFlags: ReturnType<typeof resolveUokFlags>;
+  let mid: string | undefined;
+  let sid: string | undefined;
+  let tid: string | undefined;
+  let result: ReturnType<typeof runVerificationGate>;
+
  try {
    const effectivePrefs = loadEffectiveSFPreferences();
-    const prefs = effectivePrefs?.preferences;
-    const uokFlags = resolveUokFlags(prefs);
+    prefs = effectivePrefs?.preferences;
+    uokFlags = resolveUokFlags(prefs);

    // Read task plan verify field
-    const { milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id);
+    ({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id));
    let taskPlanVerify: string | undefined;
    if (mid && sid && tid) {
      if (isDbAvailable()) {
@ -228,12 +238,21 @@ export async function runPostUnitVerification(
      // When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks
    }

-    const result = runVerificationGate({
+    result = runVerificationGate({
      cwd: s.basePath,
      preferenceCommands: prefs?.verification_commands,
      taskPlanVerify,
    });

+    // Handle skipped gate (no commands discovered) — fail-closed but not a hard failure
+    if (result.skipped === true) {
+      process.stderr.write(
+        "verification-gate: no commands discovered — gate skipped, not passed\n",
+      );
+      ctx.ui.notify("[verify] SKIP — no verification commands configured", "warning");
+      return "continue";
+    }
+
    // Capture runtime errors
    const runtimeErrors = await captureRuntimeErrors();
    if (runtimeErrors.length > 0) {
@ -254,7 +273,16 @@ export async function runPostUnitVerification(
        process.stderr.write(`  [${w.severity}] ${w.name}: ${w.title}\n`);
      }
    }
+  } catch (machineryErr) {
+    logError("engine", `verification-gate machinery error — pausing for human review: ${(machineryErr as Error).message}`);
+    ctx.ui.notify("verification-gate machinery error — pausing for human review", "error");
+    await pauseAuto(ctx, pi);
+    return "pause";
+  }

+  // ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
+  // Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
+  try {
    if (uokFlags.gates) {
      const gateRunner = new UokGateRunner();
      gateRunner.register({
@ -587,7 +615,7 @@ export async function runPostUnitVerification(
      return "pause";
    }
  } catch (err) {
-    // Gate errors are non-fatal
+    // Ancillary post-gate errors are non-fatal — log warning and continue
    logWarning("engine", `verification-gate error: ${(err as Error).message}`);
    return "continue";
  }
--- a/src/resources/extensions/sf/auto/phases.ts
+++ b/src/resources/extensions/sf/auto/phases.ts
@ -1819,12 +1819,29 @@ export async function runFinalize(
  if (preResultGuard.timedOut) {
    // Detach session from the timed-out unit so late async completions
    // cannot mutate state for the next unit (#3757).
+    const hadStagedPending = s.stagedPendingCommit;
+    const hadCommitted = s.lastGitActionStatus === "ok";
+    s.stagedPendingCommit = false; // prevent orphaned deferred commit
    s.currentUnit = null;
    clearCurrentPhase();
    // Drop any logger entries from the timed-out unit so they don't bleed
    // into the next iteration's drain.
    drainLogs();
    loopState.consecutiveFinalizeTimeouts++;
+
+    if (hadStagedPending) {
+      ctx.ui.notify(
+        "postUnitPreVerification timed out with staged-but-uncommitted changes — staged files will be included in next unit's commit.",
+        "warning",
+      );
+      logWarning("engine", "finalize-timeout: staged-pending-commit orphaned — will be absorbed by next unit");
+    } else if (hadCommitted) {
+      ctx.ui.notify(
+        "postUnitPreVerification timed out after git commit — changes are in history but verification was skipped.",
+        "warning",
+      );
+      logWarning("engine", "finalize-timeout: git commit completed before timeout — verification was not run");
+    }
    debugLog("autoLoop", {
      phase: "pre-verification-timeout",
      iteration: ic.iteration,
--- a/src/resources/extensions/sf/auto/session.ts
+++ b/src/resources/extensions/sf/auto/session.ts
@ -169,6 +169,20 @@ export class AutoSession {
  /** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
  checkpointSha: string | null = null;

+  // ── Deferred commit (Fix 1) ──────────────────────────────────────────────
+  /**
+   * True when postUnitPreVerification has staged files but deferred the git
+   * commit until after verification passes (Fix 1 deferral pattern).
+   *
+   * postUnitPostVerification reads this flag and calls git.commitStaged()
+   * before DB writes when it is set, then clears it.
+   *
+   * The timeout handler in phases.ts clears this flag and emits a diagnostic
+   * warning when postUnitPreVerification times out with staged-but-uncommitted
+   * changes (Fix 4).
+   */
+  stagedPendingCommit = false;
+
  // ── Signal handler ───────────────────────────────────────────────────────
  sigtermHandler: (() => void) | null = null;

@ -263,6 +277,7 @@ export class AutoSession {
    this.isolationDegraded = false;
    this.milestoneMergedInPhases = false;
    this.checkpointSha = null;
+    this.stagedPendingCommit = false;

    // Signal handler
    this.sigtermHandler = null;
--- a/src/resources/extensions/sf/git-service.ts
+++ b/src/resources/extensions/sf/git-service.ts
@ -756,6 +756,35 @@ export class GitServiceImpl {
    nativeUpdateRef(this.basePath, refPath, "HEAD");
  }

+  /**
+   * Stage files without committing. Returns true if anything was staged.
+   *
+   * Used by Fix 1 deferral: call this in postUnitPreVerification so changes
+   * are captured before verification, then call commitStaged() in
+   * postUnitPostVerification once verification has passed.
+   *
+   * @param extraExclusions Additional paths to exclude from staging.
+   */
+  stageOnly(extraExclusions: readonly string[] = []): boolean {
+    if (!nativeHasChanges(this.basePath)) return false;
+    this.smartStage(extraExclusions);
+    return nativeHasStagedChanges(this.basePath);
+  }
+
+  /**
+   * Commit already-staged files (no re-staging). Returns true if committed.
+   *
+   * Companion to stageOnly() for the Fix 1 deferred-commit pattern.
+   * Only calls nativeCommit when there are actually staged changes.
+   *
+   * @param message The commit message to use.
+   */
+  commitStaged(message: string): boolean {
+    if (!nativeHasStagedChanges(this.basePath)) return false;
+    nativeCommit(this.basePath, message, { allowEmpty: false });
+    return true;
+  }
+
  /**
   * Run pre-merge verification check. Auto-detects test runner from project
   * files, or uses custom command from prefs.pre_merge_check.
--- a/src/resources/extensions/sf/tests/verification-fail-closed.test.ts
+++ b/src/resources/extensions/sf/tests/verification-fail-closed.test.ts
@ -0,0 +1,229 @@
+/**
+ * verification-fail-closed.test.ts — Structural regression for Fix 3.
+ *
+ * Verifies that `runPostUnitVerification` in auto-verification.ts splits its
+ * single monolithic try/catch into two zones:
+ *
+ *   Zone 1 (Gate machinery): loadEffectiveSFPreferences, getTask,
+ *     runVerificationGate, captureRuntimeErrors, runDependencyAudit.
+ *     → if this throws: log error, notify UI "error", call pauseAuto,
+ *       return "pause".
+ *
+ *   Zone 2 (Ancillary): evidence writes, UOK gate runner calls,
+ *     notification calls, auto-fix retry logic.
+ *     → if this throws: log warning, return "continue".
+ *
+ * Testing strategy:
+ *   node:test (this project) does NOT enable --experimental-test-module-mocks,
+ *   so module-level mocking is unavailable (see graph-context.test.ts note).
+ *   We verify the structural invariants by inspecting the compiled source text
+ *   of auto-verification.ts directly, matching key patterns that encode the
+ *   required behavior. This is the same approach used in
+ *   auto-wrapup-inflight-guard.test.ts.
+ *
+ * Invariants checked:
+ *   1. Zone 1 outer catch calls pauseAuto and returns "pause".
+ *   2. Zone 1 outer catch uses logError (not logWarning) and the exact
+ *      required message string.
+ *   3. Zone 1 outer catch calls ctx.ui.notify with "error" level.
+ *   4. Zone 2 inner catch returns "continue" (non-fatal fallback).
+ *   5. Zone 2 inner catch uses logWarning (not logError).
+ *   6. The two catches are distinct — machinery catch appears before
+ *      ancillary catch in source order.
+ */
+
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+const src = readFileSync(
+  join(import.meta.dirname, "..", "auto-verification.ts"),
+  "utf-8",
+);
+
+// ─── helpers ────────────────────────────────────────────────────────────────
+
+/** Find the nth occurrence of `needle` in `haystack`. Returns -1 if not found. */
+function nthIndex(haystack: string, needle: string, n: number): number {
+  let idx = -1;
+  for (let i = 0; i < n; i++) {
+    idx = haystack.indexOf(needle, idx + 1);
+    if (idx === -1) return -1;
+  }
+  return idx;
+}
+
+// ─── Zone 1: machinery catch invariants ────────────────────────────────────
+
+describe("Zone 1 — gate machinery catch", () => {
+  it("contains the required exact error message string", () => {
+    assert.ok(
+      src.includes("verification-gate machinery error — pausing for human review"),
+      'auto-verification.ts must contain the literal string "verification-gate machinery error — pausing for human review"',
+    );
+  });
+
+  it("uses logError (not logWarning) in the machinery catch", () => {
+    // Find the machinery catch block by locating "machineryErr"
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1, "machinery catch variable 'machineryErr' must be present");
+
+    // The next 400 characters of the catch block must contain logError
+    const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
+    assert.ok(
+      catchBlock.includes("logError"),
+      "machinery catch must call logError (not logWarning) for infrastructure failures",
+    );
+    assert.ok(
+      !catchBlock.includes("logWarning"),
+      "machinery catch must NOT call logWarning — infrastructure failures are errors, not warnings",
+    );
+  });
+
+  it("calls ctx.ui.notify with 'error' level in machinery catch", () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1);
+    const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
+
+    assert.ok(
+      catchBlock.includes('ctx.ui.notify'),
+      "machinery catch must notify the UI",
+    );
+    assert.ok(
+      catchBlock.includes('"error"'),
+      "machinery catch UI notification must use 'error' severity level",
+    );
+  });
+
+  it("calls pauseAuto in the machinery catch", () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1);
+    const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
+
+    assert.ok(
+      catchBlock.includes("pauseAuto"),
+      "machinery catch must call pauseAuto to halt the auto-loop",
+    );
+  });
+
+  it('returns "pause" in the machinery catch', () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1);
+    const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
+
+    assert.ok(
+      catchBlock.includes('return "pause"'),
+      'machinery catch must return "pause" so the caller halts the loop',
+    );
+  });
+});
+
+// ─── Zone 2: ancillary catch invariants ────────────────────────────────────
+
+describe("Zone 2 — ancillary post-gate catch", () => {
+  it("the ancillary catch uses logWarning (non-fatal path)", () => {
+    // The ancillary catch uses 'err' as the binding (not 'machineryErr')
+    // and must appear AFTER the machinery catch in source order.
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1, "machinery catch must be present");
+
+    // Find the second catch block: look for "} catch (err)" after machineryIdx
+    const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx);
+    assert.ok(
+      ancillaryCatchIdx !== -1,
+      "ancillary catch '} catch (err)' must appear after the machinery catch",
+    );
+
+    const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300);
+    assert.ok(
+      catchBlock.includes("logWarning"),
+      "ancillary catch must use logWarning (non-fatal)",
+    );
+  });
+
+  it('returns "continue" in the ancillary catch', () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1);
+    const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx);
+    assert.ok(ancillaryCatchIdx !== -1);
+
+    const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300);
+    assert.ok(
+      catchBlock.includes('return "continue"'),
+      'ancillary catch must return "continue" — post-gate errors are non-fatal',
+    );
+  });
+
+  it("ancillary catch does NOT call pauseAuto", () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1);
+    const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx);
+    assert.ok(ancillaryCatchIdx !== -1);
+
+    const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300);
+    assert.ok(
+      !catchBlock.includes("pauseAuto"),
+      "ancillary catch must NOT call pauseAuto — post-gate errors do not halt the agent",
+    );
+  });
+});
+
+// ─── Structural ordering invariants ─────────────────────────────────────────
+
+describe("Two-zone structural ordering", () => {
+  it("machinery catch appears before ancillary catch in source", () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx + 1);
+    assert.ok(machineryIdx !== -1, "'machineryErr' binding must exist");
+    assert.ok(ancillaryCatchIdx !== -1, "'} catch (err)' must exist after machinery catch");
+    assert.ok(
+      machineryIdx < ancillaryCatchIdx,
+      "Zone 1 machinery catch must appear before Zone 2 ancillary catch",
+    );
+  });
+
+  it("Zone 1 critical calls appear before the machinery catch", () => {
+    const machineryIdx = src.indexOf("machineryErr");
+    assert.ok(machineryIdx !== -1);
+
+    // All Zone 1 calls must appear between function start and machineryErr
+    const zone1Section = src.slice(
+      src.indexOf("export async function runPostUnitVerification("),
+      machineryIdx,
+    );
+
+    assert.ok(
+      zone1Section.includes("loadEffectiveSFPreferences"),
+      "loadEffectiveSFPreferences must be in Zone 1",
+    );
+    assert.ok(
+      zone1Section.includes("runVerificationGate"),
+      "runVerificationGate must be in Zone 1",
+    );
+    assert.ok(
+      zone1Section.includes("captureRuntimeErrors"),
+      "captureRuntimeErrors must be in Zone 1",
+    );
+    assert.ok(
+      zone1Section.includes("runDependencyAudit"),
+      "runDependencyAudit must be in Zone 1",
+    );
+  });
+
+  it("logError is imported from workflow-logger", () => {
+    assert.ok(
+      src.includes('logError') && src.includes('workflow-logger'),
+      "logError must be imported from workflow-logger",
+    );
+    // Verify the import line includes logError
+    const importLine = src.slice(
+      src.indexOf('import {'),
+      src.indexOf('} from "./workflow-logger'),
+    );
+    assert.ok(
+      importLine.includes("logError"),
+      "logError must appear in the workflow-logger import",
+    );
+  });
+});
--- a/src/resources/extensions/sf/tests/verification-gate.test.ts
+++ b/src/resources/extensions/sf/tests/verification-gate.test.ts
@ -8,8 +8,8 @@
 *   4. First-non-empty-wins precedence
 *   5. All commands pass → gate passes
 *   6. One command fails → gate fails with exit code + stderr
- *   7. Missing package.json → 0 checks → pass
- *   8. Empty scripts → 0 checks → pass
+ *   7. Missing package.json → 0 checks → skipped (passed:false, skipped:true)
+ *   8. Empty scripts → 0 checks → skipped (passed:false, skipped:true)
 *   9. Preference validation for verification keys
 *  10. spawnSync error (command not found) → failure with exit code 127
 *  11. Dependency audit — git diff detection, npm audit parsing, graceful failures
@ -251,15 +251,27 @@ describe("verification-gate: execution", () => {
    assert.ok(result.checks[1].stderr.includes("err"));
  });

-  test("no commands discovered → gate passes with 0 checks", () => {
+  test("no commands discovered → gate fails with skipped:true and 0 checks", () => {
    const result = runVerificationGate({
      cwd: tmp,
    });
-    assert.equal(result.passed, true);
+    assert.equal(result.passed, false);
+    assert.equal(result.skipped, true);
    assert.equal(result.checks.length, 0);
    assert.equal(result.discoverySource, "none");
  });

+  test("zero commands returns skipped:true and passed:false", () => {
+    // Provide empty preference commands so discovery yields nothing
+    const result = runVerificationGate({
+      cwd: tmp,
+      preferenceCommands: [],
+    });
+    assert.equal(result.passed, false, "passed must be false when no commands are discovered");
+    assert.equal(result.skipped, true, "skipped must be true when no commands are discovered");
+    assert.equal(result.checks.length, 0, "checks array must be empty");
+  });
+
  test("command not found → exit code 127", () => {
    const result = runVerificationGate({
      cwd: tmp,
--- a/src/resources/extensions/sf/types.ts
+++ b/src/resources/extensions/sf/types.ts
@ -102,7 +102,8 @@ export interface AuditWarning {

 /** Aggregate result from the verification gate */
 export interface VerificationResult {
-  passed: boolean; // true if all checks passed (or no checks discovered)
+  passed: boolean; // true if all checks passed
+  skipped?: boolean; // true when no commands were discovered (fail-closed: passed=false, skipped=true)
  checks: VerificationCheck[]; // per-command results
  discoverySource: "preference" | "task-plan" | "package-json" | "none";
  timestamp: number; // Date.now() at gate start
--- a/src/resources/extensions/sf/verification-gate.ts
+++ b/src/resources/extensions/sf/verification-gate.ts
@ -233,7 +233,7 @@ export interface RunVerificationGateOptions {
 * and return a structured result.
 *
 * - All commands run sequentially regardless of individual pass/fail.
- * - `passed` is true when every command exits 0 (or no commands are discovered).
+ * - `passed` is true when every command exits 0; `skipped` is true when no commands are discovered.
 * - stdout/stderr per command are truncated to 10 KB.
 */
 export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult {
@ -247,7 +247,8 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi

  if (commands.length === 0) {
    return {
-      passed: true,
+      passed: false,
+      skipped: true,
      checks: [],
      discoverySource: source,
      timestamp,