sf: fail-closed verification gate + deferred-commit infrastructure

Fix 2: verification gate no longer passes when no commands are
configured. Empty-commands result now returns passed=false, skipped=true.
Updated verification-gate.test.ts; added skipped-result guard in
auto-verification.ts that warns and continues (not a hard failure).

Fix 3: split auto-verification.ts try/catch into two zones. Zone 1
(gate machinery: prefs load, task lookup, runVerificationGate,
captureRuntimeErrors, runDependencyAudit) catches → pauseAuto + return
"pause". Zone 2 (ancillary: evidence writes, UOK gate, notifications)
catches → logWarning + return "continue". Added verification-fail-
closed.test.ts with 11 structural tests.

Fix 1 (infrastructure): added stageOnly() + commitStaged() to
GitServiceImpl, added stagedPendingCommit flag to AutoSession (cleared
in reset()), marked the runTurnGitAction call site in
postUnitPreVerification with TODO(fix-1-deferral) for the final wiring.

Fix 4: timeout handler in runFinalize now captures hadStagedPending and
hadCommitted before nulling currentUnit. Clears stagedPendingCommit to
prevent orphaned deferred commits. Emits a diagnostic warning for each
case so operators know whether staged-but-uncommitted changes will be
absorbed or whether a commit landed before verification was skipped.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-19 19:32:47 +02:00
parent c940ebc16f
commit 315c2c49ca
9 changed files with 353 additions and 13 deletions

View file

@ -428,6 +428,14 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
unitId: s.currentUnit.id,
});
} else {
// TODO(fix-1-deferral): For execute-task units when turnAction === "commit",
// replace the full runTurnGitAction commit with a stage-only step here and
// set s.stagedPendingCommit = true. Then in postUnitPostVerification, at the
// top of the function (before DB writes), check s.stagedPendingCommit and call
// s.gitService?.commitStaged(commitMessage) to perform the deferred commit after
// verification has passed. This ensures changes are captured in the index before
// verification but the git history object is only created once verification passes.
// Fix 4 (phases.ts timeout handler) already handles the s.stagedPendingCommit flag.
const gitResult = runTurnGitAction({
basePath: s.basePath,
action: turnAction,

View file

@ -16,6 +16,7 @@ import { resolveSliceFile, resolveSlicePath, resolveMilestoneFile } from "./path
import { parseUnitId } from "./unit-id.js";
import { isDbAvailable, getTask, getSliceTasks, getMilestoneSlices, type TaskRow } from "./sf-db.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
import type { SFPreferences } from "./preferences-types.js";
import { extractVerdict } from "./verdict-parser.js";
import { isClosedStatus } from "./status-guards.js";
import { loadFile } from "./files.js";
@ -28,7 +29,7 @@ import {
runDependencyAudit,
} from "./verification-gate.js";
import { writeVerificationJSON, type PostExecutionCheckJSON, type EvidenceJSON } from "./verification-evidence.js";
import { logWarning } from "./workflow-logger.js";
import { logWarning, logError } from "./workflow-logger.js";
import { runPostExecutionChecks, type PostExecutionResult } from "./post-execution-checks.js";
import type { AutoSession } from "./auto/session.js";
import type { VerificationResult as VerificationGateResult } from "./types.js";
@ -213,13 +214,22 @@ export async function runPostUnitVerification(
return "continue";
}
// ── Zone 1: Gate machinery (outer try) ──────────────────────────────────
// Failures here indicate broken infrastructure — pause for human review.
let prefs: SFPreferences | undefined;
let uokFlags: ReturnType<typeof resolveUokFlags>;
let mid: string | undefined;
let sid: string | undefined;
let tid: string | undefined;
let result: ReturnType<typeof runVerificationGate>;
try {
const effectivePrefs = loadEffectiveSFPreferences();
const prefs = effectivePrefs?.preferences;
const uokFlags = resolveUokFlags(prefs);
prefs = effectivePrefs?.preferences;
uokFlags = resolveUokFlags(prefs);
// Read task plan verify field
const { milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id);
({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id));
let taskPlanVerify: string | undefined;
if (mid && sid && tid) {
if (isDbAvailable()) {
@ -228,12 +238,21 @@ export async function runPostUnitVerification(
// When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks
}
const result = runVerificationGate({
result = runVerificationGate({
cwd: s.basePath,
preferenceCommands: prefs?.verification_commands,
taskPlanVerify,
});
// Handle skipped gate (no commands discovered) — fail-closed but not a hard failure
if (result.skipped === true) {
process.stderr.write(
"verification-gate: no commands discovered — gate skipped, not passed\n",
);
ctx.ui.notify("[verify] SKIP — no verification commands configured", "warning");
return "continue";
}
// Capture runtime errors
const runtimeErrors = await captureRuntimeErrors();
if (runtimeErrors.length > 0) {
@ -254,7 +273,16 @@ export async function runPostUnitVerification(
process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
}
}
} catch (machineryErr) {
logError("engine", `verification-gate machinery error — pausing for human review: ${(machineryErr as Error).message}`);
ctx.ui.notify("verification-gate machinery error — pausing for human review", "error");
await pauseAuto(ctx, pi);
return "pause";
}
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
try {
if (uokFlags.gates) {
const gateRunner = new UokGateRunner();
gateRunner.register({
@ -587,7 +615,7 @@ export async function runPostUnitVerification(
return "pause";
}
} catch (err) {
// Gate errors are non-fatal
// Ancillary post-gate errors are non-fatal — log warning and continue
logWarning("engine", `verification-gate error: ${(err as Error).message}`);
return "continue";
}

View file

@ -1819,12 +1819,29 @@ export async function runFinalize(
if (preResultGuard.timedOut) {
// Detach session from the timed-out unit so late async completions
// cannot mutate state for the next unit (#3757).
const hadStagedPending = s.stagedPendingCommit;
const hadCommitted = s.lastGitActionStatus === "ok";
s.stagedPendingCommit = false; // prevent orphaned deferred commit
s.currentUnit = null;
clearCurrentPhase();
// Drop any logger entries from the timed-out unit so they don't bleed
// into the next iteration's drain.
drainLogs();
loopState.consecutiveFinalizeTimeouts++;
if (hadStagedPending) {
ctx.ui.notify(
"postUnitPreVerification timed out with staged-but-uncommitted changes — staged files will be included in next unit's commit.",
"warning",
);
logWarning("engine", "finalize-timeout: staged-pending-commit orphaned — will be absorbed by next unit");
} else if (hadCommitted) {
ctx.ui.notify(
"postUnitPreVerification timed out after git commit — changes are in history but verification was skipped.",
"warning",
);
logWarning("engine", "finalize-timeout: git commit completed before timeout — verification was not run");
}
debugLog("autoLoop", {
phase: "pre-verification-timeout",
iteration: ic.iteration,

View file

@ -169,6 +169,20 @@ export class AutoSession {
/** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
checkpointSha: string | null = null;
// ── Deferred commit (Fix 1) ──────────────────────────────────────────────
/**
* True when postUnitPreVerification has staged files but deferred the git
* commit until after verification passes (Fix 1 deferral pattern).
*
* postUnitPostVerification reads this flag and calls git.commitStaged()
* before DB writes when it is set, then clears it.
*
* The timeout handler in phases.ts clears this flag and emits a diagnostic
* warning when postUnitPreVerification times out with staged-but-uncommitted
* changes (Fix 4).
*/
stagedPendingCommit = false;
// ── Signal handler ───────────────────────────────────────────────────────
sigtermHandler: (() => void) | null = null;
@ -263,6 +277,7 @@ export class AutoSession {
this.isolationDegraded = false;
this.milestoneMergedInPhases = false;
this.checkpointSha = null;
this.stagedPendingCommit = false;
// Signal handler
this.sigtermHandler = null;

View file

@ -756,6 +756,35 @@ export class GitServiceImpl {
nativeUpdateRef(this.basePath, refPath, "HEAD");
}
/**
* Stage files without committing. Returns true if anything was staged.
*
* Used by Fix 1 deferral: call this in postUnitPreVerification so changes
* are captured before verification, then call commitStaged() in
* postUnitPostVerification once verification has passed.
*
* @param extraExclusions Additional paths to exclude from staging.
*/
stageOnly(extraExclusions: readonly string[] = []): boolean {
if (!nativeHasChanges(this.basePath)) return false;
this.smartStage(extraExclusions);
return nativeHasStagedChanges(this.basePath);
}
/**
* Commit already-staged files (no re-staging). Returns true if committed.
*
* Companion to stageOnly() for the Fix 1 deferred-commit pattern.
* Only calls nativeCommit when there are actually staged changes.
*
* @param message The commit message to use.
*/
commitStaged(message: string): boolean {
if (!nativeHasStagedChanges(this.basePath)) return false;
nativeCommit(this.basePath, message, { allowEmpty: false });
return true;
}
/**
* Run pre-merge verification check. Auto-detects test runner from project
* files, or uses custom command from prefs.pre_merge_check.

View file

@ -0,0 +1,229 @@
/**
* verification-fail-closed.test.ts Structural regression for Fix 3.
*
* Verifies that `runPostUnitVerification` in auto-verification.ts splits its
* single monolithic try/catch into two zones:
*
* Zone 1 (Gate machinery): loadEffectiveSFPreferences, getTask,
* runVerificationGate, captureRuntimeErrors, runDependencyAudit.
* if this throws: log error, notify UI "error", call pauseAuto,
* return "pause".
*
* Zone 2 (Ancillary): evidence writes, UOK gate runner calls,
* notification calls, auto-fix retry logic.
* if this throws: log warning, return "continue".
*
* Testing strategy:
* node:test (this project) does NOT enable --experimental-test-module-mocks,
* so module-level mocking is unavailable (see graph-context.test.ts note).
* We verify the structural invariants by inspecting the compiled source text
* of auto-verification.ts directly, matching key patterns that encode the
* required behavior. This is the same approach used in
* auto-wrapup-inflight-guard.test.ts.
*
* Invariants checked:
* 1. Zone 1 outer catch calls pauseAuto and returns "pause".
* 2. Zone 1 outer catch uses logError (not logWarning) and the exact
* required message string.
* 3. Zone 1 outer catch calls ctx.ui.notify with "error" level.
* 4. Zone 2 inner catch returns "continue" (non-fatal fallback).
* 5. Zone 2 inner catch uses logWarning (not logError).
* 6. The two catches are distinct machinery catch appears before
* ancillary catch in source order.
*/
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join } from "node:path";
const src = readFileSync(
join(import.meta.dirname, "..", "auto-verification.ts"),
"utf-8",
);
// ─── helpers ────────────────────────────────────────────────────────────────
/** Find the nth occurrence of `needle` in `haystack`. Returns -1 if not found. */
function nthIndex(haystack: string, needle: string, n: number): number {
let idx = -1;
for (let i = 0; i < n; i++) {
idx = haystack.indexOf(needle, idx + 1);
if (idx === -1) return -1;
}
return idx;
}
// ─── Zone 1: machinery catch invariants ────────────────────────────────────
describe("Zone 1 — gate machinery catch", () => {
it("contains the required exact error message string", () => {
assert.ok(
src.includes("verification-gate machinery error — pausing for human review"),
'auto-verification.ts must contain the literal string "verification-gate machinery error — pausing for human review"',
);
});
it("uses logError (not logWarning) in the machinery catch", () => {
// Find the machinery catch block by locating "machineryErr"
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1, "machinery catch variable 'machineryErr' must be present");
// The next 400 characters of the catch block must contain logError
const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
assert.ok(
catchBlock.includes("logError"),
"machinery catch must call logError (not logWarning) for infrastructure failures",
);
assert.ok(
!catchBlock.includes("logWarning"),
"machinery catch must NOT call logWarning — infrastructure failures are errors, not warnings",
);
});
it("calls ctx.ui.notify with 'error' level in machinery catch", () => {
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1);
const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
assert.ok(
catchBlock.includes('ctx.ui.notify'),
"machinery catch must notify the UI",
);
assert.ok(
catchBlock.includes('"error"'),
"machinery catch UI notification must use 'error' severity level",
);
});
it("calls pauseAuto in the machinery catch", () => {
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1);
const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
assert.ok(
catchBlock.includes("pauseAuto"),
"machinery catch must call pauseAuto to halt the auto-loop",
);
});
it('returns "pause" in the machinery catch', () => {
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1);
const catchBlock = src.slice(machineryIdx, machineryIdx + 400);
assert.ok(
catchBlock.includes('return "pause"'),
'machinery catch must return "pause" so the caller halts the loop',
);
});
});
// ─── Zone 2: ancillary catch invariants ────────────────────────────────────
describe("Zone 2 — ancillary post-gate catch", () => {
it("the ancillary catch uses logWarning (non-fatal path)", () => {
// The ancillary catch uses 'err' as the binding (not 'machineryErr')
// and must appear AFTER the machinery catch in source order.
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1, "machinery catch must be present");
// Find the second catch block: look for "} catch (err)" after machineryIdx
const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx);
assert.ok(
ancillaryCatchIdx !== -1,
"ancillary catch '} catch (err)' must appear after the machinery catch",
);
const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300);
assert.ok(
catchBlock.includes("logWarning"),
"ancillary catch must use logWarning (non-fatal)",
);
});
it('returns "continue" in the ancillary catch', () => {
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1);
const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx);
assert.ok(ancillaryCatchIdx !== -1);
const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300);
assert.ok(
catchBlock.includes('return "continue"'),
'ancillary catch must return "continue" — post-gate errors are non-fatal',
);
});
it("ancillary catch does NOT call pauseAuto", () => {
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1);
const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx);
assert.ok(ancillaryCatchIdx !== -1);
const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300);
assert.ok(
!catchBlock.includes("pauseAuto"),
"ancillary catch must NOT call pauseAuto — post-gate errors do not halt the agent",
);
});
});
// ─── Structural ordering invariants ─────────────────────────────────────────
describe("Two-zone structural ordering", () => {
it("machinery catch appears before ancillary catch in source", () => {
const machineryIdx = src.indexOf("machineryErr");
const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx + 1);
assert.ok(machineryIdx !== -1, "'machineryErr' binding must exist");
assert.ok(ancillaryCatchIdx !== -1, "'} catch (err)' must exist after machinery catch");
assert.ok(
machineryIdx < ancillaryCatchIdx,
"Zone 1 machinery catch must appear before Zone 2 ancillary catch",
);
});
it("Zone 1 critical calls appear before the machinery catch", () => {
const machineryIdx = src.indexOf("machineryErr");
assert.ok(machineryIdx !== -1);
// All Zone 1 calls must appear between function start and machineryErr
const zone1Section = src.slice(
src.indexOf("export async function runPostUnitVerification("),
machineryIdx,
);
assert.ok(
zone1Section.includes("loadEffectiveSFPreferences"),
"loadEffectiveSFPreferences must be in Zone 1",
);
assert.ok(
zone1Section.includes("runVerificationGate"),
"runVerificationGate must be in Zone 1",
);
assert.ok(
zone1Section.includes("captureRuntimeErrors"),
"captureRuntimeErrors must be in Zone 1",
);
assert.ok(
zone1Section.includes("runDependencyAudit"),
"runDependencyAudit must be in Zone 1",
);
});
it("logError is imported from workflow-logger", () => {
assert.ok(
src.includes('logError') && src.includes('workflow-logger'),
"logError must be imported from workflow-logger",
);
// Verify the import line includes logError
const importLine = src.slice(
src.indexOf('import {'),
src.indexOf('} from "./workflow-logger'),
);
assert.ok(
importLine.includes("logError"),
"logError must appear in the workflow-logger import",
);
});
});

View file

@ -8,8 +8,8 @@
* 4. First-non-empty-wins precedence
* 5. All commands pass gate passes
* 6. One command fails gate fails with exit code + stderr
* 7. Missing package.json 0 checks pass
* 8. Empty scripts 0 checks pass
* 7. Missing package.json 0 checks skipped (passed:false, skipped:true)
* 8. Empty scripts 0 checks skipped (passed:false, skipped:true)
* 9. Preference validation for verification keys
* 10. spawnSync error (command not found) failure with exit code 127
* 11. Dependency audit git diff detection, npm audit parsing, graceful failures
@ -251,15 +251,27 @@ describe("verification-gate: execution", () => {
assert.ok(result.checks[1].stderr.includes("err"));
});
test("no commands discovered → gate passes with 0 checks", () => {
test("no commands discovered → gate fails with skipped:true and 0 checks", () => {
const result = runVerificationGate({
cwd: tmp,
});
assert.equal(result.passed, true);
assert.equal(result.passed, false);
assert.equal(result.skipped, true);
assert.equal(result.checks.length, 0);
assert.equal(result.discoverySource, "none");
});
test("zero commands returns skipped:true and passed:false", () => {
// Provide empty preference commands so discovery yields nothing
const result = runVerificationGate({
cwd: tmp,
preferenceCommands: [],
});
assert.equal(result.passed, false, "passed must be false when no commands are discovered");
assert.equal(result.skipped, true, "skipped must be true when no commands are discovered");
assert.equal(result.checks.length, 0, "checks array must be empty");
});
test("command not found → exit code 127", () => {
const result = runVerificationGate({
cwd: tmp,

View file

@ -102,7 +102,8 @@ export interface AuditWarning {
/** Aggregate result from the verification gate */
export interface VerificationResult {
passed: boolean; // true if all checks passed (or no checks discovered)
passed: boolean; // true if all checks passed
skipped?: boolean; // true when no commands were discovered (fail-closed: passed=false, skipped=true)
checks: VerificationCheck[]; // per-command results
discoverySource: "preference" | "task-plan" | "package-json" | "none";
timestamp: number; // Date.now() at gate start

View file

@ -233,7 +233,7 @@ export interface RunVerificationGateOptions {
* and return a structured result.
*
* - All commands run sequentially regardless of individual pass/fail.
* - `passed` is true when every command exits 0 (or no commands are discovered).
* - `passed` is true when every command exits 0; `skipped` is true when no commands are discovered.
* - stdout/stderr per command are truncated to 10 KB.
*/
export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult {
@ -247,7 +247,8 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi
if (commands.length === 0) {
return {
passed: true,
passed: false,
skipped: true,
checks: [],
discoverySource: source,
timestamp,