From 315c2c49cafeec47a7018ade5196c6ed681c07fc Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 19 Apr 2026 19:32:47 +0200 Subject: [PATCH] sf: fail-closed verification gate + deferred-commit infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix 2: verification gate no longer passes when no commands are configured. Empty-commands result now returns passed=false, skipped=true. Updated verification-gate.test.ts; added skipped-result guard in auto-verification.ts that warns and continues (not a hard failure). Fix 3: split auto-verification.ts try/catch into two zones. Zone 1 (gate machinery: prefs load, task lookup, runVerificationGate, captureRuntimeErrors, runDependencyAudit) catches → pauseAuto + return "pause". Zone 2 (ancillary: evidence writes, UOK gate, notifications) catches → logWarning + return "continue". Added verification-fail- closed.test.ts with 11 structural tests. Fix 1 (infrastructure): added stageOnly() + commitStaged() to GitServiceImpl, added stagedPendingCommit flag to AutoSession (cleared in reset()), marked the runTurnGitAction call site in postUnitPreVerification with TODO(fix-1-deferral) for the final wiring. Fix 4: timeout handler in runFinalize now captures hadStagedPending and hadCommitted before nulling currentUnit. Clears stagedPendingCommit to prevent orphaned deferred commits. Emits a diagnostic warning for each case so operators know whether staged-but-uncommitted changes will be absorbed or whether a commit landed before verification was skipped. Co-Authored-By: Claude Sonnet 4.6 --- src/resources/extensions/sf/auto-post-unit.ts | 8 + .../extensions/sf/auto-verification.ts | 40 ++- src/resources/extensions/sf/auto/phases.ts | 17 ++ src/resources/extensions/sf/auto/session.ts | 15 ++ src/resources/extensions/sf/git-service.ts | 29 +++ .../sf/tests/verification-fail-closed.test.ts | 229 ++++++++++++++++++ .../sf/tests/verification-gate.test.ts | 20 +- src/resources/extensions/sf/types.ts | 3 +- .../extensions/sf/verification-gate.ts | 5 +- 9 files changed, 353 insertions(+), 13 deletions(-) create mode 100644 src/resources/extensions/sf/tests/verification-fail-closed.test.ts diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts index 4de9cd4e0..bedd2aef1 100644 --- a/src/resources/extensions/sf/auto-post-unit.ts +++ b/src/resources/extensions/sf/auto-post-unit.ts @@ -428,6 +428,14 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV unitId: s.currentUnit.id, }); } else { + // TODO(fix-1-deferral): For execute-task units when turnAction === "commit", + // replace the full runTurnGitAction commit with a stage-only step here and + // set s.stagedPendingCommit = true. Then in postUnitPostVerification, at the + // top of the function (before DB writes), check s.stagedPendingCommit and call + // s.gitService?.commitStaged(commitMessage) to perform the deferred commit after + // verification has passed. This ensures changes are captured in the index before + // verification but the git history object is only created once verification passes. + // Fix 4 (phases.ts timeout handler) already handles the s.stagedPendingCommit flag. const gitResult = runTurnGitAction({ basePath: s.basePath, action: turnAction, diff --git a/src/resources/extensions/sf/auto-verification.ts b/src/resources/extensions/sf/auto-verification.ts index 782f126eb..1d71cc734 100644 --- a/src/resources/extensions/sf/auto-verification.ts +++ b/src/resources/extensions/sf/auto-verification.ts @@ -16,6 +16,7 @@ import { resolveSliceFile, resolveSlicePath, resolveMilestoneFile } from "./path import { parseUnitId } from "./unit-id.js"; import { isDbAvailable, getTask, getSliceTasks, getMilestoneSlices, type TaskRow } from "./sf-db.js"; import { loadEffectiveSFPreferences } from "./preferences.js"; +import type { SFPreferences } from "./preferences-types.js"; import { extractVerdict } from "./verdict-parser.js"; import { isClosedStatus } from "./status-guards.js"; import { loadFile } from "./files.js"; @@ -28,7 +29,7 @@ import { runDependencyAudit, } from "./verification-gate.js"; import { writeVerificationJSON, type PostExecutionCheckJSON, type EvidenceJSON } from "./verification-evidence.js"; -import { logWarning } from "./workflow-logger.js"; +import { logWarning, logError } from "./workflow-logger.js"; import { runPostExecutionChecks, type PostExecutionResult } from "./post-execution-checks.js"; import type { AutoSession } from "./auto/session.js"; import type { VerificationResult as VerificationGateResult } from "./types.js"; @@ -213,13 +214,22 @@ export async function runPostUnitVerification( return "continue"; } + // ── Zone 1: Gate machinery (outer try) ────────────────────────────────── + // Failures here indicate broken infrastructure — pause for human review. + let prefs: SFPreferences | undefined; + let uokFlags: ReturnType; + let mid: string | undefined; + let sid: string | undefined; + let tid: string | undefined; + let result: ReturnType; + try { const effectivePrefs = loadEffectiveSFPreferences(); - const prefs = effectivePrefs?.preferences; - const uokFlags = resolveUokFlags(prefs); + prefs = effectivePrefs?.preferences; + uokFlags = resolveUokFlags(prefs); // Read task plan verify field - const { milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id); + ({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id)); let taskPlanVerify: string | undefined; if (mid && sid && tid) { if (isDbAvailable()) { @@ -228,12 +238,21 @@ export async function runPostUnitVerification( // When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks } - const result = runVerificationGate({ + result = runVerificationGate({ cwd: s.basePath, preferenceCommands: prefs?.verification_commands, taskPlanVerify, }); + // Handle skipped gate (no commands discovered) — fail-closed but not a hard failure + if (result.skipped === true) { + process.stderr.write( + "verification-gate: no commands discovered — gate skipped, not passed\n", + ); + ctx.ui.notify("[verify] SKIP — no verification commands configured", "warning"); + return "continue"; + } + // Capture runtime errors const runtimeErrors = await captureRuntimeErrors(); if (runtimeErrors.length > 0) { @@ -254,7 +273,16 @@ export async function runPostUnitVerification( process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`); } } + } catch (machineryErr) { + logError("engine", `verification-gate machinery error — pausing for human review: ${(machineryErr as Error).message}`); + ctx.ui.notify("verification-gate machinery error — pausing for human review", "error"); + await pauseAuto(ctx, pi); + return "pause"; + } + // ── Zone 2: Ancillary post-gate work (inner try) ───────────────────────── + // Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic. + try { if (uokFlags.gates) { const gateRunner = new UokGateRunner(); gateRunner.register({ @@ -587,7 +615,7 @@ export async function runPostUnitVerification( return "pause"; } } catch (err) { - // Gate errors are non-fatal + // Ancillary post-gate errors are non-fatal — log warning and continue logWarning("engine", `verification-gate error: ${(err as Error).message}`); return "continue"; } diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index ec24a0342..4d9a5628d 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -1819,12 +1819,29 @@ export async function runFinalize( if (preResultGuard.timedOut) { // Detach session from the timed-out unit so late async completions // cannot mutate state for the next unit (#3757). + const hadStagedPending = s.stagedPendingCommit; + const hadCommitted = s.lastGitActionStatus === "ok"; + s.stagedPendingCommit = false; // prevent orphaned deferred commit s.currentUnit = null; clearCurrentPhase(); // Drop any logger entries from the timed-out unit so they don't bleed // into the next iteration's drain. drainLogs(); loopState.consecutiveFinalizeTimeouts++; + + if (hadStagedPending) { + ctx.ui.notify( + "postUnitPreVerification timed out with staged-but-uncommitted changes — staged files will be included in next unit's commit.", + "warning", + ); + logWarning("engine", "finalize-timeout: staged-pending-commit orphaned — will be absorbed by next unit"); + } else if (hadCommitted) { + ctx.ui.notify( + "postUnitPreVerification timed out after git commit — changes are in history but verification was skipped.", + "warning", + ); + logWarning("engine", "finalize-timeout: git commit completed before timeout — verification was not run"); + } debugLog("autoLoop", { phase: "pre-verification-timeout", iteration: ic.iteration, diff --git a/src/resources/extensions/sf/auto/session.ts b/src/resources/extensions/sf/auto/session.ts index ac4ed1aac..b40bb7193 100644 --- a/src/resources/extensions/sf/auto/session.ts +++ b/src/resources/extensions/sf/auto/session.ts @@ -169,6 +169,20 @@ export class AutoSession { /** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */ checkpointSha: string | null = null; + // ── Deferred commit (Fix 1) ────────────────────────────────────────────── + /** + * True when postUnitPreVerification has staged files but deferred the git + * commit until after verification passes (Fix 1 deferral pattern). + * + * postUnitPostVerification reads this flag and calls git.commitStaged() + * before DB writes when it is set, then clears it. + * + * The timeout handler in phases.ts clears this flag and emits a diagnostic + * warning when postUnitPreVerification times out with staged-but-uncommitted + * changes (Fix 4). + */ + stagedPendingCommit = false; + // ── Signal handler ─────────────────────────────────────────────────────── sigtermHandler: (() => void) | null = null; @@ -263,6 +277,7 @@ export class AutoSession { this.isolationDegraded = false; this.milestoneMergedInPhases = false; this.checkpointSha = null; + this.stagedPendingCommit = false; // Signal handler this.sigtermHandler = null; diff --git a/src/resources/extensions/sf/git-service.ts b/src/resources/extensions/sf/git-service.ts index 3d0e6bffd..0c43fb17a 100644 --- a/src/resources/extensions/sf/git-service.ts +++ b/src/resources/extensions/sf/git-service.ts @@ -756,6 +756,35 @@ export class GitServiceImpl { nativeUpdateRef(this.basePath, refPath, "HEAD"); } + /** + * Stage files without committing. Returns true if anything was staged. + * + * Used by Fix 1 deferral: call this in postUnitPreVerification so changes + * are captured before verification, then call commitStaged() in + * postUnitPostVerification once verification has passed. + * + * @param extraExclusions Additional paths to exclude from staging. + */ + stageOnly(extraExclusions: readonly string[] = []): boolean { + if (!nativeHasChanges(this.basePath)) return false; + this.smartStage(extraExclusions); + return nativeHasStagedChanges(this.basePath); + } + + /** + * Commit already-staged files (no re-staging). Returns true if committed. + * + * Companion to stageOnly() for the Fix 1 deferred-commit pattern. + * Only calls nativeCommit when there are actually staged changes. + * + * @param message The commit message to use. + */ + commitStaged(message: string): boolean { + if (!nativeHasStagedChanges(this.basePath)) return false; + nativeCommit(this.basePath, message, { allowEmpty: false }); + return true; + } + /** * Run pre-merge verification check. Auto-detects test runner from project * files, or uses custom command from prefs.pre_merge_check. diff --git a/src/resources/extensions/sf/tests/verification-fail-closed.test.ts b/src/resources/extensions/sf/tests/verification-fail-closed.test.ts new file mode 100644 index 000000000..32beeeed2 --- /dev/null +++ b/src/resources/extensions/sf/tests/verification-fail-closed.test.ts @@ -0,0 +1,229 @@ +/** + * verification-fail-closed.test.ts — Structural regression for Fix 3. + * + * Verifies that `runPostUnitVerification` in auto-verification.ts splits its + * single monolithic try/catch into two zones: + * + * Zone 1 (Gate machinery): loadEffectiveSFPreferences, getTask, + * runVerificationGate, captureRuntimeErrors, runDependencyAudit. + * → if this throws: log error, notify UI "error", call pauseAuto, + * return "pause". + * + * Zone 2 (Ancillary): evidence writes, UOK gate runner calls, + * notification calls, auto-fix retry logic. + * → if this throws: log warning, return "continue". + * + * Testing strategy: + * node:test (this project) does NOT enable --experimental-test-module-mocks, + * so module-level mocking is unavailable (see graph-context.test.ts note). + * We verify the structural invariants by inspecting the compiled source text + * of auto-verification.ts directly, matching key patterns that encode the + * required behavior. This is the same approach used in + * auto-wrapup-inflight-guard.test.ts. + * + * Invariants checked: + * 1. Zone 1 outer catch calls pauseAuto and returns "pause". + * 2. Zone 1 outer catch uses logError (not logWarning) and the exact + * required message string. + * 3. Zone 1 outer catch calls ctx.ui.notify with "error" level. + * 4. Zone 2 inner catch returns "continue" (non-fatal fallback). + * 5. Zone 2 inner catch uses logWarning (not logError). + * 6. The two catches are distinct — machinery catch appears before + * ancillary catch in source order. + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const src = readFileSync( + join(import.meta.dirname, "..", "auto-verification.ts"), + "utf-8", +); + +// ─── helpers ──────────────────────────────────────────────────────────────── + +/** Find the nth occurrence of `needle` in `haystack`. Returns -1 if not found. */ +function nthIndex(haystack: string, needle: string, n: number): number { + let idx = -1; + for (let i = 0; i < n; i++) { + idx = haystack.indexOf(needle, idx + 1); + if (idx === -1) return -1; + } + return idx; +} + +// ─── Zone 1: machinery catch invariants ──────────────────────────────────── + +describe("Zone 1 — gate machinery catch", () => { + it("contains the required exact error message string", () => { + assert.ok( + src.includes("verification-gate machinery error — pausing for human review"), + 'auto-verification.ts must contain the literal string "verification-gate machinery error — pausing for human review"', + ); + }); + + it("uses logError (not logWarning) in the machinery catch", () => { + // Find the machinery catch block by locating "machineryErr" + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1, "machinery catch variable 'machineryErr' must be present"); + + // The next 400 characters of the catch block must contain logError + const catchBlock = src.slice(machineryIdx, machineryIdx + 400); + assert.ok( + catchBlock.includes("logError"), + "machinery catch must call logError (not logWarning) for infrastructure failures", + ); + assert.ok( + !catchBlock.includes("logWarning"), + "machinery catch must NOT call logWarning — infrastructure failures are errors, not warnings", + ); + }); + + it("calls ctx.ui.notify with 'error' level in machinery catch", () => { + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1); + const catchBlock = src.slice(machineryIdx, machineryIdx + 400); + + assert.ok( + catchBlock.includes('ctx.ui.notify'), + "machinery catch must notify the UI", + ); + assert.ok( + catchBlock.includes('"error"'), + "machinery catch UI notification must use 'error' severity level", + ); + }); + + it("calls pauseAuto in the machinery catch", () => { + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1); + const catchBlock = src.slice(machineryIdx, machineryIdx + 400); + + assert.ok( + catchBlock.includes("pauseAuto"), + "machinery catch must call pauseAuto to halt the auto-loop", + ); + }); + + it('returns "pause" in the machinery catch', () => { + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1); + const catchBlock = src.slice(machineryIdx, machineryIdx + 400); + + assert.ok( + catchBlock.includes('return "pause"'), + 'machinery catch must return "pause" so the caller halts the loop', + ); + }); +}); + +// ─── Zone 2: ancillary catch invariants ──────────────────────────────────── + +describe("Zone 2 — ancillary post-gate catch", () => { + it("the ancillary catch uses logWarning (non-fatal path)", () => { + // The ancillary catch uses 'err' as the binding (not 'machineryErr') + // and must appear AFTER the machinery catch in source order. + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1, "machinery catch must be present"); + + // Find the second catch block: look for "} catch (err)" after machineryIdx + const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx); + assert.ok( + ancillaryCatchIdx !== -1, + "ancillary catch '} catch (err)' must appear after the machinery catch", + ); + + const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300); + assert.ok( + catchBlock.includes("logWarning"), + "ancillary catch must use logWarning (non-fatal)", + ); + }); + + it('returns "continue" in the ancillary catch', () => { + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1); + const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx); + assert.ok(ancillaryCatchIdx !== -1); + + const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300); + assert.ok( + catchBlock.includes('return "continue"'), + 'ancillary catch must return "continue" — post-gate errors are non-fatal', + ); + }); + + it("ancillary catch does NOT call pauseAuto", () => { + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1); + const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx); + assert.ok(ancillaryCatchIdx !== -1); + + const catchBlock = src.slice(ancillaryCatchIdx, ancillaryCatchIdx + 300); + assert.ok( + !catchBlock.includes("pauseAuto"), + "ancillary catch must NOT call pauseAuto — post-gate errors do not halt the agent", + ); + }); +}); + +// ─── Structural ordering invariants ───────────────────────────────────────── + +describe("Two-zone structural ordering", () => { + it("machinery catch appears before ancillary catch in source", () => { + const machineryIdx = src.indexOf("machineryErr"); + const ancillaryCatchIdx = src.indexOf("} catch (err)", machineryIdx + 1); + assert.ok(machineryIdx !== -1, "'machineryErr' binding must exist"); + assert.ok(ancillaryCatchIdx !== -1, "'} catch (err)' must exist after machinery catch"); + assert.ok( + machineryIdx < ancillaryCatchIdx, + "Zone 1 machinery catch must appear before Zone 2 ancillary catch", + ); + }); + + it("Zone 1 critical calls appear before the machinery catch", () => { + const machineryIdx = src.indexOf("machineryErr"); + assert.ok(machineryIdx !== -1); + + // All Zone 1 calls must appear between function start and machineryErr + const zone1Section = src.slice( + src.indexOf("export async function runPostUnitVerification("), + machineryIdx, + ); + + assert.ok( + zone1Section.includes("loadEffectiveSFPreferences"), + "loadEffectiveSFPreferences must be in Zone 1", + ); + assert.ok( + zone1Section.includes("runVerificationGate"), + "runVerificationGate must be in Zone 1", + ); + assert.ok( + zone1Section.includes("captureRuntimeErrors"), + "captureRuntimeErrors must be in Zone 1", + ); + assert.ok( + zone1Section.includes("runDependencyAudit"), + "runDependencyAudit must be in Zone 1", + ); + }); + + it("logError is imported from workflow-logger", () => { + assert.ok( + src.includes('logError') && src.includes('workflow-logger'), + "logError must be imported from workflow-logger", + ); + // Verify the import line includes logError + const importLine = src.slice( + src.indexOf('import {'), + src.indexOf('} from "./workflow-logger'), + ); + assert.ok( + importLine.includes("logError"), + "logError must appear in the workflow-logger import", + ); + }); +}); diff --git a/src/resources/extensions/sf/tests/verification-gate.test.ts b/src/resources/extensions/sf/tests/verification-gate.test.ts index b15d539ad..0e5d6431e 100644 --- a/src/resources/extensions/sf/tests/verification-gate.test.ts +++ b/src/resources/extensions/sf/tests/verification-gate.test.ts @@ -8,8 +8,8 @@ * 4. First-non-empty-wins precedence * 5. All commands pass → gate passes * 6. One command fails → gate fails with exit code + stderr - * 7. Missing package.json → 0 checks → pass - * 8. Empty scripts → 0 checks → pass + * 7. Missing package.json → 0 checks → skipped (passed:false, skipped:true) + * 8. Empty scripts → 0 checks → skipped (passed:false, skipped:true) * 9. Preference validation for verification keys * 10. spawnSync error (command not found) → failure with exit code 127 * 11. Dependency audit — git diff detection, npm audit parsing, graceful failures @@ -251,15 +251,27 @@ describe("verification-gate: execution", () => { assert.ok(result.checks[1].stderr.includes("err")); }); - test("no commands discovered → gate passes with 0 checks", () => { + test("no commands discovered → gate fails with skipped:true and 0 checks", () => { const result = runVerificationGate({ cwd: tmp, }); - assert.equal(result.passed, true); + assert.equal(result.passed, false); + assert.equal(result.skipped, true); assert.equal(result.checks.length, 0); assert.equal(result.discoverySource, "none"); }); + test("zero commands returns skipped:true and passed:false", () => { + // Provide empty preference commands so discovery yields nothing + const result = runVerificationGate({ + cwd: tmp, + preferenceCommands: [], + }); + assert.equal(result.passed, false, "passed must be false when no commands are discovered"); + assert.equal(result.skipped, true, "skipped must be true when no commands are discovered"); + assert.equal(result.checks.length, 0, "checks array must be empty"); + }); + test("command not found → exit code 127", () => { const result = runVerificationGate({ cwd: tmp, diff --git a/src/resources/extensions/sf/types.ts b/src/resources/extensions/sf/types.ts index b6a5b78fd..4142635fa 100644 --- a/src/resources/extensions/sf/types.ts +++ b/src/resources/extensions/sf/types.ts @@ -102,7 +102,8 @@ export interface AuditWarning { /** Aggregate result from the verification gate */ export interface VerificationResult { - passed: boolean; // true if all checks passed (or no checks discovered) + passed: boolean; // true if all checks passed + skipped?: boolean; // true when no commands were discovered (fail-closed: passed=false, skipped=true) checks: VerificationCheck[]; // per-command results discoverySource: "preference" | "task-plan" | "package-json" | "none"; timestamp: number; // Date.now() at gate start diff --git a/src/resources/extensions/sf/verification-gate.ts b/src/resources/extensions/sf/verification-gate.ts index fff3c88c7..c60cf7d1a 100644 --- a/src/resources/extensions/sf/verification-gate.ts +++ b/src/resources/extensions/sf/verification-gate.ts @@ -233,7 +233,7 @@ export interface RunVerificationGateOptions { * and return a structured result. * * - All commands run sequentially regardless of individual pass/fail. - * - `passed` is true when every command exits 0 (or no commands are discovered). + * - `passed` is true when every command exits 0; `skipped` is true when no commands are discovered. * - stdout/stderr per command are truncated to 10 KB. */ export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult { @@ -247,7 +247,8 @@ export function runVerificationGate(options: RunVerificationGateOptions): Verifi if (commands.length === 0) { return { - passed: true, + passed: false, + skipped: true, checks: [], discoverySource: source, timestamp,