diff --git a/src/resources/extensions/sf/docs/preferences-reference.md b/src/resources/extensions/sf/docs/preferences-reference.md index 5530a62ee..5816059a1 100644 --- a/src/resources/extensions/sf/docs/preferences-reference.md +++ b/src/resources/extensions/sf/docs/preferences-reference.md @@ -249,6 +249,8 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `verification_max_retries`: number — maximum number of fix-and-retry cycles for verification failures. Default: `0` (no retries). +- `verification_auto_defer_threshold`: number — confidence threshold from `0` to `1` for treating low-risk style/format/deprecation-only verification failures as deferred instead of blocking. Default: `0.75`. + - `uat_dispatch`: boolean — when `true`, enables UAT (User Acceptance Testing) dispatch mode. Default: `false`. - `post_unit_hooks`: array — hooks that fire after a unit completes. Each entry has: diff --git a/src/resources/extensions/sf/preferences-serializer.js b/src/resources/extensions/sf/preferences-serializer.js index b4a11c1a6..ed6992317 100644 --- a/src/resources/extensions/sf/preferences-serializer.js +++ b/src/resources/extensions/sf/preferences-serializer.js @@ -125,6 +125,7 @@ export function serializePreferencesToFrontmatter(prefs) { "verification_commands", "verification_auto_fix", "verification_max_retries", + "verification_auto_defer_threshold", "search_provider", "context_selection", ]; diff --git a/src/resources/extensions/sf/preferences-types.js b/src/resources/extensions/sf/preferences-types.js index 787efd46d..6632a008c 100644 --- a/src/resources/extensions/sf/preferences-types.js +++ b/src/resources/extensions/sf/preferences-types.js @@ -101,6 +101,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set([ "verification_commands", "verification_auto_fix", "verification_max_retries", + "verification_auto_defer_threshold", "search_provider", "context_selection", "widget_mode", diff --git 
a/src/resources/extensions/sf/preferences-validation.js b/src/resources/extensions/sf/preferences-validation.js index f9fb871a3..b83077374 100644 --- a/src/resources/extensions/sf/preferences-validation.js +++ b/src/resources/extensions/sf/preferences-validation.js @@ -1398,6 +1398,21 @@ export function validatePreferences(preferences) { errors.push("verification_max_retries must be a non-negative number"); } } + if (preferences.verification_auto_defer_threshold !== undefined) { + const raw = preferences.verification_auto_defer_threshold; + if ( + typeof raw === "number" && + Number.isFinite(raw) && + raw >= 0 && + raw <= 1 + ) { + validated.verification_auto_defer_threshold = raw; + } else { + errors.push( + "verification_auto_defer_threshold must be a number between 0 and 1", + ); + } + } // ─── Git Preferences ─────────────────────────────────────────────────── if (preferences.git && typeof preferences.git === "object") { const git = {}; diff --git a/src/resources/extensions/sf/preferences.js b/src/resources/extensions/sf/preferences.js index 5325cdd90..ac05d7fd4 100644 --- a/src/resources/extensions/sf/preferences.js +++ b/src/resources/extensions/sf/preferences.js @@ -418,6 +418,9 @@ function mergePreferences(base, override) { override.verification_auto_fix ?? base.verification_auto_fix, verification_max_retries: override.verification_max_retries ?? base.verification_max_retries, + verification_auto_defer_threshold: + override.verification_auto_defer_threshold ?? + base.verification_auto_defer_threshold, enhanced_verification: override.enhanced_verification ?? 
base.enhanced_verification, enhanced_verification_pre: diff --git a/src/resources/extensions/sf/templates/preferences.yaml b/src/resources/extensions/sf/templates/preferences.yaml index d0e1e54e1..decec5943 100644 --- a/src/resources/extensions/sf/templates/preferences.yaml +++ b/src/resources/extensions/sf/templates/preferences.yaml @@ -97,6 +97,7 @@ parallel: verification_commands: [] verification_auto_fix: verification_max_retries: +verification_auto_defer_threshold: notifications: enabled: on_complete: diff --git a/src/resources/extensions/sf/tests/preferences-verification-auto-defer.test.mjs b/src/resources/extensions/sf/tests/preferences-verification-auto-defer.test.mjs new file mode 100644 index 000000000..31ef41d7c --- /dev/null +++ b/src/resources/extensions/sf/tests/preferences-verification-auto-defer.test.mjs @@ -0,0 +1,29 @@ +/** + * preferences-verification-auto-defer.test.mjs — threshold preference contract. + * + * Purpose: ensure the auto-defer verification threshold survives preference + * validation instead of being silently dropped before autonomous verification. 
+ */ +import assert from "node:assert/strict"; +import { test } from "vitest"; +import { validatePreferences } from "../preferences-validation.js"; + +test("validatePreferences_when_auto_defer_threshold_is_between_zero_and_one_keeps_value", () => { + const result = validatePreferences({ + verification_auto_defer_threshold: 0.65, + }); + + assert.deepEqual(result.errors, []); + assert.equal(result.preferences.verification_auto_defer_threshold, 0.65); +}); + +test("validatePreferences_when_auto_defer_threshold_is_out_of_range_reports_error", () => { + const result = validatePreferences({ + verification_auto_defer_threshold: 1.5, + }); + + assert.equal(result.preferences.verification_auto_defer_threshold, undefined); + assert.deepEqual(result.errors, [ + "verification_auto_defer_threshold must be a number between 0 and 1", + ]); +}); diff --git a/src/resources/extensions/sf/tests/verification-defer-policy.test.mjs b/src/resources/extensions/sf/tests/verification-defer-policy.test.mjs new file mode 100644 index 000000000..df20e734e --- /dev/null +++ b/src/resources/extensions/sf/tests/verification-defer-policy.test.mjs @@ -0,0 +1,102 @@ +/** + * verification-defer-policy.test.mjs — auto-defer classification contracts. + * + * Purpose: keep autonomous verification from retrying low-risk style findings + * while still blocking real test/build/runtime failures. 
+ */ +import assert from "node:assert/strict"; +import { test } from "vitest"; +import { + classifyCheck, + computeDeferConfidence, + decideAutoDefer, +} from "../verification-defer-policy.js"; + +function result(checks) { + return { checks }; +} + +test("decideAutoDefer_when_all_failures_are_style_only_defers_above_threshold", () => { + const decision = decideAutoDefer( + result([ + { + command: "npx biome format --write src", + exitCode: 1, + stderr: "formatting changed", + }, + ]), + ); + + assert.equal(decision.defer, true); + assert.equal(decision.confidence, 0.9); + assert.deepEqual(decision.reasons, [ + "npx biome format --write src: deferrable", + ]); +}); + +test("decideAutoDefer_when_failure_contains_error_blocks_even_for_lint_command", () => { + const check = { + command: "eslint --fix src", + exitCode: 1, + stderr: "Parsing error: Unexpected token", + }; + + assert.equal(classifyCheck(check), "blocking"); + assert.deepEqual(decideAutoDefer(result([check])), { + defer: false, + confidence: 0, + reasons: [], + }); +}); + +test("computeDeferConfidence_when_deferrable_and_unknown_stays_below_default_threshold", () => { + const confidence = computeDeferConfidence( + result([ + { + command: "prettier --check .", + exitCode: 1, + stderr: "Code style issues found", + }, + { + command: "custom verify", + exitCode: 2, + stderr: "review output manually", + }, + ]), + ); + + assert.equal(confidence, 0.7); + assert.equal( + decideAutoDefer( + result([ + { + command: "prettier --check .", + exitCode: 1, + stderr: "Code style issues found", + }, + { + command: "custom verify", + exitCode: 2, + stderr: "review output manually", + }, + ]), + ).defer, + false, + ); +}); + +test("decideAutoDefer_when_blocking_failure_present_does_not_defer", () => { + assert.equal( + decideAutoDefer( + result([ + { command: "lint", exitCode: 1, stderr: "style warning" }, + { + command: "test", + exitCode: 1, + stderr: "fatal error: assertion failed", + }, + ]), + ).defer, + false, + ); 
+}); diff --git a/src/resources/extensions/sf/uok/auto-verification.js b/src/resources/extensions/sf/uok/auto-verification.js index b001f5ca9..cde810e03 100644 --- a/src/resources/extensions/sf/uok/auto-verification.js +++ b/src/resources/extensions/sf/uok/auto-verification.js @@ -27,6 +27,7 @@ import { isMilestoneComplete } from "../state.js"; import { isClosedStatus } from "../status-guards.js"; import { parseUnitId } from "../unit-id.js"; import { extractVerdict } from "../verdict-parser.js"; +import { decideAutoDefer } from "../verification-defer-policy.js"; import { writeVerificationJSON } from "../verification-evidence.js"; import { captureRuntimeErrors, @@ -748,6 +749,59 @@ export async function runPostUnitVerification(vctx, pauseAuto) { if (postExecBlockingFailure) { result.passed = false; } + // ── Auto-defer confidence check ── + // Low-risk findings (style, format, deprecation-only) should not block flow. + const deferThreshold = + typeof prefs?.verification_auto_defer_threshold === "number" + ? prefs.verification_auto_defer_threshold + : 0.75; + const deferDecision = decideAutoDefer(result, deferThreshold); + if (deferDecision.defer) { + // Do not consume a retry attempt — the finding is intentionally deferred. 
+ s.verificationRetryCount.delete(s.currentUnit.id); + s.pendingVerificationRetry = null; + ctx.ui.notify( + `Verification deferred: low-risk findings (${deferDecision.reasons.join("; ")}) confidence=${deferDecision.confidence.toFixed(2)} >= threshold=${deferThreshold}`, + "info", + ); + process.stderr.write( + `verification-gate: deferred ${deferDecision.reasons.length} low-risk check(s) — confidence=${deferDecision.confidence.toFixed(2)}\n`, + ); + // Write evidence JSON with deferred flag + if (mid && sid && tid) { + try { + const sDir = resolveSlicePath(s.basePath, mid, sid); + if (sDir) { + const tasksDir = join(sDir, "tasks"); + const deferredResult = { + ...result, + passed: false, + deferred: true, + deferredReasons: deferDecision.reasons, + deferConfidence: deferDecision.confidence, + }; + writeVerificationJSON( + deferredResult, + tasksDir, + tid, + s.currentUnit.id, + undefined, + undefined, + tokenCount, + memoryPressureMB, + gateOutcomes, + recoveryStatus, + ); + } + } catch (evidenceErr) { + logWarning( + "engine", + `verification-evidence write error (defer): ${evidenceErr.message}`, + ); + } + } + return "continue"; + } // ── Auto-fix retry logic ── if (result.passed) { s.verificationRetryCount.delete(s.currentUnit.id); @@ -847,6 +901,13 @@ function writeVerificationJSONWithPostExec( ...(gateOutcomes !== undefined ? { gateOutcomes } : {}), ...(recoveryStatus !== undefined ? { recoveryStatus } : {}), postExecutionChecks, + ...(result.deferred === true ? { deferred: true } : {}), + ...(result.deferredReasons + ? { deferredReasons: result.deferredReasons } + : {}), + ...(result.deferConfidence !== undefined + ? 
{ deferConfidence: result.deferConfidence } + : {}), }; if (result.runtimeErrors && result.runtimeErrors.length > 0) { evidence.runtimeErrors = result.runtimeErrors.map((e) => ({
diff --git a/src/resources/extensions/sf/verification-defer-policy.js b/src/resources/extensions/sf/verification-defer-policy.js
new file mode 100644
index 000000000..ca4eda07e
--- /dev/null
+++ b/src/resources/extensions/sf/verification-defer-policy.js
@@ -0,0 +1,129 @@
+/**
+ * verification-defer-policy.js — classify verification failures as deferrable vs blocking.
+ *
+ * Purpose: prevent low-risk findings (style, format, deprecation-only warnings)
+ * from blocking autonomous flow. When no failed check is blocking, at least one
+ * is deferrable, and confidence meets the threshold, decideAutoDefer reports
+ * `defer: true` so the consumer can continue the loop instead of entering the
+ * retry/pause path, without consuming a retry attempt.
+ *
+ * Consumer: uok/auto-verification.js after the verification gate runs but before
+ * retry/pause decision.
+ */
+
+// DEFERRABLE_WORDS matches whole words only (trailing \b), so stems such as
+// "deprecat" or "format" do not match "deprecated"/"formatting" here.
+const DEFERRABLE_WORDS =
+  /\b(style|format|prettier|eslint.*fix|lint.*warning|deprecat|cosmetic|whitespace|trailing.*space|indent|semicolon|quote)\b/i;
+// BLOCKING_WORDS matches stems (no trailing \b) so inflected forms such as
+// "failed", "errors" and "exceptions" are treated as blocking too.
+const BLOCKING_WORDS =
+  /\b(error|fail|crash|fatal|exception|throw|reject|unhandled|assert|timeout|broken|invalid|syntax)/i;
+const STYLE_COMMANDS = /\b(prettier|eslint.*fix|stylelint|biome.*format)\b/i;
+
+/**
+ * Classify a single failed verification check.
+ *
+ * Returns:
+ * - "deferrable" when stderr/command suggests style/format/deprecation-only issues.
+ * - "blocking" when stderr contains blocking indicators.
+ * - "unknown" when the failure is ambiguous.
+ *
+ * Purpose: separate advisory verification noise from failures that should stop
+ * autonomous execution.
+ *
+ * Consumer: decideAutoDefer before runPostUnitVerification spends a retry.
+ */
+export function classifyCheck(check) {
+  const stderr = check.stderr ?? "";
+  const command = check.command ?? "";
+
+  // Blocking words override everything — if the output contains "error", "fail",
+  // "crash", "fatal", etc., it is never deferrable regardless of command name.
+  if (BLOCKING_WORDS.test(stderr)) return "blocking";
+
+  // Style/format-only commands with exit code 1 and short stderr are deferrable.
+  if (
+    STYLE_COMMANDS.test(command) &&
+    check.exitCode === 1 &&
+    stderr.length < 300
+  ) {
+    return "deferrable";
+  }
+
+  // Deferrable words in stderr with no blocking words → deferrable.
+  if (DEFERRABLE_WORDS.test(stderr)) return "deferrable";
+
+  // Deprecation stem fallback: DEFERRABLE_WORDS requires a trailing word
+  // boundary, so "deprecated"/"deprecation" are only caught here.
+  if (/\bdeprecat/i.test(stderr)) return "deferrable";
+
+  return "unknown";
+}
+
+/**
+ * Compute auto-defer confidence for a verification result.
+ *
+ * Returns a number from 0 to 1.
+ *
+ * Purpose: give autonomous verification an explicit confidence score before it
+ * decides whether a failed check is safe to defer.
+ *
+ * Consumer: decideAutoDefer and focused policy tests.
+ */
+export function computeDeferConfidence(result) {
+  const failedChecks = result.checks.filter((c) => c.exitCode !== 0);
+  if (failedChecks.length === 0) return 1.0;
+
+  const classifications = failedChecks.map(classifyCheck);
+  const blockingCount = classifications.filter((c) => c === "blocking").length;
+  const deferrableCount = classifications.filter(
+    (c) => c === "deferrable",
+  ).length;
+  const unknownCount = classifications.filter((c) => c === "unknown").length;
+
+  if (blockingCount > 0) return 0.0;
+  if (deferrableCount > 0 && unknownCount === 0) return 0.9;
+  if (deferrableCount > 0 && unknownCount > 0) return 0.7;
+  return 0.5;
+}
+
+/**
+ * Decide whether a failed verification result should be auto-deferred.
+ *
+ * Purpose: allow low-risk verification failures to continue without consuming
+ * a retry while keeping blocking failures on the normal retry/pause path.
+ *
+ * A result is deferred only when no failed check is blocking, at least one
+ * failed check is deferrable, and confidence >= threshold. The explicit
+ * blocking guard matters because blocking failures score confidence 0, which
+ * would otherwise satisfy a threshold of 0 (a value preference validation
+ * accepts).
+ *
+ * Consumer: runPostUnitVerification after evidence has been collected.
+ *
+ * @param result - verification gate result (must have .checks)
+ * @param threshold - confidence threshold (default 0.75)
+ * @returns { defer: boolean, confidence: number, reasons: string[] }
+ */
+export function decideAutoDefer(result, threshold = 0.75) {
+  const confidence = computeDeferConfidence(result);
+  const failedChecks = result.checks.filter((c) => c.exitCode !== 0);
+  const reasons = [];
+  let hasBlocking = false;
+
+  for (const check of failedChecks) {
+    const cls = classifyCheck(check);
+    if (cls === "blocking") {
+      hasBlocking = true;
+    } else if (cls === "deferrable") {
+      reasons.push(`${check.command}: ${cls}`);
+    }
+  }
+
+  return {
+    defer: !hasBlocking && confidence >= threshold && reasons.length > 0,
+    confidence,
+    reasons,
+  };
+}
diff --git a/src/resources/extensions/sf/verification-evidence.js b/src/resources/extensions/sf/verification-evidence.js index bb34dd01d..166da5e60 100644 --- a/src/resources/extensions/sf/verification-evidence.js +++ b/src/resources/extensions/sf/verification-evidence.js @@ -49,6 +49,13 @@ export function writeVerificationJSON( ...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}), ...(gateOutcomes !== undefined ? { gateOutcomes } : {}), ...(recoveryStatus !== undefined ? { recoveryStatus } : {}), + ...(result.deferred === true ? { deferred: true } : {}), + ...(result.deferredReasons + ? { deferredReasons: result.deferredReasons } + : {}), + ...(result.deferConfidence !== undefined + ? { deferConfidence: result.deferConfidence } + : {}), }; if (result.runtimeErrors && result.runtimeErrors.length > 0) { evidence.runtimeErrors = result.runtimeErrors.map((e) => ({