feat(sf): S03/T03 — wire OutcomeLearningGate into adaptive verification policy

Adds adaptive-verification-policy.js, which reads OutcomeLearningGate trace events from the last 24h and adjusts verification_max_retries / verification_auto_fix in project preferences:
- any one of the verification/artifact/execution failure classes accounts for >60% of samples → reduce retries to 1, disable auto-fix
- 0% failures across ≥5 samples → bump retries by one (capped at 3)
- all other cases → no change (returns null)

Wires into auto-verification.js after OutcomeLearningGate runs when the outcomeLearning flag is enabled. Includes 12 node:test tests.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-11 17:40:22 +02:00 · 2026-05-11 17:40:22 +02:00 · 4132ecc1db
commit 4132ecc1db
parent 7b225696cc
3 changed files with 298 additions and 0 deletions
--- a/src/resources/extensions/sf/tests/adaptive-verification-policy.test.mjs
+++ b/src/resources/extensions/sf/tests/adaptive-verification-policy.test.mjs
@ -0,0 +1,148 @@
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+import { computeAdaptiveVerificationPolicy } from "../uok/adaptive-verification-policy.js";
+
+/**
+ * Build a minimal trace event attributed to the "outcome-learning" gate,
+ * timestamped with the current time in ISO-8601 form.
+ *
+ * @param {string} outcome — outcome recorded on the event (tests use "pass" / "fail")
+ * @param {string} [failureClass="unknown"] — failure class recorded on the event
+ * @returns {{gateId: string, outcome: string, failureClass: string, ts: string}}
+ */
+function makeEvent(outcome, failureClass = "unknown") {
+	const ts = new Date().toISOString();
+	return { gateId: "outcome-learning", outcome, failureClass, ts };
+}
+
+// ─── Minimum sample threshold ──────────────────────────────────────────────
+
+describe("computeAdaptiveVerificationPolicy", () => {
+	// Build `count` identical outcome-learning events. An undefined
+	// failureClass falls through to makeEvent's default ("unknown").
+	const manyEvents = (count, outcome, failureClass) =>
+		Array.from({ length: count }, () => makeEvent(outcome, failureClass));
+
+	it("returns_null_when_fewer_than_5_samples", () => {
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{},
+			manyEvents(4, "pass"),
+		);
+		assert.equal(result, null);
+	});
+
+	// ─── High failure rate (>60%) for verification/artifact/execution ───────
+
+	for (const failureClass of ["verification", "artifact", "execution"]) {
+		it(`reduces_retries_when_${failureClass}_failure_rate_exceeds_60%`, () => {
+			const result = computeAdaptiveVerificationPolicy(
+				"/tmp",
+				{},
+				manyEvents(10, "fail", failureClass),
+			);
+			assert.ok(result, "Expected policy change");
+			assert.equal(result.verification_max_retries, 1);
+			assert.equal(result.verification_auto_fix, false);
+		});
+	}
+
+	it("does_not_reduce_retries_at_exactly_60%_failure_rate", () => {
+		// The threshold comparison is strict (> 60%), so 6 failures out of 10
+		// samples must leave the policy untouched.
+		const events = [
+			...manyEvents(6, "fail", "verification"),
+			...manyEvents(4, "pass"),
+		];
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.equal(result, null);
+	});
+
+	// Failure classes outside verification/artifact/execution never reduce retries.
+	for (const failureClass of ["closeout", "git"]) {
+		it(`does_not_reduce_retries_for_${failureClass}_failures`, () => {
+			const result = computeAdaptiveVerificationPolicy(
+				"/tmp",
+				{},
+				manyEvents(10, "fail", failureClass),
+			);
+			assert.equal(result, null);
+		});
+	}
+
+	// ─── Zero failures across the sample → bump retries ──────────────────────
+
+	it("increases_retries_when_all_passing", () => {
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2 },
+			manyEvents(5, "pass"),
+		);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 3);
+		assert.equal(result.verification_auto_fix, true); // unchanged
+	});
+
+	it("caps_retries_at_3", () => {
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 3 },
+			manyEvents(5, "pass"),
+		);
+		assert.equal(result, null); // already at ceiling
+	});
+
+	it("does_not_increase_retries_when_some_failures_exist", () => {
+		const events = [
+			...manyEvents(4, "pass"),
+			makeEvent("fail", "verification"),
+		];
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2 },
+			events,
+		);
+		assert.equal(result, null);
+	});
+
+	// ─── Mixed scenarios ─────────────────────────────────────────────────────
+
+	it("returns_null_when_no_change_needed", () => {
+		const events = [
+			...manyEvents(3, "pass"),
+			makeEvent("fail", "unknown"),
+			makeEvent("pass"),
+		];
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2, verification_auto_fix: true },
+			events,
+		);
+		assert.equal(result, null);
+	});
+
+	it("uses_defaults_when_prefs_omitted", () => {
+		// Pass no preferences object at all, so the defaults are what get
+		// compared against (an empty object is already covered above).
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			undefined,
+			manyEvents(10, "fail", "verification"),
+		);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 1);
+		assert.equal(result.verification_auto_fix, false);
+	});
+
+	it("ignores_non_outcome_learning_events", () => {
+		// NOTE(review): these assertions hold whether or not the cost-guard
+		// event is counted in the sample (10/11 is still above 60%), so this
+		// only proves that a foreign event does not break the computation.
+		const events = [
+			...manyEvents(10, "fail", "verification"),
+			{ gateId: "cost-guard", outcome: "fail", failureClass: "budget" },
+		];
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 1);
+	});
+});
--- a/src/resources/extensions/sf/uok/adaptive-verification-policy.js
+++ b/src/resources/extensions/sf/uok/adaptive-verification-policy.js
@ -0,0 +1,118 @@
+/**
+ * Adaptive Verification Policy
+ *
+ * Purpose: close the self-improvement loop by reading OutcomeLearningGate
+ * findings and adjusting verification retry policy (verification_max_retries,
+ * verification_auto_fix) based on recent failure patterns.
+ *
+ * Consumer: auto-verification.js after OutcomeLearningGate runs.
+ */
+import { saveFile } from "../files.js";
+import {
+	getProjectSFPreferencesPath,
+	loadProjectSFPreferences,
+} from "../preferences.js";
+import { serializePreferencesToFrontmatter } from "../preferences-serializer.js";
+import { readTraceEvents } from "./trace-writer.js";
+
+// Fewer outcome-learning events than this → no policy is computed (null).
+const MIN_SAMPLE_SIZE = 5;
+// Lookback window, in hours, passed to readTraceEvents when loading events.
+const LOOKBACK_HOURS = 24;
+// A high-impact failure class whose share of all sampled events is strictly
+// above this fraction triggers the "reduce retries / disable auto-fix" policy.
+const HIGH_FAILURE_THRESHOLD = 0.6;
+// Upper bound on the overall failure rate for the retry bump; the bump
+// additionally requires zero failures in the sample.
+const LOW_FAILURE_THRESHOLD = 0.3;
+// The retry bump never raises verification_max_retries above this value.
+const MAX_RETRIES_CEILING = 3;
+
+/**
+ * Compute an adaptive verification policy based on OutcomeLearningGate
+ * trace events from the last 24 hours.
+ *
+ * Rules, evaluated over the events whose gateId is "outcome-learning":
+ * - fewer than MIN_SAMPLE_SIZE events → null;
+ * - any single class among verification/artifact/execution accounts for more
+ *   than HIGH_FAILURE_THRESHOLD of the events → retries are lowered to at
+ *   most 1 and auto-fix is disabled;
+ * - no failures at all → retries are bumped by one, never above
+ *   MAX_RETRIES_CEILING;
+ * - the resulting values equal the current ones → null.
+ *
+ * @param {string} basePath — project base path
+ * @param {object} currentPrefs — current effective preferences subset
+ *   (missing fields default to 2 retries and auto-fix enabled)
+ * @param {Array<object>} [overrideEvents] — optional injected events for testing;
+ *   they go through the same gateId filter as events read from the trace
+ * @returns {object|null} — { verification_max_retries, verification_auto_fix } or null if no change
+ */
+export function computeAdaptiveVerificationPolicy(
+	basePath,
+	currentPrefs,
+	overrideEvents,
+) {
+	// Parenthesised so the gateId filter applies to injected events as well;
+	// without the parentheses `??` would bind the filter to the trace read only.
+	const events = (
+		overrideEvents ?? readTraceEvents(basePath, "gate_run", LOOKBACK_HOURS)
+	).filter((e) => e.gateId === "outcome-learning");
+
+	if (events.length < MIN_SAMPLE_SIZE) {
+		return null;
+	}
+
+	const currentMaxRetries = currentPrefs?.verification_max_retries ?? 2;
+	const currentAutoFix = currentPrefs?.verification_auto_fix ?? true;
+
+	// Count failures by failureClass
+	const failuresByClass = new Map();
+	let totalFails = 0;
+	for (const e of events) {
+		if (e.outcome !== "fail") continue;
+		totalFails++;
+		const fc = e.failureClass || "unknown";
+		failuresByClass.set(fc, (failuresByClass.get(fc) ?? 0) + 1);
+	}
+
+	let newMaxRetries = currentMaxRetries;
+	let newAutoFix = currentAutoFix;
+
+	// High failure rate (>60%) for verification/artifact/execution → reduce retries, disable auto-fix
+	const highImpactClasses = ["verification", "artifact", "execution"];
+	const hasDominantFailureClass = highImpactClasses.some(
+		(fc) =>
+			(failuresByClass.get(fc) ?? 0) / events.length > HIGH_FAILURE_THRESHOLD,
+	);
+
+	// Low overall failure rate (<30%) and all passing → optionally increase retries.
+	// The zero-failure requirement is the stricter of the two conditions.
+	const overallFailureRate = totalFails / events.length;
+	const allPassing =
+		totalFails === 0 && overallFailureRate < LOW_FAILURE_THRESHOLD;
+
+	if (hasDominantFailureClass) {
+		// Never raise the budget here: a configured value below 1 stays as is.
+		newMaxRetries = Math.min(currentMaxRetries, 1);
+		newAutoFix = false;
+	} else if (allPassing && currentMaxRetries < MAX_RETRIES_CEILING) {
+		// Only bump values below the ceiling, so a configured value above it
+		// is never lowered by a "bump".
+		newMaxRetries = Math.min(currentMaxRetries + 1, MAX_RETRIES_CEILING);
+	}
+
+	// Report a policy only when it differs from the current preferences, so
+	// callers do not rewrite preferences with identical values on every run.
+	if (newMaxRetries === currentMaxRetries && newAutoFix === currentAutoFix) {
+		return null;
+	}
+
+	return {
+		verification_max_retries: newMaxRetries,
+		verification_auto_fix: newAutoFix,
+	};
+}
+
+/**
+ * Persist an adaptive policy into the project preferences file.
+ *
+ * Starts from the currently loaded project preferences (or `{ version: 1 }`
+ * when none are available), overwrites the two verification fields with the
+ * policy values, serializes the result and saves it to the preferences path.
+ *
+ * @param {string} _basePath — project base path (not used by this function;
+ *   the path comes from getProjectSFPreferencesPath())
+ * @param {object} policy — { verification_max_retries, verification_auto_fix }
+ * @returns {Promise<boolean>} — resolves to true once preferences were written
+ */
+export async function applyAdaptiveVerificationPolicy(_basePath, policy) {
+	const prefsPath = getProjectSFPreferencesPath();
+	const loaded = loadProjectSFPreferences();
+	const basePrefs = loaded?.preferences || { version: 1 };
+
+	const { verification_max_retries, verification_auto_fix } = policy;
+	const merged = {
+		...basePrefs,
+		verification_max_retries,
+		verification_auto_fix,
+	};
+
+	await saveFile(prefsPath, serializePreferencesToFrontmatter(merged));
+	return true;
+}
--- a/src/resources/extensions/sf/uok/auto-verification.js
+++ b/src/resources/extensions/sf/uok/auto-verification.js
@ -35,6 +35,10 @@ import {
 	runVerificationGate,
 } from "../verification-gate.js";
 import { logError, logWarning } from "../workflow-logger.js";
+import {
+	applyAdaptiveVerificationPolicy,
+	computeAdaptiveVerificationPolicy,
+} from "./adaptive-verification-policy.js";
 import { ChaosMonkeyGate } from "./chaos-monkey.js";
 import { CostGuardGate } from "./cost-guard-gate.js";
 import { resolveUokFlags } from "./flags.js";
@ -380,6 +384,34 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 					result.chaosMonkeyRationale = res.rationale;
 				}
 			}
+			// ── Adaptive verification policy ──
+			// After OutcomeLearningGate runs, check if failure patterns warrant
+			// adjusting retry budgets to avoid wasted cycles or allow more retries.
+			if (uokFlags.outcomeLearning) {
+				try {
+					const adaptivePolicy = computeAdaptiveVerificationPolicy(
+						s.basePath,
+						prefs,
+					);
+					if (adaptivePolicy) {
+						process.stderr.write(
+							`verification-gate: adaptive policy triggered — ` +
+								`max_retries=${adaptivePolicy.verification_max_retries}, ` +
+								`auto_fix=${adaptivePolicy.verification_auto_fix}\n`,
+						);
+						await applyAdaptiveVerificationPolicy(s.basePath, adaptivePolicy);
+						// Update local prefs so the rest of this function uses the new values
+						prefs.verification_max_retries =
+							adaptivePolicy.verification_max_retries;
+						prefs.verification_auto_fix = adaptivePolicy.verification_auto_fix;
+					}
+				} catch (adaptiveErr) {
+					logWarning(
+						"engine",
+						`adaptive-verification-policy error: ${adaptiveErr.message}`,
+					);
+				}
+			}
 		}
 		// Auto-fix retry preferences
 		const autoFixEnabled = prefs?.verification_auto_fix !== false;