From 4132ecc1db623bd3cd4ab6f7dd74dd13707d944b Mon Sep 17 00:00:00 2001
From: Mikael Hugo
Date: Mon, 11 May 2026 17:40:22 +0200
Subject: [PATCH] =?UTF-8?q?feat(sf):=20S03/T03=20=E2=80=94=20wire=20Outcom?=
 =?UTF-8?q?eLearningGate=20into=20adaptive=20verification=20policy?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds adaptive-verification-policy.js which reads OutcomeLearningGate
trace events from the last 24h and adjusts verification_max_retries /
verification_auto_fix in project preferences:

- >60% of events failing in any one of the verification/artifact/execution
  classes → reduce retries to at most 1, disable auto-fix
- 0% failures across ≥5 samples → bump retries (capped at 3)
- all other cases, or when the values would not change → no change
  (returns null)

Wires into auto-verification.js after OutcomeLearningGate runs when
outcomeLearning flag is enabled.

Includes 15 node:test tests.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../adaptive-verification-policy.test.mjs     | 195 ++++++++++++++++++
 .../sf/uok/adaptive-verification-policy.js    | 124 +++++++++++
 .../extensions/sf/uok/auto-verification.js    |  35 ++++
 3 files changed, 354 insertions(+)
 create mode 100644 src/resources/extensions/sf/tests/adaptive-verification-policy.test.mjs
 create mode 100644 src/resources/extensions/sf/uok/adaptive-verification-policy.js

diff --git a/src/resources/extensions/sf/tests/adaptive-verification-policy.test.mjs b/src/resources/extensions/sf/tests/adaptive-verification-policy.test.mjs
new file mode 100644
index 000000000..13611745c
--- /dev/null
+++ b/src/resources/extensions/sf/tests/adaptive-verification-policy.test.mjs
@@ -0,0 +1,195 @@
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+import { computeAdaptiveVerificationPolicy } from "../uok/adaptive-verification-policy.js";
+
+function makeEvent(outcome, failureClass = "unknown") {
+	return {
+		gateId: "outcome-learning",
+		outcome,
+		failureClass,
+		ts: new Date().toISOString(),
+	};
+}
+
+// ─── Minimum sample threshold ──────────────────────────────────────────────
+
+describe("computeAdaptiveVerificationPolicy", () => {
+	it("returns_null_when_fewer_than_5_samples", () => {
+		const events = [
+			makeEvent("pass"),
+			makeEvent("pass"),
+			makeEvent("pass"),
+			makeEvent("pass"),
+		];
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.equal(result, null);
+	});
+
+	// ─── High failure rate (>60%) for verification/artifact/execution ───────
+
+	it("reduces_retries_when_verification_failure_rate_exceeds_60%", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "verification"),
+		);
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.ok(result, "Expected policy change");
+		assert.equal(result.verification_max_retries, 1);
+		assert.equal(result.verification_auto_fix, false);
+	});
+
+	it("reduces_retries_when_artifact_failure_rate_exceeds_60%", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "artifact"),
+		);
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 1);
+		assert.equal(result.verification_auto_fix, false);
+	});
+
+	it("reduces_retries_when_execution_failure_rate_exceeds_60%", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "execution"),
+		);
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 1);
+		assert.equal(result.verification_auto_fix, false);
+	});
+
+	it("returns_null_when_policy_is_already_reduced", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "verification"),
+		);
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 1, verification_auto_fix: false },
+			events,
+		);
+		assert.equal(result, null);
+	});
+
+	it("never_raises_retries_when_reducing", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "verification"),
+		);
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 0 },
+			events,
+		);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 0);
+		assert.equal(result.verification_auto_fix, false);
+	});
+
+	it("does_not_reduce_retries_for_closeout_failures", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "closeout"),
+		);
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.equal(result, null);
+	});
+
+	it("does_not_reduce_retries_for_git_failures", () => {
+		const events = Array.from({ length: 10 }, () => makeEvent("fail", "git"));
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.equal(result, null);
+	});
+
+	// ─── No failures at all → bump retries (capped at 3) ────────────────────
+
+	it("increases_retries_when_all_passing", () => {
+		const events = Array.from({ length: 5 }, () => makeEvent("pass"));
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2 },
+			events,
+		);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 3);
+		assert.equal(result.verification_auto_fix, true); // unchanged
+	});
+
+	it("caps_retries_at_3", () => {
+		const events = Array.from({ length: 5 }, () => makeEvent("pass"));
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 3 },
+			events,
+		);
+		assert.equal(result, null); // already at ceiling
+	});
+
+	it("does_not_lower_retries_already_above_ceiling", () => {
+		const events = Array.from({ length: 5 }, () => makeEvent("pass"));
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 5 },
+			events,
+		);
+		assert.equal(result, null);
+	});
+
+	it("does_not_increase_retries_when_some_failures_exist", () => {
+		const events = [
+			...Array.from({ length: 4 }, () => makeEvent("pass")),
+			makeEvent("fail", "verification"),
+		];
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2 },
+			events,
+		);
+		assert.equal(result, null);
+	});
+
+	// ─── Mixed scenarios ─────────────────────────────────────────────────────
+
+	it("returns_null_when_no_change_needed", () => {
+		const events = [
+			makeEvent("pass"),
+			makeEvent("pass"),
+			makeEvent("pass"),
+			makeEvent("fail", "unknown"),
+			makeEvent("pass"),
+		];
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2, verification_auto_fix: true },
+			events,
+		);
+		assert.equal(result, null);
+	});
+
+	it("uses_defaults_when_prefs_omitted", () => {
+		const events = Array.from({ length: 10 }, () =>
+			makeEvent("fail", "verification"),
+		);
+		const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 1);
+		assert.equal(result.verification_auto_fix, false);
+	});
+
+	it("ignores_non_outcome_learning_events", () => {
+		// Unfiltered, the cost-guard failures would read as a 67% verification
+		// failure rate; filtered, only the five passing events remain.
+		const events = [
+			...Array.from({ length: 5 }, () => makeEvent("pass")),
+			...Array.from({ length: 10 }, () => ({
+				gateId: "cost-guard",
+				outcome: "fail",
+				failureClass: "verification",
+			})),
+		];
+		const result = computeAdaptiveVerificationPolicy(
+			"/tmp",
+			{ verification_max_retries: 2 },
+			events,
+		);
+		assert.ok(result);
+		assert.equal(result.verification_max_retries, 3);
+		assert.equal(result.verification_auto_fix, true);
+	});
+});
diff --git a/src/resources/extensions/sf/uok/adaptive-verification-policy.js b/src/resources/extensions/sf/uok/adaptive-verification-policy.js
new file mode 100644
index 000000000..ce1d6df69
--- /dev/null
+++ b/src/resources/extensions/sf/uok/adaptive-verification-policy.js
@@ -0,0 +1,124 @@
+/**
+ * Adaptive Verification Policy
+ *
+ * Purpose: close the self-improvement loop by reading OutcomeLearningGate
+ * findings and adjusting verification retry policy (verification_max_retries,
+ * verification_auto_fix) based on recent failure patterns.
+ *
+ * Consumer: auto-verification.js after OutcomeLearningGate runs.
+ */
+import { saveFile } from "../files.js";
+import {
+	getProjectSFPreferencesPath,
+	loadProjectSFPreferences,
+} from "../preferences.js";
+import { serializePreferencesToFrontmatter } from "../preferences-serializer.js";
+import { readTraceEvents } from "./trace-writer.js";
+
+const OUTCOME_LEARNING_GATE_ID = "outcome-learning";
+const MIN_SAMPLE_SIZE = 5;
+const LOOKBACK_HOURS = 24;
+const HIGH_FAILURE_THRESHOLD = 0.6;
+const HIGH_IMPACT_CLASSES = ["verification", "artifact", "execution"];
+const REDUCED_MAX_RETRIES = 1;
+const MAX_RETRIES_CEILING = 3;
+
+/**
+ * Compute an adaptive verification policy based on OutcomeLearningGate
+ * trace events from the last 24 hours.
+ *
+ * Only events whose gateId is "outcome-learning" are counted, including
+ * injected ones. Rules:
+ * - fewer than 5 events → null
+ * - one of the verification/artifact/execution classes accounts for more
+ *   than 60% of events → retries lowered to at most 1, auto-fix disabled
+ * - no failed events → retries raised by one, never past 3
+ * - anything else, or a rule that leaves both values as they are → null
+ *
+ * @param {string} basePath — project base path
+ * @param {object} currentPrefs — current effective preferences subset
+ * @param {Array} [overrideEvents] — optional injected events for testing
+ * @returns {object|null} — { verification_max_retries, verification_auto_fix } or null if no change
+ */
+export function computeAdaptiveVerificationPolicy(
+	basePath,
+	currentPrefs,
+	overrideEvents,
+) {
+	// Injected events are filtered too, so tests take the production path.
+	const events = (
+		overrideEvents ?? readTraceEvents(basePath, "gate_run", LOOKBACK_HOURS)
+	).filter((e) => e.gateId === OUTCOME_LEARNING_GATE_ID);
+
+	if (events.length < MIN_SAMPLE_SIZE) {
+		return null;
+	}
+
+	const currentMaxRetries = currentPrefs?.verification_max_retries ?? 2;
+	const currentAutoFix = currentPrefs?.verification_auto_fix ?? true;
+
+	// Count failures by failureClass
+	const byClass = new Map();
+	let totalFails = 0;
+	for (const e of events) {
+		if (e.outcome === "fail") {
+			totalFails++;
+			const fc = e.failureClass || "unknown";
+			byClass.set(fc, (byClass.get(fc) ?? 0) + 1);
+		}
+	}
+
+	let newMaxRetries = currentMaxRetries;
+	let newAutoFix = currentAutoFix;
+
+	const highFailure = HIGH_IMPACT_CLASSES.some(
+		(fc) => (byClass.get(fc) ?? 0) / events.length > HIGH_FAILURE_THRESHOLD,
+	);
+	if (highFailure) {
+		// Math.min: this branch must never hand out more retries than before.
+		newMaxRetries = Math.min(currentMaxRetries, REDUCED_MAX_RETRIES);
+		newAutoFix = false;
+	} else if (totalFails === 0) {
+		// Bump by one up to the ceiling; a budget already above it is kept.
+		newMaxRetries = Math.max(
+			currentMaxRetries,
+			Math.min(currentMaxRetries + 1, MAX_RETRIES_CEILING),
+		);
+	}
+
+	// Report a change only when a value differs, so callers do not rewrite
+	// preferences (and log) on every run once the policy has settled.
+	if (newMaxRetries === currentMaxRetries && newAutoFix === currentAutoFix) {
+		return null;
+	}
+
+	return {
+		verification_max_retries: newMaxRetries,
+		verification_auto_fix: newAutoFix,
+	};
+}
+
+/**
+ * Apply an adaptive policy to project preferences.
+ *
+ * Copies the loaded project preferences (or starts from { version: 1 }),
+ * sets the two verification fields, and saves the serialized result.
+ *
+ * @param {string} _basePath — project base path (not read by this function)
+ * @param {object} policy — { verification_max_retries, verification_auto_fix }
+ * @returns {Promise<boolean>} — resolves to true after the save completes
+ */
+export async function applyAdaptiveVerificationPolicy(_basePath, policy) {
+	const path = getProjectSFPreferencesPath();
+	const existing = loadProjectSFPreferences();
+	const prefs = existing?.preferences
+		? { ...existing.preferences }
+		: { version: 1 };
+
+	prefs.verification_max_retries = policy.verification_max_retries;
+	prefs.verification_auto_fix = policy.verification_auto_fix;
+
+	const yaml = serializePreferencesToFrontmatter(prefs);
+	await saveFile(path, yaml);
+	return true;
+}
diff --git a/src/resources/extensions/sf/uok/auto-verification.js b/src/resources/extensions/sf/uok/auto-verification.js
index 3f85c2f30..b001f5ca9 100644
--- a/src/resources/extensions/sf/uok/auto-verification.js
+++ b/src/resources/extensions/sf/uok/auto-verification.js
@@ -35,6 +35,10 @@ import {
 	runVerificationGate,
 } from "../verification-gate.js";
 import { logError, logWarning } from "../workflow-logger.js";
+import {
+	applyAdaptiveVerificationPolicy,
+	computeAdaptiveVerificationPolicy,
+} from "./adaptive-verification-policy.js";
 import { ChaosMonkeyGate } from "./chaos-monkey.js";
 import { CostGuardGate } from "./cost-guard-gate.js";
 import { resolveUokFlags } from "./flags.js";
@@ -380,6 +384,37 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
 				result.chaosMonkeyRationale = res.rationale;
 			}
 		}
+		// ── Adaptive verification policy ──
+		// After OutcomeLearningGate runs, check if failure patterns warrant
+		// adjusting retry budgets to avoid wasted cycles or allow more retries.
+		if (uokFlags.outcomeLearning) {
+			try {
+				const adaptivePolicy = computeAdaptiveVerificationPolicy(
+					s.basePath,
+					prefs,
+				);
+				if (adaptivePolicy) {
+					process.stderr.write(
+						`verification-gate: adaptive policy triggered — ` +
+							`max_retries=${adaptivePolicy.verification_max_retries}, ` +
+							`auto_fix=${adaptivePolicy.verification_auto_fix}\n`,
+					);
+					await applyAdaptiveVerificationPolicy(s.basePath, adaptivePolicy);
+					// Update local prefs so the rest of this function uses the new values
+					// (prefs is read with ?. below, so it may be nullish — guard the write)
+					if (prefs) {
+						prefs.verification_max_retries =
+							adaptivePolicy.verification_max_retries;
+						prefs.verification_auto_fix = adaptivePolicy.verification_auto_fix;
+					}
+				}
+			} catch (adaptiveErr) {
+				logWarning(
+					"engine",
+					`adaptive-verification-policy error: ${adaptiveErr.message}`,
+				);
+			}
+		}
 	}
 	// Auto-fix retry preferences
 	const autoFixEnabled = prefs?.verification_auto_fix !== false;