feat(sf): S03/T03 — wire OutcomeLearningGate into adaptive verification policy

Adds adaptive-verification-policy.js which reads OutcomeLearningGate
trace events from the last 24h and adjusts verification_max_retries /
verification_auto_fix in project preferences:
- >60% verification/artifact/execution failures → reduce retries to 1, disable auto-fix
- 0% failures across ≥5 samples → bump retries (capped at 3)
- all other cases → no change (returns null)

Wires into auto-verification.js after OutcomeLearningGate runs when
outcomeLearning flag is enabled. Includes 12 node:test tests.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-11 17:40:22 +02:00
parent 7b225696cc
commit 4132ecc1db
3 changed files with 298 additions and 0 deletions

View file

@ -0,0 +1,148 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import { computeAdaptiveVerificationPolicy } from "../uok/adaptive-verification-policy.js";
/**
 * Build a synthetic OutcomeLearningGate trace event for the tests below.
 *
 * @param {string} outcome gate outcome, e.g. "pass" or "fail"
 * @param {string} [failureClass="unknown"] failure classification
 * @returns {{gateId: string, outcome: string, failureClass: string, ts: string}}
 *   event stamped with the current time in ISO-8601 form
 */
function makeEvent(outcome, failureClass = "unknown") {
  const ts = new Date().toISOString();
  return { gateId: "outcome-learning", outcome, failureClass, ts };
}
// ─── Minimum sample threshold ──────────────────────────────────────────────
describe("computeAdaptiveVerificationPolicy", () => {
  // Builds `count` identical outcome-learning events; an omitted failureClass
  // falls through to makeEvent's own default.
  const repeat = (count, outcome, failureClass) =>
    Array.from({ length: count }, () => makeEvent(outcome, failureClass));

  it("returns_null_when_fewer_than_5_samples", () => {
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      {},
      repeat(4, "pass"),
    );
    assert.equal(result, null);
  });

  // ─── High failure rate (>60%) for verification/artifact/execution ───────
  // One generated test per high-impact class; the names are the same as the
  // hand-written tests they replace.
  for (const failureClass of ["verification", "artifact", "execution"]) {
    it(`reduces_retries_when_${failureClass}_failure_rate_exceeds_60%`, () => {
      const result = computeAdaptiveVerificationPolicy(
        "/tmp",
        {},
        repeat(10, "fail", failureClass),
      );
      assert.ok(result, "Expected policy change");
      assert.equal(result.verification_max_retries, 1);
      assert.equal(result.verification_auto_fix, false);
    });
  }

  // Failure classes outside the high-impact set must never reduce retries.
  it("does_not_reduce_retries_for_closeout_failures", () => {
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      {},
      repeat(10, "fail", "closeout"),
    );
    assert.equal(result, null);
  });

  it("does_not_reduce_retries_for_git_failures", () => {
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      {},
      repeat(10, "fail", "git"),
    );
    assert.equal(result, null);
  });

  // ─── No failures at all across >=5 samples ───────────────────────────────
  it("increases_retries_when_all_passing", () => {
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 2 },
      repeat(5, "pass"),
    );
    assert.ok(result);
    assert.equal(result.verification_max_retries, 3);
    assert.equal(result.verification_auto_fix, true); // unchanged
  });

  it("caps_retries_at_3", () => {
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 3 },
      repeat(5, "pass"),
    );
    assert.equal(result, null); // already at ceiling
  });

  it("does_not_increase_retries_when_some_failures_exist", () => {
    const events = [...repeat(4, "pass"), makeEvent("fail", "verification")];
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 2 },
      events,
    );
    assert.equal(result, null);
  });

  // ─── Mixed scenarios ─────────────────────────────────────────────────────
  it("returns_null_when_no_change_needed", () => {
    const events = [
      ...repeat(3, "pass"),
      makeEvent("fail", "unknown"),
      makeEvent("pass"),
    ];
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 2, verification_auto_fix: true },
      events,
    );
    assert.equal(result, null);
  });

  it("uses_defaults_when_prefs_omitted", () => {
    // Pass `undefined` (not `{}`) so the nullish-prefs path is what gets
    // exercised, as the test name promises.
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      undefined,
      repeat(10, "fail", "verification"),
    );
    assert.ok(result);
    assert.equal(result.verification_max_retries, 1);
    assert.equal(result.verification_auto_fix, false);
  });

  it("ignores_non_outcome_learning_events", () => {
    const events = [
      ...repeat(10, "fail", "verification"),
      { gateId: "cost-guard", outcome: "fail", failureClass: "budget" },
    ];
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.ok(result);
    assert.equal(result.verification_max_retries, 1);
  });
});

View file

@ -0,0 +1,118 @@
/**
* Adaptive Verification Policy
*
* Purpose: close the self-improvement loop by reading OutcomeLearningGate
* findings and adjusting verification retry policy (verification_max_retries,
* verification_auto_fix) based on recent failure patterns.
*
* Consumer: auto-verification.js after OutcomeLearningGate runs.
*/
import { saveFile } from "../files.js";
import {
getProjectSFPreferencesPath,
loadProjectSFPreferences,
} from "../preferences.js";
import { serializePreferencesToFrontmatter } from "../preferences-serializer.js";
import { readTraceEvents } from "./trace-writer.js";
// Minimum number of outcome-learning events required before adapting.
const MIN_SAMPLE_SIZE = 5;
// How far back (in hours) trace events are read.
const LOOKBACK_HOURS = 24;
// A single high-impact failure class above this share of events triggers a reduction.
const HIGH_FAILURE_THRESHOLD = 0.6;
// Retries are never bumped above this value.
const MAX_RETRIES_CEILING = 3;
// Retry budget applied when a high-impact failure class dominates.
const REDUCED_MAX_RETRIES = 1;
const OUTCOME_LEARNING_GATE_ID = "outcome-learning";
const HIGH_IMPACT_FAILURE_CLASSES = ["verification", "artifact", "execution"];
/**
 * Compute an adaptive verification policy based on OutcomeLearningGate
 * trace events from the last 24 hours.
 *
 * Rules, evaluated over outcome-learning events only:
 * - fewer than MIN_SAMPLE_SIZE events → null
 * - any single high-impact failure class (verification / artifact / execution)
 *   accounts for more than 60% of events → retries lowered to at most 1 and
 *   auto-fix disabled
 * - zero failures → retries bumped by one, never above MAX_RETRIES_CEILING
 * - null whenever the resulting values equal the current ones
 *
 * @param {string} basePath project base path
 * @param {object} [currentPrefs] current effective preferences subset; missing
 *   values default to verification_max_retries=2, verification_auto_fix=true
 * @param {Array<object>} [overrideEvents] optional injected events for testing;
 *   filtered by gateId exactly like events read from the trace
 * @returns {object|null} { verification_max_retries, verification_auto_fix } or null if no change
 */
export function computeAdaptiveVerificationPolicy(
  basePath,
  currentPrefs,
  overrideEvents,
) {
  const rawEvents =
    overrideEvents ?? readTraceEvents(basePath, "gate_run", LOOKBACK_HOURS);
  // Filter injected events too, so events from other gates never skew the
  // sample size or the failure rates.
  const events = rawEvents.filter(
    (e) => e?.gateId === OUTCOME_LEARNING_GATE_ID,
  );
  if (events.length < MIN_SAMPLE_SIZE) {
    return null;
  }

  const currentMaxRetries = currentPrefs?.verification_max_retries ?? 2;
  const currentAutoFix = currentPrefs?.verification_auto_fix ?? true;

  // Count failures by failureClass
  const failuresByClass = new Map();
  let totalFails = 0;
  for (const e of events) {
    if (e.outcome !== "fail") continue;
    totalFails++;
    const fc = e.failureClass || "unknown";
    failuresByClass.set(fc, (failuresByClass.get(fc) ?? 0) + 1);
  }

  let newMaxRetries = currentMaxRetries;
  let newAutoFix = currentAutoFix;

  // NOTE(review): the rate is evaluated per class, not summed across the three
  // high-impact classes — confirm this matches the intended policy.
  const highImpactDominates = HIGH_IMPACT_FAILURE_CLASSES.some(
    (fc) =>
      (failuresByClass.get(fc) ?? 0) / events.length > HIGH_FAILURE_THRESHOLD,
  );

  if (highImpactDominates) {
    // "Reduce" must never raise a budget that is already below the target.
    newMaxRetries = Math.min(currentMaxRetries, REDUCED_MAX_RETRIES);
    newAutoFix = false;
  } else if (totalFails === 0 && currentMaxRetries < MAX_RETRIES_CEILING) {
    // Only bump when below the ceiling, so a budget configured above the
    // ceiling is left alone rather than being lowered to it.
    newMaxRetries = Math.min(currentMaxRetries + 1, MAX_RETRIES_CEILING);
  }

  // Report a change only when a value actually differs; otherwise the caller
  // would rewrite preferences on every run.
  if (newMaxRetries === currentMaxRetries && newAutoFix === currentAutoFix) {
    return null;
  }
  return {
    verification_max_retries: newMaxRetries,
    verification_auto_fix: newAutoFix,
  };
}
/**
 * Persist an adaptive policy into the project preferences file.
 *
 * Loads the existing project preferences (starting from `{ version: 1 }` when
 * none are present), overlays the two verification fields from `policy`,
 * serializes the result to frontmatter and saves it.
 *
 * @param {string} _basePath project base path; currently unused — the
 *   preferences path is resolved by getProjectSFPreferencesPath() with no
 *   arguments
 * @param {object} policy { verification_max_retries, verification_auto_fix }
 * @returns {Promise<boolean>} resolves to true once the preferences were written
 */
export async function applyAdaptiveVerificationPolicy(_basePath, policy) {
  const targetPath = getProjectSFPreferencesPath();
  const loaded = loadProjectSFPreferences();
  const basePrefs = loaded?.preferences || { version: 1 };
  // Spread into a fresh object so the loaded preferences are never mutated.
  const merged = {
    ...basePrefs,
    verification_max_retries: policy.verification_max_retries,
    verification_auto_fix: policy.verification_auto_fix,
  };
  await saveFile(targetPath, serializePreferencesToFrontmatter(merged));
  return true;
}

View file

@ -35,6 +35,10 @@ import {
runVerificationGate,
} from "../verification-gate.js";
import { logError, logWarning } from "../workflow-logger.js";
import {
applyAdaptiveVerificationPolicy,
computeAdaptiveVerificationPolicy,
} from "./adaptive-verification-policy.js";
import { ChaosMonkeyGate } from "./chaos-monkey.js";
import { CostGuardGate } from "./cost-guard-gate.js";
import { resolveUokFlags } from "./flags.js";
@ -380,6 +384,34 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
result.chaosMonkeyRationale = res.rationale;
}
}
// ── Adaptive verification policy ──
// After OutcomeLearningGate runs, check if failure patterns warrant
// adjusting retry budgets to avoid wasted cycles or allow more retries.
if (uokFlags.outcomeLearning) {
try {
const adaptivePolicy = computeAdaptiveVerificationPolicy(
s.basePath,
prefs,
);
if (adaptivePolicy) {
process.stderr.write(
`verification-gate: adaptive policy triggered — ` +
`max_retries=${adaptivePolicy.verification_max_retries}, ` +
`auto_fix=${adaptivePolicy.verification_auto_fix}\n`,
);
await applyAdaptiveVerificationPolicy(s.basePath, adaptivePolicy);
// Update local prefs so the rest of this function uses the new values
prefs.verification_max_retries =
adaptivePolicy.verification_max_retries;
prefs.verification_auto_fix = adaptivePolicy.verification_auto_fix;
}
} catch (adaptiveErr) {
logWarning(
"engine",
`adaptive-verification-policy error: ${adaptiveErr.message}`,
);
}
}
}
// Auto-fix retry preferences
const autoFixEnabled = prefs?.verification_auto_fix !== false;