feat(sf): S03/T03 — wire OutcomeLearningGate into adaptive verification policy
Adds adaptive-verification-policy.js, which reads OutcomeLearningGate trace events from the last 24h and adjusts verification_max_retries / verification_auto_fix in project preferences:
- >60% verification/artifact/execution failures → reduce retries to 1, disable auto-fix
- 0% failures across ≥5 samples → bump retries (capped at 3)
- all other cases → no change (returns null)

Wires into auto-verification.js after OutcomeLearningGate runs when the outcomeLearning flag is enabled. Includes 12 node:test tests.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
7b225696cc
commit
4132ecc1db
3 changed files with 298 additions and 0 deletions
|
|
@ -0,0 +1,148 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { describe, it } from "node:test";
|
||||
import { computeAdaptiveVerificationPolicy } from "../uok/adaptive-verification-policy.js";
|
||||
|
||||
/**
 * Build a minimal OutcomeLearningGate trace event for use in tests.
 *
 * @param {string} outcome — gate outcome recorded on the event (the tests use "pass" / "fail")
 * @param {string} [failureClass="unknown"] — failure class recorded on the event
 * @returns {{gateId: string, outcome: string, failureClass: string, ts: string}} — event stamped with the current time in ISO-8601 form
 */
function makeEvent(outcome, failureClass = "unknown") {
  const ts = new Date().toISOString();
  const event = { gateId: "outcome-learning", outcome, failureClass, ts };
  return event;
}
|
||||
|
||||
// ─── Minimum sample threshold ──────────────────────────────────────────────
|
||||
|
||||
// Behavioural tests for computeAdaptiveVerificationPolicy. Every case injects
// its events through the third argument, so no trace files are read.
describe("computeAdaptiveVerificationPolicy", () => {
  // ─── Minimum sample threshold ────────────────────────────────────────────

  it("returns_null_when_fewer_than_5_samples", () => {
    const events = [
      makeEvent("pass"),
      makeEvent("pass"),
      makeEvent("pass"),
      makeEvent("pass"),
    ];
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.equal(result, null);
  });

  // ─── High failure rate (>60%) for verification/artifact/execution ───────

  it("reduces_retries_when_verification_failure_rate_exceeds_60%", () => {
    const events = Array.from({ length: 10 }, () =>
      makeEvent("fail", "verification"),
    );
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.ok(result, "Expected policy change");
    assert.equal(result.verification_max_retries, 1);
    assert.equal(result.verification_auto_fix, false);
  });

  it("reduces_retries_when_artifact_failure_rate_exceeds_60%", () => {
    const events = Array.from({ length: 10 }, () =>
      makeEvent("fail", "artifact"),
    );
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.ok(result);
    assert.equal(result.verification_max_retries, 1);
    assert.equal(result.verification_auto_fix, false);
  });

  it("reduces_retries_when_execution_failure_rate_exceeds_60%", () => {
    const events = Array.from({ length: 10 }, () =>
      makeEvent("fail", "execution"),
    );
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.ok(result);
    assert.equal(result.verification_max_retries, 1);
    assert.equal(result.verification_auto_fix, false);
  });

  // The threshold is strict: exactly 60% must not trigger a reduction.
  it("does_not_reduce_retries_when_failure_rate_is_exactly_60%", () => {
    const events = [
      ...Array.from({ length: 6 }, () => makeEvent("fail", "verification")),
      ...Array.from({ length: 4 }, () => makeEvent("pass")),
    ];
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.equal(result, null);
  });

  it("does_not_reduce_retries_for_closeout_git_failures", () => {
    const events = Array.from({ length: 10 }, () =>
      makeEvent("fail", "closeout"),
    );
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.equal(result, null);
  });

  it("does_not_reduce_retries_for_git_failures", () => {
    const events = Array.from({ length: 10 }, () => makeEvent("fail", "git"));
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.equal(result, null);
  });

  // ─── Zero failures across the sample → retry budget may be bumped ───────

  it("increases_retries_when_all_passing", () => {
    const events = Array.from({ length: 5 }, () => makeEvent("pass"));
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 2 },
      events,
    );
    assert.ok(result);
    assert.equal(result.verification_max_retries, 3);
    assert.equal(result.verification_auto_fix, true); // unchanged
  });

  it("caps_retries_at_3", () => {
    const events = Array.from({ length: 5 }, () => makeEvent("pass"));
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 3 },
      events,
    );
    assert.equal(result, null); // already at ceiling
  });

  it("does_not_increase_retries_when_some_failures_exist", () => {
    const events = [
      ...Array.from({ length: 4 }, () => makeEvent("pass")),
      makeEvent("fail", "verification"),
    ];
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 2 },
      events,
    );
    assert.equal(result, null);
  });

  // ─── Mixed scenarios ─────────────────────────────────────────────────────

  it("returns_null_when_no_change_needed", () => {
    const events = [
      makeEvent("pass"),
      makeEvent("pass"),
      makeEvent("pass"),
      makeEvent("fail", "unknown"),
      makeEvent("pass"),
    ];
    const result = computeAdaptiveVerificationPolicy(
      "/tmp",
      { verification_max_retries: 2, verification_auto_fix: true },
      events,
    );
    assert.equal(result, null);
  });

  it("uses_defaults_when_prefs_omitted", () => {
    const events = Array.from({ length: 10 }, () =>
      makeEvent("fail", "verification"),
    );
    // Pass undefined (not {}) so the nullish-prefs path is actually exercised.
    const result = computeAdaptiveVerificationPolicy("/tmp", undefined, events);
    assert.ok(result);
    assert.equal(result.verification_max_retries, 1);
    assert.equal(result.verification_auto_fix, false);
  });

  it("ignores_non_outcome_learning_events", () => {
    const events = [
      ...Array.from({ length: 10 }, () => makeEvent("fail", "verification")),
      { gateId: "cost-guard", outcome: "fail", failureClass: "budget" },
    ];
    const result = computeAdaptiveVerificationPolicy("/tmp", {}, events);
    assert.ok(result);
    assert.equal(result.verification_max_retries, 1);
    assert.equal(result.verification_auto_fix, false);
  });
});
|
||||
118
src/resources/extensions/sf/uok/adaptive-verification-policy.js
Normal file
118
src/resources/extensions/sf/uok/adaptive-verification-policy.js
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Adaptive Verification Policy
|
||||
*
|
||||
* Purpose: close the self-improvement loop by reading OutcomeLearningGate
|
||||
* findings and adjusting verification retry policy (verification_max_retries,
|
||||
* verification_auto_fix) based on recent failure patterns.
|
||||
*
|
||||
* Consumer: auto-verification.js after OutcomeLearningGate runs.
|
||||
*/
|
||||
import { saveFile } from "../files.js";
|
||||
import {
|
||||
getProjectSFPreferencesPath,
|
||||
loadProjectSFPreferences,
|
||||
} from "../preferences.js";
|
||||
import { serializePreferencesToFrontmatter } from "../preferences-serializer.js";
|
||||
import { readTraceEvents } from "./trace-writer.js";
|
||||
|
||||
// Minimum number of outcome-learning events required before any adjustment.
const MIN_SAMPLE_SIZE = 5;
// Trace look-back window, in hours, passed to readTraceEvents.
const LOOKBACK_HOURS = 24;
// A single high-impact failure class must exceed this share of all samples
// (strictly greater than) to trigger the "reduce" policy.
const HIGH_FAILURE_THRESHOLD = 0.6;
// Upper bound the "bump" policy will never raise retries beyond.
const MAX_RETRIES_CEILING = 3;
// Retry budget applied when a high-impact failure class dominates.
const REDUCED_MAX_RETRIES = 1;
// Only events emitted by this gate are considered.
const OUTCOME_LEARNING_GATE_ID = "outcome-learning";
// Failure classes that trigger the "reduce" policy; others (e.g. "git",
// "closeout", "unknown") never reduce the budget.
const HIGH_IMPACT_FAILURE_CLASSES = ["verification", "artifact", "execution"];

/**
 * Compute an adaptive verification policy based on OutcomeLearningGate
 * trace events from the last 24 hours.
 *
 * Rules, evaluated over events whose gateId is "outcome-learning":
 * - fewer than 5 events → null (not enough signal)
 * - any one of verification/artifact/execution accounts for >60% of events
 *   → retries lowered to at most 1 and auto-fix disabled
 * - zero failures → retries raised by one, never beyond 3
 * - anything else → null
 *
 * Null is also returned when the computed policy equals the current
 * preferences, so callers do not rewrite preferences needlessly.
 *
 * @param {string} basePath — project base path (used only when events are read from traces)
 * @param {object} currentPrefs — current effective preferences subset; may be nullish
 * @param {Array<object>} [overrideEvents] — optional injected events for testing
 * @returns {object|null} — { verification_max_retries, verification_auto_fix } or null if no change
 */
export function computeAdaptiveVerificationPolicy(
  basePath,
  currentPrefs,
  overrideEvents,
) {
  // Filter injected events too, so both sources follow the same contract.
  const candidateEvents =
    overrideEvents ?? readTraceEvents(basePath, "gate_run", LOOKBACK_HOURS);
  const events = candidateEvents.filter(
    (e) => e?.gateId === OUTCOME_LEARNING_GATE_ID,
  );

  if (events.length < MIN_SAMPLE_SIZE) {
    return null;
  }

  const currentMaxRetries = currentPrefs?.verification_max_retries ?? 2;
  const currentAutoFix = currentPrefs?.verification_auto_fix ?? true;

  // Count failures by failureClass
  const failuresByClass = new Map();
  let totalFails = 0;
  for (const e of events) {
    if (e.outcome !== "fail") {
      continue;
    }
    totalFails++;
    const fc = e.failureClass || "unknown";
    failuresByClass.set(fc, (failuresByClass.get(fc) ?? 0) + 1);
  }

  const hasHighImpactSpike = HIGH_IMPACT_FAILURE_CLASSES.some(
    (fc) =>
      (failuresByClass.get(fc) ?? 0) / events.length > HIGH_FAILURE_THRESHOLD,
  );

  let newMaxRetries = currentMaxRetries;
  let newAutoFix = currentAutoFix;

  if (hasHighImpactSpike) {
    // Never raise the budget on this path (e.g. a configured 0 stays 0).
    newMaxRetries = Math.min(currentMaxRetries, REDUCED_MAX_RETRIES);
    newAutoFix = false;
  } else if (totalFails === 0 && currentMaxRetries < MAX_RETRIES_CEILING) {
    // Only bump below the ceiling; a higher user-configured value is kept.
    newMaxRetries = Math.min(currentMaxRetries + 1, MAX_RETRIES_CEILING);
  }

  const unchanged =
    newMaxRetries === currentMaxRetries && newAutoFix === currentAutoFix;
  if (unchanged) {
    return null;
  }

  return {
    verification_max_retries: newMaxRetries,
    verification_auto_fix: newAutoFix,
  };
}
|
||||
|
||||
/**
 * Persist an adaptive verification policy into the project preferences file.
 *
 * Loads the existing project preferences (falling back to `{ version: 1 }`
 * when none are available), overwrites the two verification keys with the
 * policy values, serializes the result and saves it to the project
 * preferences path.
 *
 * @param {string} _basePath — project base path; currently unused, the path and
 *   existing preferences are resolved by the preferences helpers without it
 * @param {object} policy — { verification_max_retries, verification_auto_fix }
 * @returns {Promise<boolean>} — resolves to true once the preferences were written
 */
export async function applyAdaptiveVerificationPolicy(_basePath, policy) {
  const targetPath = getProjectSFPreferencesPath();
  const loaded = loadProjectSFPreferences();

  const { verification_max_retries, verification_auto_fix } = policy;
  const base = loaded?.preferences ? loaded.preferences : { version: 1 };
  // Copy instead of mutating the loaded object; policy keys override existing ones.
  const updated = { ...base, verification_max_retries, verification_auto_fix };

  await saveFile(targetPath, serializePreferencesToFrontmatter(updated));
  return true;
}
|
||||
|
|
@ -35,6 +35,10 @@ import {
|
|||
runVerificationGate,
|
||||
} from "../verification-gate.js";
|
||||
import { logError, logWarning } from "../workflow-logger.js";
|
||||
import {
|
||||
applyAdaptiveVerificationPolicy,
|
||||
computeAdaptiveVerificationPolicy,
|
||||
} from "./adaptive-verification-policy.js";
|
||||
import { ChaosMonkeyGate } from "./chaos-monkey.js";
|
||||
import { CostGuardGate } from "./cost-guard-gate.js";
|
||||
import { resolveUokFlags } from "./flags.js";
|
||||
|
|
@ -380,6 +384,34 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
result.chaosMonkeyRationale = res.rationale;
|
||||
}
|
||||
}
|
||||
// ── Adaptive verification policy ──
|
||||
// After OutcomeLearningGate runs, check if failure patterns warrant
|
||||
// adjusting retry budgets to avoid wasted cycles or allow more retries.
|
||||
if (uokFlags.outcomeLearning) {
|
||||
try {
|
||||
const adaptivePolicy = computeAdaptiveVerificationPolicy(
|
||||
s.basePath,
|
||||
prefs,
|
||||
);
|
||||
if (adaptivePolicy) {
|
||||
process.stderr.write(
|
||||
`verification-gate: adaptive policy triggered — ` +
|
||||
`max_retries=${adaptivePolicy.verification_max_retries}, ` +
|
||||
`auto_fix=${adaptivePolicy.verification_auto_fix}\n`,
|
||||
);
|
||||
await applyAdaptiveVerificationPolicy(s.basePath, adaptivePolicy);
|
||||
// Update local prefs so the rest of this function uses the new values
|
||||
prefs.verification_max_retries =
|
||||
adaptivePolicy.verification_max_retries;
|
||||
prefs.verification_auto_fix = adaptivePolicy.verification_auto_fix;
|
||||
}
|
||||
} catch (adaptiveErr) {
|
||||
logWarning(
|
||||
"engine",
|
||||
`adaptive-verification-policy error: ${adaptiveErr.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Auto-fix retry preferences
|
||||
const autoFixEnabled = prefs?.verification_auto_fix !== false;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue