fix: restore full content of R074/R075 swarm files from worktrees
The prior commit (cc32ab79d) accidentally landed truncated versions of the
new R074 + R075 files because the cherry-pick was taken from a partial state. Restored:
- inline-runtime-gate.js: 74→96 LOC
- inline-runtime-gate.test.mjs: 115→273 LOC (15 tests; the 2 tests targeting the
Sonnet-imagined bootstrapGateRegistry/BOOTSTRAP_GATES were rewritten to assert SF's
actual side-effect-on-import registry pattern)
- adversarial-budget.js: 86→106 LOC
- adversarial-budget.test.mjs: 63→132 LOC (9 tests)
- adversarial-finding-bridge.js: 123→191 LOC
- adversarial-finding-bridge.test.mjs: 98→216 LOC (14 tests)
45/45 tests pass across the four affected files.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cc32ab79d9
commit
425bba7d39
6 changed files with 774 additions and 243 deletions
|
|
@@ -1,27 +1,42 @@
|
|||
/**
|
||||
* adversarial-finding-bridge.js — quarantine smoke_gate on severe findings.
|
||||
* adversarial-finding-bridge.js — Bridge high-severity adversarial findings to
|
||||
* smoke_gate quarantine.
|
||||
*
|
||||
* Purpose: connect high-severity adversarial findings to the existing
|
||||
* smoke_gate quarantine control so a challenge result can stop promotion until
|
||||
* an operator reviews the finding.
|
||||
* Purpose: R075's spec requires that high-severity adversarial-finding self-feedback
|
||||
* entries trigger smoke_gate quarantine — just as crash-loop-classifier triggers
|
||||
* quarantineCrashLoop. This bridge is the NEW listener that enforces that policy
|
||||
* without touching autonomous-rollback.js (currently in-flight on another worktree).
|
||||
*
|
||||
* Consumer: session-start self-feedback drain in bootstrap/register-hooks.js.
|
||||
* Compatibility: designed to coexist with BOTH the main-branch shape of
|
||||
* quarantineCrashLoop (which calls setExperimentalFlag + recordSelfFeedback
|
||||
* directly) AND the post-77421502a narrowed shape (which will remain compatible
|
||||
* because this bridge calls setExperimentalFlag directly, not through
|
||||
* quarantineCrashLoop).
|
||||
*
|
||||
* Consumer: self-feedback-drain step (called from phases-pre-dispatch.js session
|
||||
* startup) and any future drain hook registry.
|
||||
*/
|
||||
import { setExperimentalFlag } from "../experimental.js";
|
||||
|
||||
import { recordSelfFeedback } from "../self-feedback.js";
|
||||
import { setExperimentalFlag } from "../experimental.js";
|
||||
import { isDbAvailable, listSelfFeedbackEntries } from "../sf-db.js";
|
||||
|
||||
/**
|
||||
* Return true for adversarial findings severe enough to quarantine smoke_gate.
|
||||
* Return true when an entry is a high-severity adversarial finding that should
|
||||
* trigger quarantine.
|
||||
*
|
||||
* Purpose: keep the bridge predicate explicit and testable.
|
||||
* Purpose: pure predicate so callers can filter without coupling to bridge
|
||||
* implementation.
|
||||
*
|
||||
* Consumer: bridgeAdversarialFindingToQuarantine and drainAdversarialFindingsBridge.
|
||||
* @param {object} entry - self_feedback entry
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isHighSeverityAdversarialFinding(entry) {
|
||||
if (!entry || typeof entry !== "object") return false;
|
||||
const kind = String(entry.kind ?? "");
|
||||
const severity = String(entry.severity ?? "");
|
||||
// Accept both plain "adversarial-finding" and domain:family shapes such as
|
||||
// "adversarial-finding:red-team" so the bridge survives future kind narrowing.
|
||||
return (
|
||||
(kind === "adversarial-finding" ||
|
||||
kind.startsWith("adversarial-finding:")) &&
|
||||
|
|
@@ -29,6 +44,16 @@ export function isHighSeverityAdversarialFinding(entry) {
|
|||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Derive a stable idempotency key for an adversarial-finding quarantine action.
|
||||
*
|
||||
* Purpose: prevent the bridge from firing twice for the same source entry.
|
||||
* The key is stored in the child entry's evidence so a second call can check
|
||||
* whether a child already exists.
|
||||
*
|
||||
* @param {object} sourceEntry
|
||||
* @returns {string}
|
||||
*/
|
||||
function quarantineKeyFor(sourceEntry) {
|
||||
return `quarantine:adversarial-finding:${sourceEntry.id ?? "unknown"}`;
|
||||
}
|
||||
|
|
@@ -43,12 +68,30 @@ function alreadyQuarantinedByKey(key) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Bridge one high-severity adversarial finding to smoke_gate quarantine.
|
||||
* Bridge a high-severity adversarial-finding entry to smoke_gate quarantine.
|
||||
*
|
||||
* Purpose: make adversarial review actionable by flipping the promotion gate
|
||||
* and recording a child self-feedback entry with a durable source reference.
|
||||
* Purpose: when an adversarial review unit files a high-severity finding, this
|
||||
* function flips the smoke_gate feature flag to false (halting ledger writes)
|
||||
* and writes a child self-feedback entry of kind "smoke-gate-quarantined" that
|
||||
* cross-references the source.
|
||||
*
|
||||
* Consumer: drainAdversarialFindingsBridge.
|
||||
* Injection surface (`opts`):
|
||||
* - `setExperimentalFlag`: `(name, value) => void` — testable
|
||||
* - `recordSelfFeedback`: `(entry, basePath) => void` — testable
|
||||
* - `alreadyQuarantined`: `(key, basePath) => boolean` — testable idempotence check
|
||||
*
|
||||
* Returns an object describing the outcome:
|
||||
* - `{ ok: false, reason: string }` when entry is skipped (wrong kind/severity)
|
||||
* - `{ ok: true, quarantined: false, reason: 'already-quarantined' }` when idempotent
|
||||
* - `{ ok: true, quarantined: true, childEntryId?: string }` on success
|
||||
*
|
||||
* @param {object} entry - self_feedback entry
|
||||
* @param {object} [opts]
|
||||
* @param {string} [opts.basePath]
|
||||
* @param {Function} [opts.setExperimentalFlag]
|
||||
* @param {Function} [opts.recordSelfFeedback]
|
||||
* @param {Function} [opts.alreadyQuarantined]
|
||||
* @returns {{ ok: boolean, quarantined?: boolean, reason?: string, childEntryId?: string }}
|
||||
*/
|
||||
export function bridgeAdversarialFindingToQuarantine(entry, opts = {}) {
|
||||
if (!isHighSeverityAdversarialFinding(entry)) {
|
||||
|
|
@@ -59,29 +102,36 @@ export function bridgeAdversarialFindingToQuarantine(entry, opts = {}) {
|
|||
}
|
||||
|
||||
const basePath = opts.basePath ?? process.cwd();
|
||||
const quarantineKey = quarantineKeyFor(entry);
|
||||
const alreadyQuarantined =
|
||||
opts.alreadyQuarantined ?? ((key) => alreadyQuarantinedByKey(key));
|
||||
if (alreadyQuarantined(quarantineKey, basePath)) {
|
||||
return { ok: true, quarantined: false, reason: "already-quarantined" };
|
||||
}
|
||||
|
||||
const setFlag =
|
||||
opts.setExperimentalFlag ??
|
||||
((name, value) => setExperimentalFlag(name, value));
|
||||
const record =
|
||||
opts.recordSelfFeedback ?? ((child, bp) => recordSelfFeedback(child, bp));
|
||||
opts.recordSelfFeedback ?? ((e, bp) => recordSelfFeedback(e, bp));
|
||||
|
||||
// ── Idempotence check ──────────────────────────────────────────────────────
|
||||
// The default check looks for a child smoke-gate-quarantined entry whose
|
||||
// evidence references this source entry's id. The opts.alreadyQuarantined
|
||||
// injection point lets tests bypass this without real DB access.
|
||||
const quarantineKey = quarantineKeyFor(entry);
|
||||
const alreadyQuarantined =
|
||||
opts.alreadyQuarantined ?? ((key) => alreadyQuarantinedByKey(key));
|
||||
|
||||
if (alreadyQuarantined(quarantineKey, basePath)) {
|
||||
return { ok: true, quarantined: false, reason: "already-quarantined" };
|
||||
}
|
||||
|
||||
// ── Flip smoke_gate ────────────────────────────────────────────────────────
|
||||
try {
|
||||
setFlag("smoke_gate", false);
|
||||
} catch {
|
||||
/* child entry below still records the attempted quarantine */
|
||||
// Non-fatal — if flag write fails we still want to record the child entry.
|
||||
}
|
||||
|
||||
// ── Write child entry ──────────────────────────────────────────────────────
|
||||
const childEntry = {
|
||||
kind: "smoke-gate-quarantined",
|
||||
severity: "high",
|
||||
summary: `smoke_gate disabled by adversarial-finding bridge. Source: ${entry.id ?? "unknown"} - ${String(entry.summary ?? "").slice(0, 200)}`,
|
||||
summary: `smoke_gate disabled by adversarial-finding bridge. Source: ${entry.id ?? "unknown"} — ${(entry.summary ?? "").slice(0, 200)}`,
|
||||
evidence: {
|
||||
sourceEntryId: entry.id,
|
||||
sourceKind: entry.kind,
|
||||
|
|
@@ -91,25 +141,39 @@ export function bridgeAdversarialFindingToQuarantine(entry, opts = {}) {
|
|||
"high-severity adversarial-finding triggered smoke_gate quarantine",
|
||||
},
|
||||
suggestedFix:
|
||||
"Review the source adversarial-finding entry and decide whether the finding warrants quarantine. Re-enable smoke_gate after the issue is resolved.",
|
||||
"Review the source adversarial-finding entry and decide whether the finding " +
|
||||
"warrants the quarantine. Re-enable smoke_gate via `sf experimental on smoke_gate` " +
|
||||
"after the issue is resolved.",
|
||||
};
|
||||
|
||||
let childEntryId;
|
||||
try {
|
||||
childEntryId = record(childEntry, basePath)?.entry?.id;
|
||||
const result = record(childEntry, basePath);
|
||||
childEntryId = result?.entry?.id;
|
||||
} catch {
|
||||
/* quarantine flag decision already happened */
|
||||
// Non-fatal — quarantine still effective even if child write fails
|
||||
}
|
||||
|
||||
return { ok: true, quarantined: true, childEntryId };
|
||||
}
|
||||
|
||||
/**
|
||||
* Process blocked self-feedback entries and quarantine severe adversarial finds.
|
||||
* Drain step: scan all open self-feedback entries and bridge any high-severity
|
||||
* adversarial-finding entries to smoke_gate quarantine.
|
||||
*
|
||||
* Purpose: make findings from previous runs effective at the next session
|
||||
* boundary, even if the bridge was not loaded when the finding was filed.
|
||||
* Purpose: hook into the session-start drain so that adversarial-finding
|
||||
* entries filed during previous runs are processed even if the bridge
|
||||
* was not running when the entry was created.
|
||||
*
|
||||
* Consumer: bootstrap/register-hooks.js self-feedback drain.
|
||||
* Returns the number of entries processed (quarantined or already handled).
|
||||
*
|
||||
* @param {object[]} entries - array of self_feedback entries to scan
|
||||
* @param {object} [opts]
|
||||
* @param {string} [opts.basePath]
|
||||
* @param {Function} [opts.setExperimentalFlag]
|
||||
* @param {Function} [opts.recordSelfFeedback]
|
||||
* @param {Function} [opts.alreadyQuarantined]
|
||||
* @returns {number}
|
||||
*/
|
||||
export function drainAdversarialFindingsBridge(entries, opts = {}) {
|
||||
if (!Array.isArray(entries)) return 0;
|
||||
|
|
@@ -117,7 +181,7 @@ export function drainAdversarialFindingsBridge(entries, opts = {}) {
|
|||
for (const entry of entries) {
|
||||
if (!isHighSeverityAdversarialFinding(entry)) continue;
|
||||
bridgeAdversarialFindingToQuarantine(entry, opts);
|
||||
count += 1;
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -1,63 +1,156 @@
|
|||
/**
|
||||
* adversarial-budget.test.mjs — R075 adversarial review token budget.
|
||||
* adversarial-budget.test.mjs — Unit tests for the adversarial review token budget.
|
||||
*
|
||||
* Purpose: verify challenge review budget calculation and feedback filing
|
||||
* behavior before wiring the cap into dispatch envelopes.
|
||||
* Covers:
|
||||
* (a) Budget cap respected: model call envelope carries maxOutputTokens
|
||||
* (b) Over-budget short-circuit: enforceAdversarialBudget fires structured log
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { afterEach, test } from "vitest";
|
||||
import { ADVERSARIAL_REVIEW_MAX_TOKENS } from "../constants.js";
|
||||
import {
|
||||
enforceAdversarialBudget,
|
||||
isAdversarialBudgetExceeded,
|
||||
import { test } from "vitest";
|
||||
import * as AdversarialBudget from "../uok/adversarial-budget.js";
|
||||
import * as Constants from "../constants.js";
|
||||
|
||||
const {
|
||||
resolveAdversarialBudget,
|
||||
} from "../uok/adversarial-budget.js";
|
||||
isAdversarialBudgetExceeded,
|
||||
enforceAdversarialBudget,
|
||||
} = AdversarialBudget;
|
||||
const { ADVERSARIAL_REVIEW_MAX_TOKENS } = Constants;
|
||||
|
||||
let savedBudget;
|
||||
// ─── resolveAdversarialBudget ──────────────────────────────────────────────
|
||||
|
||||
afterEach(() => {
|
||||
if (savedBudget === undefined) delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
else process.env.SF_ADVERSARIAL_MAX_TOKENS = savedBudget;
|
||||
});
|
||||
|
||||
test("resolveAdversarialBudget_when_env_unset_returns_constant", () => {
|
||||
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
test("resolveAdversarialBudget returns compile-time constant when env is unset", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
assert.equal(resolveAdversarialBudget(), ADVERSARIAL_REVIEW_MAX_TOKENS);
|
||||
});
|
||||
|
||||
test("resolveAdversarialBudget_when_env_positive_uses_override", () => {
|
||||
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
test("resolveAdversarialBudget uses SF_ADVERSARIAL_MAX_TOKENS override when set", () => {
|
||||
process.env.SF_ADVERSARIAL_MAX_TOKENS = "12345";
|
||||
assert.equal(resolveAdversarialBudget(), 12345);
|
||||
try {
|
||||
assert.equal(resolveAdversarialBudget(), 12345);
|
||||
} finally {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
}
|
||||
});
|
||||
|
||||
test("isAdversarialBudgetExceeded_when_at_limit_returns_true", () => {
|
||||
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
test("resolveAdversarialBudget ignores non-positive override", () => {
|
||||
process.env.SF_ADVERSARIAL_MAX_TOKENS = "0";
|
||||
try {
|
||||
assert.equal(resolveAdversarialBudget(), ADVERSARIAL_REVIEW_MAX_TOKENS);
|
||||
} finally {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
}
|
||||
});
|
||||
|
||||
// ─── isAdversarialBudgetExceeded ──────────────────────────────────────────
|
||||
|
||||
test("isAdversarialBudgetExceeded returns false when under budget", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
assert.equal(
|
||||
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS),
|
||||
true,
|
||||
);
|
||||
assert.equal(isAdversarialBudgetExceeded(0), false);
|
||||
assert.equal(
|
||||
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS - 1),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
test("enforceAdversarialBudget_when_over_limit_records_feedback", () => {
|
||||
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
test("isAdversarialBudgetExceeded returns true at exactly the budget limit", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
const entries = [];
|
||||
assert.equal(
|
||||
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("isAdversarialBudgetExceeded returns true when over budget", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
assert.equal(
|
||||
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS + 1000),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("isAdversarialBudgetExceeded returns false for non-finite input", () => {
|
||||
assert.equal(isAdversarialBudgetExceeded(NaN), false);
|
||||
assert.equal(isAdversarialBudgetExceeded(Infinity), false);
|
||||
});
|
||||
|
||||
// ─── enforceAdversarialBudget (a): cap respected → returns null under budget ─
|
||||
|
||||
test("enforceAdversarialBudget returns null when under budget (cap respected)", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
const calls = [];
|
||||
const result = enforceAdversarialBudget(
|
||||
ADVERSARIAL_REVIEW_MAX_TOKENS + 1,
|
||||
100,
|
||||
{ unitId: "challenge-M001", milestoneId: "M001" },
|
||||
{ recordSelfFeedback: (entry) => entries.push(entry) },
|
||||
{ recordSelfFeedback: (entry) => calls.push(entry) },
|
||||
);
|
||||
assert.equal(result, null);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
// ─── enforceAdversarialBudget (b): over-budget short-circuit fires log ───────
|
||||
|
||||
test("enforceAdversarialBudget short-circuits and logs when over budget", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
const logged = [];
|
||||
const result = enforceAdversarialBudget(
|
||||
ADVERSARIAL_REVIEW_MAX_TOKENS + 5000,
|
||||
{ unitId: "challenge-M002", milestoneId: "M002", basePath: "/tmp/fake" },
|
||||
{
|
||||
recordSelfFeedback: (entry) => {
|
||||
logged.push(entry);
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
// Short-circuit result is returned
|
||||
assert.ok(result !== null, "should return a short-circuit object");
|
||||
assert.equal(result.shortCircuit, true);
|
||||
assert.equal(result.reason, "adversarial-budget-exceeded");
|
||||
assert.equal(entries.length, 1);
|
||||
assert.equal(entries[0].kind, "adversarial-budget-exceeded");
|
||||
assert.equal(entries[0].evidence.unitId, "challenge-M001");
|
||||
assert.equal(result.tokenCount, ADVERSARIAL_REVIEW_MAX_TOKENS + 5000);
|
||||
assert.equal(result.budget, ADVERSARIAL_REVIEW_MAX_TOKENS);
|
||||
|
||||
// Structured log entry was filed
|
||||
assert.equal(logged.length, 1);
|
||||
const entry = logged[0];
|
||||
assert.equal(entry.kind, "adversarial-budget-exceeded");
|
||||
assert.equal(entry.reason, "adversarial-budget-exceeded");
|
||||
assert.ok(
|
||||
entry.summary.includes("challenge-M002"),
|
||||
"summary should include unitId",
|
||||
);
|
||||
assert.equal(entry.evidence.tokenCount, ADVERSARIAL_REVIEW_MAX_TOKENS + 5000);
|
||||
assert.equal(entry.evidence.budget, ADVERSARIAL_REVIEW_MAX_TOKENS);
|
||||
assert.equal(entry.evidence.unitId, "challenge-M002");
|
||||
assert.equal(entry.evidence.milestoneId, "M002");
|
||||
});
|
||||
|
||||
test("enforceAdversarialBudget fires at exactly the budget boundary", () => {
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
const logged = [];
|
||||
const result = enforceAdversarialBudget(
|
||||
ADVERSARIAL_REVIEW_MAX_TOKENS,
|
||||
{},
|
||||
{
|
||||
recordSelfFeedback: (entry) => {
|
||||
logged.push(entry);
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.ok(result !== null, "exactly at limit should short-circuit");
|
||||
assert.equal(logged.length, 1);
|
||||
});
|
||||
|
||||
// ─── maxOutputTokens plumbed into dispatch envelope ───────────────────────────
|
||||
|
||||
test("resolveAdversarialBudget value matches ADVERSARIAL_REVIEW_MAX_TOKENS constant (budget cap passthrough)", () => {
|
||||
// This verifies that the value exposed to dispatch callers equals the constant.
|
||||
// auto-dispatch.js sets maxOutputTokens: resolveAdversarialBudget() in the
|
||||
// challenge envelope — this test confirms the round-trip is correct.
|
||||
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
|
||||
const budget = resolveAdversarialBudget();
|
||||
assert.equal(budget, ADVERSARIAL_REVIEW_MAX_TOKENS);
|
||||
assert.equal(typeof budget, "number");
|
||||
assert.ok(budget > 0, "budget must be positive");
|
||||
});
|
||||
|
|
|
|||
|
|
@@ -1,18 +1,35 @@
|
|||
/**
|
||||
* adversarial-finding-bridge.test.mjs — R075 finding-to-quarantine bridge.
|
||||
* adversarial-finding-bridge.test.mjs — Tests for the adversarial-finding →
|
||||
* smoke_gate quarantine bridge (R075/R066).
|
||||
*
|
||||
* Purpose: verify high-severity adversarial findings disable smoke_gate once
|
||||
* and create a child self-feedback entry with a stable source reference.
|
||||
* Covers:
|
||||
* (1) High-sev adversarial-finding flips smoke_gate to false
|
||||
* (2) Medium-sev adversarial-finding does NOT flip smoke_gate
|
||||
* (3) Child entry has correct cross-reference (sourceEntryId, quarantineKey)
|
||||
* (4) Idempotent: calling twice does not double-write the child entry
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { test } from "vitest";
|
||||
import {
|
||||
isHighSeverityAdversarialFinding,
|
||||
bridgeAdversarialFindingToQuarantine,
|
||||
drainAdversarialFindingsBridge,
|
||||
isHighSeverityAdversarialFinding,
|
||||
} from "../safety/adversarial-finding-bridge.js";
|
||||
|
||||
test("isHighSeverityAdversarialFinding_when_high_adversarial_returns_true", () => {
|
||||
// ─── isHighSeverityAdversarialFinding ─────────────────────────────────────────
|
||||
|
||||
test("isHighSeverityAdversarialFinding returns true for high-sev adversarial-finding", () => {
|
||||
assert.equal(
|
||||
isHighSeverityAdversarialFinding({
|
||||
kind: "adversarial-finding",
|
||||
severity: "high",
|
||||
}),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("isHighSeverityAdversarialFinding returns true for domain:family shape", () => {
|
||||
assert.equal(
|
||||
isHighSeverityAdversarialFinding({
|
||||
kind: "adversarial-finding:red-team",
|
||||
|
|
@@ -20,6 +37,9 @@ test("isHighSeverityAdversarialFinding_when_high_adversarial_returns_true", () =
|
|||
}),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("isHighSeverityAdversarialFinding returns false for medium severity", () => {
|
||||
assert.equal(
|
||||
isHighSeverityAdversarialFinding({
|
||||
kind: "adversarial-finding",
|
||||
|
|
@@ -29,70 +49,222 @@ test("isHighSeverityAdversarialFinding_when_high_adversarial_returns_true", () =
|
|||
);
|
||||
});
|
||||
|
||||
test("bridgeAdversarialFindingToQuarantine_when_high_finding_disables_smoke_gate", () => {
|
||||
const flags = [];
|
||||
const entries = [];
|
||||
const result = bridgeAdversarialFindingToQuarantine(
|
||||
{
|
||||
id: "sf-finding-1",
|
||||
kind: "adversarial-finding",
|
||||
test("isHighSeverityAdversarialFinding returns false for wrong kind", () => {
|
||||
assert.equal(
|
||||
isHighSeverityAdversarialFinding({
|
||||
kind: "crash-loop-detected",
|
||||
severity: "high",
|
||||
summary: "Promotion bypass found.",
|
||||
},
|
||||
{
|
||||
setExperimentalFlag: (name, value) => flags.push({ name, value }),
|
||||
recordSelfFeedback: (entry) => {
|
||||
entries.push(entry);
|
||||
return { entry: { id: "child-1" } };
|
||||
},
|
||||
alreadyQuarantined: () => false,
|
||||
},
|
||||
}),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
test("isHighSeverityAdversarialFinding returns false for null entry", () => {
|
||||
assert.equal(isHighSeverityAdversarialFinding(null), false);
|
||||
});
|
||||
|
||||
// ─── (1) High-sev adversarial-finding flips smoke_gate ───────────────────────
|
||||
|
||||
test("bridgeAdversarialFindingToQuarantine flips smoke_gate for high-sev finding", () => {
|
||||
const flagCalls = [];
|
||||
const recorded = [];
|
||||
const entry = {
|
||||
id: "sf-abc-high01",
|
||||
kind: "adversarial-finding",
|
||||
severity: "high",
|
||||
summary: "Red-team found critical assumption gap in M010 architecture",
|
||||
};
|
||||
const result = bridgeAdversarialFindingToQuarantine(entry, {
|
||||
basePath: "/tmp/fake-project",
|
||||
setExperimentalFlag: (name, value) => flagCalls.push({ name, value }),
|
||||
recordSelfFeedback: (e) => {
|
||||
recorded.push(e);
|
||||
return { entry: { ...e, id: "sf-child-01" } };
|
||||
},
|
||||
alreadyQuarantined: () => false,
|
||||
});
|
||||
|
||||
assert.equal(result.ok, true);
|
||||
assert.equal(result.quarantined, true);
|
||||
assert.deepEqual(flags, [{ name: "smoke_gate", value: false }]);
|
||||
assert.equal(entries.length, 1);
|
||||
assert.equal(entries[0].kind, "smoke-gate-quarantined");
|
||||
assert.equal(entries[0].evidence.sourceEntryId, "sf-finding-1");
|
||||
|
||||
// smoke_gate must have been flipped to false
|
||||
assert.equal(flagCalls.length, 1);
|
||||
assert.deepEqual(flagCalls[0], { name: "smoke_gate", value: false });
|
||||
});
|
||||
|
||||
test("bridgeAdversarialFindingToQuarantine_when_already_quarantined_is_idempotent", () => {
|
||||
const flags = [];
|
||||
const entries = [];
|
||||
const result = bridgeAdversarialFindingToQuarantine(
|
||||
// ─── (2) Medium-sev does NOT flip smoke_gate ──────────────────────────────────
|
||||
|
||||
test("bridgeAdversarialFindingToQuarantine skips medium-sev adversarial-finding", () => {
|
||||
const flagCalls = [];
|
||||
const recorded = [];
|
||||
const entry = {
|
||||
id: "sf-abc-med01",
|
||||
kind: "adversarial-finding",
|
||||
severity: "medium",
|
||||
summary: "Minor assumption not tested",
|
||||
};
|
||||
const result = bridgeAdversarialFindingToQuarantine(entry, {
|
||||
basePath: "/tmp/fake-project",
|
||||
setExperimentalFlag: (name, value) => flagCalls.push({ name, value }),
|
||||
recordSelfFeedback: (e) => {
|
||||
recorded.push(e);
|
||||
},
|
||||
alreadyQuarantined: () => false,
|
||||
});
|
||||
|
||||
assert.equal(result.ok, false);
|
||||
assert.ok(result.reason, "should return a reason string");
|
||||
assert.equal(
|
||||
flagCalls.length,
|
||||
0,
|
||||
"smoke_gate must NOT be flipped for medium severity",
|
||||
);
|
||||
assert.equal(recorded.length, 0);
|
||||
});
|
||||
|
||||
// ─── (3) Child entry has correct cross-reference ──────────────────────────────
|
||||
|
||||
test("bridgeAdversarialFindingToQuarantine child entry has correct cross-reference fields", () => {
|
||||
const recorded = [];
|
||||
const entry = {
|
||||
id: "sf-abc-cross01",
|
||||
kind: "adversarial-finding",
|
||||
severity: "high",
|
||||
summary: "Architecture assumption untested",
|
||||
};
|
||||
const result = bridgeAdversarialFindingToQuarantine(entry, {
|
||||
basePath: "/tmp/fake-project",
|
||||
setExperimentalFlag: () => {},
|
||||
recordSelfFeedback: (e) => {
|
||||
recorded.push(e);
|
||||
return { entry: { ...e, id: "sf-child-cross01" } };
|
||||
},
|
||||
alreadyQuarantined: () => false,
|
||||
});
|
||||
|
||||
assert.equal(result.ok, true);
|
||||
assert.equal(result.quarantined, true);
|
||||
|
||||
// Child entry was recorded
|
||||
assert.equal(recorded.length, 1);
|
||||
const child = recorded[0];
|
||||
|
||||
assert.equal(child.kind, "smoke-gate-quarantined");
|
||||
assert.equal(child.severity, "high");
|
||||
assert.ok(
|
||||
child.summary.includes("sf-abc-cross01"),
|
||||
"summary should reference source entry id",
|
||||
);
|
||||
|
||||
// Evidence cross-reference
|
||||
assert.equal(child.evidence.sourceEntryId, "sf-abc-cross01");
|
||||
assert.equal(child.evidence.sourceKind, "adversarial-finding");
|
||||
assert.equal(child.evidence.sourceSeverity, "high");
|
||||
assert.ok(
|
||||
typeof child.evidence.quarantineKey === "string" &&
|
||||
child.evidence.quarantineKey.length > 0,
|
||||
"quarantineKey must be present",
|
||||
);
|
||||
assert.ok(
|
||||
child.evidence.quarantineKey.includes("sf-abc-cross01"),
|
||||
"quarantineKey must reference source id",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── (4) Idempotent: calling twice does not double-write ─────────────────────
|
||||
|
||||
test("bridgeAdversarialFindingToQuarantine is idempotent (second call with alreadyQuarantined=true)", () => {
|
||||
const flagCalls = [];
|
||||
const recorded = [];
|
||||
const entry = {
|
||||
id: "sf-abc-idem01",
|
||||
kind: "adversarial-finding",
|
||||
severity: "high",
|
||||
summary: "Already-processed finding",
|
||||
};
|
||||
|
||||
// First call — not yet quarantined
|
||||
const first = bridgeAdversarialFindingToQuarantine(entry, {
|
||||
setExperimentalFlag: (n, v) => flagCalls.push({ n, v }),
|
||||
recordSelfFeedback: (e) => {
|
||||
recorded.push(e);
|
||||
return { entry: { ...e, id: "sf-child-idem01" } };
|
||||
},
|
||||
alreadyQuarantined: () => false,
|
||||
});
|
||||
assert.equal(first.quarantined, true);
|
||||
|
||||
// Second call — already quarantined
|
||||
const second = bridgeAdversarialFindingToQuarantine(entry, {
|
||||
setExperimentalFlag: (n, v) => flagCalls.push({ n, v }),
|
||||
recordSelfFeedback: (e) => {
|
||||
recorded.push(e);
|
||||
},
|
||||
alreadyQuarantined: () => true,
|
||||
});
|
||||
|
||||
assert.equal(second.ok, true);
|
||||
assert.equal(second.quarantined, false);
|
||||
assert.equal(second.reason, "already-quarantined");
|
||||
|
||||
// smoke_gate only flipped once (from the first call)
|
||||
assert.equal(flagCalls.length, 1);
|
||||
// Child entry only written once
|
||||
assert.equal(recorded.length, 1);
|
||||
});
|
||||
|
||||
// ─── drainAdversarialFindingsBridge ───────────────────────────────────────────
|
||||
|
||||
test("drainAdversarialFindingsBridge processes only high-sev adversarial-finding entries", () => {
|
||||
const quarantined = [];
|
||||
const entries = [
|
||||
{
|
||||
id: "sf-finding-1",
|
||||
id: "sf-e1",
|
||||
kind: "adversarial-finding",
|
||||
severity: "high",
|
||||
summary: "High finding",
|
||||
},
|
||||
{
|
||||
setExperimentalFlag: (name, value) => flags.push({ name, value }),
|
||||
recordSelfFeedback: (entry) => entries.push(entry),
|
||||
alreadyQuarantined: () => true,
|
||||
id: "sf-e2",
|
||||
kind: "adversarial-finding",
|
||||
severity: "medium",
|
||||
summary: "Medium finding",
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(result.reason, "already-quarantined");
|
||||
assert.equal(flags.length, 0);
|
||||
assert.equal(entries.length, 0);
|
||||
});
|
||||
|
||||
test("drainAdversarialFindingsBridge_when_mixed_entries_processes_only_high_findings", () => {
|
||||
const flags = [];
|
||||
const count = drainAdversarialFindingsBridge(
|
||||
[
|
||||
{ id: "one", kind: "adversarial-finding", severity: "high" },
|
||||
{ id: "two", kind: "adversarial-finding", severity: "medium" },
|
||||
{ id: "three", kind: "gap", severity: "high" },
|
||||
],
|
||||
{
|
||||
setExperimentalFlag: (name, value) => flags.push({ name, value }),
|
||||
recordSelfFeedback: () => {},
|
||||
alreadyQuarantined: () => false,
|
||||
id: "sf-e3",
|
||||
kind: "crash-loop-detected",
|
||||
severity: "high",
|
||||
summary: "Crash loop",
|
||||
},
|
||||
);
|
||||
{
|
||||
id: "sf-e4",
|
||||
kind: "adversarial-finding",
|
||||
severity: "high",
|
||||
summary: "Another high",
|
||||
},
|
||||
];
|
||||
|
||||
assert.equal(count, 1);
|
||||
assert.deepEqual(flags, [{ name: "smoke_gate", value: false }]);
|
||||
const count = drainAdversarialFindingsBridge(entries, {
|
||||
setExperimentalFlag: (name, value) =>
|
||||
quarantined.push({
|
||||
name,
|
||||
value,
|
||||
id: entries.find(
|
||||
(e) => e.severity === "high" && e.kind === "adversarial-finding",
|
||||
)?.id,
|
||||
}),
|
||||
recordSelfFeedback: () => ({ entry: { id: "child" } }),
|
||||
alreadyQuarantined: () => false,
|
||||
});
|
||||
|
||||
// Only the 2 high-sev adversarial-finding entries should be processed
|
||||
assert.equal(count, 2);
|
||||
});
|
||||
|
||||
test("drainAdversarialFindingsBridge returns 0 for empty array", () => {
|
||||
assert.equal(drainAdversarialFindingsBridge([], {}), 0);
|
||||
});
|
||||
|
||||
test("drainAdversarialFindingsBridge returns 0 for null input", () => {
|
||||
assert.equal(drainAdversarialFindingsBridge(null, {}), 0);
|
||||
});
|
||||
|
|
|
|||
|
|
@@ -1,28 +1,49 @@
|
|||
/**
|
||||
* inline-runtime-gate.test.mjs — R074 inline dispatch safety gate.
|
||||
* inline-runtime-gate.test.mjs — covers R074 gate logic for all 4 R020×R066 × env-var combinations.
|
||||
*
|
||||
* Purpose: verify inline dispatch fails closed until R020 and R066 are
|
||||
* validated, with SF_INLINE_DISPATCH=1 as the explicit audited bypass.
|
||||
* Purpose: verify that inline dispatch is blocked by default when R020 or R066
|
||||
* are not validated, that SF_INLINE_DISPATCH=1 is an audited bypass, and that
|
||||
* both validated means pass with no env-var needed.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, test } from "vitest";
|
||||
import { closeDatabase, openDatabase, upsertRequirement } from "../sf-db.js";
|
||||
import { inlineRuntimeGate } from "../uok/inline-runtime-gate.js";
|
||||
|
||||
const roots = [];
|
||||
let savedBypass;
|
||||
import assert from "node:assert/strict";
|
||||
import * as Fs from "node:fs";
|
||||
import * as Os from "node:os";
|
||||
import * as Path from "node:path";
|
||||
import * as Vitest from "vitest";
|
||||
import * as Db from "../sf-db.js";
|
||||
import * as Gate from "../uok/inline-runtime-gate.js";
|
||||
import * as Bootstrap from "../uok/gate-registry-bootstrap.js";
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
|
||||
const { mkdirSync, mkdtempSync, rmSync } = Fs;
|
||||
const { tmpdir } = Os;
|
||||
const { join } = Path;
|
||||
const { afterEach, beforeEach, describe, test } = Vitest;
|
||||
const { closeDatabase, openDatabase, upsertRequirement } = Db;
|
||||
const { inlineRuntimeGate, isInlineEligible } = Gate;
|
||||
const { bootstrapGateRegistry, BOOTSTRAP_GATES } = Bootstrap;
|
||||
|
||||
const tmpRoots = [];
|
||||
|
||||
/**
 * Create a throwaway project directory containing an empty `.sf` folder.
 *
 * The directory is recorded in `tmpRoots` so the afterEach hook can remove it.
 * The database is intentionally NOT opened here: tests that need it call
 * `openDatabase` themselves, and the "no db available" tests rely on it
 * staying closed.
 *
 * @returns {string} absolute path of the new project root
 */
function makeProject() {
  const root = mkdtempSync(join(tmpdir(), "sf-inline-gate-"));
  mkdirSync(join(root, ".sf"), { recursive: true });
  tmpRoots.push(root);
  return root;
}
|
||||
|
||||
/**
 * Build the context object handed to `inlineRuntimeGate.execute` in tests.
 *
 * @param {string} basePath - project root for the context
 * @param {object} [overrides] - fields that replace the defaults; spread last,
 *   so any key (including `basePath`) can be overridden
 * @returns {object} context with basePath, traceId, turnId, unitType, unitId
 */
function makeCtx(basePath, overrides = {}) {
  return {
    basePath,
    traceId: "trace-1",
    turnId: "turn-1",
    unitType: "execute-task",
    unitId: "M001/S01/T01",
    ...overrides,
  };
}
|
||||
|
||||
function seedRequirement(id, status) {
|
||||
upsertRequirement({
|
||||
id,
|
||||
|
|
@ -40,76 +61,213 @@ function seedRequirement(id, status) {
|
|||
});
|
||||
}
|
||||
|
||||
let savedEnv;
|
||||
|
||||
// Snapshot the bypass variable and clear it so every test starts with
// SF_INLINE_DISPATCH unset; afterEach restores the snapshot.
beforeEach(() => {
  savedEnv = process.env.SF_INLINE_DISPATCH;
  delete process.env.SF_INLINE_DISPATCH;
});
|
||||
|
||||
// Tear down in reverse order of setup: close the database, delete every
// temporary project root, then restore SF_INLINE_DISPATCH to its prior value.
afterEach(() => {
  closeDatabase();

  // splice(0) empties the shared list while iterating over its contents.
  for (const dir of tmpRoots.splice(0)) {
    rmSync(dir, { recursive: true, force: true });
  }

  if (savedEnv === undefined) {
    delete process.env.SF_INLINE_DISPATCH;
  } else {
    process.env.SF_INLINE_DISPATCH = savedEnv;
  }
});
|
||||
|
||||
describe("inlineRuntimeGate", () => {
|
||||
test("execute_when_requirements_missing_fails_closed", () => {
|
||||
const root = makeProject();
|
||||
const result = inlineRuntimeGate.execute({
|
||||
basePath: root,
|
||||
unitType: "validate-milestone",
|
||||
unitId: "M001",
|
||||
});
|
||||
// ─── isInlineEligible ──────────────────────────────────────────────────────
|
||||
|
||||
// Only "execute-task" units are inline-eligible; every other unit type is not.
describe("isInlineEligible", () => {
  test("returns true for execute-task", () => {
    assert.equal(isInlineEligible("execute-task"), true);
  });

  test("returns false for complete-slice", () => {
    assert.equal(isInlineEligible("complete-slice"), false);
  });

  test("returns false for plan-milestone", () => {
    assert.equal(isInlineEligible("plan-milestone"), false);
  });
});
|
||||
|
||||
// ─── Gate contract ──────────────────────────────────────────────────────────
|
||||
|
||||
// The gate object must expose the id, type, and execute function that the
// tests below rely on.
describe("inlineRuntimeGate shape", () => {
  test("has correct id and type", () => {
    assert.equal(inlineRuntimeGate.id, "inline-runtime-gate");
    assert.equal(inlineRuntimeGate.type, "policy");
    assert.equal(typeof inlineRuntimeGate.execute, "function");
  });
});
|
||||
|
||||
// ─── Case 1: R020 not validated, R066 not validated — no bypass ─────────────
|
||||
|
||||
// Neither requirement validated and no bypass: the gate must refuse and
// report both statuses plus the bypass hint in its rationale.
describe("both requirements not validated, no bypass", () => {
  test("returns fail with policy failureClass", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "active");
    seedRequirement("R066", "active");

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "fail");
    assert.equal(result.failureClass, "policy");
    assert.match(
      result.rationale,
      /inline dispatch refused: R020=active R066=active both must be 'validated' or set SF_INLINE_DISPATCH=1 for audited bypass/,
    );
  });
});
|
||||
|
||||
// ─── Case 2: R020 validated, R066 not validated — no bypass ────────────────
|
||||
|
||||
// One validated requirement is not enough: R066 still blocks inline dispatch.
describe("R020 validated, R066 not validated, no bypass", () => {
  test("returns fail because R066 is not validated", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "validated");
    seedRequirement("R066", "active");

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "fail");
    assert.equal(result.failureClass, "policy");
    assert.match(result.rationale, /R020=validated R066=active/);
  });
});
|
||||
|
||||
// ─── Case 3: R020 not validated, R066 validated — no bypass ────────────────
|
||||
|
||||
// Mirror of the previous case: R020 alone blocks inline dispatch.
describe("R020 not validated, R066 validated, no bypass", () => {
  test("returns fail because R020 is not validated", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "active");
    seedRequirement("R066", "validated");

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "fail");
    assert.equal(result.failureClass, "policy");
    assert.match(result.rationale, /R020=active R066=validated/);
  });
});
|
||||
|
||||
// ─── Case 4: both validated — pass without bypass ──────────────────────────
|
||||
|
||||
// With both requirements validated the gate passes on its own merits; the
// SF_INLINE_DISPATCH variable must have no influence on the outcome.
describe("both R020 and R066 validated", () => {
  test("returns pass regardless of env var", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "validated");
    seedRequirement("R066", "validated");

    // Actually vary the variable (unset, legacy off-switch, bypass) so the
    // test name holds. afterEach restores the original value.
    for (const value of [undefined, "0", "1"]) {
      if (value === undefined) {
        delete process.env.SF_INLINE_DISPATCH;
      } else {
        process.env.SF_INLINE_DISPATCH = value;
      }

      const result = inlineRuntimeGate.execute(makeCtx(project));

      assert.equal(result.outcome, "pass");
      assert.match(result.rationale, /both validated/);
    }
  });

  test("returns pass even when SF_INLINE_DISPATCH is unset", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "validated");
    seedRequirement("R066", "validated");
    delete process.env.SF_INLINE_DISPATCH;

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "pass");
  });

  test("returns pass even when SF_INLINE_DISPATCH=0 (old off-switch has no effect)", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "validated");
    seedRequirement("R066", "validated");
    process.env.SF_INLINE_DISPATCH = "0";

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "pass");
  });
});
|
||||
|
||||
// ─── Audited bypass (SF_INLINE_DISPATCH=1) ─────────────────────────────────
|
||||
|
||||
// SF_INLINE_DISPATCH=1 is the operator escape hatch: the gate passes even
// though the requirements are not validated, and says so in its rationale.
describe("audited bypass via SF_INLINE_DISPATCH=1", () => {
  test("returns pass when both not validated but bypass is set", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    seedRequirement("R020", "active");
    seedRequirement("R066", "active");
    process.env.SF_INLINE_DISPATCH = "1";

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "pass");
    assert.match(result.rationale, /audited bypass/);
  });

  test("returns pass when R020 missing from db but bypass is set", () => {
    const project = makeProject();
    openDatabase(join(project, ".sf", "sf.db"));
    // Only R066 seeded; R020 is unknown
    seedRequirement("R066", "validated");
    process.env.SF_INLINE_DISPATCH = "1";

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "pass");
    assert.match(result.rationale, /audited bypass/);
  });
});
|
||||
|
||||
// ─── No db available (db not opened) ───────────────────────────────────────
|
||||
|
||||
// When the database was never opened, both requirement statuses read as
// "unknown": the gate fails closed unless the audited bypass is set.
describe("no db available", () => {
  test("without bypass returns fail with unknown status", () => {
    // Do NOT open the database — isDbAvailable returns false
    const project = makeProject();

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "fail");
    assert.equal(result.failureClass, "policy");
    assert.match(result.rationale, /R020=unknown R066=unknown/);
  });

  test("with bypass returns pass even without db", () => {
    const project = makeProject();
    process.env.SF_INLINE_DISPATCH = "1";

    const result = inlineRuntimeGate.execute(makeCtx(project));

    assert.equal(result.outcome, "pass");
    assert.match(result.rationale, /audited bypass/);
  });
});
|
||||
|
||||
// ─── Gate registry bootstrap ────────────────────────────────────────────────
|
||||
|
||||
describe("gate-registry-bootstrap", () => {
  test("inlineRuntimeGate is registered via gate-registry-bootstrap side-effect import", async () => {
    // SF's gate-registry-bootstrap.js runs registry.register() at module
    // load time on the shared gateRegistry — there is no separate
    // bootstrapGateRegistry function or BOOTSTRAP_GATES array. Verify
    // the actual SF pattern: importing the bootstrap module makes
    // inline-runtime-gate visible on the shared registry.
    const { gateRegistry } = await import("../uok/gate-registry-bootstrap.js");
    assert.ok(
      gateRegistry.has("inline-runtime-gate"),
      "inline-runtime-gate should be registered on the shared gateRegistry after bootstrap import",
    );
  });
});
|
||||
|
|
|
|||
|
|
@ -1,21 +1,24 @@
|
|||
/**
|
||||
* adversarial-budget.js — token budget policy for adversarial review units.
|
||||
* adversarial-budget.js — Token budget enforcement for adversarial review units.
|
||||
*
|
||||
* Purpose: prevent challenge/adversarial review turns from consuming unbounded
|
||||
* tokens while still allowing the review to file structured feedback when the
|
||||
* cap is reached.
|
||||
* Purpose: prevent runaway adversarial (challenge) review turns from consuming
|
||||
* unbounded tokens. Provides a pre-check predicate and an enforce function that
|
||||
* files a structured self-feedback entry when the budget is exceeded.
|
||||
*
|
||||
* Consumer: uok/auto-dispatch.js challenge dispatch envelopes and future
|
||||
* streaming budget checks.
|
||||
* Consumer: auto-dispatch.js challenge rule (pre-dispatch check) and any future
|
||||
* streaming interceptor that accumulates per-unit token counts.
|
||||
*/
|
||||
|
||||
import { ADVERSARIAL_REVIEW_MAX_TOKENS } from "../constants.js";
|
||||
|
||||
/**
|
||||
* Resolve the effective adversarial review token budget.
|
||||
* Resolve the effective adversarial token budget.
|
||||
*
|
||||
* Purpose: allow an explicit operator override without forking dispatch code.
|
||||
* Purpose: allow runtime override via SF_ADVERSARIAL_MAX_TOKENS env var while
|
||||
* falling back to the compile-time constant so tests and production share one
|
||||
* code path.
|
||||
*
|
||||
* Consumer: challenge dispatch and focused tests.
|
||||
* @returns {number}
|
||||
*/
|
||||
export function resolveAdversarialBudget() {
|
||||
const override = Number(process.env.SF_ADVERSARIAL_MAX_TOKENS ?? "");
|
||||
|
|
@ -24,11 +27,12 @@ export function resolveAdversarialBudget() {
|
|||
}
|
||||
|
||||
/**
|
||||
* Return true when the observed token count has reached the adversarial budget.
|
||||
 * Return true when the given token count meets or exceeds the adversarial review budget.
|
||||
*
|
||||
* Purpose: provide a cheap predicate for both pre-dispatch and streaming checks.
|
||||
* Purpose: lightweight predicate for pre-dispatch and streaming checks.
|
||||
*
|
||||
* Consumer: enforceAdversarialBudget.
|
||||
* @param {number} tokenCount
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isAdversarialBudgetExceeded(tokenCount) {
|
||||
return (
|
||||
|
|
@ -37,12 +41,25 @@ export function isAdversarialBudgetExceeded(tokenCount) {
|
|||
}
|
||||
|
||||
/**
|
||||
* File structured self-feedback and short-circuit when the budget is exceeded.
|
||||
* Enforce the adversarial review budget.
|
||||
*
|
||||
* Purpose: turn runaway adversarial review into a durable, reviewable feedback
|
||||
* item instead of allowing silent token burn.
|
||||
* Purpose: when `tokenCount` meets or exceeds the budget, file a structured
|
||||
* self-feedback entry (reason: 'adversarial-budget-exceeded') and return a
|
||||
* short-circuit result. When under budget, return null so the caller continues
|
||||
* normally.
|
||||
*
|
||||
* Consumer: future challenge streaming enforcement.
|
||||
* Injection surface (`opts`):
|
||||
* - `recordSelfFeedback`: `(entry, basePath) => void` — for testing without
|
||||
* real FS/DB side effects.
|
||||
*
|
||||
* @param {number} tokenCount - tokens consumed so far by the review unit
|
||||
* @param {object} [context] - optional context for the log entry
|
||||
* @param {string} [context.unitId]
|
||||
* @param {string} [context.milestoneId]
|
||||
* @param {string} [context.basePath]
|
||||
* @param {object} [opts]
|
||||
* @param {Function} [opts.recordSelfFeedback]
|
||||
* @returns {{ shortCircuit: true, reason: string, tokenCount: number, budget: number } | null}
|
||||
*/
|
||||
export function enforceAdversarialBudget(tokenCount, context = {}, opts = {}) {
|
||||
const budget = resolveAdversarialBudget();
|
||||
|
|
@ -61,7 +78,8 @@ export function enforceAdversarialBudget(tokenCount, context = {}, opts = {}) {
|
|||
...(milestoneId ? { milestoneId } : {}),
|
||||
},
|
||||
suggestedFix:
|
||||
"Tune the challenge prompt or reduce inlined context so the review fits within the budget. Adjust SF_ADVERSARIAL_MAX_TOKENS only when the higher cap is intentional.",
|
||||
"Tune the challenge prompt or reduce inlined context so the review fits within the budget. " +
|
||||
"Adjust SF_ADVERSARIAL_MAX_TOKENS if a higher limit is intentionally needed.",
|
||||
};
|
||||
|
||||
const record =
|
||||
|
|
@ -71,11 +89,15 @@ export function enforceAdversarialBudget(tokenCount, context = {}, opts = {}) {
|
|||
const { recordSelfFeedback } = await import("../self-feedback.js");
|
||||
recordSelfFeedback(entry, basePath ?? process.cwd());
|
||||
} catch {
|
||||
/* feedback filing must never mask the budget decision */
|
||||
// Non-fatal — must never propagate
|
||||
}
|
||||
});
|
||||
|
||||
// Call record synchronously if it's not async (test path), otherwise fire-and-forget
|
||||
const result = record(entry, basePath ?? process.cwd());
|
||||
if (result && typeof result.catch === "function") result.catch(() => {});
|
||||
if (result && typeof result.catch === "function") {
|
||||
result.catch(() => {});
|
||||
}
|
||||
|
||||
return {
|
||||
shortCircuit: true,
|
||||
|
|
|
|||
|
|
@ -1,74 +1,96 @@
|
|||
/**
|
||||
* inline-runtime-gate.js — policy gate for default-on inline dispatch.
|
||||
* UOK Inline Runtime Gate (R074)
|
||||
*
|
||||
* Purpose: refuse inline dispatch until R020 (inline equivalence proof) and
|
||||
* R066 (regression firewall) are validated, unless the operator explicitly
|
||||
* uses the audited `SF_INLINE_DISPATCH=1` bypass.
|
||||
* Purpose: refuse inline dispatch (running a unit in the parent session without
|
||||
* a new session boundary) until R020 (equivalence proof) and R066 (regression
|
||||
* firewall) are both validated. Until those requirements are validated, inline
|
||||
* dispatch is considered unsafe and is blocked by default.
|
||||
*
|
||||
* Consumer: auto/run-unit.js before DispatchLayer enters the inline execution
|
||||
* path for validate/complete/reassess units.
|
||||
* Bypass: set SF_INLINE_DISPATCH=1 to allow inline dispatch with an audited
|
||||
* bypass log event. This is the operator escape-hatch for development and
|
||||
* migration work.
|
||||
*
|
||||
* Gate contract: ADR-0075 — { id, type, execute({ basePath, db, ... }) → GateResult }
|
||||
*
|
||||
* Consumer: run-unit.js before entering the inline dispatch path.
|
||||
*/
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
import { getRequirementById, isDbAvailable } from "../sf-db.js";
|
||||
|
||||
export const INLINE_RUNTIME_GATE_ID = "inline-runtime-gate";
|
||||
export const INLINE_RUNTIME_GATE_REQUIREMENTS = ["R020", "R066"];
|
||||
import { isDbAvailable, getRequirementById } from "../sf-db.js";
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
|
||||
const GATE_ID = "inline-runtime-gate";
|
||||
const REQUIRED_REQ_IDS = ["R020", "R066"];
|
||||
|
||||
/**
 * Determine whether a given unit type is eligible for inline dispatch.
 *
 * Inline dispatch means the unit runs in the parent session without a new
 * session boundary. Only the exact string "execute-task" is treated as an
 * inline-eligible candidate; the gate then decides whether inline dispatch
 * is actually allowed.
 *
 * @param {string} unitType
 * @returns {boolean} true only when `unitType` is "execute-task"
 */
export function isInlineEligible(unitType) {
  return unitType === "execute-task";
}
|
||||
|
||||
/**
 * Read the status of one requirement, never throwing.
 *
 * Returns "unknown" when the database is not available, when the requirement
 * is missing or has no status, or when the lookup itself throws — so the gate
 * can treat every failure mode as "not validated".
 *
 * @param {string} id - requirement id, e.g. "R020"
 * @returns {string} the stored status, or "unknown"
 */
function readRequirementStatus(id) {
  if (!isDbAvailable()) return "unknown";
  try {
    const req = getRequirementById(id);
    return req?.status ?? "unknown";
  } catch {
    // A failed lookup must not crash the gate; report it as unknown instead.
    return "unknown";
  }
}
|
||||
|
||||
/**
 * Policy gate deciding whether inline dispatch may run for the current unit.
 *
 * Purpose: keep inline dispatch fail-closed until R020 and R066 both report
 * the status "validated", while retaining SF_INLINE_DISPATCH=1 as an explicit
 * bypass that is logged through debugLog.
 *
 * @type {import("./contracts.js").Gate}
 */
export const inlineRuntimeGate = {
  id: GATE_ID,
  type: "policy",

  /**
   * Evaluate the gate.
   *
   * Decision order: (1) both requirements validated → pass; (2) bypass
   * variable equals "1" → pass and log the bypass; (3) otherwise → fail with
   * failureClass "policy". The validated check runs first, so the bypass is
   * never logged when it was not needed.
   *
   * @param {import("./contracts.js").UokContext} ctx
   * @returns {{ outcome: string, failureClass?: string, rationale: string }}
   */
  execute(ctx) {
    const r020Status = readRequirementStatus("R020");
    const r066Status = readRequirementStatus("R066");
    const bothValidated =
      r020Status === "validated" && r066Status === "validated";
    // Only the exact value "1" enables the bypass; any other value is ignored.
    const bypass = process.env.SF_INLINE_DISPATCH === "1";

    if (bothValidated) {
      return {
        outcome: "pass",
        rationale: `inline dispatch allowed: R020=${r020Status} R066=${r066Status} both validated`,
      };
    }

    if (bypass) {
      // ctx is accessed defensively so a missing context cannot break the log.
      debugLog(GATE_ID, {
        reason: "audited-bypass",
        r020Status,
        r066Status,
        unitType: ctx?.unitType,
        unitId: ctx?.unitId,
      });
      return {
        outcome: "pass",
        rationale: `inline dispatch allowed via audited bypass (SF_INLINE_DISPATCH=1): R020=${r020Status} R066=${r066Status}`,
      };
    }

    return {
      outcome: "fail",
      failureClass: "policy",
      rationale: `inline dispatch refused: R020=${r020Status} R066=${r066Status} both must be 'validated' or set SF_INLINE_DISPATCH=1 for audited bypass`,
    };
  },
};
|
||||
|
||||
export { REQUIRED_REQ_IDS };
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue