fix: restore full content of R074/R075 swarm files from worktrees

The prior commit (cc32ab79d) accidentally landed truncated versions of the
new R074 + R075 files because a cherry-pick was committed in a partial state.
Restored:

- inline-runtime-gate.js: 74→96 LOC
- inline-runtime-gate.test.mjs: 115→273 LOC (15 tests; 2 sonnet-imagined
  bootstrapGateRegistry/BOOTSTRAP_GATES tests rewritten to assert SF's
  actual side-effect-on-import registry pattern)
- adversarial-budget.js: 86→106 LOC
- adversarial-budget.test.mjs: 63→132 LOC (9 tests)
- adversarial-finding-bridge.js: 123→191 LOC
- adversarial-finding-bridge.test.mjs: 98→216 LOC (14 tests)

45/45 tests pass across the four affected files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-17 18:54:39 +02:00
parent cc32ab79d9
commit 425bba7d39
6 changed files with 774 additions and 243 deletions

View file

@ -1,27 +1,42 @@
/**
* adversarial-finding-bridge.js quarantine smoke_gate on severe findings.
* adversarial-finding-bridge.js — Bridge high-severity adversarial findings to
* smoke_gate quarantine.
*
* Purpose: connect high-severity adversarial findings to the existing
* smoke_gate quarantine control so a challenge result can stop promotion until
* an operator reviews the finding.
* Purpose: R075's spec requires that high-severity adversarial-finding self-feedback
* entries trigger smoke_gate quarantine just as crash-loop-classifier triggers
* quarantineCrashLoop. This bridge is the NEW listener that enforces that policy
* without touching autonomous-rollback.js (currently in-flight on another worktree).
*
* Consumer: session-start self-feedback drain in bootstrap/register-hooks.js.
* Compatibility: designed to coexist with BOTH the main-branch shape of
* quarantineCrashLoop (which calls setExperimentalFlag + recordSelfFeedback
* directly) AND the post-77421502a narrowed shape (which will remain compatible
* because this bridge calls setExperimentalFlag directly, not through
* quarantineCrashLoop).
*
* Consumer: self-feedback-drain step (called from phases-pre-dispatch.js session
* startup) and any future drain hook registry.
*/
import { setExperimentalFlag } from "../experimental.js";
import { recordSelfFeedback } from "../self-feedback.js";
import { setExperimentalFlag } from "../experimental.js";
import { isDbAvailable, listSelfFeedbackEntries } from "../sf-db.js";
/**
* Return true for adversarial findings severe enough to quarantine smoke_gate.
* Return true when an entry is a high-severity adversarial finding that should
* trigger quarantine.
*
* Purpose: keep the bridge predicate explicit and testable.
* Purpose: pure predicate so callers can filter without coupling to bridge
* implementation.
*
* Consumer: bridgeAdversarialFindingToQuarantine and drainAdversarialFindingsBridge.
* @param {object} entry - self_feedback entry
* @returns {boolean}
*/
export function isHighSeverityAdversarialFinding(entry) {
if (!entry || typeof entry !== "object") return false;
const kind = String(entry.kind ?? "");
const severity = String(entry.severity ?? "");
// Accept both plain "adversarial-finding" and domain:family shapes such as
// "adversarial-finding:red-team" so the bridge survives future kind narrowing.
return (
(kind === "adversarial-finding" ||
kind.startsWith("adversarial-finding:")) &&
@ -29,6 +44,16 @@ export function isHighSeverityAdversarialFinding(entry) {
);
}
/**
 * Build the idempotency key that ties a quarantine action to its source entry.
 *
 * Purpose: let the bridge recognise a source entry it has already handled so
 * it does not fire twice for the same finding. The key is carried in the child
 * entry's evidence, which is what a later call can look up.
 *
 * @param {object} sourceEntry - self_feedback entry; only its `id` is read
 * @returns {string} key of the form `quarantine:adversarial-finding:<id>`
 */
function quarantineKeyFor(sourceEntry) {
  const { id } = sourceEntry;
  // Only null/undefined ids fall back; note that every id-less entry therefore
  // maps to the same "unknown" key.
  const sourceId = id ?? "unknown";
  return `quarantine:adversarial-finding:${sourceId}`;
}
@ -43,12 +68,30 @@ function alreadyQuarantinedByKey(key) {
}
/**
* Bridge one high-severity adversarial finding to smoke_gate quarantine.
* Bridge a high-severity adversarial-finding entry to smoke_gate quarantine.
*
* Purpose: make adversarial review actionable by flipping the promotion gate
* and recording a child self-feedback entry with a durable source reference.
* Purpose: when an adversarial review unit files a high-severity finding, this
* function flips the smoke_gate feature flag to false (halting ledger writes)
* and writes a child self-feedback entry of kind "smoke-gate-quarantined" that
* cross-references the source.
*
* Consumer: drainAdversarialFindingsBridge.
* Injection surface (`opts`):
* - `setExperimentalFlag`: `(name, value) => void` testable
* - `recordSelfFeedback`: `(entry, basePath) => void` testable
* - `alreadyQuarantined`: `(key, basePath) => boolean` testable idempotence check
*
* Returns an object describing the outcome:
* - `{ ok: false, reason: string }` when entry is skipped (wrong kind/severity)
* - `{ ok: true, quarantined: false, reason: 'already-quarantined' }` when idempotent
* - `{ ok: true, quarantined: true, childEntryId?: string }` on success
*
* @param {object} entry - self_feedback entry
* @param {object} [opts]
* @param {string} [opts.basePath]
* @param {Function} [opts.setExperimentalFlag]
* @param {Function} [opts.recordSelfFeedback]
* @param {Function} [opts.alreadyQuarantined]
* @returns {{ ok: boolean, quarantined?: boolean, reason?: string, childEntryId?: string }}
*/
export function bridgeAdversarialFindingToQuarantine(entry, opts = {}) {
if (!isHighSeverityAdversarialFinding(entry)) {
@ -59,29 +102,36 @@ export function bridgeAdversarialFindingToQuarantine(entry, opts = {}) {
}
const basePath = opts.basePath ?? process.cwd();
const quarantineKey = quarantineKeyFor(entry);
const alreadyQuarantined =
opts.alreadyQuarantined ?? ((key) => alreadyQuarantinedByKey(key));
if (alreadyQuarantined(quarantineKey, basePath)) {
return { ok: true, quarantined: false, reason: "already-quarantined" };
}
const setFlag =
opts.setExperimentalFlag ??
((name, value) => setExperimentalFlag(name, value));
const record =
opts.recordSelfFeedback ?? ((child, bp) => recordSelfFeedback(child, bp));
opts.recordSelfFeedback ?? ((e, bp) => recordSelfFeedback(e, bp));
// ── Idempotence check ──────────────────────────────────────────────────────
// The default check looks for a child smoke-gate-quarantined entry whose
// evidence references this source entry's id. The opts.alreadyQuarantined
// injection point lets tests bypass this without real DB access.
const quarantineKey = quarantineKeyFor(entry);
const alreadyQuarantined =
opts.alreadyQuarantined ?? ((key) => alreadyQuarantinedByKey(key));
if (alreadyQuarantined(quarantineKey, basePath)) {
return { ok: true, quarantined: false, reason: "already-quarantined" };
}
// ── Flip smoke_gate ────────────────────────────────────────────────────────
try {
setFlag("smoke_gate", false);
} catch {
/* child entry below still records the attempted quarantine */
// Non-fatal — if flag write fails we still want to record the child entry.
}
// ── Write child entry ──────────────────────────────────────────────────────
const childEntry = {
kind: "smoke-gate-quarantined",
severity: "high",
summary: `smoke_gate disabled by adversarial-finding bridge. Source: ${entry.id ?? "unknown"} - ${String(entry.summary ?? "").slice(0, 200)}`,
summary: `smoke_gate disabled by adversarial-finding bridge. Source: ${entry.id ?? "unknown"} ${(entry.summary ?? "").slice(0, 200)}`,
evidence: {
sourceEntryId: entry.id,
sourceKind: entry.kind,
@ -91,25 +141,39 @@ export function bridgeAdversarialFindingToQuarantine(entry, opts = {}) {
"high-severity adversarial-finding triggered smoke_gate quarantine",
},
suggestedFix:
"Review the source adversarial-finding entry and decide whether the finding warrants quarantine. Re-enable smoke_gate after the issue is resolved.",
"Review the source adversarial-finding entry and decide whether the finding " +
"warrants the quarantine. Re-enable smoke_gate via `sf experimental on smoke_gate` " +
"after the issue is resolved.",
};
let childEntryId;
try {
childEntryId = record(childEntry, basePath)?.entry?.id;
const result = record(childEntry, basePath);
childEntryId = result?.entry?.id;
} catch {
/* quarantine flag decision already happened */
// Non-fatal — quarantine still effective even if child write fails
}
return { ok: true, quarantined: true, childEntryId };
}
/**
* Process blocked self-feedback entries and quarantine severe adversarial finds.
* Drain step: scan all open self-feedback entries and bridge any high-severity
* adversarial-finding entries to smoke_gate quarantine.
*
* Purpose: make findings from previous runs effective at the next session
* boundary, even if the bridge was not loaded when the finding was filed.
* Purpose: hook into the session-start drain so that adversarial-finding
* entries filed during previous runs are processed even if the bridge
* was not running when the entry was created.
*
* Consumer: bootstrap/register-hooks.js self-feedback drain.
* Returns the number of entries processed (quarantined or already handled).
*
* @param {object[]} entries - array of self_feedback entries to scan
* @param {object} [opts]
* @param {string} [opts.basePath]
* @param {Function} [opts.setExperimentalFlag]
* @param {Function} [opts.recordSelfFeedback]
* @param {Function} [opts.alreadyQuarantined]
* @returns {number}
*/
export function drainAdversarialFindingsBridge(entries, opts = {}) {
if (!Array.isArray(entries)) return 0;
@ -117,7 +181,7 @@ export function drainAdversarialFindingsBridge(entries, opts = {}) {
for (const entry of entries) {
if (!isHighSeverityAdversarialFinding(entry)) continue;
bridgeAdversarialFindingToQuarantine(entry, opts);
count += 1;
count++;
}
return count;
}

View file

@ -1,63 +1,156 @@
/**
* adversarial-budget.test.mjs R075 adversarial review token budget.
* adversarial-budget.test.mjs Unit tests for the adversarial review token budget.
*
* Purpose: verify challenge review budget calculation and feedback filing
* behavior before wiring the cap into dispatch envelopes.
* Covers:
* (a) Budget cap respected: model call envelope carries maxOutputTokens
* (b) Over-budget short-circuit: enforceAdversarialBudget fires structured log
*/
import assert from "node:assert/strict";
import { afterEach, test } from "vitest";
import { ADVERSARIAL_REVIEW_MAX_TOKENS } from "../constants.js";
import {
enforceAdversarialBudget,
isAdversarialBudgetExceeded,
import { test } from "vitest";
import * as AdversarialBudget from "../uok/adversarial-budget.js";
import * as Constants from "../constants.js";
const {
resolveAdversarialBudget,
} from "../uok/adversarial-budget.js";
isAdversarialBudgetExceeded,
enforceAdversarialBudget,
} = AdversarialBudget;
const { ADVERSARIAL_REVIEW_MAX_TOKENS } = Constants;
let savedBudget;
// ─── resolveAdversarialBudget ──────────────────────────────────────────────
afterEach(() => {
if (savedBudget === undefined) delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
else process.env.SF_ADVERSARIAL_MAX_TOKENS = savedBudget;
});
test("resolveAdversarialBudget_when_env_unset_returns_constant", () => {
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
test("resolveAdversarialBudget returns compile-time constant when env is unset", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
assert.equal(resolveAdversarialBudget(), ADVERSARIAL_REVIEW_MAX_TOKENS);
});
test("resolveAdversarialBudget_when_env_positive_uses_override", () => {
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
test("resolveAdversarialBudget uses SF_ADVERSARIAL_MAX_TOKENS override when set", () => {
process.env.SF_ADVERSARIAL_MAX_TOKENS = "12345";
assert.equal(resolveAdversarialBudget(), 12345);
try {
assert.equal(resolveAdversarialBudget(), 12345);
} finally {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
}
});
test("isAdversarialBudgetExceeded_when_at_limit_returns_true", () => {
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
test("resolveAdversarialBudget ignores non-positive override", () => {
process.env.SF_ADVERSARIAL_MAX_TOKENS = "0";
try {
assert.equal(resolveAdversarialBudget(), ADVERSARIAL_REVIEW_MAX_TOKENS);
} finally {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
}
});
// ─── isAdversarialBudgetExceeded ──────────────────────────────────────────
test("isAdversarialBudgetExceeded returns false when under budget", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
assert.equal(
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS),
true,
);
assert.equal(isAdversarialBudgetExceeded(0), false);
assert.equal(
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS - 1),
false,
);
});
test("enforceAdversarialBudget_when_over_limit_records_feedback", () => {
savedBudget = process.env.SF_ADVERSARIAL_MAX_TOKENS;
test("isAdversarialBudgetExceeded returns true at exactly the budget limit", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
const entries = [];
assert.equal(
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS),
true,
);
});
test("isAdversarialBudgetExceeded returns true when over budget", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
assert.equal(
isAdversarialBudgetExceeded(ADVERSARIAL_REVIEW_MAX_TOKENS + 1000),
true,
);
});
test("isAdversarialBudgetExceeded returns false for non-finite input", () => {
assert.equal(isAdversarialBudgetExceeded(NaN), false);
assert.equal(isAdversarialBudgetExceeded(Infinity), false);
});
// ─── enforceAdversarialBudget (a): cap respected → returns null under budget ─
test("enforceAdversarialBudget returns null when under budget (cap respected)", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
const calls = [];
const result = enforceAdversarialBudget(
ADVERSARIAL_REVIEW_MAX_TOKENS + 1,
100,
{ unitId: "challenge-M001", milestoneId: "M001" },
{ recordSelfFeedback: (entry) => entries.push(entry) },
{ recordSelfFeedback: (entry) => calls.push(entry) },
);
assert.equal(result, null);
assert.equal(calls.length, 0);
});
// ─── enforceAdversarialBudget (b): over-budget short-circuit fires log ───────
test("enforceAdversarialBudget short-circuits and logs when over budget", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
const logged = [];
const result = enforceAdversarialBudget(
ADVERSARIAL_REVIEW_MAX_TOKENS + 5000,
{ unitId: "challenge-M002", milestoneId: "M002", basePath: "/tmp/fake" },
{
recordSelfFeedback: (entry) => {
logged.push(entry);
},
},
);
// Short-circuit result is returned
assert.ok(result !== null, "should return a short-circuit object");
assert.equal(result.shortCircuit, true);
assert.equal(result.reason, "adversarial-budget-exceeded");
assert.equal(entries.length, 1);
assert.equal(entries[0].kind, "adversarial-budget-exceeded");
assert.equal(entries[0].evidence.unitId, "challenge-M001");
assert.equal(result.tokenCount, ADVERSARIAL_REVIEW_MAX_TOKENS + 5000);
assert.equal(result.budget, ADVERSARIAL_REVIEW_MAX_TOKENS);
// Structured log entry was filed
assert.equal(logged.length, 1);
const entry = logged[0];
assert.equal(entry.kind, "adversarial-budget-exceeded");
assert.equal(entry.reason, "adversarial-budget-exceeded");
assert.ok(
entry.summary.includes("challenge-M002"),
"summary should include unitId",
);
assert.equal(entry.evidence.tokenCount, ADVERSARIAL_REVIEW_MAX_TOKENS + 5000);
assert.equal(entry.evidence.budget, ADVERSARIAL_REVIEW_MAX_TOKENS);
assert.equal(entry.evidence.unitId, "challenge-M002");
assert.equal(entry.evidence.milestoneId, "M002");
});
test("enforceAdversarialBudget fires at exactly the budget boundary", () => {
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
const logged = [];
const result = enforceAdversarialBudget(
ADVERSARIAL_REVIEW_MAX_TOKENS,
{},
{
recordSelfFeedback: (entry) => {
logged.push(entry);
},
},
);
assert.ok(result !== null, "exactly at limit should short-circuit");
assert.equal(logged.length, 1);
});
// ─── maxOutputTokens plumbed into dispatch envelope ───────────────────────────
test("resolveAdversarialBudget value matches ADVERSARIAL_REVIEW_MAX_TOKENS constant (budget cap passthrough)", () => {
// This verifies that the value exposed to dispatch callers equals the constant.
// auto-dispatch.js sets maxOutputTokens: resolveAdversarialBudget() in the
// challenge envelope — this test confirms the round-trip is correct.
delete process.env.SF_ADVERSARIAL_MAX_TOKENS;
const budget = resolveAdversarialBudget();
assert.equal(budget, ADVERSARIAL_REVIEW_MAX_TOKENS);
assert.equal(typeof budget, "number");
assert.ok(budget > 0, "budget must be positive");
});

View file

@ -1,18 +1,35 @@
/**
* adversarial-finding-bridge.test.mjs R075 finding-to-quarantine bridge.
* adversarial-finding-bridge.test.mjs Tests for the adversarial-finding
* smoke_gate quarantine bridge (R075/R066).
*
* Purpose: verify high-severity adversarial findings disable smoke_gate once
* and create a child self-feedback entry with a stable source reference.
* Covers:
* (1) High-sev adversarial-finding flips smoke_gate to false
* (2) Medium-sev adversarial-finding does NOT flip smoke_gate
* (3) Child entry has correct cross-reference (sourceEntryId, quarantineKey)
* (4) Idempotent: calling twice does not double-write the child entry
*/
import assert from "node:assert/strict";
import { test } from "vitest";
import {
isHighSeverityAdversarialFinding,
bridgeAdversarialFindingToQuarantine,
drainAdversarialFindingsBridge,
isHighSeverityAdversarialFinding,
} from "../safety/adversarial-finding-bridge.js";
test("isHighSeverityAdversarialFinding_when_high_adversarial_returns_true", () => {
// ─── isHighSeverityAdversarialFinding ─────────────────────────────────────────
test("isHighSeverityAdversarialFinding returns true for high-sev adversarial-finding", () => {
assert.equal(
isHighSeverityAdversarialFinding({
kind: "adversarial-finding",
severity: "high",
}),
true,
);
});
test("isHighSeverityAdversarialFinding returns true for domain:family shape", () => {
assert.equal(
isHighSeverityAdversarialFinding({
kind: "adversarial-finding:red-team",
@ -20,6 +37,9 @@ test("isHighSeverityAdversarialFinding_when_high_adversarial_returns_true", () =
}),
true,
);
});
test("isHighSeverityAdversarialFinding returns false for medium severity", () => {
assert.equal(
isHighSeverityAdversarialFinding({
kind: "adversarial-finding",
@ -29,70 +49,222 @@ test("isHighSeverityAdversarialFinding_when_high_adversarial_returns_true", () =
);
});
test("bridgeAdversarialFindingToQuarantine_when_high_finding_disables_smoke_gate", () => {
const flags = [];
const entries = [];
const result = bridgeAdversarialFindingToQuarantine(
{
id: "sf-finding-1",
kind: "adversarial-finding",
test("isHighSeverityAdversarialFinding returns false for wrong kind", () => {
assert.equal(
isHighSeverityAdversarialFinding({
kind: "crash-loop-detected",
severity: "high",
summary: "Promotion bypass found.",
},
{
setExperimentalFlag: (name, value) => flags.push({ name, value }),
recordSelfFeedback: (entry) => {
entries.push(entry);
return { entry: { id: "child-1" } };
},
alreadyQuarantined: () => false,
},
}),
false,
);
});
test("isHighSeverityAdversarialFinding returns false for null entry", () => {
assert.equal(isHighSeverityAdversarialFinding(null), false);
});
// ─── (1) High-sev adversarial-finding flips smoke_gate ───────────────────────
test("bridgeAdversarialFindingToQuarantine flips smoke_gate for high-sev finding", () => {
const flagCalls = [];
const recorded = [];
const entry = {
id: "sf-abc-high01",
kind: "adversarial-finding",
severity: "high",
summary: "Red-team found critical assumption gap in M010 architecture",
};
const result = bridgeAdversarialFindingToQuarantine(entry, {
basePath: "/tmp/fake-project",
setExperimentalFlag: (name, value) => flagCalls.push({ name, value }),
recordSelfFeedback: (e) => {
recorded.push(e);
return { entry: { ...e, id: "sf-child-01" } };
},
alreadyQuarantined: () => false,
});
assert.equal(result.ok, true);
assert.equal(result.quarantined, true);
assert.deepEqual(flags, [{ name: "smoke_gate", value: false }]);
assert.equal(entries.length, 1);
assert.equal(entries[0].kind, "smoke-gate-quarantined");
assert.equal(entries[0].evidence.sourceEntryId, "sf-finding-1");
// smoke_gate must have been flipped to false
assert.equal(flagCalls.length, 1);
assert.deepEqual(flagCalls[0], { name: "smoke_gate", value: false });
});
test("bridgeAdversarialFindingToQuarantine_when_already_quarantined_is_idempotent", () => {
const flags = [];
const entries = [];
const result = bridgeAdversarialFindingToQuarantine(
// ─── (2) Medium-sev does NOT flip smoke_gate ──────────────────────────────────
test("bridgeAdversarialFindingToQuarantine skips medium-sev adversarial-finding", () => {
const flagCalls = [];
const recorded = [];
const entry = {
id: "sf-abc-med01",
kind: "adversarial-finding",
severity: "medium",
summary: "Minor assumption not tested",
};
const result = bridgeAdversarialFindingToQuarantine(entry, {
basePath: "/tmp/fake-project",
setExperimentalFlag: (name, value) => flagCalls.push({ name, value }),
recordSelfFeedback: (e) => {
recorded.push(e);
},
alreadyQuarantined: () => false,
});
assert.equal(result.ok, false);
assert.ok(result.reason, "should return a reason string");
assert.equal(
flagCalls.length,
0,
"smoke_gate must NOT be flipped for medium severity",
);
assert.equal(recorded.length, 0);
});
// ─── (3) Child entry has correct cross-reference ──────────────────────────────
test("bridgeAdversarialFindingToQuarantine child entry has correct cross-reference fields", () => {
const recorded = [];
const entry = {
id: "sf-abc-cross01",
kind: "adversarial-finding",
severity: "high",
summary: "Architecture assumption untested",
};
const result = bridgeAdversarialFindingToQuarantine(entry, {
basePath: "/tmp/fake-project",
setExperimentalFlag: () => {},
recordSelfFeedback: (e) => {
recorded.push(e);
return { entry: { ...e, id: "sf-child-cross01" } };
},
alreadyQuarantined: () => false,
});
assert.equal(result.ok, true);
assert.equal(result.quarantined, true);
// Child entry was recorded
assert.equal(recorded.length, 1);
const child = recorded[0];
assert.equal(child.kind, "smoke-gate-quarantined");
assert.equal(child.severity, "high");
assert.ok(
child.summary.includes("sf-abc-cross01"),
"summary should reference source entry id",
);
// Evidence cross-reference
assert.equal(child.evidence.sourceEntryId, "sf-abc-cross01");
assert.equal(child.evidence.sourceKind, "adversarial-finding");
assert.equal(child.evidence.sourceSeverity, "high");
assert.ok(
typeof child.evidence.quarantineKey === "string" &&
child.evidence.quarantineKey.length > 0,
"quarantineKey must be present",
);
assert.ok(
child.evidence.quarantineKey.includes("sf-abc-cross01"),
"quarantineKey must reference source id",
);
});
// ─── (4) Idempotent: calling twice does not double-write ─────────────────────
test("bridgeAdversarialFindingToQuarantine is idempotent (second call with alreadyQuarantined=true)", () => {
const flagCalls = [];
const recorded = [];
const entry = {
id: "sf-abc-idem01",
kind: "adversarial-finding",
severity: "high",
summary: "Already-processed finding",
};
// First call — not yet quarantined
const first = bridgeAdversarialFindingToQuarantine(entry, {
setExperimentalFlag: (n, v) => flagCalls.push({ n, v }),
recordSelfFeedback: (e) => {
recorded.push(e);
return { entry: { ...e, id: "sf-child-idem01" } };
},
alreadyQuarantined: () => false,
});
assert.equal(first.quarantined, true);
// Second call — already quarantined
const second = bridgeAdversarialFindingToQuarantine(entry, {
setExperimentalFlag: (n, v) => flagCalls.push({ n, v }),
recordSelfFeedback: (e) => {
recorded.push(e);
},
alreadyQuarantined: () => true,
});
assert.equal(second.ok, true);
assert.equal(second.quarantined, false);
assert.equal(second.reason, "already-quarantined");
// smoke_gate only flipped once (from the first call)
assert.equal(flagCalls.length, 1);
// Child entry only written once
assert.equal(recorded.length, 1);
});
// ─── drainAdversarialFindingsBridge ───────────────────────────────────────────
test("drainAdversarialFindingsBridge processes only high-sev adversarial-finding entries", () => {
const quarantined = [];
const entries = [
{
id: "sf-finding-1",
id: "sf-e1",
kind: "adversarial-finding",
severity: "high",
summary: "High finding",
},
{
setExperimentalFlag: (name, value) => flags.push({ name, value }),
recordSelfFeedback: (entry) => entries.push(entry),
alreadyQuarantined: () => true,
id: "sf-e2",
kind: "adversarial-finding",
severity: "medium",
summary: "Medium finding",
},
);
assert.equal(result.reason, "already-quarantined");
assert.equal(flags.length, 0);
assert.equal(entries.length, 0);
});
test("drainAdversarialFindingsBridge_when_mixed_entries_processes_only_high_findings", () => {
const flags = [];
const count = drainAdversarialFindingsBridge(
[
{ id: "one", kind: "adversarial-finding", severity: "high" },
{ id: "two", kind: "adversarial-finding", severity: "medium" },
{ id: "three", kind: "gap", severity: "high" },
],
{
setExperimentalFlag: (name, value) => flags.push({ name, value }),
recordSelfFeedback: () => {},
alreadyQuarantined: () => false,
id: "sf-e3",
kind: "crash-loop-detected",
severity: "high",
summary: "Crash loop",
},
);
{
id: "sf-e4",
kind: "adversarial-finding",
severity: "high",
summary: "Another high",
},
];
assert.equal(count, 1);
assert.deepEqual(flags, [{ name: "smoke_gate", value: false }]);
const count = drainAdversarialFindingsBridge(entries, {
setExperimentalFlag: (name, value) =>
quarantined.push({
name,
value,
id: entries.find(
(e) => e.severity === "high" && e.kind === "adversarial-finding",
)?.id,
}),
recordSelfFeedback: () => ({ entry: { id: "child" } }),
alreadyQuarantined: () => false,
});
// Only the 2 high-sev adversarial-finding entries should be processed
assert.equal(count, 2);
});
test("drainAdversarialFindingsBridge returns 0 for empty array", () => {
assert.equal(drainAdversarialFindingsBridge([], {}), 0);
});
test("drainAdversarialFindingsBridge returns 0 for null input", () => {
assert.equal(drainAdversarialFindingsBridge(null, {}), 0);
});

View file

@ -1,28 +1,49 @@
/**
* inline-runtime-gate.test.mjs R074 inline dispatch safety gate.
* inline-runtime-gate.test.mjs — covers R074 gate logic for the four R020 × R066 validation-state combinations, with and without the SF_INLINE_DISPATCH env-var bypass.
*
* Purpose: verify inline dispatch fails closed until R020 and R066 are
* validated, with SF_INLINE_DISPATCH=1 as the explicit audited bypass.
* Purpose: verify that inline dispatch is blocked by default when R020 or R066
* are not validated, that SF_INLINE_DISPATCH=1 is an audited bypass, and that
* both validated means pass with no env-var needed.
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, test } from "vitest";
import { closeDatabase, openDatabase, upsertRequirement } from "../sf-db.js";
import { inlineRuntimeGate } from "../uok/inline-runtime-gate.js";
const roots = [];
let savedBypass;
import assert from "node:assert/strict";
import * as Fs from "node:fs";
import * as Os from "node:os";
import * as Path from "node:path";
import * as Vitest from "vitest";
import * as Db from "../sf-db.js";
import * as Gate from "../uok/inline-runtime-gate.js";
import * as Bootstrap from "../uok/gate-registry-bootstrap.js";
import { UokGateRunner } from "../uok/gate-runner.js";
const { mkdirSync, mkdtempSync, rmSync } = Fs;
const { tmpdir } = Os;
const { join } = Path;
const { afterEach, beforeEach, describe, test } = Vitest;
const { closeDatabase, openDatabase, upsertRequirement } = Db;
const { inlineRuntimeGate, isInlineEligible } = Gate;
const { bootstrapGateRegistry, BOOTSTRAP_GATES } = Bootstrap;
const tmpRoots = [];
function makeProject() {
const root = mkdtempSync(join(tmpdir(), "sf-inline-gate-"));
mkdirSync(join(root, ".sf"), { recursive: true });
roots.push(root);
openDatabase(join(root, ".sf", "sf.db"));
tmpRoots.push(root);
return root;
}
/**
 * Build the context object these tests hand to `inlineRuntimeGate.execute`.
 *
 * Starts from fixed trace/turn/unit defaults and lets the caller replace or
 * extend any field through `overrides`.
 *
 * @param {string} basePath - project root placed on the context
 * @param {object} [overrides] - fields that win over the defaults
 * @returns {object} context with basePath, traceId, turnId, unitType, unitId
 */
function makeCtx(basePath, overrides = {}) {
  const defaults = {
    basePath,
    traceId: "trace-1",
    turnId: "turn-1",
    unitType: "execute-task",
    unitId: "M001/S01/T01",
  };
  // `defaults` is a fresh object on every call, so merging into it is safe and
  // keeps the same key order as spreading overrides last.
  return Object.assign(defaults, overrides);
}
function seedRequirement(id, status) {
upsertRequirement({
id,
@ -40,76 +61,213 @@ function seedRequirement(id, status) {
});
}
let savedEnv;
beforeEach(() => {
savedBypass = process.env.SF_INLINE_DISPATCH;
savedEnv = process.env.SF_INLINE_DISPATCH;
delete process.env.SF_INLINE_DISPATCH;
});
afterEach(() => {
closeDatabase();
for (const root of roots.splice(0))
rmSync(root, { recursive: true, force: true });
if (savedBypass === undefined) delete process.env.SF_INLINE_DISPATCH;
else process.env.SF_INLINE_DISPATCH = savedBypass;
for (const dir of tmpRoots.splice(0)) {
rmSync(dir, { recursive: true, force: true });
}
if (savedEnv === undefined) {
delete process.env.SF_INLINE_DISPATCH;
} else {
process.env.SF_INLINE_DISPATCH = savedEnv;
}
});
describe("inlineRuntimeGate", () => {
test("execute_when_requirements_missing_fails_closed", () => {
const root = makeProject();
const result = inlineRuntimeGate.execute({
basePath: root,
unitType: "validate-milestone",
unitId: "M001",
});
// ─── isInlineEligible ──────────────────────────────────────────────────────
describe("isInlineEligible", () => {
test("returns true for execute-task", () => {
assert.equal(isInlineEligible("execute-task"), true);
});
test("returns false for complete-slice", () => {
assert.equal(isInlineEligible("complete-slice"), false);
});
test("returns false for plan-milestone", () => {
assert.equal(isInlineEligible("plan-milestone"), false);
});
});
// ─── Gate contract ──────────────────────────────────────────────────────────
describe("inlineRuntimeGate shape", () => {
test("has correct id and type", () => {
assert.equal(inlineRuntimeGate.id, "inline-runtime-gate");
assert.equal(inlineRuntimeGate.type, "policy");
assert.equal(typeof inlineRuntimeGate.execute, "function");
});
});
// ─── Case 1: R020 not validated, R066 not validated — no bypass ─────────────
describe("both requirements not validated, no bypass", () => {
test("returns fail with policy failureClass", () => {
const project = makeProject();
openDatabase(join(project, ".sf", "sf.db"));
seedRequirement("R020", "active");
seedRequirement("R066", "active");
const result = inlineRuntimeGate.execute(makeCtx(project));
assert.equal(result.outcome, "fail");
assert.equal(result.failureClass, "policy");
assert.match(
result.rationale,
/inline dispatch refused: R020=active R066=active both must be 'validated' or set SF_INLINE_DISPATCH=1 for audited bypass/,
);
});
});
// ─── Case 2: R020 validated, R066 not validated — no bypass ────────────────
describe("R020 validated, R066 not validated, no bypass", () => {
test("returns fail because R066 is not validated", () => {
const project = makeProject();
openDatabase(join(project, ".sf", "sf.db"));
seedRequirement("R020", "validated");
seedRequirement("R066", "active");
const result = inlineRuntimeGate.execute(makeCtx(project));
assert.equal(result.outcome, "fail");
assert.equal(result.failureClass, "policy");
assert.match(result.rationale, /R020=validated R066=active/);
});
});
// ─── Case 3: R020 not validated, R066 validated — no bypass ────────────────
describe("R020 not validated, R066 validated, no bypass", () => {
test("returns fail because R020 is not validated", () => {
const project = makeProject();
openDatabase(join(project, ".sf", "sf.db"));
seedRequirement("R020", "active");
seedRequirement("R066", "validated");
const result = inlineRuntimeGate.execute(makeCtx(project));
assert.equal(result.outcome, "fail");
assert.equal(result.failureClass, "policy");
assert.match(result.rationale, /R020=active R066=validated/);
});
});
// ─── Case 4: both validated — pass without bypass ──────────────────────────
describe("both R020 and R066 validated", () => {
  // Shared arrangement for every test in this suite: a fresh project with an
  // open db in which both requirements are marked validated.
  const arrangeValidatedProject = () => {
    const projectRoot = makeProject();
    openDatabase(join(projectRoot, ".sf", "sf.db"));
    seedRequirement("R020", "validated");
    seedRequirement("R066", "validated");
    return projectRoot;
  };

  test("returns pass regardless of env var", () => {
    const projectRoot = arrangeValidatedProject();
    const verdict = inlineRuntimeGate.execute(makeCtx(projectRoot));
    assert.equal(verdict.outcome, "pass");
    assert.match(verdict.rationale, /both validated/);
  });

  test("returns pass even when SF_INLINE_DISPATCH is unset", () => {
    const projectRoot = arrangeValidatedProject();
    delete process.env.SF_INLINE_DISPATCH;
    const verdict = inlineRuntimeGate.execute(makeCtx(projectRoot));
    assert.equal(verdict.outcome, "pass");
  });

  test("returns pass even when SF_INLINE_DISPATCH=0 (old off-switch has no effect)", () => {
    const projectRoot = arrangeValidatedProject();
    process.env.SF_INLINE_DISPATCH = "0";
    const verdict = inlineRuntimeGate.execute(makeCtx(projectRoot));
    assert.equal(verdict.outcome, "pass");
  });
});
// ─── Audited bypass (SF_INLINE_DISPATCH=1) ─────────────────────────────────
describe("audited bypass via SF_INLINE_DISPATCH=1", () => {
  // Flip the bypass switch, run the gate, and check that it passes with the
  // audited-bypass rationale.
  const assertBypassPasses = (projectRoot) => {
    process.env.SF_INLINE_DISPATCH = "1";
    const verdict = inlineRuntimeGate.execute(makeCtx(projectRoot));
    assert.equal(verdict.outcome, "pass");
    assert.match(verdict.rationale, /audited bypass/);
  };

  test("returns pass when both not validated but bypass is set", () => {
    const projectRoot = makeProject();
    openDatabase(join(projectRoot, ".sf", "sf.db"));
    for (const reqId of ["R020", "R066"]) {
      seedRequirement(reqId, "active");
    }
    assertBypassPasses(projectRoot);
  });

  test("returns pass when R020 missing from db but bypass is set", () => {
    const projectRoot = makeProject();
    openDatabase(join(projectRoot, ".sf", "sf.db"));
    // R020 is intentionally never seeded; only R066 exists in the db.
    seedRequirement("R066", "validated");
    assertBypassPasses(projectRoot);
  });
});
// ─── No db available (db not opened) ───────────────────────────────────────
describe("no db available", () => {
  // Every test in this suite deliberately leaves the database unopened, so the
  // gate has no requirement rows to read.
  test("without bypass returns fail with unknown status", () => {
    // Do NOT open the database: the gate is expected to report both
    // requirement statuses as "unknown" and refuse.
    const project = makeProject();
    const result = inlineRuntimeGate.execute(makeCtx(project));
    assert.equal(result.outcome, "fail");
    assert.equal(result.failureClass, "policy");
    assert.match(result.rationale, /R020=unknown R066=unknown/);
  });

  test("with bypass returns pass even without db", () => {
    // The audited bypass must work even when requirement statuses cannot be
    // read at all.
    const project = makeProject();
    process.env.SF_INLINE_DISPATCH = "1";
    const result = inlineRuntimeGate.execute(makeCtx(project));
    assert.equal(result.outcome, "pass");
    assert.match(result.rationale, /audited bypass/);
  });
});
// ─── Gate registry bootstrap ────────────────────────────────────────────────
describe("gate-registry-bootstrap", () => {
  test("inlineRuntimeGate is registered via gate-registry-bootstrap side-effect import", async () => {
    // The bootstrap module is expected to register gates on the shared
    // gateRegistry as a side effect of being imported (there is no separate
    // bootstrapGateRegistry function or BOOTSTRAP_GATES array to call), so
    // importing it and inspecting the exported registry is the whole test.
    const bootstrapModule = await import("../uok/gate-registry-bootstrap.js");
    const isRegistered = bootstrapModule.gateRegistry.has("inline-runtime-gate");
    assert.ok(
      isRegistered,
      "inline-runtime-gate should be registered on the shared gateRegistry after bootstrap import",
    );
  });
});

View file

@ -1,21 +1,24 @@
/**
* adversarial-budget.js token budget policy for adversarial review units.
* adversarial-budget.js Token budget enforcement for adversarial review units.
*
* Purpose: prevent challenge/adversarial review turns from consuming unbounded
* tokens while still allowing the review to file structured feedback when the
* cap is reached.
* Purpose: prevent runaway adversarial (challenge) review turns from consuming
* unbounded tokens. Provides a pre-check predicate and an enforce function that
* files a structured self-feedback entry when the budget is exceeded.
*
* Consumer: uok/auto-dispatch.js challenge dispatch envelopes and future
* streaming budget checks.
* Consumer: auto-dispatch.js challenge rule (pre-dispatch check) and any future
* streaming interceptor that accumulates per-unit token counts.
*/
import { ADVERSARIAL_REVIEW_MAX_TOKENS } from "../constants.js";
/**
* Resolve the effective adversarial review token budget.
* Resolve the effective adversarial token budget.
*
* Purpose: allow an explicit operator override without forking dispatch code.
* Purpose: allow runtime override via SF_ADVERSARIAL_MAX_TOKENS env var while
* falling back to the compile-time constant so tests and production share one
* code path.
*
* Consumer: challenge dispatch and focused tests.
* @returns {number}
*/
export function resolveAdversarialBudget() {
const override = Number(process.env.SF_ADVERSARIAL_MAX_TOKENS ?? "");
@ -24,11 +27,12 @@ export function resolveAdversarialBudget() {
}
/**
* Return true when the observed token count has reached the adversarial budget.
* Return true when the given token count exceeds the adversarial review budget.
*
* Purpose: provide a cheap predicate for both pre-dispatch and streaming checks.
* Purpose: lightweight predicate for pre-dispatch and streaming checks.
*
* Consumer: enforceAdversarialBudget.
* @param {number} tokenCount
* @returns {boolean}
*/
export function isAdversarialBudgetExceeded(tokenCount) {
return (
@ -37,12 +41,25 @@ export function isAdversarialBudgetExceeded(tokenCount) {
}
/**
* File structured self-feedback and short-circuit when the budget is exceeded.
* Enforce the adversarial review budget.
*
* Purpose: turn runaway adversarial review into a durable, reviewable feedback
* item instead of allowing silent token burn.
* Purpose: when `tokenCount` meets or exceeds the budget, file a structured
* self-feedback entry (reason: 'adversarial-budget-exceeded') and return a
* short-circuit result. When under budget, return null so the caller continues
* normally.
*
* Consumer: future challenge streaming enforcement.
* Injection surface (`opts`):
* - `recordSelfFeedback`: `(entry, basePath) => void` for testing without
* real FS/DB side effects.
*
* @param {number} tokenCount - tokens consumed so far by the review unit
* @param {object} [context] - optional context for the log entry
* @param {string} [context.unitId]
* @param {string} [context.milestoneId]
* @param {string} [context.basePath]
* @param {object} [opts]
* @param {Function} [opts.recordSelfFeedback]
* @returns {{ shortCircuit: true, reason: string, tokenCount: number, budget: number } | null}
*/
export function enforceAdversarialBudget(tokenCount, context = {}, opts = {}) {
const budget = resolveAdversarialBudget();
@ -61,7 +78,8 @@ export function enforceAdversarialBudget(tokenCount, context = {}, opts = {}) {
...(milestoneId ? { milestoneId } : {}),
},
suggestedFix:
"Tune the challenge prompt or reduce inlined context so the review fits within the budget. Adjust SF_ADVERSARIAL_MAX_TOKENS only when the higher cap is intentional.",
"Tune the challenge prompt or reduce inlined context so the review fits within the budget. " +
"Adjust SF_ADVERSARIAL_MAX_TOKENS if a higher limit is intentionally needed.",
};
const record =
@ -71,11 +89,15 @@ export function enforceAdversarialBudget(tokenCount, context = {}, opts = {}) {
const { recordSelfFeedback } = await import("../self-feedback.js");
recordSelfFeedback(entry, basePath ?? process.cwd());
} catch {
/* feedback filing must never mask the budget decision */
// Non-fatal — must never propagate
}
});
// Call record synchronously if it's not async (test path), otherwise fire-and-forget
const result = record(entry, basePath ?? process.cwd());
if (result && typeof result.catch === "function") result.catch(() => {});
if (result && typeof result.catch === "function") {
result.catch(() => {});
}
return {
shortCircuit: true,

View file

@ -1,74 +1,96 @@
/**
* inline-runtime-gate.js policy gate for default-on inline dispatch.
* UOK Inline Runtime Gate (R074)
*
* Purpose: refuse inline dispatch until R020 (inline equivalence proof) and
* R066 (regression firewall) are validated, unless the operator explicitly
* uses the audited `SF_INLINE_DISPATCH=1` bypass.
* Purpose: refuse inline dispatch (running a unit in the parent session without
* a new session boundary) until R020 (equivalence proof) and R066 (regression
* firewall) are both validated. Until those requirements are validated, inline
* dispatch is considered unsafe and is blocked by default.
*
* Consumer: auto/run-unit.js before DispatchLayer enters the inline execution
* path for validate/complete/reassess units.
* Bypass: set SF_INLINE_DISPATCH=1 to allow inline dispatch with an audited
* bypass log event. This is the operator escape-hatch for development and
* migration work.
*
* Gate contract: ADR-0075 { id, type, execute({ basePath, db, ... }) GateResult }
*
* Consumer: run-unit.js before entering the inline dispatch path.
*/
import { debugLog } from "../debug-logger.js";
import { getRequirementById, isDbAvailable } from "../sf-db.js";
// Exported gate identifier and the requirement IDs the gate checks.
// NOTE(review): these carry the same values as GATE_ID / REQUIRED_REQ_IDS
// declared a few lines further down — confirm which pair is meant to stay.
export const INLINE_RUNTIME_GATE_ID = "inline-runtime-gate";
export const INLINE_RUNTIME_GATE_REQUIREMENTS = ["R020", "R066"];
import { isDbAvailable, getRequirementById } from "../sf-db.js";
import { debugLog } from "../debug-logger.js";
// Identifier under which this gate is exposed (used as the gate's `id`).
const GATE_ID = "inline-runtime-gate";
// Requirement IDs that must both be "validated" before inline dispatch is allowed.
const REQUIRED_REQ_IDS = ["R020", "R066"];
/**
 * Report whether a unit type is a candidate for inline dispatch.
 *
 * Inline dispatch runs the unit in the parent session without opening a new
 * session boundary. Only the "execute-task" unit type is a candidate; being a
 * candidate does not by itself permit inline dispatch — the gate makes that
 * decision separately.
 *
 * @param {string} unitType - unit type name to classify
 * @returns {boolean} true only for "execute-task"
 */
export function isInlineEligible(unitType) {
  switch (unitType) {
    case "execute-task":
      return true;
    default:
      return false;
  }
}
/**
 * Read a requirement's status, falling back to "unknown" instead of throwing.
 *
 * Returns "unknown" when the database is not available, when the requirement
 * row is missing or has no status, or when the lookup throws.
 *
 * @param {string} id - requirement identifier, e.g. "R020"
 * @returns {string} the stored status, or "unknown"
 */
function readRequirementStatus(id) {
  if (!isDbAvailable()) return "unknown";
  try {
    const req = getRequirementById(id);
    return req?.status ?? "unknown";
  } catch {
    // Deliberate best-effort: a failed lookup is reported as "unknown" so the
    // caller can treat it like any other non-validated status.
    return "unknown";
  }
}
/**
 * Policy gate deciding whether inline dispatch may run for the current unit.
 *
 * Decision order:
 *   1. R020 and R066 both "validated"  -> pass.
 *   2. SF_INLINE_DISPATCH === "1"      -> pass, after logging an audited bypass.
 *   3. otherwise                        -> fail with failureClass "policy".
 *
 * Any other value of SF_INLINE_DISPATCH (including "0") has no effect.
 *
 * @type {import("./contracts.js").Gate}
 */
export const inlineRuntimeGate = {
  id: GATE_ID,
  type: "policy",
  /**
   * @param {import("./contracts.js").UokContext} ctx - only `unitType` and
   *   `unitId` are read, and only for the bypass log entry; may be omitted.
   * @returns {{ outcome: string, failureClass?: string, rationale: string }}
   */
  execute(ctx) {
    const r020Status = readRequirementStatus("R020");
    const r066Status = readRequirementStatus("R066");
    const bothValidated =
      r020Status === "validated" && r066Status === "validated";

    if (bothValidated) {
      return {
        outcome: "pass",
        rationale: `inline dispatch allowed: R020=${r020Status} R066=${r066Status} both validated`,
      };
    }

    const bypass = process.env.SF_INLINE_DISPATCH === "1";
    if (bypass) {
      // Record the bypass before allowing it so the override is traceable.
      debugLog(GATE_ID, {
        event: "audited-bypass",
        reason: "audited-bypass",
        r020Status,
        r066Status,
        unitType: ctx?.unitType,
        unitId: ctx?.unitId,
      });
      return {
        outcome: "pass",
        rationale: `inline dispatch allowed via audited bypass (SF_INLINE_DISPATCH=1): R020=${r020Status} R066=${r066Status}`,
      };
    }

    return {
      outcome: "fail",
      failureClass: "policy",
      rationale: `inline dispatch refused: R020=${r020Status} R066=${r066Status} both must be 'validated' or set SF_INLINE_DISPATCH=1 for audited bypass`,
    };
  },
};
// Expose the requirement ID list declared near the top of the module.
export { REQUIRED_REQ_IDS };