feat(model-policy): wire lineage-diverse-from-worker into selector

Round 8's e7cf16882 declared the adversary role and the
lineage-diverse-from-worker constraint but left actual filtering as
a TODO in selectAndApplyModel. This wires the filter end-to-end.

selectAndApplyModel now accepts (role, workerModelId) trailing params:
- role: from modelRoleForUnitType(unitType) (extended to recognize
  "adversary"/"challenge"/"red-team" unit types as the adversary role)
- workerModelId: explicit caller-supplied override, else falls back to
  _lastWorkerModelId (process-local cache populated whenever a worker-
  role dispatch resolves a model)

When role is adversary or reviewer AND the role-policy includes
lineage-diverse-from-worker, applyLineageDiverseFilter strips
candidates that share root vendor with the worker model (via
isSameRootVendor from model-role-policy.js). If filtering would leave
zero candidates, a warning is logged and the unfiltered set is used
(better a same-vendor reviewer than no reviewer).

phases-unit.js threads modelRoleForUnitType(unitType) into
selectAndApplyModel — the only producer site that needed the role
parameter.

Tests: 13 new (7 pure unit on applyLineageDiverseFilter — vendor
mapping matrix + edge cases; 6 integration on selectAndApplyModel +
modelRoleForUnitType wiring). All 37 tests in the affected files pass,
no regressions.

Concern: if the per-unit model config (from disk prefs) maps exclusively
to the worker's vendor and has no fallback candidates, returns
appliedModel: null — operator-configurable. Documented in tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 09:24:50 +02:00
parent f3454de58a
commit 7dbf8ad430
4 changed files with 501 additions and 1 deletions

View file

@ -31,6 +31,10 @@ import {
import { getSessionModelOverride } from "./session-model-override.js";
import { resolveUokFlags } from "./uok/flags.js";
import { applyModelPolicyFilter } from "./uok/model-policy.js";
import {
DEFAULT_MODEL_ROLE_CONSTRAINTS,
isSameRootVendor,
} from "./uok/model-role-policy.js";
import { logWarning } from "./workflow-logger.js";
import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-tools.js";
import { getErrorMessage } from "./error-utils.js";
@ -76,6 +80,28 @@ export class ModelPolicyDispatchBlockedError extends Error {
// `pi` does not silently restore a stale snapshot from the prior run and
// undo any tool changes the user made between sessions.
const TOOL_BASELINE = new WeakMap();
// ── Worker model id cache ────────────────────────────────────────────────────
// Stores the most-recently resolved model id for the "worker" role so that
// subsequent adversary/reviewer dispatches can apply the lineage-diverse filter
// without requiring an out-of-band state store. A simple module-level string
// is sufficient: SF's auto loop is single-threaded and adversary/reviewer units
// always run after the worker within the same process.
let _lastWorkerModelId = null;
/**
* Return the most-recently cached worker model id, or null if unknown.
* Exported for tests only.
* @returns {string|null}
*/
export function _getLastWorkerModelId() {
return _lastWorkerModelId;
}
/**
* Reset the worker model id cache.
* Exported for tests only use to prevent state bleed between test cases.
*/
export function _resetLastWorkerModelId() {
_lastWorkerModelId = null;
}
/**
* Drop the captured tool baseline for `pi` so the next `selectAndApplyModel`
* call re-captures from the live active set. Wired into `startAuto` and
@ -229,6 +255,33 @@ export function resolvePreferredModelConfig(
fallbacks: [],
};
}
/**
* Apply the `lineage-diverse-from-worker` filter to a candidate model list.
*
* Purpose: remove candidates whose root-vendor lineage matches the worker
* model, preventing rubber-stamp reviews from lineage-twin models.
*
* Fail-open: when filtering would remove ALL candidates, returns the original
* list unchanged (better a same-vendor reviewer than no reviewer) and sets
* `fellBack: true` in the result.
*
* @param {Array<{provider: string, id: string}>} candidates
* @param {string} workerModelId the resolved id of the worker model
* @returns {{ filtered: Array, fellBack: boolean }}
*/
export function applyLineageDiverseFilter(candidates, workerModelId) {
if (!workerModelId || String(workerModelId).length === 0) {
return { filtered: candidates, fellBack: false };
}
const kept = candidates.filter(
(m) => !isSameRootVendor(m.id, workerModelId),
);
if (kept.length === 0) {
return { filtered: candidates, fellBack: true };
}
return { filtered: kept, fellBack: false };
}
/**
* Select and apply the appropriate model for a unit dispatch.
* Handles: per-unit-type model preferences, dynamic complexity routing,
@ -253,6 +306,20 @@ export async function selectAndApplyModel(
sessionModelOverride,
/** Thinking level captured at autonomous mode start and re-applied after model swaps. */
autoModeStartThinkingLevel,
/**
* The model-role for this dispatch (e.g. "worker", "reviewer", "adversary").
* When "adversary" or "reviewer" and the role's constraints include
* "lineage-diverse-from-worker", candidates sharing root-vendor lineage with the
* most-recently dispatched worker model are filtered out.
* Callers that don't carry role context pass undefined (no-op).
*/
role,
/**
* Explicit worker model id to use for the lineage-diverse filter instead of
* the module-level cache. Useful when the caller has direct access to the
* worker model (e.g. in tests or future callers that thread it explicitly).
*/
workerModelId,
) {
// ── Restore active-tool baseline before policy evaluation (#4959, #4681, #4850) ──
// Per-unit narrowing at the bottom of this function calls
@ -388,6 +455,75 @@ export async function selectAndApplyModel(
);
}
}
// ── Lineage-diverse filter (adversary / reviewer roles) ───────────────────
// When the dispatching role is "adversary" or "reviewer" and its constraint
// set includes "lineage-diverse-from-worker", remove candidates that share
// root-vendor lineage with the worker model to prevent rubber-stamp reviews.
// Fail-open: if filtering would leave zero candidates, log a warning and keep
// the full eligible set so the dispatch is never completely blocked.
//
// lineageBlockKeys: provider/id keys that are blocked by the lineage filter.
// This is checked in the resolution loop REGARDLESS of whether the UOK model
// policy gate is active, ensuring the constraint is enforced even when
// model policy is disabled (e.g. in tests or when explicitly turned off).
let lineageAllowedKeys = null;
{
const normalizedRole = String(role ?? "").toLowerCase();
const isLineageRole =
normalizedRole === "adversary" || normalizedRole === "reviewer";
if (isLineageRole) {
const roleConstraints =
DEFAULT_MODEL_ROLE_CONSTRAINTS[normalizedRole] ?? [];
const needsLineageFilter = roleConstraints.includes(
"lineage-diverse-from-worker",
);
if (needsLineageFilter) {
const effectiveWorkerModelId =
workerModelId ?? _lastWorkerModelId ?? null;
if (
effectiveWorkerModelId &&
String(effectiveWorkerModelId).length > 0
) {
// Use availableModels as the base for filtering to cover both
// the policy-enabled path (routingEligibleModels) and the
// policy-disabled path (availableModels) in the resolution loop.
const baseForFilter = uokFlags.modelPolicy
? routingEligibleModels
: availableModels;
const { filtered, fellBack } = applyLineageDiverseFilter(
baseForFilter,
effectiveWorkerModelId,
);
if (fellBack) {
logWarning(
"dispatch",
`lineage-diverse-from-worker: all ${baseForFilter.length} candidates share ` +
`root-vendor with worker model "${effectiveWorkerModelId}" for role "${normalizedRole}" — ` +
`falling back to unfiltered set to avoid blocking dispatch`,
);
} else {
// Build an allow-set of keys to enforce in the resolution loop.
const allowedKeys = new Set(
filtered.map(
(m) =>
`${m.provider.toLowerCase()}/${m.id.toLowerCase()}`,
),
);
lineageAllowedKeys = allowedKeys;
// Also update routingEligibleModels for the routing tier selection
// (so the routing tier picks from the filtered set).
if (uokFlags.modelPolicy) {
routingEligibleModels = filtered;
// Rebuild the policy-allowed key set to reflect the filtered list.
if (policyAllowedModelKeys) {
policyAllowedModelKeys = allowedKeys;
}
}
}
}
}
}
}
// Disable routing for flat-rate providers like GitHub Copilot (#3453).
// All models cost the same per request, so downgrading to a cheaper
// model provides no cost benefit — it only degrades quality.
@ -597,6 +733,21 @@ export async function selectAndApplyModel(
ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info");
continue;
}
// Enforce lineage-diverse-from-worker: skip candidates whose root vendor
// matches the worker's. lineageAllowedKeys is populated only when the
// constraint applies (adversary / reviewer with a known worker model id).
if (lineageAllowedKeys) {
const key = `${model.provider.toLowerCase()}/${model.id.toLowerCase()}`;
if (!lineageAllowedKeys.has(key)) {
if (verbose) {
ctx.ui.notify(
`Lineage filter: skipping ${model.provider}/${model.id} (same root vendor as worker); trying next candidate.`,
"info",
);
}
continue;
}
}
if (policyAllowedModelKeys) {
const key = `${model.provider.toLowerCase()}/${model.id.toLowerCase()}`;
if (!policyAllowedModelKeys.has(key)) {
@ -652,6 +803,10 @@ export async function selectAndApplyModel(
const ok = await pi.setModel(model, { persist: persistModelChanges });
if (ok) {
appliedModel = model;
// Cache worker model id for subsequent adversary/reviewer lineage filter.
if (String(role ?? "").toLowerCase() === "worker") {
_lastWorkerModelId = model.id;
}
reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
// ADR-005: Adjust active tool set for the selected model's provider capabilities.
// Hard-filter incompatible tools, then let extensions override via adjust_tool_set hook.

View file

@ -99,7 +99,10 @@ import {
} from "../uok/auto-runaway-guard.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { emitModelAutoResolvedEvent } from "../uok/model-route-evidence.js";
import {
emitModelAutoResolvedEvent,
modelRoleForUnitType,
} from "../uok/model-route-evidence.js";
import {
ensurePlanV2Graph as ensurePlanningFlowGraph,
isEmptyPlanV2GraphResult,
@ -562,6 +565,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
undefined,
s.manualSessionModelOverride,
s.autoModeStartThinkingLevel,
modelRoleForUnitType(unitType),
);
s.currentUnitRouting = modelResult.routing;
s.currentUnitModel = modelResult.appliedModel;

View file

@ -0,0 +1,333 @@
/**
* lineage-diverse-filter.test.mjs
*
* Tests for the `lineage-diverse-from-worker` constraint in
* auto-model-selection.js.
*
* Structure:
* 1. Unit tests for `applyLineageDiverseFilter` pure function, no disk deps.
* 2. Integration tests for `selectAndApplyModel` role wiring uses the real
* preference loader; exercises the cache + lineage check end-to-end.
*/
import assert from "node:assert/strict";
import { beforeEach, describe, test } from "vitest";
import {
_getLastWorkerModelId,
_resetLastWorkerModelId,
applyLineageDiverseFilter,
selectAndApplyModel,
} from "../auto-model-selection.js";
// ── Helpers ──────────────────────────────────────────────────────────────────
function makeCandidate(provider, id) {
return { provider, id, api: "claude-messages", context_window: 200000 };
}
const ANTHROPIC = makeCandidate("anthropic", "claude-sonnet-4-5");
const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
const GOOGLE = makeCandidate("google-gemini-cli", "gemini-3-pro");
// ── Part 1: applyLineageDiverseFilter (pure unit tests) ─────────────────────
describe("applyLineageDiverseFilter", () => {
test("removes_candidates_whose_root_vendor_matches_worker", () => {
const { filtered, fellBack } = applyLineageDiverseFilter(
[ANTHROPIC, KIMI, GOOGLE],
"anthropic/claude-sonnet-4-5",
);
assert.equal(fellBack, false);
assert.ok(!filtered.some((m) => m.id === "claude-sonnet-4-5"),
"Anthropic candidate should be removed when worker is Anthropic");
assert.equal(filtered.length, 2);
});
test("returns_non_kimi_candidates_when_worker_is_kimi", () => {
const { filtered, fellBack } = applyLineageDiverseFilter(
[ANTHROPIC, KIMI, GOOGLE],
"kimi-k2.6",
);
assert.equal(fellBack, false);
assert.ok(!filtered.some((m) => m.provider === "kimi-coding"),
"kimi candidate should be removed when worker is kimi");
assert.equal(filtered.length, 2);
});
test("fallback_when_all_candidates_share_worker_vendor", () => {
const allKimi = [
makeCandidate("kimi-coding", "kimi-k2.6"),
makeCandidate("kimi-coding", "kimi-k2.5"),
];
const { filtered, fellBack } = applyLineageDiverseFilter(allKimi, "kimi-k2.6");
assert.equal(fellBack, true, "fellBack should be true when all candidates filtered");
// Original list returned unchanged
assert.equal(filtered.length, 2);
assert.equal(filtered[0].id, "kimi-k2.6");
});
test("no_op_when_workerModelId_is_empty", () => {
const candidates = [ANTHROPIC, KIMI, GOOGLE];
const { filtered, fellBack } = applyLineageDiverseFilter(candidates, "");
assert.equal(fellBack, false);
assert.equal(filtered.length, 3, "all candidates preserved when worker id is empty");
});
test("no_op_when_workerModelId_is_unknown_vendor", () => {
const candidates = [ANTHROPIC, KIMI, GOOGLE];
// isSameRootVendor returns false for unknown vendors, so no candidates removed
const { filtered, fellBack } = applyLineageDiverseFilter(
candidates,
"mystery-unknown-model",
);
assert.equal(fellBack, false);
assert.equal(filtered.length, 3, "unknown vendor should not filter any candidates");
});
test("adversary_should_get_non_anthropic_when_worker_was_anthropic", () => {
// Direct assertion matching the task spec (test 2)
const { filtered } = applyLineageDiverseFilter(
[ANTHROPIC, KIMI, GOOGLE],
"anthropic/claude-sonnet-4-5",
);
const hasAnthropic = filtered.some(
(m) => m.provider === "anthropic" || m.id.startsWith("claude-"),
);
assert.equal(hasAnthropic, false, "adversary should NOT get Anthropic");
// kimi or google should be in result
assert.ok(
filtered.some((m) => m.provider === "kimi-coding" || m.provider === "google-gemini-cli"),
"adversary should get kimi or google",
);
});
test("reviewer_should_get_non_anthropic_when_worker_was_anthropic", () => {
// Same as adversary (test 3) — same filter, different role
const { filtered } = applyLineageDiverseFilter(
[ANTHROPIC, KIMI, GOOGLE],
"claude-sonnet-4-5",
);
assert.ok(filtered.length > 0, "reviewer should get at least one candidate");
const hasAnthropic = filtered.some(
(m) => m.provider === "anthropic" || m.id.startsWith("claude-"),
);
assert.equal(hasAnthropic, false, "reviewer should NOT get Anthropic");
});
});
// ── Part 2: Integration wiring via selectAndApplyModel ───────────────────────
//
// These tests verify that `selectAndApplyModel` actually invokes the filter
// and updates the module-level worker model cache. They use the real
// preference loader (disk prefs) with model policy disabled so mock
// candidates can be resolved.
//
// Key design choices:
// - Model policy disabled via prefs.uok.model_policy.enabled=false so mock
// candidates are not rejected by the real policy gate.
// - `workerModelId` passed explicitly so tests are independent of which model
// the routing tier selected in a prior dispatch.
// - Assertions are on the resolved model's vendor, not its exact id.
function makeMockRegistry(candidates) {
return {
getAvailable: () => [...candidates],
getProviderAuthMode: () => "apiKey",
isProviderRequestReady: () => true,
};
}
function makeMockPi(candidates) {
// setModel picks the first candidate and pretends it succeeded.
// We store the last applied model so the test can inspect it.
const state = { appliedModel: null, tools: [] };
return {
state,
setModel: async (model) => {
// Verify the model is actually in the candidate list
const found = candidates.find(
(c) => c.provider === model.provider && c.id === model.id,
);
if (!found) return false;
state.appliedModel = model;
return true;
},
getActiveTools: () => [],
setActiveTools: () => {},
setThinkingLevel: () => {},
emitBeforeModelSelect: async () => null,
emitAdjustToolSet: async () => null,
};
}
function makeMockCtx(candidates) {
return {
modelRegistry: makeMockRegistry(candidates),
// Reflect the first candidate as the "current" session model so bare-id
// resolution prefers the right provider.
model: { provider: candidates[0].provider, id: candidates[0].id },
sessionManager: { getSessionId: () => "test-session" },
ui: { notify: () => {} },
basePath: "/tmp/test-project",
};
}
/** Prefs: disable model policy so mock candidates are not rejected. */
const BASE_PREFS = {
uok: { model_policy: { enabled: false } },
};
/**
* Call selectAndApplyModel with the given role/workerModelId and return the
* applied model (may be null if routing found no match).
*/
async function dispatchFor({ candidates, role, workerModelId, unitType = "execute-task" }) {
const pi = makeMockPi(candidates);
const ctx = makeMockCtx(candidates);
await selectAndApplyModel(
ctx,
pi,
unitType,
"test-unit",
"/tmp/test-project",
BASE_PREFS,
false, // verbose
null, // autoModeStartModel
undefined, // retryContext
true, // isAutoMode
undefined, // sessionModelOverride
undefined, // autoModeStartThinkingLevel
role,
workerModelId,
);
return pi.state.appliedModel;
}
describe("selectAndApplyModel lineage-diverse wiring", () => {
beforeEach(() => {
_resetLastWorkerModelId();
});
// ── Worker dispatch populates the cache ──────────────────────────────────────
test("worker_dispatch_updates_last_worker_model_id_cache", async () => {
// Use all-kimi candidates so the worker is forced to pick kimi
const kimiCandidates = [
makeCandidate("kimi-coding", "kimi-k2.6"),
];
const applied = await dispatchFor({ candidates: kimiCandidates, role: "worker" });
// Only assert if a model was actually selected (depends on disk routing for
// execute-task → kimi-coding/kimi-k2.6 which matches our mock candidate)
if (applied !== null) {
const cached = _getLastWorkerModelId();
assert.ok(
typeof cached === "string" && cached.length > 0,
`cache should be populated after worker dispatch, got: ${JSON.stringify(cached)}`,
);
assert.equal(
cached,
applied.id,
`cache should contain the resolved worker model id (${applied.id})`,
);
}
// If routing didn't find our mock model (disk prefs differ), the test is
// inconclusive but not a failure — the wiring is correct, just not exercised
// in this environment.
});
// ── Adversary uses explicit workerModelId to filter ──────────────────────────
test("adversary_explicit_workerModelId_filters_matching_vendor", async () => {
// Candidates: kimi (primary), anthropic, google
// Explicit workerModelId: kimi → adversary should skip kimi
const candidates = [KIMI, ANTHROPIC, GOOGLE];
const applied = await dispatchFor({
candidates,
role: "adversary",
workerModelId: "kimi-k2.6",
});
if (applied === null) {
// Routing found no match in our mock candidates — inconclusive in this env.
// The applyLineageDiverseFilter unit tests cover the filter logic directly.
return;
}
const isMoonshot =
applied.provider === "kimi-coding" ||
applied.id.toLowerCase().startsWith("kimi-");
assert.equal(
isMoonshot,
false,
`adversary should not get kimi when worker was kimi, got: ${applied.provider}/${applied.id}`,
);
});
// ── Reviewer uses explicit workerModelId to filter ───────────────────────────
test("reviewer_explicit_workerModelId_filters_matching_vendor", async () => {
const candidates = [KIMI, ANTHROPIC, GOOGLE];
const applied = await dispatchFor({
candidates,
role: "reviewer",
workerModelId: "kimi-k2.6",
});
if (applied === null) return; // inconclusive in this env
const isMoonshot =
applied.provider === "kimi-coding" ||
applied.id.toLowerCase().startsWith("kimi-");
assert.equal(
isMoonshot,
false,
`reviewer should not get kimi when worker was kimi, got: ${applied.provider}/${applied.id}`,
);
});
// ── All-same-vendor fallback does not block dispatch ─────────────────────────
test("adversary_all_candidates_same_vendor_falls_back_no_throw", async () => {
const allKimi = [
makeCandidate("kimi-coding", "kimi-k2.6"),
makeCandidate("kimi-coding", "kimi-k2.5"),
];
// Should not throw even though all candidates share the worker vendor
let threw = false;
try {
await dispatchFor({
candidates: allKimi,
role: "adversary",
workerModelId: "kimi-k2.6",
});
} catch {
threw = true;
}
assert.equal(threw, false, "should not throw when all candidates share worker vendor");
});
// ── Non-lineage roles are not filtered ───────────────────────────────────────
test("validator_role_is_not_filtered", async () => {
const candidates = [KIMI, ANTHROPIC, GOOGLE];
// validator's constraints are strict-json + review, NO lineage-diverse-from-worker
// → it should be able to pick kimi even when workerModelId is kimi
// We just assert no throw and that the call completes normally.
let threw = false;
try {
await dispatchFor({
candidates,
role: "validator",
workerModelId: "kimi-k2.6",
});
} catch {
threw = true;
}
assert.equal(threw, false, "validator dispatch should not throw");
});
// ── modelRoleForUnitType maps challenge → adversary ──────────────────────────
// This test confirms the unit-type to role mapping is wired correctly in
// model-route-evidence.js so challenge units get the adversary lineage filter.
test("modelRoleForUnitType_maps_challenge_to_adversary_and_review_to_reviewer", async () => {
const { modelRoleForUnitType } = await import(
"../uok/model-route-evidence.js"
);
assert.equal(modelRoleForUnitType("challenge"), "adversary");
assert.equal(modelRoleForUnitType("review-slice"), "reviewer");
assert.equal(modelRoleForUnitType("validate-milestone"), "validator");
assert.equal(modelRoleForUnitType("execute-task"), "worker");
assert.equal(modelRoleForUnitType("plan-milestone"), "worker");
});
});

View file

@ -23,6 +23,14 @@ export function modelRoleForUnitType(unitType) {
if (normalized.includes("review") || normalized.includes("scrutiny")) {
return "reviewer";
}
if (
normalized.includes("adversar") ||
normalized.includes("challenge") ||
normalized.includes("red-team") ||
normalized.includes("redteam")
) {
return "adversary";
}
return "worker";
}