feat(model-routing): autonomous fallback strict to enabledModels allowlist
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

Autonomous mode's model-fallback chain bypassed enabledModels — when zai
429'd, the chain happily fell through to mistral/codestral-latest even
though only minimax/*, kimi-coding/*, zai/*, ollama-cloud/* were allowed.
Of 52 dispatches in this repo's journal this session, 10 (~19%)
escaped the allowlist (mistral×2, opencode-go×3, google-gemini-cli×5).

enabledModels was honored by interactive cycling (settings-manager.ts)
and by self-feedback-drain.js for triage routing, but
auto-model-selection.js's fallback chain in selectAndApplyModel never
read it.

Now: isModelInEnabledList(provider, modelId, enabledModels) filters
each fallback candidate. Supports exact "provider/model" or
"provider/*" wildcard. Empty/undefined list = open behavior (no
regression for setups without an allowlist).

readEnabledModels reads ~/.sf/agent/settings.json once per chain;
swallows IO errors → undefined → no constraint (safe failure mode).

Escape hatch: SF_BYPASS_ENABLED_MODELS=1 disables the check for
emergency / misconfigured cases.

When ALL candidates are filtered out and the chain exhausts, throws
a clear error directing the operator to add to allowlist or unset.

Tests: 13 in enabled-models-fallback.test.mjs covering pattern matrix,
multi-candidate chain skipping, bypass env, and exhaustion path.
Full suite 1906 pass, no regressions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 14:02:58 +02:00
parent 089bf0cbeb
commit 2a58f4ebec
2 changed files with 389 additions and 11 deletions

View file

@ -61,21 +61,11 @@ function readEnabledModels() {
try {
const settingsPath = join(homedir(), ".sf", "agent", "settings.json");
const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
process.stderr.write(
`[readEnabledModels] HOME=${process.env.HOME} homedir=${homedir()} path=${settingsPath} enabledModels=${JSON.stringify(settings?.enabledModels)}\n`,
);
}
return Array.isArray(settings?.enabledModels) &&
settings.enabledModels.length > 0
? settings.enabledModels
: undefined;
} catch (err) {
if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
process.stderr.write(
`[readEnabledModels] error: ${err?.message}\n`,
);
}
} catch {
return undefined; // settings missing or unreadable → no allowlist constraint
}
}

View file

@ -0,0 +1,388 @@
/**
* enabled-models-fallback.test.mjs
*
* Tests for the enabledModels allowlist enforcement in the autonomous
* fallback chain (auto-model-selection.js) and the isModelInEnabledList
* helper (preferences-models.js).
*
* Structure:
* 1. Unit tests for `isModelInEnabledList` pure function, pattern matrix.
* 2. Integration tests for `selectAndApplyModel` with enabledModels wired in
* via disk-backed preferences.yaml + settings.json.
* 3. Bypass via SF_BYPASS_ENABLED_MODELS=1.
* 4. All-filtered exhaustion error path.
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { isModelInEnabledList } from "../preferences-models.js";
// Import preferences.js to wire up _initPrefsLoader (circular dep resolver)
import "../preferences.js";
import { selectAndApplyModel } from "../auto-model-selection.js";
// ── Test environment setup ───────────────────────────────────────────────────
const originalHome = process.env.HOME;
const originalSfHome = process.env.SF_HOME;
const originalBypass = process.env.SF_BYPASS_ENABLED_MODELS;
const originalCwd = process.cwd();
const tmpDirs = [];
afterEach(() => {
process.chdir(originalCwd);
// Restore individual env vars (not object replacement, which breaks native bindings)
if (originalHome !== undefined) {
process.env.HOME = originalHome;
} else {
delete process.env.HOME;
}
if (originalSfHome !== undefined) {
process.env.SF_HOME = originalSfHome;
} else {
delete process.env.SF_HOME;
}
if (originalBypass !== undefined) {
process.env.SF_BYPASS_ENABLED_MODELS = originalBypass;
} else {
delete process.env.SF_BYPASS_ENABLED_MODELS;
}
while (tmpDirs.length > 0) {
rmSync(tmpDirs.pop(), { recursive: true, force: true });
}
});
/**
* Create a temp project with:
* - preferences.yaml configuring an execution model chain
* - settings.json with the given enabledModels (undefined = omit the key)
*
* Sets HOME, SF_HOME, and cwd to the project so that loadEffectiveSFPreferences
* and readEnabledModels both resolve to the temp files.
*/
function makeEnv({ enabledModels, prefsYaml } = {}) {
const root = mkdtempSync(join(homedir(), "sf-em-test-"));
tmpDirs.push(root);
const home = join(root, "home");
const project = join(root, "project");
mkdirSync(join(home, ".sf", "agent"), { recursive: true });
mkdirSync(join(project, ".sf"), { recursive: true });
// preferences.yaml — model chain for execution
const yaml =
prefsYaml ??
[
"version: 1",
"models:",
" execution:",
" model: zai/glm-5-pro",
" fallbacks:",
" - kimi-coding/kimi-k2.6",
" - mistral/mistral-large-2512",
"",
].join("\n");
writeFileSync(join(project, ".sf", "preferences.yaml"), yaml, "utf-8");
// settings.json — enabledModels allowlist
const settings = {};
if (enabledModels !== undefined) {
settings.enabledModels = enabledModels;
}
writeFileSync(
join(home, ".sf", "agent", "settings.json"),
JSON.stringify(settings, null, 2),
"utf-8",
);
process.env.HOME = home;
process.env.SF_HOME = join(home, ".sf");
process.chdir(project);
return { home, project };
}
// ── Mock helpers ─────────────────────────────────────────────────────────────
function makeCandidate(provider, id) {
return { provider, id, api: "anthropic-messages", contextWindow: 200000 };
}
function makeMockRegistry(candidates) {
return {
getAvailable: () => [...candidates],
getProviderAuthMode: () => "apiKey",
isProviderRequestReady: () => true,
};
}
function makeMockPi(candidates) {
const state = { appliedModel: null };
return {
state,
setModel: async (model) => {
const found = candidates.find(
(c) => c.provider === model.provider && c.id === model.id,
);
if (!found) return false;
state.appliedModel = model;
return true;
},
getActiveTools: () => [],
setActiveTools: () => {},
setThinkingLevel: () => {},
emitBeforeModelSelect: async () => null,
emitAdjustToolSet: async () => null,
};
}
function makeMockCtx(candidates, notified = []) {
return {
modelRegistry: makeMockRegistry(candidates),
model: { provider: candidates[0].provider, id: candidates[0].id },
sessionManager: { getSessionId: () => "test-session" },
ui: { notify: (msg, level) => notified.push({ msg, level }) },
};
}
/**
* Base prefs that:
* - disable model policy (so mock candidates are not rejected)
* - use burn-max profile to disable dynamic routing (so the disk-configured
* primary model is always tried first, without complexity-based override)
*/
const BASE_PREFS = {
uok: { model_policy: { enabled: false } },
token_profile: "burn-max",
};
async function dispatch(candidates, notified = []) {
const pi = makeMockPi(candidates);
const ctx = makeMockCtx(candidates, notified);
await selectAndApplyModel(
ctx,
pi,
"execute-task",
"test-unit",
"/tmp/test-project",
BASE_PREFS,
false, // verbose
null, // autoModeStartModel
undefined, // retryContext
true, // isAutoMode
undefined, // sessionModelOverride
undefined, // autoModeStartThinkingLevel
);
return pi.state.appliedModel;
}
// ── Part 1: isModelInEnabledList (pure unit tests) ────────────────────────────
describe("isModelInEnabledList", () => {
test("empty_list_allows_everything", () => {
assert.ok(isModelInEnabledList("mistral", "mistral-large-2512", []));
assert.ok(isModelInEnabledList("anthropic", "claude-opus-4-6", []));
});
test("undefined_allows_everything", () => {
assert.ok(isModelInEnabledList("mistral", "mistral-large-2512", undefined));
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", undefined));
});
test("exact_match", () => {
const list = ["kimi-coding/kimi-k2.6"];
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
assert.ok(!isModelInEnabledList("kimi-coding", "kimi-k2.5", list));
});
test("wildcard_provider_star", () => {
const list = ["minimax/*"];
assert.ok(isModelInEnabledList("minimax", "minimax-m2.5", list));
assert.ok(isModelInEnabledList("minimax", "minimax-m2.5-free", list));
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
});
test("multiple_patterns_any_match_passes", () => {
const list = ["kimi-coding/*", "zai/*", "minimax/minimax-m2.5"];
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
assert.ok(isModelInEnabledList("zai", "glm-5-pro", list));
assert.ok(isModelInEnabledList("minimax", "minimax-m2.5", list));
assert.ok(!isModelInEnabledList("minimax", "minimax-m2.5-free", list));
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
});
test("non_string_entries_skipped_gracefully", () => {
// @ts-expect-error intentional bad input
const list = [null, 42, "kimi-coding/kimi-k2.6", undefined];
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
assert.ok(!isModelInEnabledList("mistral", "mistral-large", list));
});
test("no_match_returns_false", () => {
const list = ["kimi-coding/*", "zai/*"];
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
assert.ok(!isModelInEnabledList("anthropic", "claude-opus-4-6", list));
});
test("wildcard_only_matches_same_provider_prefix", () => {
// "kimi-coding/*" should not match "kimi-coding-v2/kimi-k2.6"
const list = ["kimi-coding/*"];
assert.ok(!isModelInEnabledList("kimi-coding-v2", "kimi-k2.6", list));
assert.ok(isModelInEnabledList("kimi-coding", "anything", list));
});
});
// ── Part 2: fallback chain respects enabledModels ─────────────────────────────
//
// preferences.yaml pins execution chain to:
// primary: zai/glm-5-pro
// fallbacks: [kimi-coding/kimi-k2.6, mistral/mistral-large-2512]
//
// burn-max token_profile disables dynamic routing so the configured primary
// is always tried first.
describe("selectAndApplyModel with enabledModels", () => {
const ZAI = makeCandidate("zai", "glm-5-pro");
const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
const MISTRAL = makeCandidate("mistral", "mistral-large-2512");
test("allowlist_kimi_only_skips_zai_and_mistral_picks_kimi", async () => {
makeEnv({ enabledModels: ["kimi-coding/*"] });
const notified = [];
const applied = await dispatch([ZAI, KIMI, MISTRAL], notified);
assert.ok(applied !== null, "a model should have been applied");
assert.equal(applied.provider, "kimi-coding");
assert.equal(applied.id, "kimi-k2.6");
// zai (primary) should be skipped with a notification
const skipMsgs = notified.filter((n) =>
n.msg.includes("not in enabledModels"),
);
assert.ok(
skipMsgs.some((n) => n.msg.includes("zai")),
`expected zai skip notification; got: ${JSON.stringify(skipMsgs)}`,
);
// mistral is never tried because kimi (second fallback) already succeeds —
// only zai (primary) needs to be skipped for kimi to win
});
test("no_enabledModels_in_settings_allows_all_candidates", async () => {
makeEnv({ enabledModels: undefined }); // no key in settings.json
const notified = [];
const applied = await dispatch([ZAI, KIMI, MISTRAL], notified);
assert.ok(applied !== null, "a model should have been applied");
// Primary (zai/glm-5-pro) should be selected — no filter applied
assert.equal(applied.provider, "zai");
assert.equal(applied.id, "glm-5-pro");
const skipMsgs = notified.filter((n) =>
n.msg.includes("not in enabledModels"),
);
assert.equal(skipMsgs.length, 0, "no enabledModels notifications expected");
});
test("empty_enabledModels_array_allows_all_candidates", async () => {
makeEnv({ enabledModels: [] }); // empty array → no constraint
const applied = await dispatch([ZAI, KIMI, MISTRAL]);
assert.ok(applied !== null, "a model should have been applied");
// Primary zai should win with no filter
assert.equal(applied.provider, "zai");
assert.equal(applied.id, "glm-5-pro");
});
});
// ── Part 3: SF_BYPASS_ENABLED_MODELS=1 disables the filter ───────────────────
describe("SF_BYPASS_ENABLED_MODELS bypass", () => {
const ZAI = makeCandidate("zai", "glm-5-pro");
const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
test("bypass_env_var_disables_filter_even_when_allowlist_excludes_model", async () => {
makeEnv({
enabledModels: ["kimi-coding/*"], // zai is excluded
prefsYaml: [
"version: 1",
"models:",
" execution:",
" model: zai/glm-5-pro",
" fallbacks:",
" - kimi-coding/kimi-k2.6",
"",
].join("\n"),
});
process.env.SF_BYPASS_ENABLED_MODELS = "1";
const notified = [];
const applied = await dispatch([ZAI, KIMI], notified);
// With bypass, zai should be selected even though it is not in enabledModels
assert.ok(applied !== null, "a model should have been applied");
assert.equal(
applied.provider,
"zai",
"bypass should allow zai even though it is not in kimi-coding/*",
);
const skipMsgs = notified.filter((n) =>
n.msg.includes("not in enabledModels"),
);
assert.equal(skipMsgs.length, 0, "no skip notifications when bypassed");
});
});
// ── Part 4: All-filtered exhaustion error path ────────────────────────────────
describe("enabledModels exhaustion error", () => {
const ZAI = makeCandidate("zai", "glm-5-pro");
const MISTRAL = makeCandidate("mistral", "mistral-large-2512");
test("throws_when_all_resolvable_candidates_filtered_by_allowlist", async () => {
// Chain: zai (primary) + mistral (fallback) — neither is in kimi-coding/*
makeEnv({
enabledModels: ["kimi-coding/*"],
prefsYaml: [
"version: 1",
"models:",
" execution:",
" model: zai/glm-5-pro",
" fallbacks:",
" - mistral/mistral-large-2512",
"",
].join("\n"),
});
const pi = makeMockPi([ZAI, MISTRAL]);
const ctx = makeMockCtx([ZAI, MISTRAL]);
await assert.rejects(
() =>
selectAndApplyModel(
ctx,
pi,
"execute-task",
"test-unit",
"/tmp/test-project",
BASE_PREFS,
false,
null,
undefined,
true,
undefined,
undefined,
),
(err) => {
assert.ok(
err.message.includes(
"All fallback candidates filtered by enabledModels allowlist",
),
`expected exhaustion message, got: ${err.message}`,
);
return true;
},
);
});
});