feat(model-routing): autonomous fallback strict to enabledModels allowlist
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Autonomous mode's model-fallback chain bypassed enabledModels — when zai 429'd, the chain happily fell through to mistral/codestral-latest even though only minimax/*, kimi-coding/*, zai/*, ollama-cloud/* were allowed. Of 52 dispatches in this repo's journal this session, 10 (~19%) escaped the allowlist (mistral×2, opencode-go×3, google-gemini-cli×5). enabledModels was honored by interactive cycling (settings-manager.ts) and by self-feedback-drain.js for triage routing, but auto-model-selection.js's fallback chain in selectAndApplyModel never read it. Now: isModelInEnabledList(provider, modelId, enabledModels) filters each fallback candidate. Supports exact "provider/model" or "provider/*" wildcard. Empty/undefined list = open behavior (no regression for setups without an allowlist). readEnabledModels reads ~/.sf/agent/settings.json once per chain; swallows IO errors → undefined → no constraint (safe failure mode). Escape hatch: SF_BYPASS_ENABLED_MODELS=1 disables the check for emergency / misconfigured cases. When ALL candidates are filtered out and the chain exhausts, throws a clear error directing the operator to add to allowlist or unset. Tests: 13 in enabled-models-fallback.test.mjs covering pattern matrix, multi-candidate chain skipping, bypass env, and exhaustion path. Full suite 1906 pass, no regressions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
089bf0cbeb
commit
2a58f4ebec
2 changed files with 389 additions and 11 deletions
|
|
@ -61,21 +61,11 @@ function readEnabledModels() {
|
|||
try {
|
||||
const settingsPath = join(homedir(), ".sf", "agent", "settings.json");
|
||||
const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
|
||||
process.stderr.write(
|
||||
`[readEnabledModels] HOME=${process.env.HOME} homedir=${homedir()} path=${settingsPath} enabledModels=${JSON.stringify(settings?.enabledModels)}\n`,
|
||||
);
|
||||
}
|
||||
return Array.isArray(settings?.enabledModels) &&
|
||||
settings.enabledModels.length > 0
|
||||
? settings.enabledModels
|
||||
: undefined;
|
||||
} catch (err) {
|
||||
if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
|
||||
process.stderr.write(
|
||||
`[readEnabledModels] error: ${err?.message}\n`,
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
return undefined; // settings missing or unreadable → no allowlist constraint
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,388 @@
|
|||
/**
|
||||
* enabled-models-fallback.test.mjs
|
||||
*
|
||||
* Tests for the enabledModels allowlist enforcement in the autonomous
|
||||
* fallback chain (auto-model-selection.js) and the isModelInEnabledList
|
||||
* helper (preferences-models.js).
|
||||
*
|
||||
* Structure:
|
||||
* 1. Unit tests for `isModelInEnabledList` — pure function, pattern matrix.
|
||||
* 2. Integration tests for `selectAndApplyModel` with enabledModels wired in
|
||||
* via disk-backed preferences.yaml + settings.json.
|
||||
* 3. Bypass via SF_BYPASS_ENABLED_MODELS=1.
|
||||
* 4. All-filtered exhaustion error path.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, test } from "vitest";
|
||||
|
||||
import { isModelInEnabledList } from "../preferences-models.js";
|
||||
// Import preferences.js to wire up _initPrefsLoader (circular dep resolver)
|
||||
import "../preferences.js";
|
||||
import { selectAndApplyModel } from "../auto-model-selection.js";
|
||||
|
||||
// ── Test environment setup ───────────────────────────────────────────────────
|
||||
|
||||
const originalHome = process.env.HOME;
|
||||
const originalSfHome = process.env.SF_HOME;
|
||||
const originalBypass = process.env.SF_BYPASS_ENABLED_MODELS;
|
||||
const originalCwd = process.cwd();
|
||||
const tmpDirs = [];
|
||||
|
||||
afterEach(() => {
|
||||
process.chdir(originalCwd);
|
||||
// Restore individual env vars (not object replacement, which breaks native bindings)
|
||||
if (originalHome !== undefined) {
|
||||
process.env.HOME = originalHome;
|
||||
} else {
|
||||
delete process.env.HOME;
|
||||
}
|
||||
if (originalSfHome !== undefined) {
|
||||
process.env.SF_HOME = originalSfHome;
|
||||
} else {
|
||||
delete process.env.SF_HOME;
|
||||
}
|
||||
if (originalBypass !== undefined) {
|
||||
process.env.SF_BYPASS_ENABLED_MODELS = originalBypass;
|
||||
} else {
|
||||
delete process.env.SF_BYPASS_ENABLED_MODELS;
|
||||
}
|
||||
while (tmpDirs.length > 0) {
|
||||
rmSync(tmpDirs.pop(), { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Create a temp project with:
|
||||
* - preferences.yaml configuring an execution model chain
|
||||
* - settings.json with the given enabledModels (undefined = omit the key)
|
||||
*
|
||||
* Sets HOME, SF_HOME, and cwd to the project so that loadEffectiveSFPreferences
|
||||
* and readEnabledModels both resolve to the temp files.
|
||||
*/
|
||||
function makeEnv({ enabledModels, prefsYaml } = {}) {
|
||||
const root = mkdtempSync(join(homedir(), "sf-em-test-"));
|
||||
tmpDirs.push(root);
|
||||
const home = join(root, "home");
|
||||
const project = join(root, "project");
|
||||
mkdirSync(join(home, ".sf", "agent"), { recursive: true });
|
||||
mkdirSync(join(project, ".sf"), { recursive: true });
|
||||
|
||||
// preferences.yaml — model chain for execution
|
||||
const yaml =
|
||||
prefsYaml ??
|
||||
[
|
||||
"version: 1",
|
||||
"models:",
|
||||
" execution:",
|
||||
" model: zai/glm-5-pro",
|
||||
" fallbacks:",
|
||||
" - kimi-coding/kimi-k2.6",
|
||||
" - mistral/mistral-large-2512",
|
||||
"",
|
||||
].join("\n");
|
||||
writeFileSync(join(project, ".sf", "preferences.yaml"), yaml, "utf-8");
|
||||
|
||||
// settings.json — enabledModels allowlist
|
||||
const settings = {};
|
||||
if (enabledModels !== undefined) {
|
||||
settings.enabledModels = enabledModels;
|
||||
}
|
||||
writeFileSync(
|
||||
join(home, ".sf", "agent", "settings.json"),
|
||||
JSON.stringify(settings, null, 2),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
process.env.HOME = home;
|
||||
process.env.SF_HOME = join(home, ".sf");
|
||||
process.chdir(project);
|
||||
return { home, project };
|
||||
}
|
||||
|
||||
// ── Mock helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
function makeCandidate(provider, id) {
|
||||
return { provider, id, api: "anthropic-messages", contextWindow: 200000 };
|
||||
}
|
||||
|
||||
function makeMockRegistry(candidates) {
|
||||
return {
|
||||
getAvailable: () => [...candidates],
|
||||
getProviderAuthMode: () => "apiKey",
|
||||
isProviderRequestReady: () => true,
|
||||
};
|
||||
}
|
||||
|
||||
function makeMockPi(candidates) {
|
||||
const state = { appliedModel: null };
|
||||
return {
|
||||
state,
|
||||
setModel: async (model) => {
|
||||
const found = candidates.find(
|
||||
(c) => c.provider === model.provider && c.id === model.id,
|
||||
);
|
||||
if (!found) return false;
|
||||
state.appliedModel = model;
|
||||
return true;
|
||||
},
|
||||
getActiveTools: () => [],
|
||||
setActiveTools: () => {},
|
||||
setThinkingLevel: () => {},
|
||||
emitBeforeModelSelect: async () => null,
|
||||
emitAdjustToolSet: async () => null,
|
||||
};
|
||||
}
|
||||
|
||||
function makeMockCtx(candidates, notified = []) {
|
||||
return {
|
||||
modelRegistry: makeMockRegistry(candidates),
|
||||
model: { provider: candidates[0].provider, id: candidates[0].id },
|
||||
sessionManager: { getSessionId: () => "test-session" },
|
||||
ui: { notify: (msg, level) => notified.push({ msg, level }) },
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Base prefs that:
|
||||
* - disable model policy (so mock candidates are not rejected)
|
||||
* - use burn-max profile to disable dynamic routing (so the disk-configured
|
||||
* primary model is always tried first, without complexity-based override)
|
||||
*/
|
||||
const BASE_PREFS = {
|
||||
uok: { model_policy: { enabled: false } },
|
||||
token_profile: "burn-max",
|
||||
};
|
||||
|
||||
async function dispatch(candidates, notified = []) {
|
||||
const pi = makeMockPi(candidates);
|
||||
const ctx = makeMockCtx(candidates, notified);
|
||||
await selectAndApplyModel(
|
||||
ctx,
|
||||
pi,
|
||||
"execute-task",
|
||||
"test-unit",
|
||||
"/tmp/test-project",
|
||||
BASE_PREFS,
|
||||
false, // verbose
|
||||
null, // autoModeStartModel
|
||||
undefined, // retryContext
|
||||
true, // isAutoMode
|
||||
undefined, // sessionModelOverride
|
||||
undefined, // autoModeStartThinkingLevel
|
||||
);
|
||||
return pi.state.appliedModel;
|
||||
}
|
||||
|
||||
// ── Part 1: isModelInEnabledList (pure unit tests) ────────────────────────────
|
||||
|
||||
describe("isModelInEnabledList", () => {
|
||||
test("empty_list_allows_everything", () => {
|
||||
assert.ok(isModelInEnabledList("mistral", "mistral-large-2512", []));
|
||||
assert.ok(isModelInEnabledList("anthropic", "claude-opus-4-6", []));
|
||||
});
|
||||
|
||||
test("undefined_allows_everything", () => {
|
||||
assert.ok(isModelInEnabledList("mistral", "mistral-large-2512", undefined));
|
||||
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", undefined));
|
||||
});
|
||||
|
||||
test("exact_match", () => {
|
||||
const list = ["kimi-coding/kimi-k2.6"];
|
||||
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
|
||||
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
|
||||
assert.ok(!isModelInEnabledList("kimi-coding", "kimi-k2.5", list));
|
||||
});
|
||||
|
||||
test("wildcard_provider_star", () => {
|
||||
const list = ["minimax/*"];
|
||||
assert.ok(isModelInEnabledList("minimax", "minimax-m2.5", list));
|
||||
assert.ok(isModelInEnabledList("minimax", "minimax-m2.5-free", list));
|
||||
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
|
||||
});
|
||||
|
||||
test("multiple_patterns_any_match_passes", () => {
|
||||
const list = ["kimi-coding/*", "zai/*", "minimax/minimax-m2.5"];
|
||||
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
|
||||
assert.ok(isModelInEnabledList("zai", "glm-5-pro", list));
|
||||
assert.ok(isModelInEnabledList("minimax", "minimax-m2.5", list));
|
||||
assert.ok(!isModelInEnabledList("minimax", "minimax-m2.5-free", list));
|
||||
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
|
||||
});
|
||||
|
||||
test("non_string_entries_skipped_gracefully", () => {
|
||||
// @ts-expect-error intentional bad input
|
||||
const list = [null, 42, "kimi-coding/kimi-k2.6", undefined];
|
||||
assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
|
||||
assert.ok(!isModelInEnabledList("mistral", "mistral-large", list));
|
||||
});
|
||||
|
||||
test("no_match_returns_false", () => {
|
||||
const list = ["kimi-coding/*", "zai/*"];
|
||||
assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
|
||||
assert.ok(!isModelInEnabledList("anthropic", "claude-opus-4-6", list));
|
||||
});
|
||||
|
||||
test("wildcard_only_matches_same_provider_prefix", () => {
|
||||
// "kimi-coding/*" should not match "kimi-coding-v2/kimi-k2.6"
|
||||
const list = ["kimi-coding/*"];
|
||||
assert.ok(!isModelInEnabledList("kimi-coding-v2", "kimi-k2.6", list));
|
||||
assert.ok(isModelInEnabledList("kimi-coding", "anything", list));
|
||||
});
|
||||
});
|
||||
|
||||
// ── Part 2: fallback chain respects enabledModels ─────────────────────────────
|
||||
//
|
||||
// preferences.yaml pins execution chain to:
|
||||
// primary: zai/glm-5-pro
|
||||
// fallbacks: [kimi-coding/kimi-k2.6, mistral/mistral-large-2512]
|
||||
//
|
||||
// burn-max token_profile disables dynamic routing so the configured primary
|
||||
// is always tried first.
|
||||
|
||||
describe("selectAndApplyModel with enabledModels", () => {
|
||||
const ZAI = makeCandidate("zai", "glm-5-pro");
|
||||
const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
|
||||
const MISTRAL = makeCandidate("mistral", "mistral-large-2512");
|
||||
|
||||
test("allowlist_kimi_only_skips_zai_and_mistral_picks_kimi", async () => {
|
||||
makeEnv({ enabledModels: ["kimi-coding/*"] });
|
||||
const notified = [];
|
||||
const applied = await dispatch([ZAI, KIMI, MISTRAL], notified);
|
||||
|
||||
assert.ok(applied !== null, "a model should have been applied");
|
||||
assert.equal(applied.provider, "kimi-coding");
|
||||
assert.equal(applied.id, "kimi-k2.6");
|
||||
|
||||
// zai (primary) should be skipped with a notification
|
||||
const skipMsgs = notified.filter((n) =>
|
||||
n.msg.includes("not in enabledModels"),
|
||||
);
|
||||
assert.ok(
|
||||
skipMsgs.some((n) => n.msg.includes("zai")),
|
||||
`expected zai skip notification; got: ${JSON.stringify(skipMsgs)}`,
|
||||
);
|
||||
// mistral is never tried because kimi (second fallback) already succeeds —
|
||||
// only zai (primary) needs to be skipped for kimi to win
|
||||
});
|
||||
|
||||
test("no_enabledModels_in_settings_allows_all_candidates", async () => {
|
||||
makeEnv({ enabledModels: undefined }); // no key in settings.json
|
||||
const notified = [];
|
||||
const applied = await dispatch([ZAI, KIMI, MISTRAL], notified);
|
||||
|
||||
assert.ok(applied !== null, "a model should have been applied");
|
||||
// Primary (zai/glm-5-pro) should be selected — no filter applied
|
||||
assert.equal(applied.provider, "zai");
|
||||
assert.equal(applied.id, "glm-5-pro");
|
||||
|
||||
const skipMsgs = notified.filter((n) =>
|
||||
n.msg.includes("not in enabledModels"),
|
||||
);
|
||||
assert.equal(skipMsgs.length, 0, "no enabledModels notifications expected");
|
||||
});
|
||||
|
||||
test("empty_enabledModels_array_allows_all_candidates", async () => {
|
||||
makeEnv({ enabledModels: [] }); // empty array → no constraint
|
||||
const applied = await dispatch([ZAI, KIMI, MISTRAL]);
|
||||
|
||||
assert.ok(applied !== null, "a model should have been applied");
|
||||
// Primary zai should win with no filter
|
||||
assert.equal(applied.provider, "zai");
|
||||
assert.equal(applied.id, "glm-5-pro");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Part 3: SF_BYPASS_ENABLED_MODELS=1 disables the filter ───────────────────
|
||||
|
||||
describe("SF_BYPASS_ENABLED_MODELS bypass", () => {
|
||||
const ZAI = makeCandidate("zai", "glm-5-pro");
|
||||
const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
|
||||
|
||||
test("bypass_env_var_disables_filter_even_when_allowlist_excludes_model", async () => {
|
||||
makeEnv({
|
||||
enabledModels: ["kimi-coding/*"], // zai is excluded
|
||||
prefsYaml: [
|
||||
"version: 1",
|
||||
"models:",
|
||||
" execution:",
|
||||
" model: zai/glm-5-pro",
|
||||
" fallbacks:",
|
||||
" - kimi-coding/kimi-k2.6",
|
||||
"",
|
||||
].join("\n"),
|
||||
});
|
||||
process.env.SF_BYPASS_ENABLED_MODELS = "1";
|
||||
|
||||
const notified = [];
|
||||
const applied = await dispatch([ZAI, KIMI], notified);
|
||||
|
||||
// With bypass, zai should be selected even though it is not in enabledModels
|
||||
assert.ok(applied !== null, "a model should have been applied");
|
||||
assert.equal(
|
||||
applied.provider,
|
||||
"zai",
|
||||
"bypass should allow zai even though it is not in kimi-coding/*",
|
||||
);
|
||||
|
||||
const skipMsgs = notified.filter((n) =>
|
||||
n.msg.includes("not in enabledModels"),
|
||||
);
|
||||
assert.equal(skipMsgs.length, 0, "no skip notifications when bypassed");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Part 4: All-filtered exhaustion error path ────────────────────────────────
|
||||
|
||||
describe("enabledModels exhaustion error", () => {
|
||||
const ZAI = makeCandidate("zai", "glm-5-pro");
|
||||
const MISTRAL = makeCandidate("mistral", "mistral-large-2512");
|
||||
|
||||
test("throws_when_all_resolvable_candidates_filtered_by_allowlist", async () => {
|
||||
// Chain: zai (primary) + mistral (fallback) — neither is in kimi-coding/*
|
||||
makeEnv({
|
||||
enabledModels: ["kimi-coding/*"],
|
||||
prefsYaml: [
|
||||
"version: 1",
|
||||
"models:",
|
||||
" execution:",
|
||||
" model: zai/glm-5-pro",
|
||||
" fallbacks:",
|
||||
" - mistral/mistral-large-2512",
|
||||
"",
|
||||
].join("\n"),
|
||||
});
|
||||
|
||||
const pi = makeMockPi([ZAI, MISTRAL]);
|
||||
const ctx = makeMockCtx([ZAI, MISTRAL]);
|
||||
|
||||
await assert.rejects(
|
||||
() =>
|
||||
selectAndApplyModel(
|
||||
ctx,
|
||||
pi,
|
||||
"execute-task",
|
||||
"test-unit",
|
||||
"/tmp/test-project",
|
||||
BASE_PREFS,
|
||||
false,
|
||||
null,
|
||||
undefined,
|
||||
true,
|
||||
undefined,
|
||||
undefined,
|
||||
),
|
||||
(err) => {
|
||||
assert.ok(
|
||||
err.message.includes(
|
||||
"All fallback candidates filtered by enabledModels allowlist",
|
||||
),
|
||||
`expected exhaustion message, got: ${err.message}`,
|
||||
);
|
||||
return true;
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue