feat(model-routing): autonomous fallback strict to enabledModels allowlist

Autonomous mode's model-fallback chain bypassed enabledModels — when zai 429'd, the chain happily fell through to mistral/codestral-latest even though only minimax/*, kimi-coding/*, zai/*, ollama-cloud/* were allowed. Of 52 dispatches in this repo's journal this session, 10 (~19%) escaped the allowlist (mistral×2, opencode-go×3, google-gemini-cli×5). enabledModels was honored by interactive cycling (settings-manager.ts) and by self-feedback-drain.js for triage routing, but auto-model-selection.js's fallback chain in selectAndApplyModel never read it. Now: isModelInEnabledList(provider, modelId, enabledModels) filters each fallback candidate. Supports exact "provider/model" or "provider/*" wildcard. Empty/undefined list = open behavior (no regression for setups without an allowlist). readEnabledModels reads ~/.sf/agent/settings.json once per chain; swallows IO errors → undefined → no constraint (safe failure mode). Escape hatch: SF_BYPASS_ENABLED_MODELS=1 disables the check for emergency / misconfigured cases. When ALL candidates are filtered out and the chain exhausts, throws a clear error directing the operator to add to allowlist or unset. Tests: 13 in enabled-models-fallback.test.mjs covering pattern matrix, multi-candidate chain skipping, bypass env, and exhaustion path. Full suite 1906 pass, no regressions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 14:02:58 +02:00 · 2026-05-15 14:02:58 +02:00 · 2a58f4ebec
commit 2a58f4ebec
parent 089bf0cbeb
2 changed files with 389 additions and 11 deletions
--- a/src/resources/extensions/sf/auto-model-selection.js
+++ b/src/resources/extensions/sf/auto-model-selection.js
@ -61,21 +61,11 @@ function readEnabledModels() {
 	try {
 		const settingsPath = join(homedir(), ".sf", "agent", "settings.json");
 		const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
-		if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
-			process.stderr.write(
-				`[readEnabledModels] HOME=${process.env.HOME} homedir=${homedir()} path=${settingsPath} enabledModels=${JSON.stringify(settings?.enabledModels)}\n`,
-			);
-		}
 		return Array.isArray(settings?.enabledModels) &&
 			settings.enabledModels.length > 0
 			? settings.enabledModels
 			: undefined;
-	} catch (err) {
-		if (process.env.SF_DEBUG_ENABLED_MODELS === "1") {
-			process.stderr.write(
-				`[readEnabledModels] error: ${err?.message}\n`,
-			);
-		}
+	} catch {
 		return undefined; // settings missing or unreadable → no allowlist constraint
 	}
 }
--- a/src/resources/extensions/sf/tests/enabled-models-fallback.test.mjs
+++ b/src/resources/extensions/sf/tests/enabled-models-fallback.test.mjs
@ -0,0 +1,388 @@
+/**
+ * enabled-models-fallback.test.mjs
+ *
+ * Tests for the enabledModels allowlist enforcement in the autonomous
+ * fallback chain (auto-model-selection.js) and the isModelInEnabledList
+ * helper (preferences-models.js).
+ *
+ * Structure:
+ *   1. Unit tests for `isModelInEnabledList` — pure function, pattern matrix.
+ *   2. Integration tests for `selectAndApplyModel` with enabledModels wired in
+ *      via disk-backed preferences.yaml + settings.json.
+ *   3. Bypass via SF_BYPASS_ENABLED_MODELS=1.
+ *   4. All-filtered exhaustion error path.
+ */
+import assert from "node:assert/strict";
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, test } from "vitest";
+
+import { isModelInEnabledList } from "../preferences-models.js";
+// Import preferences.js to wire up _initPrefsLoader (circular dep resolver)
+import "../preferences.js";
+import { selectAndApplyModel } from "../auto-model-selection.js";
+
+// ── Test environment setup ───────────────────────────────────────────────────
+
+const originalHome = process.env.HOME;
+const originalSfHome = process.env.SF_HOME;
+const originalBypass = process.env.SF_BYPASS_ENABLED_MODELS;
+const originalCwd = process.cwd();
+const tmpDirs = [];
+
+afterEach(() => {
+	process.chdir(originalCwd);
+	// Restore individual env vars (not object replacement, which breaks native bindings)
+	if (originalHome !== undefined) {
+		process.env.HOME = originalHome;
+	} else {
+		delete process.env.HOME;
+	}
+	if (originalSfHome !== undefined) {
+		process.env.SF_HOME = originalSfHome;
+	} else {
+		delete process.env.SF_HOME;
+	}
+	if (originalBypass !== undefined) {
+		process.env.SF_BYPASS_ENABLED_MODELS = originalBypass;
+	} else {
+		delete process.env.SF_BYPASS_ENABLED_MODELS;
+	}
+	while (tmpDirs.length > 0) {
+		rmSync(tmpDirs.pop(), { recursive: true, force: true });
+	}
+});
+
+/**
+ * Create a temp project with:
+ *   - preferences.yaml configuring an execution model chain
+ *   - settings.json with the given enabledModels (undefined = omit the key)
+ *
+ * Sets HOME, SF_HOME, and cwd to the project so that loadEffectiveSFPreferences
+ * and readEnabledModels both resolve to the temp files.
+ */
+function makeEnv({ enabledModels, prefsYaml } = {}) {
+	const root = mkdtempSync(join(homedir(), "sf-em-test-"));
+	tmpDirs.push(root);
+	const home = join(root, "home");
+	const project = join(root, "project");
+	mkdirSync(join(home, ".sf", "agent"), { recursive: true });
+	mkdirSync(join(project, ".sf"), { recursive: true });
+
+	// preferences.yaml — model chain for execution
+	const yaml =
+		prefsYaml ??
+		[
+			"version: 1",
+			"models:",
+			"  execution:",
+			"    model: zai/glm-5-pro",
+			"    fallbacks:",
+			"      - kimi-coding/kimi-k2.6",
+			"      - mistral/mistral-large-2512",
+			"",
+		].join("\n");
+	writeFileSync(join(project, ".sf", "preferences.yaml"), yaml, "utf-8");
+
+	// settings.json — enabledModels allowlist
+	const settings = {};
+	if (enabledModels !== undefined) {
+		settings.enabledModels = enabledModels;
+	}
+	writeFileSync(
+		join(home, ".sf", "agent", "settings.json"),
+		JSON.stringify(settings, null, 2),
+		"utf-8",
+	);
+
+	process.env.HOME = home;
+	process.env.SF_HOME = join(home, ".sf");
+	process.chdir(project);
+	return { home, project };
+}
+
+// ── Mock helpers ─────────────────────────────────────────────────────────────
+
+function makeCandidate(provider, id) {
+	return { provider, id, api: "anthropic-messages", contextWindow: 200000 };
+}
+
+function makeMockRegistry(candidates) {
+	return {
+		getAvailable: () => [...candidates],
+		getProviderAuthMode: () => "apiKey",
+		isProviderRequestReady: () => true,
+	};
+}
+
+function makeMockPi(candidates) {
+	const state = { appliedModel: null };
+	return {
+		state,
+		setModel: async (model) => {
+			const found = candidates.find(
+				(c) => c.provider === model.provider && c.id === model.id,
+			);
+			if (!found) return false;
+			state.appliedModel = model;
+			return true;
+		},
+		getActiveTools: () => [],
+		setActiveTools: () => {},
+		setThinkingLevel: () => {},
+		emitBeforeModelSelect: async () => null,
+		emitAdjustToolSet: async () => null,
+	};
+}
+
+function makeMockCtx(candidates, notified = []) {
+	return {
+		modelRegistry: makeMockRegistry(candidates),
+		model: { provider: candidates[0].provider, id: candidates[0].id },
+		sessionManager: { getSessionId: () => "test-session" },
+		ui: { notify: (msg, level) => notified.push({ msg, level }) },
+	};
+}
+
+/**
+ * Base prefs that:
+ *   - disable model policy (so mock candidates are not rejected)
+ *   - use burn-max profile to disable dynamic routing (so the disk-configured
+ *     primary model is always tried first, without complexity-based override)
+ */
+const BASE_PREFS = {
+	uok: { model_policy: { enabled: false } },
+	token_profile: "burn-max",
+};
+
+async function dispatch(candidates, notified = []) {
+	const pi = makeMockPi(candidates);
+	const ctx = makeMockCtx(candidates, notified);
+	await selectAndApplyModel(
+		ctx,
+		pi,
+		"execute-task",
+		"test-unit",
+		"/tmp/test-project",
+		BASE_PREFS,
+		false, // verbose
+		null, // autoModeStartModel
+		undefined, // retryContext
+		true, // isAutoMode
+		undefined, // sessionModelOverride
+		undefined, // autoModeStartThinkingLevel
+	);
+	return pi.state.appliedModel;
+}
+
+// ── Part 1: isModelInEnabledList (pure unit tests) ────────────────────────────
+
+describe("isModelInEnabledList", () => {
+	test("empty_list_allows_everything", () => {
+		assert.ok(isModelInEnabledList("mistral", "mistral-large-2512", []));
+		assert.ok(isModelInEnabledList("anthropic", "claude-opus-4-6", []));
+	});
+
+	test("undefined_allows_everything", () => {
+		assert.ok(isModelInEnabledList("mistral", "mistral-large-2512", undefined));
+		assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", undefined));
+	});
+
+	test("exact_match", () => {
+		const list = ["kimi-coding/kimi-k2.6"];
+		assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
+		assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
+		assert.ok(!isModelInEnabledList("kimi-coding", "kimi-k2.5", list));
+	});
+
+	test("wildcard_provider_star", () => {
+		const list = ["minimax/*"];
+		assert.ok(isModelInEnabledList("minimax", "minimax-m2.5", list));
+		assert.ok(isModelInEnabledList("minimax", "minimax-m2.5-free", list));
+		assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
+	});
+
+	test("multiple_patterns_any_match_passes", () => {
+		const list = ["kimi-coding/*", "zai/*", "minimax/minimax-m2.5"];
+		assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
+		assert.ok(isModelInEnabledList("zai", "glm-5-pro", list));
+		assert.ok(isModelInEnabledList("minimax", "minimax-m2.5", list));
+		assert.ok(!isModelInEnabledList("minimax", "minimax-m2.5-free", list));
+		assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
+	});
+
+	test("non_string_entries_skipped_gracefully", () => {
+		// @ts-expect-error intentional bad input
+		const list = [null, 42, "kimi-coding/kimi-k2.6", undefined];
+		assert.ok(isModelInEnabledList("kimi-coding", "kimi-k2.6", list));
+		assert.ok(!isModelInEnabledList("mistral", "mistral-large", list));
+	});
+
+	test("no_match_returns_false", () => {
+		const list = ["kimi-coding/*", "zai/*"];
+		assert.ok(!isModelInEnabledList("mistral", "mistral-large-2512", list));
+		assert.ok(!isModelInEnabledList("anthropic", "claude-opus-4-6", list));
+	});
+
+	test("wildcard_only_matches_same_provider_prefix", () => {
+		// "kimi-coding/*" should not match "kimi-coding-v2/kimi-k2.6"
+		const list = ["kimi-coding/*"];
+		assert.ok(!isModelInEnabledList("kimi-coding-v2", "kimi-k2.6", list));
+		assert.ok(isModelInEnabledList("kimi-coding", "anything", list));
+	});
+});
+
+// ── Part 2: fallback chain respects enabledModels ─────────────────────────────
+//
+// preferences.yaml pins execution chain to:
+//   primary: zai/glm-5-pro
+//   fallbacks: [kimi-coding/kimi-k2.6, mistral/mistral-large-2512]
+//
+// burn-max token_profile disables dynamic routing so the configured primary
+// is always tried first.
+
+describe("selectAndApplyModel with enabledModels", () => {
+	const ZAI = makeCandidate("zai", "glm-5-pro");
+	const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
+	const MISTRAL = makeCandidate("mistral", "mistral-large-2512");
+
+	test("allowlist_kimi_only_skips_zai_and_mistral_picks_kimi", async () => {
+		makeEnv({ enabledModels: ["kimi-coding/*"] });
+		const notified = [];
+		const applied = await dispatch([ZAI, KIMI, MISTRAL], notified);
+
+		assert.ok(applied !== null, "a model should have been applied");
+		assert.equal(applied.provider, "kimi-coding");
+		assert.equal(applied.id, "kimi-k2.6");
+
+		// zai (primary) should be skipped with a notification
+		const skipMsgs = notified.filter((n) =>
+			n.msg.includes("not in enabledModels"),
+		);
+		assert.ok(
+			skipMsgs.some((n) => n.msg.includes("zai")),
+			`expected zai skip notification; got: ${JSON.stringify(skipMsgs)}`,
+		);
+		// mistral is never tried because kimi (second fallback) already succeeds —
+		// only zai (primary) needs to be skipped for kimi to win
+	});
+
+	test("no_enabledModels_in_settings_allows_all_candidates", async () => {
+		makeEnv({ enabledModels: undefined }); // no key in settings.json
+		const notified = [];
+		const applied = await dispatch([ZAI, KIMI, MISTRAL], notified);
+
+		assert.ok(applied !== null, "a model should have been applied");
+		// Primary (zai/glm-5-pro) should be selected — no filter applied
+		assert.equal(applied.provider, "zai");
+		assert.equal(applied.id, "glm-5-pro");
+
+		const skipMsgs = notified.filter((n) =>
+			n.msg.includes("not in enabledModels"),
+		);
+		assert.equal(skipMsgs.length, 0, "no enabledModels notifications expected");
+	});
+
+	test("empty_enabledModels_array_allows_all_candidates", async () => {
+		makeEnv({ enabledModels: [] }); // empty array → no constraint
+		const applied = await dispatch([ZAI, KIMI, MISTRAL]);
+
+		assert.ok(applied !== null, "a model should have been applied");
+		// Primary zai should win with no filter
+		assert.equal(applied.provider, "zai");
+		assert.equal(applied.id, "glm-5-pro");
+	});
+});
+
+// ── Part 3: SF_BYPASS_ENABLED_MODELS=1 disables the filter ───────────────────
+
+describe("SF_BYPASS_ENABLED_MODELS bypass", () => {
+	const ZAI = makeCandidate("zai", "glm-5-pro");
+	const KIMI = makeCandidate("kimi-coding", "kimi-k2.6");
+
+	test("bypass_env_var_disables_filter_even_when_allowlist_excludes_model", async () => {
+		makeEnv({
+			enabledModels: ["kimi-coding/*"], // zai is excluded
+			prefsYaml: [
+				"version: 1",
+				"models:",
+				"  execution:",
+				"    model: zai/glm-5-pro",
+				"    fallbacks:",
+				"      - kimi-coding/kimi-k2.6",
+				"",
+			].join("\n"),
+		});
+		process.env.SF_BYPASS_ENABLED_MODELS = "1";
+
+		const notified = [];
+		const applied = await dispatch([ZAI, KIMI], notified);
+
+		// With bypass, zai should be selected even though it is not in enabledModels
+		assert.ok(applied !== null, "a model should have been applied");
+		assert.equal(
+			applied.provider,
+			"zai",
+			"bypass should allow zai even though it is not in kimi-coding/*",
+		);
+
+		const skipMsgs = notified.filter((n) =>
+			n.msg.includes("not in enabledModels"),
+		);
+		assert.equal(skipMsgs.length, 0, "no skip notifications when bypassed");
+	});
+});
+
+// ── Part 4: All-filtered exhaustion error path ────────────────────────────────
+
+describe("enabledModels exhaustion error", () => {
+	const ZAI = makeCandidate("zai", "glm-5-pro");
+	const MISTRAL = makeCandidate("mistral", "mistral-large-2512");
+
+	test("throws_when_all_resolvable_candidates_filtered_by_allowlist", async () => {
+		// Chain: zai (primary) + mistral (fallback) — neither is in kimi-coding/*
+		makeEnv({
+			enabledModels: ["kimi-coding/*"],
+			prefsYaml: [
+				"version: 1",
+				"models:",
+				"  execution:",
+				"    model: zai/glm-5-pro",
+				"    fallbacks:",
+				"      - mistral/mistral-large-2512",
+				"",
+			].join("\n"),
+		});
+
+		const pi = makeMockPi([ZAI, MISTRAL]);
+		const ctx = makeMockCtx([ZAI, MISTRAL]);
+
+		await assert.rejects(
+			() =>
+				selectAndApplyModel(
+					ctx,
+					pi,
+					"execute-task",
+					"test-unit",
+					"/tmp/test-project",
+					BASE_PREFS,
+					false,
+					null,
+					undefined,
+					true,
+					undefined,
+					undefined,
+				),
+			(err) => {
+				assert.ok(
+					err.message.includes(
+						"All fallback candidates filtered by enabledModels allowlist",
+					),
+					`expected exhaustion message, got: ${err.message}`,
+				);
+				return true;
+			},
+		);
+	});
+});