fix: align provider route selection

This commit is contained in:
Mikael Hugo 2026-05-05 17:37:01 +02:00
parent 1e8a05dc70
commit 66e8265320
8 changed files with 183 additions and 53 deletions

View file

@ -115,7 +115,7 @@
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/gemini-cli-core": "^0.40.1",
"@google/gemini-cli-core": "0.40.1",
"@google/genai": "^1.40.0",
"@mariozechner/jiti": "^2.6.2",
"@mistralai/mistralai": "^2.2.1",

View file

@ -11,12 +11,11 @@
import {
AuthType,
CodeAssistServer,
getOauthClient,
makeFakeConfig,
retryWithBackoff,
setupUser,
} from "@google/gemini-cli-core";
import { createCodeAssistContentGenerator } from "@google/gemini-cli-core/dist/src/code_assist/codeAssist.js";
import type { ContentGenerator } from "@google/gemini-cli-core/dist/src/core/contentGenerator.js";
import type {
Content,
GenerateContentParameters,
@ -99,23 +98,26 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
let toolCallCounter = 0;
/**
* Build a CodeAssistServer using cli-core's own auth + project discovery.
* Build a Code Assist content generator using cli-core's own auth + project discovery.
*
* - getOauthClient() reads ~/.gemini/oauth_creds.json when present, refreshes if
* expired, and returns an authenticated AuthClient. cli-core owns any
* interactive login flow it needs.
* expired. cli-core owns any interactive login flow it needs.
* - setupUser() asks the Code Assist API for the project + tier tied to this
* identity (free-tier auto-provisioned if needed; otherwise whatever the
* user has been onboarded to server-side).
* - createCodeAssistContentGenerator() passes the returned tier and paid-tier
* data into CodeAssistServer, matching the official Gemini CLI path.
*
* Both calls memoize internally inside cli-core, so repeat invocations are
* cheap.
*/
async function getCodeAssistServer(): Promise<CodeAssistServer> {
async function getCodeAssistServer(): Promise<ContentGenerator> {
const config = makeFakeConfig();
const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config);
const userData = await setupUser(authClient, config);
return new CodeAssistServer(authClient, userData.projectId, { headers: {} });
return createCodeAssistContentGenerator(
{ headers: {} },
AuthType.LOGIN_WITH_GOOGLE,
config,
);
}
function parseDurationMs(value: string): number | undefined {

View file

@ -109,7 +109,7 @@ describe("ModelRegistry.getModelsForProxy — basic", () => {
it("returns all candidates when multiple providers share the model id", () => {
const registry = createRegistry();
registerNone(registry, "zai", "glm-4-air");
registerNone(registry, "opencode", "glm-4-air");
registerNone(registry, "opencode-go", "glm-4-air");
const result = registry.getModelsForProxy("glm-4-air");
assert.equal(result.length, 2);
});
@ -402,45 +402,41 @@ describe("ModelRegistry.getModelsForProxy — basic", () => {
// ── getModelsForProxy — family priority ordering ──────────────────────────────
describe("ModelRegistry.getModelsForProxy — family priority ordering", () => {
it("GLM family: zai before opencode before opencode-go", () => {
it("GLM family: zai before subscribed/free relays, never OpenRouter", () => {
const registry = createRegistry();
// Register in reverse priority order to confirm sorting
registerNone(registry, "openrouter", "glm-4-air");
registerNone(registry, "ollama-cloud", "glm-4-air");
registerNone(registry, "opencode-go", "glm-4-air");
registerNone(registry, "opencode", "glm-4-air");
registerNone(registry, "zai", "glm-4-air");
const result = registry.getModelsForProxy("glm-4-air");
const providers = result.map((m) => m.provider);
assert.equal(providers[0], "zai", "zai must be first for GLM");
assert.ok(
providers.indexOf("opencode") < providers.indexOf("opencode-go"),
"opencode before opencode-go",
);
assert.deepEqual(providers, ["zai", "opencode-go", "ollama-cloud"]);
});
it("Kimi family: kimi-coding before opencode", () => {
it("Kimi family: kimi-coding before subscribed/free relays, never OpenRouter", () => {
const registry = createRegistry();
registerNone(registry, "openrouter", "kimi-k2");
registerNone(registry, "opencode", "kimi-k2");
registerNone(registry, "opencode-go", "kimi-k2");
registerNone(registry, "ollama-cloud", "kimi-k2");
registerNone(registry, "kimi-coding", "kimi-k2");
const result = registry.getModelsForProxy("kimi-k2");
const providers = result.map((m) => m.provider);
assert.equal(
providers[0],
"kimi-coding",
"kimi-coding must lead for kimi- models",
);
assert.deepEqual(providers, ["kimi-coding", "ollama-cloud", "opencode-go"]);
});
it("MiniMax family: minimax before minimax-cn", () => {
it("MiniMax family: direct providers before subscribed/free relays, never OpenRouter", () => {
const registry = createRegistry();
registerNone(registry, "openrouter", "MiniMax-Text-01");
registerNone(registry, "ollama-cloud", "MiniMax-Text-01");
registerNone(registry, "opencode-go", "MiniMax-Text-01");
registerNone(registry, "minimax-cn", "MiniMax-Text-01");
registerNone(registry, "minimax", "MiniMax-Text-01");
const result = registry.getModelsForProxy("MiniMax-Text-01");
const providers = result.map((m) => m.provider);
assert.equal(
providers[0],
"minimax",
"minimax (international) before minimax-cn",
);
assert.deepEqual(providers, ["minimax", "opencode-go", "ollama-cloud"]);
});
it("Gemini family: google-gemini-cli only for bare model routing", () => {
@ -468,10 +464,10 @@ describe("ModelRegistry.getModelsForProxy — family priority ordering", () => {
describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () => {
it("provider with auth precedes same-priority provider without auth", () => {
// zai has auth (hasAuth → true), opencode does not
// zai has auth (hasAuth → true), opencode-go does not
const registry = createRegistry((p) => p === "zai");
registerApiKey(registry, "zai", "glm-4-air");
registerApiKey(registry, "opencode", "glm-4-air");
registerApiKey(registry, "opencode-go", "glm-4-air");
const result = registry.getModelsForProxy("glm-4-air");
const providers = result.map((m) => m.provider);
// zai is already first by family priority AND by auth — stays first
@ -479,16 +475,16 @@ describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () =
});
it("lower-priority provider with auth beats higher-priority one without auth", () => {
// opencode has auth, zai does not
const registry = createRegistry((p) => p === "opencode");
// opencode-go has auth, zai does not
const registry = createRegistry((p) => p === "opencode-go");
registerApiKey(registry, "zai", "glm-4-air");
registerApiKey(registry, "opencode", "glm-4-air");
registerApiKey(registry, "opencode-go", "glm-4-air");
const result = registry.getModelsForProxy("glm-4-air");
// opencode has auth so moves to withAuth bucket (before zai which has none)
// opencode-go has auth so moves to withAuth bucket (before zai which has none)
const providers = result.map((m) => m.provider);
assert.equal(
providers[0],
"opencode",
"opencode-go",
"auth-ready provider surfaces first regardless of family order",
);
});
@ -496,7 +492,7 @@ describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () =
it("none-auth providers are always request-ready and not demoted", () => {
const registry = createRegistry(() => false);
registerNone(registry, "zai", "glm-4-air");
registerNone(registry, "opencode", "glm-4-air");
registerNone(registry, "opencode-go", "glm-4-air");
const result = registry.getModelsForProxy("glm-4-air");
// Both none-auth — family order preserved
assert.equal(result[0].provider, "zai");
@ -580,6 +576,7 @@ describe("ModelRegistry provider_model_allow filter", () => {
const registry = createRegistry();
registerNone(registry, "minimax", "MiniMax-M2");
registerNone(registry, "minimax-cn", "MiniMax-M2");
registerNone(registry, "opencode-go", "MiniMax-M2");
const result = registry.getModelsForProxy(
"MiniMax-M2",
@ -591,7 +588,7 @@ describe("ModelRegistry provider_model_allow filter", () => {
assert.deepEqual(
result.map((m) => `${m.provider}/${m.id}`),
["minimax-cn/MiniMax-M2"],
["opencode-go/MiniMax-M2"],
);
});

View file

@ -78,12 +78,26 @@ export const PROXY_FAMILY_PRIORITY: ReadonlyArray<{
{
match: /^MiniMax-/i,
prefix: "MiniMax-",
providers: ["minimax", "minimax-cn"],
providers: ["minimax"],
family_failover: ["opencode-go", "ollama-cloud"],
global_fallback: false,
},
// ZAI direct API for GLM
{ match: /^glm-/i, prefix: "glm-", providers: ["zai"] },
{
match: /^glm-|^z-ai\/glm-/i,
prefix: "glm-",
providers: ["zai"],
family_failover: ["opencode-go", "ollama-cloud"],
global_fallback: false,
},
// Kimi Code direct API
{ match: /^kimi-/i, prefix: "kimi-", providers: ["kimi-coding"] },
{
match: /^kimi-|^moonshotai\/kimi-/i,
prefix: "kimi-",
providers: ["kimi-coding"],
family_failover: ["ollama-cloud", "opencode-go"],
global_fallback: false,
},
// MiMo/Xiaomi — direct API via Xiaomi MiMo Open Platform (api.xiaomimimo.com)
// or the Token Plan endpoint (token-plan-sgp.xiaomimimo.com). Both served
// under the `xiaomi` provider namespace.
@ -255,6 +269,7 @@ const HIDDEN_MODEL_PROVIDERS = new Set([
"google-vertex",
"groq",
"github-copilot",
"minimax-cn",
"xai",
"xiaomi-token-plan-ams",
"xiaomi-token-plan-cn",
@ -1166,9 +1181,8 @@ export class ModelRegistry {
r.match.test(modelId),
);
// Order: direct family providers → family-scoped failover → global fallback.
// Overrides replace only the direct list (keeps family_failover + global
// chain intact) so a user pinning "glm- → [zai]" still picks up
// opencode-go / openrouter / ollama-cloud as last resort.
// Overrides replace only the direct list while preserving the family's
// explicit failover/containment policy.
const familyProviders = overrideEntry?.[1] ?? familyEntry?.providers ?? [];
const familyFailover = familyEntry?.family_failover ?? [];
const seen = new Set([...familyProviders, ...familyFailover]);

View file

@ -108,12 +108,15 @@ function restoreToolBaseline(pi) {
}
}
const BARE_MODEL_FAMILY_PRIORITY = [
{ match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] },
{ match: /^glm-/i, providers: ["zai", "opencode-go", "ollama-cloud"] },
{
match: /^kimi-/i,
providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"],
providers: ["kimi-coding", "ollama-cloud", "opencode-go"],
},
{
match: /^MiniMax-|^minimax-/i,
providers: ["minimax", "opencode-go", "ollama-cloud"],
},
{ match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] },
{
match: /^mimo-|^xiaomi-/i,
providers: ["xiaomi", "opencode-go"],

View file

@ -1,5 +1,6 @@
// SF — Persistent per-project blocklist of provider/model pairs that the
// provider has rejected at request time for account entitlement reasons.
// provider has rejected at request time for account entitlement or temporary
// capacity reasons.
//
// Lives at `.sf/runtime/blocked-models.json` so the block survives /sf autonomous
// restarts. Auto-mode model selection skips blocked entries; agent-end
@ -16,6 +17,14 @@ function blockedModelsPath(basePath) {
/**
 * Build the lookup key used to compare provider/model pairs.
 *
 * Both parts are lower-cased so comparisons are case-insensitive.
 *
 * @param provider - provider identifier
 * @param id - model identifier
 * @returns the key in `provider/id` form, lower-cased
 */
function modelKey(provider, id) {
  const normalizedProvider = provider.toLowerCase();
  const normalizedId = id.toLowerCase();
  return normalizedProvider + "/" + normalizedId;
}
/**
 * Drop blocklist entries whose temporary block has run out.
 *
 * An entry is kept when it has no usable expiry (`expiresAt` is missing,
 * not a number, or not finite) or when its expiry is strictly later than `now`.
 *
 * @param entries - blocklist entries to filter
 * @param now - reference time in epoch milliseconds; defaults to the current time
 * @returns a new array containing only the entries that are still in force
 */
function activeBlockedEntries(entries, now = Date.now()) {
  const isStillActive = (entry) => {
    const { expiresAt } = entry;
    // No usable numeric expiry: the entry never times out.
    if (typeof expiresAt !== "number") return true;
    if (!Number.isFinite(expiresAt)) return true;
    // An expiry exactly equal to `now` counts as already expired.
    return expiresAt > now;
  };
  return entries.filter(isStillActive);
}
function readFileSafe(path) {
if (!existsSync(path)) return { version: 1, blocked: [] };
try {
@ -27,7 +36,7 @@ function readFileSafe(path) {
const blocked = parsed.blocked.filter(
(e) => !!e && typeof e.provider === "string" && typeof e.id === "string",
);
return { version: 1, blocked };
return { version: 1, blocked: activeBlockedEntries(blocked) };
} catch {
// Corrupted JSON: treat as empty so a bad file never blocks dispatch.
return { version: 1, blocked: [] };
@ -46,7 +55,7 @@ export function isModelBlocked(basePath, provider, id) {
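/**
* Add a provider/model pair to the persistent blocklist (e.g., after account entitlement rejection).
* Pass `options.expiresAt` (epoch milliseconds) to make the block temporary; entries without a
* finite numeric `expiresAt` never expire.
*/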
export function blockModel(basePath, provider, id, reason) {
export function blockModel(basePath, provider, id, reason, options = {}) {
const path = blockedModelsPath(basePath);
mkdirSync(dirname(path), { recursive: true });
// Ensure the file exists before we try to lock it — proper-lockfile requires
@ -62,14 +71,30 @@ export function blockModel(basePath, provider, id, reason) {
withFileLockSync(path, () => {
const current = readFileSafe(path);
const target = modelKey(provider, id);
if (current.blocked.some((e) => modelKey(e.provider, e.id) === target)) {
const existing = current.blocked.find(
(e) => modelKey(e.provider, e.id) === target,
);
if (existing) {
if (
typeof options.expiresAt === "number" &&
(!existing.expiresAt || options.expiresAt > existing.expiresAt)
) {
existing.expiresAt = options.expiresAt;
existing.reason = reason;
writeFileSync(path, JSON.stringify(current, null, 2) + "\n", "utf-8");
}
return;
}
const expiresAt =
typeof options.expiresAt === "number" &&
Number.isFinite(options.expiresAt)
? options.expiresAt
: undefined;
const next = {
version: 1,
blocked: [
...current.blocked,
{ provider, id, reason, blockedAt: Date.now() },
{ provider, id, reason, blockedAt: Date.now(), expiresAt },
],
};
writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8");

View file

@ -25,6 +25,19 @@ import { logWarning } from "../workflow-logger.js";
import { clearDiscussionFlowState } from "./write-gate.js";
const retryState = createRetryState();
// Minimum cooldown applied after a Gemini CLI capacity rejection (2 minutes).
const GEMINI_CAPACITY_COOLDOWN_MS = 2 * 60_000;
// Upper bound on a single cooldown (30 minutes), even when the provider's
// retry hint asks for longer.
const GEMINI_CAPACITY_MAX_COOLDOWN_MS = 30 * 60_000;

/**
 * Decide how long a failed route should be temporarily blocked.
 *
 * Only `google-gemini-cli` routes that failed with a `rate-limit` or `server`
 * classification get a cooldown. The duration is the provider's
 * `retryAfterMs` hint when present, clamped to the range
 * [GEMINI_CAPACITY_COOLDOWN_MS, GEMINI_CAPACITY_MAX_COOLDOWN_MS].
 *
 * @param provider - provider id of the route that failed (may be undefined)
 * @param cls - error classification; `kind` is inspected and an optional
 *   numeric `retryAfterMs` is honoured
 * @returns the cooldown in milliseconds, or `undefined` when the route should
 *   not be temporarily blocked
 */
function temporaryRouteBlockMs(provider, cls) {
  if (provider !== "google-gemini-cli") return undefined;
  if (cls.kind !== "rate-limit" && cls.kind !== "server") return undefined;
  // A NaN hint would propagate through Math.max/Math.min and produce a NaN
  // cooldown, which truthiness checks in callers read as "no cooldown".
  // Treat it as a missing hint so the minimum cooldown still applies.
  const retryAfterMs =
    "retryAfterMs" in cls &&
    typeof cls.retryAfterMs === "number" &&
    !Number.isNaN(cls.retryAfterMs)
      ? cls.retryAfterMs
      : undefined;
  const base = Math.max(retryAfterMs ?? 0, GEMINI_CAPACITY_COOLDOWN_MS);
  return Math.min(base, GEMINI_CAPACITY_MAX_COOLDOWN_MS);
}
/**
* Reset the module-level retry state so a resumed auto-session starts fresh.
* Called by provider-error-resume.ts before startAuto() so legacy paused
@ -245,6 +258,30 @@ export async function handleAgentEnd(pi, event, ctx) {
// ── 2. Decide & Act ──────────────────────────────────────────────────
// --- Route failures: try configured fallback first, then any available route ---
if (isModelRouteFailure(cls) && dash.currentUnit) {
const blockMs = temporaryRouteBlockMs(currentRoute?.provider, cls);
if (
blockMs &&
dash.basePath &&
currentRoute?.provider &&
currentRoute?.id
) {
try {
blockModel(
dash.basePath,
currentRoute.provider,
currentRoute.id,
rawErrorMsg || cls.kind,
{ expiresAt: Date.now() + blockMs },
);
ctx.ui.notify(
`Cooling down ${currentRoute.provider}/${currentRoute.id} for ${Math.ceil(blockMs / 1000)}s after provider capacity rejection.`,
"warning",
);
} catch (err) {
const m = err instanceof Error ? err.message : String(err);
logWarning("bootstrap", `Failed to persist model cooldown: ${m}`);
}
}
const switched = await trySwitchToFallbackModel({
pi,
ctx,

View file

@ -0,0 +1,52 @@
import assert from "node:assert/strict";
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import {
blockModel,
isModelBlocked,
loadBlockedModels,
} from "../blocked-models.js";
let tmp;
// Per-test cleanup: delete the scratch project directory created by the test
// (if any) and clear the shared handle so the next test starts clean.
afterEach(() => {
  if (tmp) {
    rmSync(tmp, { force: true, recursive: true });
  }
  tmp = undefined;
});
/**
 * Create a fresh scratch directory under the OS temp dir to act as a project
 * base path, and remember it in the module-level `tmp` handle.
 *
 * @returns the absolute path of the newly created directory
 */
function tempProject() {
  const prefix = join(tmpdir(), "sf-blocked-models-");
  tmp = mkdtempSync(prefix);
  return tmp;
}
describe("blocked models", () => {
  // Route used by every case in this suite.
  const PROVIDER = "google-gemini-cli";
  const MODEL = "gemini-2.5-pro";
  const REASON = "capacity";

  // A block whose expiry is already in the past must read back as "not blocked"
  // and must not be listed by loadBlockedModels.
  test("isModelBlocked_when_temporary_block_expired_returns_false", () => {
    const basePath = tempProject();
    const alreadyExpired = Date.now() - 1_000;
    blockModel(basePath, PROVIDER, MODEL, REASON, { expiresAt: alreadyExpired });

    const blocked = isModelBlocked(basePath, PROVIDER, MODEL);
    assert.equal(blocked, false);
    assert.deepEqual(loadBlockedModels(basePath), []);
  });

  // A block whose expiry is still in the future must be reported as blocked.
  test("isModelBlocked_when_temporary_block_active_returns_true", () => {
    const basePath = tempProject();
    const stillActive = Date.now() + 60_000;
    blockModel(basePath, PROVIDER, MODEL, REASON, { expiresAt: stillActive });

    const blocked = isModelBlocked(basePath, PROVIDER, MODEL);
    assert.equal(blocked, true);
  });
});