fix: align provider route selection
This commit is contained in:
parent
1e8a05dc70
commit
66e8265320
8 changed files with 183 additions and 53 deletions
|
|
@ -115,7 +115,7 @@
|
|||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@clack/prompts": "^1.1.0",
|
||||
"@google/gemini-cli-core": "^0.40.1",
|
||||
"@google/gemini-cli-core": "0.40.1",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@mistralai/mistralai": "^2.2.1",
|
||||
|
|
|
|||
|
|
@ -11,12 +11,11 @@
|
|||
|
||||
import {
|
||||
AuthType,
|
||||
CodeAssistServer,
|
||||
getOauthClient,
|
||||
makeFakeConfig,
|
||||
retryWithBackoff,
|
||||
setupUser,
|
||||
} from "@google/gemini-cli-core";
|
||||
import { createCodeAssistContentGenerator } from "@google/gemini-cli-core/dist/src/code_assist/codeAssist.js";
|
||||
import type { ContentGenerator } from "@google/gemini-cli-core/dist/src/core/contentGenerator.js";
|
||||
import type {
|
||||
Content,
|
||||
GenerateContentParameters,
|
||||
|
|
@ -99,23 +98,26 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
|
|||
let toolCallCounter = 0;
|
||||
|
||||
/**
|
||||
* Build a CodeAssistServer using cli-core's own auth + project discovery.
|
||||
* Build a Code Assist content generator using cli-core's own auth + project discovery.
|
||||
*
|
||||
* - getOauthClient() reads ~/.gemini/oauth_creds.json when present, refreshes if
|
||||
* expired, and returns an authenticated AuthClient. cli-core owns any
|
||||
* interactive login flow it needs.
|
||||
* expired. cli-core owns any interactive login flow it needs.
|
||||
* - setupUser() asks the Code Assist API for the project + tier tied to this
|
||||
* identity (free-tier auto-provisioned if needed; otherwise whatever the
|
||||
* user has been onboarded to server-side).
|
||||
* - createCodeAssistContentGenerator() passes the returned tier and paid-tier
|
||||
* data into CodeAssistServer, matching the official Gemini CLI path.
|
||||
*
|
||||
* Both calls memoize internally inside cli-core — repeat invocations are
|
||||
* cheap.
|
||||
*/
|
||||
async function getCodeAssistServer(): Promise<CodeAssistServer> {
|
||||
async function getCodeAssistServer(): Promise<ContentGenerator> {
|
||||
const config = makeFakeConfig();
|
||||
const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config);
|
||||
const userData = await setupUser(authClient, config);
|
||||
return new CodeAssistServer(authClient, userData.projectId, { headers: {} });
|
||||
return createCodeAssistContentGenerator(
|
||||
{ headers: {} },
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
config,
|
||||
);
|
||||
}
|
||||
|
||||
function parseDurationMs(value: string): number | undefined {
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ describe("ModelRegistry.getModelsForProxy — basic", () => {
|
|||
it("returns all candidates when multiple providers share the model id", () => {
|
||||
const registry = createRegistry();
|
||||
registerNone(registry, "zai", "glm-4-air");
|
||||
registerNone(registry, "opencode", "glm-4-air");
|
||||
registerNone(registry, "opencode-go", "glm-4-air");
|
||||
const result = registry.getModelsForProxy("glm-4-air");
|
||||
assert.equal(result.length, 2);
|
||||
});
|
||||
|
|
@ -402,45 +402,41 @@ describe("ModelRegistry.getModelsForProxy — basic", () => {
|
|||
// ── getModelsForProxy — family priority ordering ──────────────────────────────
|
||||
|
||||
describe("ModelRegistry.getModelsForProxy — family priority ordering", () => {
|
||||
it("GLM family: zai before opencode before opencode-go", () => {
|
||||
it("GLM family: zai before subscribed/free relays, never OpenRouter", () => {
|
||||
const registry = createRegistry();
|
||||
// Register in reverse priority order to confirm sorting
|
||||
registerNone(registry, "openrouter", "glm-4-air");
|
||||
registerNone(registry, "ollama-cloud", "glm-4-air");
|
||||
registerNone(registry, "opencode-go", "glm-4-air");
|
||||
registerNone(registry, "opencode", "glm-4-air");
|
||||
registerNone(registry, "zai", "glm-4-air");
|
||||
const result = registry.getModelsForProxy("glm-4-air");
|
||||
const providers = result.map((m) => m.provider);
|
||||
assert.equal(providers[0], "zai", "zai must be first for GLM");
|
||||
assert.ok(
|
||||
providers.indexOf("opencode") < providers.indexOf("opencode-go"),
|
||||
"opencode before opencode-go",
|
||||
);
|
||||
assert.deepEqual(providers, ["zai", "opencode-go", "ollama-cloud"]);
|
||||
});
|
||||
|
||||
it("Kimi family: kimi-coding before opencode", () => {
|
||||
it("Kimi family: kimi-coding before subscribed/free relays, never OpenRouter", () => {
|
||||
const registry = createRegistry();
|
||||
registerNone(registry, "openrouter", "kimi-k2");
|
||||
registerNone(registry, "opencode", "kimi-k2");
|
||||
registerNone(registry, "opencode-go", "kimi-k2");
|
||||
registerNone(registry, "ollama-cloud", "kimi-k2");
|
||||
registerNone(registry, "kimi-coding", "kimi-k2");
|
||||
const result = registry.getModelsForProxy("kimi-k2");
|
||||
const providers = result.map((m) => m.provider);
|
||||
assert.equal(
|
||||
providers[0],
|
||||
"kimi-coding",
|
||||
"kimi-coding must lead for kimi- models",
|
||||
);
|
||||
assert.deepEqual(providers, ["kimi-coding", "ollama-cloud", "opencode-go"]);
|
||||
});
|
||||
|
||||
it("MiniMax family: minimax before minimax-cn", () => {
|
||||
it("MiniMax family: direct providers before subscribed/free relays, never OpenRouter", () => {
|
||||
const registry = createRegistry();
|
||||
registerNone(registry, "openrouter", "MiniMax-Text-01");
|
||||
registerNone(registry, "ollama-cloud", "MiniMax-Text-01");
|
||||
registerNone(registry, "opencode-go", "MiniMax-Text-01");
|
||||
registerNone(registry, "minimax-cn", "MiniMax-Text-01");
|
||||
registerNone(registry, "minimax", "MiniMax-Text-01");
|
||||
const result = registry.getModelsForProxy("MiniMax-Text-01");
|
||||
const providers = result.map((m) => m.provider);
|
||||
assert.equal(
|
||||
providers[0],
|
||||
"minimax",
|
||||
"minimax (international) before minimax-cn",
|
||||
);
|
||||
assert.deepEqual(providers, ["minimax", "opencode-go", "ollama-cloud"]);
|
||||
});
|
||||
|
||||
it("Gemini family: google-gemini-cli only for bare model routing", () => {
|
||||
|
|
@ -468,10 +464,10 @@ describe("ModelRegistry.getModelsForProxy — family priority ordering", () => {
|
|||
|
||||
describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () => {
|
||||
it("provider with auth precedes same-priority provider without auth", () => {
|
||||
// zai has auth (hasAuth → true), opencode does not
|
||||
// zai has auth (hasAuth → true), opencode-go does not
|
||||
const registry = createRegistry((p) => p === "zai");
|
||||
registerApiKey(registry, "zai", "glm-4-air");
|
||||
registerApiKey(registry, "opencode", "glm-4-air");
|
||||
registerApiKey(registry, "opencode-go", "glm-4-air");
|
||||
const result = registry.getModelsForProxy("glm-4-air");
|
||||
const providers = result.map((m) => m.provider);
|
||||
// zai is already first by family priority AND by auth — stays first
|
||||
|
|
@ -479,16 +475,16 @@ describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () =
|
|||
});
|
||||
|
||||
it("lower-priority provider with auth beats higher-priority one without auth", () => {
|
||||
// opencode has auth, zai does not
|
||||
const registry = createRegistry((p) => p === "opencode");
|
||||
// opencode-go has auth, zai does not
|
||||
const registry = createRegistry((p) => p === "opencode-go");
|
||||
registerApiKey(registry, "zai", "glm-4-air");
|
||||
registerApiKey(registry, "opencode", "glm-4-air");
|
||||
registerApiKey(registry, "opencode-go", "glm-4-air");
|
||||
const result = registry.getModelsForProxy("glm-4-air");
|
||||
// opencode has auth so moves to withAuth bucket (before zai which has none)
|
||||
// opencode-go has auth so moves to withAuth bucket (before zai which has none)
|
||||
const providers = result.map((m) => m.provider);
|
||||
assert.equal(
|
||||
providers[0],
|
||||
"opencode",
|
||||
"opencode-go",
|
||||
"auth-ready provider surfaces first regardless of family order",
|
||||
);
|
||||
});
|
||||
|
|
@ -496,7 +492,7 @@ describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () =
|
|||
it("none-auth providers are always request-ready and not demoted", () => {
|
||||
const registry = createRegistry(() => false);
|
||||
registerNone(registry, "zai", "glm-4-air");
|
||||
registerNone(registry, "opencode", "glm-4-air");
|
||||
registerNone(registry, "opencode-go", "glm-4-air");
|
||||
const result = registry.getModelsForProxy("glm-4-air");
|
||||
// Both none-auth — family order preserved
|
||||
assert.equal(result[0].provider, "zai");
|
||||
|
|
@ -580,6 +576,7 @@ describe("ModelRegistry provider_model_allow filter", () => {
|
|||
const registry = createRegistry();
|
||||
registerNone(registry, "minimax", "MiniMax-M2");
|
||||
registerNone(registry, "minimax-cn", "MiniMax-M2");
|
||||
registerNone(registry, "opencode-go", "MiniMax-M2");
|
||||
|
||||
const result = registry.getModelsForProxy(
|
||||
"MiniMax-M2",
|
||||
|
|
@ -591,7 +588,7 @@ describe("ModelRegistry provider_model_allow filter", () => {
|
|||
|
||||
assert.deepEqual(
|
||||
result.map((m) => `${m.provider}/${m.id}`),
|
||||
["minimax-cn/MiniMax-M2"],
|
||||
["opencode-go/MiniMax-M2"],
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -78,12 +78,26 @@ export const PROXY_FAMILY_PRIORITY: ReadonlyArray<{
|
|||
{
|
||||
match: /^MiniMax-/i,
|
||||
prefix: "MiniMax-",
|
||||
providers: ["minimax", "minimax-cn"],
|
||||
providers: ["minimax"],
|
||||
family_failover: ["opencode-go", "ollama-cloud"],
|
||||
global_fallback: false,
|
||||
},
|
||||
// ZAI direct API for GLM
|
||||
{ match: /^glm-/i, prefix: "glm-", providers: ["zai"] },
|
||||
{
|
||||
match: /^glm-|^z-ai\/glm-/i,
|
||||
prefix: "glm-",
|
||||
providers: ["zai"],
|
||||
family_failover: ["opencode-go", "ollama-cloud"],
|
||||
global_fallback: false,
|
||||
},
|
||||
// Kimi Code direct API
|
||||
{ match: /^kimi-/i, prefix: "kimi-", providers: ["kimi-coding"] },
|
||||
{
|
||||
match: /^kimi-|^moonshotai\/kimi-/i,
|
||||
prefix: "kimi-",
|
||||
providers: ["kimi-coding"],
|
||||
family_failover: ["ollama-cloud", "opencode-go"],
|
||||
global_fallback: false,
|
||||
},
|
||||
// MiMo/Xiaomi — direct API via Xiaomi MiMo Open Platform (api.xiaomimimo.com)
|
||||
// or the Token Plan endpoint (token-plan-sgp.xiaomimimo.com). Both served
|
||||
// under the `xiaomi` provider namespace.
|
||||
|
|
@ -255,6 +269,7 @@ const HIDDEN_MODEL_PROVIDERS = new Set([
|
|||
"google-vertex",
|
||||
"groq",
|
||||
"github-copilot",
|
||||
"minimax-cn",
|
||||
"xai",
|
||||
"xiaomi-token-plan-ams",
|
||||
"xiaomi-token-plan-cn",
|
||||
|
|
@ -1166,9 +1181,8 @@ export class ModelRegistry {
|
|||
r.match.test(modelId),
|
||||
);
|
||||
// Order: direct family providers → family-scoped failover → global fallback.
|
||||
// Overrides replace only the direct list (keeps family_failover + global
|
||||
// chain intact) so a user pinning "glm- → [zai]" still picks up
|
||||
// opencode-go / openrouter / ollama-cloud as last resort.
|
||||
// Overrides replace only the direct list while preserving the family's
|
||||
// explicit failover/containment policy.
|
||||
const familyProviders = overrideEntry?.[1] ?? familyEntry?.providers ?? [];
|
||||
const familyFailover = familyEntry?.family_failover ?? [];
|
||||
const seen = new Set([...familyProviders, ...familyFailover]);
|
||||
|
|
|
|||
|
|
@ -108,12 +108,15 @@ function restoreToolBaseline(pi) {
|
|||
}
|
||||
}
|
||||
const BARE_MODEL_FAMILY_PRIORITY = [
|
||||
{ match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] },
|
||||
{ match: /^glm-/i, providers: ["zai", "opencode-go", "ollama-cloud"] },
|
||||
{
|
||||
match: /^kimi-/i,
|
||||
providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"],
|
||||
providers: ["kimi-coding", "ollama-cloud", "opencode-go"],
|
||||
},
|
||||
{
|
||||
match: /^MiniMax-|^minimax-/i,
|
||||
providers: ["minimax", "opencode-go", "ollama-cloud"],
|
||||
},
|
||||
{ match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] },
|
||||
{
|
||||
match: /^mimo-|^xiaomi-/i,
|
||||
providers: ["xiaomi", "opencode-go"],
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
// SF — Persistent per-project blocklist of provider/model pairs that the
|
||||
// provider has rejected at request time for account entitlement reasons.
|
||||
// provider has rejected at request time for account entitlement or temporary
|
||||
// capacity reasons.
|
||||
//
|
||||
// Lives at `.sf/runtime/blocked-models.json` so the block survives /sf autonomous
|
||||
// restarts. Auto-mode model selection skips blocked entries; agent-end
|
||||
|
|
@ -16,6 +17,14 @@ function blockedModelsPath(basePath) {
|
|||
/**
 * Build the case-insensitive lookup key for a provider/model pair.
 *
 * @param {string} provider - Provider name; lower-cased before use.
 * @param {string} id - Model id; lower-cased before use.
 * @returns {string} The key in the form `provider/id`.
 */
function modelKey(provider, id) {
  return `${provider.toLowerCase()}/${id.toLowerCase()}`;
}
|
||||
/**
 * Drop blocklist entries whose temporary block has already expired.
 *
 * An entry is kept when its `expiresAt` is missing, not a number, or not
 * finite (such entries never expire here), or when `expiresAt` is strictly
 * later than `now`.
 *
 * @param {Array<object>} entries - Blocklist entries to filter.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {Array<object>} A new array holding only the still-active entries.
 */
function activeBlockedEntries(entries, now = Date.now()) {
  return entries.filter(
    // Number.isFinite is false for every non-number, so it also covers the
    // "no expiresAt at all" case without a separate typeof check.
    (e) => !Number.isFinite(e.expiresAt) || e.expiresAt > now,
  );
}
|
||||
function readFileSafe(path) {
|
||||
if (!existsSync(path)) return { version: 1, blocked: [] };
|
||||
try {
|
||||
|
|
@ -27,7 +36,7 @@ function readFileSafe(path) {
|
|||
const blocked = parsed.blocked.filter(
|
||||
(e) => !!e && typeof e.provider === "string" && typeof e.id === "string",
|
||||
);
|
||||
return { version: 1, blocked };
|
||||
return { version: 1, blocked: activeBlockedEntries(blocked) };
|
||||
} catch {
|
||||
// Corrupted JSON: treat as empty so a bad file never blocks dispatch.
|
||||
return { version: 1, blocked: [] };
|
||||
|
|
@ -46,7 +55,7 @@ export function isModelBlocked(basePath, provider, id) {
|
|||
/**
|
||||
* Add a provider/model pair to the persistent blocklist (e.g., after account entitlement rejection).
|
||||
*/
|
||||
export function blockModel(basePath, provider, id, reason) {
|
||||
export function blockModel(basePath, provider, id, reason, options = {}) {
|
||||
const path = blockedModelsPath(basePath);
|
||||
mkdirSync(dirname(path), { recursive: true });
|
||||
// Ensure the file exists before we try to lock it — proper-lockfile requires
|
||||
|
|
@ -62,14 +71,30 @@ export function blockModel(basePath, provider, id, reason) {
|
|||
withFileLockSync(path, () => {
|
||||
const current = readFileSafe(path);
|
||||
const target = modelKey(provider, id);
|
||||
if (current.blocked.some((e) => modelKey(e.provider, e.id) === target)) {
|
||||
const existing = current.blocked.find(
|
||||
(e) => modelKey(e.provider, e.id) === target,
|
||||
);
|
||||
if (existing) {
|
||||
if (
|
||||
typeof options.expiresAt === "number" &&
|
||||
(!existing.expiresAt || options.expiresAt > existing.expiresAt)
|
||||
) {
|
||||
existing.expiresAt = options.expiresAt;
|
||||
existing.reason = reason;
|
||||
writeFileSync(path, JSON.stringify(current, null, 2) + "\n", "utf-8");
|
||||
}
|
||||
return;
|
||||
}
|
||||
const expiresAt =
|
||||
typeof options.expiresAt === "number" &&
|
||||
Number.isFinite(options.expiresAt)
|
||||
? options.expiresAt
|
||||
: undefined;
|
||||
const next = {
|
||||
version: 1,
|
||||
blocked: [
|
||||
...current.blocked,
|
||||
{ provider, id, reason, blockedAt: Date.now() },
|
||||
{ provider, id, reason, blockedAt: Date.now(), expiresAt },
|
||||
],
|
||||
};
|
||||
writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8");
|
||||
|
|
|
|||
|
|
@ -25,6 +25,19 @@ import { logWarning } from "../workflow-logger.js";
|
|||
import { clearDiscussionFlowState } from "./write-gate.js";
|
||||
|
||||
const retryState = createRetryState();
|
||||
// Default and maximum cooldown applied to a google-gemini-cli route after a
// capacity-style rejection (2 minutes and 30 minutes respectively).
const GEMINI_CAPACITY_COOLDOWN_MS = 2 * 60_000;
const GEMINI_CAPACITY_MAX_COOLDOWN_MS = 30 * 60_000;

/**
 * Decide how long a route should be temporarily blocked after a failure.
 *
 * Only the "google-gemini-cli" provider with a "rate-limit" or "server"
 * classification gets a cooldown; every other combination returns undefined.
 *
 * @param {string | undefined} provider - Provider of the route that failed.
 * @param {{ kind: string, retryAfterMs?: number }} cls - Error classification.
 * @returns {number | undefined} Cooldown in milliseconds: the larger of the
 *   classification's retry hint and the default cooldown, capped at the
 *   maximum cooldown. Undefined when no cooldown applies.
 */
function temporaryRouteBlockMs(provider, cls) {
  if (provider !== "google-gemini-cli") return undefined;
  if (cls.kind !== "rate-limit" && cls.kind !== "server") return undefined;
  // A NaN hint would propagate through Math.max/Math.min and yield NaN, which
  // the caller treats as "no cooldown"; ignore it and use the default instead.
  const retryAfterMs =
    "retryAfterMs" in cls &&
    typeof cls.retryAfterMs === "number" &&
    !Number.isNaN(cls.retryAfterMs)
      ? cls.retryAfterMs
      : undefined;
  const base = Math.max(retryAfterMs ?? 0, GEMINI_CAPACITY_COOLDOWN_MS);
  return Math.min(base, GEMINI_CAPACITY_MAX_COOLDOWN_MS);
}
|
||||
/**
|
||||
* Reset the module-level retry state so a resumed auto-session starts fresh.
|
||||
* Called by provider-error-resume.ts before startAuto() so legacy paused
|
||||
|
|
@ -245,6 +258,30 @@ export async function handleAgentEnd(pi, event, ctx) {
|
|||
// ── 2. Decide & Act ──────────────────────────────────────────────────
|
||||
// --- Route failures: try configured fallback first, then any available route ---
|
||||
if (isModelRouteFailure(cls) && dash.currentUnit) {
|
||||
const blockMs = temporaryRouteBlockMs(currentRoute?.provider, cls);
|
||||
if (
|
||||
blockMs &&
|
||||
dash.basePath &&
|
||||
currentRoute?.provider &&
|
||||
currentRoute?.id
|
||||
) {
|
||||
try {
|
||||
blockModel(
|
||||
dash.basePath,
|
||||
currentRoute.provider,
|
||||
currentRoute.id,
|
||||
rawErrorMsg || cls.kind,
|
||||
{ expiresAt: Date.now() + blockMs },
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`Cooling down ${currentRoute.provider}/${currentRoute.id} for ${Math.ceil(blockMs / 1000)}s after provider capacity rejection.`,
|
||||
"warning",
|
||||
);
|
||||
} catch (err) {
|
||||
const m = err instanceof Error ? err.message : String(err);
|
||||
logWarning("bootstrap", `Failed to persist model cooldown: ${m}`);
|
||||
}
|
||||
}
|
||||
const switched = await trySwitchToFallbackModel({
|
||||
pi,
|
||||
ctx,
|
||||
|
|
|
|||
52
src/resources/extensions/sf/tests/blocked-models.test.mjs
Normal file
52
src/resources/extensions/sf/tests/blocked-models.test.mjs
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, test } from "vitest";
|
||||
|
||||
import {
|
||||
blockModel,
|
||||
isModelBlocked,
|
||||
loadBlockedModels,
|
||||
} from "../blocked-models.js";
|
||||
|
||||
// Scratch project directory created by the running test; removed in afterEach.
let tmp;

// Delete the scratch directory (if a test created one) and reset the handle so
// the next test starts without a leftover path.
afterEach(() => {
  if (tmp) rmSync(tmp, { recursive: true, force: true });
  tmp = undefined;
});

/**
 * Create a fresh temporary directory to act as the project base path and
 * remember it in `tmp` so afterEach can clean it up.
 */
function tempProject() {
  tmp = mkdtempSync(join(tmpdir(), "sf-blocked-models-"));
  return tmp;
}

describe("blocked models", () => {
  test("isModelBlocked_when_temporary_block_expired_returns_false", () => {
    const basePath = tempProject();

    // Expiry one second in the past: the block must not be reported as active.
    blockModel(basePath, "google-gemini-cli", "gemini-2.5-pro", "capacity", {
      expiresAt: Date.now() - 1_000,
    });

    assert.equal(
      isModelBlocked(basePath, "google-gemini-cli", "gemini-2.5-pro"),
      false,
    );
    assert.deepEqual(loadBlockedModels(basePath), []);
  });

  test("isModelBlocked_when_temporary_block_active_returns_true", () => {
    const basePath = tempProject();

    // Expiry one minute in the future: the block is still in force.
    blockModel(basePath, "google-gemini-cli", "gemini-2.5-pro", "capacity", {
      expiresAt: Date.now() + 60_000,
    });

    assert.equal(
      isModelBlocked(basePath, "google-gemini-cli", "gemini-2.5-pro"),
      true,
    );
  });
});
|
||||
Loading…
Add table
Reference in a new issue