From 66e8265320b0219eb202bf9c2587fc8974239a64 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 5 May 2026 17:37:01 +0200 Subject: [PATCH] fix: align provider route selection --- package.json | 2 +- .../pi-ai/src/providers/google-gemini-cli.ts | 22 ++++---- .../core/model-registry-proxy-routing.test.ts | 53 +++++++++---------- .../src/core/model-registry.ts | 26 ++++++--- .../extensions/sf/auto-model-selection.js | 9 ++-- src/resources/extensions/sf/blocked-models.js | 35 ++++++++++-- .../sf/bootstrap/agent-end-recovery.js | 37 +++++++++++++ .../sf/tests/blocked-models.test.mjs | 52 ++++++++++++++++++ 8 files changed, 183 insertions(+), 53 deletions(-) create mode 100644 src/resources/extensions/sf/tests/blocked-models.test.mjs diff --git a/package.json b/package.json index b226c8f18..de38c0e99 100644 --- a/package.json +++ b/package.json @@ -115,7 +115,7 @@ "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@clack/prompts": "^1.1.0", - "@google/gemini-cli-core": "^0.40.1", + "@google/gemini-cli-core": "0.40.1", "@google/genai": "^1.40.0", "@mariozechner/jiti": "^2.6.2", "@mistralai/mistralai": "^2.2.1", diff --git a/packages/pi-ai/src/providers/google-gemini-cli.ts b/packages/pi-ai/src/providers/google-gemini-cli.ts index dfd010f7b..12c2866f1 100644 --- a/packages/pi-ai/src/providers/google-gemini-cli.ts +++ b/packages/pi-ai/src/providers/google-gemini-cli.ts @@ -11,12 +11,11 @@ import { AuthType, - CodeAssistServer, - getOauthClient, makeFakeConfig, retryWithBackoff, - setupUser, } from "@google/gemini-cli-core"; +import { createCodeAssistContentGenerator } from "@google/gemini-cli-core/dist/src/code_assist/codeAssist.js"; +import type { ContentGenerator } from "@google/gemini-cli-core/dist/src/core/contentGenerator.js"; import type { Content, GenerateContentParameters, @@ -99,23 +98,26 @@ export interface GoogleGeminiCliOptions extends StreamOptions { let toolCallCounter = 0; /** - * Build a CodeAssistServer using cli-core's own auth + project discovery. + * Build a Code Assist content generator using cli-core's own auth + project discovery. * * - getOauthClient() reads ~/.gemini/oauth_creds.json when present, refreshes if - * expired, and returns an authenticated AuthClient. cli-core owns any - * interactive login flow it needs. + * expired. cli-core owns any interactive login flow it needs. * - setupUser() asks the Code Assist API for the project + tier tied to this * identity (free-tier auto-provisioned if needed; otherwise whatever the * user has been onboarded to server-side). + * - createCodeAssistContentGenerator() passes the returned tier and paid-tier + * data into CodeAssistServer, matching the official Gemini CLI path. * * Both calls memoize internally inside cli-core — repeat invocations are * cheap. */ -async function getCodeAssistServer(): Promise { +async function getCodeAssistServer(): Promise { const config = makeFakeConfig(); - const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config); - const userData = await setupUser(authClient, config); - return new CodeAssistServer(authClient, userData.projectId, { headers: {} }); + return createCodeAssistContentGenerator( + { headers: {} }, + AuthType.LOGIN_WITH_GOOGLE, + config, + ); } function parseDurationMs(value: string): number | undefined { diff --git a/packages/pi-coding-agent/src/core/model-registry-proxy-routing.test.ts b/packages/pi-coding-agent/src/core/model-registry-proxy-routing.test.ts index b484747cb..13b9c8b75 100644 --- a/packages/pi-coding-agent/src/core/model-registry-proxy-routing.test.ts +++ b/packages/pi-coding-agent/src/core/model-registry-proxy-routing.test.ts @@ -109,7 +109,7 @@ describe("ModelRegistry.getModelsForProxy — basic", () => { it("returns all candidates when multiple providers share the model id", () => { const registry = createRegistry(); registerNone(registry, "zai", "glm-4-air"); - registerNone(registry, "opencode", "glm-4-air"); + registerNone(registry, "opencode-go", "glm-4-air"); const result = registry.getModelsForProxy("glm-4-air"); assert.equal(result.length, 2); }); @@ -402,45 +402,41 @@ describe("ModelRegistry.getModelsForProxy — basic", () => { // ── getModelsForProxy — family priority ordering ────────────────────────────── describe("ModelRegistry.getModelsForProxy — family priority ordering", () => { - it("GLM family: zai before opencode before opencode-go", () => { + it("GLM family: zai before subscribed/free relays, never OpenRouter", () => { const registry = createRegistry(); // Register in reverse priority order to confirm sorting + registerNone(registry, "openrouter", "glm-4-air"); + registerNone(registry, "ollama-cloud", "glm-4-air"); registerNone(registry, "opencode-go", "glm-4-air"); registerNone(registry, "opencode", "glm-4-air"); registerNone(registry, "zai", "glm-4-air"); const result = registry.getModelsForProxy("glm-4-air"); const providers = result.map((m) => m.provider); - assert.equal(providers[0], "zai", "zai must be first for GLM"); - assert.ok( - providers.indexOf("opencode") < providers.indexOf("opencode-go"), - "opencode before opencode-go", - ); + assert.deepEqual(providers, ["zai", "opencode-go", "ollama-cloud"]); }); - it("Kimi family: kimi-coding before opencode", () => { + it("Kimi family: kimi-coding before subscribed/free relays, never OpenRouter", () => { const registry = createRegistry(); + registerNone(registry, "openrouter", "kimi-k2"); registerNone(registry, "opencode", "kimi-k2"); + registerNone(registry, "opencode-go", "kimi-k2"); + registerNone(registry, "ollama-cloud", "kimi-k2"); registerNone(registry, "kimi-coding", "kimi-k2"); const result = registry.getModelsForProxy("kimi-k2"); const providers = result.map((m) => m.provider); - assert.equal( - providers[0], - "kimi-coding", - "kimi-coding must lead for kimi- models", - ); + assert.deepEqual(providers, ["kimi-coding", "ollama-cloud", "opencode-go"]); }); - it("MiniMax family: minimax before minimax-cn", () => { + it("MiniMax family: direct providers before subscribed/free relays, never OpenRouter", () => { const registry = createRegistry(); + registerNone(registry, "openrouter", "MiniMax-Text-01"); + registerNone(registry, "ollama-cloud", "MiniMax-Text-01"); + registerNone(registry, "opencode-go", "MiniMax-Text-01"); registerNone(registry, "minimax-cn", "MiniMax-Text-01"); registerNone(registry, "minimax", "MiniMax-Text-01"); const result = registry.getModelsForProxy("MiniMax-Text-01"); const providers = result.map((m) => m.provider); - assert.equal( - providers[0], - "minimax", - "minimax (international) before minimax-cn", - ); + assert.deepEqual(providers, ["minimax", "opencode-go", "ollama-cloud"]); }); it("Gemini family: google-gemini-cli only for bare model routing", () => { @@ -468,10 +464,10 @@ describe("ModelRegistry.getModelsForProxy — family priority ordering", () => { describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () => { it("provider with auth precedes same-priority provider without auth", () => { - // zai has auth (hasAuth → true), opencode does not + // zai has auth (hasAuth → true), opencode-go does not const registry = createRegistry((p) => p === "zai"); registerApiKey(registry, "zai", "glm-4-air"); - registerApiKey(registry, "opencode", "glm-4-air"); + registerApiKey(registry, "opencode-go", "glm-4-air"); const result = registry.getModelsForProxy("glm-4-air"); const providers = result.map((m) => m.provider); // zai is already first by family priority AND by auth — stays first @@ -479,16 +475,16 @@ describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () = }); it("lower-priority provider with auth beats higher-priority one without auth", () => { - // opencode has auth, zai does not - const registry = createRegistry((p) => p === "opencode"); + // opencode-go has auth, zai does not + const registry = createRegistry((p) => p === "opencode-go"); registerApiKey(registry, "zai", "glm-4-air"); - registerApiKey(registry, "opencode", "glm-4-air"); + registerApiKey(registry, "opencode-go", "glm-4-air"); const result = registry.getModelsForProxy("glm-4-air"); - // opencode has auth so moves to withAuth bucket (before zai which has none) + // opencode-go has auth so moves to withAuth bucket (before zai which has none) const providers = result.map((m) => m.provider); assert.equal( providers[0], - "opencode", + "opencode-go", "auth-ready provider surfaces first regardless of family order", ); }); @@ -496,7 +492,7 @@ describe("ModelRegistry.getModelsForProxy — auth-ready candidates first", () = it("none-auth providers are always request-ready and not demoted", () => { const registry = createRegistry(() => false); registerNone(registry, "zai", "glm-4-air"); - registerNone(registry, "opencode", "glm-4-air"); + registerNone(registry, "opencode-go", "glm-4-air"); const result = registry.getModelsForProxy("glm-4-air"); // Both none-auth — family order preserved assert.equal(result[0].provider, "zai"); @@ -580,6 +576,7 @@ describe("ModelRegistry provider_model_allow filter", () => { const registry = createRegistry(); registerNone(registry, "minimax", "MiniMax-M2"); registerNone(registry, "minimax-cn", "MiniMax-M2"); + registerNone(registry, "opencode-go", "MiniMax-M2"); const result = registry.getModelsForProxy( "MiniMax-M2", @@ -591,7 +588,7 @@ describe("ModelRegistry provider_model_allow filter", () => { assert.deepEqual( result.map((m) => `${m.provider}/${m.id}`), - ["minimax-cn/MiniMax-M2"], + ["opencode-go/MiniMax-M2"], ); }); diff --git a/packages/pi-coding-agent/src/core/model-registry.ts b/packages/pi-coding-agent/src/core/model-registry.ts index 285cc89e8..525cb7437 100644 --- a/packages/pi-coding-agent/src/core/model-registry.ts +++ b/packages/pi-coding-agent/src/core/model-registry.ts @@ -78,12 +78,26 @@ export const PROXY_FAMILY_PRIORITY: ReadonlyArray<{ { match: /^MiniMax-/i, prefix: "MiniMax-", - providers: ["minimax", "minimax-cn"], + providers: ["minimax"], + family_failover: ["opencode-go", "ollama-cloud"], + global_fallback: false, }, // ZAI direct API for GLM - { match: /^glm-/i, prefix: "glm-", providers: ["zai"] }, + { + match: /^glm-|^z-ai\/glm-/i, + prefix: "glm-", + providers: ["zai"], + family_failover: ["opencode-go", "ollama-cloud"], + global_fallback: false, + }, // Kimi Code direct API - { match: /^kimi-/i, prefix: "kimi-", providers: ["kimi-coding"] }, + { + match: /^kimi-|^moonshotai\/kimi-/i, + prefix: "kimi-", + providers: ["kimi-coding"], + family_failover: ["ollama-cloud", "opencode-go"], + global_fallback: false, + }, // MiMo/Xiaomi — direct API via Xiaomi MiMo Open Platform (api.xiaomimimo.com) // or the Token Plan endpoint (token-plan-sgp.xiaomimimo.com). Both served // under the `xiaomi` provider namespace. @@ -255,6 +269,7 @@ const HIDDEN_MODEL_PROVIDERS = new Set([ "google-vertex", "groq", "github-copilot", + "minimax-cn", "xai", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn", @@ -1166,9 +1181,8 @@ export class ModelRegistry { r.match.test(modelId), ); // Order: direct family providers → family-scoped failover → global fallback. - // Overrides replace only the direct list (keeps family_failover + global - // chain intact) so a user pinning "glm- → [zai]" still picks up - // opencode-go / openrouter / ollama-cloud as last resort. + // Overrides replace only the direct list while preserving the family's + // explicit failover/containment policy. const familyProviders = overrideEntry?.[1] ?? familyEntry?.providers ?? []; const familyFailover = familyEntry?.family_failover ?? []; const seen = new Set([...familyProviders, ...familyFailover]); diff --git a/src/resources/extensions/sf/auto-model-selection.js b/src/resources/extensions/sf/auto-model-selection.js index 5f601495b..fa5959eea 100644 --- a/src/resources/extensions/sf/auto-model-selection.js +++ b/src/resources/extensions/sf/auto-model-selection.js @@ -108,12 +108,15 @@ function restoreToolBaseline(pi) { } } const BARE_MODEL_FAMILY_PRIORITY = [ - { match: /^glm-/i, providers: ["zai", "opencode", "opencode-go"] }, + { match: /^glm-/i, providers: ["zai", "opencode-go", "ollama-cloud"] }, { match: /^kimi-/i, - providers: ["kimi-coding", "ollama-cloud", "opencode", "opencode-go"], + providers: ["kimi-coding", "ollama-cloud", "opencode-go"], + }, + { + match: /^MiniMax-|^minimax-/i, + providers: ["minimax", "opencode-go", "ollama-cloud"], }, - { match: /^MiniMax-|^minimax-/i, providers: ["minimax", "minimax-cn"] }, { match: /^mimo-|^xiaomi-/i, providers: ["xiaomi", "opencode-go"], diff --git a/src/resources/extensions/sf/blocked-models.js b/src/resources/extensions/sf/blocked-models.js index f1ef2ea34..51ff7988c 100644 --- a/src/resources/extensions/sf/blocked-models.js +++ b/src/resources/extensions/sf/blocked-models.js @@ -1,5 +1,6 @@ // SF — Persistent per-project blocklist of provider/model pairs that the -// provider has rejected at request time for account entitlement reasons. +// provider has rejected at request time for account entitlement or temporary +// capacity reasons. // // Lives at `.sf/runtime/blocked-models.json` so the block survives /sf autonomous // restarts. Auto-mode model selection skips blocked entries; agent-end @@ -16,6 +17,14 @@ function blockedModelsPath(basePath) { function modelKey(provider, id) { return `${provider.toLowerCase()}/${id.toLowerCase()}`; } +function activeBlockedEntries(entries, now = Date.now()) { + return entries.filter( + (e) => + typeof e.expiresAt !== "number" || + !Number.isFinite(e.expiresAt) || + e.expiresAt > now, + ); +} function readFileSafe(path) { if (!existsSync(path)) return { version: 1, blocked: [] }; try { @@ -27,7 +36,7 @@ function readFileSafe(path) { const blocked = parsed.blocked.filter( (e) => !!e && typeof e.provider === "string" && typeof e.id === "string", ); - return { version: 1, blocked }; + return { version: 1, blocked: activeBlockedEntries(blocked) }; } catch { // Corrupted JSON: treat as empty so a bad file never blocks dispatch. return { version: 1, blocked: [] }; @@ -46,7 +55,7 @@ export function isModelBlocked(basePath, provider, id) { /** * Add a provider/model pair to the persistent blocklist (e.g., after account entitlement rejection). */ -export function blockModel(basePath, provider, id, reason) { +export function blockModel(basePath, provider, id, reason, options = {}) { const path = blockedModelsPath(basePath); mkdirSync(dirname(path), { recursive: true }); // Ensure the file exists before we try to lock it — proper-lockfile requires @@ -62,14 +71,30 @@ export function blockModel(basePath, provider, id, reason) { withFileLockSync(path, () => { const current = readFileSafe(path); const target = modelKey(provider, id); - if (current.blocked.some((e) => modelKey(e.provider, e.id) === target)) { + const existing = current.blocked.find( + (e) => modelKey(e.provider, e.id) === target, + ); + if (existing) { + if ( + typeof options.expiresAt === "number" && + (!existing.expiresAt || options.expiresAt > existing.expiresAt) + ) { + existing.expiresAt = options.expiresAt; + existing.reason = reason; + writeFileSync(path, JSON.stringify(current, null, 2) + "\n", "utf-8"); + } return; } + const expiresAt = + typeof options.expiresAt === "number" && + Number.isFinite(options.expiresAt) + ? options.expiresAt + : undefined; const next = { version: 1, blocked: [ ...current.blocked, - { provider, id, reason, blockedAt: Date.now() }, + { provider, id, reason, blockedAt: Date.now(), expiresAt }, ], }; writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8"); diff --git a/src/resources/extensions/sf/bootstrap/agent-end-recovery.js b/src/resources/extensions/sf/bootstrap/agent-end-recovery.js index ab79616b5..a893d601f 100644 --- a/src/resources/extensions/sf/bootstrap/agent-end-recovery.js +++ b/src/resources/extensions/sf/bootstrap/agent-end-recovery.js @@ -25,6 +25,19 @@ import { logWarning } from "../workflow-logger.js"; import { clearDiscussionFlowState } from "./write-gate.js"; const retryState = createRetryState(); +const GEMINI_CAPACITY_COOLDOWN_MS = 2 * 60_000; +const GEMINI_CAPACITY_MAX_COOLDOWN_MS = 30 * 60_000; + +function temporaryRouteBlockMs(provider, cls) { + if (provider !== "google-gemini-cli") return undefined; + if (cls.kind !== "rate-limit" && cls.kind !== "server") return undefined; + const retryAfterMs = + "retryAfterMs" in cls && typeof cls.retryAfterMs === "number" + ? cls.retryAfterMs + : undefined; + const base = Math.max(retryAfterMs ?? 0, GEMINI_CAPACITY_COOLDOWN_MS); + return Math.min(base, GEMINI_CAPACITY_MAX_COOLDOWN_MS); +} /** * Reset the module-level retry state so a resumed auto-session starts fresh. * Called by provider-error-resume.ts before startAuto() so legacy paused @@ -245,6 +258,30 @@ export async function handleAgentEnd(pi, event, ctx) { // ── 2. Decide & Act ────────────────────────────────────────────────── // --- Route failures: try configured fallback first, then any available route --- if (isModelRouteFailure(cls) && dash.currentUnit) { + const blockMs = temporaryRouteBlockMs(currentRoute?.provider, cls); + if ( + blockMs && + dash.basePath && + currentRoute?.provider && + currentRoute?.id + ) { + try { + blockModel( + dash.basePath, + currentRoute.provider, + currentRoute.id, + rawErrorMsg || cls.kind, + { expiresAt: Date.now() + blockMs }, + ); + ctx.ui.notify( + `Cooling down ${currentRoute.provider}/${currentRoute.id} for ${Math.ceil(blockMs / 1000)}s after provider capacity rejection.`, + "warning", + ); + } catch (err) { + const m = err instanceof Error ? err.message : String(err); + logWarning("bootstrap", `Failed to persist model cooldown: ${m}`); + } + } const switched = await trySwitchToFallbackModel({ pi, ctx, diff --git a/src/resources/extensions/sf/tests/blocked-models.test.mjs b/src/resources/extensions/sf/tests/blocked-models.test.mjs new file mode 100644 index 000000000..3db68df89 --- /dev/null +++ b/src/resources/extensions/sf/tests/blocked-models.test.mjs @@ -0,0 +1,52 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; + +import { + blockModel, + isModelBlocked, + loadBlockedModels, +} from "../blocked-models.js"; + +let tmp; + +afterEach(() => { + if (tmp) rmSync(tmp, { recursive: true, force: true }); + tmp = undefined; +}); + +function tempProject() { + tmp = mkdtempSync(join(tmpdir(), "sf-blocked-models-")); + return tmp; +} + +describe("blocked models", () => { + test("isModelBlocked_when_temporary_block_expired_returns_false", () => { + const basePath = tempProject(); + + blockModel(basePath, "google-gemini-cli", "gemini-2.5-pro", "capacity", { + expiresAt: Date.now() - 1_000, + }); + + assert.equal( + isModelBlocked(basePath, "google-gemini-cli", "gemini-2.5-pro"), + false, + ); + assert.deepEqual(loadBlockedModels(basePath), []); + }); + + test("isModelBlocked_when_temporary_block_active_returns_true", () => { + const basePath = tempProject(); + + blockModel(basePath, "google-gemini-cli", "gemini-2.5-pro", "capacity", { + expiresAt: Date.now() + 60_000, + }); + + assert.equal( + isModelBlocked(basePath, "google-gemini-cli", "gemini-2.5-pro"), + true, + ); + }); +});