From c6a3fa6a6adcda3dd96a6f5a78ef123feeb7283a Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 03:32:35 +0200 Subject: [PATCH] feat(gemini-cli): discover account models via gemini-cli-core + retry on capacity errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related fixes for the google-gemini-cli provider, both motivated by today's dogfood diagnosis: SF was pinned to a single model (gemini-3-flash-preview) even though the AI Ultra account has access to seven (verified via the live gemini-cli-core probe), and a transient "No capacity available for model X on the server" was classified as `unknown` so SF gave up instead of retrying. 1. Account snapshot + model discovery in @singularity-forge/google-gemini-cli-provider - Add `snapshotGeminiCliAccount(cwd?)` returning { projectId, userTierId, userTierName, paidTier, models } where `models[]` carries each modelId with usedFraction, remainingFraction, and resetTime. Built on the same setupUser + CodeAssistServer.retrieveUserQuota path usage-bar.js already uses, but extracted to the dedicated package so any consumer (model picker, capacity diagnostics, catalog cache) can call one helper. - Add `discoverGeminiCliModels(cwd?)` as a thin "just the IDs" wrapper. - Both are best-effort: any failure (OAuth expired, no project, network) returns null silently — never throws. 2. SF-side cache writer at src/resources/extensions/sf/gemini-catalog.js - Delegates discovery to the package; only handles cache file path, 6-hour TTL, and the session_start lifecycle hook. - Cache lands at .sf/runtime/model-catalog/google-gemini-cli.json with the same shape as the generic model-catalog-cache, so getKnownModelIds and the model picker pick it up transparently. - Wired into bootstrap/register-hooks.js session_start in parallel with the existing scheduleModelCatalogRefresh (the generic REST + API-key path can't reach gemini-cli's OAuth-only Code Assist endpoint). 3. 
Capacity error classification fix - error-classifier.js SERVER_RE now matches "no capacity (available|left)", "capacity (unavailable|exhausted)", and "no capacity ... on the server". Previously these fell through to kind=unknown, which is not transient, so agent-end-recovery never retried — even though the same handler already caps gemini-cli rate-limit backoff at 30s for exactly this class of transient. With the pattern matched as `server`, the existing retry-with-backoff path covers it. The full extension test suite (1386 tests) passes. Typecheck clean for both the package and the SF extensions. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../google-gemini-cli-provider/src/index.ts | 107 +++++++++++++++++- .../extensions/sf/bootstrap/register-hooks.js | 13 +++ .../extensions/sf/error-classifier.js | 2 +- src/resources/extensions/sf/gemini-catalog.js | 96 ++++++++++++++++ 4 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 src/resources/extensions/sf/gemini-catalog.js diff --git a/packages/google-gemini-cli-provider/src/index.ts b/packages/google-gemini-cli-provider/src/index.ts index 4630472c6..8f72f84d2 100644 --- a/packages/google-gemini-cli-provider/src/index.ts +++ b/packages/google-gemini-cli-provider/src/index.ts @@ -5,9 +5,16 @@ * dedicated workspace package so provider code can depend on one small helper * instead of embedding the upstream integration inline. * - * Consumer: `@singularity-forge/ai` Google Gemini provider. + * Consumer: `@singularity-forge/ai` Google Gemini provider, plus SF-side + * background catalog discovery. 
*/ -import { AuthType, makeFakeConfig } from "@google/gemini-cli-core"; +import { + AuthType, + CodeAssistServer, + getOauthClient, + makeFakeConfig, + setupUser, +} from "@google/gemini-cli-core"; import { type ContentGenerator, createContentGenerator, @@ -43,3 +50,99 @@ export async function createGeminiCliContentGenerator( ); return createContentGenerator(generatorConfig, config); } + +/** + * Per-model quota bucket from CodeAssistServer.retrieveUserQuota. + */ +export interface GeminiQuotaBucket { + modelId: string; + usedFraction: number; + remainingFraction: number; + resetTime?: string; +} + +/** + * Snapshot of the active gemini-cli account: tier identity, project, and the + * full per-model quota table. + * + * Why a single struct: every consumer (model picker, usage UI, capacity + * diagnostics, catalog cache) needs the same three pieces of data. Returning + * them together avoids three separate OAuth round trips. + */ +export interface GeminiAccountSnapshot { + projectId: string; + /** Active tier id from setupUser.userTier (e.g. "free-tier", "standard-tier"). */ + userTierId?: string; + /** Active tier human label from setupUser.userTierName. */ + userTierName?: string; + /** + * Paid tier descriptor when the account has one (e.g. AI Ultra). Carries + * id like "g1-ultra-tier" and the marketing name. Distinct from the + * effective userTier — a free-tier session can still have a paidTier + * marker if the underlying account is subscribed. + */ + paidTier?: { id?: string; name?: string }; + models: GeminiQuotaBucket[]; +} + +/** + * Discover the active gemini-cli account: tier, project, and every model the + * account has access to (with per-model usage fraction and reset time). + * + * Best-effort: any failure (OAuth expired, no project, network) returns null + * silently so callers can downgrade gracefully. + * + * Consumer: SF-side background catalog cache, usage UI, capacity diagnostics. 
+ */ +export async function snapshotGeminiCliAccount( + cwd?: string, +): Promise<GeminiAccountSnapshot | null> { + try { + const config = makeFakeConfig({ cwd: cwd ?? process.cwd() }); + const authClient = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, config); + const userData = await setupUser(authClient, config); + const projectId = userData?.projectId; + if (!projectId || typeof projectId !== "string") return null; + const server = new CodeAssistServer(authClient, projectId, { headers: {} }); + const data = await server.retrieveUserQuota({ project: projectId }); + const models: GeminiQuotaBucket[] = []; + for (const b of data?.buckets ?? []) { + const modelId = typeof b.modelId === "string" ? b.modelId : ""; + if (!modelId) continue; + const remainingFraction = + typeof b.remainingFraction === "number" ? b.remainingFraction : 1; + models.push({ + modelId, + usedFraction: 1 - remainingFraction, + remainingFraction, + resetTime: + typeof b.resetTime === "string" ? b.resetTime : undefined, + }); + } + if (models.length === 0) return null; + return { + projectId, + userTierId: + typeof userData?.userTier === "string" ? userData.userTier : undefined, + userTierName: userData?.userTierName, + paidTier: userData?.paidTier + ? { id: userData.paidTier.id, name: userData.paidTier.name } + : undefined, + models, + }; + } catch { + return null; + } +} + +/** + * Convenience wrapper: just the model IDs the active gemini-cli account has + * access to. Returns null on failure (same contract as snapshotGeminiCliAccount). 
+ */ +export async function discoverGeminiCliModels( + cwd?: string, +): Promise<string[] | null> { + const snap = await snapshotGeminiCliAccount(cwd); + if (!snap) return null; + return snap.models.map((m) => m.modelId); +} diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 1831de204..1e0f76cac 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -504,6 +504,19 @@ export function registerHooks(pi, ecosystemHandlers = []) { } catch { /* non-fatal — model catalog refresh must never block session start */ } + // Refresh the gemini-cli model catalog separately because google-gemini-cli + // uses OAuth via @google/gemini-cli-core, not API-key REST, so it is not + // reachable through the generic refresh above. The cache lands in + // .sf/runtime/model-catalog/google-gemini-cli.json so getKnownModelIds and + // the model picker pick it up the same way as other providers. + try { + const { scheduleGeminiCatalogRefresh } = await import( + "../gemini-catalog.js" + ); + scheduleGeminiCatalogRefresh(process.cwd()); + } catch { + /* non-fatal — gemini catalog refresh must never block session start */ + } // Detect drift in source-of-truth markdown files since last session. 
try { const { detectMdFileDrift, formatDriftReport } = await import( diff --git a/src/resources/extensions/sf/error-classifier.js b/src/resources/extensions/sf/error-classifier.js index dfc0c9c4b..304b337c2 100644 --- a/src/resources/extensions/sf/error-classifier.js +++ b/src/resources/extensions/sf/error-classifier.js @@ -41,7 +41,7 @@ const AFFORDABILITY_RE = const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i; const SERVER_RE = - /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i; + /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable|no capacity (?:available|left)|capacity (?:unavailable|exhausted)|no capacity .* on the server/i; // ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first). const CONNECTION_RE = /terminated|connection.?(?:refused|error)|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; diff --git a/src/resources/extensions/sf/gemini-catalog.js b/src/resources/extensions/sf/gemini-catalog.js new file mode 100644 index 000000000..be443758f --- /dev/null +++ b/src/resources/extensions/sf/gemini-catalog.js @@ -0,0 +1,96 @@ +/** + * gemini-catalog.js — SF-side cache writer for gemini-cli model discovery. + * + * The OAuth + Code Assist plumbing lives in the dedicated + * @singularity-forge/google-gemini-cli-provider package (where the rest of the + * gemini-cli-core integration is centralized). This module only handles the + * SF-specific concerns: where on disk to cache the result, how often to + * refresh, and the session_start lifecycle hook. + * + * Why a separate cache from the generic model-catalog-cache: that cache is + * keyed off providers with REST /v1/models and API-key auth. google-gemini-cli + * has neither — it uses OAuth via gemini-cli-core and surfaces models implicit + * in the per-model quota table. 
Cache file shape stays compatible with + * model-catalog-cache.getKnownModelIds so consumers read both transparently. + */ +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { sfRuntimeRoot } from "./paths.js"; + +const GEMINI_CLI_PROVIDER_ID = "google-gemini-cli"; +const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; + +function cacheFilePath(basePath) { + return join( + sfRuntimeRoot(basePath), + "model-catalog", + `${GEMINI_CLI_PROVIDER_ID}.json`, + ); +} + +function isCacheFresh(basePath) { + try { + const path = cacheFilePath(basePath); + if (!existsSync(path)) return false; + const entry = JSON.parse(readFileSync(path, "utf-8")); + if (!entry?.fetchedAt || !Array.isArray(entry.modelIds)) return false; + return Date.now() - new Date(entry.fetchedAt).getTime() <= CATALOG_TTL_MS; + } catch { + return false; + } +} + +function writeCacheEntry(basePath, modelIds) { + try { + mkdirSync(join(sfRuntimeRoot(basePath), "model-catalog"), { + recursive: true, + }); + writeFileSync( + cacheFilePath(basePath), + JSON.stringify({ + fetchedAt: new Date().toISOString(), + modelIds, + }), + "utf-8", + ); + } catch { + // Best-effort — never fail the caller. + } +} + +/** + * Discover and cache the gemini-cli model list. Returns the model IDs on + * success, null on any failure. + * + * Consumer: scheduleGeminiCatalogRefresh during session_start. + */ +export async function refreshGeminiCatalog(basePath) { + try { + const { discoverGeminiCliModels } = await import( + "@singularity-forge/google-gemini-cli-provider" + ); + const modelIds = await discoverGeminiCliModels(basePath); + if (!modelIds || modelIds.length === 0) return null; + writeCacheEntry(basePath, modelIds); + return modelIds; + } catch { + return null; + } +} + +/** + * Fire-and-forget background refresh of the gemini-cli model catalog. Skipped + * if the on-disk cache is already fresh (within CATALOG_TTL_MS). 
+ * + * Consumer: bootstrap/register-hooks.js session_start hook. + */ +export function scheduleGeminiCatalogRefresh(basePath) { + if (isCacheFresh(basePath)) return; + setImmediate(async () => { + try { + await refreshGeminiCatalog(basePath); + } catch { + // Per-provider failure is silently swallowed. + } + }); +}