feat(gemini-cli): discover account models via gemini-cli-core + retry on capacity errors

Two related fixes for the google-gemini-cli provider, both motivated by today's dogfood diagnosis: SF was pinned to a single model (gemini-3-flash-preview) even though the AI Ultra account has access to seven (verified via the live gemini-cli-core probe), and a transient "No capacity available for model X on the server" was classified as `unknown` so SF gave up instead of retrying. The two fixes land as three change groups: 1. Account snapshot + model discovery in @singularity-forge/google-gemini-cli-provider - Add `snapshotGeminiCliAccount(cwd?)` returning { projectId, userTierId, userTierName, paidTier, models } where `models[]` carries each modelId with usedFraction, remainingFraction, and resetTime. Built on the same setupUser + CodeAssistServer.retrieveUserQuota path usage-bar.js already uses, but extracted to the dedicated package so any consumer (model picker, capacity diagnostics, catalog cache) can call one helper. - Add `discoverGeminiCliModels(cwd?)` as a thin "just the IDs" wrapper. - Both are best-effort: any failure (OAuth expired, no project, network) returns null silently — never throws. 2. SF-side cache writer at src/resources/extensions/sf/gemini-catalog.js - Delegates discovery to the package; only handles cache file path, 6-hour TTL, and the session_start lifecycle hook. - Cache lands at .sf/runtime/model-catalog/google-gemini-cli.json with the same shape as the generic model-catalog-cache, so getKnownModelIds and the model picker pick it up transparently. - Wired into bootstrap/register-hooks.js session_start in parallel with the existing scheduleModelCatalogRefresh (the generic REST + API-key path can't reach gemini-cli's OAuth-only Code Assist endpoint). 3. Capacity error classification fix - error-classifier.js SERVER_RE now matches "no capacity (available|left)", "capacity (unavailable|exhausted)", and "no capacity ... on the server". 
Previously these fell through to kind=unknown, which is not transient, so agent-end-recovery never retried — even though the same handler already caps gemini-cli rate-limit backoff at 30s for exactly this class of transient. With the pattern matched as `server`, the existing retry-with-backoff path covers it. The full extension test suite (1386 tests) passes. Typecheck clean for both the package and the SF extensions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 03:32:35 +02:00 · 2026-05-14 03:32:35 +02:00 · c6a3fa6a6a
commit c6a3fa6a6a
parent 1d753af6b6
4 changed files with 215 additions and 3 deletions
--- a/packages/google-gemini-cli-provider/src/index.ts
+++ b/packages/google-gemini-cli-provider/src/index.ts
@ -5,9 +5,16 @@
 * dedicated workspace package so provider code can depend on one small helper
 * instead of embedding the upstream integration inline.
 *
- * Consumer: `@singularity-forge/ai` Google Gemini provider.
+ * Consumer: `@singularity-forge/ai` Google Gemini provider, plus SF-side
+ * background catalog discovery.
 */
-import { AuthType, makeFakeConfig } from "@google/gemini-cli-core";
+import {
+	AuthType,
+	CodeAssistServer,
+	getOauthClient,
+	makeFakeConfig,
+	setupUser,
+} from "@google/gemini-cli-core";
 import {
 	type ContentGenerator,
 	createContentGenerator,
@ -43,3 +50,99 @@ export async function createGeminiCliContentGenerator(
 	);
 	return createContentGenerator(generatorConfig, config);
 }
+
+/**
+ * Per-model quota bucket from CodeAssistServer.retrieveUserQuota.
+ *
+ * Field notes, as populated by snapshotGeminiCliAccount:
+ * - `modelId`: non-empty model identifier; buckets without a string id are
+ *   skipped and never appear here.
+ * - `remainingFraction`: copied from the bucket when it is a number, otherwise
+ *   defaulted to 1.
+ * - `usedFraction`: derived as `1 - remainingFraction`.
+ * - `resetTime`: copied only when the bucket reports a string; the format is
+ *   whatever the quota endpoint returns (presumably an ISO-8601 timestamp —
+ *   TODO confirm).
+ */
+export interface GeminiQuotaBucket {
+	modelId: string;
+	usedFraction: number;
+	remainingFraction: number;
+	resetTime?: string;
+}
+
+/**
+ * Snapshot of the active gemini-cli account: tier identity, project, and the
+ * full per-model quota table.
+ *
+ * Why a single struct: every consumer (model picker, usage UI, capacity
+ * diagnostics, catalog cache) needs the same three pieces of data (tier
+ * identity, project, quota table). Returning them together avoids three
+ * separate OAuth round trips.
+ *
+ * `projectId` is the project id reported by setupUser. `models` is never
+ * empty in a snapshot produced by snapshotGeminiCliAccount: when no bucket
+ * carries a model id that function returns null instead of a snapshot.
+ */
+export interface GeminiAccountSnapshot {
+	projectId: string;
+	/** Active tier id from setupUser.userTier (e.g. "free-tier", "standard-tier"). */
+	userTierId?: string;
+	/** Active tier human label from setupUser.userTierName. */
+	userTierName?: string;
+	/**
+	 * Paid tier descriptor when the account has one (e.g. AI Ultra). Carries
+	 * id like "g1-ultra-tier" and the marketing name. Distinct from the
+	 * effective userTier — a free-tier session can still have a paidTier
+	 * marker if the underlying account is subscribed.
+	 */
+	paidTier?: { id?: string; name?: string };
+	models: GeminiQuotaBucket[];
+}
+
+/**
+ * Take a best-effort snapshot of the signed-in gemini-cli account: the
+ * resolved project, tier information, and one quota entry for every bucket
+ * the Code Assist quota endpoint reports with a model id.
+ *
+ * @param cwd Working directory handed to the fake gemini-cli config; defaults
+ *   to `process.cwd()` when omitted.
+ * @returns The snapshot, or null when no project id is resolved, when no
+ *   bucket carries a model id, or when any step throws (OAuth expired,
+ *   network, ...). Errors are swallowed on purpose so callers can downgrade
+ *   gracefully.
+ *
+ * Consumer: SF-side background catalog cache, usage UI, capacity diagnostics.
+ */
+export async function snapshotGeminiCliAccount(
+	cwd?: string,
+): Promise<GeminiAccountSnapshot | null> {
+	try {
+		const fakeConfig = makeFakeConfig({ cwd: cwd ?? process.cwd() });
+		const oauth = await getOauthClient(AuthType.LOGIN_WITH_GOOGLE, fakeConfig);
+		const user = await setupUser(oauth, fakeConfig);
+
+		// Without a usable project id the quota endpoint cannot be queried.
+		const projectId = user?.projectId;
+		if (typeof projectId !== "string" || projectId === "") return null;
+
+		const codeAssist = new CodeAssistServer(oauth, projectId, { headers: {} });
+		const quota = await codeAssist.retrieveUserQuota({ project: projectId });
+
+		const models: GeminiQuotaBucket[] = [];
+		for (const bucket of quota?.buckets ?? []) {
+			// Buckets without a string model id cannot be attributed; skip them.
+			if (typeof bucket.modelId !== "string" || bucket.modelId === "") continue;
+
+			// A bucket that omits remainingFraction is treated as fully available.
+			let remainingFraction = 1;
+			if (typeof bucket.remainingFraction === "number") {
+				remainingFraction = bucket.remainingFraction;
+			}
+
+			const entry: GeminiQuotaBucket = {
+				modelId: bucket.modelId,
+				usedFraction: 1 - remainingFraction,
+				remainingFraction,
+				resetTime: undefined,
+			};
+			if (typeof bucket.resetTime === "string") {
+				entry.resetTime = bucket.resetTime;
+			}
+			models.push(entry);
+		}
+
+		// An empty quota table is reported as "no snapshot", not as an empty one.
+		if (models.length === 0) return null;
+
+		const userTierId =
+			typeof user?.userTier === "string" ? user.userTier : undefined;
+		const paidTier = user?.paidTier
+			? { id: user.paidTier.id, name: user.paidTier.name }
+			: undefined;
+
+		return {
+			projectId,
+			userTierId,
+			userTierName: user?.userTierName,
+			paidTier,
+			models,
+		};
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Convenience wrapper: just the model IDs the active gemini-cli account has
+ * access to. Returns null on failure (same contract as snapshotGeminiCliAccount).
+ *
+ * The snapshot holds one entry per quota bucket, so a model that is reported
+ * in more than one bucket would otherwise appear several times in the list.
+ * IDs are therefore de-duplicated, keeping first-seen order.
+ * NOTE(review): upstream buckets look like they can be split per token type
+ * for the same model — confirm against gemini-cli-core's BucketInfo.
+ *
+ * @param cwd Working directory forwarded to snapshotGeminiCliAccount.
+ * @returns Unique model IDs, or null when no snapshot could be taken.
+ */
+export async function discoverGeminiCliModels(
+	cwd?: string,
+): Promise<string[] | null> {
+	const snap = await snapshotGeminiCliAccount(cwd);
+	if (!snap) return null;
+	// Set preserves insertion order, so the first occurrence of each id wins.
+	return Array.from(new Set(snap.models.map((m) => m.modelId)));
+}
--- a/src/resources/extensions/sf/bootstrap/register-hooks.js
+++ b/src/resources/extensions/sf/bootstrap/register-hooks.js
@ -504,6 +504,19 @@ export function registerHooks(pi, ecosystemHandlers = []) {
 		} catch {
 			/* non-fatal — model catalog refresh must never block session start */
 		}
+		// Refresh the gemini-cli model catalog separately because google-gemini-cli
+		// uses OAuth via @google/gemini-cli-core, not API-key REST, so it is not
+		// reachable through the generic refresh above. The cache lands in
+		// .sf/runtime/model-catalog/google-gemini-cli.json so getKnownModelIds and
+		// the model picker pick it up the same way as other providers.
+		try {
+			const { scheduleGeminiCatalogRefresh } = await import(
+				"../gemini-catalog.js"
+			);
+			scheduleGeminiCatalogRefresh(process.cwd());
+		} catch {
+			/* non-fatal — gemini catalog refresh must never block session start */
+		}
 		// Detect drift in source-of-truth markdown files since last session.
 		try {
 			const { detectMdFileDrift, formatDriftReport } = await import(
--- a/src/resources/extensions/sf/error-classifier.js
+++ b/src/resources/extensions/sf/error-classifier.js
@ -41,7 +41,7 @@ const AFFORDABILITY_RE =
 const NETWORK_RE =
 	/network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i;
 const SERVER_RE =
-	/internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i;
+	/internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable|no capacity (?:available|left)|capacity (?:unavailable|exhausted)|no capacity .* on the server/i;
 // ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first).
 const CONNECTION_RE =
 	/terminated|connection.?(?:refused|error)|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i;
--- a/src/resources/extensions/sf/gemini-catalog.js
+++ b/src/resources/extensions/sf/gemini-catalog.js
@ -0,0 +1,96 @@
+/**
+ * gemini-catalog.js — SF-side cache writer for gemini-cli model discovery.
+ *
+ * The OAuth + Code Assist plumbing lives in the dedicated
+ * @singularity-forge/google-gemini-cli-provider package (where the rest of the
+ * gemini-cli-core integration is centralized). This module only handles the
+ * SF-specific concerns: where on disk to cache the result, how often to
+ * refresh, and the session_start lifecycle hook.
+ *
+ * Why a separate cache from the generic model-catalog-cache: that cache is
+ * keyed off providers with REST /v1/models and API-key auth. google-gemini-cli
+ * has neither — it uses OAuth via gemini-cli-core and exposes its models only
+ * implicitly, through the per-model quota table. The cache file shape stays
+ * compatible with model-catalog-cache.getKnownModelIds so consumers can read
+ * both transparently.
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { sfRuntimeRoot } from "./paths.js";
+
+const GEMINI_CLI_PROVIDER_ID = "google-gemini-cli";
+const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
+
+function cacheFilePath(basePath) {
+	return join(
+		sfRuntimeRoot(basePath),
+		"model-catalog",
+		`${GEMINI_CLI_PROVIDER_ID}.json`,
+	);
+}
+
+function isCacheFresh(basePath) {
+	try {
+		const path = cacheFilePath(basePath);
+		if (!existsSync(path)) return false;
+		const entry = JSON.parse(readFileSync(path, "utf-8"));
+		if (!entry?.fetchedAt || !Array.isArray(entry.modelIds)) return false;
+		return Date.now() - new Date(entry.fetchedAt).getTime() <= CATALOG_TTL_MS;
+	} catch {
+		return false;
+	}
+}
+
+function writeCacheEntry(basePath, modelIds) {
+	try {
+		mkdirSync(join(sfRuntimeRoot(basePath), "model-catalog"), {
+			recursive: true,
+		});
+		writeFileSync(
+			cacheFilePath(basePath),
+			JSON.stringify({
+				fetchedAt: new Date().toISOString(),
+				modelIds,
+			}),
+			"utf-8",
+		);
+	} catch {
+		// Best-effort — never fail the caller.
+	}
+}
+
+/**
+ * Discover and cache the gemini-cli model list. Returns the model IDs on
+ * success, null on any failure.
+ *
+ * Consumer: scheduleGeminiCatalogRefresh during session_start.
+ */
+export async function refreshGeminiCatalog(basePath) {
+	try {
+		const { discoverGeminiCliModels } = await import(
+			"@singularity-forge/google-gemini-cli-provider"
+		);
+		const modelIds = await discoverGeminiCliModels(basePath);
+		if (!modelIds || modelIds.length === 0) return null;
+		writeCacheEntry(basePath, modelIds);
+		return modelIds;
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Fire-and-forget background refresh of the gemini-cli model catalog. Skipped
+ * if the on-disk cache is already fresh (within CATALOG_TTL_MS).
+ *
+ * Consumer: bootstrap/register-hooks.js session_start hook.
+ */
+export function scheduleGeminiCatalogRefresh(basePath) {
+	if (isCacheFresh(basePath)) return;
+	setImmediate(async () => {
+		try {
+			await refreshGeminiCatalog(basePath);
+		} catch {
+			// Per-provider failure is silently swallowed.
+		}
+	});
+}