diff --git a/.plans/issue-125-provider-fallback.md b/.plans/issue-125-provider-fallback.md new file mode 100644 index 000000000..32a2632f9 --- /dev/null +++ b/.plans/issue-125-provider-fallback.md @@ -0,0 +1,380 @@ +# Issue #125: Provider Fallback When Multiple Providers Configured +# Copyright (c) 2026 Jeremy McSpadden + +## Overview + +Add cross-provider fallback so that when a provider hits rate/quota limits, the system +automatically switches to another provider that serves an equivalent model (or a +user-configured fallback chain of different models). + +## Current State + +The codebase already supports: +- **Multi-credential per provider** — round-robin or session-sticky selection +- **Per-credential backoff tracking** — rate_limit (30s), quota_exhausted (30min), server_error (20s) +- **Credential rotation on error** — `markUsageLimitReached()` backs off one key and returns + whether another key exists for the same provider +- **Retry with exponential backoff** — 3 retries, 2s/4s/8s delays +- **Error classification** — quota_exhausted, rate_limit, server_error, unknown + +The gap: fallback only works within a single provider (multiple API keys). There is no +mechanism to fall back to a *different provider* serving the same or equivalent model. + +--- + +## Architecture + +### Phase 1: Fallback Chain Configuration & Storage + +**Goal:** Let users define ordered fallback chains that map a primary model to backup +model+provider combos. + +#### 1.1 — Settings Schema (`settings-manager.ts`) + +Add a new top-level setting: + +```typescript +interface FallbackChainEntry { + provider: string; // e.g. "zai", "alibaba", "openai" + model: string; // e.g. 
"glm-5", "claude-opus-4-6" + priority: number; // lower = higher priority (1 = primary) +} + +interface FallbackSettings { + enabled: boolean; // default: false + chains: Record<string, FallbackChainEntry[]>; // keyed by chain name + // Example: + // "coding": [ + // { provider: "zai", model: "glm-5", priority: 1 }, + // { provider: "alibaba", model: "glm-5", priority: 2 }, + // { provider: "openai", model: "gpt-4.1", priority: 3 } + // ] +} +``` + +**Files to modify:** +- `packages/pi-coding-agent/src/core/settings-manager.ts` — add `getFallbackSettings()`, + `setFallbackChain()`, `removeFallbackChain()`, getter/setter for `fallback.enabled` + +#### 1.2 — Settings File Location + +Stored in the existing `~/.pi/agent/settings.json` under a new `fallback` key. + +#### 1.3 — CLI Configuration Commands + +Add subcommands to the existing settings CLI: +- `pi settings fallback enable/disable` +- `pi settings fallback add-chain <name> --provider <provider>
 --model <model> --priority <n>` +- `pi settings fallback remove-chain <name>` +- `pi settings fallback list` + +**Files to modify:** +- `packages/pi-coding-agent/src/cli/commands/settings.ts` (or equivalent CLI entry point) + +--- + +### Phase 2: Provider-Level Backoff Tracking + +**Goal:** Track backoff state at the provider level (not just credential level) so the +fallback system knows when an entire provider is unavailable. + +#### 2.1 — Extend AuthStorage (`auth-storage.ts`) + +Add a provider-level backoff map alongside the existing credential-level one: + +```typescript +private providerBackoff: Map<string, number> = new Map(); +// Map<provider, backoffExpiresAt> +``` + +**New methods:** +```typescript +markProviderExhausted(provider: string, errorType: UsageLimitErrorType): void +isProviderAvailable(provider: string): boolean +getProviderBackoffRemaining(provider: string): number // ms until available, 0 if available +``` + +**Logic:** When `markUsageLimitReached()` returns `false` (all credentials for a provider +are backed off), also mark the provider itself as backed off with the longest remaining +credential backoff duration. + +**Files to modify:** +- `packages/pi-coding-agent/src/core/auth-storage.ts` + +--- + +### Phase 3: Fallback Resolution Engine + +**Goal:** Given a current model+provider that just failed, find the next available +fallback from the configured chain. + +#### 3.1 — FallbackResolver (`fallback-resolver.ts` — new file) + +```typescript +// packages/pi-coding-agent/src/core/fallback-resolver.ts + +export interface FallbackResult { + model: Model; + reason: string; // "quota_exhausted on zai, falling back to alibaba" +} + +export class FallbackResolver { + constructor( + private settings: SettingsManager, + private authStorage: AuthStorage, + private modelRegistry: ModelRegistry, + ) {} + + /** + * Find the next available fallback for the current model. + * Returns null if no fallback is configured or available. 
+ */ + async findFallback( + currentModel: Model, + errorType: UsageLimitErrorType, + ): Promise<FallbackResult | null> { + // 1. Check if fallback is enabled + // 2. Find chain(s) containing currentModel's provider+model + // 3. Sort by priority + // 4. Skip entries where provider is backed off + // 5. Skip entries without valid API keys + // 6. Return first available, or null + } + + /** + * Find the chain a model belongs to. + */ + findChainForModel(provider: string, modelId: string): FallbackChainEntry[] | null + + /** + * Get the highest-priority available model from a chain. + * Used on session start to pick the best available model. + */ + async getBestAvailable(chainName: string): Promise<FallbackResult | null> +} +``` + +#### 3.2 — Model Equivalence + +For same-model cross-provider fallback (Phase 1 of the feature), the chain entries +explicitly name the provider+model pairs. No automatic equivalence detection needed — +the user defines what's equivalent. + +--- + +### Phase 4: Integrate Fallback into Retry Flow + +**Goal:** When credential rotation fails (all keys for a provider exhausted), try the +fallback chain before giving up or doing exponential backoff. + +#### 4.1 — Modify `_handleRetryableError()` (`agent-session.ts`) + +Current flow: +``` +1. Classify error +2. Try credential rotation within provider → if success, retry immediately +3. If quota_exhausted and all backed off → give up +4. Exponential backoff retry +``` + +New flow: +``` +1. Classify error +2. Try credential rotation within provider → if success, retry immediately +3. ** Try provider fallback via FallbackResolver ** + a. If fallback found → swap model on agent, retry immediately + b. Emit event: "fallback_provider_switch" with old/new provider info +4. If quota_exhausted and no fallback → give up +5. 
Exponential backoff retry +``` + +**Key changes in agent-session.ts (~lines 2317-2370):** + +```typescript +// After credential rotation fails: +if (!hasAlternate) { + const fallbackResult = await this.fallbackResolver?.findFallback( + this.agent.model, + errorType, + ); + + if (fallbackResult) { + // Swap to fallback model + this.agent.setModel(fallbackResult.model); + this._removeLastError(); + this._emitEvent("auto_retry_start", { + attempt: this._retryAttempt + 1, + delayMs: 0, + reason: fallbackResult.reason, + }); + await this.agent.continue(); + return true; + } +} +``` + +#### 4.2 — Agent Model Swapping + +The agent needs a method to swap its model mid-conversation: + +```typescript +// agent.ts or agent-loop.ts +setModel(model: Model): void { + this.config.model = model; + // Re-resolve API key for new provider +} +``` + +**Important:** The API key must also be re-resolved since we're switching providers. +The `getApiKey` callback in `AgentOptions` already takes a provider string, so this +should work naturally. + +**Files to modify:** +- `packages/pi-coding-agent/src/core/agent-session.ts` +- `packages/pi-ai/src/agent.ts` or `packages/pi-ai/src/agent-loop.ts` + +--- + +### Phase 5: Provider Restoration (Auto-Upgrade) + +**Goal:** When a higher-priority provider's backoff expires, switch back to it. + +#### 5.1 — Pre-Request Priority Check + +Before each LLM request, check if a higher-priority provider in the chain has become +available again: + +```typescript +// In agent-loop.ts streamAssistantResponse(), before calling streamFn: +if (this.fallbackResolver) { + const bestAvailable = await this.fallbackResolver.getBestAvailable(currentChain); + if (bestAvailable && bestAvailable.model.provider !== currentModel.provider) { + // Upgrade back to higher-priority provider + this.setModel(bestAvailable.model); + this._emitEvent("fallback_provider_restored", { ... 
}); + } +} +``` + +#### 5.2 — Quota Reset Awareness (Future Enhancement) + +For now, rely on backoff expiry times. A future enhancement could: +- Parse rate limit headers for reset timestamps +- Store per-provider quota windows (5-hour, daily, weekly, monthly) +- Predict when quota will restore based on usage patterns + +This is complex and should be a separate issue. + +--- + +### Phase 6: User-Facing Events & UI + +**Goal:** Surface fallback activity to the user so they know what's happening. + +#### 6.1 — New Events + +```typescript +type FallbackEvent = + | { type: "fallback_provider_switch"; from: string; to: string; reason: string } + | { type: "fallback_provider_restored"; provider: string; reason: string } + | { type: "fallback_chain_exhausted"; chain: string; reason: string } +``` + +#### 6.2 — TUI Integration + +Display a brief notification in the TUI when fallback occurs: +- `⚡ Switched from zai/glm-5 → alibaba/glm-5 (rate limit)` +- `✓ Restored to zai/glm-5 (quota available)` +- `⚠ All providers in chain "coding" exhausted` + +**Files to modify:** +- `packages/pi-tui/src/` — event handler for new fallback events +- Status bar or notification area in the TUI + +--- + +## Implementation Order + +| Step | Phase | Effort | Dependencies | +|------|-------|--------|-------------| +| 1 | Phase 1.1-1.2: Settings schema | Small | None | +| 2 | Phase 2: Provider-level backoff | Small | None | +| 3 | Phase 3: FallbackResolver | Medium | Steps 1, 2 | +| 4 | Phase 4: Retry integration | Medium | Step 3 | +| 5 | Phase 5.1: Auto-restoration | Small | Step 4 | +| 6 | Phase 1.3: CLI commands | Small | Step 1 | +| 7 | Phase 6: Events & UI | Small | Step 4 | + +Steps 1 and 2 can be done in parallel. Steps 6 and 7 can be done in parallel. + +--- + +## Key Design Decisions + +### 1. Explicit chains vs automatic model equivalence +**Decision:** Explicit user-configured chains. 
+**Why:** Automatic equivalence is unreliable — models with the same name from different +providers may have different capabilities, limits, or pricing. Users should explicitly +opt in to which models they consider interchangeable. + +### 2. Where fallback sits in the retry flow +**Decision:** After credential rotation, before exponential backoff. +**Why:** Provider fallback is a better recovery than waiting and retrying the same +exhausted provider. If the fallback also fails, exponential backoff still kicks in. + +### 3. Model swap vs new agent +**Decision:** Swap model on existing agent mid-conversation. +**Why:** Creating a new agent would lose conversation context. The agent's `streamFn` +already accepts model as a parameter, and `getApiKey` resolves per-provider, so +swapping is straightforward. + +### 4. Restoration strategy +**Decision:** Check before each request (lazy check on backoff expiry). +**Why:** No background timers needed. The cost of one `isProviderAvailable()` check +per request is negligible. More sophisticated quota tracking can be added later. + +### 5. Scope of fallback +**Decision:** Per-session, not per-agent-type (initially). +**Why:** The issue mentions per-agent-type toggle, but the simpler initial implementation +is a global fallback chain that applies to any session using a model in the chain. +Per-agent-type scoping can be added by extending the chain config with an `agentTypes` +filter. 
+ +--- + +## Risks & Mitigations + +| Risk | Impact | Mitigation | +|------|--------|-----------| +| Model swap mid-conversation changes behavior | Medium | Log the swap, let user disable fallback | +| Different providers have different tool/feature support | High | Validate fallback model supports same API features before swapping | +| Credential resolution race conditions | Low | Use existing file-lock mechanism in auth-storage | +| Chain misconfiguration (nonexistent model) | Low | Validate chain entries on save, warn on invalid | +| Backoff timing mismatch with actual quota reset | Medium | Conservative backoff defaults; Phase 5.2 for future improvement | + +--- + +## Testing Strategy + +1. **Unit tests for FallbackResolver** — mock auth-storage and model-registry, test chain + resolution, priority ordering, backoff skipping +2. **Unit tests for extended auth-storage** — provider-level backoff tracking +3. **Integration test for retry flow** — simulate rate limit → credential fallback → + provider fallback → restoration +4. **E2E test** — configure a chain, hit rate limit on provider A, verify automatic + switch to provider B +5. 
**Settings tests** — validate chain CRUD operations, persistence, invalid input handling + +--- + +## Files Summary + +| File | Action | Changes | +|------|--------|---------| +| `packages/pi-coding-agent/src/core/settings-manager.ts` | Modify | Add FallbackSettings types, getters/setters | +| `packages/pi-coding-agent/src/core/auth-storage.ts` | Modify | Add provider-level backoff tracking | +| `packages/pi-coding-agent/src/core/fallback-resolver.ts` | **New** | FallbackResolver class | +| `packages/pi-coding-agent/src/core/agent-session.ts` | Modify | Integrate fallback into retry flow | +| `packages/pi-ai/src/agent.ts` | Modify | Add `setModel()` method | +| `packages/pi-coding-agent/src/cli/commands/settings.ts` | Modify | Add fallback CLI subcommands | +| `packages/pi-tui/src/` | Modify | Fallback event display | diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index 86250b88c..510f79988 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -70,6 +70,7 @@ import { wrapToolsWithExtensions, } from "./extensions/index.js"; import type { BashExecutionMessage, CustomMessage } from "./messages.js"; +import { FallbackResolver } from "./fallback-resolver.js"; import type { ModelRegistry } from "./model-registry.js"; import { expandPromptTemplate, type PromptTemplate } from "./prompt-templates.js"; import type { ResourceExtensionPaths, ResourceLoader } from "./resource-loader.js"; @@ -120,7 +121,10 @@ export type AgentSessionEvent = errorMessage?: string; } | { type: "auto_retry_start"; attempt: number; maxAttempts: number; delayMs: number; errorMessage: string } - | { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string }; + | { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string } + | { type: "fallback_provider_switch"; from: string; to: string; reason: string } + | { type: 
"fallback_provider_restored"; provider: string; reason: string } + | { type: "fallback_chain_exhausted"; reason: string }; /** Listener function for agent session events */ export type AgentSessionEventListener = (event: AgentSessionEvent) => void; @@ -267,6 +271,9 @@ export class AgentSession { // Model registry for API key resolution private _modelRegistry: ModelRegistry; + // Provider fallback resolver + private _fallbackResolver: FallbackResolver; + // Tool registry for extension getTools/setTools private _toolRegistry: Map = new Map(); private _toolPromptSnippets: Map = new Map(); @@ -284,6 +291,11 @@ export class AgentSession { this._customTools = config.customTools ?? []; this._cwd = config.cwd; this._modelRegistry = config.modelRegistry; + this._fallbackResolver = new FallbackResolver( + this.settingsManager, + this._modelRegistry.authStorage, + this._modelRegistry, + ); this._extensionRunnerRef = config.extensionRunnerRef; this._initialActiveToolNames = config.initialActiveToolNames; this._baseToolsOverride = config.baseToolsOverride; @@ -303,6 +315,11 @@ export class AgentSession { return this._modelRegistry; } + /** Fallback resolver for cross-provider fallback */ + get fallbackResolver(): FallbackResolver { + return this._fallbackResolver; + } + // ========================================================================= // Event Subscription // ========================================================================= @@ -868,6 +885,19 @@ export class AgentSession { ); } + // Check if a higher-priority provider in the fallback chain has recovered + const restoration = await this._fallbackResolver.checkForRestoration(this.model); + if (restoration) { + const previousProvider = `${this.model.provider}/${this.model.id}`; + this.agent.setModel(restoration.model); + this.sessionManager.appendModelChange(restoration.model.provider, restoration.model.id); + this._emit({ + type: "fallback_provider_restored", + provider: 
`${restoration.model.provider}/${restoration.model.id}`, + reason: `Restored from ${previousProvider}`, + }); + } + // Validate API key const apiKey = await this._modelRegistry.getApiKey(this.model, this.sessionId); if (!apiKey) { @@ -2354,20 +2384,66 @@ export class AgentSession { return true; } - // All credentials are backed off. For quota-exhausted errors the backoff is very - // long (30+ min), so retrying immediately is futile and will only produce - // confusing secondary errors (e.g. "Authentication failed"). Give up now and - // surface the original quota error to the user. - if (errorType === "quota_exhausted") { - this._emit({ - type: "auto_retry_end", - success: false, - attempt: this._retryAttempt, - finalError: message.errorMessage, - }); - this._retryAttempt = 0; - this._resolveRetry(); - return false; + // All credentials are backed off. Try cross-provider fallback before giving up. + if (isCredentialError) { + const fallbackResult = await this._fallbackResolver.findFallback( + this.model, + errorType, + ); + + if (fallbackResult) { + // Swap to fallback model — don't persist to settings + const previousProvider = this.model.provider; + this.agent.setModel(fallbackResult.model); + this.sessionManager.appendModelChange(fallbackResult.model.provider, fallbackResult.model.id); + + // Remove error message from agent state + const msgs = this.agent.state.messages; + if (msgs.length > 0 && msgs[msgs.length - 1].role === "assistant") { + this.agent.replaceMessages(msgs.slice(0, -1)); + } + + this._emit({ + type: "fallback_provider_switch", + from: `${previousProvider}/${this.model?.id}`, + to: `${fallbackResult.model.provider}/${fallbackResult.model.id}`, + reason: fallbackResult.reason, + }); + + this._emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: settings.maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (${fallbackResult.reason})`, + }); + + // Retry immediately with fallback provider - don't 
increment _retryAttempt + setTimeout(() => { + this.agent.continue().catch(() => { + // Retry failed - will be caught by next agent_end + }); + }, 0); + + return true; + } + + // No fallback available either + if (errorType === "quota_exhausted") { + this._emit({ + type: "fallback_chain_exhausted", + reason: `All providers exhausted for ${this.model.provider}/${this.model.id}`, + }); + this._emit({ + type: "auto_retry_end", + success: false, + attempt: this._retryAttempt, + finalError: message.errorMessage, + }); + this._retryAttempt = 0; + this._resolveRetry(); + return false; + } } } diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index bb10e5dfe..6028b085c 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -248,6 +248,13 @@ export class AuthStorage { */ private credentialBackoff: Map> = new Map(); + /** + * Provider-level backoff tracking. + * Set when all credentials for a provider are backed off. + * Map + */ + private providerBackoff: Map = new Map(); + private constructor(private storage: AuthStorageBackend) { this.reload(); } @@ -398,6 +405,7 @@ export class AuthStorage { delete this.data[provider]; this.providerRoundRobinIndex.delete(provider); this.credentialBackoff.delete(provider); + this.providerBackoff.delete(provider); this.persistProviderChange(provider, undefined); } @@ -484,6 +492,43 @@ export class AuthStorage { return true; } + /** + * Mark an entire provider as exhausted. + * Called when all credentials for a provider are backed off. + */ + markProviderExhausted(provider: string, errorType: UsageLimitErrorType): void { + const backoffMs = getBackoffDuration(errorType); + this.providerBackoff.set(provider, Date.now() + backoffMs); + } + + /** + * Check if a provider is currently available (not backed off at provider level). 
+ */ + isProviderAvailable(provider: string): boolean { + const expiresAt = this.providerBackoff.get(provider); + if (expiresAt === undefined) return true; + if (Date.now() >= expiresAt) { + this.providerBackoff.delete(provider); + return true; + } + return false; + } + + /** + * Get milliseconds remaining until provider backoff expires. + * Returns 0 if provider is available. + */ + getProviderBackoffRemaining(provider: string): number { + const expiresAt = this.providerBackoff.get(provider); + if (expiresAt === undefined) return 0; + const remaining = expiresAt - Date.now(); + if (remaining <= 0) { + this.providerBackoff.delete(provider); + return 0; + } + return remaining; + } + /** * Check if a credential index is currently backed off. */ diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts new file mode 100644 index 000000000..c62f5d473 --- /dev/null +++ b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts @@ -0,0 +1,229 @@ +// GSD Provider Fallback Resolver Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it, beforeEach, mock } from "node:test"; +import assert from "node:assert/strict"; +import { FallbackResolver } from "./fallback-resolver.js"; +import type { Api, Model } from "@gsd/pi-ai"; +import type { AuthStorage } from "./auth-storage.js"; +import type { ModelRegistry } from "./model-registry.js"; +import type { FallbackChainEntry, SettingsManager } from "./settings-manager.js"; + +function createMockModel(provider: string, id: string): Model { + return { + id, + name: id, + api: "openai-completions" as Api, + provider, + baseUrl: `https://api.${provider}.com`, + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + } as Model; +} + +const zaiModel = createMockModel("zai", "glm-5"); +const alibabaModel = createMockModel("alibaba", "glm-5"); +const 
openaiModel = createMockModel("openai", "gpt-4.1"); + +const defaultChain: FallbackChainEntry[] = [ + { provider: "zai", model: "glm-5", priority: 1 }, + { provider: "alibaba", model: "glm-5", priority: 2 }, + { provider: "openai", model: "gpt-4.1", priority: 3 }, +]; + +function createResolver(overrides?: { + enabled?: boolean; + isProviderAvailable?: (provider: string) => boolean; + hasAuth?: (provider: string) => boolean; + find?: (provider: string, modelId: string) => Model | undefined; +}) { + const settingsManager = { + getFallbackSettings: () => ({ + enabled: overrides?.enabled ?? true, + chains: { coding: defaultChain }, + }), + } as unknown as SettingsManager; + + const authStorage = { + markProviderExhausted: mock.fn(), + isProviderAvailable: overrides?.isProviderAvailable ?? (() => true), + hasAuth: overrides?.hasAuth ?? (() => true), + } as unknown as AuthStorage; + + const modelRegistry = { + find: overrides?.find ?? ((provider: string, modelId: string) => { + if (provider === "zai" && modelId === "glm-5") return zaiModel; + if (provider === "alibaba" && modelId === "glm-5") return alibabaModel; + if (provider === "openai" && modelId === "gpt-4.1") return openaiModel; + return undefined; + }), + } as unknown as ModelRegistry; + + return { resolver: new FallbackResolver(settingsManager, authStorage, modelRegistry), authStorage }; +} + +// ─── findFallback ──────────────────────────────────────────────────────────── + +describe("FallbackResolver — findFallback", () => { + it("returns next available provider when current fails", async () => { + const { resolver } = createResolver(); + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "alibaba"); + assert.equal(result!.model.id, "glm-5"); + assert.equal(result!.chainName, "coding"); + }); + + it("marks current provider as exhausted", async () => { + const { resolver, authStorage } = createResolver(); + await 
resolver.findFallback(zaiModel, "rate_limit"); + + const fn = authStorage.markProviderExhausted as any; + assert.equal(fn.mock.calls.length, 1); + assert.equal(fn.mock.calls[0].arguments[0], "zai"); + assert.equal(fn.mock.calls[0].arguments[1], "rate_limit"); + }); + + it("skips backed-off providers", async () => { + const { resolver } = createResolver({ + isProviderAvailable: (provider: string) => provider !== "alibaba", + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "openai"); + assert.equal(result!.model.id, "gpt-4.1"); + }); + + it("returns null when all providers are backed off", async () => { + const { resolver } = createResolver({ + isProviderAvailable: () => false, + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + assert.equal(result, null); + }); + + it("returns null when fallback is disabled", async () => { + const { resolver } = createResolver({ enabled: false }); + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + assert.equal(result, null); + }); + + it("returns null when model is not in any chain", async () => { + const { resolver } = createResolver(); + const unknownModel = createMockModel("unknown", "some-model"); + const result = await resolver.findFallback(unknownModel, "quota_exhausted"); + assert.equal(result, null); + }); + + it("skips providers without auth", async () => { + const { resolver } = createResolver({ + hasAuth: (provider: string) => provider !== "alibaba", + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "openai"); + }); + + it("skips providers with no model in registry", async () => { + const { resolver } = createResolver({ + find: (provider: string, modelId: string) => { + if (provider === "alibaba") return undefined; + if (provider === "openai" && modelId 
=== "gpt-4.1") return openaiModel; + return undefined; + }, + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "openai"); + }); +}); + +// ─── checkForRestoration ───────────────────────────────────────────────────── + +describe("FallbackResolver — checkForRestoration", () => { + it("returns higher-priority provider when recovered", async () => { + const { resolver } = createResolver(); + const result = await resolver.checkForRestoration(alibabaModel); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "zai"); + assert.equal(result!.model.id, "glm-5"); + }); + + it("returns null when already at highest priority", async () => { + const { resolver } = createResolver(); + const result = await resolver.checkForRestoration(zaiModel); + assert.equal(result, null); + }); + + it("returns null when higher-priority provider is still backed off", async () => { + const { resolver } = createResolver({ + isProviderAvailable: (provider: string) => provider !== "zai", + }); + + const result = await resolver.checkForRestoration(alibabaModel); + assert.equal(result, null); + }); + + it("returns null when fallback is disabled", async () => { + const { resolver } = createResolver({ enabled: false }); + const result = await resolver.checkForRestoration(alibabaModel); + assert.equal(result, null); + }); +}); + +// ─── getBestAvailable ──────────────────────────────────────────────────────── + +describe("FallbackResolver — getBestAvailable", () => { + it("returns highest-priority available provider", async () => { + const { resolver } = createResolver(); + const result = await resolver.getBestAvailable("coding"); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "zai"); + }); + + it("skips backed-off providers", async () => { + const { resolver } = createResolver({ + isProviderAvailable: (provider: string) => provider !== "zai", + }); + 
+ const result = await resolver.getBestAvailable("coding"); + + assert.notEqual(result, null); + assert.equal(result!.model.provider, "alibaba"); + }); + + it("returns null for unknown chain", async () => { + const { resolver } = createResolver(); + const result = await resolver.getBestAvailable("nonexistent"); + assert.equal(result, null); + }); +}); + +// ─── findChainsForModel ────────────────────────────────────────────────────── + +describe("FallbackResolver — findChainsForModel", () => { + it("finds chains containing a model", () => { + const { resolver } = createResolver(); + const chains = resolver.findChainsForModel("zai", "glm-5"); + assert.deepEqual(chains, ["coding"]); + }); + + it("returns empty array for model not in any chain", () => { + const { resolver } = createResolver(); + const chains = resolver.findChainsForModel("unknown", "model"); + assert.deepEqual(chains, []); + }); +}); diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.ts b/packages/pi-coding-agent/src/core/fallback-resolver.ts new file mode 100644 index 000000000..5d6b61499 --- /dev/null +++ b/packages/pi-coding-agent/src/core/fallback-resolver.ts @@ -0,0 +1,165 @@ +// GSD Provider Fallback Resolver +// Copyright (c) 2026 Jeremy McSpadden + +/** + * FallbackResolver - Cross-provider fallback when rate/quota limits are hit. + * + * When a provider's credentials are all exhausted, this resolver finds the next + * available provider+model from a user-configured fallback chain. It also handles + * restoration: checking if a higher-priority provider has recovered before each request. 
/**
 * Outcome of a fallback-chain lookup: the model to switch to, the name of the
 * chain it was taken from, and a short human-readable explanation.
 */
export interface FallbackResult {
  model: Model<Api>;
  chainName: string;
  reason: string;
}

/**
 * Resolves cross-provider fallbacks from the user-configured fallback chains.
 *
 * Chains are read from the settings manager on every call. Each chain is
 * ordered by its entries' `priority` (lower number = tried first) before it is
 * searched, so the documented priority semantics hold even for chains that
 * were not written through a sorting setter (e.g. hand-edited settings).
 */
export class FallbackResolver {
  constructor(
    private readonly settingsManager: SettingsManager,
    private readonly authStorage: AuthStorage,
    private readonly modelRegistry: ModelRegistry,
  ) {}

  /**
   * Find the next available fallback for a model that just failed.
   *
   * When fallback is enabled, the failing model's provider is first reported
   * to the auth storage via `markProviderExhausted`. Every chain containing
   * the model (matched on provider + model id) is then searched: entries after
   * the current one first, then the entries before it (wrap-around).
   *
   * @param currentModel Model whose request just failed.
   * @param errorType Classification of the failure, forwarded to the auth storage.
   * @returns The first usable entry, or null when fallback is disabled or no
   *          chain offers a usable alternative.
   */
  async findFallback(
    currentModel: Model<Api>,
    errorType: UsageLimitErrorType,
  ): Promise<FallbackResult | null> {
    const { enabled, chains } = this.settingsManager.getFallbackSettings();
    if (!enabled) return null;

    // Mark the current provider as exhausted at the provider level.
    this.authStorage.markProviderExhausted(currentModel.provider, errorType);

    for (const [chainName, configured] of Object.entries(chains)) {
      const entries = this._byPriority(configured);
      const currentIndex = this._indexOfModel(entries, currentModel);
      if (currentIndex === -1) continue;

      // Lower-priority entries (after the current one) come first.
      const next = await this._findAvailableInChain(chainName, entries, currentIndex + 1);
      if (next) return next;

      // Wrap around: entries ahead of the current one.
      const wrapped = await this._findAvailableInChain(chainName, entries, 0, currentIndex);
      if (wrapped) return wrapped;
    }

    return null;
  }

  /**
   * Check whether an entry ranked ahead of the current model is usable again.
   *
   * @param currentModel Model currently in use.
   * @returns The best usable higher-priority entry (with a "recovered" reason),
   *          or null when fallback is disabled, the model is in no chain, or
   *          nothing ahead of it is usable.
   */
  async checkForRestoration(currentModel: Model<Api>): Promise<FallbackResult | null> {
    const { enabled, chains } = this.settingsManager.getFallbackSettings();
    if (!enabled) return null;

    for (const [chainName, configured] of Object.entries(chains)) {
      const entries = this._byPriority(configured);
      const currentIndex = this._indexOfModel(entries, currentModel);

      // Not part of this chain, or already its highest-priority entry.
      if (currentIndex <= 0) continue;

      const better = await this._findAvailableInChain(chainName, entries, 0, currentIndex);
      if (better) {
        return {
          ...better,
          reason: `${better.model.provider}/${better.model.id} recovered, restoring from fallback`,
        };
      }
    }

    return null;
  }

  /**
   * Get the best available model from a named chain.
   *
   * @param chainName Key of the chain in the fallback settings.
   * @returns The highest-priority usable entry, or null when fallback is
   *          disabled, the chain is unknown or empty, or no entry is usable.
   */
  async getBestAvailable(chainName: string): Promise<FallbackResult | null> {
    const { enabled, chains } = this.settingsManager.getFallbackSettings();
    if (!enabled) return null;

    const configured = chains[chainName];
    if (!configured || configured.length === 0) return null;

    return this._findAvailableInChain(chainName, this._byPriority(configured), 0);
  }

  /**
   * List the names of all chains that contain the given provider + model id.
   * Unlike the lookup methods, this does not consult the `enabled` flag.
   */
  findChainsForModel(provider: string, modelId: string): string[] {
    const { chains } = this.settingsManager.getFallbackSettings();
    const names: string[] = [];

    for (const [chainName, entries] of Object.entries(chains)) {
      if (entries.some((e) => e.provider === provider && e.model === modelId)) {
        names.push(chainName);
      }
    }

    return names;
  }

  /**
   * Return a copy of `entries` ordered by ascending `priority`.
   * The sort is stable, so ties keep their configured order; entries without a
   * finite priority are placed last. The input array is never mutated.
   */
  private _byPriority(entries: FallbackChainEntry[]): FallbackChainEntry[] {
    const rank = (entry: FallbackChainEntry): number =>
      Number.isFinite(entry.priority) ? entry.priority : Number.POSITIVE_INFINITY;

    return [...entries].sort((a, b) => {
      const left = rank(a);
      const right = rank(b);
      if (left < right) return -1;
      if (left > right) return 1;
      return 0;
    });
  }

  /** Index of the entry matching the model's provider and id, or -1. */
  private _indexOfModel(entries: FallbackChainEntry[], model: Model<Api>): number {
    return entries.findIndex((e) => e.provider === model.provider && e.model === model.id);
  }

  /**
   * Scan `entries[startIndex, endIndex)` for the first usable entry.
   * An entry is usable when its provider is reported available by the auth
   * storage, the model resolves in the registry, and the provider has auth.
   */
  private async _findAvailableInChain(
    chainName: string,
    entries: FallbackChainEntry[],
    startIndex: number,
    endIndex?: number,
  ): Promise<FallbackResult | null> {
    const end = endIndex ?? entries.length;

    for (let i = startIndex; i < end; i++) {
      const entry = entries[i];

      // Provider-level backoff check.
      if (!this.authStorage.isProviderAvailable(entry.provider)) continue;

      // The model must exist in the registry.
      const model = this.modelRegistry.find(entry.provider, entry.model);
      if (!model) continue;

      // Credentials must be configured for the provider.
      if (!this.authStorage.hasAuth(entry.provider)) continue;

      return {
        model,
        chainName,
        reason: `falling back to ${entry.provider}/${entry.model}`,
      };
    }

    return null;
  }
}
/** Whether cross-provider fallback is enabled in the merged settings (default: false). */
getFallbackEnabled(): boolean {
  return this.settings.fallback?.enabled ?? false;
}

/**
 * Enable or disable fallback in the global settings and save.
 * NOTE(review): this passes a nested key ("enabled") to markModified, while
 * the chain mutators below mark the whole "fallback" field — confirm that the
 * difference is intended.
 */
setFallbackEnabled(enabled: boolean): void {
  if (!this.globalSettings.fallback) {
    this.globalSettings.fallback = {};
  }
  this.globalSettings.fallback.enabled = enabled;
  this.markModified("fallback", "enabled");
  this.save();
}

/** All fallback chains from the merged settings, keyed by chain name ({} when none are set). */
getFallbackChains(): Record<string, FallbackChainEntry[]> {
  return this.settings.fallback?.chains ?? {};
}

/** Entries of one named chain from the merged settings, or undefined if it does not exist. */
getFallbackChain(name: string): FallbackChainEntry[] | undefined {
  return this.settings.fallback?.chains?.[name];
}

/**
 * Create or replace a named chain in the global settings and save.
 * A copy of `entries` sorted by ascending priority is stored; the caller's
 * array is left untouched.
 */
setFallbackChain(name: string, entries: FallbackChainEntry[]): void {
  if (!this.globalSettings.fallback) {
    this.globalSettings.fallback = {};
  }
  if (!this.globalSettings.fallback.chains) {
    this.globalSettings.fallback.chains = {};
  }
  // Sort by priority
  this.globalSettings.fallback.chains[name] = [...entries].sort((a, b) => a.priority - b.priority);
  this.markModified("fallback");
  this.save();
}

/**
 * Remove a named chain from the global settings and save.
 * When the last chain is removed the `chains` key itself is dropped.
 *
 * @returns true if the chain existed and was removed, false otherwise.
 */
removeFallbackChain(name: string): boolean {
  if (!this.globalSettings.fallback?.chains?.[name]) {
    return false;
  }
  delete this.globalSettings.fallback.chains[name];
  if (Object.keys(this.globalSettings.fallback.chains).length === 0) {
    delete this.globalSettings.fallback.chains;
  }
  this.markModified("fallback");
  this.save();
  return true;
}

/** Snapshot of the fallback configuration with defaults applied. */
getFallbackSettings(): { enabled: boolean; chains: Record<string, FallbackChainEntry[]> } {
  return {
    enabled: this.getFallbackEnabled(),
    chains: this.getFallbackChains(),
  };
}