singularity-forge/packages/pi-coding-agent/src/core/fallback-resolver.ts

// SF Provider Fallback Resolver
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>

/**
 * FallbackResolver - Fresh model reselection when rate/quota limits are hit.
 *
 * When a provider/model becomes unhealthy, this resolver picks a fresh model from
 * the current available registry rather than walking a preconfigured fallback chain.
 */

import type { Api, Model } from "@singularity-forge/pi-ai";
import type { AuthStorage, UsageLimitErrorType } from "./auth-storage.js";
import type { ModelRegistry } from "./model-registry.js";
import type {
	FallbackChainEntry,
	SettingsManager,
} from "./settings-manager.js";

/**
 * Outcome of a successful fallback lookup performed by FallbackResolver.
 */
export interface FallbackResult {
	/** The replacement model that was selected. */
	model: Model<Api>;
	/**
	 * Name of the configured chain the model was taken from, or the literal
	 * "fresh-selection" when it was reselected from the available registry.
	 */
	chainName: string;
	/** Human-readable description of which provider/model was picked. */
	reason: string;
}

/**
 * Picks replacement models when the active model hits a usage limit.
 *
 * Failure-time replacement ({@link FallbackResolver.findFallback}) reselects
 * from the models the registry currently reports as available. Named chains
 * from settings are only consulted by the explicit chain helpers
 * ({@link FallbackResolver.getBestAvailable},
 * {@link FallbackResolver.findChainsForModel}).
 */
export class FallbackResolver {
	constructor(
		private readonly settingsManager: SettingsManager,
		private readonly authStorage: AuthStorage,
		private readonly modelRegistry: ModelRegistry,
	) {}

	/**
	 * Find a fresh replacement for a model that just failed.
	 * Ignores fallback chains and reselects from the current available registry.
	 *
	 * @param currentModel - The model whose request just failed.
	 * @param errorType - The kind of usage-limit error that was reported.
	 * @returns FallbackResult if a replacement is available, null otherwise
	 *          (including when fallback is disabled in settings).
	 */
	async findFallback(
		currentModel: Model<Api>,
		errorType: UsageLimitErrorType,
	): Promise<FallbackResult | null> {
		const { enabled } = this.settingsManager.getFallbackSettings();
		if (!enabled) return null;

		// Mark the current provider as exhausted at the provider level.
		// Skip for quota_exhausted — quotas are typically per-model (e.g.
		// google-gemini-cli's Code Assist per-model limits), so other models
		// from the same provider may still be available.
		if (errorType !== "quota_exhausted") {
			this.authStorage.markProviderExhausted(currentModel.provider, errorType);
		}

		return this._findAnyAvailableFallback(currentModel);
	}

	/**
	 * Automatic restoration is disabled when replacement is always reselected
	 * from scratch instead of following a chain.
	 *
	 * @returns Always null.
	 */
	async checkForRestoration(
		_currentModel: Model<Api>,
	): Promise<FallbackResult | null> {
		return null;
	}

	/**
	 * Get the best available model from a named chain.
	 * Useful for initial model selection.
	 *
	 * @param chainName - Key of the chain in the fallback settings.
	 * @returns The first usable entry of the chain, or null when fallback is
	 *          disabled, the chain is unknown/empty, or no entry is usable.
	 */
	async getBestAvailable(chainName: string): Promise<FallbackResult | null> {
		const { enabled, chains } = this.settingsManager.getFallbackSettings();
		if (!enabled) return null;

		// Own-property lookup only: a name such as "constructor" must not
		// resolve to something inherited from Object.prototype, which would be
		// mistaken for a chain and crash the walk below.
		if (!Object.prototype.hasOwnProperty.call(chains, chainName)) return null;

		const entries = chains[chainName];
		if (!Array.isArray(entries) || entries.length === 0) return null;

		return this._findAvailableInChain(chainName, entries, 0);
	}

	/**
	 * Find the chain(s) a model belongs to.
	 *
	 * @param provider - Provider name to match against chain entries.
	 * @param modelId - Model id to match against chain entries.
	 * @returns Names of every chain containing a matching entry (possibly empty).
	 */
	findChainsForModel(provider: string, modelId: string): string[] {
		const { chains } = this.settingsManager.getFallbackSettings();
		const result: string[] = [];

		for (const [chainName, entries] of Object.entries(chains)) {
			// Chains are read from settings; ignore values that are not lists.
			if (!Array.isArray(entries)) continue;
			if (entries.some((e) => e.provider === provider && e.model === modelId)) {
				result.push(chainName);
			}
		}

		return result;
	}

	/**
	 * Search a chain for the first usable entry in [startIndex, endIndex).
	 *
	 * An entry is usable when its provider is not in backoff, the model exists
	 * in the registry, and the provider is request-ready.
	 *
	 * @param chainName - Chain name echoed back in the result.
	 * @param entries - Ordered entries of the chain.
	 * @param startIndex - First index to inspect (clamped to 0).
	 * @param endIndex - Exclusive upper bound; defaults to, and is clamped to,
	 *                   the chain length.
	 */
	private async _findAvailableInChain(
		chainName: string,
		entries: FallbackChainEntry[],
		startIndex: number,
		endIndex?: number,
	): Promise<FallbackResult | null> {
		const end = Math.min(endIndex ?? entries.length, entries.length);

		for (let i = Math.max(startIndex, 0); i < end; i++) {
			const entry = entries[i];
			// Skip holes instead of dereferencing undefined.
			if (!entry) continue;

			// Check provider-level backoff
			if (!this.authStorage.isProviderAvailable(entry.provider)) {
				continue;
			}

			// Check if model exists in registry
			const model = this.modelRegistry.find(entry.provider, entry.model);
			if (!model) continue;

			// Check if provider is request-ready for fallback (authMode-aware)
			if (!this.modelRegistry.isProviderRequestReady(entry.provider)) continue;

			return {
				model,
				chainName,
				reason: `falling back to ${entry.provider}/${entry.model}`,
			};
		}

		return null;
	}

	/**
	 * Fresh-selection fallback used by findFallback.
	 *
	 * Picks a model from the registry's available list whose provider differs
	 * from the failing model's provider, is not in backoff, and is
	 * request-ready. Candidates whose `reasoning` flag equals the current
	 * model's are preferred; ties are broken by larger context window.
	 *
	 * NOTE(review): same-provider models are excluded even when the failure was
	 * quota_exhausted (where findFallback deliberately leaves the provider
	 * unmarked) — confirm this is intended.
	 */
	private _findAnyAvailableFallback(
		currentModel: Model<Api>,
	): FallbackResult | null {
		// Provider checks do not depend on the individual model, so evaluate
		// them once per provider for this selection.
		const providerUsable = new Map<string, boolean>();
		const isUsable = (provider: string): boolean => {
			let usable = providerUsable.get(provider);
			if (usable === undefined) {
				usable =
					// Exclude exhausted providers
					this.authStorage.isProviderAvailable(provider) &&
					// Exclude providers without auth
					this.modelRegistry.isProviderRequestReady(provider);
				providerUsable.set(provider, usable);
			}
			return usable;
		};

		// filter() returns a new array, so the in-place sort below never
		// reorders the registry's own list.
		const candidates = this.modelRegistry.getAvailable().filter(
			(m) =>
				// Exclude same provider — credential rotation was already tried
				m.provider !== currentModel.provider && isUsable(m.provider),
		);

		// Sort: prefer models with matching reasoning capability, then by context window
		candidates.sort((a, b) => {
			const aReasoningMatch = a.reasoning === currentModel.reasoning ? 1 : 0;
			const bReasoningMatch = b.reasoning === currentModel.reasoning ? 1 : 0;
			if (aReasoningMatch !== bReasoningMatch) {
				return bReasoningMatch - aReasoningMatch;
			}
			return (b.contextWindow ?? 0) - (a.contextWindow ?? 0);
		});

		const chosen = candidates[0];
		if (!chosen) return null;

		return {
			model: chosen,
			chainName: "fresh-selection",
			reason: `reselected ${chosen.provider}/${chosen.id} from available models`,
		};
	}
}
chore: sync workspace state after rebrand - Rebrand commits already in history (gsd → forge) - Sync pre-existing doc, docker, and CI config updates - All rebrand artifacts verified in place: * Native crates: forge-engine, forge-ast, forge-grep * Log prefixes: [forge] across 22+ files * Binary: ~/bin/sf-run * Workspace scopes: @sf-run/, @singularity-forge/ * Nix flake: Rust toolchain ready System ready for: nix develop && bun run build:native Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-15 14:54:20 +02:00			`// SF Provider Fallback Resolver`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>`

			`/**`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`* FallbackResolver - Fresh model reselection when rate/quota limits are hit.`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`*`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`* When a provider/model becomes unhealthy, this resolver picks a fresh model from`
			`* the current available registry rather than walking a preconfigured fallback chain.`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`*/`

Rename @sf-run/* → @singularity-forge/* package scope - All 373 source files updated - Package.json scopes in all workspace packages - Loader workspace symlink dir updated - RpcClient import unified from pi-coding-agent (fixes type mismatch) - Scripts, configs, flake.nix updated - Workspace symlinks rebuilt 2026-04-15 22:56:33 +02:00			`import type { Api, Model } from "@singularity-forge/pi-ai";`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`import type { AuthStorage, UsageLimitErrorType } from "./auth-storage.js";`
			`import type { ModelRegistry } from "./model-registry.js";`
style: format repository with biome 2026-05-05 14:31:16 +02:00			`import type {`
			`FallbackChainEntry,`
			`SettingsManager,`
			`} from "./settings-manager.js";`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00
			`export interface FallbackResult {`
			`model: Model<Api>;`
			`chainName: string;`
			`reason: string;`
			`}`

			`export class FallbackResolver {`
			`constructor(`
			`private settingsManager: SettingsManager,`
			`private authStorage: AuthStorage,`
			`private modelRegistry: ModelRegistry,`
			`) {}`

			`/**`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`* Find a fresh replacement for a model that just failed.`
			`* Ignores fallback chains and reselects from the current available registry.`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`*`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`* @returns FallbackResult if a replacement is available, null otherwise`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`*/`
			`async findFallback(`
			`currentModel: Model<Api>,`
			`errorType: UsageLimitErrorType,`
			`): Promise<FallbackResult \| null> {`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`const { enabled } = this.settingsManager.getFallbackSettings();`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`if (!enabled) return null;`

sf snapshot: uncommitted changes after 120m inactivity 2026-05-04 14:46:50 +02:00			`// Mark the current provider as exhausted at the provider level.`
			`// Skip for quota_exhausted — quotas are typically per-model (e.g.`
			`// google-gemini-cli's Code Assist per-model limits), so other models`
			`// from the same provider may still be available.`
			`if (errorType !== "quota_exhausted") {`
			`this.authStorage.markProviderExhausted(currentModel.provider, errorType);`
			`}`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00
sf snapshot: pre-dispatch, uncommitted changes after 83m inactivity 2026-05-04 09:47:30 +02:00			`return this._findAnyAvailableFallback(currentModel);`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`}`

			`/**`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`* Automatic restoration is disabled when replacement is always reselected`
			`* from scratch instead of following a chain.`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`*/`
style: format repository with biome 2026-05-05 14:31:16 +02:00			`async checkForRestoration(`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`_currentModel: Model<Api>,`
style: format repository with biome 2026-05-05 14:31:16 +02:00			`): Promise<FallbackResult \| null> {`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`return null;`
			`}`

			`/**`
			`* Get the best available model from a named chain.`
			`* Useful for initial model selection.`
			`*/`
			`async getBestAvailable(chainName: string): Promise<FallbackResult \| null> {`
			`const { enabled, chains } = this.settingsManager.getFallbackSettings();`
			`if (!enabled) return null;`

			`const entries = chains[chainName];`
			`if (!entries \|\| entries.length === 0) return null;`

			`return this._findAvailableInChain(chainName, entries, 0);`
			`}`

			`/**`
			`* Find the chain(s) a model belongs to.`
			`*/`
			`findChainsForModel(provider: string, modelId: string): string[] {`
			`const { chains } = this.settingsManager.getFallbackSettings();`
			`const result: string[] = [];`

			`for (const [chainName, entries] of Object.entries(chains)) {`
			`if (entries.some((e) => e.provider === provider && e.model === modelId)) {`
			`result.push(chainName);`
			`}`
			`}`

			`return result;`
			`}`

			`/**`
			`* Search a chain for the first available entry starting from startIndex.`
			`*/`
			`private async _findAvailableInChain(`
			`chainName: string,`
			`entries: FallbackChainEntry[],`
			`startIndex: number,`
			`endIndex?: number,`
			`): Promise<FallbackResult \| null> {`
			`const end = endIndex ?? entries.length;`

			`for (let i = startIndex; i < end; i++) {`
			`const entry = entries[i];`

			`// Check provider-level backoff`
			`if (!this.authStorage.isProviderAvailable(entry.provider)) {`
			`continue;`
			`}`

			`// Check if model exists in registry`
			`const model = this.modelRegistry.find(entry.provider, entry.model);`
			`if (!model) continue;`

feat(core): support for 'non-api-key' provider extensions like Claude Code CLI (#2382) * feat(core): add generic native post-install hooks for package install * feat(core): add before/after install/remove lifecycle hooks * refactor(core): remove postInstall alias from lifecycle hook fallback * feat(core): complete authMode support for keyless providers The initial authMode implementation fixed model-registry, sdk, and fallback-resolver but missed agent-session.ts (6 callsites) and compaction-orchestrator.ts (2 callsites) that block externalCli providers at runtime. Architecture: separate readiness gating from credential retrieval. - isProviderRequestReady(): authMode-aware readiness check - getApiKey()/getApiKeyForProvider(): return undefined for externalCli/none providers instead of triggering auth errors - All 8 callsites in agent-session and compaction-orchestrator now gate on readiness, not key presence - Downstream signatures (compaction, branch-summarization) accept apiKey: string \| undefined - Replaced hardcoded ollama exception in discoverModels with isProviderRequestReady Zero behavioral change for classic apiKey/oauth providers. * feat(core): add isReady callback for provider readiness verification Extensions can now provide an isReady() callback when registering any provider. isProviderRequestReady() calls it before default auth checks, allowing providers to verify actual reachability (CLI authenticated, API key valid, service online) rather than relying solely on credential presence. * test(core): expand authMode test coverage Cover all four auth modes (apiKey, oauth, externalCli, none), isReady callback behavior, getProviderAuthMode defaults, isProviderRequestReady for each mode, getAvailable filtering, and getApiKey early-return for keyless providers. * chore: remove provider-api-bridge files from this branch These files implement GSD core → provider-api wiring (deps + tool registry) and belong in a separate PR. 
Reverts register-extension.ts to upstream state. 2026-03-24 21:50:12 +00:00			`// Check if provider is request-ready for fallback (authMode-aware)`
			`if (!this.modelRegistry.isProviderRequestReady(entry.provider)) continue;`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00
			`return {`
			`model,`
			`chainName,`
			reason: `falling back to ${entry.provider}/${entry.model}`,
			`};`
			`}`

			`return null;`
			`}`
sf snapshot: pre-dispatch, uncommitted changes after 83m inactivity 2026-05-04 09:47:30 +02:00
			`/**`
			`* Free-selection fallback when no chain contains the current model.`
			`* Picks any available model from the registry with a different provider.`
			`* Prefers models with reasoning capability if the current model has it.`
			`*/`
			`private _findAnyAvailableFallback(`
			`currentModel: Model<Api>,`
			`): FallbackResult \| null {`
			`const allModels = this.modelRegistry.getAvailable();`
			`const candidates = allModels.filter((m) => {`
			`// Exclude same provider — credential rotation was already tried`
			`if (m.provider === currentModel.provider) return false;`
			`// Exclude exhausted providers`
			`if (!this.authStorage.isProviderAvailable(m.provider)) return false;`
			`// Exclude models without auth`
			`if (!this.modelRegistry.isProviderRequestReady(m.provider)) return false;`
			`return true;`
			`});`

			`if (candidates.length === 0) return null;`

			`// Sort: prefer models with matching reasoning capability, then by context window`
			`candidates.sort((a, b) => {`
			`const aReasoningMatch = a.reasoning === currentModel.reasoning ? 1 : 0;`
			`const bReasoningMatch = b.reasoning === currentModel.reasoning ? 1 : 0;`
			`if (aReasoningMatch !== bReasoningMatch) {`
			`return bReasoningMatch - aReasoningMatch;`
			`}`
			`return (b.contextWindow ?? 0) - (a.contextWindow ?? 0);`
			`});`

			`const chosen = candidates[0];`
			`return {`
			`model: chosen,`
feat: record retrieval evidence across context tools 2026-05-07 18:17:41 +02:00			`chainName: "fresh-selection",`
			reason: `reselected ${chosen.provider}/${chosen.id} from available models`,
sf snapshot: pre-dispatch, uncommitted changes after 83m inactivity 2026-05-04 09:47:30 +02:00			`};`
			`}`
feat: add cross-provider fallback when rate/quota limits are hit (#125) When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-14 15:45:44 -05:00			`}`