singularity-forge/packages/coding-agent/src/core/fallback-resolver.ts

// SF Provider Fallback Resolver
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>

/**
 * FallbackResolver - Fresh model reselection when rate/quota limits are hit.
 *
 * When a provider/model becomes unhealthy, this resolver picks a fresh model from
 * the current available registry rather than walking a preconfigured fallback chain.
 */

import type { Api, Model } from "@singularity-forge/ai";
import type { AuthStorage, UsageLimitErrorType } from "./auth-storage.js";
import type {
	BeforeModelSelectEvent,
	BeforeModelSelectResult,
} from "./extensions/types.js";
import type { ModelRegistry } from "./model-registry.js";
import type {
	FallbackChainEntry,
	SettingsManager,
} from "./settings-manager.js";

/**
 * Outcome of a successful fallback/selection lookup performed by
 * FallbackResolver.
 */
export interface FallbackResult {
	/** The replacement model that should be used next. */
	model: Model<Api>;
	/**
	 * Label describing where the selection came from: the configured chain name
	 * for chain lookups, or "learned-fallback" / "fresh-selection" when the model
	 * was reselected from the available registry.
	 */
	chainName: string;
	/** Human-readable explanation of why this model was chosen. */
	reason: string;
}

/**
 * Signature of the optional before_model_select hook emitter handed to
 * FallbackResolver.
 *
 * It receives the event payload without its `type` field and resolves with the
 * hook result, or `undefined` when nothing was returned. FallbackResolver reads
 * `modelId` from the result and expects it in "provider/modelId" form.
 */
type EmitBeforeModelSelect = (
	event: Omit<BeforeModelSelectEvent, "type">,
) => Promise<BeforeModelSelectResult | undefined>;

export class FallbackResolver {
	/**
	 * Unit currently being run by autonomous dispatch, or null when none is set.
	 * Read when building the before_model_select event during fallback.
	 */
	private _unitContext: { unitType: string; unitId: string } | null = null;

	/**
	 * @param settingsManager Source of the fallback settings (`enabled`, `chains`).
	 * @param authStorage Tracks provider-level exhaustion and availability.
	 * @param modelRegistry Lists available models and reports provider readiness.
	 * @param emitBeforeModelSelect Optional hook emitter. When supplied,
	 *   before_model_select is fired during fallback so the learning system can
	 *   influence which replacement model is chosen.
	 */
	constructor(
		private settingsManager: SettingsManager,
		private authStorage: AuthStorage,
		private modelRegistry: ModelRegistry,
		private emitBeforeModelSelect?: EmitBeforeModelSelect,
	) {}

	/**
	 * Record which unit is about to run so that a mid-unit model failure is
	 * reported to the before_model_select hook with that unit's type and id
	 * instead of the generic defaults.
	 *
	 * Intended caller (per the original notes): agent-session.ts
	 * setFallbackUnitContext, reached from the SF extension autonomous loop via
	 * pi.setFallbackUnitContext(), before each unit (execute-task,
	 * complete-slice, plan-milestone, …) is dispatched.
	 *
	 * @param ctx The active unit, or null to clear the context.
	 */
	setUnitContext(ctx: { unitType: string; unitId: string } | null): void {
		this._unitContext = ctx;
	}

	/**
	 * Pick a replacement for a model that has just hit a usage limit.
	 *
	 * Configured fallback chains are not consulted here; the replacement is
	 * always reselected from the models the registry currently reports as
	 * available.
	 *
	 * @param currentModel The model that failed.
	 * @param errorType The kind of usage-limit error that was hit.
	 * @returns The replacement, or null when fallback is disabled or no
	 *   candidate is usable.
	 */
	async findFallback(
		currentModel: Model<Api>,
		errorType: UsageLimitErrorType,
	): Promise<FallbackResult | null> {
		const settings = this.settingsManager.getFallbackSettings();
		if (!settings.enabled) {
			return null;
		}

		// Quotas are typically per-model (e.g. google-gemini-cli's Code Assist
		// per-model limits), so a quota error does not put the whole provider
		// into the exhausted state. Every other error type does.
		// NOTE(review): candidate selection below still skips every model of the
		// failing provider, so same-provider siblings are not picked by this
		// call even for quota errors — confirm this is intended.
		const isQuotaError = errorType === "quota_exhausted";
		if (!isQuotaError) {
			this.authStorage.markProviderExhausted(currentModel.provider, errorType);
		}

		return this._findAnyAvailableFallback(currentModel, errorType);
	}

	/**
	 * Automatic restoration is a no-op: replacements are always reselected from
	 * scratch rather than by walking a chain, so there is no "original" model to
	 * restore to.
	 *
	 * @returns Always null.
	 */
	async checkForRestoration(
		_currentModel: Model<Api>,
	): Promise<FallbackResult | null> {
		return null;
	}

	/**
	 * Return the first usable model of a named chain, scanning from its start.
	 * Useful for initial model selection.
	 *
	 * @param chainName Key of the chain in the fallback settings.
	 * @returns The first usable entry, or null when fallback is disabled, the
	 *   chain is missing or empty, or no entry is usable.
	 */
	async getBestAvailable(chainName: string): Promise<FallbackResult | null> {
		const settings = this.settingsManager.getFallbackSettings();
		if (!settings.enabled) {
			return null;
		}

		const chain = settings.chains[chainName];
		if (!chain) {
			return null;
		}
		if (chain.length === 0) {
			return null;
		}

		return this._findAvailableInChain(chainName, chain, 0);
	}

	/**
	 * List the names of every configured chain that contains the given model.
	 *
	 * @param provider Provider to match against chain entries.
	 * @param modelId Model id to match against chain entries.
	 * @returns Matching chain names, in the settings' own iteration order.
	 */
	findChainsForModel(provider: string, modelId: string): string[] {
		const { chains } = this.settingsManager.getFallbackSettings();

		return Object.entries(chains)
			.filter(([, members]) =>
				members.some(
					(member) => member.provider === provider && member.model === modelId,
				),
			)
			.map(([name]) => name);
	}

	/**
	 * Scan `entries` over [startIndex, endIndex) and return the first entry that
	 * is usable right now.
	 *
	 * An entry is usable when its provider is not exhausted, the model is known
	 * to the registry, and the provider is request-ready.
	 *
	 * @param chainName Name reported back in the result.
	 * @param entries Chain entries to scan.
	 * @param startIndex First index to inspect.
	 * @param endIndex Exclusive upper bound; defaults to `entries.length`.
	 */
	private async _findAvailableInChain(
		chainName: string,
		entries: FallbackChainEntry[],
		startIndex: number,
		endIndex?: number,
	): Promise<FallbackResult | null> {
		const stop = endIndex ?? entries.length;

		for (let idx = startIndex; idx < stop; idx += 1) {
			const { provider, model: modelId } = entries[idx];

			// Provider-level backoff comes first so exhausted providers are
			// skipped before the registry is consulted at all.
			if (!this.authStorage.isProviderAvailable(provider)) {
				continue;
			}

			// The model must exist in the registry and its provider must be
			// request-ready (authMode-aware).
			const resolved = this.modelRegistry.find(provider, modelId);
			if (resolved && this.modelRegistry.isProviderRequestReady(provider)) {
				return {
					model: resolved,
					chainName,
					reason: `falling back to ${provider}/${modelId}`,
				};
			}
		}

		return null;
	}

	/**
	 * Free reselection from the registry's currently available models.
	 *
	 * Candidates are every available model from a different provider whose
	 * provider is neither exhausted nor lacking request-ready auth. The
	 * before_model_select hook gets the first say; if it is absent, fails, or
	 * names a model outside the candidate list, a heuristic picks instead:
	 * same reasoning capability first, then the larger context window.
	 *
	 * @param currentModel The model that failed.
	 * @param errorType Error that triggered the fallback; forwarded to the hook.
	 * @returns The chosen replacement, or null when there are no candidates.
	 */
	private async _findAnyAvailableFallback(
		currentModel: Model<Api>,
		errorType?: UsageLimitErrorType,
	): Promise<FallbackResult | null> {
		const candidates = this.modelRegistry.getAvailable().filter(
			(m) =>
				// Same provider is out — credential rotation was already tried.
				m.provider !== currentModel.provider &&
				// Exhausted providers are out.
				this.authStorage.isProviderAvailable(m.provider) &&
				// Providers without usable auth are out.
				this.modelRegistry.isProviderRequestReady(m.provider),
		);

		if (candidates.length === 0) {
			return null;
		}

		const learned = await this._selectViaHook(
			currentModel,
			errorType,
			candidates,
		);
		if (learned) {
			return learned;
		}

		// Heuristic ordering: reasoning-capability match wins, ties are broken
		// by the larger context window (a missing window counts as 0).
		const matchesReasoning = (m: Model<Api>): number =>
			m.reasoning === currentModel.reasoning ? 1 : 0;
		candidates.sort(
			(left, right) =>
				matchesReasoning(right) - matchesReasoning(left) ||
				(right.contextWindow ?? 0) - (left.contextWindow ?? 0),
		);

		const chosen = candidates[0];
		return {
			model: chosen,
			chainName: "fresh-selection",
			reason: `reselected ${chosen.provider}/${chosen.id} from available models`,
		};
	}

	/**
	 * Ask the before_model_select hook (when one is configured) to choose among
	 * `candidates`.
	 *
	 * Firing the hook lets the learning system record the current model as
	 * failed (reason="fallback") and return an outcome-weighted replacement.
	 * Any hook failure is deliberately swallowed so the caller can fall back to
	 * its heuristic.
	 *
	 * @returns The hook's pick when it names one of `candidates` as
	 *   "provider/modelId"; otherwise null.
	 */
	private async _selectViaHook(
		currentModel: Model<Api>,
		errorType: UsageLimitErrorType | undefined,
		candidates: Model<Api>[],
	): Promise<FallbackResult | null> {
		if (!this.emitBeforeModelSelect) {
			return null;
		}

		try {
			const failedId = `${currentModel.provider}/${currentModel.id}`;
			const decision = await this.emitBeforeModelSelect({
				unitType: this._unitContext?.unitType ?? "execute-task",
				unitId: this._unitContext?.unitId ?? `fallback:${failedId}`,
				classification: {
					tier: "standard",
					reason: errorType ?? "unknown",
					downgraded: false,
				},
				eligibleModels: candidates.map((m) => `${m.provider}/${m.id}`),
				currentModelId: failedId,
				reason: "fallback",
			});

			const pickedId = decision?.modelId;
			if (!pickedId) {
				return null;
			}

			// Split on the FIRST slash only: model ids may contain slashes.
			const sep = pickedId.indexOf("/");
			if (sep <= 0) {
				return null;
			}
			const pickedProvider = pickedId.slice(0, sep);
			const pickedModel = pickedId.slice(sep + 1);

			const preferred = candidates.find(
				(m) => m.provider === pickedProvider && m.id === pickedModel,
			);
			if (!preferred) {
				return null;
			}

			return {
				model: preferred,
				chainName: "learned-fallback",
				reason: `learned routing selected ${pickedId} (outcome-weighted)`,
			};
		} catch {
			// Hook failure → caller falls through to the heuristic sort.
			return null;
		}
	}
}