singularity-forge/src/resources/extensions/sf/model-cost-table.js

// SF Extension — Model Cost Table
// Static cost reference for known models, used by the dynamic router
// for cross-provider cost comparison.
//
// Costs are approximate per-1K-token rates in USD, listed separately for
// input and output tokens.
// Updated with SF releases. Users can override via preferences.
/**
 * Bundled cost table for known models, refreshed periodically with SF releases.
 *
 * Each row below is written as `[id, inputPer1k, outputPer1k, updatedAt]` and
 * expanded into an `{ id, inputPer1k, outputPer1k, updatedAt }` record. Rates
 * are approximate USD per 1K tokens; `updatedAt` is an ISO date string
 * (presumably the date the rate was last reviewed).
 *
 * Row order is preserved in the exported array.
 */
export const BUNDLED_COST_TABLE = [
	// Anthropic
	["claude-opus-4-6", 0.015, 0.075, "2025-03-15"],
	["claude-sonnet-4-6", 0.003, 0.015, "2025-03-15"],
	["claude-haiku-4-5", 0.0008, 0.004, "2025-03-15"],
	["claude-sonnet-4-5-20250514", 0.003, 0.015, "2025-03-15"],
	["claude-3-5-sonnet-latest", 0.003, 0.015, "2025-03-15"],
	["claude-3-5-haiku-latest", 0.0008, 0.004, "2025-03-15"],
	["claude-3-opus-latest", 0.015, 0.075, "2025-03-15"],

	// OpenAI
	["gpt-4o", 0.0025, 0.01, "2025-03-15"],
	["gpt-4o-mini", 0.00015, 0.0006, "2025-03-15"],
	["gpt-4.1", 0.002, 0.008, "2026-03-29"],
	["gpt-4.1-mini", 0.0004, 0.0016, "2026-03-29"],
	["gpt-4.1-nano", 0.0001, 0.0004, "2026-03-29"],
	["gpt-5", 0.01, 0.04, "2026-03-29"],
	["gpt-5-mini", 0.0003, 0.0012, "2026-03-29"],
	["gpt-5-nano", 0.0001, 0.0004, "2026-03-29"],
	["gpt-5-pro", 0.015, 0.06, "2026-03-29"],
	["o1", 0.015, 0.06, "2025-03-15"],
	["o3", 0.015, 0.06, "2025-03-15"],
	["o4-mini", 0.005, 0.02, "2026-03-29"],
	["o4-mini-deep-research", 0.005, 0.02, "2026-03-29"],
	["gpt-4-turbo", 0.01, 0.03, "2025-03-15"],

	// OpenAI Codex
	["gpt-5.1", 0.005, 0.02, "2026-03-29"],
	["gpt-5.1-codex-max", 0.003, 0.012, "2026-03-29"],
	["gpt-5.1-codex-mini", 0.0003, 0.0012, "2026-03-29"],
	["gpt-5.2", 0.005, 0.02, "2026-03-29"],
	["gpt-5.2-codex", 0.005, 0.02, "2026-03-29"],
	["gpt-5.3-codex", 0.005, 0.02, "2026-03-29"],
	["gpt-5.3-codex-spark", 0.0003, 0.0012, "2026-03-29"],
	["gpt-5.4", 0.005, 0.02, "2026-03-29"],
	["gpt-5.4-mini", 0.00075, 0.0045, "2026-04-18"],
	// GPT-5.5 API list price, also used for live Codex OAuth routing.
	// Source: https://openai.com/api/pricing/
	["gpt-5.5", 0.005, 0.03, "2026-04-23"],

	// Google
	["gemini-2.0-flash", 0.0001, 0.0004, "2025-03-15"],
	["gemini-flash-2.0", 0.0001, 0.0004, "2025-03-15"],
	["gemini-2.5-pro", 0.00125, 0.005, "2025-03-15"],

	// Mistral
	["codestral-latest", 0.0003, 0.0009, "2026-04-29"],
	["devstral-2512", 0.0004, 0.002, "2026-04-29"],
	["devstral-medium-latest", 0.0004, 0.002, "2026-04-29"],
	["devstral-small-2507", 0.0001, 0.0003, "2026-04-29"],
	["magistral-medium-latest", 0.002, 0.005, "2026-04-29"],
	["magistral-small", 0.0005, 0.0015, "2026-04-29"],
	["ministral-3b-latest", 0.00004, 0.00004, "2026-04-29"],
	["ministral-8b-latest", 0.0001, 0.0001, "2026-04-29"],
	["mistral-large-2512", 0.0005, 0.0015, "2026-04-29"],
	["mistral-large-latest", 0.0005, 0.0015, "2026-04-29"],
	["mistral-medium-latest", 0.0004, 0.002, "2026-04-29"],
	["mistral-nemo", 0.00015, 0.00015, "2026-04-29"],
	["mistral-small-2603", 0.00015, 0.0006, "2026-04-29"],
	["mistral-small-latest", 0.00015, 0.0006, "2026-04-29"],
	["pixtral-12b", 0.00015, 0.00015, "2026-04-29"],
	["pixtral-large-latest", 0.002, 0.006, "2026-04-29"],

	// DeepSeek
	["deepseek-chat", 0.00014, 0.00028, "2025-03-15"],

	// Google — Gemini 3.x previews and Gemini 2.5 flash models
	["gemini-3.1-pro-preview", 0.00125, 0.005, "2026-05-01"],
	["gemini-3.1-flash-lite-preview", 0.0001, 0.0004, "2026-05-01"],
	["gemini-3-pro-preview", 0.00125, 0.005, "2026-05-01"],
	["gemini-3-flash-preview", 0.0001, 0.0004, "2026-05-01"],
	["gemini-2.5-flash", 0.0001, 0.0004, "2026-05-01"],
	["gemini-2.5-flash-lite", 0.00005, 0.0002, "2026-05-01"],

	// GLM (ZhipuAI)
	["glm-4.7", 0.0006, 0.0024, "2026-05-01"],
	["glm-4.7-flash", 0, 0, "2026-05-01"],
	["glm-4.7-flashx", 0.00007, 0.00028, "2026-05-01"],
	["glm-5", 0.001, 0.004, "2026-05-01"],
	["glm-5-turbo", 0.0012, 0.0048, "2026-05-01"],
	["glm-5.1", 0.0014, 0.0056, "2026-05-01"],
	["glm-5v-turbo", 0.0012, 0.0048, "2026-05-01"],

	// Qwen (Alibaba)
	["qwen3-coder:480b", 0.0004, 0.0016, "2026-05-01"],
	["qwen3-coder-next", 0.0004, 0.0016, "2026-05-01"],
	["qwen3-next:80b", 0.0002, 0.0008, "2026-05-01"],

	// Kimi (Moonshot)
	["kimi-k2.6", 0.0006, 0.0024, "2026-05-01"],
	["kimi-for-coding", 0.0006, 0.0024, "2026-05-01"],
	["kimi-k2-thinking", 0.001, 0.004, "2026-05-01"],

	// MiniMax
	["MiniMax-M2.7", 0.0006, 0.0024, "2026-05-01"],
	["MiniMax-M2.7-highspeed", 0.0006, 0.0024, "2026-05-01"],

	// Mistral versioned variants
	["devstral-medium-2507", 0.0004, 0.002, "2026-05-01"],
	["devstral-small-2505", 0.0001, 0.0003, "2026-05-01"],
	["labs-devstral-small-2512", 0.0001, 0.0003, "2026-05-01"],
	["mistral-large-2411", 0.002, 0.006, "2026-05-01"],
	["mistral-medium-2505", 0.0004, 0.002, "2026-05-01"],
	["mistral-medium-2508", 0.0004, 0.002, "2026-05-01"],
	["mistral-small-2506", 0.0001, 0.0006, "2026-05-01"],
	["open-mistral-7b", 0.00025, 0.00025, "2026-05-01"],
	["open-mixtral-8x22b", 0.002, 0.006, "2026-05-01"],
	["open-mixtral-8x7b", 0.0007, 0.0007, "2026-05-01"],
].map(([id, inputPer1k, outputPer1k, updatedAt]) => ({
	id,
	inputPer1k,
	outputPer1k,
	updatedAt,
}));
/**
 * Look up the bundled cost entry for a model ID.
 *
 * Any provider prefix is stripped first ("openai/gpt-4o" -> "gpt-4o").
 * Matching then goes from most to least precise:
 *  1. An exact ID match.
 *  2. The longest table ID contained in the requested ID, so a dated or
 *     suffixed variant such as "gpt-4o-mini-2024-07-18" resolves to
 *     "gpt-4o-mini" rather than the shorter "gpt-4o" listed before it.
 *  3. The shortest table ID that contains the requested ID, so
 *     "claude-sonnet-4-5" resolves to "claude-sonnet-4-5-20250514".
 * Ties within a tier go to the entry listed first in the table.
 *
 * @param {string} modelId - Model ID, optionally prefixed with "provider/".
 * @returns {object | undefined} The matching table entry, or undefined when
 *   nothing matches or the ID is empty / not a string.
 */
export function lookupModelCost(modelId) {
	if (typeof modelId !== "string") {
		return undefined;
	}
	// Keep only the segment after the last "/" (the whole string if none).
	const bareId = modelId.slice(modelId.lastIndexOf("/") + 1);
	// An empty ID is a substring of every table ID; treat it as "not found"
	// instead of silently returning the first row's pricing.
	if (bareId === "") {
		return undefined;
	}

	let longestContained; // table ID is a substring of bareId
	let shortestContaining; // bareId is a substring of the table ID
	for (const entry of BUNDLED_COST_TABLE) {
		if (entry.id === bareId) {
			return entry;
		}
		if (bareId.includes(entry.id)) {
			if (
				longestContained === undefined ||
				entry.id.length > longestContained.id.length
			) {
				longestContained = entry;
			}
		} else if (entry.id.includes(bareId)) {
			if (
				shortestContaining === undefined ||
				entry.id.length < shortestContaining.id.length
			) {
				shortestContaining = entry;
			}
		}
	}
	return longestContained ?? shortestContaining;
}
/**
 * Comparator over two model IDs based on their input-token rate.
 *
 * Returns a negative number when model A is cheaper, positive when model B is
 * cheaper, and 0 when the rates are equal. A model without a table entry is
 * given a rate of 999 so it compares as more expensive than any listed model
 * while keeping the subtraction finite.
 */
export function compareModelCost(modelIdA, modelIdB) {
	const [rateA, rateB] = [modelIdA, modelIdB].map(
		(id) => lookupModelCost(id)?.inputPer1k ?? 999,
	);
	return rateA - rateB;
}
/**
 * Compute the effective cost of a single token (USD per token, not per 1K)
 * for a provider/model pair, honoring subscription amortization.
 *
 * How the rate is chosen:
 *  1. Subscription: when `subscription.provider` equals `provider`
 *     (case-insensitively) and `subscription.monthly_cost_usd` is a positive
 *     number, the rate is
 *       monthly_cost_usd / max(tokens_used_this_month, 1_000_000)
 *     and is applied to input and output tokens alike.
 *  2. List price: otherwise the model's table entry is used, with each
 *     per-1K rate divided by 1000.
 *  3. Unknown model: if there is no table entry, both rates are 0.
 *
 * Because the result is per single token, callers can multiply it directly
 * by token counts.
 *
 * @returns {{ inputPerToken: number, outputPerToken: number, isSubscription: boolean }}
 */
export function getEffectiveTokenCost(provider, modelId, subscription) {
	const requestedProvider = provider.toLowerCase();
	const subscriptionProvider = subscription?.provider?.toLowerCase();
	const monthlyFee = subscription?.monthly_cost_usd;

	const hasAmortizableSubscription =
		Boolean(subscriptionProvider) &&
		subscriptionProvider === requestedProvider &&
		monthlyFee != null &&
		monthlyFee > 0;

	if (!hasAmortizableSubscription) {
		const listed = lookupModelCost(modelId);
		if (listed) {
			return {
				inputPerToken: listed.inputPer1k / 1000,
				outputPerToken: listed.outputPer1k / 1000,
				isSubscription: false,
			};
		}
		return { inputPerToken: 0, outputPerToken: 0, isSubscription: false };
	}

	// Spread the monthly fee across this month's token usage. The denominator
	// is floored at 1M tokens so that low usage early in the month does not
	// inflate the per-token figure to something absurd.
	const amortizationBase = Math.max(
		subscription.tokens_used_this_month ?? 0,
		1_000_000,
	);
	const perToken = monthlyFee / amortizationBase;
	return {
		inputPerToken: perToken,
		outputPerToken: perToken, // subscriptions price input and output alike
		isSubscription: true,
	};
}
/**
 * Estimate the total USD cost of a completed request from its token counts.
 *
 * Per-token rates come from getEffectiveTokenCost, so subscription
 * amortization is applied whenever that function applies it. The result is
 * the input-token cost plus the output-token cost.
 */
export function estimateRequestCost(
	provider,
	modelId,
	inputTokens,
	outputTokens,
	subscription,
) {
	const rates = getEffectiveTokenCost(provider, modelId, subscription);
	const inputCost = inputTokens * rates.inputPerToken;
	const outputCost = outputTokens * rates.outputPerToken;
	return inputCost + outputCost;
}