diff --git a/src/resources/extensions/sf/benchmark-selector.js b/src/resources/extensions/sf/benchmark-selector.js
index 99c05ae85..754fc09af 100644
--- a/src/resources/extensions/sf/benchmark-selector.js
+++ b/src/resources/extensions/sf/benchmark-selector.js
@@ -25,6 +25,75 @@ import { join } from "node:path";
 import { tierOrdinal } from "./complexity-classifier.js";
 import { lookup } from "./model-registry.js";
 import { getModelTier } from "./model-router.js";
+import { getProviderQuotaState } from "./provider-quota-cache.js";
+
+// ─── Quota-aware routing (phase 2) ────────────────────────────────────────────
+// Bias dispatch toward under-used subscriptions ("spend the subs") and away
+// from near-exhausted ones (avoid 429 walls). Multiplier is applied to the
+// benchmark score before sort, so it only re-orders within the existing
+// score → cost → coverage → preference ladder. Unknown quota state means
+// neutral 1.0 — never punish a provider for having no public quota API.
+// An entry whose windows carry no finite usedFraction is unknown too.
+//
+// Curve, keyed on max(usedFraction) across all windows for the provider:
+//   < 0.20  → 1.15  (boost: lots of headroom, prefer to use it)
+//   < 0.50  → 1.00  (neutral)
+//   < 0.70  → 0.92  (slight steer away)
+//   < 0.90  → 0.50  (strong de-prioritize)
+//   < 0.95  → 0.20  (near-exhaustion)
+//   ≥ 0.95  → 0.05  (effectively skip; only used if no alternatives)
+//
+// The "max across windows" choice means kimi-coding's 5h-rolling window
+// (typically tighter) binds the decision even when the weekly is fresh.
+const QUOTA_BOOST = 1.15;
+const QUOTA_NEUTRAL = 1.0;
+const QUOTA_MILD = 0.92;
+const QUOTA_STRONG_DEMOTE = 0.5;
+const QUOTA_NEAR_EXHAUSTION = 0.2;
+const QUOTA_SKIP = 0.05;
+
+/**
+ * Map a provider's quota usage to a score multiplier for benchmark routing.
+ *
+ * @param {string} providerKey - Provider id handed to the quota resolver.
+ * @param {(providerKey: string) => object | null | undefined} [getQuotaState]
+ *   Resolver override; defaults to getProviderQuotaState.
+ * @returns {number} One of the QUOTA_* multipliers; QUOTA_NEUTRAL whenever
+ *   the resolver throws or yields no usable usage data.
+ */
+export function quotaHeadroomMultiplier(providerKey, getQuotaState) {
+  const resolver = getQuotaState ?? getProviderQuotaState;
+  let state;
+  try {
+    state = resolver(providerKey);
+  } catch {
+    // A failing quota lookup must never break routing; treat it as unknown.
+    return QUOTA_NEUTRAL;
+  }
+  if (
+    !state ||
+    !state.ok ||
+    !Array.isArray(state.windows) ||
+    state.windows.length === 0
+  ) {
+    return QUOTA_NEUTRAL;
+  }
+  // Only windows that report a finite numeric usedFraction carry a signal.
+  const fractions = state.windows
+    .map((w) => w?.usedFraction)
+    .filter((f) => Number.isFinite(f));
+  // No usable usage data is "unknown", not "0% used" — stay neutral rather
+  // than handing out the headroom boost.
+  if (fractions.length === 0) return QUOTA_NEUTRAL;
+  // The tightest window binds; the 0 floor clamps negative readings.
+  const maxUsed = Math.max(0, ...fractions);
+  if (maxUsed < 0.2) return QUOTA_BOOST;
+  if (maxUsed < 0.5) return QUOTA_NEUTRAL;
+  if (maxUsed < 0.7) return QUOTA_MILD;
+  if (maxUsed < 0.9) return QUOTA_STRONG_DEMOTE;
+  if (maxUsed < 0.95) return QUOTA_NEAR_EXHAUSTION;
+  return QUOTA_SKIP;
+}
 
 // ─── Benchmark File Loader ───────────────────────────────────────────────────
 let _benchmarksCache = null;
@@ -459,10 +528,16 @@ export function selectByBenchmarks(unitType, candidates, opts = {}) {
     .map((c) => {
       const { score, coverage } = scoreCandidate(c, weights, benchmarks);
       const fullId = `${c.provider}/${c.id}`;
+      const quotaMul = quotaHeadroomMultiplier(
+        c.provider.toLowerCase(),
+        opts.getQuotaState,
+      );
       return {
         id: fullId,
         provider: c.provider.toLowerCase(),
-        score,
+        score: score * quotaMul,
+        baseScore: score,
+        quotaMultiplier: quotaMul,
         coverage,
         cost: estimateCostPerMillion(c, unitType),
         capabilitySignal: capabilityTieBreakScore(c, unitType),
diff --git a/src/resources/extensions/sf/tests/benchmark-selector-quota.test.mjs b/src/resources/extensions/sf/tests/benchmark-selector-quota.test.mjs
new file mode 100644
index 000000000..3332d8b63
--- /dev/null
+++ b/src/resources/extensions/sf/tests/benchmark-selector-quota.test.mjs
@@ -0,0 +1,175 @@
+/**
+ * benchmark-selector-quota.test.mjs
+ *
+ * Tests for phase-2 quota-aware routing: the quotaHeadroomMultiplier
+ * function and its integration into selectByBenchmarks. Verifies the
+ * curve, the "max across windows" rule, the unknown-quota neutral
+ * fallback, and that high-usage providers are demoted in selection.
+ */
+import assert from "node:assert/strict";
+import { describe, test } from "vitest";
+
+import "../preferences.js"; // wires circular dep loader
+
+import {
+  quotaHeadroomMultiplier,
+  selectByBenchmarks,
+} from "../benchmark-selector.js";
+
+// ─── Multiplier curve ────────────────────────────────────────────────────────
+
+function stub(usedFractions) {
+  return () => ({
+    ok: true,
+    fetchedAt: new Date().toISOString(),
+    windows: usedFractions.map((f, i) => ({
+      label: `w${i}`,
+      used: f * 100,
+      limit: 100,
+      usedFraction: f,
+    })),
+  });
+}
+
+describe("quotaHeadroomMultiplier", () => {
+  test("unknown provider → neutral 1.0", () => {
+    assert.equal(quotaHeadroomMultiplier("nope", () => null), 1.0);
+  });
+
+  test("not-ok entry → neutral 1.0", () => {
+    assert.equal(
+      quotaHeadroomMultiplier("zai", () => ({ ok: false, error: "x", windows: [] })),
+      1.0,
+    );
+  });
+
+  test("empty windows → neutral 1.0", () => {
+    assert.equal(
+      quotaHeadroomMultiplier("kimi-coding", () => ({ ok: true, windows: [] })),
+      1.0,
+    );
+  });
+
+  test("windows without a numeric usedFraction → neutral 1.0", () => {
+    assert.equal(
+      quotaHeadroomMultiplier("openrouter", () => ({
+        ok: true,
+        windows: [{ label: "5h" }, { label: "weekly", usedFraction: Number.NaN }],
+      })),
+      1.0,
+    );
+  });
+
+  test("0% used → 1.15 boost (spend the sub)", () => {
+    assert.equal(quotaHeadroomMultiplier("minimax", stub([0])), 1.15);
+  });
+
+  test("19% used → still boost (under 0.2 threshold)", () => {
+    assert.equal(quotaHeadroomMultiplier("kimi-coding", stub([0.19])), 1.15);
+  });
+
+  test("30% used → neutral 1.0", () => {
+    assert.equal(quotaHeadroomMultiplier("kimi-coding", stub([0.3])), 1.0);
+  });
+
+  test("60% used → mild 0.92", () => {
+    assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.6])), 0.92);
+  });
+
+  test("80% used → strong demote 0.5", () => {
+    assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.807])), 0.5);
+  });
+
+  test("93% used → near-exhaustion 0.2", () => {
+    assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.93])), 0.2);
+  });
+
+  test("98% used → skip 0.05", () => {
+    assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.98])), 0.05);
+  });
+
+  test("multiple windows: max binds", () => {
+    // 5h window is 85% used, weekly is 20%. Binding constraint = 85% → strong demote 0.5.
+    assert.equal(
+      quotaHeadroomMultiplier("kimi-coding", stub([0.2, 0.85])),
+      0.5,
+    );
+  });
+
+  test("getQuotaState throwing → neutral (never crashes)", () => {
+    assert.equal(
+      quotaHeadroomMultiplier("x", () => {
+        throw new Error("boom");
+      }),
+      1.0,
+    );
+  });
+});
+
+// ─── Integration with selectByBenchmarks ─────────────────────────────────────
+
+describe("selectByBenchmarks with quota state", () => {
+  // Two candidates with identical strong benchmarks; the one with high
+  // quota usage should be demoted below the under-used one.
+  const candidates = [
+    { provider: "minimax", id: "MiniMax-M2.7" }, // in static benchmarks
+    { provider: "kimi-coding", id: "kimi-k2.6" }, // in static benchmarks
+  ];
+
+  test("identical benchmark candidates: quota state demotes the high-usage provider", () => {
+    // minimax: 0% used (boost), kimi-coding: 90% used (near-exhaustion 0.2).
+    // Expectation: minimax wins primary slot.
+    const result = selectByBenchmarks("execute-task", candidates, {
+      getQuotaState: (pid) => {
+        if (pid === "minimax") return stub([0])();
+        if (pid === "kimi-coding") return stub([0.9])();
+        return null;
+      },
+      providerPreference: ["kimi-coding", "minimax"], // kimi listed first
+    });
+    assert.ok(result, "should produce a result");
+    assert.ok(
+      result.primary.startsWith("minimax/"),
+      `primary should be minimax/* (under-used) but got ${result.primary}`,
+    );
+  });
+
+  test("inverse quota: high-usage kimi flips to under-used kimi → kimi wins", () => {
+    const result = selectByBenchmarks("execute-task", candidates, {
+      getQuotaState: (pid) => {
+        if (pid === "minimax") return stub([0.9])(); // demote
+        if (pid === "kimi-coding") return stub([0])(); // boost
+        return null;
+      },
+      providerPreference: ["kimi-coding", "minimax"],
+    });
+    assert.ok(result.primary.startsWith("kimi-coding/"));
+  });
+
+  test("unknown quota state for both: falls back to original ordering (no skew)", () => {
+    const resultA = selectByBenchmarks("execute-task", candidates, {
+      getQuotaState: () => null,
+      providerPreference: ["kimi-coding", "minimax"],
+    });
+    const resultB = selectByBenchmarks("execute-task", candidates, {
+      providerPreference: ["kimi-coding", "minimax"],
+    });
+    // Both should produce the same primary — neutral multiplier means no skew.
+    assert.equal(resultA?.primary, resultB?.primary);
+  });
+
+  test("near-exhausted provider effectively last", () => {
+    const result = selectByBenchmarks("execute-task", candidates, {
+      getQuotaState: (pid) => {
+        if (pid === "minimax") return stub([0.3])(); // neutral
+        if (pid === "kimi-coding") return stub([0.98])(); // skip-tier
+        return null;
+      },
+      providerPreference: ["kimi-coding", "minimax"],
+    });
+    assert.ok(
+      result.primary.startsWith("minimax/"),
+      "a 98%-used provider must not be primary when alternatives exist",
+    );
+  });
+});