feat(benchmark-selector): phase 2 — quota-aware routing weight

Bias dispatch toward under-used subscriptions ("spend the subs") and de-prioritize near-exhausted ones (avoid 429 walls). The multiplier is applied to the benchmark score before sorting, so it only re-orders within the existing score → cost → coverage → preference ladder. Unknown quota state stays neutral (1.0) — never punish a provider for having no public quota API. Curve, keyed on max(usedFraction) across all windows: < 0.20 → 1.15 (boost — lots of headroom, prefer to use it); < 0.50 → 1.00 (neutral); < 0.70 → 0.92 (slight steer away); < 0.90 → 0.50 (strong de-prioritize); < 0.95 → 0.20 (near-exhaustion); ≥ 0.95 → 0.05 (effectively skip). Max-across-windows means kimi-coding's 5h-rolling window (tighter) binds the decision even when the weekly window is fresh. The new exported helper quotaHeadroomMultiplier(providerKey, getQuotaState?) takes the resolver as an optional dependency for testability; it defaults to getProviderQuotaState from provider-quota-cache.js. 16 new tests cover the curve and the selectByBenchmarks integration (unknown quota → unchanged, demoted high-usage provider, boosted under-used provider, near-exhausted provider skipped when alternatives exist). Previously filed as SF backlog item sf-mpmp8ie6xf-z4cxhg — this commit closes that loop. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 18:09:33 +02:00 · 2026-05-16 18:09:33 +02:00 · 8d0f41436b
commit 8d0f41436b
parent b39cf3387e
2 changed files with 227 additions and 1 deletions
--- a/src/resources/extensions/sf/benchmark-selector.js
+++ b/src/resources/extensions/sf/benchmark-selector.js
@ -25,6 +25,61 @@ import { join } from "node:path";
 import { tierOrdinal } from "./complexity-classifier.js";
 import { lookup } from "./model-registry.js";
 import { getModelTier } from "./model-router.js";
+import { getProviderQuotaState } from "./provider-quota-cache.js";
+
+// ─── Quota-aware routing (phase 2) ────────────────────────────────────────────
+// Bias dispatch toward under-used subscriptions ("spend the subs") and away
+// from near-exhausted ones (avoid 429 walls). Multiplier is applied to the
+// benchmark score before sort, so it only re-orders within the existing
+// score → cost → coverage → preference ladder. Unknown quota state means
+// neutral 1.0 — never punish a provider for having no public quota API.
+//
+// Curve, keyed on max(usedFraction) across all windows for the provider:
+//   < 0.20 → 1.15  (boost: lots of headroom, prefer to use it)
+//   < 0.50 → 1.00  (neutral)
+//   < 0.70 → 0.92  (slight steer away)
+//   < 0.90 → 0.50  (strong de-prioritize)
+//   < 0.95 → 0.20  (near-exhaustion)
+//   ≥ 0.95 → 0.05  (effectively skip; only used if no alternatives)
+//
+// The "max across windows" choice means kimi-coding's 5h-rolling window
+// (typically tighter) binds the decision even when the weekly is fresh.
+const QUOTA_BOOST = 1.15; // max usedFraction < 0.20
+const QUOTA_NEUTRAL = 1.0; // max usedFraction < 0.50, and the fallback when quota state is unknown
+const QUOTA_MILD = 0.92; // max usedFraction < 0.70
+const QUOTA_STRONG_DEMOTE = 0.5; // max usedFraction < 0.90
+const QUOTA_NEAR_EXHAUSTION = 0.2; // max usedFraction < 0.95
+const QUOTA_SKIP = 0.05; // max usedFraction >= 0.95
+
+/**
+ * Map a provider's quota usage to a score multiplier for routing.
+ *
+ * The decision is keyed on the highest `usedFraction` found across the
+ * provider's quota windows, so the tightest window binds the result.
+ *
+ * @param {string} providerKey - Provider identifier handed to the resolver.
+ * @param {(providerKey: string) => any} [getQuotaState] - Optional resolver
+ *   returning `{ ok, windows: [{ usedFraction }] }`; defaults to
+ *   `getProviderQuotaState`.
+ * @returns {number} One of the QUOTA_* multipliers. `QUOTA_NEUTRAL` is
+ *   returned whenever the quota state is unknown: the resolver throws, the
+ *   state is missing or not ok, `windows` is not an array, or no window
+ *   carries a finite numeric `usedFraction`.
+ */
+export function quotaHeadroomMultiplier(providerKey, getQuotaState) {
+	const resolver = getQuotaState ?? getProviderQuotaState;
+	let state;
+	try {
+		state = resolver(providerKey);
+	} catch {
+		// Quota lookup is best-effort: a failing resolver must not break routing.
+		return QUOTA_NEUTRAL;
+	}
+	if (!state?.ok || !Array.isArray(state.windows)) {
+		return QUOTA_NEUTRAL;
+	}
+	const readings = state.windows
+		.map((w) => w?.usedFraction)
+		.filter((f) => typeof f === "number" && Number.isFinite(f));
+	// No usable reading (empty windows, or windows without a numeric
+	// usedFraction) means the quota is unknown. Stay neutral instead of
+	// letting a default of 0 fall into the boost tier.
+	if (readings.length === 0) return QUOTA_NEUTRAL;
+	// Floor at 0 so negative readings behave like "nothing used".
+	const maxUsed = Math.max(0, ...readings);
+	if (maxUsed < 0.2) return QUOTA_BOOST;
+	if (maxUsed < 0.5) return QUOTA_NEUTRAL;
+	if (maxUsed < 0.7) return QUOTA_MILD;
+	if (maxUsed < 0.9) return QUOTA_STRONG_DEMOTE;
+	if (maxUsed < 0.95) return QUOTA_NEAR_EXHAUSTION;
+	return QUOTA_SKIP;
+}

 // ─── Benchmark File Loader ───────────────────────────────────────────────────
 let _benchmarksCache = null;
@ -459,10 +514,16 @@ export function selectByBenchmarks(unitType, candidates, opts = {}) {
 		.map((c) => {
 			const { score, coverage } = scoreCandidate(c, weights, benchmarks);
 			const fullId = `${c.provider}/${c.id}`;
+			const quotaMul = quotaHeadroomMultiplier(
+				c.provider.toLowerCase(),
+				opts.getQuotaState,
+			);
 			return {
 				id: fullId,
 				provider: c.provider.toLowerCase(),
-				score,
+				score: score * quotaMul,
+				baseScore: score,
+				quotaMultiplier: quotaMul,
 				coverage,
 				cost: estimateCostPerMillion(c, unitType),
 				capabilitySignal: capabilityTieBreakScore(c, unitType),
--- a/src/resources/extensions/sf/tests/benchmark-selector-quota.test.mjs
+++ b/src/resources/extensions/sf/tests/benchmark-selector-quota.test.mjs
@ -0,0 +1,165 @@
+/**
+ * benchmark-selector-quota.test.mjs
+ *
+ * Tests for phase-2 quota-aware routing: the quotaHeadroomMultiplier
+ * function and its integration into selectByBenchmarks. Verifies the
+ * curve, the "max across windows" rule, the unknown-quota neutral
+ * fallback, and that high-usage providers are demoted in selection.
+ */
+import assert from "node:assert/strict";
+import { describe, test } from "vitest";
+
+import "../preferences.js"; // wires circular dep loader
+
+import {
+	quotaHeadroomMultiplier,
+	selectByBenchmarks,
+} from "../benchmark-selector.js";
+
+// ─── Multiplier curve ────────────────────────────────────────────────────────
+
+/**
+ * Build a fake quota resolver for tests.
+ *
+ * @param {number[]} usedFractions - One entry per quota window.
+ * @returns {() => object} Resolver yielding an ok quota state whose windows
+ *   mirror `usedFractions` (labelled w0, w1, ... against a limit of 100).
+ */
+function stub(usedFractions) {
+	const toWindow = (fraction, index) => {
+		const label = `w${index}`;
+		const used = fraction * 100;
+		return { label, used, limit: 100, usedFraction: fraction };
+	};
+	return () => {
+		const fetchedAt = new Date().toISOString();
+		const windows = usedFractions.map(toWindow);
+		return { ok: true, fetchedAt, windows };
+	};
+}
+
+// Unit tests for the multiplier curve. Every case passes its own resolver as
+// the second argument, so the default quota cache lookup is never exercised.
+describe("quotaHeadroomMultiplier", () => {
+	// Fallback cases: anything that is not an ok state with windows → 1.0.
+	test("unknown provider → neutral 1.0", () => {
+		assert.equal(quotaHeadroomMultiplier("nope", () => null), 1.0);
+	});
+
+	test("not-ok entry → neutral 1.0", () => {
+		assert.equal(
+			quotaHeadroomMultiplier("zai", () => ({ ok: false, error: "x", windows: [] })),
+			1.0,
+		);
+	});
+
+	test("empty windows → neutral 1.0", () => {
+		assert.equal(
+			quotaHeadroomMultiplier("kimi-coding", () => ({ ok: true, windows: [] })),
+			1.0,
+		);
+	});
+
+	// Curve cases: one single-window probe per tier.
+	test("0% used → 1.15 boost (spend the sub)", () => {
+		assert.equal(quotaHeadroomMultiplier("minimax", stub([0])), 1.15);
+	});
+
+	test("19% used → still boost (under 0.2 threshold)", () => {
+		assert.equal(quotaHeadroomMultiplier("kimi-coding", stub([0.19])), 1.15);
+	});
+
+	test("30% used → neutral 1.0", () => {
+		assert.equal(quotaHeadroomMultiplier("kimi-coding", stub([0.3])), 1.0);
+	});
+
+	test("60% used → mild 0.92", () => {
+		assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.6])), 0.92);
+	});
+
+	test("80% used → strong demote 0.5", () => {
+		// 0.807 sits inside the [0.70, 0.90) band.
+		assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.807])), 0.5);
+	});
+
+	test("93% used → near-exhaustion 0.2", () => {
+		assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.93])), 0.2);
+	});
+
+	test("98% used → skip 0.05", () => {
+		assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.98])), 0.05);
+	});
+
+	test("multiple windows: max binds", () => {
+		// 5h window is 85% used, weekly is 20%. Binding constraint = 85% → strong demote 0.5.
+		assert.equal(
+			quotaHeadroomMultiplier("kimi-coding", stub([0.2, 0.85])),
+			0.5,
+		);
+	});
+
+	// A throwing resolver is swallowed and treated as unknown quota.
+	test("getQuotaState throwing → neutral (never crashes)", () => {
+		assert.equal(
+			quotaHeadroomMultiplier("x", () => {
+				throw new Error("boom");
+			}),
+			1.0,
+		);
+	});
+});
+
+// ─── Integration with selectByBenchmarks ─────────────────────────────────────
+
+// Integration tests: the quota resolver is injected through opts.getQuotaState
+// and the assertions only inspect which provider ends up in result.primary.
+describe("selectByBenchmarks with quota state", () => {
+	// Two candidates with identical strong benchmarks; the one with high
+	// quota usage should be demoted below the under-used one.
+	const candidates = [
+		{ provider: "minimax", id: "MiniMax-M2.7" }, // in static benchmarks
+		{ provider: "kimi-coding", id: "kimi-k2.6" }, // in static benchmarks
+	];
+
+	test("identical benchmark candidates: quota state demotes the high-usage provider", () => {
+		// minimax: 0% used (boost, 1.15). kimi-coding: 90% used — 0.9 is not
+		// < 0.9, so it lands in the near-exhaustion tier (0.2), not strong demote.
+		// Expectation: minimax wins primary slot.
+		const result = selectByBenchmarks("execute-task", candidates, {
+			getQuotaState: (pid) => {
+				if (pid === "minimax") return stub([0])();
+				if (pid === "kimi-coding") return stub([0.9])();
+				return null;
+			},
+			providerPreference: ["kimi-coding", "minimax"], // kimi listed first
+		});
+		assert.ok(result, "should produce a result");
+		assert.ok(
+			result.primary.startsWith("minimax/"),
+			`primary should be minimax/* (under-used) but got ${result.primary}`,
+		);
+	});
+
+	test("inverse quota: high-usage kimi flips to under-used kimi → kimi wins", () => {
+		const result = selectByBenchmarks("execute-task", candidates, {
+			getQuotaState: (pid) => {
+				if (pid === "minimax") return stub([0.9])(); // demote
+				if (pid === "kimi-coding") return stub([0])(); // boost
+				return null;
+			},
+			providerPreference: ["kimi-coding", "minimax"],
+		});
+		assert.ok(result.primary.startsWith("kimi-coding/"));
+	});
+
+	test("unknown quota state for both: falls back to original ordering (no skew)", () => {
+		// resultA injects a resolver that always reports "unknown"; resultB
+		// omits getQuotaState so the selector uses its default resolver.
+		const resultA = selectByBenchmarks("execute-task", candidates, {
+			getQuotaState: () => null,
+			providerPreference: ["kimi-coding", "minimax"],
+		});
+		const resultB = selectByBenchmarks("execute-task", candidates, {
+			providerPreference: ["kimi-coding", "minimax"],
+		});
+		// Both should produce the same primary — neutral multiplier means no skew.
+		assert.equal(resultA?.primary, resultB?.primary);
+	});
+
+	test("near-exhausted provider effectively last", () => {
+		const result = selectByBenchmarks("execute-task", candidates, {
+			getQuotaState: (pid) => {
+				if (pid === "minimax") return stub([0.3])(); // neutral
+				if (pid === "kimi-coding") return stub([0.98])(); // skip-tier
+				return null;
+			},
+			providerPreference: ["kimi-coding", "minimax"],
+		});
+		assert.ok(
+			result.primary.startsWith("minimax/"),
+			"a 98%-used provider must not be primary when alternatives exist",
+		);
+	});
+});