feat(benchmark-selector): phase 2 — quota-aware routing weight

Bias dispatch toward under-used subscriptions ("spend the subs") and
de-prioritize near-exhausted ones (avoid 429 walls). Multiplier is
applied to the benchmark score before sort, so it only re-orders
within the existing score → cost → coverage → preference ladder.
Unknown quota state stays neutral 1.0 — never punish a provider for
having no public quota API.

Curve, keyed on max(usedFraction) across all windows:
  < 0.20 → 1.15  (boost — lots of headroom, prefer to use it)
  < 0.50 → 1.00  (neutral)
  < 0.70 → 0.92  (slight steer away)
  < 0.90 → 0.50  (strong de-prioritize)
  < 0.95 → 0.20  (near-exhaustion)
  ≥ 0.95 → 0.05  (effectively skip)

Max-across-windows means kimi-coding's 5h-rolling window (tighter)
binds the decision even when the weekly is fresh.

New exported helper quotaHeadroomMultiplier(providerKey, getQuotaState?)
takes the resolver as optional dep for testability; defaults to
getProviderQuotaState from provider-quota-cache.js.

16 new tests cover the curve and the selectByBenchmarks integration
(unknown quota → unchanged, demoted high-usage provider, boosted
under-used provider, near-exhausted skipped when alternatives exist).

Previously filed as SF backlog item sf-mpmp8ie6xf-z4cxhg; this commit
closes that loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-16 18:09:33 +02:00
parent b39cf3387e
commit 8d0f41436b
2 changed files with 227 additions and 1 deletions

View file

@ -25,6 +25,61 @@ import { join } from "node:path";
import { tierOrdinal } from "./complexity-classifier.js";
import { lookup } from "./model-registry.js";
import { getModelTier } from "./model-router.js";
import { getProviderQuotaState } from "./provider-quota-cache.js";
// ─── Quota-aware routing (phase 2) ────────────────────────────────────────────
// Bias dispatch toward under-used subscriptions ("spend the subs") and away
// from near-exhausted ones (avoid 429 walls). Multiplier is applied to the
// benchmark score before sort, so it only re-orders within the existing
// score → cost → coverage → preference ladder. Unknown quota state means
// neutral 1.0 — never punish a provider for having no public quota API.
//
// Curve, keyed on max(usedFraction) across all windows for the provider:
// < 0.20 → 1.15 (boost: lots of headroom, prefer to use it)
// < 0.50 → 1.00 (neutral)
// < 0.70 → 0.92 (slight steer away)
// < 0.90 → 0.50 (strong de-prioritize)
// < 0.95 → 0.20 (near-exhaustion)
// ≥ 0.95 → 0.05 (effectively skip; only used if no alternatives)
//
// The "max across windows" choice means kimi-coding's 5h-rolling window
// (typically tighter) binds the decision even when the weekly is fresh.
// Score multipliers for each usage tier, ordered from most to least headroom.
const QUOTA_BOOST = 1.15;
const QUOTA_NEUTRAL = 1.0;
const QUOTA_MILD = 0.92;
const QUOTA_STRONG_DEMOTE = 0.5;
const QUOTA_NEAR_EXHAUSTION = 0.2;
const QUOTA_SKIP = 0.05;

/**
 * Translate a provider's quota usage into a benchmark-score multiplier.
 *
 * The decision is keyed on the highest finite `usedFraction` found across the
 * provider's quota windows, so the tightest window binds the result.
 *
 * Neutral (1.0) is returned whenever usage is unknown: the resolver throws,
 * returns nothing, reports `ok` as falsy, has no `windows` array, or none of
 * the windows carries a finite numeric `usedFraction`. Missing usage data is
 * never interpreted as "0% used", so it cannot earn the headroom boost.
 *
 * @param {string} providerKey - Provider identifier handed to the resolver.
 * @param {(providerKey: string) => ({ ok?: boolean, windows?: Array<{ usedFraction?: number }> } | null | undefined)} [getQuotaState]
 *   Optional quota-state resolver (injectable for tests). Falls back to
 *   `getProviderQuotaState` when omitted.
 * @returns {number} One of the QUOTA_* multipliers.
 */
export function quotaHeadroomMultiplier(providerKey, getQuotaState) {
  const resolver = getQuotaState ?? getProviderQuotaState;
  let state;
  try {
    state = resolver(providerKey);
  } catch {
    // Deliberate best-effort: a failing quota lookup must not break routing.
    return QUOTA_NEUTRAL;
  }
  if (!state?.ok || !Array.isArray(state.windows)) {
    return QUOTA_NEUTRAL;
  }

  const usedFractions = state.windows
    .map((w) => w?.usedFraction)
    .filter((f) => typeof f === "number" && Number.isFinite(f));
  if (usedFractions.length === 0) {
    // No usable measurement (empty or malformed windows) means unknown usage.
    return QUOTA_NEUTRAL;
  }

  // Floor of 0 keeps nonsensical negative fractions in the lowest tier.
  const maxUsed = usedFractions.reduce((max, f) => (f > max ? f : max), 0);

  if (maxUsed < 0.2) return QUOTA_BOOST;
  if (maxUsed < 0.5) return QUOTA_NEUTRAL;
  if (maxUsed < 0.7) return QUOTA_MILD;
  if (maxUsed < 0.9) return QUOTA_STRONG_DEMOTE;
  if (maxUsed < 0.95) return QUOTA_NEAR_EXHAUSTION;
  return QUOTA_SKIP;
}
// ─── Benchmark File Loader ───────────────────────────────────────────────────
let _benchmarksCache = null;
@ -459,10 +514,16 @@ export function selectByBenchmarks(unitType, candidates, opts = {}) {
.map((c) => {
const { score, coverage } = scoreCandidate(c, weights, benchmarks);
const fullId = `${c.provider}/${c.id}`;
const quotaMul = quotaHeadroomMultiplier(
c.provider.toLowerCase(),
opts.getQuotaState,
);
return {
id: fullId,
provider: c.provider.toLowerCase(),
score,
score: score * quotaMul,
baseScore: score,
quotaMultiplier: quotaMul,
coverage,
cost: estimateCostPerMillion(c, unitType),
capabilitySignal: capabilityTieBreakScore(c, unitType),

View file

@ -0,0 +1,165 @@
/**
* benchmark-selector-quota.test.mjs
*
* Tests for phase-2 quota-aware routing: the quotaHeadroomMultiplier
* function and its integration into selectByBenchmarks. Verifies the
* curve, the "max across windows" rule, the unknown-quota neutral
* fallback, and that high-usage providers are demoted in selection.
*/
import assert from "node:assert/strict";
import { describe, test } from "vitest";
import "../preferences.js"; // wires circular dep loader
import {
quotaHeadroomMultiplier,
selectByBenchmarks,
} from "../benchmark-selector.js";
// ─── Multiplier curve ────────────────────────────────────────────────────────
/**
 * Build a fake quota-state resolver for tests.
 *
 * @param {number[]} usedFractions - One entry per quota window to fabricate.
 * @returns {() => object} Resolver that ignores its arguments and returns a
 *   fresh `ok: true` state whose windows are labelled `w0`, `w1`, ... with a
 *   fixed limit of 100.
 */
function stub(usedFractions) {
  const toWindow = (fraction, index) => ({
    label: `w${index}`,
    used: fraction * 100,
    limit: 100,
    usedFraction: fraction,
  });

  return function resolveQuotaState() {
    const fetchedAt = new Date().toISOString();
    const windows = usedFractions.map((fraction, index) => toWindow(fraction, index));
    return { ok: true, fetchedAt, windows };
  };
}
describe("quotaHeadroomMultiplier", () => {
  // Table of scenarios. Each resolver is produced lazily by `makeResolver`
  // so it is constructed inside the running test, not at collection time.
  const scenarios = [
    {
      name: "unknown provider → neutral 1.0",
      provider: "nope",
      makeResolver: () => () => null,
      expected: 1.0,
    },
    {
      name: "not-ok entry → neutral 1.0",
      provider: "zai",
      makeResolver: () => () => ({ ok: false, error: "x", windows: [] }),
      expected: 1.0,
    },
    {
      name: "empty windows → neutral 1.0",
      provider: "kimi-coding",
      makeResolver: () => () => ({ ok: true, windows: [] }),
      expected: 1.0,
    },
    {
      name: "0% used → 1.15 boost (spend the sub)",
      provider: "minimax",
      makeResolver: () => stub([0]),
      expected: 1.15,
    },
    {
      name: "19% used → still boost (under 0.2 threshold)",
      provider: "kimi-coding",
      makeResolver: () => stub([0.19]),
      expected: 1.15,
    },
    {
      name: "30% used → neutral 1.0",
      provider: "kimi-coding",
      makeResolver: () => stub([0.3]),
      expected: 1.0,
    },
    {
      name: "60% used → mild 0.92",
      provider: "openrouter",
      makeResolver: () => stub([0.6]),
      expected: 0.92,
    },
    {
      name: "80% used → strong demote 0.5",
      provider: "openrouter",
      makeResolver: () => stub([0.807]),
      expected: 0.5,
    },
    {
      name: "93% used → near-exhaustion 0.2",
      provider: "openrouter",
      makeResolver: () => stub([0.93]),
      expected: 0.2,
    },
    {
      name: "98% used → skip 0.05",
      provider: "openrouter",
      makeResolver: () => stub([0.98]),
      expected: 0.05,
    },
    {
      // Windows at 20% and 85%: the 85% one is the binding constraint,
      // which lands in the strong-demote tier.
      name: "multiple windows: max binds",
      provider: "kimi-coding",
      makeResolver: () => stub([0.2, 0.85]),
      expected: 0.5,
    },
    {
      name: "getQuotaState throwing → neutral (never crashes)",
      provider: "x",
      makeResolver: () => () => {
        throw new Error("boom");
      },
      expected: 1.0,
    },
  ];

  for (const { name, provider, makeResolver, expected } of scenarios) {
    test(name, () => {
      const actual = quotaHeadroomMultiplier(provider, makeResolver());
      assert.equal(actual, expected);
    });
  }
});
// ─── Integration with selectByBenchmarks ─────────────────────────────────────
describe("selectByBenchmarks with quota state", () => {
  // Fixture: two candidates that (per the fixture notes) both appear in the
  // static benchmarks, so quota usage is what should separate them.
  const candidates = [
    { provider: "minimax", id: "MiniMax-M2.7" }, // in static benchmarks
    { provider: "kimi-coding", id: "kimi-k2.6" }, // in static benchmarks
  ];

  // Fresh preference list per call; kimi-coding is always listed first.
  const kimiFirst = () => ["kimi-coding", "minimax"];

  // Resolver that reports a single window at the given used fraction for
  // each listed provider and null (unknown) for everything else.
  const quotaFrom = (usedByProvider) => (pid) => {
    const used = usedByProvider.get(pid);
    return used === undefined ? null : stub([used])();
  };

  const select = (extraOpts) =>
    selectByBenchmarks("execute-task", candidates, {
      ...extraOpts,
      providerPreference: kimiFirst(),
    });

  test("identical benchmark candidates: quota state demotes the high-usage provider", () => {
    // minimax at 0% gets the boost; kimi-coding at exactly 90% falls in the
    // near-exhaustion tier (the strong-demote tier ends just below 0.9).
    // Expectation: minimax wins the primary slot despite the preference order.
    const usage = new Map([
      ["minimax", 0],
      ["kimi-coding", 0.9],
    ]);
    const result = select({ getQuotaState: quotaFrom(usage) });
    assert.ok(result, "should produce a result");
    assert.ok(
      result.primary.startsWith("minimax/"),
      `primary should be minimax/* (under-used) but got ${result.primary}`,
    );
  });

  test("inverse quota: high-usage kimi flips to under-used kimi → kimi wins", () => {
    // Mirror image of the previous case: minimax is at 90%, kimi-coding at 0%.
    const usage = new Map([
      ["minimax", 0.9],
      ["kimi-coding", 0],
    ]);
    const result = select({ getQuotaState: quotaFrom(usage) });
    assert.ok(result.primary.startsWith("kimi-coding/"));
  });

  test("unknown quota state for both: falls back to original ordering (no skew)", () => {
    const withNullResolver = select({ getQuotaState: () => null });
    const withDefaultResolver = select({});
    // A neutral multiplier must not change which candidate is primary.
    assert.equal(withNullResolver?.primary, withDefaultResolver?.primary);
  });

  test("near-exhausted provider effectively last", () => {
    // minimax at 30% is neutral; kimi-coding at 98% is in the skip tier.
    const usage = new Map([
      ["minimax", 0.3],
      ["kimi-coding", 0.98],
    ]);
    const result = select({ getQuotaState: quotaFrom(usage) });
    assert.ok(
      result.primary.startsWith("minimax/"),
      "a 98%-used provider must not be primary when alternatives exist",
    );
  });
});