feat(benchmark-selector): phase 2 — quota-aware routing weight
Bias dispatch toward under-used subscriptions ("spend the subs") and
de-prioritize near-exhausted ones (avoid 429 walls). Multiplier is
applied to the benchmark score before sort, so it only re-orders
within the existing score → cost → coverage → preference ladder.
Unknown quota state stays neutral 1.0 — never punish a provider for
having no public quota API.
Curve, keyed on max(usedFraction) across all windows:
< 0.20 → 1.15 (boost — lots of headroom, prefer to use it)
< 0.50 → 1.00 (neutral)
< 0.70 → 0.92 (slight steer away)
< 0.90 → 0.50 (strong de-prioritize)
< 0.95 → 0.20 (near-exhaustion)
≥ 0.95 → 0.05 (effectively skip)
Max-across-windows means kimi-coding's 5h-rolling window (tighter)
binds the decision even when the weekly is fresh.
New exported helper quotaHeadroomMultiplier(providerKey, getQuotaState?)
takes the resolver as optional dep for testability; defaults to
getProviderQuotaState from provider-quota-cache.js.
16 new tests cover the curve and the selectByBenchmarks integration
(unknown quota → unchanged, demoted high-usage provider, boosted
under-used provider, near-exhausted skipped when alternatives exist).
Previously filed as SF backlog item sf-mpmp8ie6xf-z4cxhg; this commit
closes that loop.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b39cf3387e
commit
8d0f41436b
2 changed files with 227 additions and 1 deletions
|
|
@ -25,6 +25,61 @@ import { join } from "node:path";
|
|||
import { tierOrdinal } from "./complexity-classifier.js";
|
||||
import { lookup } from "./model-registry.js";
|
||||
import { getModelTier } from "./model-router.js";
|
||||
import { getProviderQuotaState } from "./provider-quota-cache.js";
|
||||
|
||||
// ─── Quota-aware routing (phase 2) ────────────────────────────────────────────
|
||||
// Bias dispatch toward under-used subscriptions ("spend the subs") and away
|
||||
// from near-exhausted ones (avoid 429 walls). Multiplier is applied to the
|
||||
// benchmark score before sort, so it only re-orders within the existing
|
||||
// score → cost → coverage → preference ladder. Unknown quota state means
|
||||
// neutral 1.0 — never punish a provider for having no public quota API.
|
||||
//
|
||||
// Curve, keyed on max(usedFraction) across all windows for the provider:
|
||||
// < 0.20 → 1.15 (boost: lots of headroom, prefer to use it)
|
||||
// < 0.50 → 1.00 (neutral)
|
||||
// < 0.70 → 0.92 (slight steer away)
|
||||
// < 0.90 → 0.50 (strong de-prioritize)
|
||||
// < 0.95 → 0.20 (near-exhaustion)
|
||||
// ≥ 0.95 → 0.05 (effectively skip; only used if no alternatives)
|
||||
//
|
||||
// The "max across windows" choice means kimi-coding's 5h-rolling window
|
||||
// (typically tighter) binds the decision even when the weekly is fresh.
|
||||
// Score multipliers for each quota-usage tier (see the curve described above).
const QUOTA_BOOST = 1.15;
const QUOTA_NEUTRAL = 1.0;
const QUOTA_MILD = 0.92;
const QUOTA_STRONG_DEMOTE = 0.5;
const QUOTA_NEAR_EXHAUSTION = 0.2;
const QUOTA_SKIP = 0.05;

/**
 * Compute the benchmark-score multiplier for a provider from its quota usage.
 *
 * The decision is keyed on the highest `usedFraction` across all of the
 * provider's quota windows, so the tightest window binds. Any situation in
 * which usage cannot be determined — resolver throws, no state, `ok` falsy,
 * no windows, or no window carrying a finite numeric `usedFraction` — yields
 * the neutral multiplier, so a provider is never rewarded or punished for
 * missing or malformed quota data.
 *
 * @param {string} providerKey - Provider identifier passed to the resolver.
 * @param {(providerKey: string) => any} [getQuotaState] - Optional resolver
 *   returning a `{ ok, windows: [{ usedFraction }, ...] }` state object;
 *   defaults to `getProviderQuotaState`.
 * @returns {number} One of 1.15, 1.0, 0.92, 0.5, 0.2 or 0.05.
 */
export function quotaHeadroomMultiplier(providerKey, getQuotaState) {
  const resolver = getQuotaState ?? getProviderQuotaState;

  let state;
  try {
    state = resolver(providerKey);
  } catch {
    // Deliberate best-effort: a failing quota lookup must never break routing.
    return QUOTA_NEUTRAL;
  }

  if (
    !state ||
    !state.ok ||
    !Array.isArray(state.windows) ||
    state.windows.length === 0
  ) {
    return QUOTA_NEUTRAL;
  }

  const fractions = state.windows
    .map((w) => w?.usedFraction)
    .filter((f) => typeof f === "number" && Number.isFinite(f));

  // Windows exist but none reports a usable fraction: usage is unknown, so
  // stay neutral instead of treating it as 0% used (which would boost).
  if (fractions.length === 0) return QUOTA_NEUTRAL;

  // Floor at 0 so negative fractions are treated as "nothing used".
  const maxUsed = Math.max(0, ...fractions);

  // Upper bounds are exclusive: exactly 0.9 falls into the near-exhaustion tier.
  if (maxUsed < 0.2) return QUOTA_BOOST;
  if (maxUsed < 0.5) return QUOTA_NEUTRAL;
  if (maxUsed < 0.7) return QUOTA_MILD;
  if (maxUsed < 0.9) return QUOTA_STRONG_DEMOTE;
  if (maxUsed < 0.95) return QUOTA_NEAR_EXHAUSTION;
  return QUOTA_SKIP;
}
|
||||
|
||||
// ─── Benchmark File Loader ───────────────────────────────────────────────────
|
||||
let _benchmarksCache = null;
|
||||
|
|
@ -459,10 +514,16 @@ export function selectByBenchmarks(unitType, candidates, opts = {}) {
|
|||
.map((c) => {
|
||||
const { score, coverage } = scoreCandidate(c, weights, benchmarks);
|
||||
const fullId = `${c.provider}/${c.id}`;
|
||||
const quotaMul = quotaHeadroomMultiplier(
|
||||
c.provider.toLowerCase(),
|
||||
opts.getQuotaState,
|
||||
);
|
||||
return {
|
||||
id: fullId,
|
||||
provider: c.provider.toLowerCase(),
|
||||
score,
|
||||
score: score * quotaMul,
|
||||
baseScore: score,
|
||||
quotaMultiplier: quotaMul,
|
||||
coverage,
|
||||
cost: estimateCostPerMillion(c, unitType),
|
||||
capabilitySignal: capabilityTieBreakScore(c, unitType),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,165 @@
|
|||
/**
|
||||
* benchmark-selector-quota.test.mjs
|
||||
*
|
||||
* Tests for phase-2 quota-aware routing: the quotaHeadroomMultiplier
|
||||
* function and its integration into selectByBenchmarks. Verifies the
|
||||
* curve, the "max across windows" rule, the unknown-quota neutral
|
||||
* fallback, and that high-usage providers are demoted in selection.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { describe, test } from "vitest";
|
||||
|
||||
import "../preferences.js"; // wires circular dep loader
|
||||
|
||||
import {
|
||||
quotaHeadroomMultiplier,
|
||||
selectByBenchmarks,
|
||||
} from "../benchmark-selector.js";
|
||||
|
||||
// ─── Multiplier curve ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build a fake quota-state resolver for tests.
 *
 * The returned function ignores its arguments and always yields an `ok: true`
 * state stamped with the current time, containing one window per entry of
 * `usedFractions`. Each window is labelled `w<index>`, has a fixed limit of
 * 100, `used` equal to the fraction times 100, and `usedFraction` equal to
 * the fraction itself.
 *
 * @param {number[]} usedFractions - One used-fraction value per window.
 * @returns {Function} Resolver producing the synthetic quota state.
 */
function stub(usedFractions) {
|
||||
return () => ({
|
||||
ok: true,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
windows: usedFractions.map((f, i) => ({
|
||||
label: `w${i}`,
|
||||
used: f * 100,
|
||||
limit: 100,
|
||||
usedFraction: f,
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
describe("quotaHeadroomMultiplier", () => {
|
||||
test("unknown provider → neutral 1.0", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("nope", () => null), 1.0);
|
||||
});
|
||||
|
||||
test("not-ok entry → neutral 1.0", () => {
|
||||
assert.equal(
|
||||
quotaHeadroomMultiplier("zai", () => ({ ok: false, error: "x", windows: [] })),
|
||||
1.0,
|
||||
);
|
||||
});
|
||||
|
||||
test("empty windows → neutral 1.0", () => {
|
||||
assert.equal(
|
||||
quotaHeadroomMultiplier("kimi-coding", () => ({ ok: true, windows: [] })),
|
||||
1.0,
|
||||
);
|
||||
});
|
||||
|
||||
test("0% used → 1.15 boost (spend the sub)", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("minimax", stub([0])), 1.15);
|
||||
});
|
||||
|
||||
test("19% used → still boost (under 0.2 threshold)", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("kimi-coding", stub([0.19])), 1.15);
|
||||
});
|
||||
|
||||
test("30% used → neutral 1.0", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("kimi-coding", stub([0.3])), 1.0);
|
||||
});
|
||||
|
||||
test("60% used → mild 0.92", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.6])), 0.92);
|
||||
});
|
||||
|
||||
test("80% used → strong demote 0.5", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.807])), 0.5);
|
||||
});
|
||||
|
||||
test("93% used → near-exhaustion 0.2", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.93])), 0.2);
|
||||
});
|
||||
|
||||
test("98% used → skip 0.05", () => {
|
||||
assert.equal(quotaHeadroomMultiplier("openrouter", stub([0.98])), 0.05);
|
||||
});
|
||||
|
||||
test("multiple windows: max binds", () => {
|
||||
// 5h window is 85% used, weekly is 20%. Binding constraint = 85% → strong demote 0.5.
|
||||
assert.equal(
|
||||
quotaHeadroomMultiplier("kimi-coding", stub([0.2, 0.85])),
|
||||
0.5,
|
||||
);
|
||||
});
|
||||
|
||||
test("getQuotaState throwing → neutral (never crashes)", () => {
|
||||
assert.equal(
|
||||
quotaHeadroomMultiplier("x", () => {
|
||||
throw new Error("boom");
|
||||
}),
|
||||
1.0,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Integration with selectByBenchmarks ─────────────────────────────────────
|
||||
|
||||
describe("selectByBenchmarks with quota state", () => {
|
||||
// Two candidates with identical strong benchmarks; the one with high
|
||||
// quota usage should be demoted below the under-used one.
|
||||
const candidates = [
|
||||
{ provider: "minimax", id: "MiniMax-M2.7" }, // in static benchmarks
|
||||
{ provider: "kimi-coding", id: "kimi-k2.6" }, // in static benchmarks
|
||||
];
|
||||
|
||||
test("identical benchmark candidates: quota state demotes the high-usage provider", () => {
|
||||
// minimax: 0% used (boost), kimi-coding: 90% used (near-exhaustion 0.2; 0.9 is not < 0.9).
|
||||
// Expectation: minimax wins primary slot.
|
||||
const result = selectByBenchmarks("execute-task", candidates, {
|
||||
getQuotaState: (pid) => {
|
||||
if (pid === "minimax") return stub([0])();
|
||||
if (pid === "kimi-coding") return stub([0.9])();
|
||||
return null;
|
||||
},
|
||||
providerPreference: ["kimi-coding", "minimax"], // kimi listed first
|
||||
});
|
||||
assert.ok(result, "should produce a result");
|
||||
assert.ok(
|
||||
result.primary.startsWith("minimax/"),
|
||||
`primary should be minimax/* (under-used) but got ${result.primary}`,
|
||||
);
|
||||
});
|
||||
|
||||
test("inverse quota: high-usage kimi flips to under-used kimi → kimi wins", () => {
|
||||
const result = selectByBenchmarks("execute-task", candidates, {
|
||||
getQuotaState: (pid) => {
|
||||
if (pid === "minimax") return stub([0.9])(); // demote
|
||||
if (pid === "kimi-coding") return stub([0])(); // boost
|
||||
return null;
|
||||
},
|
||||
providerPreference: ["kimi-coding", "minimax"],
|
||||
});
|
||||
assert.ok(result.primary.startsWith("kimi-coding/"));
|
||||
});
|
||||
|
||||
test("unknown quota state for both: falls back to original ordering (no skew)", () => {
|
||||
const resultA = selectByBenchmarks("execute-task", candidates, {
|
||||
getQuotaState: () => null,
|
||||
providerPreference: ["kimi-coding", "minimax"],
|
||||
});
|
||||
const resultB = selectByBenchmarks("execute-task", candidates, {
|
||||
providerPreference: ["kimi-coding", "minimax"],
|
||||
});
|
||||
// Both should produce the same primary — neutral multiplier means no skew.
|
||||
assert.equal(resultA?.primary, resultB?.primary);
|
||||
});
|
||||
|
||||
test("near-exhausted provider effectively last", () => {
|
||||
const result = selectByBenchmarks("execute-task", candidates, {
|
||||
getQuotaState: (pid) => {
|
||||
if (pid === "minimax") return stub([0.3])(); // neutral
|
||||
if (pid === "kimi-coding") return stub([0.98])(); // skip-tier
|
||||
return null;
|
||||
},
|
||||
providerPreference: ["kimi-coding", "minimax"],
|
||||
});
|
||||
assert.ok(
|
||||
result.primary.startsWith("minimax/"),
|
||||
"a 98%-used provider must not be primary when alternatives exist",
|
||||
);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue