fix(quota): match real API shapes for kimi-coding / minimax / zai
Dogfooded `sf headless usage` against live APIs and discovered three
shape mismatches in the phase-1 fetchers:
- kimi-coding returns numeric fields as STRINGS ("limit": "100") and
uses camelCase `resetTime`. Added toNum() coercion + reset hint
extraction. Now reports Weekly + 5h rolling windows correctly.
- minimax response is `{ model_remains: [{ model_name,
current_interval_total_count, current_interval_usage_count,
current_weekly_total_count, current_weekly_usage_count, end_time,
weekly_end_time, ...}] }` — per-model rolling + weekly windows, not
the flat `remaining_tokens`/`total_tokens` shape I had assumed.
Rewrote parser to emit, per model entry, a 5h window plus a weekly window when a weekly cap is configured.
- zai uses a `{ code, msg, success, data }` envelope. When
`success: false` (e.g. user lacks an active coding plan), parser
now surfaces vendor msg as the entry error instead of silently
emitting no windows.
Tests updated to mirror real shapes; added one for zai's failure
envelope. 12 tests pass (was 11).
Live result from re-running `sf headless usage`:
- openrouter: 80.7% used, $7.71 remaining (real signal — watch this)
- kimi-coding: Weekly 32%, 5h 4%
- minimax: MiniMax-M* 5h 1.4% + coding-plan-vlm/search 1.4%
- gemini-cli: 0.0-0.4% across all models (clean)
- zai: surfaces "user does not have a coding plan" — may need a
different endpoint or scope depending on the user's account setup.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c0d089f9ca
commit
8fa9a4b8fa
2 changed files with 127 additions and 52 deletions
|
|
@ -63,6 +63,16 @@ function clampFraction(used, limit) {
|
|||
return Math.max(0, Math.min(1, used / limit));
|
||||
}
|
||||
|
||||
/**
 * Normalise a loosely-typed numeric field into a finite number.
 *
 * Finite numbers pass through untouched. Non-blank strings are parsed with
 * `Number()`. Everything else — non-finite numbers, blank or unparseable
 * strings, and values of any other type — yields `undefined`.
 *
 * @param {unknown} v - Raw field value.
 * @returns {number | undefined} The finite numeric value, or `undefined`.
 */
function toNum(v) {
  switch (typeof v) {
    case "number":
      return Number.isFinite(v) ? v : undefined;
    case "string": {
      // Number("") and Number("   ") are 0, so blank strings are rejected first.
      if (v.trim() === "") return undefined;
      const parsed = Number(v);
      return Number.isFinite(parsed) ? parsed : undefined;
    }
    default:
      return undefined;
  }
}
|
||||
|
||||
// ─── Per-provider fetchers ───────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
@ -78,19 +88,25 @@ async function fetchKimiCodingQuota(apiKey) {
|
|||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
const windows = [];
|
||||
// Real API returns numeric fields as STRINGS ("limit": "100") and uses
|
||||
// `resetTime` (camelCase) for the reset hint. Coerce via toNum().
|
||||
const summary = payload?.usage;
|
||||
if (summary && typeof summary === "object") {
|
||||
const limit = typeof summary.limit === "number" ? summary.limit : 0;
|
||||
let used = typeof summary.used === "number" ? summary.used : undefined;
|
||||
if (used === undefined && typeof summary.remaining === "number") {
|
||||
used = limit - summary.remaining;
|
||||
const limit = toNum(summary.limit);
|
||||
let used = toNum(summary.used);
|
||||
if (used === undefined) {
|
||||
const remaining = toNum(summary.remaining);
|
||||
if (typeof remaining === "number" && typeof limit === "number") {
|
||||
used = limit - remaining;
|
||||
}
|
||||
}
|
||||
if (typeof used === "number") {
|
||||
if (typeof used === "number" && typeof limit === "number") {
|
||||
windows.push({
|
||||
label: String(summary.name ?? "Weekly limit"),
|
||||
label: String(summary.name ?? "Weekly"),
|
||||
used,
|
||||
limit,
|
||||
usedFraction: clampFraction(used, limit),
|
||||
resetHint: summary.resetTime ? String(summary.resetTime) : undefined,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -98,21 +114,25 @@ async function fetchKimiCodingQuota(apiKey) {
|
|||
for (const item of payload.limits) {
|
||||
const detail = item?.detail ?? item;
|
||||
const window = item?.window;
|
||||
const limit = typeof detail?.limit === "number" ? detail.limit : 0;
|
||||
let used = typeof detail?.used === "number" ? detail.used : undefined;
|
||||
if (used === undefined && typeof detail?.remaining === "number") {
|
||||
used = limit - detail.remaining;
|
||||
const limit = toNum(detail?.limit);
|
||||
let used = toNum(detail?.used);
|
||||
if (used === undefined) {
|
||||
const remaining = toNum(detail?.remaining);
|
||||
if (typeof remaining === "number" && typeof limit === "number") {
|
||||
used = limit - remaining;
|
||||
}
|
||||
}
|
||||
if (typeof used !== "number") continue;
|
||||
if (typeof used !== "number" || typeof limit !== "number") continue;
|
||||
const winLabel =
|
||||
window?.duration && window?.timeUnit
|
||||
? `${window.duration} ${window.timeUnit}`
|
||||
? `${window.duration} ${String(window.timeUnit).replace(/^TIME_UNIT_/, "").toLowerCase()}`
|
||||
: undefined;
|
||||
windows.push({
|
||||
label: String(detail?.name ?? winLabel ?? "rolling window"),
|
||||
used,
|
||||
limit,
|
||||
usedFraction: clampFraction(used, limit),
|
||||
resetHint: detail?.resetTime ? String(detail.resetTime) : undefined,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -153,8 +173,11 @@ async function fetchOpenrouterQuota(apiKey) {
|
|||
|
||||
/**
|
||||
* MiniMax — GET https://api.minimax.io/v1/token_plan/remains with Bearer auth.
|
||||
* The 5h rolling window is the load-bearing limit; response shape varies but
|
||||
* commonly: { remaining_tokens, total_tokens, reset_time, ... }.
|
||||
* Real shape: { model_remains: [{ model_name, current_interval_total_count,
|
||||
* current_interval_usage_count, current_weekly_total_count,
|
||||
* current_weekly_usage_count, end_time, weekly_end_time, ...}] }.
|
||||
* One entry per model family. Emit one window per model that has a non-zero
|
||||
* interval cap; weekly windows are also surfaced when configured.
|
||||
*/
|
||||
async function fetchMinimaxQuota(apiKey) {
|
||||
const res = await fetch("https://api.minimax.io/v1/token_plan/remains", {
|
||||
|
|
@ -164,37 +187,56 @@ async function fetchMinimaxQuota(apiKey) {
|
|||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
const windows = [];
|
||||
const remaining =
|
||||
typeof payload?.remaining_tokens === "number"
|
||||
? payload.remaining_tokens
|
||||
: typeof payload?.remain === "number"
|
||||
? payload.remain
|
||||
: undefined;
|
||||
const total =
|
||||
typeof payload?.total_tokens === "number"
|
||||
? payload.total_tokens
|
||||
: typeof payload?.total === "number"
|
||||
? payload.total
|
||||
: undefined;
|
||||
if (typeof remaining === "number" && typeof total === "number") {
|
||||
const used = total - remaining;
|
||||
windows.push({
|
||||
label: "token plan",
|
||||
used,
|
||||
limit: total,
|
||||
usedFraction: clampFraction(used, total),
|
||||
resetHint: payload?.reset_time
|
||||
? String(payload.reset_time)
|
||||
: undefined,
|
||||
});
|
||||
const remains = Array.isArray(payload?.model_remains)
|
||||
? payload.model_remains
|
||||
: [];
|
||||
for (const entry of remains) {
|
||||
if (!entry || typeof entry !== "object") continue;
|
||||
const intervalTotal = toNum(entry.current_interval_total_count);
|
||||
const intervalUsed = toNum(entry.current_interval_usage_count);
|
||||
if (
|
||||
typeof intervalTotal === "number" &&
|
||||
typeof intervalUsed === "number" &&
|
||||
intervalTotal > 0
|
||||
) {
|
||||
windows.push({
|
||||
label: `${entry.model_name ?? "model"} (5h)`,
|
||||
used: intervalUsed,
|
||||
limit: intervalTotal,
|
||||
usedFraction: clampFraction(intervalUsed, intervalTotal),
|
||||
resetHint:
|
||||
typeof entry.end_time === "number"
|
||||
? new Date(entry.end_time).toISOString()
|
||||
: undefined,
|
||||
});
|
||||
}
|
||||
const weeklyTotal = toNum(entry.current_weekly_total_count);
|
||||
const weeklyUsed = toNum(entry.current_weekly_usage_count);
|
||||
if (
|
||||
typeof weeklyTotal === "number" &&
|
||||
typeof weeklyUsed === "number" &&
|
||||
weeklyTotal > 0
|
||||
) {
|
||||
windows.push({
|
||||
label: `${entry.model_name ?? "model"} (weekly)`,
|
||||
used: weeklyUsed,
|
||||
limit: weeklyTotal,
|
||||
usedFraction: clampFraction(weeklyUsed, weeklyTotal),
|
||||
resetHint:
|
||||
typeof entry.weekly_end_time === "number"
|
||||
? new Date(entry.weekly_end_time).toISOString()
|
||||
: undefined,
|
||||
});
|
||||
}
|
||||
}
|
||||
return { windows, raw: payload };
|
||||
}
|
||||
|
||||
/**
|
||||
* Z.AI — GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
|
||||
* Returns 5h token limit + MCP monthly quota per the opencode-mystatus tool.
|
||||
* Field names vary; we accept common synonyms.
|
||||
* Real responses use a `code` / `msg` / `success` / `data` envelope. When
|
||||
* `success: false` (e.g. user has no active coding plan), we surface the
|
||||
* vendor's message as the error rather than silently emitting no windows.
|
||||
*/
|
||||
async function fetchZaiQuota(apiKey) {
|
||||
const res = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
|
||||
|
|
@ -203,6 +245,12 @@ async function fetchZaiQuota(apiKey) {
|
|||
});
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
// Treat envelope-level failures as fetch errors so they surface properly.
|
||||
if (payload?.success === false) {
|
||||
throw new Error(
|
||||
`z.ai: ${payload?.msg ?? "unknown error"} (code ${payload?.code ?? "?"})`,
|
||||
);
|
||||
}
|
||||
const windows = [];
|
||||
const buckets = Array.isArray(payload?.data)
|
||||
? payload.data
|
||||
|
|
|
|||
|
|
@ -93,15 +93,16 @@ describe("QUOTA_CAPABLE_PROVIDER_IDS", () => {
|
|||
// ─── kimi-coding ─────────────────────────────────────────────────────────────
|
||||
|
||||
describe("runProviderQuotaRefreshIfStale — kimi-coding", () => {
|
||||
test("hits /coding/v1/usages with Bearer auth and parses windows", async () => {
|
||||
test("hits /coding/v1/usages with Bearer auth and parses windows (real shape uses STRING numbers)", async () => {
|
||||
const home = tempSfHome();
|
||||
// Real API encodes numeric fields as strings; parser must coerce.
|
||||
const calls = stubFetch({
|
||||
"https://api.kimi.com/coding/v1/usages": {
|
||||
usage: { limit: 1000, used: 250, name: "Weekly" },
|
||||
usage: { limit: "1000", used: "250", name: "Weekly" },
|
||||
limits: [
|
||||
{
|
||||
detail: { limit: 200, used: 80, name: "5h" },
|
||||
window: { duration: 5, timeUnit: "hours" },
|
||||
detail: { limit: "200", used: "80", name: "5h" },
|
||||
window: { duration: 5, timeUnit: "TIME_UNIT_MINUTE" },
|
||||
},
|
||||
],
|
||||
},
|
||||
|
|
@ -125,11 +126,11 @@ describe("runProviderQuotaRefreshIfStale — kimi-coding", () => {
|
|||
assert.equal(entry.windows[1].usedFraction, 0.4);
|
||||
});
|
||||
|
||||
test("falls back from `used` to `limit - remaining`", async () => {
|
||||
test("falls back from `used` to `limit - remaining` (string-encoded)", async () => {
|
||||
const home = tempSfHome();
|
||||
stubFetch({
|
||||
"https://api.kimi.com/coding/v1/usages": {
|
||||
usage: { limit: 1000, remaining: 600, name: "Weekly" },
|
||||
usage: { limit: "1000", remaining: "600", name: "Weekly" },
|
||||
},
|
||||
});
|
||||
await runProviderQuotaRefreshIfStale(home, makeAuth({ "kimi-coding": "k" }));
|
||||
|
|
@ -139,6 +140,23 @@ describe("runProviderQuotaRefreshIfStale — kimi-coding", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("runProviderQuotaRefreshIfStale — zai envelope error", () => {
|
||||
test("success:false response surfaces vendor msg as an error entry", async () => {
|
||||
const home = tempSfHome();
|
||||
stubFetch({
|
||||
"https://api.z.ai/api/monitor/usage/quota/limit": {
|
||||
code: 500,
|
||||
msg: "current user does not have a coding plan",
|
||||
success: false,
|
||||
},
|
||||
});
|
||||
await runProviderQuotaRefreshIfStale(home, makeAuth({ zai: "test-zai" }));
|
||||
const all = getAllProviderQuotaEntries();
|
||||
assert.equal(all["zai"].ok, false);
|
||||
assert.match(all["zai"].error, /does not have a coding plan/);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── openrouter ──────────────────────────────────────────────────────────────
|
||||
|
||||
describe("runProviderQuotaRefreshIfStale — openrouter", () => {
|
||||
|
|
@ -170,13 +188,20 @@ describe("runProviderQuotaRefreshIfStale — openrouter", () => {
|
|||
// ─── minimax ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("runProviderQuotaRefreshIfStale — minimax", () => {
|
||||
test("hits /v1/token_plan/remains and parses remaining_tokens / total_tokens", async () => {
|
||||
test("hits /v1/token_plan/remains and emits one window per model_remains entry", async () => {
|
||||
const home = tempSfHome();
|
||||
stubFetch({
|
||||
"https://api.minimax.io/v1/token_plan/remains": {
|
||||
remaining_tokens: 700,
|
||||
total_tokens: 1000,
|
||||
reset_time: "2026-05-17T00:00:00Z",
|
||||
model_remains: [
|
||||
{
|
||||
model_name: "MiniMax-M*",
|
||||
current_interval_total_count: 1000,
|
||||
current_interval_usage_count: 250,
|
||||
current_weekly_total_count: 0,
|
||||
current_weekly_usage_count: 0,
|
||||
end_time: 1778961600000,
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
|
|
@ -187,10 +212,12 @@ describe("runProviderQuotaRefreshIfStale — minimax", () => {
|
|||
|
||||
const entry = getProviderQuotaState("minimax");
|
||||
assert.equal(entry.ok, true);
|
||||
assert.equal(entry.windows[0].used, 300);
|
||||
assert.equal(entry.windows.length, 1);
|
||||
assert.equal(entry.windows[0].label, "MiniMax-M* (5h)");
|
||||
assert.equal(entry.windows[0].used, 250);
|
||||
assert.equal(entry.windows[0].limit, 1000);
|
||||
assert.equal(entry.windows[0].usedFraction, 0.3);
|
||||
assert.equal(entry.windows[0].resetHint, "2026-05-17T00:00:00Z");
|
||||
assert.equal(entry.windows[0].usedFraction, 0.25);
|
||||
assert.ok(entry.windows[0].resetHint);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue