fix(quota): match real API shapes for kimi-coding / minimax / zai

Dogfooded `sf headless usage` against live APIs and discovered three
shape mismatches in the phase-1 fetchers:

- kimi-coding returns numeric fields as STRINGS ("limit": "100") and
  uses camelCase `resetTime`. Added toNum() coercion + reset hint
  extraction. Now reports Weekly + 5h rolling windows correctly.

- minimax response is `{ model_remains: [{ model_name,
  current_interval_total_count, current_interval_usage_count,
  current_weekly_total_count, current_weekly_usage_count, end_time,
  weekly_end_time, ...}] }` — per-model rolling + weekly windows, not
  the flat `remaining_tokens`/`total_tokens` shape I had assumed.
  Rewrote parser to emit a 5h interval window per model entry, plus a
  weekly window when a weekly cap is configured.

- zai uses a `{ code, msg, success, data }` envelope. When
  `success: false` (e.g. user lacks an active coding plan), parser
  now surfaces vendor msg as the entry error instead of silently
  emitting no windows.

Tests updated to mirror real shapes; added one for zai's failure
envelope. 12 tests pass (was 11).

Live result from re-running `sf headless usage`:
  - openrouter: 80.7% used, $7.71 remaining (real signal — watch this)
  - kimi-coding: Weekly 32%, 5h 4%
  - minimax: MiniMax-M* 5h 1.4% + coding-plan-vlm/search 1.4%
  - gemini-cli: 0.0-0.4% across all models (clean)
  - zai: surfaces "user does not have a coding plan" — may need a
    different endpoint or scope depending on the user's account setup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-16 17:59:53 +02:00
parent c0d089f9ca
commit 8fa9a4b8fa
2 changed files with 127 additions and 52 deletions

View file

@ -63,6 +63,16 @@ function clampFraction(used, limit) {
return Math.max(0, Math.min(1, used / limit));
}
/**
 * Normalise a loosely-typed numeric field to a finite number.
 *
 * Accepts either a real number or a non-blank string that `Number()` can
 * parse; every other input (undefined, null, booleans, objects, blank
 * strings, NaN, ±Infinity, unparseable text) yields `undefined`.
 *
 * @param {unknown} v - Raw value, e.g. a JSON field that may be `100` or `"100"`.
 * @returns {number | undefined} The finite numeric value, or `undefined` when
 *   the input does not represent one.
 */
function toNum(v) {
  let candidate;
  switch (typeof v) {
    case "number":
      candidate = v;
      break;
    case "string":
      // Number("") and Number("   ") are 0, so blank strings must be
      // rejected before conversion.
      if (v.trim() === "") return undefined;
      candidate = Number(v);
      break;
    default:
      return undefined;
  }
  // Single finiteness gate covers NaN / ±Infinity for both input kinds.
  return Number.isFinite(candidate) ? candidate : undefined;
}
// ─── Per-provider fetchers ───────────────────────────────────────────────────
/**
@ -78,19 +88,25 @@ async function fetchKimiCodingQuota(apiKey) {
if (!res.ok) throw new Error(`HTTP ${res.status}`);
const payload = await res.json();
const windows = [];
// Real API returns numeric fields as STRINGS ("limit": "100") and uses
// `resetTime` (camelCase) for the reset hint. Coerce via toNum().
const summary = payload?.usage;
if (summary && typeof summary === "object") {
const limit = typeof summary.limit === "number" ? summary.limit : 0;
let used = typeof summary.used === "number" ? summary.used : undefined;
if (used === undefined && typeof summary.remaining === "number") {
used = limit - summary.remaining;
const limit = toNum(summary.limit);
let used = toNum(summary.used);
if (used === undefined) {
const remaining = toNum(summary.remaining);
if (typeof remaining === "number" && typeof limit === "number") {
used = limit - remaining;
}
}
if (typeof used === "number") {
if (typeof used === "number" && typeof limit === "number") {
windows.push({
label: String(summary.name ?? "Weekly limit"),
label: String(summary.name ?? "Weekly"),
used,
limit,
usedFraction: clampFraction(used, limit),
resetHint: summary.resetTime ? String(summary.resetTime) : undefined,
});
}
}
@ -98,21 +114,25 @@ async function fetchKimiCodingQuota(apiKey) {
for (const item of payload.limits) {
const detail = item?.detail ?? item;
const window = item?.window;
const limit = typeof detail?.limit === "number" ? detail.limit : 0;
let used = typeof detail?.used === "number" ? detail.used : undefined;
if (used === undefined && typeof detail?.remaining === "number") {
used = limit - detail.remaining;
const limit = toNum(detail?.limit);
let used = toNum(detail?.used);
if (used === undefined) {
const remaining = toNum(detail?.remaining);
if (typeof remaining === "number" && typeof limit === "number") {
used = limit - remaining;
}
}
if (typeof used !== "number") continue;
if (typeof used !== "number" || typeof limit !== "number") continue;
const winLabel =
window?.duration && window?.timeUnit
? `${window.duration} ${window.timeUnit}`
? `${window.duration} ${String(window.timeUnit).replace(/^TIME_UNIT_/, "").toLowerCase()}`
: undefined;
windows.push({
label: String(detail?.name ?? winLabel ?? "rolling window"),
used,
limit,
usedFraction: clampFraction(used, limit),
resetHint: detail?.resetTime ? String(detail.resetTime) : undefined,
});
}
}
@ -153,8 +173,11 @@ async function fetchOpenrouterQuota(apiKey) {
/**
* MiniMax GET https://api.minimax.io/v1/token_plan/remains with Bearer auth.
* The 5h rolling window is the load-bearing limit; response shape varies but
* commonly: { remaining_tokens, total_tokens, reset_time, ... }.
* Real shape: { model_remains: [{ model_name, current_interval_total_count,
* current_interval_usage_count, current_weekly_total_count,
* current_weekly_usage_count, end_time, weekly_end_time, ...}] }.
* One entry per model family. Emit one window per model that has a non-zero
* interval cap; weekly windows are also surfaced when configured.
*/
async function fetchMinimaxQuota(apiKey) {
const res = await fetch("https://api.minimax.io/v1/token_plan/remains", {
@ -164,37 +187,56 @@ async function fetchMinimaxQuota(apiKey) {
if (!res.ok) throw new Error(`HTTP ${res.status}`);
const payload = await res.json();
const windows = [];
const remaining =
typeof payload?.remaining_tokens === "number"
? payload.remaining_tokens
: typeof payload?.remain === "number"
? payload.remain
: undefined;
const total =
typeof payload?.total_tokens === "number"
? payload.total_tokens
: typeof payload?.total === "number"
? payload.total
: undefined;
if (typeof remaining === "number" && typeof total === "number") {
const used = total - remaining;
windows.push({
label: "token plan",
used,
limit: total,
usedFraction: clampFraction(used, total),
resetHint: payload?.reset_time
? String(payload.reset_time)
: undefined,
});
const remains = Array.isArray(payload?.model_remains)
? payload.model_remains
: [];
for (const entry of remains) {
if (!entry || typeof entry !== "object") continue;
const intervalTotal = toNum(entry.current_interval_total_count);
const intervalUsed = toNum(entry.current_interval_usage_count);
if (
typeof intervalTotal === "number" &&
typeof intervalUsed === "number" &&
intervalTotal > 0
) {
windows.push({
label: `${entry.model_name ?? "model"} (5h)`,
used: intervalUsed,
limit: intervalTotal,
usedFraction: clampFraction(intervalUsed, intervalTotal),
resetHint:
typeof entry.end_time === "number"
? new Date(entry.end_time).toISOString()
: undefined,
});
}
const weeklyTotal = toNum(entry.current_weekly_total_count);
const weeklyUsed = toNum(entry.current_weekly_usage_count);
if (
typeof weeklyTotal === "number" &&
typeof weeklyUsed === "number" &&
weeklyTotal > 0
) {
windows.push({
label: `${entry.model_name ?? "model"} (weekly)`,
used: weeklyUsed,
limit: weeklyTotal,
usedFraction: clampFraction(weeklyUsed, weeklyTotal),
resetHint:
typeof entry.weekly_end_time === "number"
? new Date(entry.weekly_end_time).toISOString()
: undefined,
});
}
}
return { windows, raw: payload };
}
/**
* Z.AI GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
* Returns 5h token limit + MCP monthly quota per the opencode-mystatus tool.
* Field names vary; we accept common synonyms.
* Real responses use a `code` / `msg` / `success` / `data` envelope. When
* `success: false` (e.g. user has no active coding plan), we surface the
* vendor's message as the error rather than silently emitting no windows.
*/
async function fetchZaiQuota(apiKey) {
const res = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
@ -203,6 +245,12 @@ async function fetchZaiQuota(apiKey) {
});
if (!res.ok) throw new Error(`HTTP ${res.status}`);
const payload = await res.json();
// Treat envelope-level failures as fetch errors so they surface properly.
if (payload?.success === false) {
throw new Error(
`z.ai: ${payload?.msg ?? "unknown error"} (code ${payload?.code ?? "?"})`,
);
}
const windows = [];
const buckets = Array.isArray(payload?.data)
? payload.data

View file

@ -93,15 +93,16 @@ describe("QUOTA_CAPABLE_PROVIDER_IDS", () => {
// ─── kimi-coding ─────────────────────────────────────────────────────────────
describe("runProviderQuotaRefreshIfStale — kimi-coding", () => {
test("hits /coding/v1/usages with Bearer auth and parses windows", async () => {
test("hits /coding/v1/usages with Bearer auth and parses windows (real shape uses STRING numbers)", async () => {
const home = tempSfHome();
// Real API encodes numeric fields as strings; parser must coerce.
const calls = stubFetch({
"https://api.kimi.com/coding/v1/usages": {
usage: { limit: 1000, used: 250, name: "Weekly" },
usage: { limit: "1000", used: "250", name: "Weekly" },
limits: [
{
detail: { limit: 200, used: 80, name: "5h" },
window: { duration: 5, timeUnit: "hours" },
detail: { limit: "200", used: "80", name: "5h" },
window: { duration: 5, timeUnit: "TIME_UNIT_MINUTE" },
},
],
},
@ -125,11 +126,11 @@ describe("runProviderQuotaRefreshIfStale — kimi-coding", () => {
assert.equal(entry.windows[1].usedFraction, 0.4);
});
test("falls back from `used` to `limit - remaining`", async () => {
test("falls back from `used` to `limit - remaining` (string-encoded)", async () => {
const home = tempSfHome();
stubFetch({
"https://api.kimi.com/coding/v1/usages": {
usage: { limit: 1000, remaining: 600, name: "Weekly" },
usage: { limit: "1000", remaining: "600", name: "Weekly" },
},
});
await runProviderQuotaRefreshIfStale(home, makeAuth({ "kimi-coding": "k" }));
@ -139,6 +140,23 @@ describe("runProviderQuotaRefreshIfStale — kimi-coding", () => {
});
});
describe("runProviderQuotaRefreshIfStale — zai envelope error", () => {
test("success:false response surfaces vendor msg as an error entry", async () => {
const home = tempSfHome();
stubFetch({
"https://api.z.ai/api/monitor/usage/quota/limit": {
code: 500,
msg: "current user does not have a coding plan",
success: false,
},
});
await runProviderQuotaRefreshIfStale(home, makeAuth({ zai: "test-zai" }));
const all = getAllProviderQuotaEntries();
assert.equal(all["zai"].ok, false);
assert.match(all["zai"].error, /does not have a coding plan/);
});
});
// ─── openrouter ──────────────────────────────────────────────────────────────
describe("runProviderQuotaRefreshIfStale — openrouter", () => {
@ -170,13 +188,20 @@ describe("runProviderQuotaRefreshIfStale — openrouter", () => {
// ─── minimax ─────────────────────────────────────────────────────────────────
describe("runProviderQuotaRefreshIfStale — minimax", () => {
test("hits /v1/token_plan/remains and parses remaining_tokens / total_tokens", async () => {
test("hits /v1/token_plan/remains and emits one window per model_remains entry", async () => {
const home = tempSfHome();
stubFetch({
"https://api.minimax.io/v1/token_plan/remains": {
remaining_tokens: 700,
total_tokens: 1000,
reset_time: "2026-05-17T00:00:00Z",
model_remains: [
{
model_name: "MiniMax-M*",
current_interval_total_count: 1000,
current_interval_usage_count: 250,
current_weekly_total_count: 0,
current_weekly_usage_count: 0,
end_time: 1778961600000,
},
],
},
});
@ -187,10 +212,12 @@ describe("runProviderQuotaRefreshIfStale — minimax", () => {
const entry = getProviderQuotaState("minimax");
assert.equal(entry.ok, true);
assert.equal(entry.windows[0].used, 300);
assert.equal(entry.windows.length, 1);
assert.equal(entry.windows[0].label, "MiniMax-M* (5h)");
assert.equal(entry.windows[0].used, 250);
assert.equal(entry.windows[0].limit, 1000);
assert.equal(entry.windows[0].usedFraction, 0.3);
assert.equal(entry.windows[0].resetHint, "2026-05-17T00:00:00Z");
assert.equal(entry.windows[0].usedFraction, 0.25);
assert.ok(entry.windows[0].resetHint);
});
});