fix(quota/zai): use raw key auth (no Bearer) + correct response shape

Cross-referenced the vbgate/opencode-mystatus reference implementation
and found two real bugs in the zai fetcher (items 1 and 2 below); item 3
is a small alignment change, not a bug fix:

1. Auth header: zai's monitor endpoint expects `Authorization: <key>`
   with NO `Bearer ` prefix. Using Bearer caused the server to treat
   the call as unauthenticated and return the generic "no coding
   plan" response even for active coding-plan users.

2. Response shape: real envelope is
     { code, msg, success, data: { limits: [
       { type: "TOKENS_LIMIT"|"TIME_LIMIT", usage, currentValue,
         percentage, nextResetTime? } ] } }
   The fetcher was looking for `data: [...]` directly and reading
   `limit`/`used` fields. It now parses `payload.data.limits[]`, using
   `usage` as the total and `currentValue` as the amount used.

3. Added User-Agent header to match the reference tool.

Live probe finding: this user's z.ai key works fine for inference
(/api/coding/paas/v4/models returns 200 with the full model list)
but the monitor endpoint reports "no coding plan" — meaning their
account uses the regular pay-as-you-go z.ai/zhipu tier, not the
separately-billed "Coding Plan" subscription that the monitor
endpoint serves. The 429s they observe during inference are
rate-limit RPM/TPM errors, not coding-plan window exhaustion.
Code change is correct; the error message is now accurate and
actionable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-16 18:04:12 +02:00
parent 8fa9a4b8fa
commit b39cf3387e
2 changed files with 67 additions and 40 deletions

View file

@ -233,59 +233,62 @@ async function fetchMinimaxQuota(apiKey) {
}
/**
* Z.AI GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
* Real responses use a `code` / `msg` / `success` / `data` envelope. When
* `success: false` (e.g. user has no active coding plan), we surface the
* vendor's message as the error rather than silently emitting no windows.
* Z.AI GET https://api.z.ai/api/monitor/usage/quota/limit.
*
* Two non-obvious things about this endpoint (cross-referenced with
* vbgate/opencode-mystatus reference impl):
*
* 1. Auth header is the RAW api key with NO `Bearer ` prefix. Using
* `Authorization: Bearer <key>` causes the server to treat the call
* as unauthenticated and respond with success:false / "user does not
* have a coding plan" even when the user is actively on the plan.
*
* 2. Real response shape:
* { code: 200, msg, success: true,
* data: { limits: [
* { type: "TOKENS_LIMIT" | "TIME_LIMIT",
* usage: <total>, currentValue: <used>,
* percentage: <0-100>, nextResetTime?: <ms epoch>
* }, ...
* ] } }
*/
async function fetchZaiQuota(apiKey) {
const res = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
method: "GET",
headers: { Authorization: `Bearer ${apiKey}` },
headers: {
Authorization: apiKey,
"Content-Type": "application/json",
"User-Agent": "SingularityForge-Quota/1.0",
},
});
if (!res.ok) throw new Error(`HTTP ${res.status}`);
const payload = await res.json();
// Treat envelope-level failures as fetch errors so they surface properly.
if (payload?.success === false) {
if (payload?.success === false || payload?.code !== 200) {
throw new Error(
`z.ai: ${payload?.msg ?? "unknown error"} (code ${payload?.code ?? "?"})`,
);
}
const windows = [];
const buckets = Array.isArray(payload?.data)
? payload.data
: Array.isArray(payload?.limits)
? payload.limits
: Array.isArray(payload)
? payload
: [];
const buckets = Array.isArray(payload?.data?.limits) ? payload.data.limits : [];
for (const bucket of buckets) {
if (!bucket || typeof bucket !== "object") continue;
const limit =
typeof bucket.limit === "number"
? bucket.limit
: typeof bucket.total === "number"
? bucket.total
: undefined;
let used =
typeof bucket.used === "number"
? bucket.used
: typeof bucket.consumed === "number"
? bucket.consumed
: undefined;
if (used === undefined && typeof bucket.remaining === "number" && typeof limit === "number") {
used = limit - bucket.remaining;
}
const limit = toNum(bucket.usage);
const used = toNum(bucket.currentValue);
if (typeof used !== "number" || typeof limit !== "number") continue;
const label =
bucket.type === "TOKENS_LIMIT"
? "5h tokens"
: bucket.type === "TIME_LIMIT"
? "MCP monthly"
: String(bucket.type ?? "quota");
windows.push({
label: String(bucket.name ?? bucket.type ?? "quota"),
label,
used,
limit,
usedFraction: clampFraction(used, limit),
resetHint: bucket.reset_time
? String(bucket.reset_time)
: bucket.resetAt
? String(bucket.resetAt)
resetHint:
typeof bucket.nextResetTime === "number"
? new Date(bucket.nextResetTime).toISOString()
: undefined,
});
}

View file

@ -224,19 +224,43 @@ describe("runProviderQuotaRefreshIfStale — minimax", () => {
// ─── zai ─────────────────────────────────────────────────────────────────────
describe("runProviderQuotaRefreshIfStale — zai", () => {
test("hits /api/monitor/usage/quota/limit and parses bucket array", async () => {
test("uses raw key auth (no Bearer prefix) and parses {type, usage, currentValue}", async () => {
const home = tempSfHome();
stubFetch({
const calls = stubFetch({
"https://api.z.ai/api/monitor/usage/quota/limit": {
data: [
{ name: "5h tokens", limit: 5000, used: 1500 },
{ name: "MCP monthly", limit: 100, used: 70 },
],
code: 200,
msg: "ok",
success: true,
data: {
limits: [
{
type: "TOKENS_LIMIT",
usage: 5000,
currentValue: 1500,
percentage: 30,
nextResetTime: 1779004800000,
},
{
type: "TIME_LIMIT",
usage: 100,
currentValue: 70,
percentage: 70,
},
],
},
},
});
await runProviderQuotaRefreshIfStale(home, makeAuth({ zai: "test-zai" }));
const zaiCall = calls.find((c) => c.url.includes("z.ai"));
assert.ok(zaiCall);
assert.equal(
zaiCall.headers.Authorization,
"test-zai",
"zai auth header must be the raw key, NO Bearer prefix",
);
const entry = getProviderQuotaState("zai");
assert.equal(entry.ok, true);
assert.equal(entry.windows.length, 2);