fix(quota/zai): use raw key auth (no Bearer) + correct response shape
Cross-referenced the vbgate/opencode-mystatus reference implementation
and found two real bugs in the zai fetcher (items 1-2), plus one header alignment (item 3):
1. Auth header: zai's monitor endpoint expects `Authorization: <key>`
with NO `Bearer ` prefix. Using Bearer caused the server to treat
the call as unauthenticated and return the generic "no coding
plan" response even for active coding-plan users.
2. Response shape: real envelope is
{ code, msg, success, data: { limits: [
{ type: "TOKENS_LIMIT"|"TIME_LIMIT", usage, currentValue,
percentage, nextResetTime? } ] } }
The fetcher was looking for `data: [...]` directly and using `limit`/`used`
fields. It now parses `payload.data.limits[].usage` / `.currentValue`.
3. Added User-Agent header to match the reference tool.
Live probe finding: this user's z.ai key works fine for inference
(/api/coding/paas/v4/models returns 200 with the full model list)
but the monitor endpoint reports "no coding plan" — meaning their
account uses the regular pay-as-you-go z.ai/zhipu tier, not the
separately-billed "Coding Plan" subscription that the monitor
endpoint serves. The 429s they observe during inference are
rate-limit RPM/TPM errors, not coding-plan window exhaustion.
Code change is correct; the error message is now accurate and
actionable.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8fa9a4b8fa
commit
b39cf3387e
2 changed files with 67 additions and 40 deletions
|
|
@ -233,59 +233,62 @@ async function fetchMinimaxQuota(apiKey) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Z.AI — GET https://api.z.ai/api/monitor/usage/quota/limit with Bearer auth.
|
||||
* Real responses use a `code` / `msg` / `success` / `data` envelope. When
|
||||
* `success: false` (e.g. user has no active coding plan), we surface the
|
||||
* vendor's message as the error rather than silently emitting no windows.
|
||||
* Z.AI — GET https://api.z.ai/api/monitor/usage/quota/limit.
|
||||
*
|
||||
* Two non-obvious things about this endpoint (cross-referenced with
|
||||
* vbgate/opencode-mystatus reference impl):
|
||||
*
|
||||
* 1. Auth header is the RAW api key with NO `Bearer ` prefix. Using
|
||||
* `Authorization: Bearer <key>` causes the server to treat the call
|
||||
* as unauthenticated and respond with success:false / "user does not
|
||||
* have a coding plan" even when the user is actively on the plan.
|
||||
*
|
||||
* 2. Real response shape:
|
||||
* { code: 200, msg, success: true,
|
||||
* data: { limits: [
|
||||
* { type: "TOKENS_LIMIT" | "TIME_LIMIT",
|
||||
* usage: <total>, currentValue: <used>,
|
||||
* percentage: <0-100>, nextResetTime?: <ms epoch>
|
||||
* }, ...
|
||||
* ] } }
|
||||
*/
|
||||
async function fetchZaiQuota(apiKey) {
|
||||
const res = await fetch("https://api.z.ai/api/monitor/usage/quota/limit", {
|
||||
method: "GET",
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
headers: {
|
||||
Authorization: apiKey,
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": "SingularityForge-Quota/1.0",
|
||||
},
|
||||
});
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const payload = await res.json();
|
||||
// Treat envelope-level failures as fetch errors so they surface properly.
|
||||
if (payload?.success === false) {
|
||||
if (payload?.success === false || payload?.code !== 200) {
|
||||
throw new Error(
|
||||
`z.ai: ${payload?.msg ?? "unknown error"} (code ${payload?.code ?? "?"})`,
|
||||
);
|
||||
}
|
||||
const windows = [];
|
||||
const buckets = Array.isArray(payload?.data)
|
||||
? payload.data
|
||||
: Array.isArray(payload?.limits)
|
||||
? payload.limits
|
||||
: Array.isArray(payload)
|
||||
? payload
|
||||
: [];
|
||||
const buckets = Array.isArray(payload?.data?.limits) ? payload.data.limits : [];
|
||||
for (const bucket of buckets) {
|
||||
if (!bucket || typeof bucket !== "object") continue;
|
||||
const limit =
|
||||
typeof bucket.limit === "number"
|
||||
? bucket.limit
|
||||
: typeof bucket.total === "number"
|
||||
? bucket.total
|
||||
: undefined;
|
||||
let used =
|
||||
typeof bucket.used === "number"
|
||||
? bucket.used
|
||||
: typeof bucket.consumed === "number"
|
||||
? bucket.consumed
|
||||
: undefined;
|
||||
if (used === undefined && typeof bucket.remaining === "number" && typeof limit === "number") {
|
||||
used = limit - bucket.remaining;
|
||||
}
|
||||
const limit = toNum(bucket.usage);
|
||||
const used = toNum(bucket.currentValue);
|
||||
if (typeof used !== "number" || typeof limit !== "number") continue;
|
||||
const label =
|
||||
bucket.type === "TOKENS_LIMIT"
|
||||
? "5h tokens"
|
||||
: bucket.type === "TIME_LIMIT"
|
||||
? "MCP monthly"
|
||||
: String(bucket.type ?? "quota");
|
||||
windows.push({
|
||||
label: String(bucket.name ?? bucket.type ?? "quota"),
|
||||
label,
|
||||
used,
|
||||
limit,
|
||||
usedFraction: clampFraction(used, limit),
|
||||
resetHint: bucket.reset_time
|
||||
? String(bucket.reset_time)
|
||||
: bucket.resetAt
|
||||
? String(bucket.resetAt)
|
||||
resetHint:
|
||||
typeof bucket.nextResetTime === "number"
|
||||
? new Date(bucket.nextResetTime).toISOString()
|
||||
: undefined,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -224,19 +224,43 @@ describe("runProviderQuotaRefreshIfStale — minimax", () => {
|
|||
// ─── zai ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("runProviderQuotaRefreshIfStale — zai", () => {
|
||||
test("hits /api/monitor/usage/quota/limit and parses bucket array", async () => {
|
||||
test("uses raw key auth (no Bearer prefix) and parses {type, usage, currentValue}", async () => {
|
||||
const home = tempSfHome();
|
||||
stubFetch({
|
||||
const calls = stubFetch({
|
||||
"https://api.z.ai/api/monitor/usage/quota/limit": {
|
||||
data: [
|
||||
{ name: "5h tokens", limit: 5000, used: 1500 },
|
||||
{ name: "MCP monthly", limit: 100, used: 70 },
|
||||
],
|
||||
code: 200,
|
||||
msg: "ok",
|
||||
success: true,
|
||||
data: {
|
||||
limits: [
|
||||
{
|
||||
type: "TOKENS_LIMIT",
|
||||
usage: 5000,
|
||||
currentValue: 1500,
|
||||
percentage: 30,
|
||||
nextResetTime: 1779004800000,
|
||||
},
|
||||
{
|
||||
type: "TIME_LIMIT",
|
||||
usage: 100,
|
||||
currentValue: 70,
|
||||
percentage: 70,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await runProviderQuotaRefreshIfStale(home, makeAuth({ zai: "test-zai" }));
|
||||
|
||||
const zaiCall = calls.find((c) => c.url.includes("z.ai"));
|
||||
assert.ok(zaiCall);
|
||||
assert.equal(
|
||||
zaiCall.headers.Authorization,
|
||||
"test-zai",
|
||||
"zai auth header must be the raw key, NO Bearer prefix",
|
||||
);
|
||||
|
||||
const entry = getProviderQuotaState("zai");
|
||||
assert.equal(entry.ok, true);
|
||||
assert.equal(entry.windows.length, 2);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue