fix(retry-handler): classify 529/overloaded as rate_limit for fallback walk
Minimax and other Anthropic-protocol providers return HTTP 529 with `overloaded_error` bodies under heavy load. The retryable regex (line 119) matched `overloaded`, so the error was retried, but the rate-limit classifier (line 423) only matched `rate limit`, `too many requests`, and `429`, so a 529 was never classified as `rate_limit` and never triggered credential rotation or cross-provider fallback — the handler looped on the same provider forever. This commit adds `529|overloaded` to the rate-limit classifier so that 529 responses route through the same backoff + fallback path as real rate limits.
This commit is contained in:
parent
80ce0f4855
commit
c0de3538ec
2 changed files with 33 additions and 1 deletions
|
|
@ -172,6 +172,38 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
assert.ok(retryStart, "Regular 429 should enter backoff retry");
|
||||
});
|
||||
|
||||
// Regression test: a 529 `overloaded_error` message passed to
// RetryHandler.handleRetryableError must be retried with backoff and must not
// emit `fallback_chain_exhausted`.
it("classifies 529 overloaded_error as rate_limit, not quota_exhausted", async () => {
|
||||
// Minimax and other Anthropic-protocol providers return HTTP 529 with
|
||||
// `overloaded_error` bodies under heavy load. These must route through the
|
||||
// rate_limit path so credential rotation and cross-provider fallback fire.
|
||||
// NOTE(review): presumably `markUsageLimitReachedResult: false` and
// `fallbackResult: null` make credential rotation and provider fallback
// come back empty, leaving plain backoff as the expected outcome — confirm
// against createMockDeps.
const { deps, emittedEvents } = createMockDeps({
|
||||
model: createMockModel("anthropic", "claude-opus-4-6"),
|
||||
markUsageLimitReachedResult: false,
|
||||
fallbackResult: null,
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
// Error text is the HTTP status code followed by the provider's JSON error body.
const msg = errorMessage(
|
||||
'529 {"type":"error","error":{"type":"overloaded_error","message":"The server cluster is currently under high load. Please retry after a short wait and thank you for your patience. (2064) (529)"},"request_id":"062e76f8f25cd919caa3af4baaa49203"}'
|
||||
);
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
// Should enter the backoff loop (rate_limit path, not quota_exhausted)
|
||||
assert.equal(result, true);
|
||||
|
||||
// NOTE(review): an `overloaded` error may already have been retried with
// backoff before `529|overloaded` was added to the rate-limit regex. Verify
// that the assertions below fail without that change; if they do not, this
// test does not pin the rate_limit classification itself.
const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start");
|
||||
assert.ok(retryStart, "529 overloaded_error should enter backoff retry as rate_limit");
|
||||
|
||||
// Must NOT be treated as quota_exhausted (would emit fallback_chain_exhausted)
|
||||
const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted");
|
||||
assert.equal(
|
||||
chainExhausted,
|
||||
undefined,
|
||||
"529 overloaded_error must NOT be classified as quota_exhausted",
|
||||
);
|
||||
|
||||
});
|
||||
|
||||
it("classifies OpenRouter credit affordability errors as quota_exhausted", async () => {
|
||||
const { deps, emittedEvents } = createMockDeps({
|
||||
model: createMockModel("openrouter", "openai/gpt-5-pro"),
|
||||
|
|
|
|||
|
|
@ -420,7 +420,7 @@ export class RetryHandler {
|
|||
if (/requires more credits|can only afford|insufficient credits|not enough credits|credit balance/i.test(err))
|
||||
return "quota_exhausted";
|
||||
if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted";
|
||||
if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit";
|
||||
if (/rate.?limit|too many requests|429|529|overloaded/i.test(err)) return "rate_limit";
|
||||
if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error";
|
||||
return "unknown";
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue