fix(retry-handler): classify 529/overloaded as rate_limit for fallback walk

Minimax and other Anthropic-protocol providers return HTTP 529 with
`overloaded_error` bodies under heavy load. The retryable regex (line 119)
matched `overloaded` so the error was retried, but the rate-limit
classifier (line 423) only matched `429`, so the error never triggered
credential rotation or cross-provider fallback — the handler looped on
the same provider forever.

Adds `529|overloaded` to the rate-limit classifier so 529 responses
route through the same backoff + fallback path as real rate limits.
This commit is contained in:
ace-pm 2026-04-15 11:04:41 +02:00
parent 80ce0f4855
commit c0de3538ec
2 changed files with 33 additions and 1 deletions

View file

@@ -172,6 +172,38 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
assert.ok(retryStart, "Regular 429 should enter backoff retry");
});
it("classifies 529 overloaded_error as rate_limit, not quota_exhausted", async () => {
  // Under heavy load, Anthropic-protocol providers such as Minimax answer with
  // HTTP 529 and an `overloaded_error` body. That response has to take the
  // rate_limit path so that credential rotation and cross-provider fallback
  // are triggered.
  const overloadedResponse =
    '529 {"type":"error","error":{"type":"overloaded_error","message":"The server cluster is currently under high load. Please retry after a short wait and thank you for your patience. (2064) (529)"},"request_id":"062e76f8f25cd919caa3af4baaa49203"}';

  const mocks = createMockDeps({
    model: createMockModel("anthropic", "claude-opus-4-6"),
    markUsageLimitReachedResult: false,
    fallbackResult: null,
  });
  // Look up the first recorded event of a given type (undefined when absent).
  const firstEventOfType = (type) => mocks.emittedEvents.find((event) => event.type === type);

  const retryHandler = new RetryHandler(mocks.deps);
  const wasHandled = await retryHandler.handleRetryableError(errorMessage(overloadedResponse));

  // The rate_limit path enters the backoff loop and reports the error as handled.
  assert.equal(wasHandled, true);
  assert.ok(
    firstEventOfType("auto_retry_start"),
    "529 overloaded_error should enter backoff retry as rate_limit",
  );

  // A quota_exhausted classification would have emitted fallback_chain_exhausted;
  // that event must be absent.
  assert.equal(
    firstEventOfType("fallback_chain_exhausted"),
    undefined,
    "529 overloaded_error must NOT be classified as quota_exhausted",
  );
});
it("classifies OpenRouter credit affordability errors as quota_exhausted", async () => {
const { deps, emittedEvents } = createMockDeps({
model: createMockModel("openrouter", "openai/gpt-5-pro"),

View file

@@ -420,7 +420,7 @@ export class RetryHandler {
if (/requires more credits|can only afford|insufficient credits|not enough credits|credit balance/i.test(err))
return "quota_exhausted";
if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted";
if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit";
if (/rate.?limit|too many requests|429|529|overloaded/i.test(err)) return "rate_limit";
if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error";
return "unknown";
}