From c0de3538eca4f6c325c726cd617c37e3da1eff73 Mon Sep 17 00:00:00 2001 From: ace-pm Date: Wed, 15 Apr 2026 11:04:41 +0200 Subject: [PATCH] fix(retry-handler): classify 529/overloaded as rate_limit for fallback walk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minimax and other Anthropic-protocol providers return HTTP 529 with `overloaded_error` bodies under heavy load. The retryable regex (line 119) matched `overloaded` so the error was retried, but the rate-limit classifier (line 423) only matched `429`, so the error never triggered credential rotation or cross-provider fallback — the handler looped on the same provider forever. Adds `529|overloaded` to the rate-limit classifier so 529 responses route through the same backoff + fallback path as real rate limits. --- .../src/core/retry-handler.test.ts | 32 +++++++++++++++++++ .../pi-coding-agent/src/core/retry-handler.ts | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts index df3c8988d..c8d04b87f 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.test.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -172,6 +172,38 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => { assert.ok(retryStart, "Regular 429 should enter backoff retry"); }); + it("classifies 529 overloaded_error as rate_limit, not quota_exhausted", async () => { + // Minimax and other Anthropic-protocol providers return HTTP 529 with + // `overloaded_error` bodies under heavy load. These must route through the + // rate_limit path so credential rotation and cross-provider fallback fire. + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '529 {"type":"error","error":{"type":"overloaded_error","message":"The server cluster is currently under high load. Please retry after a short wait and thank you for your patience. (2064) (529)"},"request_id":"062e76f8f25cd919caa3af4baaa49203"}' + ); + + const result = await handler.handleRetryableError(msg); + + // Should enter the backoff loop (rate_limit path, not quota_exhausted) + assert.equal(result, true); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.ok(retryStart, "529 overloaded_error should enter backoff retry as rate_limit"); + + // Must NOT be treated as quota_exhausted (would emit fallback_chain_exhausted) + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.equal( + chainExhausted, + undefined, + "529 overloaded_error must NOT be classified as quota_exhausted", + ); + }); + it("classifies OpenRouter credit affordability errors as quota_exhausted", async () => { const { deps, emittedEvents } = createMockDeps({ model: createMockModel("openrouter", "openai/gpt-5-pro"), diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 399d92fd4..b85133915 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -420,7 +420,7 @@ export class RetryHandler { if (/requires more credits|can only afford|insufficient credits|not enough credits|credit balance/i.test(err)) return "quota_exhausted"; if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; - if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit"; + if (/rate.?limit|too many requests|429|529|overloaded/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; return "unknown"; }