From fde2aafa64053a2276b68476251cd304a5f58668 Mon Sep 17 00:00:00 2001 From: Tibsfox Date: Sun, 5 Apr 2026 09:37:28 -0700 Subject: [PATCH] fix(retry): prevent 429 quota cascade and 30-min lockout --- .../src/core/retry-handler.test.ts | 80 +++++++++++++++++++ .../pi-coding-agent/src/core/retry-handler.ts | 60 ++++++++------ packages/pi-coding-agent/src/core/sdk.ts | 18 ++--- .../gsd/tests/provider-errors.test.ts | 3 +- 4 files changed, 127 insertions(+), 34 deletions(-) diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts index 26b27455c..a85b3f234 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.test.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -278,5 +278,85 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => { const msg = errorMessage("Extra usage is required for long context requests."); assert.equal(handler.isRetryableError(msg), true); }); + + it("does NOT consider credential cooldown error as retryable (#3429)", () => { + // The credential cooldown message from getApiKey() must not re-enter + // the retry handler. Re-entry creates cascading empty error entries + // in the session file that break resume. + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage( + 'All credentials for "anthropic" are in a cooldown window. ' + + 'Please wait a moment and try again, or switch to a different provider.', + ); + assert.equal(handler.isRetryableError(msg), false); + }); + }); + + describe("quota_exhausted credential backoff (#3430)", () => { + it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => { + // "Extra usage is required" is an account-level billing gate. + // Backing off the credential for 30 minutes blocks all provider + // requests and has no effect on the billing condition. + const { deps, markUsageLimitReached } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}', + ); + + await handler.handleRetryableError(msg); + + assert.equal( + markUsageLimitReached.mock.calls.length, + 0, + "markUsageLimitReached must NOT be called for quota_exhausted errors", + ); + }); + + it("still calls markUsageLimitReached for regular rate_limit errors", async () => { + const { deps, markUsageLimitReached } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + await handler.handleRetryableError(msg); + + assert.equal( + markUsageLimitReached.mock.calls.length, + 1, + "markUsageLimitReached should be called for rate_limit errors", + ); + }); + + it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => { + const fallbackModel = createMockModel("openai", "gpt-4o"); + const { deps, markUsageLimitReached, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: { model: fallbackModel, reason: "cross-provider fallback" }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry with fallback provider"); + assert.equal( + markUsageLimitReached.mock.calls.length, + 0, + "should NOT back off credentials before trying fallback", + ); + }); }); }); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 538e3a447..4dbac58d5 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -109,7 +109,11 @@ export class RetryHandler { if (isContextOverflow(message, contextWindow)) return false; const err = message.errorMessage; - return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off|extra usage is required/i.test( + // "temporarily backed off" is intentionally excluded: it is an internally- + // generated error from getApiKey() when credentials are in a backoff window. + // Re-entering the retry handler for that message creates a cascade of empty + // error entries in the session file, breaking resume (#3429). + return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required/i.test( err, ); } @@ -139,34 +143,42 @@ export class RetryHandler { const retryGeneration = this._retryGeneration; if (this._deps.getModel() && message.errorMessage) { const errorType = this._classifyErrorType(message.errorMessage); - const isCredentialError = errorType === "rate_limit" || errorType === "quota_exhausted"; - const hasAlternate = - isCredentialError && - this._deps.modelRegistry.authStorage.markUsageLimitReached( - this._deps.getModel()!.provider, - this._deps.getSessionId(), - { errorType }, - ); + const isRateLimit = errorType === "rate_limit"; + const isQuotaError = errorType === "quota_exhausted"; - if (hasAlternate) { - this._removeLastAssistantError(); + // Credential rotation — only for transient rate limits (#3430). + // Quota errors ("Extra usage is required") are account-level billing + // gates; rotating to another credential on the same account won't help + // and the 30-minute backoff blocks all provider requests needlessly. + if (isRateLimit) { + const hasAlternate = + this._deps.modelRegistry.authStorage.markUsageLimitReached( + this._deps.getModel()!.provider, + this._deps.getSessionId(), + { errorType }, + ); - this._deps.emit({ - type: "auto_retry_start", - attempt: this._retryAttempt + 1, - maxAttempts: settings.maxRetries, - delayMs: 0, - errorMessage: `${message.errorMessage} (switching credential)`, - }); + if (hasAlternate) { + this._removeLastAssistantError(); - // Retry immediately with the next credential - don't increment _retryAttempt - this._scheduleContinue(retryGeneration); + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: settings.maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (switching credential)`, + }); - return true; + // Retry immediately with the next credential - don't increment _retryAttempt + this._scheduleContinue(retryGeneration); + + return true; + } } - // All credentials are backed off. Try cross-provider fallback before giving up. - if (isCredentialError) { + // Cross-provider fallback — for rate limits with all creds backed off, + // or quota errors (which skip credential backoff entirely). + if (isRateLimit || isQuotaError) { const fallbackResult = await this._deps.fallbackResolver.findFallback( this._deps.getModel()!, errorType, @@ -200,7 +212,7 @@ export class RetryHandler { } // No fallback available either - if (errorType === "quota_exhausted") { + if (isQuotaError) { // Try long-context model downgrade ([1m] → base) before giving up const downgraded = this._tryLongContextDowngrade(message, retryGeneration); if (downgraded) return true; diff --git a/packages/pi-coding-agent/src/core/sdk.ts b/packages/pi-coding-agent/src/core/sdk.ts index 8d8f8cf04..230a60e4c 100644 --- a/packages/pi-coding-agent/src/core/sdk.ts +++ b/packages/pi-coding-agent/src/core/sdk.ts @@ -372,16 +372,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt)); } - // All retries exhausted — throw descriptive error - // Check if credentials exist but are temporarily backed off - // (e.g., after a 429 quota exhaustion). Provide a specific error - // so the retry handler knows this is transient, not a permanent - // auth failure. + // All retries exhausted — throw descriptive error. + // Check if credentials exist but are temporarily in a backoff window + // (e.g., after a 429). This message intentionally avoids phrases like + // "rate limit" / "429" to prevent isRetryableError() from re-entering + // the retry handler and creating cascading error entries (#3429). const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider); if (hasAuth) { throw new Error( - `All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` + - `The request will be retried automatically when backoff expires.`, + `All credentials for "${resolvedProvider}" are in a cooldown window. ` + + `Please wait a moment and try again, or switch to a different provider.`, ); } const model = agent.state.model; @@ -391,8 +391,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} // surface a specific message instead of the misleading "Authentication failed". if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) { throw new Error( - `Rate limit in effect for "${resolvedProvider}". ` + - `Please wait before retrying or switch to a different model.`, + `All credentials for "${resolvedProvider}" are in a cooldown window. ` + + `Please wait a moment and try again, or switch to a different provider.`, ); } throw new Error( diff --git a/src/resources/extensions/gsd/tests/provider-errors.test.ts b/src/resources/extensions/gsd/tests/provider-errors.test.ts index e4ec992d4..0ece725c3 100644 --- a/src/resources/extensions/gsd/tests/provider-errors.test.ts +++ b/src/resources/extensions/gsd/tests/provider-errors.test.ts @@ -464,7 +464,8 @@ test("agent-session retryable error regex matches server_error (underscore)", () // This regex is extracted from _isRetryableError in agent-session.ts. // It must match both "server error" (space) and "server_error" (underscore) // to properly classify Codex streaming errors as retryable. - const retryableRegex = /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i; + // "temporarily backed off" intentionally excluded — see #3429 + const retryableRegex = /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required/i; // server_error (with underscore — Codex streaming error format) assert.ok(retryableRegex.test("Codex server_error: An error occurred"));