fix(retry): prevent 429 quota cascade and 30-min lockout

This commit is contained in:
Tibsfox 2026-04-05 09:37:28 -07:00
parent 092d1c0a9e
commit fde2aafa64
4 changed files with 127 additions and 34 deletions

View file

@ -278,5 +278,85 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
const msg = errorMessage("Extra usage is required for long context requests.");
assert.equal(handler.isRetryableError(msg), true);
});
it("does NOT consider credential cooldown error as retryable (#3429)", () => {
// The credential cooldown message from getApiKey() must not re-enter
// the retry handler. Re-entry creates cascading empty error entries
// in the session file that break resume.
const { deps } = createMockDeps();
const handler = new RetryHandler(deps);
const msg = errorMessage(
'All credentials for "anthropic" are in a cooldown window. ' +
'Please wait a moment and try again, or switch to a different provider.',
);
assert.equal(handler.isRetryableError(msg), false);
});
});
describe("quota_exhausted credential backoff (#3430)", () => {
it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => {
// "Extra usage is required" is an account-level billing gate.
// Backing off the credential for 30 minutes blocks all provider
// requests and has no effect on the billing condition.
const { deps, markUsageLimitReached } = createMockDeps({
model: createMockModel("anthropic", "claude-opus-4-6[1m]"),
markUsageLimitReachedResult: false,
fallbackResult: null,
findModelResult: () => undefined,
});
const handler = new RetryHandler(deps);
const msg = errorMessage(
'429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}',
);
await handler.handleRetryableError(msg);
assert.equal(
markUsageLimitReached.mock.calls.length,
0,
"markUsageLimitReached must NOT be called for quota_exhausted errors",
);
});
it("still calls markUsageLimitReached for regular rate_limit errors", async () => {
const { deps, markUsageLimitReached } = createMockDeps({
model: createMockModel("anthropic", "claude-opus-4-6"),
markUsageLimitReachedResult: false,
fallbackResult: null,
});
const handler = new RetryHandler(deps);
const msg = errorMessage("429 Too Many Requests");
await handler.handleRetryableError(msg);
assert.equal(
markUsageLimitReached.mock.calls.length,
1,
"markUsageLimitReached should be called for rate_limit errors",
);
});
it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => {
const fallbackModel = createMockModel("openai", "gpt-4o");
const { deps, markUsageLimitReached, continueFn } = createMockDeps({
model: createMockModel("anthropic", "claude-opus-4-6[1m]"),
markUsageLimitReachedResult: false,
fallbackResult: { model: fallbackModel, reason: "cross-provider fallback" },
});
const handler = new RetryHandler(deps);
const msg = errorMessage("Extra usage is required for long context requests.");
const result = await handler.handleRetryableError(msg);
assert.equal(result, true, "should retry with fallback provider");
assert.equal(
markUsageLimitReached.mock.calls.length,
0,
"should NOT back off credentials before trying fallback",
);
});
});
});

View file

@ -109,7 +109,11 @@ export class RetryHandler {
if (isContextOverflow(message, contextWindow)) return false;
const err = message.errorMessage;
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off|extra usage is required/i.test(
// "temporarily backed off" is intentionally excluded: it is an internally-
// generated error from getApiKey() when credentials are in a backoff window.
// Re-entering the retry handler for that message creates a cascade of empty
// error entries in the session file, breaking resume (#3429).
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required/i.test(
err,
);
}
@ -139,34 +143,42 @@ export class RetryHandler {
const retryGeneration = this._retryGeneration;
if (this._deps.getModel() && message.errorMessage) {
const errorType = this._classifyErrorType(message.errorMessage);
const isCredentialError = errorType === "rate_limit" || errorType === "quota_exhausted";
const hasAlternate =
isCredentialError &&
this._deps.modelRegistry.authStorage.markUsageLimitReached(
this._deps.getModel()!.provider,
this._deps.getSessionId(),
{ errorType },
);
const isRateLimit = errorType === "rate_limit";
const isQuotaError = errorType === "quota_exhausted";
if (hasAlternate) {
this._removeLastAssistantError();
// Credential rotation — only for transient rate limits (#3430).
// Quota errors ("Extra usage is required") are account-level billing
// gates; rotating to another credential on the same account won't help
// and the 30-minute backoff blocks all provider requests needlessly.
if (isRateLimit) {
const hasAlternate =
this._deps.modelRegistry.authStorage.markUsageLimitReached(
this._deps.getModel()!.provider,
this._deps.getSessionId(),
{ errorType },
);
this._deps.emit({
type: "auto_retry_start",
attempt: this._retryAttempt + 1,
maxAttempts: settings.maxRetries,
delayMs: 0,
errorMessage: `${message.errorMessage} (switching credential)`,
});
if (hasAlternate) {
this._removeLastAssistantError();
// Retry immediately with the next credential - don't increment _retryAttempt
this._scheduleContinue(retryGeneration);
this._deps.emit({
type: "auto_retry_start",
attempt: this._retryAttempt + 1,
maxAttempts: settings.maxRetries,
delayMs: 0,
errorMessage: `${message.errorMessage} (switching credential)`,
});
return true;
// Retry immediately with the next credential - don't increment _retryAttempt
this._scheduleContinue(retryGeneration);
return true;
}
}
// All credentials are backed off. Try cross-provider fallback before giving up.
if (isCredentialError) {
// Cross-provider fallback — for rate limits with all creds backed off,
// or quota errors (which skip credential backoff entirely).
if (isRateLimit || isQuotaError) {
const fallbackResult = await this._deps.fallbackResolver.findFallback(
this._deps.getModel()!,
errorType,
@ -200,7 +212,7 @@ export class RetryHandler {
}
// No fallback available either
if (errorType === "quota_exhausted") {
if (isQuotaError) {
// Try long-context model downgrade ([1m] → base) before giving up
const downgraded = this._tryLongContextDowngrade(message, retryGeneration);
if (downgraded) return true;

View file

@ -372,16 +372,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt));
}
// All retries exhausted — throw descriptive error
// Check if credentials exist but are temporarily backed off
// (e.g., after a 429 quota exhaustion). Provide a specific error
// so the retry handler knows this is transient, not a permanent
// auth failure.
// All retries exhausted — throw descriptive error.
// Check if credentials exist but are temporarily in a backoff window
// (e.g., after a 429). This message intentionally avoids phrases like
// "rate limit" / "429" to prevent isRetryableError() from re-entering
// the retry handler and creating cascading error entries (#3429).
const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
if (hasAuth) {
throw new Error(
`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
`The request will be retried automatically when backoff expires.`,
`All credentials for "${resolvedProvider}" are in a cooldown window. ` +
`Please wait a moment and try again, or switch to a different provider.`,
);
}
const model = agent.state.model;
@ -391,8 +391,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
// surface a specific message instead of the misleading "Authentication failed".
if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) {
throw new Error(
`Rate limit in effect for "${resolvedProvider}". ` +
`Please wait before retrying or switch to a different model.`,
`All credentials for "${resolvedProvider}" are in a cooldown window. ` +
`Please wait a moment and try again, or switch to a different provider.`,
);
}
throw new Error(

View file

@ -464,7 +464,8 @@ test("agent-session retryable error regex matches server_error (underscore)", ()
// This regex is extracted from _isRetryableError in agent-session.ts.
// It must match both "server error" (space) and "server_error" (underscore)
// to properly classify Codex streaming errors as retryable.
const retryableRegex = /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i;
// "temporarily backed off" intentionally excluded — see #3429
const retryableRegex = /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required/i;
// server_error (with underscore — Codex streaming error format)
assert.ok(retryableRegex.test("Codex server_error: An error occurred"));