fix(retry): prevent 429 quota cascade and 30-min lockout
This commit is contained in:
parent
092d1c0a9e
commit
fde2aafa64
4 changed files with 127 additions and 34 deletions
|
|
@ -278,5 +278,85 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
const msg = errorMessage("Extra usage is required for long context requests.");
|
||||
assert.equal(handler.isRetryableError(msg), true);
|
||||
});
|
||||
|
||||
it("does NOT consider credential cooldown error as retryable (#3429)", () => {
|
||||
// The credential cooldown message from getApiKey() must not re-enter
|
||||
// the retry handler. Re-entry creates cascading empty error entries
|
||||
// in the session file that break resume.
|
||||
const { deps } = createMockDeps();
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
'All credentials for "anthropic" are in a cooldown window. ' +
|
||||
'Please wait a moment and try again, or switch to a different provider.',
|
||||
);
|
||||
assert.equal(handler.isRetryableError(msg), false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("quota_exhausted credential backoff (#3430)", () => {
|
||||
it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => {
|
||||
// "Extra usage is required" is an account-level billing gate.
|
||||
// Backing off the credential for 30 minutes blocks all provider
|
||||
// requests and has no effect on the billing condition.
|
||||
const { deps, markUsageLimitReached } = createMockDeps({
|
||||
model: createMockModel("anthropic", "claude-opus-4-6[1m]"),
|
||||
markUsageLimitReachedResult: false,
|
||||
fallbackResult: null,
|
||||
findModelResult: () => undefined,
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
'429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}',
|
||||
);
|
||||
|
||||
await handler.handleRetryableError(msg);
|
||||
|
||||
assert.equal(
|
||||
markUsageLimitReached.mock.calls.length,
|
||||
0,
|
||||
"markUsageLimitReached must NOT be called for quota_exhausted errors",
|
||||
);
|
||||
});
|
||||
|
||||
it("still calls markUsageLimitReached for regular rate_limit errors", async () => {
|
||||
const { deps, markUsageLimitReached } = createMockDeps({
|
||||
model: createMockModel("anthropic", "claude-opus-4-6"),
|
||||
markUsageLimitReachedResult: false,
|
||||
fallbackResult: null,
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage("429 Too Many Requests");
|
||||
|
||||
await handler.handleRetryableError(msg);
|
||||
|
||||
assert.equal(
|
||||
markUsageLimitReached.mock.calls.length,
|
||||
1,
|
||||
"markUsageLimitReached should be called for rate_limit errors",
|
||||
);
|
||||
});
|
||||
|
||||
it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => {
|
||||
const fallbackModel = createMockModel("openai", "gpt-4o");
|
||||
const { deps, markUsageLimitReached, continueFn } = createMockDeps({
|
||||
model: createMockModel("anthropic", "claude-opus-4-6[1m]"),
|
||||
markUsageLimitReachedResult: false,
|
||||
fallbackResult: { model: fallbackModel, reason: "cross-provider fallback" },
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage("Extra usage is required for long context requests.");
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
assert.equal(result, true, "should retry with fallback provider");
|
||||
assert.equal(
|
||||
markUsageLimitReached.mock.calls.length,
|
||||
0,
|
||||
"should NOT back off credentials before trying fallback",
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -109,7 +109,11 @@ export class RetryHandler {
|
|||
if (isContextOverflow(message, contextWindow)) return false;
|
||||
|
||||
const err = message.errorMessage;
|
||||
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off|extra usage is required/i.test(
|
||||
// "temporarily backed off" is intentionally excluded: it is an internally-
|
||||
// generated error from getApiKey() when credentials are in a backoff window.
|
||||
// Re-entering the retry handler for that message creates a cascade of empty
|
||||
// error entries in the session file, breaking resume (#3429).
|
||||
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required/i.test(
|
||||
err,
|
||||
);
|
||||
}
|
||||
|
|
@ -139,34 +143,42 @@ export class RetryHandler {
|
|||
const retryGeneration = this._retryGeneration;
|
||||
if (this._deps.getModel() && message.errorMessage) {
|
||||
const errorType = this._classifyErrorType(message.errorMessage);
|
||||
const isCredentialError = errorType === "rate_limit" || errorType === "quota_exhausted";
|
||||
const hasAlternate =
|
||||
isCredentialError &&
|
||||
this._deps.modelRegistry.authStorage.markUsageLimitReached(
|
||||
this._deps.getModel()!.provider,
|
||||
this._deps.getSessionId(),
|
||||
{ errorType },
|
||||
);
|
||||
const isRateLimit = errorType === "rate_limit";
|
||||
const isQuotaError = errorType === "quota_exhausted";
|
||||
|
||||
if (hasAlternate) {
|
||||
this._removeLastAssistantError();
|
||||
// Credential rotation — only for transient rate limits (#3430).
|
||||
// Quota errors ("Extra usage is required") are account-level billing
|
||||
// gates; rotating to another credential on the same account won't help
|
||||
// and the 30-minute backoff blocks all provider requests needlessly.
|
||||
if (isRateLimit) {
|
||||
const hasAlternate =
|
||||
this._deps.modelRegistry.authStorage.markUsageLimitReached(
|
||||
this._deps.getModel()!.provider,
|
||||
this._deps.getSessionId(),
|
||||
{ errorType },
|
||||
);
|
||||
|
||||
this._deps.emit({
|
||||
type: "auto_retry_start",
|
||||
attempt: this._retryAttempt + 1,
|
||||
maxAttempts: settings.maxRetries,
|
||||
delayMs: 0,
|
||||
errorMessage: `${message.errorMessage} (switching credential)`,
|
||||
});
|
||||
if (hasAlternate) {
|
||||
this._removeLastAssistantError();
|
||||
|
||||
// Retry immediately with the next credential - don't increment _retryAttempt
|
||||
this._scheduleContinue(retryGeneration);
|
||||
this._deps.emit({
|
||||
type: "auto_retry_start",
|
||||
attempt: this._retryAttempt + 1,
|
||||
maxAttempts: settings.maxRetries,
|
||||
delayMs: 0,
|
||||
errorMessage: `${message.errorMessage} (switching credential)`,
|
||||
});
|
||||
|
||||
return true;
|
||||
// Retry immediately with the next credential - don't increment _retryAttempt
|
||||
this._scheduleContinue(retryGeneration);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// All credentials are backed off. Try cross-provider fallback before giving up.
|
||||
if (isCredentialError) {
|
||||
// Cross-provider fallback — for rate limits with all creds backed off,
|
||||
// or quota errors (which skip credential backoff entirely).
|
||||
if (isRateLimit || isQuotaError) {
|
||||
const fallbackResult = await this._deps.fallbackResolver.findFallback(
|
||||
this._deps.getModel()!,
|
||||
errorType,
|
||||
|
|
@ -200,7 +212,7 @@ export class RetryHandler {
|
|||
}
|
||||
|
||||
// No fallback available either
|
||||
if (errorType === "quota_exhausted") {
|
||||
if (isQuotaError) {
|
||||
// Try long-context model downgrade ([1m] → base) before giving up
|
||||
const downgraded = this._tryLongContextDowngrade(message, retryGeneration);
|
||||
if (downgraded) return true;
|
||||
|
|
|
|||
|
|
@ -372,16 +372,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|||
await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt));
|
||||
}
|
||||
|
||||
// All retries exhausted — throw descriptive error
|
||||
// Check if credentials exist but are temporarily backed off
|
||||
// (e.g., after a 429 quota exhaustion). Provide a specific error
|
||||
// so the retry handler knows this is transient, not a permanent
|
||||
// auth failure.
|
||||
// All retries exhausted — throw descriptive error.
|
||||
// Check if credentials exist but are temporarily in a backoff window
|
||||
// (e.g., after a 429). This message intentionally avoids phrases like
|
||||
// "rate limit" / "429" to prevent isRetryableError() from re-entering
|
||||
// the retry handler and creating cascading error entries (#3429).
|
||||
const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
|
||||
if (hasAuth) {
|
||||
throw new Error(
|
||||
`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
|
||||
`The request will be retried automatically when backoff expires.`,
|
||||
`All credentials for "${resolvedProvider}" are in a cooldown window. ` +
|
||||
`Please wait a moment and try again, or switch to a different provider.`,
|
||||
);
|
||||
}
|
||||
const model = agent.state.model;
|
||||
|
|
@ -391,8 +391,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|||
// surface a specific message instead of the misleading "Authentication failed".
|
||||
if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) {
|
||||
throw new Error(
|
||||
`Rate limit in effect for "${resolvedProvider}". ` +
|
||||
`Please wait before retrying or switch to a different model.`,
|
||||
`All credentials for "${resolvedProvider}" are in a cooldown window. ` +
|
||||
`Please wait a moment and try again, or switch to a different provider.`,
|
||||
);
|
||||
}
|
||||
throw new Error(
|
||||
|
|
|
|||
|
|
@ -464,7 +464,8 @@ test("agent-session retryable error regex matches server_error (underscore)", ()
|
|||
// This regex is extracted from _isRetryableError in agent-session.ts.
|
||||
// It must match both "server error" (space) and "server_error" (underscore)
|
||||
// to properly classify Codex streaming errors as retryable.
|
||||
const retryableRegex = /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i;
|
||||
// "temporarily backed off" intentionally excluded — see #3429
|
||||
const retryableRegex = /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required/i;
|
||||
|
||||
// server_error (with underscore — Codex streaming error format)
|
||||
assert.ok(retryableRegex.test("Codex server_error: An error occurred"));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue