feat: use server-requested retry delay for Anthropic rate limits

Anthropic's 429 responses include retry-after and x-ratelimit-reset-*
headers that tell us exactly when to retry. Previously we ignored these
and used exponential backoff (2s, 4s, 8s), which is both wrong and
misleading in the UI countdown.

- Add retryAfterMs to AssistantMessage as the structured carrier
- Extract retry-after / x-ratelimit-reset-requests / x-ratelimit-reset-tokens
  from Anthropic SDK APIError.headers in the provider catch block
- Session uses retryAfterMs when present; if it exceeds maxDelayMs (60s) the
  retry is abandoned immediately rather than clamped. Errors with no timing
  hint fall back to exponential backoff

The UI countdown now shows the actual Anthropic reset time. No UI changes needed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Lex Christopherson 2026-03-13 16:51:17 -06:00
parent 7664163c1f
commit ca8697ae26
3 changed files with 67 additions and 1 deletions

View file

@ -190,6 +190,43 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
return merged;
}
/**
 * Extract the server-requested retry delay, in milliseconds, from error response headers.
 *
 * Headers are consulted in order and the first usable value wins:
 *   1. `retry-after` — either a number of seconds, or a date string parseable by `Date`.
 *   2. `x-ratelimit-reset-requests`, then `x-ratelimit-reset-tokens` — read as Unix
 *      timestamps expressed in seconds.
 *
 * A one-second safety margin is added to every delay and the result is rounded up to a
 * whole millisecond, so the caller does not retry a moment before the limit resets.
 *
 * @param headers - Response headers; only `get(name)` is called on them.
 * @param errorText - Currently unused; kept so callers that pass the error message keep compiling.
 * @returns The delay in milliseconds, or `undefined` when no header yields a positive,
 *   finite delay (header missing, unparseable, already in the past, or numerically overflowing).
 */
export function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
	// Only positive, finite delays are usable. The finiteness check rejects NaN (unparseable
	// dates) and Infinity (overflowing numeric headers), neither of which can be scheduled.
	const normalizeDelay = (ms: number): number | undefined =>
		Number.isFinite(ms) && ms > 0 ? Math.ceil(ms + 1000) : undefined;

	const retryAfter = headers.get("retry-after");
	if (retryAfter) {
		const seconds = Number(retryAfter);
		// A numeric value is a delay in seconds and is never re-read as a date; anything
		// else is tried as a date string and converted to a delay relative to now.
		const delay = Number.isFinite(seconds)
			? normalizeDelay(seconds * 1000)
			: normalizeDelay(new Date(retryAfter).getTime() - Date.now());
		if (delay !== undefined) return delay;
	}

	// x-ratelimit-reset-requests / x-ratelimit-reset-tokens are Unix timestamps (seconds)
	for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
		const value = headers.get(header);
		if (!value) continue;
		const resetSeconds = Number(value);
		if (!Number.isFinite(resetSeconds)) continue;
		const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
		if (delay !== undefined) return delay;
	}

	return undefined;
}
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
model: Model<"anthropic-messages">,
context: Context,
@ -415,6 +452,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
for (const block of output.content) delete (block as any).index;
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
if (error instanceof Anthropic.APIError && error.headers) {
const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
if (retryAfterMs !== undefined) {
output.retryAfterMs = retryAfterMs;
}
}
stream.push({ type: "error", reason: output.stopReason, error: output });
stream.end();
}

View file

@ -189,6 +189,8 @@ export interface AssistantMessage {
usage: Usage;
stopReason: StopReason;
errorMessage?: string;
/** Server-requested retry delay in milliseconds (from Retry-After or rate limit headers). */
retryAfterMs?: number;
timestamp: number; // Unix timestamp in milliseconds
}

View file

@ -2365,7 +2365,28 @@ export class AgentSession {
return false;
}
const delayMs = settings.baseDelayMs * 2 ** (this._retryAttempt - 1);
// Use the server-requested delay when available (rate limit headers). If it exceeds
// maxDelayMs we give up instead of waiting; a non-positive maxDelayMs means no limit.
// Fall back to exponential backoff when no server hint is present.
const exponentialDelayMs = settings.baseDelayMs * 2 ** (this._retryAttempt - 1);
let delayMs: number;
if (message.retryAfterMs !== undefined) {
const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity;
if (message.retryAfterMs > cap) {
// Server wants us to wait longer than our max — give up immediately
this._emit({
type: "auto_retry_end",
success: false,
attempt: this._retryAttempt - 1,
finalError: `Rate limit reset in ${Math.ceil(message.retryAfterMs / 1000)}s (max: ${Math.ceil(cap / 1000)}s). ${message.errorMessage || ""}`.trim(),
});
this._retryAttempt = 0;
this._resolveRetry();
return false;
}
delayMs = message.retryAfterMs;
} else {
delayMs = exponentialDelayMs;
}
this._emit({
type: "auto_retry_start",