feat: use server-requested retry delay for Anthropic rate limits
Anthropic's 429 responses include retry-after and x-ratelimit-reset-* headers that tell us exactly when to retry. Previously we ignored these and used exponential backoff (2s, 4s, 8s), which is both wrong and misleading in the UI countdown.

- Add retryAfterMs to AssistantMessage as the structured carrier
- Extract retry-after / x-ratelimit-reset-requests / x-ratelimit-reset-tokens from Anthropic SDK APIError.headers in the provider catch block
- Session uses retryAfterMs when present (capped by maxDelayMs=60s), falls back to exponential backoff for errors with no timing hint

The UI countdown now shows the actual Anthropic reset time. No UI changes needed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7664163c1f
commit
ca8697ae26
3 changed files with 67 additions and 1 deletions
|
|
@ -190,6 +190,43 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
|
|||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract retry delay from Anthropic error response headers (in milliseconds).
|
||||
* Checks: retry-after (seconds or RFC date), x-ratelimit-reset-requests, x-ratelimit-reset-tokens.
|
||||
* Returns undefined if no valid delay is found or if the delay is in the past.
|
||||
*/
|
||||
export function extractRetryAfterMs(headers: Headers | { get(name: string): string | null }, errorText = ""): number | undefined {
|
||||
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
|
||||
|
||||
const retryAfter = headers.get("retry-after");
|
||||
if (retryAfter) {
|
||||
const seconds = Number(retryAfter);
|
||||
if (Number.isFinite(seconds)) {
|
||||
const delay = normalizeDelay(seconds * 1000);
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
const asDate = new Date(retryAfter).getTime();
|
||||
if (!Number.isNaN(asDate)) {
|
||||
const delay = normalizeDelay(asDate - Date.now());
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
}
|
||||
|
||||
// x-ratelimit-reset-requests / x-ratelimit-reset-tokens are Unix timestamps (seconds)
|
||||
for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
|
||||
const value = headers.get(header);
|
||||
if (value) {
|
||||
const resetSeconds = Number(value);
|
||||
if (Number.isFinite(resetSeconds)) {
|
||||
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
|
||||
if (delay !== undefined) return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
|
|
@ -415,6 +452,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
|
|||
for (const block of output.content) delete (block as any).index;
|
||||
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
||||
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
|
||||
if (error instanceof Anthropic.APIError && error.headers) {
|
||||
const retryAfterMs = extractRetryAfterMs(error.headers, error.message);
|
||||
if (retryAfterMs !== undefined) {
|
||||
output.retryAfterMs = retryAfterMs;
|
||||
}
|
||||
}
|
||||
stream.push({ type: "error", reason: output.stopReason, error: output });
|
||||
stream.end();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -189,6 +189,8 @@ export interface AssistantMessage {
|
|||
usage: Usage;
|
||||
stopReason: StopReason;
|
||||
errorMessage?: string;
|
||||
/** Server-requested retry delay in milliseconds (from Retry-After or rate limit headers). */
|
||||
retryAfterMs?: number;
|
||||
timestamp: number; // Unix timestamp in milliseconds
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2365,7 +2365,28 @@ export class AgentSession {
|
|||
return false;
|
||||
}
|
||||
|
||||
const delayMs = settings.baseDelayMs * 2 ** (this._retryAttempt - 1);
|
||||
// Use server-requested delay when available (rate limit headers), capped by maxDelayMs.
|
||||
// Fall back to exponential backoff when no server hint is present.
|
||||
const exponentialDelayMs = settings.baseDelayMs * 2 ** (this._retryAttempt - 1);
|
||||
let delayMs: number;
|
||||
if (message.retryAfterMs !== undefined) {
|
||||
const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity;
|
||||
if (message.retryAfterMs > cap) {
|
||||
// Server wants us to wait longer than our max — give up immediately
|
||||
this._emit({
|
||||
type: "auto_retry_end",
|
||||
success: false,
|
||||
attempt: this._retryAttempt - 1,
|
||||
finalError: `Rate limit reset in ${Math.ceil(message.retryAfterMs / 1000)}s (max: ${Math.ceil(cap / 1000)}s). ${message.errorMessage || ""}`.trim(),
|
||||
});
|
||||
this._retryAttempt = 0;
|
||||
this._resolveRetry();
|
||||
return false;
|
||||
}
|
||||
delayMs = message.retryAfterMs;
|
||||
} else {
|
||||
delayMs = exponentialDelayMs;
|
||||
}
|
||||
|
||||
this._emit({
|
||||
type: "auto_retry_start",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue