/**
 * Derive a server-requested retry delay (in milliseconds) from the headers of an error response.
 *
 * Sources are consulted in this order; the first one that yields a positive delay wins:
 *  1. `retry-after`: a number of seconds, or otherwise a date string.
 *  2. `x-ratelimit-reset-requests`, then `x-ratelimit-reset-tokens`: interpreted as Unix
 *     timestamps expressed in seconds.
 *
 * Every returned delay includes a one-second safety margin and is rounded up to a whole
 * millisecond, so a `retry-after: 2` header produces `3000`.
 *
 * @param headers - A `Headers` instance or any object exposing a compatible `get(name)` lookup.
 * @param errorText - Accepted for call-site compatibility; not consulted by this function.
 * @returns The delay in milliseconds, or `undefined` when no header yields a delay in the future.
 */
export function extractRetryAfterMs(
	headers: Headers | { get(name: string): string | null },
	errorText = "",
): number | undefined {
	// Extra wait added on top of the server hint so the retry does not land exactly on the boundary.
	const SAFETY_MARGIN_MS = 1000;
	const toDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + SAFETY_MARGIN_MS) : undefined);

	const retryAfter = headers.get("retry-after");
	if (retryAfter) {
		const seconds = Number(retryAfter);
		if (Number.isFinite(seconds)) {
			// Numeric form: a delay in seconds. A non-positive value carries no usable hint.
			// It is deliberately NOT re-parsed as a date: feeding "0" or "-40" to `new Date()`
			// hits implementation-defined legacy parsing and can yield an arbitrary calendar date.
			const delay = toDelay(seconds * 1000);
			if (delay !== undefined) return delay;
		} else {
			// Non-numeric form: treat as an absolute date and wait until then.
			const retryAt = new Date(retryAfter).getTime();
			if (!Number.isNaN(retryAt)) {
				const delay = toDelay(retryAt - Date.now());
				if (delay !== undefined) return delay;
			}
		}
	}

	// Fallback: reset headers, read as Unix timestamps in seconds; values that are not finite
	// numbers or that lie in the past are ignored.
	// NOTE(review): Anthropic's public docs name their reset headers `anthropic-ratelimit-*-reset`
	// (RFC 3339 timestamps) — confirm which endpoints actually send these `x-ratelimit-*` names.
	for (const header of ["x-ratelimit-reset-requests", "x-ratelimit-reset-tokens"]) {
		const value = headers.get(header);
		if (!value) continue;
		const resetSeconds = Number(value);
		if (!Number.isFinite(resetSeconds)) continue;
		const delay = toDelay(resetSeconds * 1000 - Date.now());
		if (delay !== undefined) return delay;
	}

	return undefined;
}
(block as any).index; output.stopReason = options?.signal?.aborted ? "aborted" : "error"; output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); + if (error instanceof Anthropic.APIError && error.headers) { + const retryAfterMs = extractRetryAfterMs(error.headers, error.message); + if (retryAfterMs !== undefined) { + output.retryAfterMs = retryAfterMs; + } + } stream.push({ type: "error", reason: output.stopReason, error: output }); stream.end(); } diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index cc4a4309b..96f64c08f 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -189,6 +189,8 @@ export interface AssistantMessage { usage: Usage; stopReason: StopReason; errorMessage?: string; + /** Server-requested retry delay in milliseconds (from Retry-After or rate limit headers). */ + retryAfterMs?: number; timestamp: number; // Unix timestamp in milliseconds } diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index bbfa0371d..13e030e49 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -2365,7 +2365,28 @@ export class AgentSession { return false; } - const delayMs = settings.baseDelayMs * 2 ** (this._retryAttempt - 1); + // Use server-requested delay when available (rate limit headers), capped by maxDelayMs. + // Fall back to exponential backoff when no server hint is present. + const exponentialDelayMs = settings.baseDelayMs * 2 ** (this._retryAttempt - 1); + let delayMs: number; + if (message.retryAfterMs !== undefined) { + const cap = settings.maxDelayMs > 0 ? 
settings.maxDelayMs : Infinity; + if (message.retryAfterMs > cap) { + // Server wants us to wait longer than our max — give up immediately + this._emit({ + type: "auto_retry_end", + success: false, + attempt: this._retryAttempt - 1, + finalError: `Rate limit reset in ${Math.ceil(message.retryAfterMs / 1000)}s (max: ${Math.ceil(cap / 1000)}s). ${message.errorMessage || ""}`.trim(), + }); + this._retryAttempt = 0; + this._resolveRetry(); + return false; + } + delayMs = message.retryAfterMs; + } else { + delayMs = exponentialDelayMs; + } this._emit({ type: "auto_retry_start",