// Diff context retained verbatim (these definitions are only partially visible in this hunk):
// diff --git a/packages/pi-ai/src/providers/anthropic.ts b/packages/pi-ai/src/providers/anthropic.ts
// index 192b9eb58..acc633c3b 100644
// --- a/packages/pi-ai/src/providers/anthropic.ts
// +++ b/packages/pi-ai/src/providers/anthropic.ts
// @@ -203,6 +203,28 @@ function mergeHeaders(...headerSources: (Record | undefined)[]):
//   return merged;
//  }

/** System error codes (`error.code`) treated as transient network failures. */
const TRANSIENT_NETWORK_ERROR_CODES: ReadonlySet<string> = new Set([
	"ECONNRESET",
	"EPIPE",
	"ETIMEDOUT",
	"ENOTFOUND",
	"EAI_AGAIN",
]);

/** Lower-cased message fragments that, on their own, mark an error as transient. */
const TRANSIENT_NETWORK_MESSAGE_FRAGMENTS: readonly string[] = [
	"connector_closed",
	"socket hang up",
	"network",
	"fetch failed",
];

/**
 * Detect transient network errors that are likely to succeed on retry.
 * Covers WebSocket disconnects (Tailscale, VPN), TCP resets, and DNS failures.
 *
 * The error itself and every `Error` reachable through its `cause` chain are
 * inspected, so a wrapper error whose own message is generic is still
 * recognised when the underlying socket/DNS failure is attached as `cause`.
 *
 * @param error - Any thrown value; non-`Error` values are never transient.
 * @returns `true` when the error, or one of its causes, carries a known
 *   transient error code or message fragment.
 */
function isTransientNetworkError(error: unknown): boolean {
	// Guard against self-referential cause chains.
	const visited = new Set<Error>();
	let current: unknown = error;

	while (current instanceof Error && !visited.has(current)) {
		visited.add(current);

		const { code, cause } = current as Error & { code?: unknown; cause?: unknown };
		if (typeof code === "string" && TRANSIENT_NETWORK_ERROR_CODES.has(code)) {
			return true;
		}

		// `message` is normally a string, but subclasses can overwrite it.
		const msg = typeof current.message === "string" ? current.message.toLowerCase() : "";
		if (
			TRANSIENT_NETWORK_MESSAGE_FRAGMENTS.some((fragment) => msg.includes(fragment)) ||
			// Both words must appear, in any order ("connection closed", "closed the connection").
			(msg.includes("connection") && msg.includes("closed"))
		) {
			return true;
		}

		current = cause;
	}

	return false;
}

// Diff context retained verbatim (continuation of the first hunk and the second hunk):
//  /**
//   * Extract retry delay from Anthropic error response headers (in milliseconds).
//   * Checks: retry-after (seconds or RFC date), x-ratelimit-reset-requests, x-ratelimit-reset-tokens.
// @@ -497,6 +519,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
//   output.retryAfterMs = retryAfterMs;
//   }
//   }
// + // Mark transient network errors as retriable so auto-mode can
// + // detect them and retry instead of stopping (#833).
// + if (isTransientNetworkError(error)) {
// + output.retryAfterMs = output.retryAfterMs ?? 5000;
// + }
//   stream.push({ type: "error", reason: output.stopReason, error: output });
//   stream.end();
//   }