From 9317816aa2e53e05a3213b04be2916cc24c07474 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?T=C3=82CHES?=
Date: Sat, 14 Mar 2026 07:15:00 -0600
Subject: [PATCH] fix: prevent credential backoff on transport errors and
 handle quota exhaustion gracefully (#353)

Fixes #349, #339

Co-authored-by: Claude Opus 4.6 (1M context)
---
 packages/pi-agent-core/src/agent-loop.ts      | 50 ++++++++++++++++++-
 .../pi-coding-agent/src/core/agent-session.ts |  7 ++-
 .../src/core/auth-storage.test.ts             | 47 +++++++++++++++++
 .../pi-coding-agent/src/core/auth-storage.ts  |  8 +++
 packages/pi-coding-agent/src/core/sdk.ts      | 11 ++++
 5 files changed, 120 insertions(+), 3 deletions(-)

diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts
index 43eb3693f..8dee70a08 100644
--- a/packages/pi-agent-core/src/agent-loop.ts
+++ b/packages/pi-agent-core/src/agent-loop.ts
@@ -21,6 +21,34 @@ import type {
 	StreamFn,
 } from "./types.js";
 
+const ZERO_USAGE = {
+	input: 0,
+	output: 0,
+	cacheRead: 0,
+	cacheWrite: 0,
+	totalTokens: 0,
+	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+} as const;
+
+/**
+ * Build the stopReason "error" AssistantMessage emitted when runLoop throws, using config.model for identity.
+ * The text is never empty (errorMessage stays truthy) and usage is a private copy of ZERO_USAGE.
+ */
+function createErrorMessage(error: unknown, config: AgentLoopConfig): AssistantMessage {
+	const msg = (error instanceof Error ? error.message : String(error)) || "Unknown error";
+	return {
+		role: "assistant",
+		content: [{ type: "text", text: msg }],
+		api: config.model.api,
+		provider: config.model.provider,
+		model: config.model.id,
+		usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } }, // fresh copy: `as const` is not enforced at runtime
+		stopReason: "error",
+		errorMessage: msg,
+		timestamp: Date.now(),
+	};
+}
+
 /**
  * Start an agent loop with a new prompt message.
  * The prompt is added to the context and events are emitted for it.
@@ -48,7 +76,16 @@ export function agentLoop(
 			stream.push({ type: "message_end", message: prompt });
 		}
 
-		await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		try {
+			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		} catch (error) { // runLoop threw: emit a synthetic error turn, then end the stream
+			const errMsg = createErrorMessage(error, config);
+			stream.push({ type: "message_start", message: errMsg });
+			stream.push({ type: "message_end", message: errMsg });
+			stream.push({ type: "turn_end", message: errMsg, toolResults: [] });
+			stream.push({ type: "agent_end", messages: [...newMessages, errMsg] });
+			stream.end([...newMessages, errMsg]);
+		}
 	})();
 
 	return stream;
@@ -85,7 +122,16 @@ export function agentLoopContinue(
 		stream.push({ type: "agent_start" });
 		stream.push({ type: "turn_start" });
 
-		await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		try {
+			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		} catch (error) { // runLoop threw: emit a synthetic error turn, then end the stream
+			const errMsg = createErrorMessage(error, config);
+			stream.push({ type: "message_start", message: errMsg });
+			stream.push({ type: "message_end", message: errMsg });
+			stream.push({ type: "turn_end", message: errMsg, toolResults: [] });
+			stream.push({ type: "agent_end", messages: [...newMessages, errMsg] });
+			stream.end([...newMessages, errMsg]);
+		}
 	})();
 
 	return stream;
diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts
index deb18023a..3b3124417 100644
--- a/packages/pi-coding-agent/src/core/agent-session.ts
+++ b/packages/pi-coding-agent/src/core/agent-session.ts
@@ -2316,9 +2316,14 @@ export class AgentSession {
 
 		// Try credential fallback before counting against retry budget.
 		// If another credential is available, switch to it and retry immediately.
+		// Only attempt credential rotation for errors that indicate a credential-level
+		// problem (rate limit, quota exhaustion, server error). Transport failures
+		// ("unknown"), e.g. connection resets, are not credential-specific: rotating won't
+		// help, and backing off the only credential surfaces "Authentication failed".
 		if (this.model && message.errorMessage) {
 			const errorType = this._classifyErrorType(message.errorMessage);
-			const hasAlternate = this._modelRegistry.authStorage.markUsageLimitReached(
+			const isCredentialError = errorType !== "unknown";
+			const hasAlternate = isCredentialError && this._modelRegistry.authStorage.markUsageLimitReached(
 				this.model.provider,
 				this.sessionId,
 				{ errorType },
diff --git a/packages/pi-coding-agent/src/core/auth-storage.test.ts b/packages/pi-coding-agent/src/core/auth-storage.test.ts
index 6d4445c28..f91947ca9 100644
--- a/packages/pi-coding-agent/src/core/auth-storage.test.ts
+++ b/packages/pi-coding-agent/src/core/auth-storage.test.ts
@@ -158,6 +158,53 @@ describe("AuthStorage — rate-limit backoff", () => {
 		assert.equal(hasAlternate, false);
 	});
 
+	it("single credential: unknown error type skips backoff entirely", async () => {
+		const storage = inMemory({ anthropic: makeKey("sk-only") });
+		await storage.getApiKey("anthropic");
+
+		// Mark with unknown error type (transport failure)
+		const hasAlternate = storage.markUsageLimitReached("anthropic", undefined, {
+			errorType: "unknown",
+		});
+		assert.equal(hasAlternate, false);
+
+		// Key should still be available — backoff was not applied
+		const key = await storage.getApiKey("anthropic");
+		assert.equal(key, "sk-only");
+	});
+
+	it("multiple credentials: unknown error type still backs off the used credential", async () => {
+		const storage = inMemory({
+			anthropic: [makeKey("sk-1"), makeKey("sk-2")],
+		});
+		await storage.getApiKey("anthropic"); // uses sk-1
+
+		// Mark with unknown error type — should still back off when alternates exist
+		const hasAlternate = storage.markUsageLimitReached("anthropic", undefined, {
+			errorType: "unknown",
+		});
+		assert.equal(hasAlternate, true);
+
+		// Next call should return sk-2
+		const key = await storage.getApiKey("anthropic");
+		assert.equal(key, "sk-2");
+	});
+
+	it("single credential: rate_limit error type still backs off", async () => {
+		const storage = inMemory({ anthropic: makeKey("sk-only") });
+		await storage.getApiKey("anthropic");
+
+		// rate_limit must still back off, even when it is the only credential
+		const hasAlternate = storage.markUsageLimitReached("anthropic", undefined, {
+			errorType: "rate_limit",
+		});
+		assert.equal(hasAlternate, false);
+
+		// The only key is backed off, so no key is returned
+		const key = await storage.getApiKey("anthropic");
+		assert.equal(key, undefined);
+	});
+
 	it("session-sticky: marks the correct credential as backed off", async () => {
 		const storage = inMemory({
 			anthropic: [makeKey("sk-1"), makeKey("sk-2")],
diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts
index bce7d910e..bb10e5dfe 100644
--- a/packages/pi-coding-agent/src/core/auth-storage.ts
+++ b/packages/pi-coding-agent/src/core/auth-storage.ts
@@ -549,6 +549,14 @@ export class AuthStorage {
 		if (credentials.length === 0) return false;
 
 		const errorType = options?.errorType ?? "rate_limit";
+
+		// For unknown/transport errors (e.g. connection reset, "terminated"),
+		// don't back off the only credential — it would make getApiKey() return
+		// undefined and surface a misleading "Authentication failed" message.
+		if (errorType === "unknown" && credentials.length === 1) {
+			return false;
+		}
+
 		const backoffMs = getBackoffDuration(errorType);
 
 		// Determine which credential was just used (same logic as selectCredentialIndex
diff --git a/packages/pi-coding-agent/src/core/sdk.ts b/packages/pi-coding-agent/src/core/sdk.ts
index 98dc78f7a..9e9d0009a 100644
--- a/packages/pi-coding-agent/src/core/sdk.ts
+++ b/packages/pi-coding-agent/src/core/sdk.ts
@@ -319,6 +319,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			}
 			const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
 			if (!key) {
+				// No key, but credentials are configured: treat them as temporarily backed off
+				// (e.g., after a 429 quota exhaustion) and throw a specific error so the retry
+				// handler sees a transient condition rather than a permanent auth failure.
+				// NOTE(review): assumes hasAuth() ignores backoff state — confirm.
+				const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
+				if (hasAuth) {
+					throw new Error(
+						`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
+							`The request will be retried automatically when backoff expires.`,
+					);
+				}
 				const model = agent.state.model;
 				const isOAuth = model && modelRegistry.isUsingOAuth(model);
 				if (isOAuth) {