fix: prevent credential backoff on transport errors and handle quota exhaustion gracefully (#353)

Fixes #349, #339

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-14 07:15:00 -06:00 · 2026-03-14 07:15:00 -06:00 · 9317816aa2
commit 9317816aa2
parent d4312c60ec
5 changed files with 120 additions and 3 deletions
--- a/packages/pi-agent-core/src/agent-loop.ts
+++ b/packages/pi-agent-core/src/agent-loop.ts
@ -21,6 +21,34 @@ import type {
 	StreamFn,
 } from "./types.js";

+// Template only: createErrorMessage copies it so messages never share one usage object.
+const ZERO_USAGE = {
+	input: 0,
+	output: 0,
+	cacheRead: 0,
+	cacheWrite: 0,
+	totalTokens: 0,
+	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+} as const;
+
+/**
+ * Build the error AssistantMessage for a failure that escaped runLoop; errorMessage is never empty.
+ */
+function createErrorMessage(error: unknown, config: AgentLoopConfig): AssistantMessage {
+	const msg = (error instanceof Error ? error.message || error.name : String(error)) || "Unknown error";
+	return {
+		role: "assistant",
+		content: [{ type: "text", text: msg }],
+		api: config.model.api,
+		provider: config.model.provider,
+		model: config.model.id,
+		usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } }, // fresh copy per message
+		stopReason: "error",
+		errorMessage: msg,
+		timestamp: Date.now(),
+	};
+}
+
 /**
 * Start an agent loop with a new prompt message.
 * The prompt is added to the context and events are emitted for it.
@ -48,7 +76,16 @@ export function agentLoop(
 			stream.push({ type: "message_end", message: prompt });
 		}

-		await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		try {
+			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		} catch (error) {
+			const errMsg = createErrorMessage(error, config);
+			stream.push({ type: "message_start", message: errMsg });
+			stream.push({ type: "message_end", message: errMsg });
+			stream.push({ type: "turn_end", message: errMsg, toolResults: [] });
+			stream.push({ type: "agent_end", messages: [...newMessages, errMsg] });
+			stream.end([...newMessages, errMsg]);
+		}
 	})();

 	return stream;
@ -85,7 +122,16 @@ export function agentLoopContinue(
 		stream.push({ type: "agent_start" });
 		stream.push({ type: "turn_start" });

-		await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		try {
+			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		} catch (error) {
+			const errMsg = createErrorMessage(error, config);
+			stream.push({ type: "message_start", message: errMsg });
+			stream.push({ type: "message_end", message: errMsg });
+			stream.push({ type: "turn_end", message: errMsg, toolResults: [] });
+			stream.push({ type: "agent_end", messages: [...newMessages, errMsg] });
+			stream.end([...newMessages, errMsg]);
+		}
 	})();

 	return stream;
--- a/packages/pi-coding-agent/src/core/agent-session.ts
+++ b/packages/pi-coding-agent/src/core/agent-session.ts
@ -2316,9 +2316,14 @@ export class AgentSession {

 		// Try credential fallback before counting against retry budget.
 		// If another credential is available, switch to it and retry immediately.
+		// Only attempt credential rotation for errors that indicate a credential-level
+		// problem (rate limit, quota exhaustion, server error). Transport failures
+		// ("unknown") like connection resets are not credential-specific — rotating
+		// won't help and backing off the only credential causes "Authentication failed".
 		if (this.model && message.errorMessage) {
 			const errorType = this._classifyErrorType(message.errorMessage);
-			const hasAlternate = this._modelRegistry.authStorage.markUsageLimitReached(
+			const isCredentialError = errorType !== "unknown";
+			const hasAlternate = isCredentialError && this._modelRegistry.authStorage.markUsageLimitReached(
 				this.model.provider,
 				this.sessionId,
 				{ errorType },
--- a/packages/pi-coding-agent/src/core/auth-storage.test.ts
+++ b/packages/pi-coding-agent/src/core/auth-storage.test.ts
@ -158,6 +158,53 @@ describe("AuthStorage — rate-limit backoff", () => {
 		assert.equal(hasAlternate, false);
 	});

+	it("single credential: unknown error type skips backoff entirely", async () => {
+		const auth = inMemory({ anthropic: makeKey("sk-only") });
+		await auth.getApiKey("anthropic");
+
+		// Report a transport-style failure, i.e. errorType "unknown".
+		const rotated = auth.markUsageLimitReached("anthropic", undefined, {
+			errorType: "unknown",
+		});
+		assert.equal(rotated, false);
+
+		// The sole key must not have been backed off: it is still handed out.
+		const keyAfter = await auth.getApiKey("anthropic");
+		assert.equal(keyAfter, "sk-only");
+	});
+
+	it("multiple credentials: unknown error type still backs off the used credential", async () => {
+		const auth = inMemory({
+			anthropic: [makeKey("sk-1"), makeKey("sk-2")],
+		});
+		await auth.getApiKey("anthropic"); // first lookup hands out sk-1
+
+		// With a second key configured, even an "unknown" failure backs off the used key.
+		const rotated = auth.markUsageLimitReached("anthropic", undefined, {
+			errorType: "unknown",
+		});
+		assert.equal(rotated, true);
+
+		// The following lookup is expected to yield sk-2.
+		const nextKey = await auth.getApiKey("anthropic");
+		assert.equal(nextKey, "sk-2");
+	});
+
+	it("single credential: rate_limit error type still backs off", async () => {
+		const auth = inMemory({ anthropic: makeKey("sk-only") });
+		await auth.getApiKey("anthropic");
+
+		// A rate_limit failure backs off the key even when it is the only one.
+		const rotated = auth.markUsageLimitReached("anthropic", undefined, {
+			errorType: "rate_limit",
+		});
+		assert.equal(rotated, false);
+
+		// While backed off, no key is available for the provider.
+		const keyAfter = await auth.getApiKey("anthropic");
+		assert.equal(keyAfter, undefined);
+	});
+
 	it("session-sticky: marks the correct credential as backed off", async () => {
 		const storage = inMemory({
 			anthropic: [makeKey("sk-1"), makeKey("sk-2")],
--- a/packages/pi-coding-agent/src/core/auth-storage.ts
+++ b/packages/pi-coding-agent/src/core/auth-storage.ts
@ -549,6 +549,14 @@ export class AuthStorage {
 		if (credentials.length === 0) return false;

 		const errorType = options?.errorType ?? "rate_limit";
+
+		// For unknown/transport errors (e.g. connection reset, "terminated"),
+		// don't back off the only credential — it would make getApiKey() return
+		// undefined and surface a misleading "Authentication failed" message.
+		if (errorType === "unknown" && credentials.length === 1) {
+			return false;
+		}
+
 		const backoffMs = getBackoffDuration(errorType);

 		// Determine which credential was just used (same logic as selectCredentialIndex
--- a/packages/pi-coding-agent/src/core/sdk.ts
+++ b/packages/pi-coding-agent/src/core/sdk.ts
@ -319,6 +319,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			}
 			const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
 			if (!key) {
+				// Check if credentials exist but are temporarily backed off
+				// (e.g., after a 429 quota exhaustion). Provide a specific error
+				// so the retry handler knows this is transient, not a permanent
+				// auth failure.
+				const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
+				if (hasAuth) {
+					throw new Error(
+						`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
+							`The request will be retried automatically when backoff expires.`,
+					);
+				}
 				const model = agent.state.model;
 				const isOAuth = model && modelRegistry.isUsingOAuth(model);
 				if (isOAuth) {