fix: prevent credential backoff on transport errors and handle quota exhaustion gracefully (#353)
Fixes #349, #339

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d4312c60ec
commit
9317816aa2
5 changed files with 120 additions and 3 deletions
|
|
@ -21,6 +21,34 @@ import type {
|
|||
StreamFn,
|
||||
} from "./types.js";
|
||||
|
||||
/**
 * All-zero usage template for messages that were produced without a model call.
 * Treat this as a template only: copy it (including the nested `cost`) before
 * attaching it to a message, because `as const` does not freeze it at runtime.
 */
const ZERO_USAGE = {
	input: 0,
	output: 0,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: 0,
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
} as const;

/**
 * Turn an arbitrary thrown value into a non-empty, human-readable string.
 * Never throws: this runs inside a catch handler, so a failure here would
 * escape as an unhandled rejection.
 */
function describeError(error: unknown): string {
	const fallback = "Unknown error";
	if (error instanceof Error) {
		// An Error with an empty message would otherwise yield an empty
		// errorMessage, which truthiness checks treat as "no error".
		return error.message || error.name || fallback;
	}
	try {
		// String() throws for values without a usable toString/valueOf
		// (for example Object.create(null)).
		return String(error) || fallback;
	} catch {
		return fallback;
	}
}

/**
 * Build an AssistantMessage for an unhandled error caught outside runLoop.
 * Uses the model from config so the message satisfies the full interface.
 *
 * @param error - The thrown value; may be anything, not only an Error.
 * @param config - Loop configuration; only `config.model` (api, provider, id) is read.
 * @returns An assistant message with `stopReason: "error"`, the error text as both
 *          its content and `errorMessage`, and a fresh zeroed usage object.
 */
function createErrorMessage(error: unknown, config: AgentLoopConfig): AssistantMessage {
	const msg = describeError(error);
	return {
		role: "assistant",
		content: [{ type: "text", text: msg }],
		api: config.model.api,
		provider: config.model.provider,
		model: config.model.id,
		// Fresh copy per message so a consumer mutating usage cannot corrupt
		// the shared template or other error messages.
		usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } },
		stopReason: "error",
		errorMessage: msg,
		timestamp: Date.now(),
	};
}
|
||||
|
||||
/**
|
||||
* Start an agent loop with a new prompt message.
|
||||
* The prompt is added to the context and events are emitted for it.
|
||||
|
|
@ -48,7 +76,16 @@ export function agentLoop(
|
|||
stream.push({ type: "message_end", message: prompt });
|
||||
}
|
||||
|
||||
await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
|
||||
try {
|
||||
await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
|
||||
} catch (error) {
|
||||
const errMsg = createErrorMessage(error, config);
|
||||
stream.push({ type: "message_start", message: errMsg });
|
||||
stream.push({ type: "message_end", message: errMsg });
|
||||
stream.push({ type: "turn_end", message: errMsg, toolResults: [] });
|
||||
stream.push({ type: "agent_end", messages: [...newMessages, errMsg] });
|
||||
stream.end([...newMessages, errMsg]);
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
|
|
@ -85,7 +122,16 @@ export function agentLoopContinue(
|
|||
stream.push({ type: "agent_start" });
|
||||
stream.push({ type: "turn_start" });
|
||||
|
||||
await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
|
||||
try {
|
||||
await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
|
||||
} catch (error) {
|
||||
const errMsg = createErrorMessage(error, config);
|
||||
stream.push({ type: "message_start", message: errMsg });
|
||||
stream.push({ type: "message_end", message: errMsg });
|
||||
stream.push({ type: "turn_end", message: errMsg, toolResults: [] });
|
||||
stream.push({ type: "agent_end", messages: [...newMessages, errMsg] });
|
||||
stream.end([...newMessages, errMsg]);
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
|
|
|
|||
|
|
@ -2316,9 +2316,14 @@ export class AgentSession {
|
|||
|
||||
// Try credential fallback before counting against retry budget.
|
||||
// If another credential is available, switch to it and retry immediately.
|
||||
// Only attempt credential rotation for errors that indicate a credential-level
|
||||
// problem (rate limit, quota exhaustion, server error). Transport failures
|
||||
// ("unknown") like connection resets are not credential-specific — rotating
|
||||
// won't help and backing off the only credential causes "Authentication failed".
|
||||
if (this.model && message.errorMessage) {
|
||||
const errorType = this._classifyErrorType(message.errorMessage);
|
||||
const hasAlternate = this._modelRegistry.authStorage.markUsageLimitReached(
|
||||
const isCredentialError = errorType !== "unknown";
|
||||
const hasAlternate = isCredentialError && this._modelRegistry.authStorage.markUsageLimitReached(
|
||||
this.model.provider,
|
||||
this.sessionId,
|
||||
{ errorType },
|
||||
|
|
|
|||
|
|
@ -158,6 +158,53 @@ describe("AuthStorage — rate-limit backoff", () => {
|
|||
assert.equal(hasAlternate, false);
|
||||
});
|
||||
|
||||
it("single credential: unknown error type skips backoff entirely", async () => {
	// A store with exactly one key; request it once before reporting the failure.
	const authStore = inMemory({ anthropic: makeKey("sk-only") });
	await authStore.getApiKey("anthropic");

	// Report a transport-style failure, classified as "unknown".
	const alternateAvailable = authStore.markUsageLimitReached("anthropic", undefined, {
		errorType: "unknown",
	});
	assert.equal(alternateAvailable, false);

	// The sole key is still handed out, so no backoff was recorded for it.
	const keyAfterFailure = await authStore.getApiKey("anthropic");
	assert.equal(keyAfterFailure, "sk-only");
});
|
||||
|
||||
it("multiple credentials: unknown error type still backs off the used credential", async () => {
	// Two keys for the same provider; the first request is expected to use sk-1.
	const authStore = inMemory({
		anthropic: [makeKey("sk-1"), makeKey("sk-2")],
	});
	await authStore.getApiKey("anthropic");

	// With an alternate present, even an "unknown" failure backs off the used key.
	const alternateAvailable = authStore.markUsageLimitReached("anthropic", undefined, {
		errorType: "unknown",
	});
	assert.equal(alternateAvailable, true);

	// The following request must rotate to the second key.
	const rotatedKey = await authStore.getApiKey("anthropic");
	assert.equal(rotatedKey, "sk-2");
});
|
||||
|
||||
it("single credential: rate_limit error type still backs off", async () => {
	// A store with exactly one key; request it once before reporting the failure.
	const authStore = inMemory({ anthropic: makeKey("sk-only") });
	await authStore.getApiKey("anthropic");

	// A genuine rate limit backs off the key even though no alternate exists.
	const alternateAvailable = authStore.markUsageLimitReached("anthropic", undefined, {
		errorType: "rate_limit",
	});
	assert.equal(alternateAvailable, false);

	// While backed off, the store has no key to hand out.
	const keyAfterFailure = await authStore.getApiKey("anthropic");
	assert.equal(keyAfterFailure, undefined);
});
|
||||
|
||||
it("session-sticky: marks the correct credential as backed off", async () => {
|
||||
const storage = inMemory({
|
||||
anthropic: [makeKey("sk-1"), makeKey("sk-2")],
|
||||
|
|
|
|||
|
|
@ -549,6 +549,14 @@ export class AuthStorage {
|
|||
if (credentials.length === 0) return false;
|
||||
|
||||
const errorType = options?.errorType ?? "rate_limit";
|
||||
|
||||
// For unknown/transport errors (e.g. connection reset, "terminated"),
|
||||
// don't back off the only credential — it would make getApiKey() return
|
||||
// undefined and surface a misleading "Authentication failed" message.
|
||||
if (errorType === "unknown" && credentials.length === 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const backoffMs = getBackoffDuration(errorType);
|
||||
|
||||
// Determine which credential was just used (same logic as selectCredentialIndex
|
||||
|
|
|
|||
|
|
@ -319,6 +319,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|||
}
|
||||
const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
|
||||
if (!key) {
|
||||
// Check if credentials exist but are temporarily backed off
|
||||
// (e.g., after a 429 quota exhaustion). Provide a specific error
|
||||
// so the retry handler knows this is transient, not a permanent
|
||||
// auth failure.
|
||||
const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
|
||||
if (hasAuth) {
|
||||
throw new Error(
|
||||
`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
|
||||
`The request will be retried automatically when backoff expires.`,
|
||||
);
|
||||
}
|
||||
const model = agent.state.model;
|
||||
const isOAuth = model && modelRegistry.isUsingOAuth(model);
|
||||
if (isOAuth) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue