fix(auto): recover from OpenRouter credit affordability errors

This commit is contained in:
Jeremy 2026-04-12 22:48:55 -05:00
parent 804f1d4b94
commit 724464c7ae
4 changed files with 156 additions and 2 deletions

View file

@ -171,6 +171,25 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start");
assert.ok(retryStart, "Regular 429 should enter backoff retry");
});
// An OpenRouter 402 "can only afford N" message must be handled as a retryable
// error and surface an auto_retry_start event instead of ending the turn.
it("classifies OpenRouter credit affordability errors as quota_exhausted", async () => {
  const mocks = createMockDeps({
    model: createMockModel("openrouter", "openai/gpt-5-pro"),
    markUsageLimitReachedResult: false,
    fallbackResult: null,
  });
  const retryHandler = new RetryHandler(mocks.deps);
  const affordabilityError = errorMessage(
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
  );

  const handled = await retryHandler.handleRetryableError(affordabilityError);
  assert.equal(handled, true, "affordability error should trigger credit-aware retry");

  // The retry is announced through the event stream.
  const sawRetryStart = mocks.emittedEvents.some((event) => event.type === "auto_retry_start");
  assert.ok(sawRetryStart, "Expected immediate retry after reducing max tokens");
});
});
describe("long-context model downgrade", () => {
@ -271,6 +290,61 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
});
});
describe("credit-aware maxTokens retry", () => {
  // Provider text used by every case in this suite: 32000 requested, 329 affordable.
  const affordabilityErrorText =
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.";

  // Fresh mocks around an OpenRouter model whose maxTokens (128000) is far
  // above the affordable cap, with no credential rotation and no fallback.
  const createExpensiveModelDeps = () => {
    const model = createMockModel("openrouter", "openai/gpt-5-pro");
    model.maxTokens = 128000;
    return createMockDeps({
      model,
      markUsageLimitReachedResult: false,
      fallbackResult: null,
    });
  };

  it("reduces maxTokens on same model when provider reports affordable cap", async () => {
    const { deps, emittedEvents, onModelChangeFn } = createExpensiveModelDeps();
    const retryHandler = new RetryHandler(deps);

    const handled = await retryHandler.handleRetryableError(errorMessage(affordabilityErrorText));
    assert.equal(handled, true, "should retry after reducing maxTokens");

    // Exactly one setModel call, keeping provider/id and lowering only maxTokens.
    const recordedSetModelCalls = (deps.agent.setModel as any).mock.calls;
    assert.equal(recordedSetModelCalls.length, 1, "should apply one model downgrade");
    const appliedModel = recordedSetModelCalls[0].arguments[0] as Model<Api>;
    assert.equal(appliedModel.provider, "openrouter");
    assert.equal(appliedModel.id, "openai/gpt-5-pro");
    // 329 affordable minus a 32-token buffer (10% of 329, floored).
    assert.equal(appliedModel.maxTokens, 297, "expected affordability cap with safety buffer");
    assert.equal(onModelChangeFn.mock.calls.length, 1, "should notify about model update");

    const adjustmentEvent = emittedEvents.find((event) => event.type === "fallback_provider_switch");
    assert.ok(adjustmentEvent, "should emit model-adjustment event");
    const adjustmentReason = String(adjustmentEvent?.reason || "");
    assert.ok(
      adjustmentReason.includes("credit-aware retry"),
      "switch reason should mention credit-aware retry",
    );
  });

  it("does not mark credentials in cooldown for affordability quota errors", async () => {
    const { deps, markUsageLimitReached } = createExpensiveModelDeps();
    const retryHandler = new RetryHandler(deps);

    await retryHandler.handleRetryableError(errorMessage(affordabilityErrorText));

    assert.equal(markUsageLimitReached.mock.calls.length, 0, "quota error should skip credential cooldown");
  });
});
describe("isRetryableError", () => {
it("considers long-context entitlement error as retryable", () => {
const { deps } = createMockDeps();
@ -291,6 +365,15 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
);
assert.equal(handler.isRetryableError(msg), false);
});
// isRetryableError must accept the OpenRouter 402 affordability wording.
it("considers OpenRouter affordability credit errors as retryable", () => {
  const retryHandler = new RetryHandler(createMockDeps().deps);
  const affordabilityError = errorMessage(
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
  );
  const retryable = retryHandler.isRetryableError(affordabilityError);
  assert.equal(retryable, true);
});
});
describe("third-party block claude-code fallback (#3772)", () => {

View file

@ -116,7 +116,7 @@ export class RetryHandler {
// generated error from getApiKey() when credentials are in a backoff window.
// Re-entering the retry handler for that message creates a cascade of empty
// error entries in the session file, breaking resume (#3429).
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
err,
);
}
@ -158,6 +158,14 @@ export class RetryHandler {
const isRateLimit = errorType === "rate_limit";
const isQuotaError = errorType === "quota_exhausted";
// Credit-aware retry (OpenRouter-style 402 affordability errors):
// when provider reports "can only afford N", lower maxTokens and retry
// on the same model before rotating credentials/providers.
if (isQuotaError) {
const adjusted = this._tryAffordableMaxTokensRetry(message, retryGeneration);
if (adjusted) return true;
}
// Credential rotation — only for transient rate limits (#3430).
// Quota errors ("Extra usage is required") are account-level billing
// gates; rotating to another credential on the same account won't help
@ -409,12 +417,63 @@ export class RetryHandler {
// Long-context entitlement errors are billing gates, not transient rate limits.
// Must be checked before the generic 429/rate_limit regex.
if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted";
if (/requires more credits|can only afford|insufficient credits|not enough credits|credit balance/i.test(err))
return "quota_exhausted";
if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted";
if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit";
if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error";
return "unknown";
}
/**
 * Attempt a same-model retry by reducing maxTokens when the provider reports
 * an affordability cap (e.g., "can only afford 329").
 *
 * On success the current model is replaced by a copy with a lower maxTokens,
 * the change is announced (`fallback_provider_switch`, then `auto_retry_start`
 * with `delayMs: 0`), and a continue is scheduled for `retryGeneration`.
 *
 * Returns false, leaving all state untouched, when:
 *  - there is no current model or no error text,
 *  - the text carries no parsable positive "can only afford N" figure,
 *  - N is below the minimum retry budget (a retry would request more than the
 *    provider says is affordable and fail again), or
 *  - the computed cap would not lower the model's current maxTokens.
 *
 * NOTE(review): this path does not increment `_retryAttempt` itself; repeated
 * entries still terminate because every successful pass strictly lowers
 * maxTokens and the value is bounded below by the minimum retry budget.
 *
 * @param message - failed assistant message whose `errorMessage` is inspected
 * @param retryGeneration - forwarded unchanged to `_scheduleContinue`
 * @returns true when a reduced-maxTokens retry was scheduled, false otherwise
 */
private _tryAffordableMaxTokensRetry(message: AssistantMessage, retryGeneration: number): boolean {
  const currentModel = this._deps.getModel();
  if (!currentModel || !message.errorMessage) return false;

  // Example: "can only afford 329" (thousands separators such as "1,024" are stripped).
  const match = message.errorMessage.match(/can only afford\s+([\d,]+)/i);
  if (!match?.[1]) return false;
  const affordable = Number.parseInt(match[1].replace(/,/g, ""), 10);
  if (!Number.isFinite(affordable) || affordable <= 0) return false;

  // Smallest completion budget we are willing to retry with. If the provider
  // can afford less than this, flooring the request at this value would ask
  // for MORE than the reported cap and be rejected again, so decline and let
  // the caller's regular quota handling take over.
  const minRetryMaxTokens = 64;
  if (affordable < minRetryMaxTokens) return false;

  // Leave a small buffer (10% of the cap, clamped to 16..64 tokens) so slight
  // input variance doesn't immediately re-fail.
  const safetyBuffer = Math.min(64, Math.max(16, Math.floor(affordable * 0.1)));
  const targetMaxTokens = Math.max(minRetryMaxTokens, affordable - safetyBuffer);

  // Treat a missing/non-finite current cap as "unbounded" rather than letting
  // NaN leak into the downgraded model via Math.min.
  const currentMaxTokens = Number.isFinite(currentModel.maxTokens)
    ? currentModel.maxTokens
    : Number.POSITIVE_INFINITY;
  // Nothing to gain unless the new cap is strictly lower than the current one.
  if (targetMaxTokens >= currentMaxTokens) return false;

  const downgradedModel = {
    ...currentModel,
    maxTokens: targetMaxTokens,
  };
  this._deps.agent.setModel(downgradedModel);
  this._deps.onModelChange(downgradedModel);
  this._removeLastAssistantError();

  // Reuses the provider-switch event to report the same-model cap adjustment.
  this._deps.emit({
    type: "fallback_provider_switch",
    from: `${currentModel.provider}/${currentModel.id} (maxTokens=${currentModel.maxTokens})`,
    to: `${downgradedModel.provider}/${downgradedModel.id} (maxTokens=${downgradedModel.maxTokens})`,
    reason: `credit-aware retry: provider affordable cap ${affordable} tokens`,
  });
  // Immediate retry: no backoff delay is requested for this path.
  this._deps.emit({
    type: "auto_retry_start",
    attempt: this._retryAttempt + 1,
    maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries,
    delayMs: 0,
    errorMessage: `${message.errorMessage} (reducing max tokens)`,
  });
  this._scheduleContinue(retryGeneration);
  return true;
}
/**
* Attempt to downgrade a long-context model (e.g. claude-opus-4-6[1m]) to its
* base model (claude-opus-4-6) when the account lacks the long-context billing

View file

@ -44,6 +44,9 @@ export function resetRetryState(state: RetryState): void {
// Wording that marks an error as permanent. classifyError only honours a hit
// here when the message does not also match the rate-limit check (rate limit
// takes precedence). All patterns in this group are unanchored substring
// matches, so e.g. "account" or "auth" anywhere in the text counts.
const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i;
// Rate-limit wording, or a literal "429" anywhere in the message.
const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i;
// OpenRouter affordability-style quota errors should be treated as transient
// so core retry logic can lower maxTokens and continue in-session.
// classifyError ORs this with RATE_LIMIT_RE, so a match is handled on the
// rate-limit path and overrides a PERMANENT_RE hit on the same message.
const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i;
// Transport-level failure wording (socket error codes, DNS, fetch failures).
const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i;
// Server-side failure wording or a literal 500/502/503 in the message.
// NOTE(review): the numeric alternatives also match inside longer numbers
// (e.g. a token count containing "500") — confirm that is acceptable.
const SERVER_RE = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i;
// ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first).
@ -67,7 +70,7 @@ const RESET_DELAY_RE = /reset in (\d+)s/i;
*/
export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass {
const isPermanent = PERMANENT_RE.test(errorMsg);
const isRateLimit = RATE_LIMIT_RE.test(errorMsg);
const isRateLimit = RATE_LIMIT_RE.test(errorMsg) || AFFORDABILITY_RE.test(errorMsg);
// 1. Permanent — but rate limit takes precedence
if (isPermanent && !isRateLimit) {

View file

@ -32,6 +32,15 @@ test("classifyError detects rate limit from message", () => {
assert.equal(result.kind, "rate-limit");
});
// An OpenRouter 402 affordability message must classify as a transient
// rate-limit result that carries a positive retry delay.
test("classifyError treats OpenRouter affordability errors as transient rate-limit class", () => {
  const affordabilityText =
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.";
  const classification = classifyError(affordabilityText);

  assert.ok(isTransient(classification));
  assert.equal(classification.kind, "rate-limit");
  assert.ok("retryAfterMs" in classification && classification.retryAfterMs > 0);
});
test("classifyError extracts reset delay from message", () => {
const result = classifyError("rate limit exceeded, reset in 45s");
assert.equal(result.kind, "rate-limit");