fix(auto): recover from OpenRouter credit affordability errors
This commit is contained in:
parent
804f1d4b94
commit
724464c7ae
4 changed files with 156 additions and 2 deletions
|
|
@ -171,6 +171,25 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start");
|
||||
assert.ok(retryStart, "Regular 429 should enter backoff retry");
|
||||
});
|
||||
|
||||
it("classifies OpenRouter credit affordability errors as quota_exhausted", async () => {
  // OpenRouter model with no credential cooldown result and no fallback
  // provider configured in the mocks.
  const mocks = createMockDeps({
    model: createMockModel("openrouter", "openai/gpt-5-pro"),
    markUsageLimitReachedResult: false,
    fallbackResult: null,
  });
  const retryHandler = new RetryHandler(mocks.deps);

  // 402 payload that reports both the requested and the affordable token counts.
  const affordabilityError = errorMessage(
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
  );

  const handled = await retryHandler.handleRetryableError(affordabilityError);
  assert.equal(handled, true, "affordability error should trigger credit-aware retry");

  // A retry-start event must have been emitted while handling the error.
  const retryStartEvent = mocks.emittedEvents.find((event) => event.type === "auto_retry_start");
  assert.ok(retryStartEvent, "Expected immediate retry after reducing max tokens");
});
|
||||
});
|
||||
|
||||
describe("long-context model downgrade", () => {
|
||||
|
|
@ -271,6 +290,61 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("credit-aware maxTokens retry", () => {
  // Provider error text shared by both cases: a 402 that names the affordable cap (329).
  const AFFORDABILITY_ERROR_TEXT =
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.";

  it("reduces maxTokens on same model when provider reports affordable cap", async () => {
    const pricedOutModel = createMockModel("openrouter", "openai/gpt-5-pro");
    pricedOutModel.maxTokens = 128000;

    const mocks = createMockDeps({
      model: pricedOutModel,
      markUsageLimitReachedResult: false,
      fallbackResult: null,
    });
    const retryHandler = new RetryHandler(mocks.deps);

    const handled = await retryHandler.handleRetryableError(errorMessage(AFFORDABILITY_ERROR_TEXT));
    assert.equal(handled, true, "should retry after reducing maxTokens");

    // Exactly one setModel call, keeping provider/id and lowering only maxTokens.
    const setModelCalls = (mocks.deps.agent.setModel as any).mock.calls;
    assert.equal(setModelCalls.length, 1, "should apply one model downgrade");

    const appliedModel = setModelCalls[0].arguments[0] as Model<Api>;
    assert.equal(appliedModel.provider, "openrouter");
    assert.equal(appliedModel.id, "openai/gpt-5-pro");
    // 329 affordable minus a 10% buffer (32) = 297.
    assert.equal(appliedModel.maxTokens, 297, "expected affordability cap with safety buffer");

    assert.equal(mocks.onModelChangeFn.mock.calls.length, 1, "should notify about model update");

    const adjustmentEvent = mocks.emittedEvents.find((event) => event.type === "fallback_provider_switch");
    assert.ok(adjustmentEvent, "should emit model-adjustment event");
    const reasonText = String(adjustmentEvent?.reason || "");
    assert.ok(reasonText.includes("credit-aware retry"), "switch reason should mention credit-aware retry");
  });

  it("does not mark credentials in cooldown for affordability quota errors", async () => {
    const pricedOutModel = createMockModel("openrouter", "openai/gpt-5-pro");
    pricedOutModel.maxTokens = 128000;

    const mocks = createMockDeps({
      model: pricedOutModel,
      markUsageLimitReachedResult: false,
      fallbackResult: null,
    });
    const retryHandler = new RetryHandler(mocks.deps);

    await retryHandler.handleRetryableError(errorMessage(AFFORDABILITY_ERROR_TEXT));

    // The credit-aware path must not touch the credential cooldown mock.
    assert.equal(mocks.markUsageLimitReached.mock.calls.length, 0, "quota error should skip credential cooldown");
  });
});
|
||||
|
||||
describe("isRetryableError", () => {
|
||||
it("considers long-context entitlement error as retryable", () => {
|
||||
const { deps } = createMockDeps();
|
||||
|
|
@ -291,6 +365,15 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
);
|
||||
assert.equal(handler.isRetryableError(msg), false);
|
||||
});
|
||||
|
||||
it("considers OpenRouter affordability credit errors as retryable", () => {
  const retryHandler = new RetryHandler(createMockDeps().deps);

  // 402 affordability payload as surfaced in the assistant error message.
  const affordabilityError = errorMessage(
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
  );

  const retryable = retryHandler.isRetryableError(affordabilityError);
  assert.equal(retryable, true);
});
|
||||
});
|
||||
|
||||
describe("third-party block claude-code fallback (#3772)", () => {
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ export class RetryHandler {
|
|||
// generated error from getApiKey() when credentials are in a backoff window.
|
||||
// Re-entering the retry handler for that message creates a cascade of empty
|
||||
// error entries in the session file, breaking resume (#3429).
|
||||
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
|
||||
return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
|
||||
err,
|
||||
);
|
||||
}
|
||||
|
|
@ -158,6 +158,14 @@ export class RetryHandler {
|
|||
const isRateLimit = errorType === "rate_limit";
|
||||
const isQuotaError = errorType === "quota_exhausted";
|
||||
|
||||
// Credit-aware retry (OpenRouter-style 402 affordability errors):
|
||||
// when provider reports "can only afford N", lower maxTokens and retry
|
||||
// on the same model before rotating credentials/providers.
|
||||
if (isQuotaError) {
|
||||
const adjusted = this._tryAffordableMaxTokensRetry(message, retryGeneration);
|
||||
if (adjusted) return true;
|
||||
}
|
||||
|
||||
// Credential rotation — only for transient rate limits (#3430).
|
||||
// Quota errors ("Extra usage is required") are account-level billing
|
||||
// gates; rotating to another credential on the same account won't help
|
||||
|
|
@ -409,12 +417,63 @@ export class RetryHandler {
|
|||
// Long-context entitlement errors are billing gates, not transient rate limits.
|
||||
// Must be checked before the generic 429/rate_limit regex.
|
||||
if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted";
|
||||
if (/requires more credits|can only afford|insufficient credits|not enough credits|credit balance/i.test(err))
|
||||
return "quota_exhausted";
|
||||
if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted";
|
||||
if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit";
|
||||
if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error";
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/**
 * Attempt a same-model retry by reducing maxTokens when the provider reports
 * an affordability cap (e.g., "can only afford 329").
 *
 * The new maxTokens is the reported cap minus a safety buffer (10% of the cap,
 * clamped to 16..64 tokens), floored at 64 but never above the reported cap.
 *
 * @param message - Failed assistant message; its `errorMessage` is scanned for
 *   "can only afford N" (thousands separators allowed).
 * @param retryGeneration - Passed through unchanged to `_scheduleContinue`.
 * @returns `true` when the model was adjusted and a retry was scheduled;
 *   `false` when there is no current model, no error text, no parsable
 *   positive cap, or the model's maxTokens is already at or below the target.
 */
private _tryAffordableMaxTokensRetry(message: AssistantMessage, retryGeneration: number): boolean {
  const currentModel = this._deps.getModel();
  const errorText = message.errorMessage;
  if (!currentModel || !errorText) return false;

  // Example: "can only afford 329"
  const match = errorText.match(/can only afford\s+([\d,]+)/i);
  if (!match?.[1]) return false;

  // Strip thousands separators ("1,024") before parsing; a capture of only
  // commas parses to NaN and is rejected below.
  const affordable = Number.parseInt(match[1].replace(/,/g, ""), 10);
  if (!Number.isFinite(affordable) || affordable <= 0) return false;

  // Leave a small buffer so slight input variance doesn't immediately re-fail.
  const safetyBuffer = Math.min(64, Math.max(16, Math.floor(affordable * 0.1)));
  // Keep a 64-token floor so the retry has room to produce output, but never
  // ask for more than the provider said it can afford: for caps below 64 the
  // floor alone would exceed the cap and the retry would fail the same way.
  const targetMaxTokens = Math.min(affordable, Math.max(64, affordable - safetyBuffer));
  const downgradedMaxTokens = Math.min(currentModel.maxTokens, targetMaxTokens);
  // Nothing to lower: the model is already at or below the target, so
  // retrying with the same limit would not change the outcome.
  if (downgradedMaxTokens >= currentModel.maxTokens) return false;

  // Same provider and id; only maxTokens differs.
  const downgradedModel = {
    ...currentModel,
    maxTokens: downgradedMaxTokens,
  };

  this._deps.agent.setModel(downgradedModel);
  this._deps.onModelChange(downgradedModel);
  // NOTE(review): presumably drops the failed assistant entry before the retry — confirm.
  this._removeLastAssistantError();

  this._deps.emit({
    type: "fallback_provider_switch",
    from: `${currentModel.provider}/${currentModel.id} (maxTokens=${currentModel.maxTokens})`,
    to: `${downgradedModel.provider}/${downgradedModel.id} (maxTokens=${downgradedModel.maxTokens})`,
    reason: `credit-aware retry: provider affordable cap ${affordable} tokens`,
  });

  // Announce a zero-delay retry; no backoff is applied on this path.
  this._deps.emit({
    type: "auto_retry_start",
    attempt: this._retryAttempt + 1,
    maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries,
    delayMs: 0,
    errorMessage: `${errorText} (reducing max tokens)`,
  });

  this._scheduleContinue(retryGeneration);
  return true;
}
|
||||
|
||||
/**
|
||||
* Attempt to downgrade a long-context model (e.g. claude-opus-4-6[1m]) to its
|
||||
* base model (claude-opus-4-6) when the account lacks the long-context billing
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ export function resetRetryState(state: RetryState): void {
|
|||
|
||||
const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i;
|
||||
const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i;
|
||||
// OpenRouter affordability-style quota errors should be treated as transient
|
||||
// so core retry logic can lower maxTokens and continue in-session.
|
||||
const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i;
|
||||
const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i;
|
||||
const SERVER_RE = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i;
|
||||
// ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first).
|
||||
|
|
@ -67,7 +70,7 @@ const RESET_DELAY_RE = /reset in (\d+)s/i;
|
|||
*/
|
||||
export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass {
|
||||
const isPermanent = PERMANENT_RE.test(errorMsg);
|
||||
const isRateLimit = RATE_LIMIT_RE.test(errorMsg);
|
||||
const isRateLimit = RATE_LIMIT_RE.test(errorMsg) || AFFORDABILITY_RE.test(errorMsg);
|
||||
|
||||
// 1. Permanent — but rate limit takes precedence
|
||||
if (isPermanent && !isRateLimit) {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,15 @@ test("classifyError detects rate limit from message", () => {
|
|||
assert.equal(result.kind, "rate-limit");
|
||||
});
|
||||
|
||||
test("classifyError treats OpenRouter affordability errors as transient rate-limit class", () => {
  // 402 affordability payload, passed without an explicit retry-after value.
  const affordabilityText =
    "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.";
  const classified = classifyError(affordabilityText);

  assert.ok(isTransient(classified));
  assert.equal(classified.kind, "rate-limit");
  // The classification must carry a positive retry delay.
  assert.ok("retryAfterMs" in classified && classified.retryAfterMs > 0);
});
|
||||
|
||||
test("classifyError extracts reset delay from message", () => {
|
||||
const result = classifyError("rate limit exceeded, reset in 45s");
|
||||
assert.equal(result.kind, "rate-limit");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue