diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts new file mode 100644 index 000000000..04a0aba09 --- /dev/null +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -0,0 +1,255 @@ +/** + * RetryHandler tests — long-context entitlement 429 error handling (#2803) + * + * Verifies that "Extra usage is required for long context requests" errors + * are classified as quota_exhausted (not rate_limit) and trigger a model + * downgrade from [1m] to base when no cross-provider fallback exists. + */ + +import { describe, it, beforeEach, mock, type Mock } from "node:test"; +import assert from "node:assert/strict"; +import { RetryHandler, type RetryHandlerDeps } from "./retry-handler.js"; +import type { Api, AssistantMessage, Model } from "@gsd/pi-ai"; +import type { FallbackResolver } from "./fallback-resolver.js"; +import type { ModelRegistry } from "./model-registry.js"; +import type { SettingsManager } from "./settings-manager.js"; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +function createMockModel(provider: string, id: string): Model { + return { + id, + name: id, + api: "anthropic" as Api, + provider, + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 16384, + } as Model; +} + +function errorMessage(msg: string): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6[1m]", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "error", + errorMessage: msg, + timestamp: Date.now(), + } as AssistantMessage; +} + +interface MockDeps { + deps: RetryHandlerDeps; + emittedEvents: Array>; + continueFn: Mock<() => Promise>; + onModelChangeFn: Mock<(model: Model) => void>; + markUsageLimitReached: Mock<(...args: any[]) => boolean>; + findFallback: Mock<(...args: any[]) => Promise>; + findModel: Mock<(provider: string, modelId: string) => Model | undefined>; +} + +function createMockDeps(overrides?: { + model?: Model; + retryEnabled?: boolean; + markUsageLimitReachedResult?: boolean; + fallbackResult?: any; + findModelResult?: (provider: string, modelId: string) => Model | undefined; +}): MockDeps { + const model = overrides?.model ?? createMockModel("anthropic", "claude-opus-4-6[1m]"); + const emittedEvents: Array> = []; + const continueFn = mock.fn(async () => {}); + const onModelChangeFn = mock.fn((_model: Model) => {}); + const markUsageLimitReached = mock.fn( + () => overrides?.markUsageLimitReachedResult ?? false, + ); + const findFallback = mock.fn(async () => overrides?.fallbackResult ?? null); + const findModel = mock.fn( + overrides?.findModelResult ?? ((_provider: string, _modelId: string) => undefined), + ); + + const messages: Array<{ role: string } & Record> = []; + + const deps: RetryHandlerDeps = { + agent: { + continue: continueFn, + state: { messages }, + setModel: mock.fn(), + replaceMessages: mock.fn((newMessages: any[]) => { + messages.length = 0; + messages.push(...newMessages); + }), + } as any, + settingsManager: { + getRetryEnabled: () => overrides?.retryEnabled ?? true, + getRetrySettings: () => ({ + enabled: overrides?.retryEnabled ?? true, + maxRetries: 5, + baseDelayMs: 1000, + maxDelayMs: 30000, + }), + } as unknown as SettingsManager, + modelRegistry: { + authStorage: { + markUsageLimitReached, + }, + find: findModel, + } as unknown as ModelRegistry, + fallbackResolver: { + findFallback, + } as unknown as FallbackResolver, + getModel: () => model, + getSessionId: () => "test-session", + emit: (event: any) => emittedEvents.push(event), + onModelChange: onModelChangeFn, + }; + + return { deps, emittedEvents, continueFn, onModelChangeFn, markUsageLimitReached, findFallback, findModel }; +} + +// ─── _classifyErrorType (tested via handleRetryableError behavior) ────────── + +describe("RetryHandler — long-context entitlement 429 (#2803)", () => { + + describe("error classification", () => { + it("classifies 'Extra usage is required for long context requests' as quota_exhausted, not rate_limit", async () => { + // When the error is classified as quota_exhausted AND no alternate credentials + // AND no fallback, the handler should emit fallback_chain_exhausted and stop. + // If misclassified as rate_limit, it would enter the backoff loop instead. + const { deps, emittedEvents, findModel } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, // no alternate credentials + fallbackResult: null, // no cross-provider fallback + findModelResult: () => undefined, // no base model either + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}' + ); + + const result = await handler.handleRetryableError(msg); + + // Should NOT retry (would be true if misclassified as rate_limit entering backoff) + assert.equal(result, false); + + // Should emit fallback_chain_exhausted (quota_exhausted path), NOT auto_retry_start (backoff path) + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted event for entitlement error"); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.equal(retryStart, undefined, "Should NOT emit auto_retry_start for entitlement error"); + }); + + it("still classifies regular 429 rate limits as rate_limit", async () => { + // A normal "rate limit" 429 should still be classified as rate_limit + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + + // Should enter the backoff loop (rate_limit path, not quota_exhausted) + assert.equal(result, true); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.ok(retryStart, "Regular 429 should enter backoff retry"); + }); + }); + + describe("long-context model downgrade", () => { + it("downgrades from [1m] to base model when entitlement error and no fallback", async () => { + const baseModel = createMockModel("anthropic", "claude-opus-4-6"); + const { deps, emittedEvents, onModelChangeFn, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: (provider: string, modelId: string) => { + if (provider === "anthropic" && modelId === "claude-opus-4-6") return baseModel; + return undefined; + }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "Should retry after downgrade"); + + // Should have called setModel with the base model + const setModelCalls = (deps.agent.setModel as any).mock.calls; + assert.equal(setModelCalls.length, 1); + assert.equal(setModelCalls[0].arguments[0].id, "claude-opus-4-6"); + + // Should have notified about model change + assert.equal(onModelChangeFn.mock.calls.length, 1); + + // Should emit a fallback_provider_switch event indicating downgrade + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event for downgrade"); + assert.ok(switchEvent!.reason.includes("long context downgrade"), `reason should mention downgrade: ${switchEvent!.reason}`); + }); + + it("emits fallback_chain_exhausted when base model is also unavailable", async () => { + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, // base model not found + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted when base model unavailable"); + }); + + it("does not attempt downgrade for non-[1m] models", async () => { + // When a regular model (no [1m] suffix) gets a quota_exhausted error + // with no fallback, it should just stop — no downgrade attempt. + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted); + + // No downgrade switch should occur + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.equal(switchEvent, undefined, "Should not switch for non-[1m] models"); + }); + }); + + describe("isRetryableError", () => { + it("considers long-context entitlement error as retryable", () => { + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + assert.equal(handler.isRetryableError(msg), true); + }); + }); +}); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 9bdeac8f6..3e1f50daf 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -107,7 +107,7 @@ export class RetryHandler { if (isContextOverflow(message, contextWindow)) return false; const err = message.errorMessage; - return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i.test( + return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off|extra usage is required/i.test( err, ); } @@ -202,6 +202,10 @@ export class RetryHandler { // No fallback available either if (errorType === "quota_exhausted") { + // Try long-context model downgrade ([1m] → base) before giving up + const downgraded = this._tryLongContextDowngrade(message); + if (downgraded) return true; + this._deps.emit({ type: "fallback_chain_exhausted", reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`, @@ -343,12 +347,59 @@ export class RetryHandler { */ private _classifyErrorType(errorMessage: string): UsageLimitErrorType { const err = errorMessage.toLowerCase(); + // Long-context entitlement errors are billing gates, not transient rate limits. + // Must be checked before the generic 429/rate_limit regex. + if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted"; if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; return "unknown"; } + /** + * Attempt to downgrade a long-context model (e.g. claude-opus-4-6[1m]) to its + * base model (claude-opus-4-6) when the account lacks the long-context billing + * entitlement. Returns true if the downgrade was initiated. + */ + private _tryLongContextDowngrade(message: AssistantMessage): boolean { + const currentModel = this._deps.getModel(); + if (!currentModel) return false; + + // Only attempt downgrade for [1m] (or similar long-context) model IDs + const match = currentModel.id.match(/^(.+)\[\d+m\]$/); + if (!match) return false; + + const baseModelId = match[1]; + const baseModel = this._deps.modelRegistry.find(currentModel.provider, baseModelId); + if (!baseModel) return false; + + const previousId = currentModel.id; + this._deps.agent.setModel(baseModel); + this._deps.onModelChange(baseModel); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${currentModel.provider}/${previousId}`, + to: `${baseModel.provider}/${baseModel.id}`, + reason: `long context downgrade: ${previousId} → ${baseModel.id}`, + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (long context downgrade)`, + }); + + setTimeout(() => { + this._deps.agent.continue().catch(() => {}); + }, 0); + + return true; + } + /** Remove the last assistant error message from agent state */ private _removeLastAssistantError(): void { const messages = this._deps.agent.state.messages;