diff --git a/packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts b/packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts new file mode 100644 index 000000000..f86dac6ca --- /dev/null +++ b/packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts @@ -0,0 +1,21 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const source = readFileSync(join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"), "utf-8"); + +test("agent-session: explicit model switches cancel retry before applying new model", () => { + const start = source.indexOf("private async _applyModelChange("); + assert.ok(start >= 0, "missing _applyModelChange"); + const window = source.slice(start, start + 900); + const abortIdx = window.indexOf("this._retryHandler.abortRetry();"); + const setModelIdx = window.indexOf("this.agent.setModel(model);"); + + assert.ok(abortIdx >= 0, "_applyModelChange should cancel any in-flight retry"); + assert.ok(setModelIdx >= 0, "_applyModelChange should set the new model"); + assert.ok( + abortIdx < setModelIdx, + "retry cancellation must happen before applying the new model to prevent stale provider retries", + ); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index fe5e1f853..0598c4575 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -1633,6 +1633,10 @@ export class AgentSession { options?: { persist?: boolean }, ): Promise { const previousModel = this.model; + // Explicit model switches must cancel any in-flight retry loop from the + // previous provider/model. Otherwise stale provider backoff errors can + // continue to land after the user or runtime has already switched models. + this._retryHandler.abortRetry(); this.agent.setModel(model); this.sessionManager.appendModelChange(model.provider, model.id); if (options?.persist !== false) { diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts index 04a0aba09..26b27455c 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.test.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -61,6 +61,11 @@ function createMockDeps(overrides?: { markUsageLimitReachedResult?: boolean; fallbackResult?: any; findModelResult?: (provider: string, modelId: string) => Model | undefined; + retrySettings?: { + maxRetries?: number; + baseDelayMs?: number; + maxDelayMs?: number; + }; }): MockDeps { const model = overrides?.model ?? createMockModel("anthropic", "claude-opus-4-6[1m]"); const emittedEvents: Array> = []; @@ -90,9 +95,9 @@ function createMockDeps(overrides?: { getRetryEnabled: () => overrides?.retryEnabled ?? true, getRetrySettings: () => ({ enabled: overrides?.retryEnabled ?? true, - maxRetries: 5, - baseDelayMs: 1000, - maxDelayMs: 30000, + maxRetries: overrides?.retrySettings?.maxRetries ?? 5, + baseDelayMs: overrides?.retrySettings?.baseDelayMs ?? 1000, + maxDelayMs: overrides?.retrySettings?.maxDelayMs ?? 30000, }), } as unknown as SettingsManager, modelRegistry: { @@ -244,6 +249,28 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => { }); }); + describe("retry cancellation", () => { + it("cancels queued immediate continue callbacks when retry is aborted", async () => { + const { deps, emittedEvents, continueFn } = createMockDeps({ + markUsageLimitReachedResult: true, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + assert.equal(result, true, "retry should be initiated"); + + handler.abortRetry(); + await new Promise((resolve) => setTimeout(resolve, 10)); + + assert.equal(continueFn.mock.calls.length, 0, "cancelled retry must not continue after explicit abort"); + const endEvents = emittedEvents.filter((e) => e.type === "auto_retry_end"); + assert.equal(endEvents.length, 1, "retry cancellation should emit a single auto_retry_end event"); + assert.equal(endEvents[0]?.finalError, "Retry cancelled"); + }); + }); + describe("isRetryableError", () => { it("considers long-context entitlement error as retryable", () => { const { deps } = createMockDeps(); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 3e1f50daf..538e3a447 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -37,6 +37,8 @@ export class RetryHandler { private _retryAttempt = 0; private _retryPromise: Promise | undefined = undefined; private _retryResolve: (() => void) | undefined = undefined; + private _retryGeneration = 0; + private _continueTimeout: ReturnType | undefined = undefined; constructor(private readonly _deps: RetryHandlerDeps) {} @@ -134,6 +136,7 @@ export class RetryHandler { } // Try credential fallback before counting against retry budget. + const retryGeneration = this._retryGeneration; if (this._deps.getModel() && message.errorMessage) { const errorType = this._classifyErrorType(message.errorMessage); const isCredentialError = errorType === "rate_limit" || errorType === "quota_exhausted"; @@ -157,9 +160,7 @@ export class RetryHandler { }); // Retry immediately with the next credential - don't increment _retryAttempt - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } @@ -193,9 +194,7 @@ export class RetryHandler { }); // Retry immediately with fallback provider - don't increment _retryAttempt - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } @@ -203,7 +202,7 @@ export class RetryHandler { // No fallback available either if (errorType === "quota_exhausted") { // Try long-context model downgrade ([1m] → base) before giving up - const downgraded = this._tryLongContextDowngrade(message); + const downgraded = this._tryLongContextDowngrade(message, retryGeneration); if (downgraded) return true; this._deps.emit({ @@ -274,7 +273,12 @@ export class RetryHandler { try { await sleep(delayMs, this._retryAbortController.signal); } catch { - // Aborted during sleep + // Aborted during sleep. If the retry generation already advanced, this + // cancellation was handled externally (e.g. explicit model switch). + if (retryGeneration !== this._retryGeneration) { + this._retryAbortController = undefined; + return false; + } const attempt = this._retryAttempt; this._retryAttempt = 0; this._retryAbortController = undefined; @@ -290,16 +294,36 @@ export class RetryHandler { this._retryAbortController = undefined; // Retry via continue() - use setTimeout to break out of event handler chain - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } /** Cancel in-progress retry */ abortRetry(): void { - this._retryAbortController?.abort(); + const hadRetry = + this._retryPromise !== undefined + || this._retryAbortController !== undefined + || this._continueTimeout !== undefined; + if (!hadRetry) return; + + const attempt = this._retryAttempt > 0 ? this._retryAttempt : 1; + this._retryGeneration++; + if (this._continueTimeout) { + clearTimeout(this._continueTimeout); + this._continueTimeout = undefined; + } + if (this._retryAbortController) { + this._retryAbortController.abort(); + this._retryAbortController = undefined; + } + this._retryAttempt = 0; + this._deps.emit({ + type: "auto_retry_end", + success: false, + attempt, + finalError: "Retry cancelled", + }); this._resolveRetry(); } @@ -330,6 +354,17 @@ export class RetryHandler { } } + private _scheduleContinue(retryGeneration: number): void { + if (this._continueTimeout) { + clearTimeout(this._continueTimeout); + } + this._continueTimeout = setTimeout(() => { + this._continueTimeout = undefined; + if (retryGeneration !== this._retryGeneration) return; + this._deps.agent.continue().catch(() => {}); + }, 0); + } + private _findLastAssistantInMessages( messages: Array<{ role: string } & Record>, ): AssistantMessage | undefined { @@ -361,7 +396,7 @@ export class RetryHandler { * base model (claude-opus-4-6) when the account lacks the long-context billing * entitlement. Returns true if the downgrade was initiated. */ - private _tryLongContextDowngrade(message: AssistantMessage): boolean { + private _tryLongContextDowngrade(message: AssistantMessage, retryGeneration: number): boolean { const currentModel = this._deps.getModel(); if (!currentModel) return false; @@ -393,9 +428,7 @@ export class RetryHandler { errorMessage: `${message.errorMessage} (long context downgrade)`, }); - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; }