From 58208634ac1cc1f5927cad2550984893b25e5c44 Mon Sep 17 00:00:00 2001
From: deseltrus
Date: Fri, 3 Apr 2026 16:20:30 +0200
Subject: [PATCH] fix(pi-coding-agent): cancel stale retries after model switch

Explicit model changes aborted the active backoff sleep but left
already-queued retry callbacks alive. That let stale provider retries
continue after a user or runtime model switch, which could keep
surfacing the old provider's backoff errors in the new session state.

Cancel the full retry generation on explicit model switches by clearing
queued continue callbacks in RetryHandler, then cover the behavior with
regression tests for both the session switch ordering and the
queued-retry cancellation path.
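
The core of the fix is a generation counter, a common pattern for
invalidating already-queued callbacks. A minimal self-contained sketch
of the idea (illustrative only, not the patched code itself; the real
implementation lives in RetryHandler below):

    // Each cancellation bumps the generation; a callback queued under
    // an older generation becomes a no-op instead of firing after the
    // model switch.
    let generation = 0;

    function scheduleContinue(run: () => void): void {
      const queuedIn = generation;
      setTimeout(() => {
        if (queuedIn !== generation) return; // stale: cancelled meanwhile
        run();
      }, 0);
    }

    function cancelRetries(): void {
      generation++; // invalidates every callback queued so far
    }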
---
 .../core/agent-session-model-switch.test.ts   | 21 ++++++
 .../pi-coding-agent/src/core/agent-session.ts |  4 ++
 .../src/core/retry-handler.test.ts            | 33 +++++++++-
 .../pi-coding-agent/src/core/retry-handler.ts | 65 ++++++++++++++-----
 4 files changed, 104 insertions(+), 19 deletions(-)
 create mode 100644 packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts

diff --git a/packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts b/packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts
new file mode 100644
index 000000000..f86dac6ca
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/agent-session-model-switch.test.ts
@@ -0,0 +1,21 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+const source = readFileSync(join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"), "utf-8");
+
+test("agent-session: explicit model switches cancel retry before applying new model", () => {
+  const start = source.indexOf("private async _applyModelChange(");
+  assert.ok(start >= 0, "missing _applyModelChange");
+  const window = source.slice(start, start + 900);
+  const abortIdx = window.indexOf("this._retryHandler.abortRetry();");
+  const setModelIdx = window.indexOf("this.agent.setModel(model);");
+
+  assert.ok(abortIdx >= 0, "_applyModelChange should cancel any in-flight retry");
+  assert.ok(setModelIdx >= 0, "_applyModelChange should set the new model");
+  assert.ok(
+    abortIdx < setModelIdx,
+    "retry cancellation must happen before applying the new model to prevent stale provider retries",
+  );
+});
diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts
index fe5e1f853..0598c4575 100644
--- a/packages/pi-coding-agent/src/core/agent-session.ts
+++ b/packages/pi-coding-agent/src/core/agent-session.ts
@@ -1633,6 +1633,10 @@ export class AgentSession {
     options?: { persist?: boolean },
   ): Promise<void> {
     const previousModel = this.model;
+    // Explicit model switches must cancel any in-flight retry loop from the
+    // previous provider/model. Otherwise stale provider backoff errors can
+    // continue to land after the user or runtime has already switched models.
+    this._retryHandler.abortRetry();
     this.agent.setModel(model);
     this.sessionManager.appendModelChange(model.provider, model.id);
     if (options?.persist !== false) {
diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts
index 04a0aba09..26b27455c 100644
--- a/packages/pi-coding-agent/src/core/retry-handler.test.ts
+++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts
@@ -61,6 +61,11 @@ function createMockDeps(overrides?: {
   markUsageLimitReachedResult?: boolean;
   fallbackResult?: any;
   findModelResult?: (provider: string, modelId: string) => Model | undefined;
+  retrySettings?: {
+    maxRetries?: number;
+    baseDelayMs?: number;
+    maxDelayMs?: number;
+  };
 }): MockDeps {
   const model = overrides?.model ?? createMockModel("anthropic", "claude-opus-4-6[1m]");
   const emittedEvents: Array<Record<string, unknown>> = [];
@@ -90,9 +95,9 @@
     getRetryEnabled: () => overrides?.retryEnabled ?? true,
     getRetrySettings: () => ({
       enabled: overrides?.retryEnabled ?? true,
-      maxRetries: 5,
-      baseDelayMs: 1000,
-      maxDelayMs: 30000,
+      maxRetries: overrides?.retrySettings?.maxRetries ?? 5,
+      baseDelayMs: overrides?.retrySettings?.baseDelayMs ?? 1000,
+      maxDelayMs: overrides?.retrySettings?.maxDelayMs ?? 30000,
     }),
   } as unknown as SettingsManager,
   modelRegistry: {
@@ -244,6 +249,28 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
   });
 });
 
+describe("retry cancellation", () => {
+  it("cancels queued immediate continue callbacks when retry is aborted", async () => {
+    const { deps, emittedEvents, continueFn } = createMockDeps({
+      markUsageLimitReachedResult: true,
+    });
+
+    const handler = new RetryHandler(deps);
+    const msg = errorMessage("429 Too Many Requests");
+
+    const result = await handler.handleRetryableError(msg);
+    assert.equal(result, true, "retry should be initiated");
+
+    handler.abortRetry();
+    await new Promise((resolve) => setTimeout(resolve, 10));
+
+    assert.equal(continueFn.mock.calls.length, 0, "cancelled retry must not continue after explicit abort");
+    const endEvents = emittedEvents.filter((e) => e.type === "auto_retry_end");
+    assert.equal(endEvents.length, 1, "retry cancellation should emit a single auto_retry_end event");
+    assert.equal(endEvents[0]?.finalError, "Retry cancelled");
+  });
+});
+
 describe("isRetryableError", () => {
   it("considers long-context entitlement error as retryable", () => {
     const { deps } = createMockDeps();
diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts
index 3e1f50daf..538e3a447 100644
--- a/packages/pi-coding-agent/src/core/retry-handler.ts
+++ b/packages/pi-coding-agent/src/core/retry-handler.ts
@@ -37,6 +37,8 @@ export class RetryHandler {
   private _retryAttempt = 0;
   private _retryPromise: Promise<void> | undefined = undefined;
   private _retryResolve: (() => void) | undefined = undefined;
+  private _retryGeneration = 0;
+  private _continueTimeout: ReturnType<typeof setTimeout> | undefined = undefined;
 
   constructor(private readonly _deps: RetryHandlerDeps) {}
 
@@ -134,6 +136,7 @@
     }
 
     // Try credential fallback before counting against retry budget.
+    const retryGeneration = this._retryGeneration;
     if (this._deps.getModel() && message.errorMessage) {
       const errorType = this._classifyErrorType(message.errorMessage);
       const isCredentialError = errorType === "rate_limit" || errorType === "quota_exhausted";
@@ -157,9 +160,7 @@
         });
 
         // Retry immediately with the next credential - don't increment _retryAttempt
-        setTimeout(() => {
-          this._deps.agent.continue().catch(() => {});
-        }, 0);
+        this._scheduleContinue(retryGeneration);
 
         return true;
       }
@@ -193,9 +194,7 @@
         });
 
         // Retry immediately with fallback provider - don't increment _retryAttempt
-        setTimeout(() => {
-          this._deps.agent.continue().catch(() => {});
-        }, 0);
+        this._scheduleContinue(retryGeneration);
 
         return true;
       }
@@ -203,7 +202,7 @@
       // No fallback available either
      if (errorType === "quota_exhausted") {
         // Try long-context model downgrade ([1m] → base) before giving up
-        const downgraded = this._tryLongContextDowngrade(message);
+        const downgraded = this._tryLongContextDowngrade(message, retryGeneration);
         if (downgraded) return true;
 
         this._deps.emit({
@@ -274,7 +273,12 @@
     try {
       await sleep(delayMs, this._retryAbortController.signal);
     } catch {
-      // Aborted during sleep
+      // Aborted during sleep. If the retry generation already advanced, this
+      // cancellation was handled externally (e.g. explicit model switch).
+      if (retryGeneration !== this._retryGeneration) {
+        this._retryAbortController = undefined;
+        return false;
+      }
       const attempt = this._retryAttempt;
       this._retryAttempt = 0;
       this._retryAbortController = undefined;
@@ -290,16 +294,36 @@
     this._retryAbortController = undefined;
 
     // Retry via continue() - use setTimeout to break out of event handler chain
-    setTimeout(() => {
-      this._deps.agent.continue().catch(() => {});
-    }, 0);
+    this._scheduleContinue(retryGeneration);
 
     return true;
   }
 
   /** Cancel in-progress retry */
   abortRetry(): void {
-    this._retryAbortController?.abort();
+    const hadRetry =
+      this._retryPromise !== undefined
+      || this._retryAbortController !== undefined
+      || this._continueTimeout !== undefined;
+    if (!hadRetry) return;
+
+    const attempt = this._retryAttempt > 0 ? this._retryAttempt : 1;
+    this._retryGeneration++;
+    if (this._continueTimeout) {
+      clearTimeout(this._continueTimeout);
+      this._continueTimeout = undefined;
+    }
+    if (this._retryAbortController) {
+      this._retryAbortController.abort();
+      this._retryAbortController = undefined;
+    }
+    this._retryAttempt = 0;
+    this._deps.emit({
+      type: "auto_retry_end",
+      success: false,
+      attempt,
+      finalError: "Retry cancelled",
+    });
     this._resolveRetry();
   }
 
@@ -330,6 +354,17 @@
     }
   }
 
+  private _scheduleContinue(retryGeneration: number): void {
+    if (this._continueTimeout) {
+      clearTimeout(this._continueTimeout);
+    }
+    this._continueTimeout = setTimeout(() => {
+      this._continueTimeout = undefined;
+      if (retryGeneration !== this._retryGeneration) return;
+      this._deps.agent.continue().catch(() => {});
+    }, 0);
+  }
+
   private _findLastAssistantInMessages(
     messages: Array<{ role: string } & Record<string, unknown>>,
   ): AssistantMessage | undefined {
@@ -361,7 +396,7 @@
    * base model (claude-opus-4-6) when the account lacks the long-context billing
    * entitlement. Returns true if the downgrade was initiated.
    */
-  private _tryLongContextDowngrade(message: AssistantMessage): boolean {
+  private _tryLongContextDowngrade(message: AssistantMessage, retryGeneration: number): boolean {
     const currentModel = this._deps.getModel();
     if (!currentModel) return false;
 
@@ -393,9 +428,7 @@
       errorMessage: `${message.errorMessage} (long context downgrade)`,
     });
 
-    setTimeout(() => {
-      this._deps.agent.continue().catch(() => {});
-    }, 0);
+    this._scheduleContinue(retryGeneration);
 
     return true;
   }