Merge pull request #3451 from deseltrus/fix/stale-retries-after-model-switch

fix: cancel stale retries after model switch
Jeremy McSpadden 2026-04-04 18:27:33 -05:00 committed by GitHub
commit 12ef95024c
4 changed files with 104 additions and 19 deletions
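
In short: the retry handler now keeps a generation counter; abortRetry() bumps it, clears any queued zero-delay continue, and aborts the backoff sleep, while every queued continue callback re-checks the generation before calling agent.continue(). A minimal standalone sketch of that pattern follows (the class and callback names here are illustrative stand-ins, not the actual RetryHandler API):

// Sketch of the generation-counter cancellation used by this change.
// `runAgentContinue` stands in for this._deps.agent.continue(); the real
// handler additionally aborts an AbortController that guards the backoff sleep.
class RetrySketch {
  private generation = 0;
  private pending: ReturnType<typeof setTimeout> | undefined;

  constructor(private readonly runAgentContinue: () => Promise<void>) {}

  /** Queue a zero-delay continue that is dropped if the generation advances. */
  scheduleContinue(): void {
    const scheduledFor = this.generation;
    if (this.pending) clearTimeout(this.pending);
    this.pending = setTimeout(() => {
      this.pending = undefined;
      if (scheduledFor !== this.generation) return; // stale: cancelled meanwhile
      this.runAgentContinue().catch(() => {});
    }, 0);
  }

  /** Explicit cancellation (e.g. a model switch): invalidate anything queued. */
  abortRetry(): void {
    this.generation++;
    if (this.pending) {
      clearTimeout(this.pending);
      this.pending = undefined;
    }
  }
}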

View file

@@ -0,0 +1,21 @@
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join } from "node:path";
const source = readFileSync(join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"), "utf-8");
test("agent-session: explicit model switches cancel retry before applying new model", () => {
const start = source.indexOf("private async _applyModelChange(");
assert.ok(start >= 0, "missing _applyModelChange");
const window = source.slice(start, start + 900);
const abortIdx = window.indexOf("this._retryHandler.abortRetry();");
const setModelIdx = window.indexOf("this.agent.setModel(model);");
assert.ok(abortIdx >= 0, "_applyModelChange should cancel any in-flight retry");
assert.ok(setModelIdx >= 0, "_applyModelChange should set the new model");
assert.ok(
abortIdx < setModelIdx,
"retry cancellation must happen before applying the new model to prevent stale provider retries",
);
});

View file

@@ -1633,6 +1633,10 @@ export class AgentSession {
options?: { persist?: boolean },
): Promise<void> {
const previousModel = this.model;
// Explicit model switches must cancel any in-flight retry loop from the
// previous provider/model. Otherwise stale provider backoff errors can
// continue to land after the user or runtime has already switched models.
this._retryHandler.abortRetry();
this.agent.setModel(model);
this.sessionManager.appendModelChange(model.provider, model.id);
if (options?.persist !== false) {

View file

@@ -61,6 +61,11 @@ function createMockDeps(overrides?: {
markUsageLimitReachedResult?: boolean;
fallbackResult?: any;
findModelResult?: (provider: string, modelId: string) => Model<Api> | undefined;
retrySettings?: {
maxRetries?: number;
baseDelayMs?: number;
maxDelayMs?: number;
};
}): MockDeps {
const model = overrides?.model ?? createMockModel("anthropic", "claude-opus-4-6[1m]");
const emittedEvents: Array<Record<string, any>> = [];
@@ -90,9 +95,9 @@ function createMockDeps(overrides?: {
getRetryEnabled: () => overrides?.retryEnabled ?? true,
getRetrySettings: () => ({
enabled: overrides?.retryEnabled ?? true,
maxRetries: 5,
baseDelayMs: 1000,
maxDelayMs: 30000,
maxRetries: overrides?.retrySettings?.maxRetries ?? 5,
baseDelayMs: overrides?.retrySettings?.baseDelayMs ?? 1000,
maxDelayMs: overrides?.retrySettings?.maxDelayMs ?? 30000,
}),
} as unknown as SettingsManager,
modelRegistry: {
@@ -244,6 +249,28 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
});
});
describe("retry cancellation", () => {
it("cancels queued immediate continue callbacks when retry is aborted", async () => {
const { deps, emittedEvents, continueFn } = createMockDeps({
markUsageLimitReachedResult: true,
});
const handler = new RetryHandler(deps);
const msg = errorMessage("429 Too Many Requests");
const result = await handler.handleRetryableError(msg);
assert.equal(result, true, "retry should be initiated");
handler.abortRetry();
await new Promise((resolve) => setTimeout(resolve, 10));
assert.equal(continueFn.mock.calls.length, 0, "cancelled retry must not continue after explicit abort");
const endEvents = emittedEvents.filter((e) => e.type === "auto_retry_end");
assert.equal(endEvents.length, 1, "retry cancellation should emit a single auto_retry_end event");
assert.equal(endEvents[0]?.finalError, "Retry cancelled");
});
});
describe("isRetryableError", () => {
it("considers long-context entitlement error as retryable", () => {
const { deps } = createMockDeps();

View file

@@ -37,6 +37,8 @@ export class RetryHandler {
private _retryAttempt = 0;
private _retryPromise: Promise<void> | undefined = undefined;
private _retryResolve: (() => void) | undefined = undefined;
private _retryGeneration = 0;
private _continueTimeout: ReturnType<typeof setTimeout> | undefined = undefined;
constructor(private readonly _deps: RetryHandlerDeps) {}
@@ -134,6 +136,7 @@
}
// Try credential fallback before counting against retry budget.
const retryGeneration = this._retryGeneration;
if (this._deps.getModel() && message.errorMessage) {
const errorType = this._classifyErrorType(message.errorMessage);
const isCredentialError = errorType === "rate_limit" || errorType === "quota_exhausted";
@@ -157,9 +160,7 @@
});
// Retry immediately with the next credential - don't increment _retryAttempt
setTimeout(() => {
this._deps.agent.continue().catch(() => {});
}, 0);
this._scheduleContinue(retryGeneration);
return true;
}
@@ -193,9 +194,7 @@
});
// Retry immediately with fallback provider - don't increment _retryAttempt
setTimeout(() => {
this._deps.agent.continue().catch(() => {});
}, 0);
this._scheduleContinue(retryGeneration);
return true;
}
@@ -203,7 +202,7 @@
// No fallback available either
if (errorType === "quota_exhausted") {
// Try long-context model downgrade ([1m] → base) before giving up
const downgraded = this._tryLongContextDowngrade(message);
const downgraded = this._tryLongContextDowngrade(message, retryGeneration);
if (downgraded) return true;
this._deps.emit({
@@ -274,7 +273,12 @@
try {
await sleep(delayMs, this._retryAbortController.signal);
} catch {
// Aborted during sleep
// Aborted during sleep. If the retry generation already advanced, this
// cancellation was handled externally (e.g. explicit model switch).
if (retryGeneration !== this._retryGeneration) {
this._retryAbortController = undefined;
return false;
}
const attempt = this._retryAttempt;
this._retryAttempt = 0;
this._retryAbortController = undefined;
@@ -290,16 +294,36 @@
this._retryAbortController = undefined;
// Retry via continue() - use setTimeout to break out of event handler chain
setTimeout(() => {
this._deps.agent.continue().catch(() => {});
}, 0);
this._scheduleContinue(retryGeneration);
return true;
}
/** Cancel in-progress retry */
abortRetry(): void {
this._retryAbortController?.abort();
const hadRetry =
this._retryPromise !== undefined
|| this._retryAbortController !== undefined
|| this._continueTimeout !== undefined;
if (!hadRetry) return;
const attempt = this._retryAttempt > 0 ? this._retryAttempt : 1;
this._retryGeneration++;
if (this._continueTimeout) {
clearTimeout(this._continueTimeout);
this._continueTimeout = undefined;
}
if (this._retryAbortController) {
this._retryAbortController.abort();
this._retryAbortController = undefined;
}
this._retryAttempt = 0;
this._deps.emit({
type: "auto_retry_end",
success: false,
attempt,
finalError: "Retry cancelled",
});
this._resolveRetry();
}
@@ -330,6 +354,17 @@
}
}
private _scheduleContinue(retryGeneration: number): void {
if (this._continueTimeout) {
clearTimeout(this._continueTimeout);
}
this._continueTimeout = setTimeout(() => {
this._continueTimeout = undefined;
if (retryGeneration !== this._retryGeneration) return;
this._deps.agent.continue().catch(() => {});
}, 0);
}
private _findLastAssistantInMessages(
messages: Array<{ role: string } & Record<string, any>>,
): AssistantMessage | undefined {
@@ -361,7 +396,7 @@
* base model (claude-opus-4-6) when the account lacks the long-context billing
* entitlement. Returns true if the downgrade was initiated.
*/
private _tryLongContextDowngrade(message: AssistantMessage): boolean {
private _tryLongContextDowngrade(message: AssistantMessage, retryGeneration: number): boolean {
const currentModel = this._deps.getModel();
if (!currentModel) return false;
@@ -393,9 +428,7 @@
errorMessage: `${message.errorMessage} (long context downgrade)`,
});
setTimeout(() => {
this._deps.agent.continue().catch(() => {});
}, 0);
this._scheduleContinue(retryGeneration);
return true;
}
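
For completeness, the race the generation check closes can be shown in isolation: a zero-delay callback that was already queued when cancellation happened still runs unless it re-checks a token. A small self-contained node:test sketch (hypothetical test, not part of this change):

import test from "node:test";
import assert from "node:assert/strict";

test("a queued zero-delay callback is dropped once the generation advances", async () => {
  let generation = 0;
  let continued = 0;
  const scheduledFor = generation;
  setTimeout(() => {
    // Mirrors _scheduleContinue: bail out if abortRetry() ran in the meantime.
    if (scheduledFor !== generation) return;
    continued++;
  }, 0);
  generation++; // explicit model switch / abortRetry()
  await new Promise((resolve) => setTimeout(resolve, 10));
  assert.equal(continued, 0, "stale retry must not continue after cancellation");
});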