fix(gsd): resume auto-mode after transient provider pause (#2822)

Transient provider recovery previously sent a hidden continue message after the backoff timer elapsed, but the auto loop had already exited. Resume the paused session through startAuto() instead so the timer actually restarts auto-mode, and cover the resumed, duplicate-resume, and missing-base-path cases with regression tests.

Closes #2813
This commit is contained in:
mastertyko 2026-03-27 21:50:40 +01:00 committed by GitHub
parent b8d4f03747
commit 447a57ae0f
3 changed files with 156 additions and 4 deletions

View file

@ -7,6 +7,7 @@ import { pauseAutoForProviderError } from "../provider-error-pause.js";
import { isSessionSwitchInFlight, resolveAgentEnd } from "../auto-loop.js";
import { resolveModelId } from "../auto-model-selection.js";
import { clearDiscussionFlowState } from "./write-gate.js";
import { resumeAutoAfterProviderDelay } from "./provider-error-resume.js";
import {
classifyError,
createRetryState,
@ -44,10 +45,10 @@ async function pauseTransientWithBackoff(
retryAfterMs,
resume: allowAutoResume
? () => {
pi.sendMessage(
{ customType: "gsd-auto-timeout-recovery", content: "Continue execution — provider error recovery delay elapsed.", display: false },
{ triggerTurn: true },
);
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
const message = err instanceof Error ? err.message : String(err);
ctx.ui.notify(`Provider error recovery delay elapsed, but auto-mode failed to resume: ${message}`, "error");
});
}
: undefined,
});

View file

@ -0,0 +1,53 @@
import type {
ExtensionAPI,
ExtensionCommandContext,
ExtensionContext,
} from "@gsd/pi-coding-agent";
import { getAutoDashboardData, startAuto, type AutoDashboardData } from "../auto.js";
/**
 * The subset of `AutoDashboardData` consulted when deciding whether a paused
 * auto-mode session can be resumed: `active`, `paused`, `stepMode`, and `basePath`.
 */
type AutoResumeSnapshot = Pick<AutoDashboardData, "active" | "paused" | "stepMode" | "basePath">;
/**
 * Collaborators used by `resumeAutoAfterProviderDelay`. They are passed in as
 * a parameter so callers (for example tests) can substitute fakes for the
 * real auto-mode functions.
 */
export interface ProviderErrorResumeDeps {
/** Returns the auto-mode state fields inspected before attempting a resume. */
getSnapshot(): AutoResumeSnapshot;
/**
 * Starts auto-mode for the project at `base`.
 * NOTE(review): `options.step` presumably selects step mode — confirm against
 * `startAuto` in ../auto.js, which is the default implementation.
 */
startAuto(
ctx: ExtensionCommandContext,
pi: ExtensionAPI,
base: string,
verboseMode: boolean,
options?: { step?: boolean },
): Promise<void>;
}
/**
 * Default wiring used when no `deps` argument is supplied: the snapshot comes
 * from `getAutoDashboardData()` and starting is delegated to `startAuto`, both
 * imported from ../auto.js.
 */
const defaultDeps: ProviderErrorResumeDeps = {
  getSnapshot() {
    return getAutoDashboardData();
  },
  startAuto: startAuto,
};
/**
 * Attempts to restart a paused auto-mode session once a provider-error
 * backoff delay has elapsed.
 *
 * The decision is based on a single snapshot taken from `deps.getSnapshot()`:
 * - `"already-active"` — the snapshot reports auto-mode as active; nothing is started.
 * - `"not-paused"` — auto-mode is neither active nor paused; nothing is started.
 * - `"missing-base"` — auto-mode is paused but the snapshot has no base path;
 *   a warning is sent through `ctx.ui.notify` and nothing is started.
 * - `"resumed"` — `deps.startAuto` was awaited with the recorded base path,
 *   verbose mode off, and the snapshot's step mode.
 *
 * @param pi   Extension API handle, forwarded unchanged to `deps.startAuto`.
 * @param ctx  Extension context; used for the warning notification and
 *             forwarded to `deps.startAuto`.
 * @param deps Collaborators to use; defaults to the real auto-mode functions.
 * @returns    One of the four outcome strings described above.
 * @throws     Whatever `deps.startAuto` rejects with (not caught here).
 */
export async function resumeAutoAfterProviderDelay(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  deps: ProviderErrorResumeDeps = defaultDeps,
): Promise<"resumed" | "already-active" | "not-paused" | "missing-base"> {
  const state = deps.getSnapshot();

  // A running session must not be started a second time.
  if (state.active) {
    return "already-active";
  }

  // Only a paused session is eligible for an automatic restart.
  if (!state.paused) {
    return "not-paused";
  }

  const resumeBase = state.basePath;
  if (!resumeBase) {
    const missingBaseNotice =
      "Provider error recovery delay elapsed, but no paused auto-mode base path was available. Leaving auto-mode paused.";
    ctx.ui.notify(missingBaseNotice, "warning");
    return "missing-base";
  }

  // startAuto is declared against ExtensionCommandContext, hence the cast.
  // NOTE(review): confirm the runtime ctx actually satisfies that type.
  const commandCtx = ctx as ExtensionCommandContext;
  const startOptions = { step: state.stepMode };
  await deps.startAuto(commandCtx, pi, resumeBase, false, startOptions);
  return "resumed";
}

View file

@ -12,6 +12,7 @@ import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { classifyError, isTransient, isTransientNetworkError } from "../error-classifier.ts";
import { pauseAutoForProviderError } from "../provider-error-pause.ts";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.ts";
import { getNextFallbackModel } from "../preferences.ts";
const __dirname = dirname(fileURLToPath(import.meta.url));
@ -268,6 +269,90 @@ test("pauseAutoForProviderError falls back to indefinite pause when not rate lim
]);
});
// ── resumeAutoAfterProviderDelay ────────────────────────────────────────────
// Resumed case: a paused snapshot with a base path must lead to exactly one
// startAuto call carrying that base path, verbose mode off, and the snapshot's
// step mode.
test("resumeAutoAfterProviderDelay restarts paused auto-mode from the recorded base path", async () => {
  const recordedStarts: Array<{ base: string; verboseMode: boolean; step?: boolean }> = [];
  const pausedSnapshot = {
    active: false,
    paused: true,
    stepMode: true,
    basePath: "/tmp/project",
  };

  const outcome = await resumeAutoAfterProviderDelay(
    {} as any,
    { ui: { notify() {} } } as any,
    {
      getSnapshot: () => pausedSnapshot,
      startAuto: async (_ctx, _pi, base, verboseMode, options) => {
        recordedStarts.push({ base, verboseMode, step: options?.step });
      },
    },
  );

  assert.equal(outcome, "resumed");
  const expectedStarts = [{ base: "/tmp/project", verboseMode: false, step: true }];
  assert.deepEqual(recordedStarts, expectedStarts);
});
// Duplicate-resume case: when the snapshot already reports auto-mode as
// active, the helper must report "already-active" and never call startAuto.
test("resumeAutoAfterProviderDelay does not double-start when auto-mode is already active", async () => {
  let timesStarted = 0;
  const activeSnapshot = {
    active: true,
    paused: false,
    stepMode: false,
    basePath: "/tmp/project",
  };

  const outcome = await resumeAutoAfterProviderDelay(
    {} as any,
    { ui: { notify() {} } } as any,
    {
      getSnapshot: () => activeSnapshot,
      startAuto: async () => {
        timesStarted++;
      },
    },
  );

  assert.equal(outcome, "already-active");
  assert.equal(timesStarted, 0);
});
// Missing-base-path case: a paused snapshot with an empty base path must not
// start auto-mode; instead a single warning notification is emitted.
test("resumeAutoAfterProviderDelay leaves auto paused when no base path is available", async () => {
  const seenNotifications: Array<{ message: string; level: string }> = [];
  let timesStarted = 0;

  // Minimal context stub that records every notify() call; a missing level is
  // recorded as "info".
  const recordingCtx = {
    ui: {
      notify(message: string, level?: string) {
        const recordedLevel = level ?? "info";
        seenNotifications.push({ message, level: recordedLevel });
      },
    },
  } as any;

  const pausedWithoutBase = {
    active: false,
    paused: true,
    stepMode: false,
    basePath: "",
  };

  const outcome = await resumeAutoAfterProviderDelay(
    {} as any,
    recordingCtx,
    {
      getSnapshot: () => pausedWithoutBase,
      startAuto: async () => {
        timesStarted++;
      },
    },
  );

  assert.equal(outcome, "missing-base");
  assert.equal(timesStarted, 0);

  const expectedNotifications = [
    {
      message: "Provider error recovery delay elapsed, but no paused auto-mode base path was available. Leaving auto-mode paused.",
      level: "warning",
    },
  ];
  assert.deepEqual(seenNotifications, expectedNotifications);
});
// ── Escalating backoff for transient errors (#1166) ─────────────────────────
test("agent-end-recovery.ts tracks consecutive transient errors for escalating backoff", () => {
@ -303,6 +388,19 @@ test("agent-end-recovery.ts applies escalating delay for repeated transient erro
);
});
// Source-level regression check (#2813): the recovery module's text must
// mention resumeAutoAfterProviderDelay and must no longer contain the old
// hidden "Continue execution" prompt string.
test("agent-end-recovery.ts resumes transient provider pauses through startAuto instead of a hidden prompt", () => {
  const recoveryPath = join(__dirname, "..", "bootstrap", "agent-end-recovery.ts");
  const recoverySource = readFileSync(recoveryPath, "utf-8");

  const mentionsResumeHelper = recoverySource.includes("resumeAutoAfterProviderDelay");
  const stillHasHiddenPrompt = recoverySource.includes('Continue execution — provider error recovery delay elapsed.');

  assert.ok(
    mentionsResumeHelper,
    "agent-end-recovery.ts must resume paused auto-mode through resumeAutoAfterProviderDelay (#2813)",
  );
  assert.ok(
    !stillHasHiddenPrompt,
    "transient provider resume must not rely on a hidden continue prompt (#2813)",
  );
});
// ── Codex error extraction (#1166) ──────────────────────────────────────────
test("openai-codex-responses.ts extracts nested error fields", () => {