diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts index 7f6aecc8c..a314e96f8 100644 --- a/src/resources/extensions/sf/auto.ts +++ b/src/resources/extensions/sf/auto.ts @@ -466,6 +466,15 @@ export function getAutoModeStartModel(): { return s.autoModeStartModel; } +/** + * Update the dashboard-facing dispatched model label. + * Used when runtime recovery switches models mid-unit (e.g. provider fallback) + * so the AUTO box reflects the active model immediately. + */ +export function setCurrentDispatchedModelId(model: { provider: string; id: string } | null): void { + s.currentDispatchedModelId = model ? `${model.provider}/${model.id}` : null; +} + // Tool tracking — delegates to auto-tool-tracking.ts export function markToolStart(toolCallId: string, toolName?: string): void { _markToolStart(toolCallId, s.active, toolName); diff --git a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts index b23799694..642fc45ee 100644 --- a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts +++ b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts @@ -2,7 +2,7 @@ import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/pi-codin import { logWarning } from "../workflow-logger.js"; import { checkAutoStartAfterDiscuss } from "../guided-flow.js"; -import { getAutoDashboardData, getAutoModeStartModel, isAutoActive, pauseAuto } from "../auto.js"; +import { getAutoDashboardData, getAutoModeStartModel, isAutoActive, pauseAuto, setCurrentDispatchedModelId } from "../auto.js"; import { getNextFallbackModel, resolveModelWithFallbacksForUnit, resolvePersistModelChanges } from "../preferences.js"; import { pauseAutoForProviderError } from "../provider-error-pause.js"; import { isSessionSwitchInFlight, resolveAgentEnd } from "../auto-loop.js"; @@ -125,26 +125,11 @@ export async function handleAgentEnd( // ── 1. Classify using rawErrorMsg to avoid prose false-positives ──── const cls = classifyError(rawErrorMsg, explicitRetryAfterMs); - // ── 1b. Defer to Core RetryHandler for transient errors ───────────── - // The Core RetryHandler (agent-session.ts) processes retryable errors - // AFTER this extension handler, in the same _processAgentEvent() call. - // For transient errors (overloaded, rate limit, server), the Core will - // retry in-context — same session, same conversation — which is strictly - // better than our Layer 2 pause+resume (which creates a new session). - // - // If we react here AND the Core also retries, we race: pauseAuto tears - // down the session while agent.continue() starts a new turn. - // - // Solution: Do nothing for transient errors. The Core RetryHandler - // runs next in _processAgentEvent and will either: - // a) Retry successfully → new agent_end (success) → we see it next time - // b) Exhaust retries → the agent stays idle, autoLoop's unit timeout - // or stuck detection handles it - // - // We do NOT call resolveAgentEnd here — that would unblock autoLoop - // prematurely while the Core is still retrying in the same session. - // We do NOT call pauseAuto — that would tear down the session. - if (isTransient(cls)) { + // ── 1b. Defer to Core RetryHandler for most transient errors ──────── + // Core retries transient failures in-session after this handler. + // Keep that behavior for non-rate-limit classes to avoid pause/retry races, + // but let rate-limit continue into model fallback logic below (#4373). + if (isTransient(cls) && cls.kind !== "rate-limit") { return; } @@ -203,6 +188,7 @@ export async function handleAgentEnd( if (modelToSet) { const ok = await pi.setModel(modelToSet, { persist: persistModelChanges }); if (ok) { + setCurrentDispatchedModelId({ provider: modelToSet.provider, id: modelToSet.id }); ctx.ui.notify(`Model error${errorDetail}. Switched to fallback: ${nextModelId} and resuming.`, "warning"); pi.sendMessage({ customType: "sf-auto-timeout-recovery", content: "Continue execution.", display: false }, { triggerTurn: true }); return; @@ -220,6 +206,7 @@ export async function handleAgentEnd( if (startModel) { const ok = await pi.setModel(startModel, { persist: persistModelChanges }); if (ok) { + setCurrentDispatchedModelId({ provider: startModel.provider, id: startModel.id }); retryState.networkRetryCount = 0; retryState.currentRetryModelId = undefined; ctx.ui.notify(`Model error${errorDetail}. Restored session model: ${sessionModel.provider}/${sessionModel.id} and resuming.`, "warning"); diff --git a/src/resources/extensions/sf/error-classifier.ts b/src/resources/extensions/sf/error-classifier.ts index 19b788407..ebe38a2b3 100644 --- a/src/resources/extensions/sf/error-classifier.ts +++ b/src/resources/extensions/sf/error-classifier.ts @@ -43,7 +43,8 @@ export function resetRetryState(state: RetryState): void { // ── Classification ────────────────────────────────────────────────────────── const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i; -const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i; +// Include provider-specific quota-window phrasing like "hit your limit", "usage limit", "quota reached" +const RATE_LIMIT_RE = /rate.?limit|too many requests|429|hit your limit|usage limit|quota (?:reached|hit)|limit.*resets?/i; // OpenRouter affordability-style quota errors should be treated as transient // so core retry logic can lower maxTokens and continue in-session. const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i; diff --git a/src/resources/extensions/sf/tests/provider-errors.test.ts b/src/resources/extensions/sf/tests/provider-errors.test.ts index 2ae75a380..17c65dbb6 100644 --- a/src/resources/extensions/sf/tests/provider-errors.test.ts +++ b/src/resources/extensions/sf/tests/provider-errors.test.ts @@ -32,6 +32,19 @@ test("classifyError detects rate limit from message", () => { assert.equal(result.kind, "rate-limit"); }); +test("classifyError treats Anthropic quota-window phrasing as transient rate-limit (#4373)", () => { + const result = classifyError("You've hit your limit · resets soon"); + assert.ok(isTransient(result)); + assert.equal(result.kind, "rate-limit"); + assert.ok("retryAfterMs" in result && result.retryAfterMs === 60_000); +}); + +test("classifyError treats usage-limit phrasing as transient rate-limit (#4373)", () => { + const result = classifyError("usage limit reached for this workspace"); + assert.ok(isTransient(result)); + assert.equal(result.kind, "rate-limit"); +}); + test("classifyError treats OpenRouter affordability errors as transient rate-limit class", () => { const result = classifyError( "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.", @@ -455,6 +468,22 @@ test("agent-end-recovery.ts resumes transient provider pauses through startAuto ); }); +test("agent-end-recovery.ts does not defer rate-limit errors to core retry handler before fallback (#4373)", () => { + const src = readFileSync(join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8"); + assert.ok( + src.includes('if (isTransient(cls) && cls.kind !== "rate-limit")'), + "rate-limit errors must bypass transient core-retry deferral so fallback can execute (#4373)", + ); +}); + +test("agent-end-recovery.ts updates dashboard dispatched model after fallback switch", () => { + const src = readFileSync(join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8"); + assert.ok( + src.includes("setCurrentDispatchedModelId"), + "agent-end-recovery.ts should update currentDispatchedModelId when recovery switches model", + ); +}); + // ── Codex error extraction (#1166) ────────────────────────────────────────── test("openai-codex-responses.ts extracts nested error fields", () => {