fix(sf): handle auto-mode limit errors with model fallback (#4373)
Cherry-pick of gsd-build/gsd-2@0b7a05491 adapted for sf/ paths:

- Expand RATE_LIMIT_RE to cover quota-window phrasing (hit your limit, usage limit, quota reached)
- Rate-limit errors bypass transient-deferral early return so model fallback executes
- Add setCurrentDispatchedModelId() to keep AUTO dashboard label in sync after fallback switch
- 4 regression tests for classifier coverage and structural guards

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b5e1beff8e
commit
9af9c0712d
4 changed files with 48 additions and 22 deletions
|
|
@@ -466,6 +466,15 @@ export function getAutoModeStartModel(): {
|
|||
return s.autoModeStartModel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the dashboard-facing dispatched model label.
|
||||
* Used when runtime recovery switches models mid-unit (e.g. provider fallback)
|
||||
* so the AUTO box reflects the active model immediately.
|
||||
*/
|
||||
export function setCurrentDispatchedModelId(model: { provider: string; id: string } | null): void {
|
||||
s.currentDispatchedModelId = model ? `${model.provider}/${model.id}` : null;
|
||||
}
|
||||
|
||||
// Tool tracking — delegates to auto-tool-tracking.ts
|
||||
export function markToolStart(toolCallId: string, toolName?: string): void {
|
||||
_markToolStart(toolCallId, s.active, toolName);
|
||||
|
|
|
|||
|
|
@@ -2,7 +2,7 @@ import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/pi-codin
|
|||
|
||||
import { logWarning } from "../workflow-logger.js";
|
||||
import { checkAutoStartAfterDiscuss } from "../guided-flow.js";
|
||||
-import { getAutoDashboardData, getAutoModeStartModel, isAutoActive, pauseAuto } from "../auto.js";
|
||||
+import { getAutoDashboardData, getAutoModeStartModel, isAutoActive, pauseAuto, setCurrentDispatchedModelId } from "../auto.js";
|
||||
import { getNextFallbackModel, resolveModelWithFallbacksForUnit, resolvePersistModelChanges } from "../preferences.js";
|
||||
import { pauseAutoForProviderError } from "../provider-error-pause.js";
|
||||
import { isSessionSwitchInFlight, resolveAgentEnd } from "../auto-loop.js";
|
||||
|
|
@@ -125,26 +125,11 @@ export async function handleAgentEnd(
|
|||
// ── 1. Classify using rawErrorMsg to avoid prose false-positives ────
|
||||
const cls = classifyError(rawErrorMsg, explicitRetryAfterMs);
|
||||
|
||||
// ── 1b. Defer to Core RetryHandler for transient errors ─────────────
|
||||
// The Core RetryHandler (agent-session.ts) processes retryable errors
|
||||
// AFTER this extension handler, in the same _processAgentEvent() call.
|
||||
// For transient errors (overloaded, rate limit, server), the Core will
|
||||
// retry in-context — same session, same conversation — which is strictly
|
||||
// better than our Layer 2 pause+resume (which creates a new session).
|
||||
//
|
||||
// If we react here AND the Core also retries, we race: pauseAuto tears
|
||||
// down the session while agent.continue() starts a new turn.
|
||||
//
|
||||
// Solution: Do nothing for transient errors. The Core RetryHandler
|
||||
// runs next in _processAgentEvent and will either:
|
||||
// a) Retry successfully → new agent_end (success) → we see it next time
|
||||
// b) Exhaust retries → the agent stays idle, autoLoop's unit timeout
|
||||
// or stuck detection handles it
|
||||
//
|
||||
// We do NOT call resolveAgentEnd here — that would unblock autoLoop
|
||||
// prematurely while the Core is still retrying in the same session.
|
||||
// We do NOT call pauseAuto — that would tear down the session.
|
||||
if (isTransient(cls)) {
|
||||
// ── 1b. Defer to Core RetryHandler for most transient errors ────────
|
||||
// Core retries transient failures in-session after this handler.
|
||||
// Keep that behavior for non-rate-limit classes to avoid pause/retry races,
|
||||
// but let rate-limit continue into model fallback logic below (#4373).
|
||||
if (isTransient(cls) && cls.kind !== "rate-limit") {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@@ -203,6 +188,7 @@ export async function handleAgentEnd(
|
|||
if (modelToSet) {
|
||||
const ok = await pi.setModel(modelToSet, { persist: persistModelChanges });
|
||||
if (ok) {
|
||||
setCurrentDispatchedModelId({ provider: modelToSet.provider, id: modelToSet.id });
|
||||
ctx.ui.notify(`Model error${errorDetail}. Switched to fallback: ${nextModelId} and resuming.`, "warning");
|
||||
pi.sendMessage({ customType: "sf-auto-timeout-recovery", content: "Continue execution.", display: false }, { triggerTurn: true });
|
||||
return;
|
||||
|
|
@@ -220,6 +206,7 @@ export async function handleAgentEnd(
|
|||
if (startModel) {
|
||||
const ok = await pi.setModel(startModel, { persist: persistModelChanges });
|
||||
if (ok) {
|
||||
setCurrentDispatchedModelId({ provider: startModel.provider, id: startModel.id });
|
||||
retryState.networkRetryCount = 0;
|
||||
retryState.currentRetryModelId = undefined;
|
||||
ctx.ui.notify(`Model error${errorDetail}. Restored session model: ${sessionModel.provider}/${sessionModel.id} and resuming.`, "warning");
|
||||
|
|
|
|||
|
|
@@ -43,7 +43,8 @@ export function resetRetryState(state: RetryState): void {
|
|||
// ── Classification ──────────────────────────────────────────────────────────
|
||||
|
||||
const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i;
|
||||
-const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i;
|
||||
+// Include provider-specific quota-window phrasing like "hit your limit", "usage limit", "quota reached"
|
||||
+const RATE_LIMIT_RE = /rate.?limit|too many requests|429|hit your limit|usage limit|quota (?:reached|hit)|limit.*resets?/i;
|
||||
// OpenRouter affordability-style quota errors should be treated as transient
|
||||
// so core retry logic can lower maxTokens and continue in-session.
|
||||
const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i;
|
||||
|
|
|
|||
|
|
@@ -32,6 +32,19 @@ test("classifyError detects rate limit from message", () => {
|
|||
assert.equal(result.kind, "rate-limit");
|
||||
});
|
||||
|
||||
test("classifyError treats Anthropic quota-window phrasing as transient rate-limit (#4373)", () => {
|
||||
const result = classifyError("You've hit your limit · resets soon");
|
||||
assert.ok(isTransient(result));
|
||||
assert.equal(result.kind, "rate-limit");
|
||||
assert.ok("retryAfterMs" in result && result.retryAfterMs === 60_000);
|
||||
});
|
||||
|
||||
test("classifyError treats usage-limit phrasing as transient rate-limit (#4373)", () => {
|
||||
const result = classifyError("usage limit reached for this workspace");
|
||||
assert.ok(isTransient(result));
|
||||
assert.equal(result.kind, "rate-limit");
|
||||
});
|
||||
|
||||
test("classifyError treats OpenRouter affordability errors as transient rate-limit class", () => {
|
||||
const result = classifyError(
|
||||
"402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
|
||||
|
|
@@ -455,6 +468,22 @@ test("agent-end-recovery.ts resumes transient provider pauses through startAuto
|
|||
);
|
||||
});
|
||||
|
||||
test("agent-end-recovery.ts does not defer rate-limit errors to core retry handler before fallback (#4373)", () => {
|
||||
const src = readFileSync(join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8");
|
||||
assert.ok(
|
||||
src.includes('if (isTransient(cls) && cls.kind !== "rate-limit")'),
|
||||
"rate-limit errors must bypass transient core-retry deferral so fallback can execute (#4373)",
|
||||
);
|
||||
});
|
||||
|
||||
test("agent-end-recovery.ts updates dashboard dispatched model after fallback switch", () => {
|
||||
const src = readFileSync(join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8");
|
||||
assert.ok(
|
||||
src.includes("setCurrentDispatchedModelId"),
|
||||
"agent-end-recovery.ts should update currentDispatchedModelId when recovery switches model",
|
||||
);
|
||||
});
|
||||
|
||||
// ── Codex error extraction (#1166) ──────────────────────────────────────────
|
||||
|
||||
test("openai-codex-responses.ts extracts nested error fields", () => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue