sf snapshot: uncommitted changes after 120m inactivity

This commit is contained in:
Mikael Hugo 2026-05-04 14:46:50 +02:00
parent abe34084a4
commit 362d766680
4 changed files with 87 additions and 18 deletions

View file

@@ -82,7 +82,7 @@ describe("FallbackResolver — findFallback", () => {
assert.equal(result!.chainName, "coding");
});
it("marks current provider as exhausted", async () => {
it("marks current provider as exhausted for rate_limit errors", async () => {
const { resolver, authStorage } = createResolver();
await resolver.findFallback(zaiModel, "rate_limit");
@@ -92,6 +92,18 @@ describe("FallbackResolver — findFallback", () => {
assert.equal(fn.mock.calls[0][1], "rate_limit");
});
it("does NOT mark provider as exhausted for quota_exhausted (per-model quota)", async () => {
const { resolver, authStorage } = createResolver();
await resolver.findFallback(zaiModel, "quota_exhausted");
const fn = authStorage.markProviderExhausted as any;
assert.equal(
fn.mock.calls.length,
0,
"quota_exhausted should not mark entire provider exhausted — other models may have quota",
);
});
it("skips backed-off providers", async () => {
const { resolver } = createResolver({
isProviderAvailable: (provider: string) => provider !== "alibaba",

View file

@@ -44,8 +44,13 @@ export class FallbackResolver {
const { enabled, chains } = this.settingsManager.getFallbackSettings();
if (!enabled) return null;
// Mark the current provider as exhausted at the provider level
this.authStorage.markProviderExhausted(currentModel.provider, errorType);
// Mark the current provider as exhausted at the provider level.
// Skip for quota_exhausted — quotas are typically per-model (e.g.
// google-gemini-cli's Code Assist per-model limits), so other models
// from the same provider may still be available.
if (errorType !== "quota_exhausted") {
this.authStorage.markProviderExhausted(currentModel.provider, errorType);
}
// Search all chains for one containing the current model
for (const [chainName, entries] of Object.entries(chains)) {

View file

@@ -513,6 +513,19 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
);
assert.equal(handler.isRetryableError(msg), true);
});
it("considers 'no capacity' provider errors as retryable", () => {
const { deps } = createMockDeps();
const handler = new RetryHandler(deps);
const msg = errorMessage(
"No capacity available for model gemini-2.5-pro on the server",
);
assert.equal(
handler.isRetryableError(msg),
true,
"no capacity errors should be retryable (triggers fallback)",
);
});
});
describe("third-party block claude-code fallback (#3772)", () => {
@@ -608,7 +621,7 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
});
describe("quota wait before fallback", () => {
it("waits for retryAfterMs before retrying same provider on quota error", async () => {
it("waits for short retryAfterMs before retrying same provider on quota error", async () => {
const { deps, emittedEvents, continueFn } = createMockDeps({
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
fallbackResult: null,
@@ -617,14 +630,14 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
const handler = new RetryHandler(deps);
const msg = errorMessage(
"You have exhausted your capacity on this model. Your quota will reset after 59s.",
"You have exhausted your capacity on this model. Your quota will reset after 3s.",
);
(msg as any).retryAfterMs = 59000;
(msg as any).retryAfterMs = 3000;
const result = await handler.handleRetryableError(msg);
// Should wait and retry, not immediately fail
assert.equal(result, true, "should wait and retry on quota reset");
// Should wait and retry for short resets (< 5s threshold)
assert.equal(result, true, "should wait and retry on short quota reset");
const retryStart = emittedEvents.find(
(e) => e.type === "auto_retry_start",
@@ -632,7 +645,7 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
assert.ok(retryStart, "should emit auto_retry_start with wait");
assert.equal(
retryStart!.delayMs,
59000,
3000,
"should use provider's retry-after delay",
);
assert.ok(
@@ -651,7 +664,7 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
);
});
it("falls through to fallback when retryAfterMs exceeds maxDelayMs", async () => {
it("falls through to fallback when retryAfterMs exceeds short threshold", async () => {
const fallbackModel = createMockModel("openai", "gpt-4o");
const { deps, emittedEvents } = createMockDeps({
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
@@ -659,18 +672,18 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
model: fallbackModel,
reason: "cross-provider fallback",
},
retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 30000 },
retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 60000 },
});
const handler = new RetryHandler(deps);
const msg = errorMessage(
"You have exhausted your capacity on this model. Your quota will reset after 5m.",
"You have exhausted your capacity on this model. Your quota will reset after 59s.",
);
(msg as any).retryAfterMs = 300000; // 5 minutes, exceeds maxDelayMs
(msg as any).retryAfterMs = 59000;
const result = await handler.handleRetryableError(msg);
// Should fall through to fallback since wait is too long
// Should fall through to fallback since wait > 5s threshold
assert.equal(result, true, "should fallback when quota reset is too long");
const switchEvent = emittedEvents.find(
@@ -764,6 +777,39 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
);
});
it("triggers fallback for 'no capacity' server errors", async () => {
// "No capacity available" is a provider-side capacity issue,
// not a credential/rate-limit problem. Should classify as rate_limit
// to trigger the fallback chain.
const fallbackModel = createMockModel("openai", "gpt-4o");
const { deps, emittedEvents, findFallback } = createMockDeps({
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
markUsageLimitReachedResult: false,
fallbackResult: {
model: fallbackModel,
reason: "free-selection fallback",
},
});
const handler = new RetryHandler(deps);
const msg = errorMessage(
"No capacity available for model gemini-2.5-pro on the server",
);
const result = await handler.handleRetryableError(msg);
assert.equal(result, true, "should retry with fallback provider");
assert.equal(
findFallback.mock.calls.length,
1,
"should invoke fallback resolver for capacity errors",
);
assert.ok(
emittedEvents.some((e) => e.type === "fallback_provider_switch"),
"should emit fallback_provider_switch",
);
});
it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => {
const fallbackModel = createMockModel("openai", "gpt-4o");
const { deps, markUsageLimitReached, continueFn } = createMockDeps({

View file

@@ -119,7 +119,7 @@ export class RetryHandler {
// generated error from getApiKey() when credentials are in a backoff window.
// Re-entering the retry handler for that message creates a cascade of empty
// error entries in the session file, breaking resume (#3429).
return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|connection.?lost|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|connection.?lost|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available|no capacity|capacity.*available/i.test(
err,
);
}
@@ -211,9 +211,10 @@ export class RetryHandler {
const isAuthError = errorType === "auth_error";
if (isRateLimit || isQuotaError || isAuthError) {
// For quota errors with a retry-after hint, wait before switching providers.
// The quota may reset quickly (e.g. 59s), so waiting is often better than
// switching to a potentially worse model.
if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0) {
// Only wait if the reset is very short (< 5s); otherwise falling back to
// another provider is faster and keeps auto-mode throughput up.
const QUOTA_WAIT_THRESHOLD_MS = 5_000;
if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0 && message.retryAfterMs <= QUOTA_WAIT_THRESHOLD_MS) {
const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity;
if (message.retryAfterMs <= cap) {
this._deps.emit({
@@ -486,6 +487,11 @@ export class RetryHandler {
return "quota_exhausted";
if (/rate.?limit|too many requests|429|529|overloaded/i.test(err))
return "rate_limit";
// Provider-side capacity/server load — the server has no available
// capacity for this model (e.g. "No capacity available for model X").
// Treat as rate_limit so the fallback chain kicks in immediately.
if (/no capacity|capacity.*available|server.*busy|too busy/i.test(err))
return "rate_limit";
if (
/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(
err,