sf snapshot: uncommitted changes after 120m inactivity
This commit is contained in:
parent
abe34084a4
commit
362d766680
4 changed files with 87 additions and 18 deletions
|
|
@ -82,7 +82,7 @@ describe("FallbackResolver — findFallback", () => {
|
|||
assert.equal(result!.chainName, "coding");
|
||||
});
|
||||
|
||||
it("marks current provider as exhausted", async () => {
|
||||
it("marks current provider as exhausted for rate_limit errors", async () => {
|
||||
const { resolver, authStorage } = createResolver();
|
||||
await resolver.findFallback(zaiModel, "rate_limit");
|
||||
|
||||
|
|
@ -92,6 +92,18 @@ describe("FallbackResolver — findFallback", () => {
|
|||
assert.equal(fn.mock.calls[0][1], "rate_limit");
|
||||
});
|
||||
|
||||
it("does NOT mark provider as exhausted for quota_exhausted (per-model quota)", async () => {
|
||||
const { resolver, authStorage } = createResolver();
|
||||
await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
|
||||
const fn = authStorage.markProviderExhausted as any;
|
||||
assert.equal(
|
||||
fn.mock.calls.length,
|
||||
0,
|
||||
"quota_exhausted should not mark entire provider exhausted — other models may have quota",
|
||||
);
|
||||
});
|
||||
|
||||
it("skips backed-off providers", async () => {
|
||||
const { resolver } = createResolver({
|
||||
isProviderAvailable: (provider: string) => provider !== "alibaba",
|
||||
|
|
|
|||
|
|
@ -44,8 +44,13 @@ export class FallbackResolver {
|
|||
const { enabled, chains } = this.settingsManager.getFallbackSettings();
|
||||
if (!enabled) return null;
|
||||
|
||||
// Mark the current provider as exhausted at the provider level
|
||||
this.authStorage.markProviderExhausted(currentModel.provider, errorType);
|
||||
// Mark the current provider as exhausted at the provider level.
|
||||
// Skip for quota_exhausted — quotas are typically per-model (e.g.
|
||||
// google-gemini-cli's Code Assist per-model limits), so other models
|
||||
// from the same provider may still be available.
|
||||
if (errorType !== "quota_exhausted") {
|
||||
this.authStorage.markProviderExhausted(currentModel.provider, errorType);
|
||||
}
|
||||
|
||||
// Search all chains for one containing the current model
|
||||
for (const [chainName, entries] of Object.entries(chains)) {
|
||||
|
|
|
|||
|
|
@ -513,6 +513,19 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
);
|
||||
assert.equal(handler.isRetryableError(msg), true);
|
||||
});
|
||||
|
||||
it("considers 'no capacity' provider errors as retryable", () => {
|
||||
const { deps } = createMockDeps();
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
"No capacity available for model gemini-2.5-pro on the server",
|
||||
);
|
||||
assert.equal(
|
||||
handler.isRetryableError(msg),
|
||||
true,
|
||||
"no capacity errors should be retryable (triggers fallback)",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("third-party block claude-code fallback (#3772)", () => {
|
||||
|
|
@ -608,7 +621,7 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
});
|
||||
|
||||
describe("quota wait before fallback", () => {
|
||||
it("waits for retryAfterMs before retrying same provider on quota error", async () => {
|
||||
it("waits for short retryAfterMs before retrying same provider on quota error", async () => {
|
||||
const { deps, emittedEvents, continueFn } = createMockDeps({
|
||||
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
|
||||
fallbackResult: null,
|
||||
|
|
@ -617,14 +630,14 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 59s.",
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 3s.",
|
||||
);
|
||||
(msg as any).retryAfterMs = 59000;
|
||||
(msg as any).retryAfterMs = 3000;
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
// Should wait and retry, not immediately fail
|
||||
assert.equal(result, true, "should wait and retry on quota reset");
|
||||
// Should wait and retry for short resets (<= 5s threshold)
|
||||
assert.equal(result, true, "should wait and retry on short quota reset");
|
||||
|
||||
const retryStart = emittedEvents.find(
|
||||
(e) => e.type === "auto_retry_start",
|
||||
|
|
@ -632,7 +645,7 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
assert.ok(retryStart, "should emit auto_retry_start with wait");
|
||||
assert.equal(
|
||||
retryStart!.delayMs,
|
||||
59000,
|
||||
3000,
|
||||
"should use provider's retry-after delay",
|
||||
);
|
||||
assert.ok(
|
||||
|
|
@ -651,7 +664,7 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("falls through to fallback when retryAfterMs exceeds maxDelayMs", async () => {
|
||||
it("falls through to fallback when retryAfterMs exceeds short threshold", async () => {
|
||||
const fallbackModel = createMockModel("openai", "gpt-4o");
|
||||
const { deps, emittedEvents } = createMockDeps({
|
||||
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
|
||||
|
|
@ -659,18 +672,18 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
model: fallbackModel,
|
||||
reason: "cross-provider fallback",
|
||||
},
|
||||
retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 30000 },
|
||||
retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 60000 },
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 5m.",
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 59s.",
|
||||
);
|
||||
(msg as any).retryAfterMs = 300000; // 5 minutes, exceeds maxDelayMs
|
||||
(msg as any).retryAfterMs = 59000;
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
// Should fall through to fallback since wait is too long
|
||||
// Should fall through to fallback since wait > 5s threshold
|
||||
assert.equal(result, true, "should fallback when quota reset is too long");
|
||||
|
||||
const switchEvent = emittedEvents.find(
|
||||
|
|
@ -764,6 +777,39 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("triggers fallback for 'no capacity' server errors", async () => {
|
||||
// "No capacity available" is a provider-side capacity issue,
|
||||
// not a credential/rate-limit problem per se. It should still classify as rate_limit
|
||||
// to trigger the fallback chain.
|
||||
const fallbackModel = createMockModel("openai", "gpt-4o");
|
||||
const { deps, emittedEvents, findFallback } = createMockDeps({
|
||||
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
|
||||
markUsageLimitReachedResult: false,
|
||||
fallbackResult: {
|
||||
model: fallbackModel,
|
||||
reason: "free-selection fallback",
|
||||
},
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
"No capacity available for model gemini-2.5-pro on the server",
|
||||
);
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
assert.equal(result, true, "should retry with fallback provider");
|
||||
assert.equal(
|
||||
findFallback.mock.calls.length,
|
||||
1,
|
||||
"should invoke fallback resolver for capacity errors",
|
||||
);
|
||||
assert.ok(
|
||||
emittedEvents.some((e) => e.type === "fallback_provider_switch"),
|
||||
"should emit fallback_provider_switch",
|
||||
);
|
||||
});
|
||||
|
||||
it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => {
|
||||
const fallbackModel = createMockModel("openai", "gpt-4o");
|
||||
const { deps, markUsageLimitReached, continueFn } = createMockDeps({
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ export class RetryHandler {
|
|||
// generated error from getApiKey() when credentials are in a backoff window.
|
||||
// Re-entering the retry handler for that message creates a cascade of empty
|
||||
// error entries in the session file, breaking resume (#3429).
|
||||
return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|connection.?lost|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
|
||||
return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|connection.?lost|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available|no capacity|capacity.*available/i.test(
|
||||
err,
|
||||
);
|
||||
}
|
||||
|
|
@ -211,9 +211,10 @@ export class RetryHandler {
|
|||
const isAuthError = errorType === "auth_error";
|
||||
if (isRateLimit || isQuotaError || isAuthError) {
|
||||
// For quota errors with a retry-after hint, wait before switching providers.
|
||||
// The quota may reset quickly (e.g. 59s), so waiting is often better than
|
||||
// switching to a potentially worse model.
|
||||
if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0) {
|
||||
// Only wait if the reset is very short (<= 5s); otherwise falling back to
|
||||
// another provider is faster and keeps auto-mode throughput up.
|
||||
const QUOTA_WAIT_THRESHOLD_MS = 5_000;
|
||||
if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0 && message.retryAfterMs <= QUOTA_WAIT_THRESHOLD_MS) {
|
||||
const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity;
|
||||
if (message.retryAfterMs <= cap) {
|
||||
this._deps.emit({
|
||||
|
|
@ -486,6 +487,11 @@ export class RetryHandler {
|
|||
return "quota_exhausted";
|
||||
if (/rate.?limit|too many requests|429|529|overloaded/i.test(err))
|
||||
return "rate_limit";
|
||||
// Provider-side capacity/server load — the server has no available
|
||||
// capacity for this model (e.g. "No capacity available for model X").
|
||||
// Treat as rate_limit so the fallback chain kicks in immediately.
|
||||
if (/no capacity|capacity.*available|server.*busy|too busy/i.test(err))
|
||||
return "rate_limit";
|
||||
if (
|
||||
/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(
|
||||
err,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue