fix: add retry logic for transient network/auth failures instead of crashing (#365)

This commit is contained in:
Flux Labs 2026-03-14 09:50:35 -05:00 committed by GitHub
parent a7ac787165
commit f24f63f290
2 changed files with 49 additions and 29 deletions

View file

@ -2276,8 +2276,8 @@ export class AgentSession {
if (isContextOverflow(message, contextWindow)) return false;
const err = message.errorMessage;
// Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection errors, fetch failed, terminated, retry delay exceeded
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay/i.test(
// Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection errors, fetch failed, terminated, retry delay exceeded, network unavailable, credentials expired, credentials temporarily backed off (transient network/auth failures)
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i.test(
err,
);
}

View file

@ -317,42 +317,62 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
if (!resolvedProvider) {
throw new Error("No model selected");
}
const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
if (!key) {
// Check if credentials exist but are temporarily backed off
// (e.g., after a 429 quota exhaustion). Provide a specific error
// so the retry handler knows this is transient, not a permanent
// auth failure.
// Retry key resolution with backoff to handle transient network failures
// (e.g., OAuth token refresh failing due to brief connectivity loss).
const maxAttempts = 3;
const baseDelayMs = 2000;
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
if (key) return key;
// On the last attempt, fall through to error handling below
if (attempt >= maxAttempts) break;
// Only retry if credentials exist (network issue) — no point retrying
// when there are genuinely no credentials configured.
const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
if (hasAuth) {
throw new Error(
`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
`The request will be retried automatically when backoff expires.`,
);
}
const model = agent.state.model;
const isOAuth = model && modelRegistry.isUsingOAuth(model);
if (isOAuth) {
// If credentials exist but are all in a backoff window (quota / rate-limit),
// surface a specific message instead of the misleading "Authentication failed".
if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) {
throw new Error(
`Rate limit in effect for "${resolvedProvider}". ` +
`Please wait before retrying or switch to a different model.`,
);
}
if (!hasAuth && !isOAuth) break;
// Wait with linearly increasing backoff (baseDelayMs * attempt) before retrying
await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt));
}
// All retries exhausted — throw descriptive error
// Check if credentials exist but are temporarily backed off
// (e.g., after a 429 quota exhaustion). Provide a specific error
// so the retry handler knows this is transient, not a permanent
// auth failure.
const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
if (hasAuth) {
throw new Error(
`All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` +
`The request will be retried automatically when backoff expires.`,
);
}
const model = agent.state.model;
const isOAuth = model && modelRegistry.isUsingOAuth(model);
if (isOAuth) {
// If credentials exist but are all in a backoff window (quota / rate-limit),
// surface a specific message instead of the misleading "Authentication failed".
if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) {
throw new Error(
`Authentication failed for "${resolvedProvider}". ` +
`Credentials may have expired or network is unavailable. ` +
`Run '/login ${resolvedProvider}' to re-authenticate.`,
`Rate limit in effect for "${resolvedProvider}". ` +
`Please wait before retrying or switch to a different model.`,
);
}
throw new Error(
`No API key found for "${resolvedProvider}". ` +
`Set an API key environment variable or run '/login ${resolvedProvider}'.`,
`Authentication failed for "${resolvedProvider}". ` +
`Credentials may have expired or network is unavailable. ` +
`Run '/login ${resolvedProvider}' to re-authenticate.`,
);
}
return key;
throw new Error(
`No API key found for "${resolvedProvider}". ` +
`Set an API key environment variable or run '/login ${resolvedProvider}'.`,
);
},
});