fix: classify terminated/connection errors as transient in provider error handler (#2309) (#2432)

classifyProviderError now recognizes terminated, connection reset, connection
refused, fetch failed, and other network errors as transient. These get a 15s
backoff delay and auto-resume instead of being treated as permanent failures
requiring manual intervention.

Fixes #2309

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Tom Boucher 2026-03-25 00:35:19 -04:00 committed by GitHub
parent 5d0c6311f1
commit 17ce3085f9
2 changed files with 58 additions and 0 deletions

View file

@ -19,6 +19,11 @@ export function classifyProviderError(errorMsg: string): {
const isRateLimit = /rate.?limit|too many requests|429/i.test(errorMsg);
const isServerError = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i.test(errorMsg);
// Connection/process errors — transient, auto-resume after brief backoff (#2309).
// These indicate the process was killed, the connection was reset, or a network
// blip occurred. They are NOT permanent failures.
const isConnectionError = /terminated|connection.?reset|connection.?refused|other side closed|fetch failed|network.?(?:is\s+)?unavailable|ECONNREFUSED|ECONNRESET|EPIPE/i.test(errorMsg);
// Permanent errors — never auto-resume
const isPermanent = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i.test(errorMsg);
@ -37,6 +42,10 @@ export function classifyProviderError(errorMsg: string): {
return { isTransient: true, isRateLimit: false, suggestedDelayMs: 30_000 }; // 30s for server errors
}
if (isConnectionError) {
return { isTransient: true, isRateLimit: false, suggestedDelayMs: 15_000 }; // 15s for connection errors
}
// Unknown error — treat as permanent (user reviews)
return { isTransient: false, isRateLimit: false, suggestedDelayMs: 0 };
}

View file

@ -0,0 +1,49 @@
/**
* terminated-transient.test.ts — Regression test for #2309.
*
* classifyProviderError should treat 'terminated' errors (process killed,
* connection reset) as transient with auto-resume, not permanent.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { classifyProviderError } from "../provider-error-pause.ts";
// Headline case for #2309: the bare "terminated" message must come back as
// transient, must not be flagged as a rate limit, and must carry a positive
// retry delay.
test("#2309: 'terminated' errors should be classified as transient", () => {
  const { isTransient, isRateLimit, suggestedDelayMs } = classifyProviderError("terminated");
  assert.equal(isTransient, true, "'terminated' should be transient");
  assert.equal(isRateLimit, false, "'terminated' is not a rate limit");
  assert.ok(suggestedDelayMs > 0, "'terminated' should have a retry delay");
});

// Connection-style messages that only need the isTransient flag checked.
// Each row is registered as its own test, in the order listed here.
const connectionCases = [
  {
    title: "#2309: 'connection reset' errors should be classified as transient",
    errorMsg: "connection reset by peer",
    failureNote: "'connection reset' should be transient",
  },
  {
    title: "#2309: 'other side closed' errors should be classified as transient",
    errorMsg: "other side closed the connection",
    failureNote: "'other side closed' should be transient",
  },
  {
    title: "#2309: 'fetch failed' errors should be classified as transient",
    errorMsg: "fetch failed: network error",
    failureNote: "'fetch failed' should be transient",
  },
  {
    title: "#2309: 'connection refused' errors should be classified as transient",
    errorMsg: "ECONNREFUSED: connection refused",
    failureNote: "'connection refused' should be transient",
  },
];

for (const { title, errorMsg, failureNote } of connectionCases) {
  test(title, () => {
    assert.equal(classifyProviderError(errorMsg).isTransient, true, failureNote);
  });
}

// Guard against over-matching: an auth failure must stay non-transient and
// report a zero delay.
test("#2309: permanent errors are still permanent", () => {
  const { isTransient, suggestedDelayMs } = classifyProviderError("unauthorized: invalid API key");
  assert.equal(isTransient, false, "auth errors should stay permanent");
  assert.equal(suggestedDelayMs, 0, "permanent errors have no delay");
});

// Guard against regressing the existing rate-limit path: still transient and
// still flagged as a rate limit.
test("#2309: rate limits are still transient", () => {
  const { isTransient, isRateLimit } = classifyProviderError("rate limit exceeded (429)");
  assert.equal(isTransient, true, "rate limits are still transient");
  assert.equal(isRateLimit, true, "rate limits are flagged as rate limits");
});