From 6450b370250bc2be6a42438932e0e8f23df12954 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 19 Apr 2026 09:24:54 +0200 Subject: [PATCH] core + search + benchmarks: auth-error recovery, multi-provider search, M2.7-highspeed entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three related improvements that landed in the working tree after the auto-hardening merge but hadn't been committed: 1. auth_error as a distinct error type (auth-storage + retry-handler). Previously invalid/expired API keys would retry the same failing credential until the retry budget was exhausted. Now: - classifyErrorType() recognizes 401s, "invalid api key", "authentication error", "unauthorized" etc. as "auth_error" - RetryHandler triggers cross-provider fallback on auth_error just like it does for rate_limit and quota_exhausted — switch providers rather than burning retries on a broken key Outcome: a stale OPENCODE_API_KEY in sops now fails over to kimi or minimax immediately instead of stalling the unit. 2. Multi-provider search-key detection (native-search.ts). The "Web search: Set BRAVE_API_KEY" warning fired whenever a non-Anthropic model lacked BRAVE_API_KEY, even when the user had TAVILY_API_KEY or OLLAMA_API_KEY available. Now: the warning is suppressed if any of BRAVE/TAVILY/OLLAMA keys is present, and the warning text lists all three options. Matches the preferences-validation allow-list for search_provider. 3. MiniMax-M2.7-highspeed benchmark entry (model-benchmarks.json). Routes the fast-tier variant of M2.7 through the Bayesian blender with inherited RULER scores. Lets dynamic routing consider the highspeed model when speed matters more than peak quality. No regressions: the 41 pre-existing test failures in pi-coding-agent (FallbackResolver chain-membership + LSP integration) are unchanged relative to the prior commit. 
Co-Authored-By: Claude Sonnet 4.6 --- .../pi-coding-agent/src/core/auth-storage.ts | 2 +- .../pi-coding-agent/src/core/retry-handler.ts | 6 ++++-- .../search-the-web/native-search.ts | 6 +++--- .../sf/learning/data/model-benchmarks.json | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index 21f78fa42..981aaec7d 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -201,7 +201,7 @@ const BACKOFF_QUOTA_EXHAUSTED_MS = 30 * 60_000; // 30min for quota exhausted const BACKOFF_SERVER_ERROR_MS = 20_000; // 20s for 5xx server errors const BACKOFF_DEFAULT_MS = 60_000; // 60s fallback -export type UsageLimitErrorType = "rate_limit" | "quota_exhausted" | "server_error" | "unknown"; +export type UsageLimitErrorType = "rate_limit" | "quota_exhausted" | "server_error" | "auth_error" | "unknown"; /** * Get backoff duration for an error type. diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 771223591..4f61902f9 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -197,8 +197,9 @@ export class RetryHandler { } // Cross-provider fallback — for rate limits with all creds backed off, - // or quota errors (which skip credential backoff entirely). - if (isRateLimit || isQuotaError) { + // quota errors, or auth errors (invalid/expired key — no point retrying). 
+ const isAuthError = errorType === "auth_error"; + if (isRateLimit || isQuotaError || isAuthError) { const fallbackResult = await this._deps.fallbackResolver.findFallback( this._deps.getModel()!, errorType, @@ -422,6 +423,7 @@ export class RetryHandler { if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; if (/rate.?limit|too many requests|429|529|overloaded/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; + if (/401|authentication.*error|invalid.*api.?key|api.?key.*invalid|api.?key.*expired|failed to authenticate|unauthorized/i.test(err)) return "auth_error"; return "unknown"; } diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index 709065306..65a772bf1 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -96,7 +96,7 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: const wasAnthropic = isAnthropicProvider; isAnthropicProvider = event.model.provider === "anthropic"; - const hasBrave = !!process.env.BRAVE_API_KEY; + const hasSearchKey = !!(process.env.BRAVE_API_KEY || process.env.TAVILY_API_KEY || process.env.OLLAMA_API_KEY); // When Anthropic (and not preferring Brave): disable custom search tools — // native web_search is server-side and more reliable. 
@@ -121,9 +121,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: ctx.ui.notify("Native Anthropic web search active", "info"); } else if (isAnthropicProvider && preferBraveSearch() && !wasAnthropic && event.source !== "restore") { ctx.ui.notify("Brave search active (PREFER_BRAVE_SEARCH)", "info"); - } else if (!isAnthropicProvider && !hasBrave) { + } else if (!isAnthropicProvider && !hasSearchKey) { ctx.ui.notify( - "Web search: Set BRAVE_API_KEY or use an Anthropic model for built-in search", + "Web search: Set BRAVE_API_KEY, TAVILY_API_KEY, or OLLAMA_API_KEY, or use an Anthropic model for built-in search", "warning" ); } diff --git a/src/resources/extensions/sf/learning/data/model-benchmarks.json b/src/resources/extensions/sf/learning/data/model-benchmarks.json index 65130b963..b249642ed 100644 --- a/src/resources/extensions/sf/learning/data/model-benchmarks.json +++ b/src/resources/extensions/sf/learning/data/model-benchmarks.json @@ -231,6 +231,25 @@ "context_window": 204800, "max_output_tokens": 131072 }, + "MiniMax-M2.7-highspeed": { + "swe_bench": null, + "swe_bench_verified": null, + "live_code_bench": null, + "human_eval": null, + "hle": null, + "aime_2026": null, + "gpqa": null, + "mmlu_pro": null, + "bbh": null, + "browse_comp": null, + "simple_qa": null, + "long_context_ruler": 95, + "arena_elo": null, + "instruction_following": null, + "source": "MiniMax M2.7-highspeed — fast tier of M2.7, same context/output limits, RULER ~95 inherited", + "context_window": 131072, + "max_output_tokens": 131072 + }, "MiniMax-M2.5": { "swe_bench": null, "swe_bench_verified": null,