diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index 21f78fa42..981aaec7d 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -201,7 +201,7 @@ const BACKOFF_QUOTA_EXHAUSTED_MS = 30 * 60_000; // 30min for quota exhausted const BACKOFF_SERVER_ERROR_MS = 20_000; // 20s for 5xx server errors const BACKOFF_DEFAULT_MS = 60_000; // 60s fallback -export type UsageLimitErrorType = "rate_limit" | "quota_exhausted" | "server_error" | "unknown"; +export type UsageLimitErrorType = "rate_limit" | "quota_exhausted" | "server_error" | "auth_error" | "unknown"; /** * Get backoff duration for an error type. diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 771223591..4f61902f9 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -197,8 +197,9 @@ export class RetryHandler { } // Cross-provider fallback — for rate limits with all creds backed off, - // or quota errors (which skip credential backoff entirely). - if (isRateLimit || isQuotaError) { + // quota errors, or auth errors (invalid/expired key — no point retrying). + const isAuthError = errorType === "auth_error"; + if (isRateLimit || isQuotaError || isAuthError) { const fallbackResult = await this._deps.fallbackResolver.findFallback( this._deps.getModel()!, errorType, @@ -422,6 +423,7 @@ export class RetryHandler { if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; if (/rate.?limit|too many requests|429|529|overloaded/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; + if (/401|authentication.*error|invalid.*api.?key|api.?key.*invalid|api.?key.*expired|failed to authenticate|unauthorized/i.test(err)) return "auth_error"; return "unknown"; } diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index 709065306..65a772bf1 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -96,7 +96,7 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: const wasAnthropic = isAnthropicProvider; isAnthropicProvider = event.model.provider === "anthropic"; - const hasBrave = !!process.env.BRAVE_API_KEY; + const hasSearchKey = !!(process.env.BRAVE_API_KEY || process.env.TAVILY_API_KEY || process.env.OLLAMA_API_KEY); // When Anthropic (and not preferring Brave): disable custom search tools — // native web_search is server-side and more reliable. @@ -121,9 +121,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic: ctx.ui.notify("Native Anthropic web search active", "info"); } else if (isAnthropicProvider && preferBraveSearch() && !wasAnthropic && event.source !== "restore") { ctx.ui.notify("Brave search active (PREFER_BRAVE_SEARCH)", "info"); - } else if (!isAnthropicProvider && !hasBrave) { + } else if (!isAnthropicProvider && !hasSearchKey) { ctx.ui.notify( - "Web search: Set BRAVE_API_KEY or use an Anthropic model for built-in search", + "Web search: Set BRAVE_API_KEY, TAVILY_API_KEY, or OLLAMA_API_KEY, or use an Anthropic model for built-in search", "warning" ); } diff --git a/src/resources/extensions/sf/learning/data/model-benchmarks.json b/src/resources/extensions/sf/learning/data/model-benchmarks.json index 65130b963..b249642ed 100644 --- a/src/resources/extensions/sf/learning/data/model-benchmarks.json +++ b/src/resources/extensions/sf/learning/data/model-benchmarks.json @@ -231,6 +231,25 @@ "context_window": 204800, "max_output_tokens": 131072 }, + "MiniMax-M2.7-highspeed": { + "swe_bench": null, + "swe_bench_verified": null, + "live_code_bench": null, + "human_eval": null, + "hle": null, + "aime_2026": null, + "gpqa": null, + "mmlu_pro": null, + "bbh": null, + "browse_comp": null, + "simple_qa": null, + "long_context_ruler": 95, + "arena_elo": null, + "instruction_following": null, + "source": "MiniMax M2.7-highspeed — fast tier of M2.7, same context/output limits, RULER ~95 inherited", + "context_window": 131072, + "max_output_tokens": 131072 + }, "MiniMax-M2.5": { "swe_bench": null, "swe_bench_verified": null,