const DEFAULT_TIMEOUT_MS = 30_000;

// Throttle the "rerank worker offline" warning so per-query log spam doesn't
// drown out other diagnostics when SF_LLM_GATEWAY_RERANK_MODEL is set but no
// worker is online — the soft-degrade is expected in that state.

/** `Date.now()` reading taken when the warning was last emitted; 0 means never. */
let lastRerankUnavailableLogAt = 0;

/** Minimum gap, in milliseconds, between two emitted warnings. */
const RERANK_UNAVAILABLE_LOG_THROTTLE_MS = 60_000;

/**
 * Log a "rerank unavailable" warning under the "memory-embeddings" scope, at
 * most once per RERANK_UNAVAILABLE_LOG_THROTTLE_MS.
 *
 * All callers share a single throttle window, so a message that arrives while
 * the window is open is dropped even if its text differs from the one that
 * opened the window. `rerankCandidates` uses this for both the 503/404
 * response path and the "no worker" response-body path.
 *
 * @param msg - Warning text forwarded verbatim to `logWarning`.
 */
function logRerankUnavailable(msg: string): void {
  const now = Date.now();
  const elapsed = now - lastRerankUnavailableLogAt;
  // Date.now() is wall-clock time and may step backwards (NTP correction,
  // manual clock change). A negative elapsed value would otherwise keep the
  // warning muted until the clock caught up with the stored timestamp, so it
  // is treated as an expired window: log once and re-base on the new clock.
  if (elapsed >= 0 && elapsed < RERANK_UNAVAILABLE_LOG_THROTTLE_MS) {
    return;
  }
  lastRerankUnavailableLogAt = now;
  logWarning("memory-embeddings", msg);
}

const ENV_KEY = "SF_LLM_GATEWAY_KEY";
const ENV_URL = "SF_LLM_GATEWAY_URL";
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";