From 5fda99bfae710d24f6ae52597940c7faa86eb322 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 2 May 2026 23:27:57 +0200 Subject: [PATCH] chore(sf): throttle rerank-unavailable warnings to once per minute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SF_LLM_GATEWAY_RERANK_MODEL is set but no rerank worker is online, every memory query (per execute-task prompt assembly) would log "[sf:memory-embeddings] WARN: llm-gateway /rerank unavailable (503)" — several lines per turn, all redundant. The soft-degrade is expected in this state. Now the message logs at most once per 60s. Symmetric with the runEmbeddingBackfill unavailable-throttle pattern. Both sad-path loggers stay informative (the operator sees one line and knows the worker is down) without drowning the journal. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../sf/memory-embeddings-llm-gateway.ts | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts b/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts index 1a2f3016e..dbb57269f 100644 --- a/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts +++ b/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts @@ -32,6 +32,20 @@ export interface GatewayConfig { const DEFAULT_TIMEOUT_MS = 30_000; +// Throttle the "rerank worker offline" warning so per-query log spam doesn't +// drown out other diagnostics when SF_LLM_GATEWAY_RERANK_MODEL is set but no +// worker is online — the soft-degrade is expected in that state. +let lastRerankUnavailableLogAt = 0; +const RERANK_UNAVAILABLE_LOG_THROTTLE_MS = 60_000; +function logRerankUnavailable(msg: string): void { + const now = Date.now(); + if (now - lastRerankUnavailableLogAt < RERANK_UNAVAILABLE_LOG_THROTTLE_MS) { + return; + } + lastRerankUnavailableLogAt = now; + logWarning("memory-embeddings", msg); +} + const ENV_KEY = "SF_LLM_GATEWAY_KEY"; const ENV_URL = "SF_LLM_GATEWAY_URL"; const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL"; @@ -141,8 +155,7 @@ export async function rerankCandidates( signal: controller.signal, }); if (res.status === 503 || res.status === 404) { - logWarning( - "memory-embeddings", + logRerankUnavailable( `llm-gateway /rerank unavailable (${res.status}); falling back to non-reranked results`, ); return null; @@ -152,8 +165,7 @@ export async function rerankCandidates( // branch on res.ok before peeking at the body. const bodyText = await res.text().catch(() => ""); if (/no worker.*rerank/i.test(bodyText)) { - logWarning( - "memory-embeddings", + logRerankUnavailable( "llm-gateway /rerank: no worker capability available", ); return null;