chore(sf): throttle rerank-unavailable warnings to once per minute

When SF_LLM_GATEWAY_RERANK_MODEL is set but no rerank worker is online,
every memory query (per execute-task prompt assembly) would log
"[sf:memory-embeddings] WARN: llm-gateway /rerank unavailable (503)" —
several lines per turn, all redundant. The soft-degrade is expected in
this state.

Now the warning is logged at most once per 60s, mirroring the
runEmbeddingBackfill unavailable-throttle pattern. Both sad-path
loggers stay informative (the operator sees one line and knows the
worker is down) without drowning the journal.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 23:27:57 +02:00
parent 0ee94f21be
commit 5fda99bfae

View file

@ -32,6 +32,20 @@ export interface GatewayConfig {
const DEFAULT_TIMEOUT_MS = 30_000;
// Throttle the "rerank worker offline" warning so per-query log spam doesn't
// drown out other diagnostics when SF_LLM_GATEWAY_RERANK_MODEL is set but no
// worker is online — the soft-degrade is expected in that state.
/** Wall-clock time (ms since epoch) of the last emitted warning; 0 until the first one is logged. */
let lastRerankUnavailableLogAt = 0;
/** Minimum gap, in milliseconds, between two emitted rerank-unavailable warnings. */
const RERANK_UNAVAILABLE_LOG_THROTTLE_MS = 60_000;

/**
 * Forward `msg` to `logWarning("memory-embeddings", msg)`, but at most once per
 * RERANK_UNAVAILABLE_LOG_THROTTLE_MS. Calls that land inside the throttle
 * window are dropped silently. The window is shared by every caller,
 * regardless of the message text.
 *
 * @param msg - Warning text to emit when the call is not throttled.
 */
function logRerankUnavailable(msg: string): void {
  const now = Date.now();
  const elapsedMs = now - lastRerankUnavailableLogAt;
  // Date.now() is wall-clock time and can step backwards (e.g. a clock
  // adjustment). A negative elapsed time would otherwise keep the warning
  // suppressed until the clock catches up, so treat it as an expired window.
  if (elapsedMs >= 0 && elapsedMs < RERANK_UNAVAILABLE_LOG_THROTTLE_MS) {
    return;
  }
  lastRerankUnavailableLogAt = now;
  logWarning("memory-embeddings", msg);
}
const ENV_KEY = "SF_LLM_GATEWAY_KEY";
const ENV_URL = "SF_LLM_GATEWAY_URL";
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
@ -141,8 +155,7 @@ export async function rerankCandidates(
signal: controller.signal,
});
if (res.status === 503 || res.status === 404) {
logWarning(
"memory-embeddings",
logRerankUnavailable(
`llm-gateway /rerank unavailable (${res.status}); falling back to non-reranked results`,
);
return null;
@ -152,8 +165,7 @@ export async function rerankCandidates(
// branch on res.ok before peeking at the body.
const bodyText = await res.text().catch(() => "");
if (/no worker.*rerank/i.test(bodyText)) {
logWarning(
"memory-embeddings",
logRerankUnavailable(
"llm-gateway /rerank: no worker capability available",
);
return null;