From f7d067e4398108788ba22b713f68d132be232c3f Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 5 May 2026 22:27:33 +0200 Subject: [PATCH] feat: add sf memory status and backfill checks --- .../extensions/sf/bootstrap/register-hooks.js | 18 +- .../extensions/sf/commands-memory.js | 189 ++++++++++++++++++ .../sf/memory-embeddings-llm-gateway.js | 26 ++- .../tests/memory-backfill-lifecycle.test.mjs | 19 ++ .../memory-embeddings-llm-gateway.test.mjs | 4 + 5 files changed, 242 insertions(+), 14 deletions(-) create mode 100644 src/resources/extensions/sf/tests/memory-backfill-lifecycle.test.mjs diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 86ebecb1f..2e1f94de3 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -92,6 +92,17 @@ import { let isFirstSession = true; let lastGeminiPreflightWarning; +export async function runAgentEndMemoryBackfill(runBackfill) { + try { + const runner = + runBackfill ?? + (await import("../memory-embeddings.js")).runEmbeddingBackfill; + await runner(); + } catch { + // Never break agent_end on backfill issues. + } +} + async function runSessionStartupDoctorFix(ctx) { if (process.env.SF_DISABLE_STARTUP_DOCTOR === "1") return; try { @@ -383,12 +394,7 @@ export function registerHooks(pi, ecosystemHandlers = []) { // gateway has an embed worker online, embed any memories that don't yet // have a vector. Bounded per invocation; logs once-per-minute when the // gateway is unavailable so we don't spam the journal. - try { - const { runEmbeddingBackfill } = await import("../memory-embeddings.js"); - await runEmbeddingBackfill(); - } catch { - // Never break agent_end on backfill issues. - } + await runAgentEndMemoryBackfill(); }); // Squash-merge quick-task branch back to the original branch after the // agent turn completes (#2668). cleanupQuickBranch is a no-op when no diff --git a/src/resources/extensions/sf/commands-memory.js b/src/resources/extensions/sf/commands-memory.js index 6a88bdabf..509f0a424 100644 --- a/src/resources/extensions/sf/commands-memory.js +++ b/src/resources/extensions/sf/commands-memory.js @@ -120,6 +120,9 @@ export async function handleMemory(args, ctx, pi) { case "stats": handleStats(ctx); return; + case "status": + await handleStatus(ctx); + return; case "sources": handleSources(ctx); return; @@ -144,6 +147,9 @@ export async function handleMemory(args, ctx, pi) { case "cap": handleCap(ctx, parsed.positional[0]); return; + case "backfill": + await handleBackfill(ctx); + return; default: ctx.ui.notify( `Unknown subcommand "${parsed.sub}". ${usage()}`, @@ -160,6 +166,7 @@ function usage() { " show print one memory", " forget supersede a memory", " stats counts by category / sources / edges", + " status live gateway, embedding coverage, and search-mode diagnostics", " sources list recent memory_sources", ' note "" ingest an inline note as a source', " ingest ingest a local file path or URL", @@ -168,6 +175,7 @@ function usage() { " import load a previous export (idempotent)", " decay run the stale-memory decay pass immediately", " cap [N] enforce the memory cap (default 50)", + " backfill embed queued memories now using the configured gateway", "", "Options: --tag a,b --scope project|global| --extract", ].join("\n"); @@ -352,6 +360,187 @@ function handleStats(ctx) { ctx.ui.notify(`Stats failed: ${err.message}`, "warning"); } } +async function handleStatus(ctx) { + const adapter = _getAdapter(); + if (!adapter) { + ctx.ui.notify("No SF database available.", "warning"); + return; + } + const dbStatus = readMemoryDbStatus(adapter); + const { loadGatewayConfigFromEnv, createGatewayEmbedFn, rerankCandidates } = + await import("./memory-embeddings-llm-gateway.js"); + const gatewayConfig = loadGatewayConfigFromEnv(); + let embeddingProbe = { ok: false, status: "not_configured", latencyMs: null }; + let rerankProbe = { ok: false, status: "not_configured", latencyMs: null }; + if (gatewayConfig) { + embeddingProbe = await probeEmbedding(gatewayConfig, createGatewayEmbedFn); + rerankProbe = await probeRerank(gatewayConfig, rerankCandidates); + } + const searchMode = !gatewayConfig + ? "static" + : embeddingProbe.ok && rerankProbe.ok + ? "embedding+rerank" + : embeddingProbe.ok + ? "embedding" + : "static"; + const lines = [ + "SF memory status", + "", + "Gateway:", + gatewayConfig ? ` key: yes (${gatewayConfig.keySource})` : " key: no", + ` url: ${gatewayConfig?.url ?? "n/a"}${gatewayConfig ? ` (${gatewayConfig.urlSource})` : ""}`, + ` embed model: ${gatewayConfig?.embeddingModel ?? "n/a"}`, + ` rerank model: ${gatewayConfig?.rerankModel ?? "n/a"}`, + "", + "Live probes:", + ` embeddings: ${formatProbe(embeddingProbe)}`, + ` rerank: ${formatProbe(rerankProbe)}`, + "", + "Coverage:", + ` active memories: ${dbStatus.activeCount}`, + ` embedded active: ${dbStatus.embeddedActive}`, + ` embedding coverage: ${dbStatus.coverage}`, + ` queued for backfill: ${dbStatus.unembeddedActive}`, + ` stored embeddings: ${dbStatus.embeddingsTotal}`, + "", + "Backfill:", + " trigger: agent_end", + " max per turn: 50", + " batch size: 16", + "", + `Effective search mode: ${searchMode}`, + ]; + ctx.ui.notify(lines.join("\n"), "info"); +} +async function handleBackfill(ctx) { + const adapter = _getAdapter(); + if (!adapter) { + ctx.ui.notify("No SF database available.", "warning"); + return; + } + const before = readMemoryDbStatus(adapter); + const { loadGatewayConfigFromEnv } = await import( + "./memory-embeddings-llm-gateway.js" + ); + const gatewayConfig = loadGatewayConfigFromEnv(); + if (!gatewayConfig) { + ctx.ui.notify( + "Memory backfill unavailable: no llm-gateway key configured.", + "warning", + ); + return; + } + const { runEmbeddingBackfill } = await import("./memory-embeddings.js"); + const embedded = await runEmbeddingBackfill(); + const after = readMemoryDbStatus(adapter); + ctx.ui.notify( + [ + `Memory backfill embedded ${embedded} memor${embedded === 1 ? "y" : "ies"}.`, + `Coverage: ${before.embeddedActive}/${before.activeCount} (${before.coverage}) -> ${after.embeddedActive}/${after.activeCount} (${after.coverage})`, + `Queued for backfill: ${after.unembeddedActive}`, + ].join("\n"), + embedded > 0 ? "info" : "warning", + ); +} +function readMemoryDbStatus(adapter) { + const activeCount = + adapter + .prepare( + "SELECT count(*) as cnt FROM memories WHERE superseded_by IS NULL", + ) + .get()?.["cnt"] ?? 0; + const embeddingsTotal = + adapter.prepare("SELECT count(*) as cnt FROM memory_embeddings").get()?.[ + "cnt" + ] ?? 0; + const embeddedActive = + adapter + .prepare(`SELECT count(*) as cnt FROM memory_embeddings e + JOIN memories m ON m.id = e.memory_id + WHERE m.superseded_by IS NULL`) + .get()?.["cnt"] ?? 0; + const unembeddedActive = Math.max(0, activeCount - embeddedActive); + const coverage = + activeCount > 0 + ? `${Math.round((embeddedActive / activeCount) * 100)}%` + : "n/a"; + return { + activeCount, + embeddedActive, + embeddingsTotal, + unembeddedActive, + coverage, + }; +} +async function probeEmbedding(gatewayConfig, createGatewayEmbedFn) { + const startedAt = Date.now(); + try { + const embedFn = createGatewayEmbedFn({ + ...gatewayConfig, + timeoutMs: 10_000, + }); + const vectors = await embedFn(["sf memory status embedding probe"]); + const dim = vectors[0]?.length ?? 0; + if (dim <= 0) { + return { + ok: false, + status: "empty_vector", + latencyMs: Date.now() - startedAt, + }; + } + return { + ok: true, + status: `ok (${dim} dims)`, + latencyMs: Date.now() - startedAt, + }; + } catch (error) { + return { + ok: false, + status: sanitizeProbeError(error), + latencyMs: Date.now() - startedAt, + }; + } +} +async function probeRerank(gatewayConfig, rerankCandidates) { + const startedAt = Date.now(); + try { + const scores = await rerankCandidates( + { ...gatewayConfig, timeoutMs: 10_000 }, + "sf memory status rerank probe", + [ + { id: "a", text: "sf memory uses gateway reranking" }, + { id: "b", text: "unrelated filler document" }, + ], + ); + if (!scores) { + return { + ok: false, + status: "unavailable", + latencyMs: Date.now() - startedAt, + }; + } + return { + ok: scores.length === 2, + status: `ok (${scores.length} scores)`, + latencyMs: Date.now() - startedAt, + }; + } catch (error) { + return { + ok: false, + status: sanitizeProbeError(error), + latencyMs: Date.now() - startedAt, + }; + } +} +function formatProbe(probe) { + const suffix = + typeof probe.latencyMs === "number" ? `, ${probe.latencyMs}ms` : ""; + return `${probe.ok ? "ok" : "not ok"} (${probe.status}${suffix})`; +} +function sanitizeProbeError(error) { + const message = error instanceof Error ? error.message : String(error); + return message.replace(/Bearer\s+[A-Za-z0-9._~+/=-]+/g, "Bearer [redacted]"); +} function handleExport(ctx, target) { if (!target) { ctx.ui.notify("Usage: /sf memory export ", "warning"); diff --git a/src/resources/extensions/sf/memory-embeddings-llm-gateway.js b/src/resources/extensions/sf/memory-embeddings-llm-gateway.js index 74a6b9792..d785682a9 100644 --- a/src/resources/extensions/sf/memory-embeddings-llm-gateway.js +++ b/src/resources/extensions/sf/memory-embeddings-llm-gateway.js @@ -39,24 +39,34 @@ const KEY_ALIASES = [ "LLM_MUX_API_KEY", ]; const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"]; -function firstEnvValue(keys) { +function firstEnvEntry(keys) { for (const key of keys) { const value = process.env[key]?.trim(); - if (value) return value; + if (value) return { key, value }; } - return ""; + return null; +} +function firstEnvValue(keys) { + return firstEnvEntry(keys)?.value ?? ""; } /** Read gateway config from env. Returns null when SF_LLM_GATEWAY_KEY is * missing — the gateway path is opt-in and silently absent otherwise. */ export function loadGatewayConfigFromEnv() { - const apiKey = firstEnvValue(KEY_ALIASES); - if (!apiKey) return null; - const url = - firstEnvValue(URL_ALIASES) || "https://llm-gateway.centralcloud.com/v1"; + const keyEntry = firstEnvEntry(KEY_ALIASES); + if (!keyEntry) return null; + const urlEntry = firstEnvEntry(URL_ALIASES); + const url = urlEntry?.value || "https://llm-gateway.centralcloud.com/v1"; const embeddingModel = firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL; const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL; - return { url, apiKey, embeddingModel, rerankModel }; + return { + url, + apiKey: keyEntry.value, + keySource: keyEntry.key, + urlSource: urlEntry?.key ?? "default", + embeddingModel, + rerankModel, + }; } /** Build an EmbedFn that posts to /embeddings with Bearer auth. * Returns Float32Array[] in the same order as the input. Throws on HTTP diff --git a/src/resources/extensions/sf/tests/memory-backfill-lifecycle.test.mjs b/src/resources/extensions/sf/tests/memory-backfill-lifecycle.test.mjs new file mode 100644 index 000000000..ab26b93f8 --- /dev/null +++ b/src/resources/extensions/sf/tests/memory-backfill-lifecycle.test.mjs @@ -0,0 +1,19 @@ +import assert from "node:assert/strict"; +import { test } from "vitest"; +import { runAgentEndMemoryBackfill } from "../bootstrap/register-hooks.js"; + +test("agent_end memory backfill invokes the embedding repair sweep", async () => { + let calls = 0; + + await runAgentEndMemoryBackfill(async () => { + calls += 1; + }); + + assert.equal(calls, 1); +}); + +test("agent_end memory backfill failures do not break the lifecycle", async () => { + await runAgentEndMemoryBackfill(async () => { + throw new Error("gateway offline"); + }); +}); diff --git a/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.mjs b/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.mjs index b9330458a..4f5b7bcb8 100644 --- a/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.mjs +++ b/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.mjs @@ -39,6 +39,8 @@ test("loadGatewayConfigFromEnv accepts SF-prefixed configuration", () => { assert.deepEqual(loadGatewayConfigFromEnv(), { url: "https://example.test/v1", apiKey: "sf-key", + keySource: "SF_LLM_GATEWAY_KEY", + urlSource: "SF_LLM_GATEWAY_URL", embeddingModel: "embed-model", rerankModel: "rerank-model", }); @@ -53,6 +55,8 @@ test("loadGatewayConfigFromEnv accepts llm-gateway shell aliases", () => { assert.deepEqual(loadGatewayConfigFromEnv(), { url: "https://llm-gateway.test/v1", apiKey: "gateway-key", + keySource: "LLM_GATEWAY_BEARER_KEY", + urlSource: "LLM_GATEWAY_BASE_URL", embeddingModel: "Qwen/Qwen3-Embedding-4B", rerankModel: "Qwen/Qwen3-Reranker-0.6B", });