feat: add sf memory status and backfill checks

This commit is contained in:
Mikael Hugo 2026-05-05 22:27:33 +02:00
parent 305b4869ac
commit f7d067e439
5 changed files with 242 additions and 14 deletions

View file

@ -92,6 +92,17 @@ import {
let isFirstSession = true;
let lastGeminiPreflightWarning;
/**
 * Run the memory embedding backfill once, never letting a failure escape.
 *
 * @param {(() => unknown)} [runBackfill] - Optional runner to invoke instead
 *   of the default. When nullish, `runEmbeddingBackfill` is loaded lazily from
 *   "../memory-embeddings.js".
 * @returns {Promise<void>} Always resolves; errors from the import or the
 *   runner are swallowed.
 */
export async function runAgentEndMemoryBackfill(runBackfill) {
  try {
    let sweep = runBackfill;
    if (sweep == null) {
      // Import only when no runner was injected.
      const embeddings = await import("../memory-embeddings.js");
      sweep = embeddings.runEmbeddingBackfill;
    }
    await sweep();
  } catch {
    // Never break agent_end on backfill issues.
  }
}
async function runSessionStartupDoctorFix(ctx) {
if (process.env.SF_DISABLE_STARTUP_DOCTOR === "1") return;
try {
@ -383,12 +394,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
// gateway has an embed worker online, embed any memories that don't yet
// have a vector. Bounded per invocation; logs once-per-minute when the
// gateway is unavailable so we don't spam the journal.
try {
const { runEmbeddingBackfill } = await import("../memory-embeddings.js");
await runEmbeddingBackfill();
} catch {
// Never break agent_end on backfill issues.
}
await runAgentEndMemoryBackfill();
});
// Squash-merge quick-task branch back to the original branch after the
// agent turn completes (#2668). cleanupQuickBranch is a no-op when no

View file

@ -120,6 +120,9 @@ export async function handleMemory(args, ctx, pi) {
case "stats":
handleStats(ctx);
return;
case "status":
await handleStatus(ctx);
return;
case "sources":
handleSources(ctx);
return;
@ -144,6 +147,9 @@ export async function handleMemory(args, ctx, pi) {
case "cap":
handleCap(ctx, parsed.positional[0]);
return;
case "backfill":
await handleBackfill(ctx);
return;
default:
ctx.ui.notify(
`Unknown subcommand "${parsed.sub}". ${usage()}`,
@ -160,6 +166,7 @@ function usage() {
" show <MEM###> print one memory",
" forget <MEM###> supersede a memory",
" stats counts by category / sources / edges",
" status live gateway, embedding coverage, and search-mode diagnostics",
" sources list recent memory_sources",
' note "<text>" ingest an inline note as a source',
" ingest <path|url> ingest a local file path or URL",
@ -168,6 +175,7 @@ function usage() {
" import <path.json> load a previous export (idempotent)",
" decay run the stale-memory decay pass immediately",
" cap [N] enforce the memory cap (default 50)",
" backfill embed queued memories now using the configured gateway",
"",
"Options: --tag a,b --scope project|global|<custom> --extract",
].join("\n");
@ -352,6 +360,187 @@ function handleStats(ctx) {
ctx.ui.notify(`Stats failed: ${err.message}`, "warning");
}
}
/**
 * Show a multi-line memory diagnostics report: gateway configuration, live
 * embedding/rerank probes, embedding coverage, backfill settings, and the
 * effective search mode.
 *
 * Failures while gathering the report (DB reads, loading the gateway module)
 * are reported through `ctx.ui.notify` as a warning instead of propagating,
 * matching how `handleStats` reports its failures.
 *
 * @param {object} ctx - Command context; only `ctx.ui.notify(text, level)` is used.
 * @returns {Promise<void>}
 */
async function handleStatus(ctx) {
  const adapter = _getAdapter();
  if (!adapter) {
    ctx.ui.notify("No SF database available.", "warning");
    return;
  }
  let report;
  try {
    const dbStatus = readMemoryDbStatus(adapter);
    const { loadGatewayConfigFromEnv, createGatewayEmbedFn, rerankCandidates } =
      await import("./memory-embeddings-llm-gateway.js");
    const gatewayConfig = loadGatewayConfigFromEnv();
    let embeddingProbe = {
      ok: false,
      status: "not_configured",
      latencyMs: null,
    };
    let rerankProbe = { ok: false, status: "not_configured", latencyMs: null };
    if (gatewayConfig) {
      // Probes are awaited one at a time so each latency figure is measured
      // without the other request in flight.
      embeddingProbe = await probeEmbedding(gatewayConfig, createGatewayEmbedFn);
      rerankProbe = await probeRerank(gatewayConfig, rerankCandidates);
    }
    // Rerank alone is not enough: without a working embedding probe the mode
    // is reported as "static".
    const searchMode = !gatewayConfig
      ? "static"
      : embeddingProbe.ok && rerankProbe.ok
        ? "embedding+rerank"
        : embeddingProbe.ok
          ? "embedding"
          : "static";
    const lines = [
      "SF memory status",
      "",
      "Gateway:",
      gatewayConfig ? ` key: yes (${gatewayConfig.keySource})` : " key: no",
      ` url: ${gatewayConfig?.url ?? "n/a"}${gatewayConfig ? ` (${gatewayConfig.urlSource})` : ""}`,
      ` embed model: ${gatewayConfig?.embeddingModel ?? "n/a"}`,
      ` rerank model: ${gatewayConfig?.rerankModel ?? "n/a"}`,
      "",
      "Live probes:",
      ` embeddings: ${formatProbe(embeddingProbe)}`,
      ` rerank: ${formatProbe(rerankProbe)}`,
      "",
      "Coverage:",
      ` active memories: ${dbStatus.activeCount}`,
      ` embedded active: ${dbStatus.embeddedActive}`,
      ` embedding coverage: ${dbStatus.coverage}`,
      ` queued for backfill: ${dbStatus.unembeddedActive}`,
      ` stored embeddings: ${dbStatus.embeddingsTotal}`,
      "",
      "Backfill:",
      " trigger: agent_end",
      // NOTE(review): 50 and 16 are literals here; presumably they mirror the
      // limits used by the backfill implementation — confirm they stay in sync.
      " max per turn: 50",
      " batch size: 16",
      "",
      `Effective search mode: ${searchMode}`,
    ];
    report = lines.join("\n");
  } catch (err) {
    ctx.ui.notify(`Status failed: ${sanitizeProbeError(err)}`, "warning");
    return;
  }
  ctx.ui.notify(report, "info");
}
/**
 * Run the embedding backfill on demand and report coverage before and after.
 *
 * Exits early with a warning when no database adapter or no gateway
 * configuration is available. Any error raised while reading the database or
 * running the backfill is reported as a warning instead of propagating; the
 * message goes through `sanitizeProbeError` so a bearer token embedded in a
 * gateway error is not echoed to the UI.
 *
 * @param {object} ctx - Command context; only `ctx.ui.notify(text, level)` is used.
 * @returns {Promise<void>}
 */
async function handleBackfill(ctx) {
  const adapter = _getAdapter();
  if (!adapter) {
    ctx.ui.notify("No SF database available.", "warning");
    return;
  }
  let summary;
  let level;
  try {
    const before = readMemoryDbStatus(adapter);
    const { loadGatewayConfigFromEnv } = await import(
      "./memory-embeddings-llm-gateway.js"
    );
    const gatewayConfig = loadGatewayConfigFromEnv();
    if (!gatewayConfig) {
      ctx.ui.notify(
        "Memory backfill unavailable: no llm-gateway key configured.",
        "warning",
      );
      return;
    }
    const { runEmbeddingBackfill } = await import("./memory-embeddings.js");
    // NOTE(review): the resolved value is used as the number of memories
    // embedded — confirm against runEmbeddingBackfill's return contract.
    const embedded = await runEmbeddingBackfill();
    const after = readMemoryDbStatus(adapter);
    summary = [
      `Memory backfill embedded ${embedded} memor${embedded === 1 ? "y" : "ies"}.`,
      `Coverage: ${before.embeddedActive}/${before.activeCount} (${before.coverage}) -> ${after.embeddedActive}/${after.activeCount} (${after.coverage})`,
      `Queued for backfill: ${after.unembeddedActive}`,
    ].join("\n");
    level = embedded > 0 ? "info" : "warning";
  } catch (err) {
    ctx.ui.notify(
      `Memory backfill failed: ${sanitizeProbeError(err)}`,
      "warning",
    );
    return;
  }
  ctx.ui.notify(summary, level);
}
/**
 * Collect embedding coverage counters from the memory tables.
 *
 * @param {object} adapter - Database adapter exposing `prepare(sql).get()`.
 * @returns {{activeCount: number, embeddedActive: number, embeddingsTotal: number, unembeddedActive: number, coverage: string}}
 *   `coverage` is a rounded percentage string, or "n/a" when there are no
 *   active memories.
 */
function readMemoryDbStatus(adapter) {
  // Single-row count query; a missing row or a nullish `cnt` counts as 0.
  const countRows = (sql) => adapter.prepare(sql).get()?.cnt ?? 0;

  const activeCount = countRows(
    "SELECT count(*) as cnt FROM memories WHERE superseded_by IS NULL",
  );
  const embeddingsTotal = countRows(
    "SELECT count(*) as cnt FROM memory_embeddings",
  );
  const embeddedActive = countRows(`SELECT count(*) as cnt FROM memory_embeddings e
JOIN memories m ON m.id = e.memory_id
WHERE m.superseded_by IS NULL`);

  // Clamp at zero in case the join reports more rows than active memories.
  const unembeddedActive = Math.max(0, activeCount - embeddedActive);

  let coverage = "n/a";
  if (activeCount > 0) {
    coverage = `${Math.round((embeddedActive / activeCount) * 100)}%`;
  }

  return {
    activeCount,
    embeddedActive,
    embeddingsTotal,
    unembeddedActive,
    coverage,
  };
}
/**
 * Send one short text through the gateway embed function and report the result.
 *
 * @param {object} gatewayConfig - Gateway settings; spread into the embed
 *   function options together with a 10 000 ms `timeoutMs`.
 * @param {Function} createGatewayEmbedFn - Factory returning an async embed
 *   function that takes an array of strings.
 * @returns {Promise<{ok: boolean, status: string, latencyMs: number}>} Never
 *   rejects; failures are reported as `ok: false` with a sanitized message.
 */
async function probeEmbedding(gatewayConfig, createGatewayEmbedFn) {
  const startedAt = Date.now();
  // Build a probe result, stamping elapsed time at the moment of reporting.
  const report = (ok, status) => ({
    ok,
    status,
    latencyMs: Date.now() - startedAt,
  });

  try {
    const embed = createGatewayEmbedFn({
      ...gatewayConfig,
      timeoutMs: 10_000,
    });
    const vectors = await embed(["sf memory status embedding probe"]);
    const dimensions = vectors[0]?.length ?? 0;
    // A missing or zero-length first vector means nothing usable came back.
    return dimensions <= 0
      ? report(false, "empty_vector")
      : report(true, `ok (${dimensions} dims)`);
  } catch (error) {
    return report(false, sanitizeProbeError(error));
  }
}
/**
 * Send a two-document rerank request through the gateway and report the result.
 *
 * The probe succeeds only when one score comes back per candidate. A response
 * with a different number of scores is reported as `ok: false` with a status
 * that names the mismatch, so the status text never reads "ok" on a failed
 * probe.
 *
 * @param {object} gatewayConfig - Gateway settings; passed to the reranker
 *   together with a 10 000 ms `timeoutMs`.
 * @param {Function} rerankCandidates - Async `(config, query, candidates)`
 *   function; a falsy result is treated as "unavailable".
 * @returns {Promise<{ok: boolean, status: string, latencyMs: number}>} Never
 *   rejects; thrown errors are reported as `ok: false` with a sanitized message.
 */
async function probeRerank(gatewayConfig, rerankCandidates) {
  const startedAt = Date.now();
  const candidates = [
    { id: "a", text: "sf memory uses gateway reranking" },
    { id: "b", text: "unrelated filler document" },
  ];
  try {
    const scores = await rerankCandidates(
      { ...gatewayConfig, timeoutMs: 10_000 },
      "sf memory status rerank probe",
      candidates,
    );
    if (!scores) {
      return {
        ok: false,
        status: "unavailable",
        latencyMs: Date.now() - startedAt,
      };
    }
    if (scores.length !== candidates.length) {
      return {
        ok: false,
        status: `unexpected score count (${scores.length} of ${candidates.length})`,
        latencyMs: Date.now() - startedAt,
      };
    }
    return {
      ok: true,
      status: `ok (${scores.length} scores)`,
      latencyMs: Date.now() - startedAt,
    };
  } catch (error) {
    return {
      ok: false,
      status: sanitizeProbeError(error),
      latencyMs: Date.now() - startedAt,
    };
  }
}
/**
 * Render a probe result as "<ok|not ok> (<status>[, <latency>ms])".
 *
 * @param {{ok: boolean, status: string, latencyMs: number|null}} probe
 * @returns {string} The latency part is included only when `latencyMs` is a number.
 */
function formatProbe(probe) {
  const verdict = probe.ok ? "ok" : "not ok";
  let detail = `${probe.status}`;
  if (typeof probe.latencyMs === "number") {
    detail += `, ${probe.latencyMs}ms`;
  }
  return `${verdict} (${detail})`;
}
/**
 * Turn a thrown value into display text with bearer tokens masked.
 *
 * @param {unknown} error - An Error (its `message` is used) or any other
 *   value (converted with `String`).
 * @returns {string} The text with every "Bearer <token>" occurrence replaced
 *   by "Bearer [redacted]".
 */
function sanitizeProbeError(error) {
  let text;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  const bearerToken = /Bearer\s+[A-Za-z0-9._~+/=-]+/g;
  return text.replace(bearerToken, "Bearer [redacted]");
}
function handleExport(ctx, target) {
if (!target) {
ctx.ui.notify("Usage: /sf memory export <path.json>", "warning");

View file

@ -39,24 +39,34 @@ const KEY_ALIASES = [
"LLM_MUX_API_KEY",
];
const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"];
/**
 * Find the first environment variable in `keys` that has a non-blank value.
 *
 * @param {string[]} keys - Environment variable names, in priority order.
 * @returns {{key: string, value: string} | null} The matching variable name
 *   and its trimmed value, or null when every candidate is unset or blank.
 */
function firstEnvEntry(keys) {
  for (const key of keys) {
    const value = process.env[key]?.trim();
    if (value) return { key, value };
  }
  return null;
}
/**
 * Like `firstEnvEntry`, but returns only the trimmed value.
 *
 * @param {string[]} keys - Environment variable names, in priority order.
 * @returns {string} The first non-blank value, or "" when none is set.
 */
function firstEnvValue(keys) {
  return firstEnvEntry(keys)?.value ?? "";
}
/** Read gateway config from env. Returns null when no gateway key is set
 * under SF_LLM_GATEWAY_KEY or any other name in KEY_ALIASES — the gateway
 * path is opt-in and silently absent otherwise.
 *
 * The result also records where the values came from: `keySource` is the env
 * variable that supplied the key, and `urlSource` is the env variable that
 * supplied the URL, or "default" when the built-in URL is used. */
export function loadGatewayConfigFromEnv() {
  const keyEntry = firstEnvEntry(KEY_ALIASES);
  if (!keyEntry) return null;
  const urlEntry = firstEnvEntry(URL_ALIASES);
  const url = urlEntry?.value || "https://llm-gateway.centralcloud.com/v1";
  const embeddingModel =
    firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL;
  const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL;
  return {
    url,
    apiKey: keyEntry.value,
    keySource: keyEntry.key,
    urlSource: urlEntry?.key ?? "default",
    embeddingModel,
    rerankModel,
  };
}
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.
* Returns Float32Array[] in the same order as the input. Throws on HTTP

View file

@ -0,0 +1,19 @@
import assert from "node:assert/strict";
import { test } from "vitest";
import { runAgentEndMemoryBackfill } from "../bootstrap/register-hooks.js";
// Checks that the injected runner is called exactly once per invocation.
// Passing a runner means the default module import is not exercised here.
test("agent_end memory backfill invokes the embedding repair sweep", async () => {
let calls = 0;
await runAgentEndMemoryBackfill(async () => {
calls += 1;
});
assert.equal(calls, 1);
});
// No explicit assertion: the test fails only if the awaited call rejects, so
// passing shows that the runner's error did not propagate to the caller.
test("agent_end memory backfill failures do not break the lifecycle", async () => {
await runAgentEndMemoryBackfill(async () => {
throw new Error("gateway offline");
});
});

View file

@ -39,6 +39,8 @@ test("loadGatewayConfigFromEnv accepts SF-prefixed configuration", () => {
assert.deepEqual(loadGatewayConfigFromEnv(), {
url: "https://example.test/v1",
apiKey: "sf-key",
keySource: "SF_LLM_GATEWAY_KEY",
urlSource: "SF_LLM_GATEWAY_URL",
embeddingModel: "embed-model",
rerankModel: "rerank-model",
});
@ -53,6 +55,8 @@ test("loadGatewayConfigFromEnv accepts llm-gateway shell aliases", () => {
assert.deepEqual(loadGatewayConfigFromEnv(), {
url: "https://llm-gateway.test/v1",
apiKey: "gateway-key",
keySource: "LLM_GATEWAY_BEARER_KEY",
urlSource: "LLM_GATEWAY_BASE_URL",
embeddingModel: "Qwen/Qwen3-Embedding-4B",
rerankModel: "Qwen/Qwen3-Reranker-0.6B",
});