feat: add sf memory status and backfill checks
This commit is contained in:
parent
305b4869ac
commit
f7d067e439
5 changed files with 242 additions and 14 deletions
|
|
@ -92,6 +92,17 @@ import {
|
|||
let isFirstSession = true;
|
||||
let lastGeminiPreflightWarning;
|
||||
|
||||
/**
 * Best-effort embedding backfill run at the end of an agent turn.
 *
 * @param {Function} [runBackfill] Runner to invoke. When it is null or
 *   undefined, `runEmbeddingBackfill` is loaded lazily from
 *   "../memory-embeddings.js" and used instead.
 * @returns {Promise<void>} Always resolves; failures from loading or running
 *   the backfill are swallowed.
 */
export async function runAgentEndMemoryBackfill(runBackfill) {
  try {
    let sweep = runBackfill;
    if (sweep == null) {
      // Lazy import: the embeddings module is only loaded when no runner
      // was injected by the caller.
      const embeddingsModule = await import("../memory-embeddings.js");
      sweep = embeddingsModule.runEmbeddingBackfill;
    }
    await sweep();
  } catch {
    // Never break agent_end on backfill issues.
  }
}
|
||||
|
||||
async function runSessionStartupDoctorFix(ctx) {
|
||||
if (process.env.SF_DISABLE_STARTUP_DOCTOR === "1") return;
|
||||
try {
|
||||
|
|
@ -383,12 +394,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
// gateway has an embed worker online, embed any memories that don't yet
|
||||
// have a vector. Bounded per invocation; logs once-per-minute when the
|
||||
// gateway is unavailable so we don't spam the journal.
|
||||
try {
|
||||
const { runEmbeddingBackfill } = await import("../memory-embeddings.js");
|
||||
await runEmbeddingBackfill();
|
||||
} catch {
|
||||
// Never break agent_end on backfill issues.
|
||||
}
|
||||
await runAgentEndMemoryBackfill();
|
||||
});
|
||||
// Squash-merge quick-task branch back to the original branch after the
|
||||
// agent turn completes (#2668). cleanupQuickBranch is a no-op when no
|
||||
|
|
|
|||
|
|
@ -120,6 +120,9 @@ export async function handleMemory(args, ctx, pi) {
|
|||
case "stats":
|
||||
handleStats(ctx);
|
||||
return;
|
||||
case "status":
|
||||
await handleStatus(ctx);
|
||||
return;
|
||||
case "sources":
|
||||
handleSources(ctx);
|
||||
return;
|
||||
|
|
@ -144,6 +147,9 @@ export async function handleMemory(args, ctx, pi) {
|
|||
case "cap":
|
||||
handleCap(ctx, parsed.positional[0]);
|
||||
return;
|
||||
case "backfill":
|
||||
await handleBackfill(ctx);
|
||||
return;
|
||||
default:
|
||||
ctx.ui.notify(
|
||||
`Unknown subcommand "${parsed.sub}". ${usage()}`,
|
||||
|
|
@ -160,6 +166,7 @@ function usage() {
|
|||
" show <MEM###> print one memory",
|
||||
" forget <MEM###> supersede a memory",
|
||||
" stats counts by category / sources / edges",
|
||||
" status live gateway, embedding coverage, and search-mode diagnostics",
|
||||
" sources list recent memory_sources",
|
||||
' note "<text>" ingest an inline note as a source',
|
||||
" ingest <path|url> ingest a local file path or URL",
|
||||
|
|
@ -168,6 +175,7 @@ function usage() {
|
|||
" import <path.json> load a previous export (idempotent)",
|
||||
" decay run the stale-memory decay pass immediately",
|
||||
" cap [N] enforce the memory cap (default 50)",
|
||||
" backfill embed queued memories now using the configured gateway",
|
||||
"",
|
||||
"Options: --tag a,b --scope project|global|<custom> --extract",
|
||||
].join("\n");
|
||||
|
|
@ -352,6 +360,187 @@ function handleStats(ctx) {
|
|||
ctx.ui.notify(`Stats failed: ${err.message}`, "warning");
|
||||
}
|
||||
}
|
||||
/**
 * `status` subcommand: report gateway configuration, live probe results,
 * embedding coverage, and the effective search mode in a single notification.
 *
 * Sends a "warning" notification instead of the report when no database
 * adapter is available or when gathering the status throws.
 *
 * @param {object} ctx Command context; only `ctx.ui.notify(message, level)`
 *   is used here.
 * @returns {Promise<void>}
 */
async function handleStatus(ctx) {
  const adapter = _getAdapter();
  if (!adapter) {
    ctx.ui.notify("No SF database available.", "warning");
    return;
  }

  let report;
  try {
    const dbStatus = readMemoryDbStatus(adapter);
    const { loadGatewayConfigFromEnv, createGatewayEmbedFn, rerankCandidates } =
      await import("./memory-embeddings-llm-gateway.js");
    const gatewayConfig = loadGatewayConfigFromEnv();

    let embeddingProbe = { ok: false, status: "not_configured", latencyMs: null };
    let rerankProbe = { ok: false, status: "not_configured", latencyMs: null };
    if (gatewayConfig) {
      // The probes are independent and each one converts its own failures
      // into a result object, so run them concurrently instead of waiting
      // out two timeouts back to back.
      [embeddingProbe, rerankProbe] = await Promise.all([
        probeEmbedding(gatewayConfig, createGatewayEmbedFn),
        probeRerank(gatewayConfig, rerankCandidates),
      ]);
    }

    // Rerank only counts when embeddings work; without a gateway config or
    // a working embedding probe the mode is "static".
    let searchMode = "static";
    if (gatewayConfig && embeddingProbe.ok) {
      searchMode = rerankProbe.ok ? "embedding+rerank" : "embedding";
    }

    report = [
      "SF memory status",
      "",
      "Gateway:",
      gatewayConfig ? ` key: yes (${gatewayConfig.keySource})` : " key: no",
      ` url: ${gatewayConfig?.url ?? "n/a"}${gatewayConfig ? ` (${gatewayConfig.urlSource})` : ""}`,
      ` embed model: ${gatewayConfig?.embeddingModel ?? "n/a"}`,
      ` rerank model: ${gatewayConfig?.rerankModel ?? "n/a"}`,
      "",
      "Live probes:",
      ` embeddings: ${formatProbe(embeddingProbe)}`,
      ` rerank: ${formatProbe(rerankProbe)}`,
      "",
      "Coverage:",
      ` active memories: ${dbStatus.activeCount}`,
      ` embedded active: ${dbStatus.embeddedActive}`,
      ` embedding coverage: ${dbStatus.coverage}`,
      ` queued for backfill: ${dbStatus.unembeddedActive}`,
      ` stored embeddings: ${dbStatus.embeddingsTotal}`,
      "",
      "Backfill:",
      " trigger: agent_end",
      // NOTE(review): the two limits below are hard-coded display values;
      // presumably they mirror the backfill implementation — confirm they
      // stay in sync.
      " max per turn: 50",
      " batch size: 16",
      "",
      `Effective search mode: ${searchMode}`,
    ].join("\n");
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(`Status failed: ${reason}`, "warning");
    return;
  }

  ctx.ui.notify(report, "info");
}
|
||||
/**
 * `backfill` subcommand: run the embedding backfill now and report how
 * coverage changed.
 *
 * Sends a "warning" notification and stops when no database adapter is
 * available, when no gateway config can be loaded from the environment, or
 * when the run throws.
 *
 * @param {object} ctx Command context; only `ctx.ui.notify(message, level)`
 *   is used here.
 * @returns {Promise<void>}
 */
async function handleBackfill(ctx) {
  const adapter = _getAdapter();
  if (!adapter) {
    ctx.ui.notify("No SF database available.", "warning");
    return;
  }

  try {
    const before = readMemoryDbStatus(adapter);
    const { loadGatewayConfigFromEnv } = await import(
      "./memory-embeddings-llm-gateway.js"
    );
    if (!loadGatewayConfigFromEnv()) {
      ctx.ui.notify(
        "Memory backfill unavailable: no llm-gateway key configured.",
        "warning",
      );
      return;
    }

    const { runEmbeddingBackfill } = await import("./memory-embeddings.js");
    const embedded = await runEmbeddingBackfill();
    const after = readMemoryDbStatus(adapter);

    // Embedding nothing is only a problem when memories are still queued;
    // an already fully covered store is a normal "info" outcome.
    const level =
      embedded > 0 || after.unembeddedActive === 0 ? "info" : "warning";

    ctx.ui.notify(
      [
        `Memory backfill embedded ${embedded} memor${embedded === 1 ? "y" : "ies"}.`,
        `Coverage: ${before.embeddedActive}/${before.activeCount} (${before.coverage}) -> ${after.embeddedActive}/${after.activeCount} (${after.coverage})`,
        `Queued for backfill: ${after.unembeddedActive}`,
      ].join("\n"),
      level,
    );
  } catch (err) {
    // Report the failure to the user instead of rejecting the command;
    // the message is passed through the Bearer-token redaction first.
    ctx.ui.notify(
      `Memory backfill failed: ${sanitizeProbeError(err)}`,
      "warning",
    );
  }
}
|
||||
/**
 * Read memory/embedding counts from the database and derive coverage.
 *
 * @param {object} adapter Database adapter exposing `prepare(sql).get()`,
 *   where `get()` returns a row with a `cnt` field (or nothing).
 * @returns {{activeCount: number, embeddedActive: number,
 *   embeddingsTotal: number, unembeddedActive: number, coverage: string}}
 *   `coverage` is a rounded percentage string such as "75%", or "n/a" when
 *   there are no active memories.
 */
function readMemoryDbStatus(adapter) {
  // Run a single-row count query; a missing row or missing `cnt` counts as 0.
  const countOf = (sql) => adapter.prepare(sql).get()?.["cnt"] ?? 0;

  const activeCount = countOf(
    "SELECT count(*) as cnt FROM memories WHERE superseded_by IS NULL",
  );
  const embeddingsTotal = countOf(
    "SELECT count(*) as cnt FROM memory_embeddings",
  );
  // Count distinct memories rather than joined rows so that a memory with
  // more than one embedding row is not counted twice.
  const embeddedActive = countOf(
    `SELECT count(DISTINCT e.memory_id) as cnt FROM memory_embeddings e
     JOIN memories m ON m.id = e.memory_id
     WHERE m.superseded_by IS NULL`,
  );

  // Clamp at zero in case the counts are momentarily inconsistent.
  const unembeddedActive = Math.max(0, activeCount - embeddedActive);
  const coverage =
    activeCount > 0
      ? `${Math.round((embeddedActive / activeCount) * 100)}%`
      : "n/a";

  return {
    activeCount,
    embeddedActive,
    embeddingsTotal,
    unembeddedActive,
    coverage,
  };
}
|
||||
/**
 * Live-probe the embeddings endpoint by embedding one fixed sentence.
 *
 * @param {object} gatewayConfig Gateway settings; spread into the embed
 *   function options together with a 10 second `timeoutMs`.
 * @param {Function} createGatewayEmbedFn Factory that returns an async embed
 *   function taking an array of texts.
 * @returns {Promise<{ok: boolean, status: string, latencyMs: number}>}
 *   Never rejects for probe failures: thrown errors become
 *   `ok: false` with a sanitized message as `status`.
 */
async function probeEmbedding(gatewayConfig, createGatewayEmbedFn) {
  const startedAt = Date.now();
  // Stamp every outcome with the elapsed wall-clock time.
  const outcome = (ok, status) => ({
    ok,
    status,
    latencyMs: Date.now() - startedAt,
  });

  try {
    const options = { ...gatewayConfig, timeoutMs: 10_000 };
    const embedFn = createGatewayEmbedFn(options);
    const vectors = await embedFn(["sf memory status embedding probe"]);
    const dim = vectors[0]?.length ?? 0;
    // A missing or zero-length first vector means the probe failed.
    const isEmpty = dim <= 0;
    return isEmpty
      ? outcome(false, "empty_vector")
      : outcome(true, `ok (${dim} dims)`);
  } catch (error) {
    return outcome(false, sanitizeProbeError(error));
  }
}
|
||||
/**
 * Live-probe the rerank endpoint with a fixed query and two candidates.
 *
 * @param {object} gatewayConfig Gateway settings; passed on with a 10 second
 *   `timeoutMs` added.
 * @param {Function} rerankCandidates Async function called as
 *   `(config, query, candidates)`; a falsy result is treated as
 *   "unavailable".
 * @returns {Promise<{ok: boolean, status: string, latencyMs: number}>}
 *   `ok` is true only when one score per candidate comes back. Thrown errors
 *   become `ok: false` with a sanitized message as `status`.
 */
async function probeRerank(gatewayConfig, rerankCandidates) {
  const startedAt = Date.now();
  // Stamp every outcome with the elapsed wall-clock time.
  const outcome = (ok, status) => ({
    ok,
    status,
    latencyMs: Date.now() - startedAt,
  });
  const candidates = [
    { id: "a", text: "sf memory uses gateway reranking" },
    { id: "b", text: "unrelated filler document" },
  ];

  try {
    const scores = await rerankCandidates(
      { ...gatewayConfig, timeoutMs: 10_000 },
      "sf memory status rerank probe",
      candidates,
    );
    if (!scores) {
      return outcome(false, "unavailable");
    }
    if (scores.length !== candidates.length) {
      // Do not label a failed probe "ok": say what was wrong instead.
      return outcome(
        false,
        `unexpected score count (${scores.length}, expected ${candidates.length})`,
      );
    }
    return outcome(true, `ok (${scores.length} scores)`);
  } catch (error) {
    return outcome(false, sanitizeProbeError(error));
  }
}
|
||||
/**
 * Render a probe result as one line for the status report.
 *
 * @param {{ok: boolean, status: string, latencyMs: (number|null)}} probe
 * @returns {string} "ok (<status>)" or "not ok (<status>)", with
 *   ", <latency>ms" appended inside the parentheses when `latencyMs` is a
 *   number.
 */
function formatProbe(probe) {
  const verdict = probe.ok ? "ok" : "not ok";
  if (typeof probe.latencyMs === "number") {
    return `${verdict} (${probe.status}, ${probe.latencyMs}ms)`;
  }
  // No latency recorded (e.g. the probe never ran): omit the timing part.
  return `${verdict} (${probe.status})`;
}
|
||||
/**
 * Turn a thrown value into a message that is safe to show to the user.
 *
 * @param {unknown} error Thrown value; `Error` instances contribute their
 *   `message`, anything else is stringified.
 * @returns {string} The message with every `Bearer <token>` occurrence
 *   replaced by "Bearer [redacted]".
 */
function sanitizeProbeError(error) {
  const message = error instanceof Error ? error.message : String(error);
  // Auth scheme names are case-insensitive, so also catch "bearer <token>"
  // and "BEARER <token>" rather than only the capitalised spelling.
  return message.replace(
    /Bearer\s+[A-Za-z0-9._~+/=-]+/gi,
    "Bearer [redacted]",
  );
}
|
||||
function handleExport(ctx, target) {
|
||||
if (!target) {
|
||||
ctx.ui.notify("Usage: /sf memory export <path.json>", "warning");
|
||||
|
|
|
|||
|
|
@ -39,24 +39,34 @@ const KEY_ALIASES = [
|
|||
"LLM_MUX_API_KEY",
|
||||
];
|
||||
const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"];
|
||||
function firstEnvValue(keys) {
|
||||
function firstEnvEntry(keys) {
|
||||
for (const key of keys) {
|
||||
const value = process.env[key]?.trim();
|
||||
if (value) return value;
|
||||
if (value) return { key, value };
|
||||
}
|
||||
return "";
|
||||
return null;
|
||||
}
|
||||
function firstEnvValue(keys) {
|
||||
return firstEnvEntry(keys)?.value ?? "";
|
||||
}
|
||||
/** Read gateway config from env. Returns null when none of the KEY_ALIASES
|
||||
* env vars is set — the gateway path is opt-in and silently absent otherwise. */
|
||||
export function loadGatewayConfigFromEnv() {
|
||||
const apiKey = firstEnvValue(KEY_ALIASES);
|
||||
if (!apiKey) return null;
|
||||
const url =
|
||||
firstEnvValue(URL_ALIASES) || "https://llm-gateway.centralcloud.com/v1";
|
||||
const keyEntry = firstEnvEntry(KEY_ALIASES);
|
||||
if (!keyEntry) return null;
|
||||
const urlEntry = firstEnvEntry(URL_ALIASES);
|
||||
const url = urlEntry?.value || "https://llm-gateway.centralcloud.com/v1";
|
||||
const embeddingModel =
|
||||
firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL;
|
||||
const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL;
|
||||
return { url, apiKey, embeddingModel, rerankModel };
|
||||
return {
|
||||
url,
|
||||
apiKey: keyEntry.value,
|
||||
keySource: keyEntry.key,
|
||||
urlSource: urlEntry?.key ?? "default",
|
||||
embeddingModel,
|
||||
rerankModel,
|
||||
};
|
||||
}
|
||||
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.
|
||||
* Returns Float32Array[] in the same order as the input. Throws on HTTP
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { test } from "vitest";
|
||||
import { runAgentEndMemoryBackfill } from "../bootstrap/register-hooks.js";
|
||||
|
||||
// The injected runner must be called exactly once per invocation.
test("agent_end memory backfill invokes the embedding repair sweep", async () => {
  let calls = 0;

  await runAgentEndMemoryBackfill(async () => {
    calls += 1;
  });

  assert.equal(calls, 1);
});

// A failing runner must be attempted, and its error must not escape.
test("agent_end memory backfill failures do not break the lifecycle", async () => {
  let attempts = 0;

  const result = await runAgentEndMemoryBackfill(async () => {
    attempts += 1;
    throw new Error("gateway offline");
  });

  // Prove the throwing runner actually ran, and that the call still
  // resolved normally instead of rejecting.
  assert.equal(attempts, 1);
  assert.equal(result, undefined);
});
|
||||
|
|
@ -39,6 +39,8 @@ test("loadGatewayConfigFromEnv accepts SF-prefixed configuration", () => {
|
|||
assert.deepEqual(loadGatewayConfigFromEnv(), {
|
||||
url: "https://example.test/v1",
|
||||
apiKey: "sf-key",
|
||||
keySource: "SF_LLM_GATEWAY_KEY",
|
||||
urlSource: "SF_LLM_GATEWAY_URL",
|
||||
embeddingModel: "embed-model",
|
||||
rerankModel: "rerank-model",
|
||||
});
|
||||
|
|
@ -53,6 +55,8 @@ test("loadGatewayConfigFromEnv accepts llm-gateway shell aliases", () => {
|
|||
assert.deepEqual(loadGatewayConfigFromEnv(), {
|
||||
url: "https://llm-gateway.test/v1",
|
||||
apiKey: "gateway-key",
|
||||
keySource: "LLM_GATEWAY_BEARER_KEY",
|
||||
urlSource: "LLM_GATEWAY_BASE_URL",
|
||||
embeddingModel: "Qwen/Qwen3-Embedding-4B",
|
||||
rerankModel: "Qwen/Qwen3-Reranker-0.6B",
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue