From a77e1551d258dac18bae281e4e7e00866d9a3c70 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 10 May 2026 18:17:49 +0200 Subject: [PATCH] refactor(memory): consolidate memory system, remove dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete memory-backfill.js — not imported anywhere, dead code - Rename memory-sleeper.js → tool-watchdog.js — misnamed; it is a tool-output watchdog with no relation to the memory store - Collapse memory-embeddings-llm-gateway.js into memory-embeddings.js — removes the lazy-import split; loadGatewayConfigFromEnv, createGatewayEmbedFn, and rerankCandidates are now direct exports - Remove buildEmbeddingFn() dead stub (always returned null) - Enable packages/coding-agent memory extraction extension by default (memory.enabled ?? true) so session-level extraction is active - Update all import sites and tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../coding-agent/src/core/settings-manager.ts | 2 +- .../extensions/sf/bootstrap/register-hooks.js | 102 ++++++- .../extensions/sf/commands-memory.js | 6 +- .../extensions/sf/memory-backfill.js | 104 -------- .../sf/memory-embeddings-llm-gateway.js | 252 ------------------ .../extensions/sf/memory-embeddings.js | 245 +++++++++++++---- src/resources/extensions/sf/memory-store.js | 2 +- .../sf/tests/memory-state-cache.test.mjs | 10 +- .../{memory-sleeper.js => tool-watchdog.js} | 15 +- 9 files changed, 304 insertions(+), 434 deletions(-) delete mode 100644 src/resources/extensions/sf/memory-backfill.js delete mode 100644 src/resources/extensions/sf/memory-embeddings-llm-gateway.js rename src/resources/extensions/sf/{memory-sleeper.js => tool-watchdog.js} (87%) diff --git a/packages/coding-agent/src/core/settings-manager.ts b/packages/coding-agent/src/core/settings-manager.ts index 6268157a2..9500e0eb1 100644 --- a/packages/coding-agent/src/core/settings-manager.ts +++ b/packages/coding-agent/src/core/settings-manager.ts @@ -1232,7 +1232,7 @@ export class SettingsManager { summaryInjectionTokenLimit: number; } { return { - enabled: this.settings.memory?.enabled ?? false, + enabled: this.settings.memory?.enabled ?? true, maxRolloutsPerStartup: this.settings.memory?.maxRolloutsPerStartup ?? 64, maxRolloutAgeDays: this.settings.memory?.maxRolloutAgeDays ?? 30, minRolloutIdleHours: this.settings.memory?.minRolloutIdleHours ?? 12, diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 8e4e96dfe..068d3b5ac 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -1,5 +1,12 @@ import { spawnSync } from "node:child_process"; -import { existsSync, readdirSync } from "node:fs"; +import { + existsSync, + mkdirSync, + readdirSync, + readFileSync, + unlinkSync, + writeFileSync, +} from "node:fs"; import { join, relative, resolve } from "node:path"; import { isToolCallEventType } from "@singularity-forge/coding-agent"; import { resetAskUserQuestionsCache } from "../../ask-user-questions.js"; @@ -15,6 +22,7 @@ import { markToolEnd, markToolStart, recordToolInvocationError, + startAutoDetached, } from "../auto.js"; import { applyCompletionNudgeTemperature, @@ -39,9 +47,9 @@ import { selectLearnedModel, } from "../learning/runtime.js"; import { - observeMemorySleeperToolResult, - resetMemorySleeper, -} from "../memory-sleeper.js"; + observeToolResult, + resetToolWatchdog, +} from "../tool-watchdog.js"; import { initNotificationStore } from "../notification-store.js"; import { initNotificationWidget } from "../notification-widget.js"; import { @@ -106,6 +114,42 @@ import { // printed it before the TUI launched. Only re-print on /clear (subsequent sessions). let isFirstSession = true; let lastGeminiPreflightWarning; +const COMPACTION_AUTO_RESUME_FILE = "compaction-auto-resume.json"; + +function compactionResumePath(basePath) { + return join(basePath, ".sf", "runtime", COMPACTION_AUTO_RESUME_FILE); +} + +function writeCompactionResumeMarker(basePath, marker) { + try { + const runtimeDir = join(basePath, ".sf", "runtime"); + mkdirSync(runtimeDir, { recursive: true }); + writeFileSync( + compactionResumePath(basePath), + JSON.stringify(marker, null, 2) + "\n", + "utf-8", + ); + } catch { + // Non-fatal: compaction autoresume marker is best-effort. + } +} + +function consumeCompactionResumeMarker(basePath) { + const markerPath = compactionResumePath(basePath); + if (!existsSync(markerPath)) return null; + try { + const marker = JSON.parse(readFileSync(markerPath, "utf-8")); + unlinkSync(markerPath); + return marker; + } catch { + try { + unlinkSync(markerPath); + } catch { + // Ignore cleanup errors. + } + return null; + } +} export async function runAgentEndMemoryBackfill(runBackfill) { try { @@ -209,7 +253,7 @@ export function registerHooks(pi, ecosystemHandlers = []) { pi.on("session_start", async (_event, ctx) => { lastGeminiPreflightWarning = undefined; resetLearningRuntime(); - resetMemorySleeper(); + resetToolWatchdog(); try { const sid = ctx.sessionManager?.getSessionId?.() ?? ""; const _sfile = ctx.sessionManager?.getSessionFile?.() ?? ""; @@ -434,11 +478,38 @@ export function registerHooks(pi, ecosystemHandlers = []) { } catch { /* non-fatal — requirement promoter must never block session start */ } + // Compaction should never behave like a stop boundary. If autonomous mode + // was active when compaction happened, continue automatically on session start. + try { + if (!isAutoActive() && !isAutoPaused()) { + const marker = consumeCompactionResumeMarker(process.cwd()); + if ( + marker?.reason === "session_before_compact" && + marker?.autoActive === true && + marker?.basePath === process.cwd() + ) { + ctx.ui?.notify?.( + "Resuming autonomous mode after compaction.", + "info", + ); + startAutoDetached(ctx, pi, process.cwd(), marker?.verbose === true, { + step: marker?.stepMode === true, + canAskUser: marker?.canAskUser !== false, + milestoneLock: + typeof marker?.milestoneLock === "string" + ? marker.milestoneLock + : null, + }); + } + } + } catch { + /* non-fatal — autoresume must never block session start */ + } }); pi.on("session_switch", async (_event, ctx) => { lastGeminiPreflightWarning = undefined; resetLearningRuntime(); - resetMemorySleeper(); + resetToolWatchdog(); initNotificationStore(process.cwd()); installNotifyInterceptor(ctx); resetWriteGateState(); @@ -689,6 +760,21 @@ export function registerHooks(pi, ecosystemHandlers = []) { }, }, }; + if (isAutoActive()) { + writeCompactionResumeMarker(basePath, { + reason: "session_before_compact", + ts: new Date().toISOString(), + autoActive: true, + basePath, + stepMode: session?.stepMode === true, + canAskUser: isCanAskUser(), + milestoneLock: + typeof session?.sessionMilestoneLock === "string" + ? session.sessionMilestoneLock + : null, + verbose: session?.verbose === true, + }); + } // Persist compaction summary as the session's most recent work description // so memory-pipeline ingestion has a compact semantic handle for retrieval. try { @@ -1060,11 +1146,11 @@ export function registerHooks(pi, ecosystemHandlers = []) { ) { markResearchTerminalTransition(); } - const steer = observeMemorySleeperToolResult(event); + const steer = observeToolResult(event); if (steer) { pi.sendMessage( { - customType: "sf-memory-sleeper", + customType: "sf-tool-watchdog", content: steer.content, display: false, details: { diff --git a/src/resources/extensions/sf/commands-memory.js b/src/resources/extensions/sf/commands-memory.js index 76c4fb3e8..e7ffd18f6 100644 --- a/src/resources/extensions/sf/commands-memory.js +++ b/src/resources/extensions/sf/commands-memory.js @@ -219,7 +219,7 @@ async function handleSearch(ctx, parsed) { return; } const { loadGatewayConfigFromEnv } = await import( - "./memory-embeddings-llm-gateway.js" + "./memory-embeddings.js" ); const gatewayConfig = loadGatewayConfigFromEnv(); const usingEmbeddings = !!gatewayConfig; @@ -367,7 +367,7 @@ async function handleStatus(ctx) { } const dbStatus = readMemoryDbStatus(adapter); const { loadGatewayConfigFromEnv, createGatewayEmbedFn, rerankCandidates } = - await import("./memory-embeddings-llm-gateway.js"); + await import("./memory-embeddings.js"); const gatewayConfig = loadGatewayConfigFromEnv(); let embeddingProbe = { ok: false, status: "not_configured", latencyMs: null }; let rerankProbe = { ok: false, status: "not_configured", latencyMs: null }; @@ -419,7 +419,7 @@ async function handleBackfill(ctx) { } const before = readMemoryDbStatus(adapter); const { loadGatewayConfigFromEnv } = await import( - "./memory-embeddings-llm-gateway.js" + "./memory-embeddings.js" ); const gatewayConfig = loadGatewayConfigFromEnv(); if (!gatewayConfig) { diff --git a/src/resources/extensions/sf/memory-backfill.js b/src/resources/extensions/sf/memory-backfill.js deleted file mode 100644 index fd0d5e71f..000000000 --- a/src/resources/extensions/sf/memory-backfill.js +++ /dev/null @@ -1,104 +0,0 @@ -// SF — Decisions -> memories backfill -// -// Idempotent one-shot migration that copies every active decisions row into -// the memories table with category="architecture". Idempotency is enforced -// by tagging each backfilled memory's content with the original decision ID -// via a structured prefix and skipping any decision whose ID already appears -// in the memories table. -// -// Triggered opportunistically by buildBeforeAgentStartResult so the cost -// only ever fires once per project. Costs O(N) inserts on first run where -// N is the active-decisions count; subsequent runs are an O(N) lookup that -// finds existing markers and exits. - -import { createMemory } from "./memory-store.js"; -import { _getAdapter, isDbAvailable } from "./sf-db.js"; -import { logWarning } from "./workflow-logger.js"; -/** - * Backfill active decisions rows into the memories table. - * - * - Idempotent (per-row): every row written embeds - * `[decision:${decisionId}]` as a prefix in the content so we can - * detect existing backfills via a LIKE query. Only decisions whose id - * is already present in the memory store are skipped. - * - Best-effort: never throws. Logs and returns 0 on failure so a broken - * backfill cannot block agent startup. - * - Active-only: skips rows where `superseded_by IS NOT NULL`. Superseded - * decisions are historical record; the memory store is for active - * knowledge. - * - * Returns the number of memories written (0 when already backfilled or - * when the DB has no decisions). Callers can log the result or surface it - * to the user. - */ -export function backfillDecisionsToMemories() { - if (!isDbAvailable()) return 0; - const adapter = _getAdapter(); - if (!adapter) return 0; - try { - const decisions = adapter - .prepare( - "SELECT id, when_context, scope, decision, choice, rationale, made_by, revisable, superseded_by FROM decisions WHERE superseded_by IS NULL", - ) - .all(); - if (decisions.length === 0) return 0; - // Per-row idempotency: each backfilled memory starts with - // "[decision:]" in the content. Detect existing rows via LIKE. - const checkExisting = adapter.prepare( - "SELECT 1 FROM memories WHERE content LIKE :pattern LIMIT 1", - ); - let written = 0; - for (const raw of decisions) { - const row = { - id: String(raw["id"] ?? ""), - when_context: String(raw["when_context"] ?? ""), - scope: String(raw["scope"] ?? ""), - decision: String(raw["decision"] ?? ""), - choice: String(raw["choice"] ?? ""), - rationale: String(raw["rationale"] ?? ""), - made_by: String(raw["made_by"] ?? "agent"), - revisable: String(raw["revisable"] ?? ""), - superseded_by: - raw["superseded_by"] == null ? null : String(raw["superseded_by"]), - }; - if (!row.id) continue; - if (checkExisting.get({ ":pattern": `[decision:${row.id}] %` })) continue; - const content = synthesizeContent(row); - const id = createMemory({ - category: "architecture", - content, - confidence: 0.85, - }); - if (id) written += 1; - } - return written; - } catch (e) { - logWarning( - "memory-backfill", - `decisions->memories backfill failed: ${e.message}`, - ); - return 0; - } -} -/** - * Combine the decision's structured fields into a 1-3 sentence content - * string suitable for keyword retrieval and human review. - * - * Format: "[decision:] Chose: . Rationale: ." - * The "[decision:]" prefix enables idempotent backfill detection. - * Truncates each field to keep the synthesized line under ~600 chars. - */ -function synthesizeContent(row) { - const trim = (value, max) => { - const cleaned = value.replace(/\s+/g, " ").trim(); - return cleaned.length > max ? cleaned.slice(0, max - 1) + "…" : cleaned; - }; - const parts = [`[decision:${row.id}]`]; - const decision = trim(row.decision, 240); - const choice = trim(row.choice, 200); - const rationale = trim(row.rationale, 200); - if (decision) parts.push(decision); - if (choice) parts.push(`Chose: ${choice}.`); - if (rationale) parts.push(`Rationale: ${rationale}.`); - return parts.join(" "); -} diff --git a/src/resources/extensions/sf/memory-embeddings-llm-gateway.js b/src/resources/extensions/sf/memory-embeddings-llm-gateway.js deleted file mode 100644 index a937e2bc5..000000000 --- a/src/resources/extensions/sf/memory-embeddings-llm-gateway.js +++ /dev/null @@ -1,252 +0,0 @@ -// SF Memory Embeddings — LLM Gateway adapter -// -// Speaks the OpenAI-shaped /v1/embeddings and /v1/rerank protocols against -// a custom inference-fabric llm-gateway endpoint. Returns null when the -// gateway is not configured (env var unset / unreachable / catalog empty), -// so the consumer can fall through to keyword-only ranking without -// surfacing errors. -// -// Why a separate module: keeping gateway-specific HTTP, headers, and error -// shapes out of memory-embeddings.ts (which is provider-agnostic) means the -// embed-fn discovery surface stays clean and the gateway can be swapped or -// disabled without touching the consumer. -import { logWarning } from "./workflow-logger.js"; - -const DEFAULT_TIMEOUT_MS = 30_000; -// Throttle the "rerank worker offline" warning so per-query log spam doesn't -// drown out other diagnostics when SF_LLM_GATEWAY_RERANK_MODEL is set but no -// worker is online — the soft-degrade is expected in that state. -let lastRerankUnavailableLogAt = 0; -const RERANK_UNAVAILABLE_LOG_THROTTLE_MS = 60_000; -function logRerankUnavailable(msg) { - const now = Date.now(); - if (now - lastRerankUnavailableLogAt < RERANK_UNAVAILABLE_LOG_THROTTLE_MS) { - return; - } - lastRerankUnavailableLogAt = now; - logWarning("memory-embeddings", msg); -} -// Circuit breaker for the embed path. When the remote gateway is unreachable -// (network timeout, cold-start stall), each call would otherwise wait the full -// DEFAULT_TIMEOUT_MS (30 s) before failing. After EMBED_CIRCUIT_THRESHOLD -// consecutive failures the circuit opens for EMBED_CIRCUIT_OPEN_MS and returns -// [] immediately — callers fall through to keyword-only ranking with no stall. -// The circuit half-opens automatically after the cooldown expires. -const EMBED_CIRCUIT_THRESHOLD = 3; -const EMBED_CIRCUIT_OPEN_MS = 60_000; -const embedCircuit = { failures: 0, openUntil: 0, lastLogAt: 0 }; -function embedCircuitIsOpen() { - return embedCircuit.openUntil > Date.now(); -} -function onEmbedSuccess() { - embedCircuit.failures = 0; - embedCircuit.openUntil = 0; -} -function onEmbedFailure() { - embedCircuit.failures += 1; - if (embedCircuit.failures >= EMBED_CIRCUIT_THRESHOLD) { - embedCircuit.openUntil = Date.now() + EMBED_CIRCUIT_OPEN_MS; - const now = Date.now(); - if (now - embedCircuit.lastLogAt >= EMBED_CIRCUIT_OPEN_MS) { - embedCircuit.lastLogAt = now; - logWarning( - "memory-embeddings", - `llm-gateway /embeddings circuit open after ${EMBED_CIRCUIT_THRESHOLD} failures; ` + - `skipping embed for ${EMBED_CIRCUIT_OPEN_MS / 1000}s — memory search falls back to keyword ranking`, - ); - } - } -} -const ENV_KEY = "SF_LLM_GATEWAY_KEY"; -const ENV_URL = "SF_LLM_GATEWAY_URL"; -const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL"; -const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL"; -const ENV_EMBED_QUERY_INSTRUCTION = "SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION"; -const DEFAULT_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B"; -const DEFAULT_RERANK_MODEL = "Qwen/Qwen3-Reranker-0.6B"; -// Qwen3-Embedding uses asymmetric retrieval: queries are prefixed with a task -// instruction so the model projects them into the "query" region of the embedding -// space, while document texts are sent as-is (no instruction) so they land in -// the "passage" region. Mixing these correctly is critical for retrieval quality. -// -// Format expected by the model: "Instruct: \nQuery: " followed by the -// query text (the gateway appends the text to the instruction). Documents omit -// the instruction entirely. -// -// References: Qwen3-Embedding model card (HuggingFace) §Asymmetric Retrieval. -const DEFAULT_QUERY_INSTRUCTION = - "Instruct: Retrieve relevant software engineering memories, facts, and project decisions for the given query\nQuery: "; -const KEY_ALIASES = [ - ENV_KEY, - "LLM_GATEWAY_API_KEY", - "LLM_GATEWAY_BEARER_KEY", - "LLM_MUX_API_KEY", -]; -const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"]; -function firstEnvEntry(keys) { - for (const key of keys) { - const value = process.env[key]?.trim(); - if (value) return { key, value }; - } - return null; -} -function firstEnvValue(keys) { - return firstEnvEntry(keys)?.value ?? ""; -} -/** Read gateway config from env. Returns null when SF_LLM_GATEWAY_KEY is - * missing — the gateway path is opt-in and silently absent otherwise. - * - * `queryInstruction` is the Qwen3-style task instruction prepended to query - * texts during retrieval. Document texts (backfill) are sent without it. - * Override via SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION. */ -export function loadGatewayConfigFromEnv() { - const keyEntry = firstEnvEntry(KEY_ALIASES); - if (!keyEntry) return null; - const urlEntry = firstEnvEntry(URL_ALIASES); - const url = urlEntry?.value || "https://llm-gateway.centralcloud.com/v1"; - const embeddingModel = - firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL; - const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL; - const queryInstruction = - firstEnvValue([ENV_EMBED_QUERY_INSTRUCTION]) || DEFAULT_QUERY_INSTRUCTION; - return { - url, - apiKey: keyEntry.value, - keySource: keyEntry.key, - urlSource: urlEntry?.key ?? "default", - embeddingModel, - rerankModel, - queryInstruction, - }; -} -/** Build an EmbedFn that posts to /embeddings with Bearer auth. - * Returns Float32Array[] in the same order as the input. Throws on HTTP - * errors so the caller (embedMemories) logs and counts as zero. - * A circuit breaker short-circuits to [] after EMBED_CIRCUIT_THRESHOLD - * consecutive failures so a down/cold gateway never stalls the caller for - * the full 30 s timeout on every call. - * - * `opts.instruction` — when set, included as the top-level `instruction` - * field in the request body. Qwen3-Embedding uses this for asymmetric - * retrieval: pass `config.queryInstruction` for query embeddings; omit for - * document/memory backfill so passages land in the correct embedding region. */ -export function createGatewayEmbedFn(config, opts) { - return async (texts) => { - if (texts.length === 0) return []; - // Circuit open — fail fast, no network call. - if (embedCircuitIsOpen()) return []; - const controller = new AbortController(); - const timeout = setTimeout( - () => controller.abort(), - config.timeoutMs ?? DEFAULT_TIMEOUT_MS, - ); - try { - const body = { - model: config.embeddingModel, - input: texts, - }; - if (opts?.instruction) { - body.instruction = opts.instruction; - } - const res = await fetch(`${config.url}/embeddings`, { - method: "POST", - headers: { - Authorization: `Bearer ${config.apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - signal: controller.signal, - }); - if (!res.ok) { - const body = await res.text().catch(() => ""); - // Throw immediately — the outer catch handles onEmbedFailure once. - throw new Error( - `llm-gateway /embeddings ${res.status}: ${body.slice(0, 200)}`, - ); - } - const json = await res.json(); - if (!Array.isArray(json.data)) { - // Throw — the outer catch handles onEmbedFailure once. - throw new Error("llm-gateway /embeddings: missing data array"); - } - // Sort by index to handle out-of-order responses defensively. - const sorted = [...json.data].sort((a, b) => a.index - b.index); - const result = sorted.map((d) => Float32Array.from(d.embedding)); - onEmbedSuccess(); - return result; - } catch (err) { - // Catch AbortError (timeout) and all thrown errors from above — all - // count as a circuit failure. onEmbedFailure is called exactly once - // per failed request regardless of failure mode. - onEmbedFailure(); - throw err; - } finally { - clearTimeout(timeout); - } - }; -} -/** Score candidates against a query via /rerank. Returns null when no - * rerank model is configured OR the gateway has no rerank worker online — - * callers should treat null as "skip the rerank pass". */ -export async function rerankCandidates(config, query, candidates) { - if (!config.rerankModel) return null; - if (candidates.length === 0) return []; - const controller = new AbortController(); - const timeout = setTimeout( - () => controller.abort(), - config.timeoutMs ?? DEFAULT_TIMEOUT_MS, - ); - try { - const res = await fetch(`${config.url}/rerank`, { - method: "POST", - headers: { - Authorization: `Bearer ${config.apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: config.rerankModel, - query, - documents: candidates.map((c) => c.text), - }), - signal: controller.signal, - }); - if (res.status === 503 || res.status === 404) { - logRerankUnavailable( - `llm-gateway /rerank unavailable (${res.status}); falling back to non-reranked results`, - ); - return null; - } - // Read once — the gateway sometimes returns 200 with a plain-text body - // like "no worker with rerank capability is available", so we can't - // branch on res.ok before peeking at the body. - const bodyText = await res.text().catch(() => ""); - if (/no worker.*rerank/i.test(bodyText)) { - logRerankUnavailable( - "llm-gateway /rerank: no worker capability available", - ); - return null; - } - if (!res.ok) { - throw new Error( - `llm-gateway /rerank ${res.status}: ${bodyText.slice(0, 200)}`, - ); - } - let json; - try { - json = JSON.parse(bodyText); - } catch { - throw new Error( - `llm-gateway /rerank: malformed JSON response (${bodyText.slice(0, 200)})`, - ); - } - if (!Array.isArray(json.results)) { - throw new Error("llm-gateway /rerank: missing results array"); - } - return json.results.map((r) => ({ - id: candidates[r.index]?.id ?? String(r.index), - score: r.relevance_score, - })); - } finally { - clearTimeout(timeout); - } -} diff --git a/src/resources/extensions/sf/memory-embeddings.js b/src/resources/extensions/sf/memory-embeddings.js index cda7880f1..f1364acfe 100644 --- a/src/resources/extensions/sf/memory-embeddings.js +++ b/src/resources/extensions/sf/memory-embeddings.js @@ -1,16 +1,13 @@ -// SF Memory Embeddings — provider-agnostic embedding layer +// SF Memory Embeddings — embedding, reranking, and vector ranking // -// Active providers: -// - llm-gateway adapter (memory-embeddings-llm-gateway.ts) — opt-in via -// SF_LLM_GATEWAY_KEY; the only working write path today. -// - buildEmbeddingFn (Pi-SDK provider scan) — discovery hook only; -// returns null because the SDK has no provider-neutral embedding -// API yet. Kept as the future plug-in surface. +// OpenAI-shaped /v1/embeddings and /v1/rerank against a configurable +// gateway endpoint (opt-in via SF_LLM_GATEWAY_KEY). All paths degrade +// gracefully to keyword-only ranking when no gateway is configured. // -// Read pipeline used by `getRelevantMemoriesRanked` (memory-store.ts): +// Read pipeline used by `getRelevantMemoriesRanked` (memory-store.js): // 1. cosine ranking via rankMemoriesByEmbedding (uses loadEmbeddingMap) // 2. relation-boost via applyRelationBoost (uses memory_relations edges) -// 3. optional rerank via memory-embeddings-llm-gateway/rerankCandidates +// 3. optional rerank via rerankCandidates // // When no gateway is configured (or the worker is offline), all three // pipeline stages soft-degrade and `getRelevantMemoriesRanked` falls back @@ -23,44 +20,196 @@ import { } from "./sf-db.js"; import { logWarning } from "./workflow-logger.js"; -// ─── Model selection ──────────────────────────────────────────────────────── -const EMBEDDING_ID_HINTS = [ - "embed", - "embedding", - "voyage", - "text-embedding", - "nomic", - "jina-embed", - "bge", - "mxbai-embed", +// ─── Gateway config ────────────────────────────────────────────────────────── +const DEFAULT_TIMEOUT_MS = 30_000; +const ENV_KEY = "SF_LLM_GATEWAY_KEY"; +const ENV_URL = "SF_LLM_GATEWAY_URL"; +const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL"; +const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL"; +const ENV_EMBED_QUERY_INSTRUCTION = "SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION"; +const DEFAULT_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B"; +const DEFAULT_RERANK_MODEL = "Qwen/Qwen3-Reranker-0.6B"; +const DEFAULT_QUERY_INSTRUCTION = + "Instruct: Retrieve relevant software engineering memories, facts, and project decisions for the given query\nQuery: "; +const KEY_ALIASES = [ + ENV_KEY, + "LLM_GATEWAY_API_KEY", + "LLM_GATEWAY_BEARER_KEY", + "LLM_MUX_API_KEY", ]; -/** - * Try to build an embedding function from the model registry. Returns null - * when no embedding-capable model is obvious from the registry metadata. - * - * NOTE: the Pi SDK doesn't yet expose a dedicated embeddings API for every - * provider. This implementation currently targets Anthropic / OpenAI-shaped - * SDKs: when the caller has direct API access via `ctx.modelRegistry`, they - * can wire this up by providing an `embedFn` override. We ship the hint-based - * detection here so future providers can plug in without touching callers. - */ -export function buildEmbeddingFn(ctx) { - try { - const available = ctx.modelRegistry?.getAvailable?.(); - if (!available || available.length === 0) return null; - const candidate = available.find((model) => { - const id = typeof model?.id === "string" ? model.id.toLowerCase() : ""; - return EMBEDDING_ID_HINTS.some((hint) => id.includes(hint)); - }); - if (!candidate) return null; - // We don't currently have a provider-neutral embedding call in Pi; the - // detection surface is in place so wiring can happen once Pi offers it. - return null; - } catch (err) { - logWarning("memory-embeddings", `model discovery failed: ${err.message}`); - return null; +const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"]; +function firstEnvEntry(keys) { + for (const key of keys) { + const value = process.env[key]?.trim(); + if (value) return { key, value }; + } + return null; +} +function firstEnvValue(keys) { + return firstEnvEntry(keys)?.value ?? ""; +} +/** Read gateway config from env. Returns null when no key is present — opt-in. */ +export function loadGatewayConfigFromEnv() { + const keyEntry = firstEnvEntry(KEY_ALIASES); + if (!keyEntry) return null; + const urlEntry = firstEnvEntry(URL_ALIASES); + const url = urlEntry?.value || "https://llm-gateway.centralcloud.com/v1"; + const embeddingModel = + firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL; + const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL; + const queryInstruction = + firstEnvValue([ENV_EMBED_QUERY_INSTRUCTION]) || DEFAULT_QUERY_INSTRUCTION; + return { + url, + apiKey: keyEntry.value, + keySource: keyEntry.key, + urlSource: urlEntry?.key ?? "default", + embeddingModel, + rerankModel, + queryInstruction, + }; +} + +// ─── Circuit breaker ───────────────────────────────────────────────────────── +const EMBED_CIRCUIT_THRESHOLD = 3; +const EMBED_CIRCUIT_OPEN_MS = 60_000; +const embedCircuit = { failures: 0, openUntil: 0, lastLogAt: 0 }; +function embedCircuitIsOpen() { + return embedCircuit.openUntil > Date.now(); +} +function onEmbedSuccess() { + embedCircuit.failures = 0; + embedCircuit.openUntil = 0; +} +function onEmbedFailure() { + embedCircuit.failures += 1; + if (embedCircuit.failures >= EMBED_CIRCUIT_THRESHOLD) { + embedCircuit.openUntil = Date.now() + EMBED_CIRCUIT_OPEN_MS; + const now = Date.now(); + if (now - embedCircuit.lastLogAt >= EMBED_CIRCUIT_OPEN_MS) { + embedCircuit.lastLogAt = now; + logWarning( + "memory-embeddings", + `gateway /embeddings circuit open after ${EMBED_CIRCUIT_THRESHOLD} failures; ` + + `skipping for ${EMBED_CIRCUIT_OPEN_MS / 1000}s — falling back to keyword ranking`, + ); + } } } + +// ─── Rerank throttle ───────────────────────────────────────────────────────── +let lastRerankUnavailableLogAt = 0; +const RERANK_UNAVAILABLE_LOG_THROTTLE_MS = 60_000; +function logRerankUnavailable(msg) { + const now = Date.now(); + if (now - lastRerankUnavailableLogAt < RERANK_UNAVAILABLE_LOG_THROTTLE_MS) + return; + lastRerankUnavailableLogAt = now; + logWarning("memory-embeddings", msg); +} + +// ─── Gateway HTTP ───────────────────────────────────────────────────────────── +/** Build an embed function that posts to /embeddings with Bearer auth. */ +export function createGatewayEmbedFn(config, opts) { + return async (texts) => { + if (texts.length === 0) return []; + if (embedCircuitIsOpen()) return []; + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + config.timeoutMs ?? DEFAULT_TIMEOUT_MS, + ); + try { + const body = { model: config.embeddingModel, input: texts }; + if (opts?.instruction) body.instruction = opts.instruction; + const res = await fetch(`${config.url}/embeddings`, { + method: "POST", + headers: { + Authorization: `Bearer ${config.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + signal: controller.signal, + }); + if (!res.ok) { + const bodyText = await res.text().catch(() => ""); + throw new Error( + `gateway /embeddings ${res.status}: ${bodyText.slice(0, 200)}`, + ); + } + const json = await res.json(); + if (!Array.isArray(json.data)) + throw new Error("gateway /embeddings: missing data array"); + const sorted = [...json.data].sort((a, b) => a.index - b.index); + const result = sorted.map((d) => Float32Array.from(d.embedding)); + onEmbedSuccess(); + return result; + } catch (err) { + onEmbedFailure(); + throw err; + } finally { + clearTimeout(timeout); + } + }; +} + +/** Score candidates against a query via /rerank. Returns null when unavailable. */ +export async function rerankCandidates(config, query, candidates) { + if (!config.rerankModel) return null; + if (candidates.length === 0) return []; + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + config.timeoutMs ?? DEFAULT_TIMEOUT_MS, + ); + try { + const res = await fetch(`${config.url}/rerank`, { + method: "POST", + headers: { + Authorization: `Bearer ${config.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: config.rerankModel, + query, + documents: candidates.map((c) => c.text), + }), + signal: controller.signal, + }); + if (res.status === 503 || res.status === 404) { + logRerankUnavailable( + `gateway /rerank unavailable (${res.status}); falling back to cosine order`, + ); + return null; + } + const bodyText = await res.text().catch(() => ""); + if (/no worker.*rerank/i.test(bodyText)) { + logRerankUnavailable("gateway /rerank: no worker capability available"); + return null; + } + if (!res.ok) + throw new Error( + `gateway /rerank ${res.status}: ${bodyText.slice(0, 200)}`, + ); + let json; + try { + json = JSON.parse(bodyText); + } catch { + throw new Error( + `gateway /rerank: malformed JSON (${bodyText.slice(0, 200)})`, + ); + } + if (!Array.isArray(json.results)) + throw new Error("gateway /rerank: missing results array"); + return json.results.map((r) => ({ + id: candidates[r.index]?.id ?? String(r.index), + score: r.relevance_score, + })); + } finally { + clearTimeout(timeout); + } +} + // ─── Vector (de)serialization ─────────────────────────────────────────────── export function packFloat32(vec) { return new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength); @@ -245,9 +394,6 @@ export function rankMemoriesByEmbedding( export async function embedQueryViaGateway(query) { if (!query.trim()) return null; try { - const { loadGatewayConfigFromEnv, createGatewayEmbedFn } = await import( - "./memory-embeddings-llm-gateway.js" - ); const cfg = loadGatewayConfigFromEnv(); if (!cfg) return null; const embedFn = createGatewayEmbedFn(cfg, { @@ -344,11 +490,8 @@ export async function runEmbeddingBackfill(opts) { if (backfillInFlight) return 0; const max = opts?.maxPerInvocation ?? 50; const batchSize = opts?.batchSize ?? 16; - const { loadGatewayConfigFromEnv, createGatewayEmbedFn } = await import( - "./memory-embeddings-llm-gateway.js" - ); const cfg = loadGatewayConfigFromEnv(); - if (!cfg) return 0; // Gateway opt-in; absent config = no-op. + if (!cfg) return 0; backfillInFlight = true; let embedded = 0; try { diff --git a/src/resources/extensions/sf/memory-store.js b/src/resources/extensions/sf/memory-store.js index 50a4c1f09..c681f4138 100644 --- a/src/resources/extensions/sf/memory-store.js +++ b/src/resources/extensions/sf/memory-store.js @@ -212,7 +212,7 @@ export async function getRelevantMemoriesRanked(query, limit = 10) { // the cosine-ranked order as-is — strictly additive precision boost. try { const { loadGatewayConfigFromEnv, rerankCandidates } = await import( - "./memory-embeddings-llm-gateway.js" + "./memory-embeddings.js" ); const cfg = loadGatewayConfigFromEnv(); if (cfg?.rerankModel && topK.length > 1) { diff --git a/src/resources/extensions/sf/tests/memory-state-cache.test.mjs b/src/resources/extensions/sf/tests/memory-state-cache.test.mjs index 6ae2a71a6..3fe399790 100644 --- a/src/resources/extensions/sf/tests/memory-state-cache.test.mjs +++ b/src/resources/extensions/sf/tests/memory-state-cache.test.mjs @@ -203,16 +203,16 @@ describe("readFrozenDefinition error wrapping", () => { }); }); -// ─── memory-sleeper: seenKeys bounded (LOW) ──────────────────────────────── +// ─── tool-watchdog: seenKeys bounded (LOW) ──────────────────────────────── describe("memory-sleeper seenKeys", () => { test("resetMemorySleeper clears seenKeys", async () => { - const { resetMemorySleeper, observeMemorySleeperToolResult } = await import( - "../memory-sleeper.js" + const { resetToolWatchdog, observeToolResult } = await import( + "../tool-watchdog.js" ); - resetMemorySleeper(); + resetToolWatchdog(); // After reset, the same event should be processed again - const result = observeMemorySleeperToolResult({ + const result = observeToolResult({ toolName: "bash", input: { command: "bun install" }, content: [{ type: "text", text: "ok" }], diff --git a/src/resources/extensions/sf/memory-sleeper.js b/src/resources/extensions/sf/tool-watchdog.js similarity index 87% rename from src/resources/extensions/sf/memory-sleeper.js rename to src/resources/extensions/sf/tool-watchdog.js index d9ef54185..e2cc6a258 100644 --- a/src/resources/extensions/sf/memory-sleeper.js +++ b/src/resources/extensions/sf/tool-watchdog.js @@ -1,11 +1,8 @@ -// SF Memory Sleeper — runtime tool-output watchdog +// SF Tool Watchdog — runtime tool-output observer // -// Despite the "memory" prefix, this module is unrelated to the -// memory-store / memory-relations / memory-embeddings pipeline. It -// observes tool-result events at runtime and emits a steer (info / +// Observes tool-result events at runtime and emits a steer (info / // warning) when patterns like a repeated bash failure or a too-large -// tool result indicate the agent is stuck. Naming is historical — -// "sleeper" means a dormant watcher, not a stored memory. +// tool result indicate the agent is stuck. const seenKeys = new Set(); const bashFailures = new Map(); const MAX_RESULT_CHARS = 6000; @@ -13,7 +10,7 @@ const REPEAT_FAILURE_WINDOW_MS = 10 * 60 * 1000; // Maximum number of seen keys to retain before clearing; prevents unbounded // accumulation across long autonomous mode sessions spanning many units. const MAX_SEEN_KEYS = 200; -export function resetMemorySleeper() { +export function resetToolWatchdog() { seenKeys.clear(); bashFailures.clear(); } @@ -38,7 +35,7 @@ function once(steer) { } function buildSteer(title, body) { return [ - `Memory sleeper steering: ${title}`, + `Tool watchdog steering: ${title}`, "", body, "", @@ -109,7 +106,7 @@ function maybeRepeatedFailureSteer(event, text) { ), }); } -export function observeMemorySleeperToolResult(event) { +export function observeToolResult(event) { const text = contentText(event); return ( maybeBunSteer(event) ??