diff --git a/src/resources/extensions/sf/auto-prompts.ts b/src/resources/extensions/sf/auto-prompts.ts index acd298ad9..c2946cf68 100644 --- a/src/resources/extensions/sf/auto-prompts.ts +++ b/src/resources/extensions/sf/auto-prompts.ts @@ -37,6 +37,7 @@ import { inlineGraphSubgraph } from "./graph-context.js"; import { formatMemoriesForPrompt, getActiveMemoriesRanked, + getRelevantMemoriesRanked, } from "./memory-store.js"; import { parseRoadmap } from "./parsers.js"; import { @@ -2479,9 +2480,17 @@ export async function buildExecuteTaskPrompt( { pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true }, ); - const memoriesSection = (() => { + // Query-aware memory ranking: build a short query from the active task + // context so embeddings can promote semantically-relevant memories above + // the cold static-rank top. Falls back to pure static ranking when no + // gateway is configured or no embeddings exist yet — see + // getRelevantMemoriesRanked for the fallback chain. + const memoryQuery = `${sTitle} ${tTitle}`.trim(); + const memoriesSection = await (async () => { try { - const memories = getActiveMemoriesRanked(10); + const memories = memoryQuery + ? await getRelevantMemoriesRanked(memoryQuery, 10) + : getActiveMemoriesRanked(10); if (memories.length === 0) return "## Project Memories\n(none yet)"; return `## Project Memories\n${formatMemoriesForPrompt(memories)}`; } catch { diff --git a/src/resources/extensions/sf/memory-embeddings.ts b/src/resources/extensions/sf/memory-embeddings.ts index 94506f371..ca89f80a1 100644 --- a/src/resources/extensions/sf/memory-embeddings.ts +++ b/src/resources/extensions/sf/memory-embeddings.ts @@ -234,6 +234,73 @@ export async function embedMemories( } } +// ─── Query-aware ranking ────────────────────────────────────────────────── + +/** Combine static rank (confidence × hit-count boost) with semantic similarity + * to the embedded query. 
When no embedding rows or no query embedding are
+ * available, returns the input list unchanged so callers can rely on the
+ * static order as a fallback. */
+export function rankMemoriesByEmbedding(
+  memories: Array<{ id: string; staticScore: number }>,
+  queryVector: Float32Array | null,
+  memoryEmbeddings: Map<string, Float32Array>,
+  options?: { semanticWeight?: number },
+): Array<{ id: string; combinedScore: number; cosine: number }> {
+  const alpha = options?.semanticWeight ?? 0.6;
+  if (!queryVector || memoryEmbeddings.size === 0) {
+    return memories.map((m) => ({
+      id: m.id,
+      combinedScore: m.staticScore,
+      cosine: 0,
+    }));
+  }
+  return memories
+    .map((m) => {
+      const vec = memoryEmbeddings.get(m.id);
+      const cosine = vec ? cosineSimilarity(queryVector, vec) : 0;
+      // Static score in [0, ~1.5+]; cosine in [-1, 1] but typically [0, 1].
+      // Blend so a perfect static + perfect cosine ≈ 2× a static-only top hit,
+      // and a memory with no embedding at all still ranks by static score.
+      const combinedScore = m.staticScore * (1 + alpha * cosine);
+      return { id: m.id, combinedScore, cosine };
+    })
+    .sort((a, b) => b.combinedScore - a.combinedScore);
+}
+
+/** Embed `query` via the configured gateway and return its Float32Array, or
+ * null when no gateway is configured / the embed call fails. Best-effort. */
+export async function embedQueryViaGateway(
+  query: string,
+): Promise<Float32Array | null> {
+  if (!query.trim()) return null;
+  try {
+    const { loadGatewayConfigFromEnv, createGatewayEmbedFn } = await import(
+      "./memory-embeddings-llm-gateway.js"
+    );
+    const cfg = loadGatewayConfigFromEnv();
+    if (!cfg) return null;
+    const embedFn = createGatewayEmbedFn(cfg);
+    const vectors = await embedFn([query]);
+    return vectors[0] ?? null;
+  } catch (err) {
+    logWarning(
+      "memory-embeddings",
+      `query embed failed: ${(err as Error).message}`,
+    );
+    return null;
+  }
+}
+
+/** Map of memoryId → vector for all active memories that have embeddings.
+ * Used by the ranker to look up vectors without N+1 queries. */
+export function loadEmbeddingMap(): Map<string, Float32Array> {
+  const map = new Map<string, Float32Array>();
+  for (const row of loadAllEmbeddings()) {
+    map.set(row.memoryId, row.vector);
+  }
+  return map;
+}
+
 // ─── Auto-engagement / backfill driver ──────────────────────────────────
 
 /** Find active memories (not superseded) that don't yet have an embedding row.
diff --git a/src/resources/extensions/sf/memory-store.ts b/src/resources/extensions/sf/memory-store.ts
index a57e5fe2e..2be41d872 100644
--- a/src/resources/extensions/sf/memory-store.ts
+++ b/src/resources/extensions/sf/memory-store.ts
@@ -138,6 +138,58 @@ export function getActiveMemoriesRanked(limit = 30): Memory[] {
   }
 }
 
+/**
+ * Query-aware ranking: when an embedding gateway is configured and at least
+ * some memories have vectors, rerank the top static-pool by combining the
+ * static score with cosine similarity to the embedded query. Falls back
+ * cleanly to pure static ranking when:
+ *   - query is empty
+ *   - no SF_LLM_GATEWAY_KEY (or gateway unreachable)
+ *   - no memories have vectors yet (fresh DB or worker offline)
+ *
+ * The pool oversample (`limit * 5` capped at 50) ensures the embedding
+ * rerank can promote a relevant-but-static-cold memory into the top-K.
+ */
+export async function getRelevantMemoriesRanked(
+  query: string,
+  limit = 10,
+): Promise<Memory[]> {
+  if (!isDbAvailable()) return [];
+  const pool = getActiveMemoriesRanked(Math.min(50, limit * 5));
+  if (pool.length === 0 || !query.trim()) {
+    return pool.slice(0, limit);
+  }
+  try {
+    const { embedQueryViaGateway, loadEmbeddingMap, rankMemoriesByEmbedding } =
+      await import("./memory-embeddings.js");
+    const [queryVec, embeddingMap] = await Promise.all([
+      embedQueryViaGateway(query),
+      Promise.resolve(loadEmbeddingMap()),
+    ]);
+    if (!queryVec || embeddingMap.size === 0) {
+      return pool.slice(0, limit);
+    }
+    const ranked = rankMemoriesByEmbedding(
+      pool.map((m) => ({
+        id: m.id,
+        staticScore: m.confidence * (1 + m.hit_count * 0.1),
+      })),
+      queryVec,
+      embeddingMap,
+    );
+    const byId = new Map(pool.map((m) => [m.id, m]));
+    const out: Memory[] = [];
+    for (const r of ranked) {
+      const mem = byId.get(r.id);
+      if (mem) out.push(mem);
+      if (out.length >= limit) break;
+    }
+    return out;
+  } catch {
+    return pool.slice(0, limit);
+  }
+}
+
 /**
  * Generate the next memory ID: MEM + zero-padded 3-digit from MAX(seq).
  * Returns MEM001 if no memories exist.
diff --git a/src/resources/extensions/sf/tests/memory-query-ranking.test.ts b/src/resources/extensions/sf/tests/memory-query-ranking.test.ts
new file mode 100644
index 000000000..1eddf89b3
--- /dev/null
+++ b/src/resources/extensions/sf/tests/memory-query-ranking.test.ts
@@ -0,0 +1,237 @@
+/**
+ * Query-aware memory ranking — combines static rank with embedding cosine
+ * similarity. Tests the pure ranker (no I/O) and the end-to-end async path
+ * with a mocked gateway.
+ * + * The contract being locked here: + * - empty query / no gateway / no embeddings → static order preserved + * - query + gateway + embeddings → semantically relevant memory promoted + * even when its static score is lower + */ + +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, test, vi } from "vitest"; + +import { + loadEmbeddingMap, + rankMemoriesByEmbedding, + saveEmbedding, +} from "../memory-embeddings.ts"; +import { closeDatabase, openDatabase } from "../sf-db.ts"; +import { + createMemory, + getActiveMemoriesRanked, + getRelevantMemoriesRanked, +} from "../memory-store.ts"; + +let dir: string; +const originalEnv = { ...process.env }; + +beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "sf-memory-rank-")); + openDatabase(join(dir, "sf.db")); + process.env = { ...originalEnv }; +}); + +afterEach(() => { + closeDatabase(); + rmSync(dir, { recursive: true, force: true }); + vi.restoreAllMocks(); + process.env = { ...originalEnv }; +}); + +describe("rankMemoriesByEmbedding (pure)", () => { + test("returns static order unchanged when query vector is null", () => { + const out = rankMemoriesByEmbedding( + [ + { id: "a", staticScore: 1.0 }, + { id: "b", staticScore: 0.5 }, + ], + null, + new Map(), + ); + assert.deepEqual( + out.map((r) => r.id), + ["a", "b"], + ); + }); + + test("returns static order unchanged when embedding map is empty", () => { + const q = Float32Array.from([1, 0, 0]); + const out = rankMemoriesByEmbedding( + [ + { id: "a", staticScore: 1.0 }, + { id: "b", staticScore: 0.5 }, + ], + q, + new Map(), + ); + assert.deepEqual( + out.map((r) => r.id), + ["a", "b"], + ); + }); + + test("promotes semantically aligned memory above lower-similarity higher-static", () => { + const q = Float32Array.from([1, 0, 0]); + const map = new Map([ + // "a" is static-strong but orthogonal to query + ["a", 
Float32Array.from([0, 1, 0])], + // "b" is static-weak but aligned with query + ["b", Float32Array.from([1, 0, 0])], + ]); + const out = rankMemoriesByEmbedding( + [ + { id: "a", staticScore: 1.0 }, + { id: "b", staticScore: 0.7 }, + ], + q, + map, + { semanticWeight: 0.6 }, + ); + // b: 0.7 * (1 + 0.6 * 1.0) = 1.12 + // a: 1.0 * (1 + 0.6 * 0.0) = 1.00 + assert.equal(out[0].id, "b"); + assert.equal(out[1].id, "a"); + }); + + test("memory without an embedding row falls back to pure static score", () => { + const q = Float32Array.from([1, 0, 0]); + const map = new Map([ + ["a", Float32Array.from([1, 0, 0])], + ]); + const out = rankMemoriesByEmbedding( + [ + { id: "a", staticScore: 0.5 }, + { id: "b", staticScore: 0.4 }, + ], + q, + map, + { semanticWeight: 0.6 }, + ); + // a: 0.5 * (1 + 0.6 * 1.0) = 0.80 + // b: 0.4 * (1 + 0) = 0.40 + assert.equal(out[0].id, "a"); + assert.equal(out[0].cosine, 1); + assert.equal(out[1].id, "b"); + assert.equal(out[1].cosine, 0); + }); +}); + +describe("loadEmbeddingMap", () => { + test("returns vectors keyed by memoryId for active memories", () => { + const a = createMemory({ category: "architecture", content: "alpha" }); + const b = createMemory({ category: "architecture", content: "beta" }); + assert.ok(a && b); + saveEmbedding(a, Float32Array.from([1, 2, 3]), "test-model"); + saveEmbedding(b, Float32Array.from([4, 5, 6]), "test-model"); + const map = loadEmbeddingMap(); + assert.equal(map.size, 2); + assert.deepEqual(Array.from(map.get(a)!), [1, 2, 3]); + assert.deepEqual(Array.from(map.get(b)!), [4, 5, 6]); + }); +}); + +describe("getRelevantMemoriesRanked (async, mocked gateway)", () => { + test("falls back to static ranking when SF_LLM_GATEWAY_KEY unset", async () => { + delete process.env.SF_LLM_GATEWAY_KEY; + createMemory({ category: "architecture", content: "alpha" }); + createMemory({ category: "architecture", content: "beta" }); + const out = await getRelevantMemoriesRanked("anything", 10); + assert.equal(out.length, 2); 
+ const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + // Re-run to confirm no fetch happens on subsequent calls either. + await getRelevantMemoriesRanked("anything", 10); + assert.equal(fetchMock.mock.calls.length, 0); + }); + + test("falls back to static ranking when query is empty", async () => { + process.env.SF_LLM_GATEWAY_KEY = "x"; + const a = createMemory({ category: "architecture", content: "alpha" }); + assert.ok(a); + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + const out = await getRelevantMemoriesRanked("", 10); + assert.equal(out.length, 1); + assert.equal(fetchMock.mock.calls.length, 0); + }); + + test("uses gateway embedding to promote relevant memory", async () => { + process.env.SF_LLM_GATEWAY_KEY = "x"; + process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1"; + const a = createMemory({ + category: "architecture", + content: "completely unrelated topic", + }); + const b = createMemory({ + category: "architecture", + content: "task plan format", + }); + assert.ok(a && b); + // Static order: both 0.8 confidence, hit_count 0 → tie. DB insert order + // breaks the tie (a first). Pre-seed embeddings: + // a is orthogonal to query (cosine 0); b is aligned (cosine 1). + saveEmbedding(a, Float32Array.from([0, 1, 0]), "test-model"); + saveEmbedding(b, Float32Array.from([1, 0, 0]), "test-model"); + + // Mock gateway → returns query vector aligned with b. 
+ vi.stubGlobal( + "fetch", + vi.fn(async () => + new Response( + JSON.stringify({ + object: "list", + data: [ + { + object: "embedding", + index: 0, + embedding: [1, 0, 0], + }, + ], + }), + { status: 200 }, + ), + ), + ); + + const out = await getRelevantMemoriesRanked("plan format query", 10); + assert.equal(out.length, 2); + assert.equal(out[0].id, b, "semantically relevant memory must rank first"); + assert.equal(out[1].id, a); + }); + + test("falls back to static ranking when gateway fails", async () => { + process.env.SF_LLM_GATEWAY_KEY = "x"; + process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1"; + const a = createMemory({ category: "architecture", content: "alpha" }); + assert.ok(a); + vi.stubGlobal( + "fetch", + vi.fn(async () => new Response("boom", { status: 500 })), + ); + const out = await getRelevantMemoriesRanked("query", 10); + assert.equal(out.length, 1); + }); + + test("static ranking still works with no embeddings table populated", async () => { + const fallbackOrder = getActiveMemoriesRanked(10).length; + assert.equal(fallbackOrder, 0); // pre-condition: empty + const a = createMemory({ + category: "architecture", + content: "high-confidence", + confidence: 0.95, + }); + const b = createMemory({ + category: "architecture", + content: "low-confidence", + confidence: 0.5, + }); + assert.ok(a && b); + const out = await getRelevantMemoriesRanked("anything", 10); + assert.equal(out[0].id, a, "high-confidence memory ranks first by static score"); + }); +});