feat(sf): query-aware memory ranking — embeddings now actually matter

Previous commit populated memory_embeddings rows but no consumer read
them — the read path (getActiveMemoriesRanked) used pure static score
(confidence × hit_count). Embeddings were silent.

This wires the read side:
- rankMemoriesByEmbedding (pure, in memory-embeddings.ts) blends static
  score with cosine similarity: combined = static * (1 + α * cosine).
  Defaults to α=0.6 — a perfect-static + zero-similarity hit ties roughly
  with a low-static + perfect-similarity hit, so semantically relevant
  cold memories can surface above stale-but-popular ones.
- embedQueryViaGateway + loadEmbeddingMap — supporting helpers.
- getRelevantMemoriesRanked (memory-store.ts) — async query-aware ranker.
  Oversamples the static pool 5×, embeds the query, blends, returns top-K.
  Falls back cleanly to static ranking when:
    - query empty
    - no SF_LLM_GATEWAY_KEY (gateway not configured)
    - gateway request fails (500/network)
    - no embeddings exist yet (fresh DB / worker offline)
- auto-prompts.ts: execute-task injection now uses sliceTitle + taskTitle
  as the query so memories relevant to the current work surface first.

10 new tests lock the contract — pure ranker math, fallback chain, and
the gateway-mocked promotion case.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 22:18:45 +02:00
parent 56ee89a946
commit eb5f7ef7b6
4 changed files with 367 additions and 2 deletions

View file

@ -37,6 +37,7 @@ import { inlineGraphSubgraph } from "./graph-context.js";
import {
formatMemoriesForPrompt,
getActiveMemoriesRanked,
getRelevantMemoriesRanked,
} from "./memory-store.js";
import { parseRoadmap } from "./parsers.js";
import {
@ -2479,9 +2480,17 @@ export async function buildExecuteTaskPrompt(
{ pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true },
);
const memoriesSection = (() => {
// Query-aware memory ranking: build a short query from the active task
// context so embeddings can promote semantically-relevant memories above
// the cold static-rank top. Falls back to pure static ranking when no
// gateway is configured or no embeddings exist yet — see
// getRelevantMemoriesRanked for the fallback chain.
const memoryQuery = `${sTitle} ${tTitle}`.trim();
const memoriesSection = await (async () => {
try {
const memories = getActiveMemoriesRanked(10);
const memories = memoryQuery
? await getRelevantMemoriesRanked(memoryQuery, 10)
: getActiveMemoriesRanked(10);
if (memories.length === 0) return "## Project Memories\n(none yet)";
return `## Project Memories\n${formatMemoriesForPrompt(memories)}`;
} catch {

View file

@ -234,6 +234,73 @@ export async function embedMemories(
}
}
// ─── Query-aware ranking ──────────────────────────────────────────────────
/**
 * Blend a static rank (confidence × hit-count boost) with semantic similarity
 * to an embedded query. When the query vector is missing or no memory has an
 * embedding row, the input list is returned in its original order so callers
 * can rely on the static ranking as a fallback.
 */
export function rankMemoriesByEmbedding(
  memories: Array<{ id: string; staticScore: number }>,
  queryVector: Float32Array | null,
  memoryEmbeddings: Map<string, Float32Array>,
  options?: { semanticWeight?: number },
): Array<{ id: string; combinedScore: number; cosine: number }> {
  const semanticWeight = options?.semanticWeight ?? 0.6;

  // Fallback: nothing to compare against — preserve the static order as-is.
  if (queryVector === null || memoryEmbeddings.size === 0) {
    return memories.map(({ id, staticScore }) => ({
      id,
      combinedScore: staticScore,
      cosine: 0,
    }));
  }

  const scored = memories.map(({ id, staticScore }) => {
    const vector = memoryEmbeddings.get(id);
    // A memory without an embedding row keeps its pure static score (cosine 0).
    const cosine = vector !== undefined ? cosineSimilarity(queryVector, vector) : 0;
    // Static score in [0, ~1.5+]; cosine in [-1, 1] but typically [0, 1].
    // Blend so a perfect static + perfect cosine ≈ 2× a static-only top hit.
    return {
      id,
      combinedScore: staticScore * (1 + semanticWeight * cosine),
      cosine,
    };
  });

  scored.sort((left, right) => right.combinedScore - left.combinedScore);
  return scored;
}
/**
 * Best-effort: embed `query` through the configured gateway and resolve to its
 * Float32Array. Resolves to null when the query is blank, no gateway is
 * configured, or the embed call throws (logged as a warning, never rethrown).
 */
export async function embedQueryViaGateway(
  query: string,
): Promise<Float32Array | null> {
  const trimmed = query.trim();
  if (trimmed === "") return null;
  try {
    // Lazy-load the gateway client so callers without one pay no import cost.
    const gateway = await import("./memory-embeddings-llm-gateway.js");
    const config = gateway.loadGatewayConfigFromEnv();
    if (!config) return null;
    const embed = gateway.createGatewayEmbedFn(config);
    const [vector] = await embed([query]);
    return vector ?? null;
  } catch (err) {
    logWarning(
      "memory-embeddings",
      `query embed failed: ${(err as Error).message}`,
    );
    return null;
  }
}
/**
 * Build a memoryId → vector map covering every stored embedding row, letting
 * the ranker resolve vectors with O(1) lookups instead of N+1 queries.
 */
export function loadEmbeddingMap(): Map<string, Float32Array> {
  return new Map(
    loadAllEmbeddings().map(
      (row): [string, Float32Array] => [row.memoryId, row.vector],
    ),
  );
}
// ─── Auto-engagement / backfill driver ────────────────────────────────────
/** Find active memories (not superseded) that don't yet have an embedding row.

View file

@ -138,6 +138,58 @@ export function getActiveMemoriesRanked(limit = 30): Memory[] {
}
}
/**
 * Query-aware ranking: when an embedding gateway is configured and at least
 * some memories have vectors, rerank the top static-pool by combining the
 * static score with cosine similarity to the embedded query. Falls back
 * cleanly to pure static ranking when:
 *   - query is empty
 *   - no SF_LLM_GATEWAY_KEY (or gateway unreachable)
 *   - no memories have vectors yet (fresh DB or worker offline)
 *
 * The pool oversample (`limit * 5`, capped at 50 but never below `limit`)
 * ensures the embedding rerank can promote a relevant-but-static-cold memory
 * into the top-K.
 *
 * @param query free-text query used for semantic similarity (may be empty)
 * @param limit maximum number of memories returned
 * @returns up to `limit` memories, best first; `[]` when the DB is unavailable
 */
export async function getRelevantMemoriesRanked(
  query: string,
  limit = 10,
): Promise<Memory[]> {
  if (!isDbAvailable()) return [];
  // Oversample so the rerank has cold candidates to promote. Never request
  // fewer than `limit`: a plain Math.min(50, limit * 5) would under-fill the
  // result when a caller asks for more than 50.
  const poolSize = Math.max(limit, Math.min(50, limit * 5));
  const pool = getActiveMemoriesRanked(poolSize);
  if (pool.length === 0 || !query.trim()) {
    return pool.slice(0, limit);
  }
  try {
    const { embedQueryViaGateway, loadEmbeddingMap, rankMemoriesByEmbedding } =
      await import("./memory-embeddings.js");
    // Check the (synchronous) embedding map first: when nothing is embedded
    // yet there is no point paying a gateway round-trip to embed the query.
    const embeddingMap = loadEmbeddingMap();
    if (embeddingMap.size === 0) return pool.slice(0, limit);
    const queryVec = await embedQueryViaGateway(query);
    if (!queryVec) return pool.slice(0, limit);
    const ranked = rankMemoriesByEmbedding(
      pool.map((m) => ({
        id: m.id,
        // Mirror the static ranking formula: confidence boosted by hit count.
        staticScore: m.confidence * (1 + m.hit_count * 0.1),
      })),
      queryVec,
      embeddingMap,
    );
    // Map ranked ids back to full Memory rows, keeping only the top `limit`.
    const byId = new Map(pool.map((m) => [m.id, m]));
    const out: Memory[] = [];
    for (const r of ranked) {
      const mem = byId.get(r.id);
      if (mem) out.push(mem);
      if (out.length >= limit) break;
    }
    return out;
  } catch {
    // Best-effort by design: any failure (import, gateway, ranking) degrades
    // to the static ranking rather than breaking prompt construction.
    return pool.slice(0, limit);
  }
}
/**
* Generate the next memory ID: MEM + zero-padded 3-digit from MAX(seq).
* Returns MEM001 if no memories exist.

View file

@ -0,0 +1,237 @@
/**
* Query-aware memory ranking combines static rank with embedding cosine
* similarity. Tests the pure ranker (no I/O) and the end-to-end async path
* with a mocked gateway.
*
* The contract being locked here:
* - empty query / no gateway / no embeddings static order preserved
* - query + gateway + embeddings semantically relevant memory promoted
* even when its static score is lower
*/
import assert from "node:assert/strict";
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, test, vi } from "vitest";
import {
loadEmbeddingMap,
rankMemoriesByEmbedding,
saveEmbedding,
} from "../memory-embeddings.ts";
import { closeDatabase, openDatabase } from "../sf-db.ts";
import {
createMemory,
getActiveMemoriesRanked,
getRelevantMemoriesRanked,
} from "../memory-store.ts";
// Per-test scratch directory holding an isolated SQLite DB.
let tmpDir: string;
// Snapshot of the process environment taken at module load; each test starts
// from (and is restored to) this baseline so env mutations cannot leak.
const envSnapshot = { ...process.env };

beforeEach(() => {
  tmpDir = mkdtempSync(join(tmpdir(), "sf-memory-rank-"));
  openDatabase(join(tmpDir, "sf.db"));
  process.env = { ...envSnapshot };
});

afterEach(() => {
  closeDatabase();
  rmSync(tmpDir, { recursive: true, force: true });
  vi.restoreAllMocks();
  process.env = { ...envSnapshot };
});
describe("rankMemoriesByEmbedding (pure)", () => {
  // Shared fixture: "a" leads on static score, "b" trails it.
  const staticPair = () => [
    { id: "a", staticScore: 1.0 },
    { id: "b", staticScore: 0.5 },
  ];

  test("returns static order unchanged when query vector is null", () => {
    const ranked = rankMemoriesByEmbedding(staticPair(), null, new Map());
    assert.deepEqual(
      ranked.map((entry) => entry.id),
      ["a", "b"],
    );
  });

  test("returns static order unchanged when embedding map is empty", () => {
    const queryVec = Float32Array.from([1, 0, 0]);
    const ranked = rankMemoriesByEmbedding(staticPair(), queryVec, new Map());
    assert.deepEqual(
      ranked.map((entry) => entry.id),
      ["a", "b"],
    );
  });

  test("promotes semantically aligned memory above lower-similarity higher-static", () => {
    const queryVec = Float32Array.from([1, 0, 0]);
    const vectors = new Map<string, Float32Array>([
      // "a" is static-strong but orthogonal to the query.
      ["a", Float32Array.from([0, 1, 0])],
      // "b" is static-weak but perfectly aligned with the query.
      ["b", Float32Array.from([1, 0, 0])],
    ]);
    const ranked = rankMemoriesByEmbedding(
      [
        { id: "a", staticScore: 1.0 },
        { id: "b", staticScore: 0.7 },
      ],
      queryVec,
      vectors,
      { semanticWeight: 0.6 },
    );
    // b: 0.7 * (1 + 0.6 * 1.0) = 1.12 beats a: 1.0 * (1 + 0.6 * 0.0) = 1.00.
    assert.equal(ranked[0].id, "b");
    assert.equal(ranked[1].id, "a");
  });

  test("memory without an embedding row falls back to pure static score", () => {
    const queryVec = Float32Array.from([1, 0, 0]);
    const vectors = new Map<string, Float32Array>([
      ["a", Float32Array.from([1, 0, 0])],
    ]);
    const ranked = rankMemoriesByEmbedding(
      [
        { id: "a", staticScore: 0.5 },
        { id: "b", staticScore: 0.4 },
      ],
      queryVec,
      vectors,
      { semanticWeight: 0.6 },
    );
    // a: 0.5 * (1 + 0.6 * 1.0) = 0.80; b has no vector → 0.4 * (1 + 0) = 0.40.
    assert.equal(ranked[0].id, "a");
    assert.equal(ranked[0].cosine, 1);
    assert.equal(ranked[1].id, "b");
    assert.equal(ranked[1].cosine, 0);
  });
});
describe("loadEmbeddingMap", () => {
  test("returns vectors keyed by memoryId for active memories", () => {
    const first = createMemory({ category: "architecture", content: "alpha" });
    const second = createMemory({ category: "architecture", content: "beta" });
    assert.ok(first && second);
    saveEmbedding(first, Float32Array.from([1, 2, 3]), "test-model");
    saveEmbedding(second, Float32Array.from([4, 5, 6]), "test-model");
    const vectorsById = loadEmbeddingMap();
    assert.equal(vectorsById.size, 2);
    assert.deepEqual(Array.from(vectorsById.get(first)!), [1, 2, 3]);
    assert.deepEqual(Array.from(vectorsById.get(second)!), [4, 5, 6]);
  });
});
describe("getRelevantMemoriesRanked (async, mocked gateway)", () => {
  test("falls back to static ranking when SF_LLM_GATEWAY_KEY unset", async () => {
    // Without a gateway key the ranker must return static order and never
    // touch the network.
    delete process.env.SF_LLM_GATEWAY_KEY;
    createMemory({ category: "architecture", content: "alpha" });
    createMemory({ category: "architecture", content: "beta" });
    const out = await getRelevantMemoriesRanked("anything", 10);
    assert.equal(out.length, 2);
    // Stub fetch only AFTER the first call: the zero-call assertion below
    // therefore covers the second invocation, proving the fallback path
    // performs no fetch.
    const fetchMock = vi.fn();
    vi.stubGlobal("fetch", fetchMock);
    // Re-run to confirm no fetch happens on subsequent calls either.
    await getRelevantMemoriesRanked("anything", 10);
    assert.equal(fetchMock.mock.calls.length, 0);
  });

  test("falls back to static ranking when query is empty", async () => {
    // A configured key alone is not enough — an empty query must
    // short-circuit before any embedding work (and thus any fetch).
    process.env.SF_LLM_GATEWAY_KEY = "x";
    const a = createMemory({ category: "architecture", content: "alpha" });
    assert.ok(a);
    const fetchMock = vi.fn();
    vi.stubGlobal("fetch", fetchMock);
    const out = await getRelevantMemoriesRanked("", 10);
    assert.equal(out.length, 1);
    assert.equal(fetchMock.mock.calls.length, 0);
  });

  test("uses gateway embedding to promote relevant memory", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "x";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    // createMemory's return value is used both as the saveEmbedding key and
    // compared against out[].id below, i.e. it is the new memory's id.
    const a = createMemory({
      category: "architecture",
      content: "completely unrelated topic",
    });
    const b = createMemory({
      category: "architecture",
      content: "task plan format",
    });
    assert.ok(a && b);
    // Static order: both 0.8 confidence, hit_count 0 → tie. DB insert order
    // breaks the tie (a first). Pre-seed embeddings:
    // a is orthogonal to query (cosine 0); b is aligned (cosine 1).
    saveEmbedding(a, Float32Array.from([0, 1, 0]), "test-model");
    saveEmbedding(b, Float32Array.from([1, 0, 0]), "test-model");
    // Mock gateway → returns query vector aligned with b. The JSON body is an
    // OpenAI-style embeddings payload — presumably the shape the gateway
    // client parses; verify against memory-embeddings-llm-gateway.
    vi.stubGlobal(
      "fetch",
      vi.fn(async () =>
        new Response(
          JSON.stringify({
            object: "list",
            data: [
              {
                object: "embedding",
                index: 0,
                embedding: [1, 0, 0],
              },
            ],
          }),
          { status: 200 },
        ),
      ),
    );
    const out = await getRelevantMemoriesRanked("plan format query", 10);
    assert.equal(out.length, 2);
    assert.equal(out[0].id, b, "semantically relevant memory must rank first");
    assert.equal(out[1].id, a);
  });

  test("falls back to static ranking when gateway fails", async () => {
    // A 500 from the gateway must degrade to static ranking, not throw.
    process.env.SF_LLM_GATEWAY_KEY = "x";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    const a = createMemory({ category: "architecture", content: "alpha" });
    assert.ok(a);
    vi.stubGlobal(
      "fetch",
      vi.fn(async () => new Response("boom", { status: 500 })),
    );
    const out = await getRelevantMemoriesRanked("query", 10);
    assert.equal(out.length, 1);
  });

  test("static ranking still works with no embeddings table populated", async () => {
    // No gateway key and no embedding rows: exercises the pure-static path.
    const fallbackOrder = getActiveMemoriesRanked(10).length;
    assert.equal(fallbackOrder, 0); // pre-condition: empty
    const a = createMemory({
      category: "architecture",
      content: "high-confidence",
      confidence: 0.95,
    });
    const b = createMemory({
      category: "architecture",
      content: "low-confidence",
      confidence: 0.5,
    });
    assert.ok(a && b);
    const out = await getRelevantMemoriesRanked("anything", 10);
    assert.equal(out[0].id, a, "high-confidence memory ranks first by static score");
  });
});