feat(sf): query-aware memory ranking — embeddings now actually matter
Previous commit populated memory_embeddings rows but no consumer read
them — the read path (getActiveMemoriesRanked) used pure static score
(confidence × hit_count). Embeddings were silent.
This wires the read side:
- rankMemoriesByEmbedding (pure, in memory-embeddings.ts) blends static
score with cosine similarity: combined = static * (1 + α * cosine).
Defaults α=0.6 — a perfect-static + zero-similarity hit ties roughly
with a low-static + perfect-similarity hit, so semantically relevant
cold memories can surface above stale-but-popular ones.
- embedQueryViaGateway + loadEmbeddingMap — supporting helpers.
- getRelevantMemoriesRanked (memory-store.ts) — async query-aware ranker.
Oversamples the static pool 5×, embeds the query, blends, returns top-K.
Falls back cleanly to static ranking when:
- query empty
- no SF_LLM_GATEWAY_KEY (gateway not configured)
- gateway request fails (500/network)
- no embeddings exist yet (fresh DB / worker offline)
- auto-prompts.ts: execute-task injection now uses sliceTitle + taskTitle
as the query so memories relevant to the current work surface first.
10 new tests lock the contract — pure ranker math, fallback chain, and
the gateway-mocked promotion case.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit: eb5f7ef7b6 (parent 56ee89a946) — 4 changed files with 367 additions and 2 deletions.
|
|
@ -37,6 +37,7 @@ import { inlineGraphSubgraph } from "./graph-context.js";
|
|||
import {
|
||||
formatMemoriesForPrompt,
|
||||
getActiveMemoriesRanked,
|
||||
getRelevantMemoriesRanked,
|
||||
} from "./memory-store.js";
|
||||
import { parseRoadmap } from "./parsers.js";
|
||||
import {
|
||||
|
|
@ -2479,9 +2480,17 @@ export async function buildExecuteTaskPrompt(
|
|||
{ pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true },
|
||||
);
|
||||
|
||||
const memoriesSection = (() => {
|
||||
// Query-aware memory ranking: build a short query from the active task
|
||||
// context so embeddings can promote semantically-relevant memories above
|
||||
// the cold static-rank top. Falls back to pure static ranking when no
|
||||
// gateway is configured or no embeddings exist yet — see
|
||||
// getRelevantMemoriesRanked for the fallback chain.
|
||||
const memoryQuery = `${sTitle} ${tTitle}`.trim();
|
||||
const memoriesSection = await (async () => {
|
||||
try {
|
||||
const memories = getActiveMemoriesRanked(10);
|
||||
const memories = memoryQuery
|
||||
? await getRelevantMemoriesRanked(memoryQuery, 10)
|
||||
: getActiveMemoriesRanked(10);
|
||||
if (memories.length === 0) return "## Project Memories\n(none yet)";
|
||||
return `## Project Memories\n${formatMemoriesForPrompt(memories)}`;
|
||||
} catch {
|
||||
|
|
|
|||
|
|
@ -234,6 +234,73 @@ export async function embedMemories(
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Query-aware ranking ──────────────────────────────────────────────────
|
||||
|
||||
/** Combine static rank (confidence × hit-count boost) with semantic similarity
|
||||
* to the embedded query. When no embedding rows or no query embedding are
|
||||
* available, returns the input list unchanged so callers can rely on the
|
||||
* static order as a fallback. */
|
||||
export function rankMemoriesByEmbedding(
|
||||
memories: Array<{ id: string; staticScore: number }>,
|
||||
queryVector: Float32Array | null,
|
||||
memoryEmbeddings: Map<string, Float32Array>,
|
||||
options?: { semanticWeight?: number },
|
||||
): Array<{ id: string; combinedScore: number; cosine: number }> {
|
||||
const alpha = options?.semanticWeight ?? 0.6;
|
||||
if (!queryVector || memoryEmbeddings.size === 0) {
|
||||
return memories.map((m) => ({
|
||||
id: m.id,
|
||||
combinedScore: m.staticScore,
|
||||
cosine: 0,
|
||||
}));
|
||||
}
|
||||
return memories
|
||||
.map((m) => {
|
||||
const vec = memoryEmbeddings.get(m.id);
|
||||
const cosine = vec ? cosineSimilarity(queryVector, vec) : 0;
|
||||
// Static score in [0, ~1.5+]; cosine in [-1, 1] but typically [0, 1].
|
||||
// Blend so a perfect static + perfect cosine ≈ 2× a static-only top hit,
|
||||
// and a memory with no embedding at all still ranks by static score.
|
||||
const combinedScore = m.staticScore * (1 + alpha * cosine);
|
||||
return { id: m.id, combinedScore, cosine };
|
||||
})
|
||||
.sort((a, b) => b.combinedScore - a.combinedScore);
|
||||
}
|
||||
|
||||
/** Embed `query` via the configured gateway and return its Float32Array, or
|
||||
* null when no gateway is configured / the embed call fails. Best-effort. */
|
||||
export async function embedQueryViaGateway(
|
||||
query: string,
|
||||
): Promise<Float32Array | null> {
|
||||
if (!query.trim()) return null;
|
||||
try {
|
||||
const { loadGatewayConfigFromEnv, createGatewayEmbedFn } = await import(
|
||||
"./memory-embeddings-llm-gateway.js"
|
||||
);
|
||||
const cfg = loadGatewayConfigFromEnv();
|
||||
if (!cfg) return null;
|
||||
const embedFn = createGatewayEmbedFn(cfg);
|
||||
const vectors = await embedFn([query]);
|
||||
return vectors[0] ?? null;
|
||||
} catch (err) {
|
||||
logWarning(
|
||||
"memory-embeddings",
|
||||
`query embed failed: ${(err as Error).message}`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Map of memoryId → vector for all active memories that have embeddings.
|
||||
* Used by the ranker to look up vectors without N+1 queries. */
|
||||
export function loadEmbeddingMap(): Map<string, Float32Array> {
|
||||
const map = new Map<string, Float32Array>();
|
||||
for (const row of loadAllEmbeddings()) {
|
||||
map.set(row.memoryId, row.vector);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
// ─── Auto-engagement / backfill driver ────────────────────────────────────
|
||||
|
||||
/** Find active memories (not superseded) that don't yet have an embedding row.
|
||||
|
|
|
|||
|
|
@ -138,6 +138,58 @@ export function getActiveMemoriesRanked(limit = 30): Memory[] {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Query-aware ranking: when an embedding gateway is configured and at least
|
||||
* some memories have vectors, rerank the top static-pool by combining the
|
||||
* static score with cosine similarity to the embedded query. Falls back
|
||||
* cleanly to pure static ranking when:
|
||||
* - query is empty
|
||||
* - no SF_LLM_GATEWAY_KEY (or gateway unreachable)
|
||||
* - no memories have vectors yet (fresh DB or worker offline)
|
||||
*
|
||||
* The pool oversample (`limit * 5` capped at 50) ensures the embedding
|
||||
* rerank can promote a relevant-but-static-cold memory into the top-K.
|
||||
*/
|
||||
export async function getRelevantMemoriesRanked(
|
||||
query: string,
|
||||
limit = 10,
|
||||
): Promise<Memory[]> {
|
||||
if (!isDbAvailable()) return [];
|
||||
const pool = getActiveMemoriesRanked(Math.min(50, limit * 5));
|
||||
if (pool.length === 0 || !query.trim()) {
|
||||
return pool.slice(0, limit);
|
||||
}
|
||||
try {
|
||||
const { embedQueryViaGateway, loadEmbeddingMap, rankMemoriesByEmbedding } =
|
||||
await import("./memory-embeddings.js");
|
||||
const [queryVec, embeddingMap] = await Promise.all([
|
||||
embedQueryViaGateway(query),
|
||||
Promise.resolve(loadEmbeddingMap()),
|
||||
]);
|
||||
if (!queryVec || embeddingMap.size === 0) {
|
||||
return pool.slice(0, limit);
|
||||
}
|
||||
const ranked = rankMemoriesByEmbedding(
|
||||
pool.map((m) => ({
|
||||
id: m.id,
|
||||
staticScore: m.confidence * (1 + m.hit_count * 0.1),
|
||||
})),
|
||||
queryVec,
|
||||
embeddingMap,
|
||||
);
|
||||
const byId = new Map(pool.map((m) => [m.id, m]));
|
||||
const out: Memory[] = [];
|
||||
for (const r of ranked) {
|
||||
const mem = byId.get(r.id);
|
||||
if (mem) out.push(mem);
|
||||
if (out.length >= limit) break;
|
||||
}
|
||||
return out;
|
||||
} catch {
|
||||
return pool.slice(0, limit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the next memory ID: MEM + zero-padded 3-digit from MAX(seq).
|
||||
* Returns MEM001 if no memories exist.
|
||||
|
|
|
|||
237 lines — src/resources/extensions/sf/tests/memory-query-ranking.test.ts (new file)
|
|
@ -0,0 +1,237 @@
|
|||
/**
|
||||
* Query-aware memory ranking — combines static rank with embedding cosine
|
||||
* similarity. Tests the pure ranker (no I/O) and the end-to-end async path
|
||||
* with a mocked gateway.
|
||||
*
|
||||
* The contract being locked here:
|
||||
* - empty query / no gateway / no embeddings → static order preserved
|
||||
* - query + gateway + embeddings → semantically relevant memory promoted
|
||||
* even when its static score is lower
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, test, vi } from "vitest";
|
||||
|
||||
import {
|
||||
loadEmbeddingMap,
|
||||
rankMemoriesByEmbedding,
|
||||
saveEmbedding,
|
||||
} from "../memory-embeddings.ts";
|
||||
import { closeDatabase, openDatabase } from "../sf-db.ts";
|
||||
import {
|
||||
createMemory,
|
||||
getActiveMemoriesRanked,
|
||||
getRelevantMemoriesRanked,
|
||||
} from "../memory-store.ts";
|
||||
|
||||
// Per-test scratch directory holding the throwaway SQLite DB.
let dir: string;
// Snapshot of the environment at module load; restored around every test so
// gateway vars (SF_LLM_GATEWAY_KEY/URL) set by one test never leak into another.
const originalEnv = { ...process.env };

beforeEach(() => {
  // Fresh DB per test — no cross-test memory or embedding state.
  dir = mkdtempSync(join(tmpdir(), "sf-memory-rank-"));
  openDatabase(join(dir, "sf.db"));
  process.env = { ...originalEnv };
});

afterEach(() => {
  closeDatabase();
  rmSync(dir, { recursive: true, force: true });
  // Drop any stubGlobal("fetch", ...) mocks installed by a test.
  vi.restoreAllMocks();
  process.env = { ...originalEnv };
});
|
||||
|
||||
// Pure-ranker contract: no I/O, no DB — exercises only the blend math and
// the two "nothing to blend" fallbacks.
describe("rankMemoriesByEmbedding (pure)", () => {
  test("returns static order unchanged when query vector is null", () => {
    const out = rankMemoriesByEmbedding(
      [
        { id: "a", staticScore: 1.0 },
        { id: "b", staticScore: 0.5 },
      ],
      null,
      new Map(),
    );
    // Fallback must preserve input (static) order, not re-sort.
    assert.deepEqual(
      out.map((r) => r.id),
      ["a", "b"],
    );
  });

  test("returns static order unchanged when embedding map is empty", () => {
    const q = Float32Array.from([1, 0, 0]);
    const out = rankMemoriesByEmbedding(
      [
        { id: "a", staticScore: 1.0 },
        { id: "b", staticScore: 0.5 },
      ],
      q,
      new Map(),
    );
    assert.deepEqual(
      out.map((r) => r.id),
      ["a", "b"],
    );
  });

  test("promotes semantically aligned memory above lower-similarity higher-static", () => {
    const q = Float32Array.from([1, 0, 0]);
    const map = new Map<string, Float32Array>([
      // "a" is static-strong but orthogonal to query
      ["a", Float32Array.from([0, 1, 0])],
      // "b" is static-weak but aligned with query
      ["b", Float32Array.from([1, 0, 0])],
    ]);
    const out = rankMemoriesByEmbedding(
      [
        { id: "a", staticScore: 1.0 },
        { id: "b", staticScore: 0.7 },
      ],
      q,
      map,
      { semanticWeight: 0.6 },
    );
    // Expected combined scores with α = 0.6:
    // b: 0.7 * (1 + 0.6 * 1.0) = 1.12
    // a: 1.0 * (1 + 0.6 * 0.0) = 1.00
    assert.equal(out[0].id, "b");
    assert.equal(out[1].id, "a");
  });

  test("memory without an embedding row falls back to pure static score", () => {
    const q = Float32Array.from([1, 0, 0]);
    // Only "a" has a vector; "b" must still rank via its static score.
    const map = new Map<string, Float32Array>([
      ["a", Float32Array.from([1, 0, 0])],
    ]);
    const out = rankMemoriesByEmbedding(
      [
        { id: "a", staticScore: 0.5 },
        { id: "b", staticScore: 0.4 },
      ],
      q,
      map,
      { semanticWeight: 0.6 },
    );
    // a: 0.5 * (1 + 0.6 * 1.0) = 0.80
    // b: 0.4 * (1 + 0) = 0.40
    assert.equal(out[0].id, "a");
    assert.equal(out[0].cosine, 1);
    assert.equal(out[1].id, "b");
    assert.equal(out[1].cosine, 0);
  });
});
|
||||
|
||||
describe("loadEmbeddingMap", () => {
  test("returns vectors keyed by memoryId for active memories", () => {
    // createMemory returns the new memory's id (used as the map key below)
    // — presumably a MEMnnn string; verify against memory-store if changed.
    const a = createMemory({ category: "architecture", content: "alpha" });
    const b = createMemory({ category: "architecture", content: "beta" });
    assert.ok(a && b);
    saveEmbedding(a, Float32Array.from([1, 2, 3]), "test-model");
    saveEmbedding(b, Float32Array.from([4, 5, 6]), "test-model");
    const map = loadEmbeddingMap();
    assert.equal(map.size, 2);
    // Round-trip: stored vectors come back byte-for-byte under their ids.
    assert.deepEqual(Array.from(map.get(a)!), [1, 2, 3]);
    assert.deepEqual(Array.from(map.get(b)!), [4, 5, 6]);
  });
});
|
||||
|
||||
// End-to-end async path: real temp DB, global fetch stubbed to play the
// embedding gateway. Locks the fallback chain and the promotion case.
describe("getRelevantMemoriesRanked (async, mocked gateway)", () => {
  test("falls back to static ranking when SF_LLM_GATEWAY_KEY unset", async () => {
    delete process.env.SF_LLM_GATEWAY_KEY;
    createMemory({ category: "architecture", content: "alpha" });
    createMemory({ category: "architecture", content: "beta" });
    const out = await getRelevantMemoriesRanked("anything", 10);
    assert.equal(out.length, 2);
    const fetchMock = vi.fn();
    vi.stubGlobal("fetch", fetchMock);
    // Re-run to confirm no fetch happens on subsequent calls either.
    await getRelevantMemoriesRanked("anything", 10);
    assert.equal(fetchMock.mock.calls.length, 0);
  });

  test("falls back to static ranking when query is empty", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "x";
    const a = createMemory({ category: "architecture", content: "alpha" });
    assert.ok(a);
    const fetchMock = vi.fn();
    vi.stubGlobal("fetch", fetchMock);
    // Empty query must short-circuit before any gateway traffic.
    const out = await getRelevantMemoriesRanked("", 10);
    assert.equal(out.length, 1);
    assert.equal(fetchMock.mock.calls.length, 0);
  });

  test("uses gateway embedding to promote relevant memory", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "x";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    const a = createMemory({
      category: "architecture",
      content: "completely unrelated topic",
    });
    const b = createMemory({
      category: "architecture",
      content: "task plan format",
    });
    assert.ok(a && b);
    // Static order: both 0.8 confidence, hit_count 0 → tie. DB insert order
    // breaks the tie (a first). Pre-seed embeddings:
    // a is orthogonal to query (cosine 0); b is aligned (cosine 1).
    saveEmbedding(a, Float32Array.from([0, 1, 0]), "test-model");
    saveEmbedding(b, Float32Array.from([1, 0, 0]), "test-model");

    // Mock gateway → returns query vector aligned with b
    // (OpenAI-style embeddings response shape).
    vi.stubGlobal(
      "fetch",
      vi.fn(async () =>
        new Response(
          JSON.stringify({
            object: "list",
            data: [
              {
                object: "embedding",
                index: 0,
                embedding: [1, 0, 0],
              },
            ],
          }),
          { status: 200 },
        ),
      ),
    );

    const out = await getRelevantMemoriesRanked("plan format query", 10);
    assert.equal(out.length, 2);
    // The promotion: b overtakes a despite losing the static tiebreak.
    assert.equal(out[0].id, b, "semantically relevant memory must rank first");
    assert.equal(out[1].id, a);
  });

  test("falls back to static ranking when gateway fails", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "x";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    const a = createMemory({ category: "architecture", content: "alpha" });
    assert.ok(a);
    // HTTP 500 from the gateway must degrade silently, not throw.
    vi.stubGlobal(
      "fetch",
      vi.fn(async () => new Response("boom", { status: 500 })),
    );
    const out = await getRelevantMemoriesRanked("query", 10);
    assert.equal(out.length, 1);
  });

  test("static ranking still works with no embeddings table populated", async () => {
    const fallbackOrder = getActiveMemoriesRanked(10).length;
    assert.equal(fallbackOrder, 0); // pre-condition: empty
    const a = createMemory({
      category: "architecture",
      content: "high-confidence",
      confidence: 0.95,
    });
    const b = createMemory({
      category: "architecture",
      content: "low-confidence",
      confidence: 0.5,
    });
    assert.ok(a && b);
    // No saveEmbedding calls → embedding map empty → static fallback path.
    const out = await getRelevantMemoriesRanked("anything", 10);
    assert.equal(out[0].id, a, "high-confidence memory ranks first by static score");
  });
});
|
||||
Loading…
Add table
Reference in a new issue