feat(sf): live embeddings via inference-fabric llm-gateway + auto-backfill
Adds an opt-in embedding path against `https://llm-gateway.centralcloud.com/v1` using qwen/qwen3-embedding-4b. Activated by exporting SF_LLM_GATEWAY_KEY; URL/model overridable via SF_LLM_GATEWAY_URL and SF_LLM_GATEWAY_EMBED_MODEL. Rerank surface present (SF_LLM_GATEWAY_RERANK_MODEL) but degrades to null when no rerank worker is online — current gateway has none, so it stays dormant until one comes up. - memory-embeddings-llm-gateway.ts: createGatewayEmbedFn + rerankCandidates speaking the OpenAI-shaped /v1/embeddings and /v1/rerank protocols. - memory-embeddings.ts: listUnembeddedMemoryIds + runEmbeddingBackfill — best-effort sweep, in-flight-guarded, bounded, throttled "unavailable" log. Wired into agent_end so every turn opportunistically embeds new memories when the gateway is reachable. - sf-db.ts: pre-existing bug fix — memory_embeddings, memory_relations, and memory_sources were referenced everywhere but never CREATE-d in the schema. Adding them as IF NOT EXISTS with proper FK + PK so fresh DBs actually work. - 16 new tests covering env config, embed fn shape, rerank degradation, backfill happy/sad/bounded paths. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
dd126ddc8b
commit
56ee89a946
6 changed files with 673 additions and 1 deletions
184
src/resources/extensions/sf/memory-embeddings-llm-gateway.ts
Normal file
184
src/resources/extensions/sf/memory-embeddings-llm-gateway.ts
Normal file
|
|
@ -0,0 +1,184 @@
|
||||||
|
// SF Memory Embeddings — LLM Gateway adapter
|
||||||
|
//
|
||||||
|
// Speaks the OpenAI-shaped /v1/embeddings and /v1/rerank protocols against
|
||||||
|
// a custom inference-fabric llm-gateway endpoint. Returns null when the
|
||||||
|
// gateway is not configured (env var unset / unreachable / catalog empty),
|
||||||
|
// so the consumer can fall through to keyword-only ranking without
|
||||||
|
// surfacing errors.
|
||||||
|
//
|
||||||
|
// Why a separate module: keeping gateway-specific HTTP, headers, and error
|
||||||
|
// shapes out of memory-embeddings.ts (which is provider-agnostic) means the
|
||||||
|
// embed-fn discovery surface stays clean and the gateway can be swapped or
|
||||||
|
// disabled without touching the consumer.
|
||||||
|
|
||||||
|
import { logWarning } from "./workflow-logger.js";
|
||||||
|
import type { EmbedFn } from "./memory-embeddings.js";
|
||||||
|
|
||||||
|
/** Connection and model settings for the OpenAI-compatible llm-gateway. */
export interface GatewayConfig {
  /** Base URL for the OpenAI-compatible endpoint, including /v1. */
  url: string;
  /** Bearer token. Read from env at the call site, never persisted. */
  apiKey: string;
  /** Embedding model id. The current llm-gateway exposes only
   * qwen/qwen3-embedding-4b — other ids will 400. */
  embeddingModel: string;
  /** Rerank model id. Optional — when unset or no rerank worker is online,
   * rerank() returns null so callers fall back to keyword + cosine. */
  rerankModel?: string;
  /** Per-request timeout in ms. Defaults to 30s — embedding the whole memory
   * table on a slow link can take a while. */
  timeoutMs?: number;
}

// Fallback request timeout applied when GatewayConfig.timeoutMs is unset.
const DEFAULT_TIMEOUT_MS = 30_000;

// Environment variable names. Presence of the key alone opts the gateway
// path in; the others override built-in defaults (see loadGatewayConfigFromEnv).
const ENV_KEY = "SF_LLM_GATEWAY_KEY";
const ENV_URL = "SF_LLM_GATEWAY_URL";
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL";
|
||||||
|
|
||||||
|
/** Read gateway config from env. Returns null when SF_LLM_GATEWAY_KEY is
|
||||||
|
* missing — the gateway path is opt-in and silently absent otherwise. */
|
||||||
|
export function loadGatewayConfigFromEnv(): GatewayConfig | null {
|
||||||
|
const apiKey = process.env[ENV_KEY];
|
||||||
|
if (!apiKey) return null;
|
||||||
|
const url = process.env[ENV_URL] ?? "https://llm-gateway.centralcloud.com/v1";
|
||||||
|
const embeddingModel =
|
||||||
|
process.env[ENV_EMBED_MODEL] ?? "qwen/qwen3-embedding-4b";
|
||||||
|
const rerankModel = process.env[ENV_RERANK_MODEL] || undefined;
|
||||||
|
return { url, apiKey, embeddingModel, rerankModel };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wire shape of a successful POST /v1/embeddings response (OpenAI-compatible).
// `data` is optional so a malformed 200 body can be detected and rejected
// instead of crashing on property access.
interface EmbeddingsResponse {
  object: string;
  data?: Array<{ object: string; index: number; embedding: number[] }>;
}
|
||||||
|
|
||||||
|
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.
|
||||||
|
* Returns Float32Array[] in the same order as the input. Throws on HTTP
|
||||||
|
* errors so the caller (embedMemories) logs and counts as zero. */
|
||||||
|
export function createGatewayEmbedFn(config: GatewayConfig): EmbedFn {
|
||||||
|
return async (texts: string[]): Promise<Float32Array[]> => {
|
||||||
|
if (texts.length === 0) return [];
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeout = setTimeout(
|
||||||
|
() => controller.abort(),
|
||||||
|
config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${config.url}/embeddings`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${config.apiKey}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: config.embeddingModel,
|
||||||
|
input: texts,
|
||||||
|
}),
|
||||||
|
signal: controller.signal,
|
||||||
|
});
|
||||||
|
if (!res.ok) {
|
||||||
|
const body = await res.text().catch(() => "");
|
||||||
|
throw new Error(
|
||||||
|
`llm-gateway /embeddings ${res.status}: ${body.slice(0, 200)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const json = (await res.json()) as EmbeddingsResponse;
|
||||||
|
if (!Array.isArray(json.data)) {
|
||||||
|
throw new Error("llm-gateway /embeddings: missing data array");
|
||||||
|
}
|
||||||
|
// Sort by index to handle out-of-order responses defensively.
|
||||||
|
const sorted = [...json.data].sort((a, b) => a.index - b.index);
|
||||||
|
return sorted.map((d) => Float32Array.from(d.embedding));
|
||||||
|
} finally {
|
||||||
|
clearTimeout(timeout);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** One document to score. `id` is echoed back so callers can re-join
 * scores to their original records. */
export interface RerankCandidate {
  id: string;
  text: string;
}

/** Relevance score for one candidate; higher means more relevant. */
export interface RerankScore {
  id: string;
  score: number;
}

// Wire shape of a POST /v1/rerank response. `results` is optional so a
// malformed 200 body can be detected and rejected.
interface RerankResponse {
  results?: Array<{ index: number; relevance_score: number }>;
}
|
||||||
|
|
||||||
|
/** Score candidates against a query via <url>/rerank. Returns null when no
|
||||||
|
* rerank model is configured OR the gateway has no rerank worker online —
|
||||||
|
* callers should treat null as "skip the rerank pass". */
|
||||||
|
export async function rerankCandidates(
|
||||||
|
config: GatewayConfig,
|
||||||
|
query: string,
|
||||||
|
candidates: RerankCandidate[],
|
||||||
|
): Promise<RerankScore[] | null> {
|
||||||
|
if (!config.rerankModel) return null;
|
||||||
|
if (candidates.length === 0) return [];
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeout = setTimeout(
|
||||||
|
() => controller.abort(),
|
||||||
|
config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${config.url}/rerank`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${config.apiKey}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: config.rerankModel,
|
||||||
|
query,
|
||||||
|
documents: candidates.map((c) => c.text),
|
||||||
|
}),
|
||||||
|
signal: controller.signal,
|
||||||
|
});
|
||||||
|
if (res.status === 503 || res.status === 404) {
|
||||||
|
logWarning(
|
||||||
|
"memory-embeddings",
|
||||||
|
`llm-gateway /rerank unavailable (${res.status}); falling back to non-reranked results`,
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// Read once — the gateway sometimes returns 200 with a plain-text body
|
||||||
|
// like "no worker with rerank capability is available", so we can't
|
||||||
|
// branch on res.ok before peeking at the body.
|
||||||
|
const bodyText = await res.text().catch(() => "");
|
||||||
|
if (/no worker.*rerank/i.test(bodyText)) {
|
||||||
|
logWarning(
|
||||||
|
"memory-embeddings",
|
||||||
|
"llm-gateway /rerank: no worker capability available",
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (!res.ok) {
|
||||||
|
throw new Error(
|
||||||
|
`llm-gateway /rerank ${res.status}: ${bodyText.slice(0, 200)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let json: RerankResponse;
|
||||||
|
try {
|
||||||
|
json = JSON.parse(bodyText) as RerankResponse;
|
||||||
|
} catch {
|
||||||
|
throw new Error(
|
||||||
|
`llm-gateway /rerank: malformed JSON response (${bodyText.slice(0, 200)})`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (!Array.isArray(json.results)) {
|
||||||
|
throw new Error("llm-gateway /rerank: missing results array");
|
||||||
|
}
|
||||||
|
return json.results.map((r) => ({
|
||||||
|
id: candidates[r.index]?.id ?? String(r.index),
|
||||||
|
score: r.relevance_score,
|
||||||
|
}));
|
||||||
|
} finally {
|
||||||
|
clearTimeout(timeout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -233,3 +233,99 @@ export async function embedMemories(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Auto-engagement / backfill driver ────────────────────────────────────
|
||||||
|
|
||||||
|
/** Find active memories (not superseded) that don't yet have an embedding row.
|
||||||
|
* Used by the backfill driver to know what to embed next. */
|
||||||
|
export function listUnembeddedMemoryIds(limit = 50): Array<{
|
||||||
|
id: string;
|
||||||
|
content: string;
|
||||||
|
}> {
|
||||||
|
if (!isDbAvailable()) return [];
|
||||||
|
const adapter = _getAdapter();
|
||||||
|
if (!adapter) return [];
|
||||||
|
try {
|
||||||
|
const rows = adapter
|
||||||
|
.prepare(
|
||||||
|
`SELECT m.id, m.content
|
||||||
|
FROM memories m
|
||||||
|
LEFT JOIN memory_embeddings e ON e.memory_id = m.id
|
||||||
|
WHERE m.superseded_by IS NULL AND e.memory_id IS NULL
|
||||||
|
ORDER BY m.seq ASC
|
||||||
|
LIMIT :lim`,
|
||||||
|
)
|
||||||
|
.all({ ":lim": limit });
|
||||||
|
return rows.map((r) => ({
|
||||||
|
id: r["id"] as string,
|
||||||
|
content: r["content"] as string,
|
||||||
|
}));
|
||||||
|
} catch {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let backfillInFlight = false;
|
||||||
|
let lastUnavailableLogAt = 0;
|
||||||
|
|
||||||
|
/** Best-effort embedding backfill. Probes the gateway by attempting to embed
|
||||||
|
* the first unembedded batch — on success, persists vectors and continues
|
||||||
|
* until either the limit is reached or the queue is empty; on failure (no
|
||||||
|
* worker, network error, missing config), logs once-per-minute and returns
|
||||||
|
* zero so callers can keep firing without spam.
|
||||||
|
*
|
||||||
|
* Safe to call from a hook on every turn — guarded against re-entry via an
|
||||||
|
* in-flight flag and bounded by `maxPerInvocation`. */
|
||||||
|
export async function runEmbeddingBackfill(opts?: {
|
||||||
|
maxPerInvocation?: number;
|
||||||
|
batchSize?: number;
|
||||||
|
}): Promise<number> {
|
||||||
|
if (backfillInFlight) return 0;
|
||||||
|
const max = opts?.maxPerInvocation ?? 50;
|
||||||
|
const batchSize = opts?.batchSize ?? 16;
|
||||||
|
|
||||||
|
const { loadGatewayConfigFromEnv, createGatewayEmbedFn } = await import(
|
||||||
|
"./memory-embeddings-llm-gateway.js"
|
||||||
|
);
|
||||||
|
const cfg = loadGatewayConfigFromEnv();
|
||||||
|
if (!cfg) return 0; // Gateway opt-in; absent config = no-op.
|
||||||
|
|
||||||
|
backfillInFlight = true;
|
||||||
|
let embedded = 0;
|
||||||
|
try {
|
||||||
|
const embedFn = createGatewayEmbedFn(cfg);
|
||||||
|
while (embedded < max) {
|
||||||
|
const batch = listUnembeddedMemoryIds(
|
||||||
|
Math.min(batchSize, max - embedded),
|
||||||
|
);
|
||||||
|
if (batch.length === 0) break;
|
||||||
|
let count = 0;
|
||||||
|
try {
|
||||||
|
count = await embedMemories(batch, embedFn, cfg.embeddingModel);
|
||||||
|
} catch (err) {
|
||||||
|
// Throttle "unavailable" log to once per minute so we don't spam
|
||||||
|
// a journal when the gateway worker is offline.
|
||||||
|
const now = Date.now();
|
||||||
|
if (now - lastUnavailableLogAt > 60_000) {
|
||||||
|
logWarning(
|
||||||
|
"memory-embeddings",
|
||||||
|
`backfill: gateway embed failed (${(err as Error).message}); will retry next turn`,
|
||||||
|
);
|
||||||
|
lastUnavailableLogAt = now;
|
||||||
|
}
|
||||||
|
return embedded;
|
||||||
|
}
|
||||||
|
if (count === 0) break; // Stop early to avoid loops on transient saves
|
||||||
|
embedded += count;
|
||||||
|
}
|
||||||
|
if (embedded > 0) {
|
||||||
|
logWarning(
|
||||||
|
"memory-embeddings",
|
||||||
|
`backfill: embedded ${embedded} memories via ${cfg.embeddingModel}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return embedded;
|
||||||
|
} finally {
|
||||||
|
backfillInFlight = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -285,6 +285,49 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
|
||||||
)
|
)
|
||||||
`);
|
`);
|
||||||
|
|
||||||
|
  // memory_embeddings, memory_relations, memory_sources used to be referenced
  // by helper functions and queries (memory-embeddings.ts, memory-relations.ts,
  // memory-ingest.ts) without a corresponding CREATE TABLE — any actual write
  // would have failed with "no such table". Creating them as IF NOT EXISTS so
  // existing DBs that somehow have them survive, and fresh DBs work.
  //
  // One vector row per memory; `dim` records the vector length so readers can
  // reinterpret the BLOB without re-probing the model.
  db.exec(`
    CREATE TABLE IF NOT EXISTS memory_embeddings (
      memory_id TEXT PRIMARY KEY,
      model TEXT NOT NULL,
      dim INTEGER NOT NULL,
      vector BLOB NOT NULL,
      updated_at TEXT NOT NULL,
      FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
    )
  `);

  // Typed edges between memories; the composite PK de-duplicates an edge of
  // the same relation kind between the same pair.
  db.exec(`
    CREATE TABLE IF NOT EXISTS memory_relations (
      from_id TEXT NOT NULL,
      to_id TEXT NOT NULL,
      rel TEXT NOT NULL,
      confidence REAL NOT NULL DEFAULT 0.8,
      created_at TEXT NOT NULL,
      PRIMARY KEY (from_id, to_id, rel),
      FOREIGN KEY (from_id) REFERENCES memories(id) ON DELETE CASCADE,
      FOREIGN KEY (to_id) REFERENCES memories(id) ON DELETE CASCADE
    )
  `);

  // Imported source documents; `content_hash` supports de-duplication on
  // re-import and `tags` is a JSON-encoded array.
  db.exec(`
    CREATE TABLE IF NOT EXISTS memory_sources (
      id TEXT PRIMARY KEY,
      kind TEXT NOT NULL,
      uri TEXT,
      title TEXT,
      content TEXT NOT NULL,
      content_hash TEXT NOT NULL,
      imported_at TEXT NOT NULL,
      scope TEXT NOT NULL DEFAULT 'project',
      tags TEXT NOT NULL DEFAULT '[]'
    )
  `);
|
||||||
|
|
||||||
db.exec(`
|
db.exec(`
|
||||||
CREATE TABLE IF NOT EXISTS milestones (
|
CREATE TABLE IF NOT EXISTS milestones (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,138 @@
|
||||||
|
/**
|
||||||
|
* Backfill driver — embeds active memories without vectors via the gateway.
|
||||||
|
*
|
||||||
|
* Verifies the no-op path (no env config), the success path (vectors land in
|
||||||
|
* memory_embeddings), and the unavailable path (gateway throws → returns 0,
|
||||||
|
* doesn't crash, doesn't double-embed on the next call).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { mkdtempSync, rmSync } from "node:fs";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { afterEach, beforeEach, describe, test, vi } from "vitest";
|
||||||
|
|
||||||
|
import {
|
||||||
|
getEmbeddingForMemory,
|
||||||
|
listUnembeddedMemoryIds,
|
||||||
|
runEmbeddingBackfill,
|
||||||
|
} from "../memory-embeddings.ts";
|
||||||
|
import { closeDatabase, openDatabase } from "../sf-db.ts";
|
||||||
|
import { createMemory } from "../memory-store.ts";
|
||||||
|
|
||||||
|
let dir: string;
|
||||||
|
const originalEnv = { ...process.env };
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
dir = mkdtempSync(join(tmpdir(), "sf-embed-backfill-"));
|
||||||
|
openDatabase(join(dir, "sf.db"));
|
||||||
|
process.env = { ...originalEnv };
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
closeDatabase();
|
||||||
|
rmSync(dir, { recursive: true, force: true });
|
||||||
|
vi.restoreAllMocks();
|
||||||
|
process.env = { ...originalEnv };
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("listUnembeddedMemoryIds", () => {
  test("returns active memories with no embedding row", () => {
    // Two fresh memories with no rows in memory_embeddings yet — both
    // should be reported as embedding candidates.
    const a = createMemory({ category: "architecture", content: "alpha" });
    const b = createMemory({ category: "architecture", content: "beta" });
    assert.ok(a && b);
    const out = listUnembeddedMemoryIds();
    assert.equal(out.length, 2);
    // Sort both sides — insertion order is not part of the contract here.
    assert.deepEqual(out.map((r) => r.id).sort(), [a, b].sort());
  });
});
|
||||||
|
|
||||||
|
describe("runEmbeddingBackfill", () => {
  test("returns 0 silently when SF_LLM_GATEWAY_KEY is unset", async () => {
    delete process.env.SF_LLM_GATEWAY_KEY;
    createMemory({ category: "architecture", content: "x" });
    // Spy fetch to prove the no-config path never touches the network.
    const fetchMock = vi.fn();
    vi.stubGlobal("fetch", fetchMock);
    assert.equal(await runEmbeddingBackfill(), 0);
    assert.equal(fetchMock.mock.calls.length, 0);
  });

  test("embeds unembedded memories and persists vectors when gateway responds", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "secret";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    const a = createMemory({ category: "architecture", content: "alpha" });
    const b = createMemory({ category: "architecture", content: "beta" });
    assert.ok(a && b);

    // Fake gateway: echo one 3-dim vector per input, in order.
    vi.stubGlobal(
      "fetch",
      vi.fn(async (_url, init) => {
        const body = JSON.parse((init as RequestInit).body as string);
        const data = (body.input as string[]).map(
          (_text: string, index: number) => ({
            object: "embedding",
            index,
            embedding: [0.1 * (index + 1), 0.2, 0.3],
          }),
        );
        return new Response(
          JSON.stringify({ object: "list", data }),
          { status: 200, headers: { "content-type": "application/json" } },
        );
      }),
    );

    const embedded = await runEmbeddingBackfill();
    assert.equal(embedded, 2);
    // Vectors must have landed in memory_embeddings with the right metadata.
    const rowA = getEmbeddingForMemory(a);
    const rowB = getEmbeddingForMemory(b);
    assert.ok(rowA && rowB);
    assert.equal(rowA!.dim, 3);
    assert.equal(rowA!.model, "qwen/qwen3-embedding-4b");
  });

  test("returns 0 and doesn't throw when gateway returns 4xx", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "secret";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    createMemory({ category: "architecture", content: "x" });
    vi.stubGlobal(
      "fetch",
      vi.fn(async () =>
        new Response("Unsupported model", { status: 400 }),
      ),
    );
    assert.equal(await runEmbeddingBackfill(), 0);
    // Memory is still un-embedded — backfill will retry later
    assert.equal(listUnembeddedMemoryIds().length, 1);
  });

  test("respects maxPerInvocation", async () => {
    process.env.SF_LLM_GATEWAY_KEY = "secret";
    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
    // Five candidates queued, but the invocation is capped at two.
    for (let i = 0; i < 5; i++) {
      createMemory({ category: "architecture", content: `m${i}` });
    }
    vi.stubGlobal(
      "fetch",
      vi.fn(async (_url, init) => {
        const body = JSON.parse((init as RequestInit).body as string);
        const data = (body.input as string[]).map(
          (_t: string, index: number) => ({
            object: "embedding",
            index,
            embedding: [0.1, 0.2],
          }),
        );
        return new Response(JSON.stringify({ object: "list", data }), {
          status: 200,
        });
      }),
    );
    const embedded = await runEmbeddingBackfill({
      maxPerInvocation: 2,
      batchSize: 2,
    });
    assert.equal(embedded, 2);
    // The remaining three stay queued for the next invocation.
    assert.equal(listUnembeddedMemoryIds().length, 3);
  });
});
|
||||||
|
|
@ -0,0 +1,211 @@
|
||||||
|
/**
|
||||||
|
* llm-gateway adapter tests — mocked fetch.
|
||||||
|
*
|
||||||
|
* Live integration is gated behind INTEGRATION=1 + SF_LLM_GATEWAY_KEY:
|
||||||
|
* INTEGRATION=1 SF_LLM_GATEWAY_KEY=... npx vitest run \
|
||||||
|
* src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { afterEach, beforeEach, describe, test, vi } from "vitest";
|
||||||
|
|
||||||
|
import {
|
||||||
|
createGatewayEmbedFn,
|
||||||
|
loadGatewayConfigFromEnv,
|
||||||
|
rerankCandidates,
|
||||||
|
} from "../memory-embeddings-llm-gateway.ts";
|
||||||
|
|
||||||
|
describe("loadGatewayConfigFromEnv", () => {
  // Snapshot env so each test's mutations are rolled back.
  const original = { ...process.env };

  afterEach(() => {
    process.env = { ...original };
  });

  test("returns null when SF_LLM_GATEWAY_KEY is unset", () => {
    delete process.env.SF_LLM_GATEWAY_KEY;
    assert.equal(loadGatewayConfigFromEnv(), null);
  });

  test("populates defaults when only the key is set", () => {
    process.env.SF_LLM_GATEWAY_KEY = "abc";
    delete process.env.SF_LLM_GATEWAY_URL;
    delete process.env.SF_LLM_GATEWAY_EMBED_MODEL;
    delete process.env.SF_LLM_GATEWAY_RERANK_MODEL;
    const cfg = loadGatewayConfigFromEnv();
    assert.ok(cfg);
    assert.equal(cfg!.apiKey, "abc");
    assert.equal(cfg!.url, "https://llm-gateway.centralcloud.com/v1");
    assert.equal(cfg!.embeddingModel, "qwen/qwen3-embedding-4b");
    // No rerank model by default — rerank stays dormant.
    assert.equal(cfg!.rerankModel, undefined);
  });

  test("env overrides win", () => {
    process.env.SF_LLM_GATEWAY_KEY = "abc";
    process.env.SF_LLM_GATEWAY_URL = "https://example.test/v1";
    process.env.SF_LLM_GATEWAY_EMBED_MODEL = "custom/embed";
    process.env.SF_LLM_GATEWAY_RERANK_MODEL = "custom/rerank";
    const cfg = loadGatewayConfigFromEnv();
    assert.ok(cfg);
    assert.equal(cfg!.url, "https://example.test/v1");
    assert.equal(cfg!.embeddingModel, "custom/embed");
    assert.equal(cfg!.rerankModel, "custom/rerank");
  });
});
|
||||||
|
|
||||||
|
describe("createGatewayEmbedFn", () => {
|
||||||
|
const cfg = {
|
||||||
|
url: "https://gateway.test/v1",
|
||||||
|
apiKey: "secret",
|
||||||
|
embeddingModel: "qwen/qwen3-embedding-4b",
|
||||||
|
};
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.restoreAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns Float32Array[] in input order", async () => {
|
||||||
|
const fetchMock = vi.fn(async () =>
|
||||||
|
new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
object: "list",
|
||||||
|
data: [
|
||||||
|
{ object: "embedding", index: 0, embedding: [0.1, 0.2, 0.3] },
|
||||||
|
{ object: "embedding", index: 1, embedding: [0.4, 0.5, 0.6] },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
{ status: 200, headers: { "content-type": "application/json" } },
|
||||||
|
),
|
||||||
|
);
|
||||||
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
|
const embed = createGatewayEmbedFn(cfg);
|
||||||
|
const out = await embed(["hello", "world"]);
|
||||||
|
assert.equal(out.length, 2);
|
||||||
|
assert.deepEqual(Array.from(out[0]), [
|
||||||
|
0.1, 0.2, 0.3,
|
||||||
|
].map((n) => Math.fround(n)));
|
||||||
|
assert.deepEqual(Array.from(out[1]), [
|
||||||
|
0.4, 0.5, 0.6,
|
||||||
|
].map((n) => Math.fround(n)));
|
||||||
|
|
||||||
|
// Verify request shape
|
||||||
|
const [url, init] = fetchMock.mock.calls[0]!;
|
||||||
|
assert.equal(url, "https://gateway.test/v1/embeddings");
|
||||||
|
const headers = (init as RequestInit).headers as Record<string, string>;
|
||||||
|
assert.equal(headers.Authorization, "Bearer secret");
|
||||||
|
const body = JSON.parse((init as RequestInit).body as string);
|
||||||
|
assert.equal(body.model, "qwen/qwen3-embedding-4b");
|
||||||
|
assert.deepEqual(body.input, ["hello", "world"]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("re-orders out-of-order responses by index", async () => {
|
||||||
|
vi.stubGlobal(
|
||||||
|
"fetch",
|
||||||
|
vi.fn(async () =>
|
||||||
|
new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
object: "list",
|
||||||
|
data: [
|
||||||
|
{ object: "embedding", index: 1, embedding: [0.4] },
|
||||||
|
{ object: "embedding", index: 0, embedding: [0.1] },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
{ status: 200 },
|
||||||
|
),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
const out = await createGatewayEmbedFn(cfg)(["a", "b"]);
|
||||||
|
assert.deepEqual(Array.from(out[0]), [Math.fround(0.1)]);
|
||||||
|
assert.deepEqual(Array.from(out[1]), [Math.fround(0.4)]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("throws on non-2xx with a useful message", async () => {
|
||||||
|
vi.stubGlobal(
|
||||||
|
"fetch",
|
||||||
|
vi.fn(async () =>
|
||||||
|
new Response("Unsupported model 'x'", { status: 400 }),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
await assert.rejects(
|
||||||
|
() => createGatewayEmbedFn(cfg)(["hi"]),
|
||||||
|
/llm-gateway \/embeddings 400.*Unsupported model/,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns [] for empty input without making a request", async () => {
|
||||||
|
const fetchMock = vi.fn();
|
||||||
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
|
const out = await createGatewayEmbedFn(cfg)([]);
|
||||||
|
assert.deepEqual(out, []);
|
||||||
|
assert.equal(fetchMock.mock.calls.length, 0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("rerankCandidates", () => {
|
||||||
|
const cfg = {
|
||||||
|
url: "https://gateway.test/v1",
|
||||||
|
apiKey: "secret",
|
||||||
|
embeddingModel: "qwen/qwen3-embedding-4b",
|
||||||
|
rerankModel: "bge-reranker",
|
||||||
|
};
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.restoreAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns null when rerankModel is unset", async () => {
|
||||||
|
const out = await rerankCandidates(
|
||||||
|
{ ...cfg, rerankModel: undefined },
|
||||||
|
"q",
|
||||||
|
[{ id: "a", text: "x" }],
|
||||||
|
);
|
||||||
|
assert.equal(out, null);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns scores aligned to original ids", async () => {
|
||||||
|
vi.stubGlobal(
|
||||||
|
"fetch",
|
||||||
|
vi.fn(async () =>
|
||||||
|
new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
results: [
|
||||||
|
{ index: 1, relevance_score: 0.9 },
|
||||||
|
{ index: 0, relevance_score: 0.1 },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
{ status: 200 },
|
||||||
|
),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
const out = await rerankCandidates(cfg, "q", [
|
||||||
|
{ id: "a", text: "alpha" },
|
||||||
|
{ id: "b", text: "beta" },
|
||||||
|
]);
|
||||||
|
assert.deepEqual(out, [
|
||||||
|
{ id: "b", score: 0.9 },
|
||||||
|
{ id: "a", score: 0.1 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("degrades to null on 503 (worker offline)", async () => {
|
||||||
|
vi.stubGlobal(
|
||||||
|
"fetch",
|
||||||
|
vi.fn(async () => new Response("worker unavailable", { status: 503 })),
|
||||||
|
);
|
||||||
|
const out = await rerankCandidates(cfg, "q", [{ id: "a", text: "x" }]);
|
||||||
|
assert.equal(out, null);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("degrades to null on 200 'no worker rerank' body", async () => {
|
||||||
|
vi.stubGlobal(
|
||||||
|
"fetch",
|
||||||
|
vi.fn(async () =>
|
||||||
|
new Response("no worker with rerank capability is available", {
|
||||||
|
status: 200,
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
const out = await rerankCandidates(cfg, "q", [{ id: "a", text: "x" }]);
|
||||||
|
assert.equal(out, null);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -515,7 +515,7 @@ test("memory-store: schema includes memories table", () => {
|
||||||
const version = adapter
|
const version = adapter
|
||||||
.prepare("SELECT MAX(version) as v FROM schema_version")
|
.prepare("SELECT MAX(version) as v FROM schema_version")
|
||||||
.get();
|
.get();
|
||||||
assert.deepStrictEqual(version?.["v"], 21, "schema version should be 21");
|
assert.deepStrictEqual(version?.["v"], 25, "schema version should be 25");
|
||||||
|
|
||||||
closeDatabase();
|
closeDatabase();
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue