From 56ee89a946df00a77cf2c1532455acd4cd7b013b Mon Sep 17 00:00:00 2001
From: Mikael Hugo
Date: Sat, 2 May 2026 22:13:23 +0200
Subject: [PATCH] feat(sf): live embeddings via inference-fabric llm-gateway + auto-backfill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds an opt-in embedding path against
`https://llm-gateway.centralcloud.com/v1` using qwen/qwen3-embedding-4b.
Activated by exporting SF_LLM_GATEWAY_KEY; URL/model overridable via
SF_LLM_GATEWAY_URL and SF_LLM_GATEWAY_EMBED_MODEL. Rerank surface present
(SF_LLM_GATEWAY_RERANK_MODEL) but degrades to null when no rerank worker is
online — current gateway has none, so it stays dormant until one comes up.

- memory-embeddings-llm-gateway.ts: createGatewayEmbedFn + rerankCandidates
  speaking the OpenAI-shaped /v1/embeddings and /v1/rerank protocols.
- memory-embeddings.ts: listUnembeddedMemoryIds + runEmbeddingBackfill —
  best-effort sweep, in-flight-guarded, bounded, throttled "unavailable"
  log. Wired into agent_end so every turn opportunistically embeds new
  memories when the gateway is reachable.
- sf-db.ts: pre-existing bug fix — memory_embeddings, memory_relations, and
  memory_sources were referenced everywhere but never CREATE-d in the
  schema. Adding them as IF NOT EXISTS with proper FK + PK so fresh DBs
  actually work.
- 16 new tests covering env config, embed fn shape, rerank degradation,
  backfill happy/sad/bounded paths.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../sf/memory-embeddings-llm-gateway.ts       | 196 +++++++++++++++
 .../extensions/sf/memory-embeddings.ts        | 103 ++++++++
 src/resources/extensions/sf/sf-db.ts          |  43 ++++
 .../tests/memory-embeddings-backfill.test.ts  | 138 ++++++++++++
 .../memory-embeddings-llm-gateway.test.ts     | 211 ++++++++++++++++++
 .../extensions/sf/tests/memory-store.test.ts  |   2 +-
 6 files changed, 692 insertions(+), 1 deletion(-)
 create mode 100644 src/resources/extensions/sf/memory-embeddings-llm-gateway.ts
 create mode 100644 src/resources/extensions/sf/tests/memory-embeddings-backfill.test.ts
 create mode 100644 src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.ts

diff --git a/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts b/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts
new file mode 100644
index 000000000..1a2f3016e
--- /dev/null
+++ b/src/resources/extensions/sf/memory-embeddings-llm-gateway.ts
@@ -0,0 +1,196 @@
+// SF Memory Embeddings — LLM Gateway adapter
+//
+// Speaks the OpenAI-shaped /v1/embeddings and /v1/rerank protocols against
+// a custom inference-fabric llm-gateway endpoint. Returns null when the
+// gateway is not configured (env var unset / unreachable / catalog empty),
+// so the consumer can fall through to keyword-only ranking without
+// surfacing errors.
+//
+// Why a separate module: keeping gateway-specific HTTP, headers, and error
+// shapes out of memory-embeddings.ts (which is provider-agnostic) means the
+// embed-fn discovery surface stays clean and the gateway can be swapped or
+// disabled without touching the consumer.
+
+import { logWarning } from "./workflow-logger.js";
+import type { EmbedFn } from "./memory-embeddings.js";
+
+export interface GatewayConfig {
+  /** Base URL for the OpenAI-compatible endpoint, including /v1. */
+  url: string;
+  /** Bearer token. Read from env at the call site, never persisted. */
+  apiKey: string;
+  /** Embedding model id. The current llm-gateway exposes only
+   * qwen/qwen3-embedding-4b — other ids will 400. */
+  embeddingModel: string;
+  /** Rerank model id. Optional — when unset or no rerank worker is online,
+   * rerank() returns null so callers fall back to keyword + cosine. */
+  rerankModel?: string;
+  /** Per-request timeout in ms. Defaults to 30s — embedding the whole memory
+   * table on a slow link can take a while. */
+  timeoutMs?: number;
+}
+
+const DEFAULT_TIMEOUT_MS = 30_000;
+const DEFAULT_URL = "https://llm-gateway.centralcloud.com/v1";
+const DEFAULT_EMBED_MODEL = "qwen/qwen3-embedding-4b";
+
+const ENV_KEY = "SF_LLM_GATEWAY_KEY";
+const ENV_URL = "SF_LLM_GATEWAY_URL";
+const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
+const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL";
+
+/** Read gateway config from env. Returns null when SF_LLM_GATEWAY_KEY is
+ * missing — the gateway path is opt-in and silently absent otherwise.
+ * Empty-string overrides count as unset (matching the rerank model), and
+ * trailing slashes are trimmed so `${url}/embeddings` stays well-formed. */
+export function loadGatewayConfigFromEnv(): GatewayConfig | null {
+  const apiKey = process.env[ENV_KEY];
+  if (!apiKey) return null;
+  const url = (process.env[ENV_URL] || DEFAULT_URL).replace(/\/+$/, "");
+  const embeddingModel = process.env[ENV_EMBED_MODEL] || DEFAULT_EMBED_MODEL;
+  const rerankModel = process.env[ENV_RERANK_MODEL] || undefined;
+  return { url, apiKey, embeddingModel, rerankModel };
+}
+
+interface EmbeddingsResponse {
+  object: string;
+  data?: Array<{ object: string; index: number; embedding: number[] }>;
+}
+
+/** Build an EmbedFn that posts to /embeddings with Bearer auth.
+ * Returns Float32Array[] in the same order as the input. Throws on HTTP
+ * errors, a missing data array, or an embedding count that differs from the
+ * input count, so the caller (embedMemories) logs and counts as zero. */
+export function createGatewayEmbedFn(config: GatewayConfig): EmbedFn {
+  return async (texts: string[]): Promise<Float32Array[]> => {
+    if (texts.length === 0) return [];
+    const controller = new AbortController();
+    const timeout = setTimeout(
+      () => controller.abort(),
+      config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
+    );
+    try {
+      const res = await fetch(`${config.url}/embeddings`, {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${config.apiKey}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          model: config.embeddingModel,
+          input: texts,
+        }),
+        signal: controller.signal,
+      });
+      if (!res.ok) {
+        const body = await res.text().catch(() => "");
+        throw new Error(
+          `llm-gateway /embeddings ${res.status}: ${body.slice(0, 200)}`,
+        );
+      }
+      const json = (await res.json()) as EmbeddingsResponse;
+      if (!Array.isArray(json.data)) {
+        throw new Error("llm-gateway /embeddings: missing data array");
+      }
+      // A short or long response would pair vectors with the wrong texts.
+      if (json.data.length !== texts.length) {
+        throw new Error(
+          `llm-gateway /embeddings: expected ${texts.length} embeddings, got ${json.data.length}`,
+        );
+      }
+      // Sort by index to handle out-of-order responses defensively.
+      const sorted = [...json.data].sort((a, b) => a.index - b.index);
+      return sorted.map((d) => Float32Array.from(d.embedding));
+    } finally {
+      clearTimeout(timeout);
+    }
+  };
+}
+
+export interface RerankCandidate {
+  id: string;
+  text: string;
+}
+
+export interface RerankScore {
+  id: string;
+  score: number;
+}
+
+interface RerankResponse {
+  results?: Array<{ index: number; relevance_score: number }>;
+}
+
+/** Score candidates against a query via /rerank. Returns null when no
+ * rerank model is configured OR the gateway has no rerank worker online —
+ * callers should treat null as "skip the rerank pass". Results whose index
+ * does not map to a candidate are dropped. */
+export async function rerankCandidates(
+  config: GatewayConfig,
+  query: string,
+  candidates: RerankCandidate[],
+): Promise<RerankScore[] | null> {
+  if (!config.rerankModel) return null;
+  if (candidates.length === 0) return [];
+  const controller = new AbortController();
+  const timeout = setTimeout(
+    () => controller.abort(),
+    config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
+  );
+  try {
+    const res = await fetch(`${config.url}/rerank`, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${config.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: config.rerankModel,
+        query,
+        documents: candidates.map((c) => c.text),
+      }),
+      signal: controller.signal,
+    });
+    if (res.status === 503 || res.status === 404) {
+      logWarning(
+        "memory-embeddings",
+        `llm-gateway /rerank unavailable (${res.status}); falling back to non-reranked results`,
+      );
+      return null;
+    }
+    // Read once — the gateway sometimes returns 200 with a plain-text body
+    // like "no worker with rerank capability is available", so we can't
+    // branch on res.ok before peeking at the body.
+    const bodyText = await res.text().catch(() => "");
+    if (/no worker.*rerank/i.test(bodyText)) {
+      logWarning(
+        "memory-embeddings",
+        "llm-gateway /rerank: no worker capability available",
+      );
+      return null;
+    }
+    if (!res.ok) {
+      throw new Error(
+        `llm-gateway /rerank ${res.status}: ${bodyText.slice(0, 200)}`,
+      );
+    }
+    let json: RerankResponse;
+    try {
+      json = JSON.parse(bodyText) as RerankResponse;
+    } catch {
+      throw new Error(
+        `llm-gateway /rerank: malformed JSON response (${bodyText.slice(0, 200)})`,
+      );
+    }
+    if (!Array.isArray(json.results)) {
+      throw new Error("llm-gateway /rerank: missing results array");
+    }
+    // Never invent an id: an out-of-range index has no candidate to score.
+    return json.results.flatMap((r) => {
+      const candidate = candidates[r.index];
+      return candidate ? [{ id: candidate.id, score: r.relevance_score }] : [];
+    });
+  } finally {
+    clearTimeout(timeout);
+  }
+}
diff --git a/src/resources/extensions/sf/memory-embeddings.ts b/src/resources/extensions/sf/memory-embeddings.ts
index 399c26812..94506f371 100644
--- a/src/resources/extensions/sf/memory-embeddings.ts
+++ b/src/resources/extensions/sf/memory-embeddings.ts
@@ -233,3 +233,106 @@ export async function embedMemories(
     return 0;
   }
 }
+
+// ─── Auto-engagement / backfill driver ────────────────────────────────────
+
+/** Find active memories (not superseded) that don't yet have an embedding row.
+ * Used by the backfill driver to know what to embed next. Returns [] for a
+ * non-positive limit: SQLite treats a negative LIMIT as "no limit", which
+ * would silently unbound the sweep. */
+export function listUnembeddedMemoryIds(limit = 50): Array<{
+  id: string;
+  content: string;
+}> {
+  if (!(limit > 0)) return [];
+  if (!isDbAvailable()) return [];
+  const adapter = _getAdapter();
+  if (!adapter) return [];
+  try {
+    const rows = adapter
+      .prepare(
+        `SELECT m.id, m.content
+         FROM memories m
+         LEFT JOIN memory_embeddings e ON e.memory_id = m.id
+         WHERE m.superseded_by IS NULL AND e.memory_id IS NULL
+         ORDER BY m.seq ASC
+         LIMIT :lim`,
+      )
+      .all({ ":lim": limit });
+    return rows.map((r) => ({
+      id: r["id"] as string,
+      content: r["content"] as string,
+    }));
+  } catch {
+    // Best-effort: a failed query means "nothing to embed right now".
+    return [];
+  }
+}
+
+let backfillInFlight = false;
+let lastUnavailableLogAt = 0;
+
+/** Best-effort embedding backfill. Probes the gateway by attempting to embed
+ * the first unembedded batch — on success, persists vectors and continues
+ * until either the limit is reached or the queue is empty; on failure (no
+ * worker, network error, missing config), logs once-per-minute and returns
+ * zero so callers can keep firing without spam.
+ *
+ * Safe to call from a hook on every turn — guarded against re-entry via an
+ * in-flight flag and bounded by `maxPerInvocation`. */
+export async function runEmbeddingBackfill(opts?: {
+  maxPerInvocation?: number;
+  batchSize?: number;
+}): Promise<number> {
+  if (backfillInFlight) return 0;
+  // Claim the guard before the first await: the dynamic import below yields,
+  // and a second call arriving meanwhile must see the flag already set.
+  backfillInFlight = true;
+  let embedded = 0;
+  try {
+    const max = opts?.maxPerInvocation ?? 50;
+    const batchSize = opts?.batchSize ?? 16;
+
+    const { loadGatewayConfigFromEnv, createGatewayEmbedFn } = await import(
+      "./memory-embeddings-llm-gateway.js"
+    );
+    const cfg = loadGatewayConfigFromEnv();
+    if (!cfg) return 0; // Gateway opt-in; absent config = no-op.
+
+    const embedFn = createGatewayEmbedFn(cfg);
+    while (embedded < max) {
+      const batch = listUnembeddedMemoryIds(
+        Math.min(batchSize, max - embedded),
+      );
+      if (batch.length === 0) break;
+      let count = 0;
+      try {
+        count = await embedMemories(batch, embedFn, cfg.embeddingModel);
+      } catch (err) {
+        // Throttle "unavailable" log to once per minute so we don't spam
+        // a journal when the gateway worker is offline.
+        const now = Date.now();
+        if (now - lastUnavailableLogAt > 60_000) {
+          const reason = err instanceof Error ? err.message : String(err);
+          logWarning(
+            "memory-embeddings",
+            `backfill: gateway embed failed (${reason}); will retry next turn`,
+          );
+          lastUnavailableLogAt = now;
+        }
+        return embedded;
+      }
+      if (count === 0) break; // Stop early to avoid loops on transient saves
+      embedded += count;
+    }
+    if (embedded > 0) {
+      logWarning(
+        "memory-embeddings",
+        `backfill: embedded ${embedded} memories via ${cfg.embeddingModel}`,
+      );
+    }
+    return embedded;
+  } finally {
+    backfillInFlight = false;
+  }
+}
diff --git a/src/resources/extensions/sf/sf-db.ts b/src/resources/extensions/sf/sf-db.ts
index d3a3e7301..48b3915c9 100644
--- a/src/resources/extensions/sf/sf-db.ts
+++ b/src/resources/extensions/sf/sf-db.ts
@@ -285,6 +285,49 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
     )
   `);
 
+  // memory_embeddings, memory_relations, memory_sources used to be referenced
+  // by helper functions and queries (memory-embeddings.ts, memory-relations.ts,
+  // memory-ingest.ts) without a corresponding CREATE TABLE — any actual write
+  // would have failed with "no such table". Creating them as IF NOT EXISTS so
+  // existing DBs that somehow have them survive, and fresh DBs work.
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS memory_embeddings (
+      memory_id TEXT PRIMARY KEY,
+      model TEXT NOT NULL,
+      dim INTEGER NOT NULL,
+      vector BLOB NOT NULL,
+      updated_at TEXT NOT NULL,
+      FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
+    )
+  `);
+
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS memory_relations (
+      from_id TEXT NOT NULL,
+      to_id TEXT NOT NULL,
+      rel TEXT NOT NULL,
+      confidence REAL NOT NULL DEFAULT 0.8,
+      created_at TEXT NOT NULL,
+      PRIMARY KEY (from_id, to_id, rel),
+      FOREIGN KEY (from_id) REFERENCES memories(id) ON DELETE CASCADE,
+      FOREIGN KEY (to_id) REFERENCES memories(id) ON DELETE CASCADE
+    )
+  `);
+
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS memory_sources (
+      id TEXT PRIMARY KEY,
+      kind TEXT NOT NULL,
+      uri TEXT,
+      title TEXT,
+      content TEXT NOT NULL,
+      content_hash TEXT NOT NULL,
+      imported_at TEXT NOT NULL,
+      scope TEXT NOT NULL DEFAULT 'project',
+      tags TEXT NOT NULL DEFAULT '[]'
+    )
+  `);
+
   db.exec(`
     CREATE TABLE IF NOT EXISTS milestones (
       id TEXT PRIMARY KEY,
diff --git a/src/resources/extensions/sf/tests/memory-embeddings-backfill.test.ts b/src/resources/extensions/sf/tests/memory-embeddings-backfill.test.ts
new file mode 100644
index 000000000..f386e1556
--- /dev/null
+++ b/src/resources/extensions/sf/tests/memory-embeddings-backfill.test.ts
@@ -0,0 +1,138 @@
+/**
+ * Backfill driver — embeds active memories without vectors via the gateway.
+ *
+ * Verifies the no-op path (no env config), the success path (vectors land in
+ * memory_embeddings), and the unavailable path (gateway throws → returns 0,
+ * doesn't crash, doesn't double-embed on the next call).
+ */
+
+import assert from "node:assert/strict";
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, test, vi } from "vitest";
+
+import {
+  getEmbeddingForMemory,
+  listUnembeddedMemoryIds,
+  runEmbeddingBackfill,
+} from "../memory-embeddings.ts";
+import { closeDatabase, openDatabase } from "../sf-db.ts";
+import { createMemory } from "../memory-store.ts";
+
+let dir: string;
+const originalEnv = { ...process.env };
+
+beforeEach(() => {
+  dir = mkdtempSync(join(tmpdir(), "sf-embed-backfill-"));
+  openDatabase(join(dir, "sf.db"));
+  process.env = { ...originalEnv };
+});
+
+afterEach(() => {
+  closeDatabase();
+  rmSync(dir, { recursive: true, force: true });
+  vi.restoreAllMocks();
+  process.env = { ...originalEnv };
+});
+
+describe("listUnembeddedMemoryIds", () => {
+  test("returns active memories with no embedding row", () => {
+    const a = createMemory({ category: "architecture", content: "alpha" });
+    const b = createMemory({ category: "architecture", content: "beta" });
+    assert.ok(a && b);
+    const out = listUnembeddedMemoryIds();
+    assert.equal(out.length, 2);
+    assert.deepEqual(out.map((r) => r.id).sort(), [a, b].sort());
+  });
+});
+
+describe("runEmbeddingBackfill", () => {
+  test("returns 0 silently when SF_LLM_GATEWAY_KEY is unset", async () => {
+    delete process.env.SF_LLM_GATEWAY_KEY;
+    createMemory({ category: "architecture", content: "x" });
+    const fetchMock = vi.fn();
+    vi.stubGlobal("fetch", fetchMock);
+    assert.equal(await runEmbeddingBackfill(), 0);
+    assert.equal(fetchMock.mock.calls.length, 0);
+  });
+
+  test("embeds unembedded memories and persists vectors when gateway responds", async () => {
+    process.env.SF_LLM_GATEWAY_KEY = "secret";
+    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
+    const a = createMemory({ category: "architecture", content: "alpha" });
+    const b = createMemory({ category: "architecture", content: "beta" });
+    assert.ok(a && b);
+
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async (_url, init) => {
+        const body = JSON.parse((init as RequestInit).body as string);
+        const data = (body.input as string[]).map(
+          (_text: string, index: number) => ({
+            object: "embedding",
+            index,
+            embedding: [0.1 * (index + 1), 0.2, 0.3],
+          }),
+        );
+        return new Response(
+          JSON.stringify({ object: "list", data }),
+          { status: 200, headers: { "content-type": "application/json" } },
+        );
+      }),
+    );
+
+    const embedded = await runEmbeddingBackfill();
+    assert.equal(embedded, 2);
+    const rowA = getEmbeddingForMemory(a);
+    const rowB = getEmbeddingForMemory(b);
+    assert.ok(rowA && rowB);
+    assert.equal(rowA!.dim, 3);
+    assert.equal(rowA!.model, "qwen/qwen3-embedding-4b");
+  });
+
+  test("returns 0 and doesn't throw when gateway returns 4xx", async () => {
+    process.env.SF_LLM_GATEWAY_KEY = "secret";
+    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
+    createMemory({ category: "architecture", content: "x" });
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () =>
+        new Response("Unsupported model", { status: 400 }),
+      ),
+    );
+    assert.equal(await runEmbeddingBackfill(), 0);
+    // Memory is still un-embedded — backfill will retry later
+    assert.equal(listUnembeddedMemoryIds().length, 1);
+  });
+
+  test("respects maxPerInvocation", async () => {
+    process.env.SF_LLM_GATEWAY_KEY = "secret";
+    process.env.SF_LLM_GATEWAY_URL = "https://gateway.test/v1";
+    for (let i = 0; i < 5; i++) {
+      createMemory({ category: "architecture", content: `m${i}` });
+    }
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async (_url, init) => {
+        const body = JSON.parse((init as RequestInit).body as string);
+        const data = (body.input as string[]).map(
+          (_t: string, index: number) => ({
+            object: "embedding",
+            index,
+            embedding: [0.1, 0.2],
+          }),
+        );
+        return new Response(JSON.stringify({ object: "list", data }), {
+          status: 200,
+        });
+      }),
+    );
+    const embedded = await runEmbeddingBackfill({
+      maxPerInvocation: 2,
+      batchSize: 2,
+    });
+    assert.equal(embedded, 2);
+    assert.equal(listUnembeddedMemoryIds().length, 3);
+  });
+});
diff --git a/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.ts b/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.ts
new file mode 100644
index 000000000..3d2ecf1fa
--- /dev/null
+++ b/src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.ts
@@ -0,0 +1,211 @@
+/**
+ * llm-gateway adapter tests — mocked fetch.
+ *
+ * Live integration is gated behind INTEGRATION=1 + SF_LLM_GATEWAY_KEY:
+ *   INTEGRATION=1 SF_LLM_GATEWAY_KEY=... npx vitest run \
+ *     src/resources/extensions/sf/tests/memory-embeddings-llm-gateway.test.ts
+ */
+
+import assert from "node:assert/strict";
+import { afterEach, beforeEach, describe, test, vi } from "vitest";
+
+import {
+  createGatewayEmbedFn,
+  loadGatewayConfigFromEnv,
+  rerankCandidates,
+} from "../memory-embeddings-llm-gateway.ts";
+
+describe("loadGatewayConfigFromEnv", () => {
+  const original = { ...process.env };
+
+  afterEach(() => {
+    process.env = { ...original };
+  });
+
+  test("returns null when SF_LLM_GATEWAY_KEY is unset", () => {
+    delete process.env.SF_LLM_GATEWAY_KEY;
+    assert.equal(loadGatewayConfigFromEnv(), null);
+  });
+
+  test("populates defaults when only the key is set", () => {
+    process.env.SF_LLM_GATEWAY_KEY = "abc";
+    delete process.env.SF_LLM_GATEWAY_URL;
+    delete process.env.SF_LLM_GATEWAY_EMBED_MODEL;
+    delete process.env.SF_LLM_GATEWAY_RERANK_MODEL;
+    const cfg = loadGatewayConfigFromEnv();
+    assert.ok(cfg);
+    assert.equal(cfg!.apiKey, "abc");
+    assert.equal(cfg!.url, "https://llm-gateway.centralcloud.com/v1");
+    assert.equal(cfg!.embeddingModel, "qwen/qwen3-embedding-4b");
+    assert.equal(cfg!.rerankModel, undefined);
+  });
+
+  test("env overrides win", () => {
+    process.env.SF_LLM_GATEWAY_KEY = "abc";
+    process.env.SF_LLM_GATEWAY_URL = "https://example.test/v1";
+    process.env.SF_LLM_GATEWAY_EMBED_MODEL = "custom/embed";
+    process.env.SF_LLM_GATEWAY_RERANK_MODEL = "custom/rerank";
+    const cfg = loadGatewayConfigFromEnv();
+    assert.ok(cfg);
+    assert.equal(cfg!.url, "https://example.test/v1");
+    assert.equal(cfg!.embeddingModel, "custom/embed");
+    assert.equal(cfg!.rerankModel, "custom/rerank");
+  });
+});
+
+describe("createGatewayEmbedFn", () => {
+  const cfg = {
+    url: "https://gateway.test/v1",
+    apiKey: "secret",
+    embeddingModel: "qwen/qwen3-embedding-4b",
+  };
+
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  test("returns Float32Array[] in input order", async () => {
+    const fetchMock = vi.fn(async () =>
+      new Response(
+        JSON.stringify({
+          object: "list",
+          data: [
+            { object: "embedding", index: 0, embedding: [0.1, 0.2, 0.3] },
+            { object: "embedding", index: 1, embedding: [0.4, 0.5, 0.6] },
+          ],
+        }),
+        { status: 200, headers: { "content-type": "application/json" } },
+      ),
+    );
+    vi.stubGlobal("fetch", fetchMock);
+    const embed = createGatewayEmbedFn(cfg);
+    const out = await embed(["hello", "world"]);
+    assert.equal(out.length, 2);
+    assert.deepEqual(Array.from(out[0]), [
+      0.1, 0.2, 0.3,
+    ].map((n) => Math.fround(n)));
+    assert.deepEqual(Array.from(out[1]), [
+      0.4, 0.5, 0.6,
+    ].map((n) => Math.fround(n)));
+
+    // Verify request shape
+    const [url, init] = fetchMock.mock.calls[0]!;
+    assert.equal(url, "https://gateway.test/v1/embeddings");
+    const headers = (init as RequestInit).headers as Record<string, string>;
+    assert.equal(headers.Authorization, "Bearer secret");
+    const body = JSON.parse((init as RequestInit).body as string);
+    assert.equal(body.model, "qwen/qwen3-embedding-4b");
+    assert.deepEqual(body.input, ["hello", "world"]);
+  });
+
+  test("re-orders out-of-order responses by index", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () =>
+        new Response(
+          JSON.stringify({
+            object: "list",
+            data: [
+              { object: "embedding", index: 1, embedding: [0.4] },
+              { object: "embedding", index: 0, embedding: [0.1] },
+            ],
+          }),
+          { status: 200 },
+        ),
+      ),
+    );
+    const out = await createGatewayEmbedFn(cfg)(["a", "b"]);
+    assert.deepEqual(Array.from(out[0]), [Math.fround(0.1)]);
+    assert.deepEqual(Array.from(out[1]), [Math.fround(0.4)]);
+  });
+
+  test("throws on non-2xx with a useful message", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () =>
+        new Response("Unsupported model 'x'", { status: 400 }),
+      ),
+    );
+    await assert.rejects(
+      () => createGatewayEmbedFn(cfg)(["hi"]),
+      /llm-gateway \/embeddings 400.*Unsupported model/,
+    );
+  });
+
+  test("returns [] for empty input without making a request", async () => {
+    const fetchMock = vi.fn();
+    vi.stubGlobal("fetch", fetchMock);
+    const out = await createGatewayEmbedFn(cfg)([]);
+    assert.deepEqual(out, []);
+    assert.equal(fetchMock.mock.calls.length, 0);
+  });
+});
+
+describe("rerankCandidates", () => {
+  const cfg = {
+    url: "https://gateway.test/v1",
+    apiKey: "secret",
+    embeddingModel: "qwen/qwen3-embedding-4b",
+    rerankModel: "bge-reranker",
+  };
+
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  test("returns null when rerankModel is unset", async () => {
+    const out = await rerankCandidates(
+      { ...cfg, rerankModel: undefined },
+      "q",
+      [{ id: "a", text: "x" }],
+    );
+    assert.equal(out, null);
+  });
+
+  test("returns scores aligned to original ids", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () =>
+        new Response(
+          JSON.stringify({
+            results: [
+              { index: 1, relevance_score: 0.9 },
+              { index: 0, relevance_score: 0.1 },
+            ],
+          }),
+          { status: 200 },
+        ),
+      ),
+    );
+    const out = await rerankCandidates(cfg, "q", [
+      { id: "a", text: "alpha" },
+      { id: "b", text: "beta" },
+    ]);
+    assert.deepEqual(out, [
+      { id: "b", score: 0.9 },
+      { id: "a", score: 0.1 },
+    ]);
+  });
+
+  test("degrades to null on 503 (worker offline)", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () => new Response("worker unavailable", { status: 503 })),
+    );
+    const out = await rerankCandidates(cfg, "q", [{ id: "a", text: "x" }]);
+    assert.equal(out, null);
+  });
+
+  test("degrades to null on 200 'no worker rerank' body", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () =>
+        new Response("no worker with rerank capability is available", {
+          status: 200,
+        }),
+      ),
+    );
+    const out = await rerankCandidates(cfg, "q", [{ id: "a", text: "x" }]);
+    assert.equal(out, null);
+  });
+});
diff --git a/src/resources/extensions/sf/tests/memory-store.test.ts b/src/resources/extensions/sf/tests/memory-store.test.ts
index 4eac27dbc..16b89c54a 100644
--- a/src/resources/extensions/sf/tests/memory-store.test.ts
+++ b/src/resources/extensions/sf/tests/memory-store.test.ts
@@ -515,7 +515,7 @@ test("memory-store: schema includes memories table", () => {
   const version = adapter
     .prepare("SELECT MAX(version) as v FROM schema_version")
     .get();
-  assert.deepStrictEqual(version?.["v"], 21, "schema version should be 21");
+  assert.deepStrictEqual(version?.["v"], 25, "schema version should be 25");
   closeDatabase();
 });
 