fix: wire sf memory to llm gateway aliases

This commit is contained in:
Mikael Hugo 2026-05-05 22:10:54 +02:00
parent d75ebfe7c3
commit 305b4869ac
8 changed files with 208 additions and 37 deletions

View file

@ -211,9 +211,12 @@ async function handleSearch(ctx, parsed) {
ctx.ui.notify("No matches.", "info");
return;
}
const usingEmbeddings = !!process.env.SF_LLM_GATEWAY_KEY;
const usingRerank =
usingEmbeddings && !!process.env.SF_LLM_GATEWAY_RERANK_MODEL;
const { loadGatewayConfigFromEnv } = await import(
"./memory-embeddings-llm-gateway.js"
);
const gatewayConfig = loadGatewayConfigFromEnv();
const usingEmbeddings = !!gatewayConfig;
const usingRerank = !!gatewayConfig?.rerankModel;
const rankLabel = usingRerank
? "embedding+rerank-ranked"
: usingEmbeddings

View file

@ -30,15 +30,32 @@ const ENV_KEY = "SF_LLM_GATEWAY_KEY";
// Env var names for the gateway base URL and the model overrides.
const ENV_URL = "SF_LLM_GATEWAY_URL";
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL";
// Model ids used when the matching override env var is unset or blank.
const DEFAULT_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
const DEFAULT_RERANK_MODEL = "Qwen/Qwen3-Reranker-0.6B";
// Env vars that may carry the gateway API key. Order is lookup precedence:
// the first name holding a non-blank value wins, so the SF-prefixed name
// overrides the shared gateway/mux aliases.
const KEY_ALIASES = [
ENV_KEY,
"LLM_GATEWAY_API_KEY",
"LLM_GATEWAY_BEARER_KEY",
"LLM_MUX_API_KEY",
];
// Env vars that may carry the gateway base URL, with the same
// first-non-blank-wins precedence as KEY_ALIASES.
const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"];
/**
 * Resolve the first usable value among several environment variable names.
 *
 * Names are consulted in the order given. A variable that is unset, empty, or
 * whitespace-only is treated as absent. The winning value is returned trimmed.
 *
 * @param keys Environment variable names, highest precedence first.
 * @returns The first non-blank trimmed value, or "" when none is set.
 */
function firstEnvValue(keys) {
	const winner = keys
		.map((name) => process.env[name]?.trim())
		.find((candidate) => Boolean(candidate));
	return winner ?? "";
}
/** Read gateway config from env. Returns null when no gateway API key is
 * configured (SF_LLM_GATEWAY_KEY or any other name in KEY_ALIASES) — the
 * gateway path is opt-in and silently absent otherwise. When a key is present,
 * returns { url, apiKey, embeddingModel, rerankModel }. */
export function loadGatewayConfigFromEnv() {
const apiKey = process.env[ENV_KEY];
const apiKey = firstEnvValue(KEY_ALIASES);
if (!apiKey) return null;
const url = process.env[ENV_URL] ?? "https://llm-gateway.centralcloud.com/v1";
const url =
firstEnvValue(URL_ALIASES) || "https://llm-gateway.centralcloud.com/v1";
const embeddingModel =
process.env[ENV_EMBED_MODEL] ?? "qwen/qwen3-embedding-4b";
const rerankModel = process.env[ENV_RERANK_MODEL] || undefined;
firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL;
const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL;
return { url, apiKey, embeddingModel, rerankModel };
}
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.

View file

@ -123,9 +123,9 @@ These are runtime-only; SF reads them at startup, never persists them, never log
| Variable | Purpose | Default |
|---|---|---|
| `SF_LLM_GATEWAY_KEY` | Bearer token for the inference-fabric llm-gateway. **When unset, embeddings are disabled** and `getRelevantMemoriesRanked` falls back to static (confidence × hit-count) ranking. | (unset) |
| `SF_LLM_GATEWAY_URL` | OpenAI-compatible endpoint base, including `/v1`. | `https://llm-gateway.centralcloud.com/v1` |
| `SF_LLM_GATEWAY_EMBED_MODEL` | Embedding model id served by the gateway. | `qwen/qwen3-embedding-4b` |
| `SF_LLM_GATEWAY_RERANK_MODEL` | Rerank model id. When unset OR no rerank worker is online, rerank silently degrades and the cosine pass alone ranks results. | (unset) |
| `SF_LLM_GATEWAY_KEY` | Bearer token for the inference-fabric llm-gateway. Also accepts `LLM_GATEWAY_API_KEY`, `LLM_GATEWAY_BEARER_KEY`, and `LLM_MUX_API_KEY` (checked in that order after `SF_LLM_GATEWAY_KEY`). **When none of these is set, embeddings are disabled** and `getRelevantMemoriesRanked` falls back to static (confidence × hit-count) ranking. | (unset) |
| `SF_LLM_GATEWAY_URL` | OpenAI-compatible endpoint base, including `/v1`. Also accepts `LLM_GATEWAY_BASE_URL` and `LLM_MUX_BASE_URL`. | `https://llm-gateway.centralcloud.com/v1` |
| `SF_LLM_GATEWAY_EMBED_MODEL` | Embedding model id served by the gateway. | `Qwen/Qwen3-Embedding-4B` |
| `SF_LLM_GATEWAY_RERANK_MODEL` | Rerank model id. When the gateway key is set, SF defaults to `Qwen/Qwen3-Reranker-0.6B`; if no rerank worker is online, rerank silently degrades and the cosine pass alone ranks results. | `Qwen/Qwen3-Reranker-0.6B` |
Once a gateway key is set (`SF_LLM_GATEWAY_KEY` or one of its aliases), the agent_end hook opportunistically backfills embeddings for any memories without vectors (50 per turn, 16 per batch). `/sf memory search "<query>"` lights up the embedding-ranked path; without a key it shows static rank.

View file

@ -0,0 +1,62 @@
import { existsSync } from "node:fs";
import { dirname, join, resolve } from "node:path";
import { fileURLToPath } from "node:url";
/**
 * Pick the first path in `paths` that exists on disk.
 *
 * @param paths Candidate filesystem paths, in priority order.
 * @returns The first candidate for which `existsSync` is true, or null when none exists.
 */
function firstExisting(paths) {
	for (const location of paths) {
		if (existsSync(location)) {
			return location;
		}
	}
	return null;
}
/**
 * Locate the local Claude marketplace fixture checkouts that tests rely on.
 *
 * For each fixture the first existing candidate wins, probed in this order:
 * the explicit env override, well-known locations under HOME, directories
 * three and four levels above `testDir`, and a sibling of the directory five
 * levels above this module.
 *
 * @param testDir Directory of the calling test, used as the anchor for the
 *   relative candidate locations.
 * @returns `{ available, skipReason, fixtures }`. `fixtures` carries both
 *   resolved paths when both checkouts were found; otherwise it is null and
 *   `skipReason` holds a human-readable explanation.
 */
export function getMarketplaceFixtures(testDir) {
	const home = process.env.HOME ?? "";
	const repoRoot = resolve(
		dirname(fileURLToPath(import.meta.url)),
		"..",
		"..",
		"..",
		"..",
		"..",
	);
	const parent = resolve(testDir, "..", "..", "..");
	const grandparent = resolve(parent, "..");

	// path.join drops empty segments, so with HOME unset the home-based
	// candidates would silently turn into cwd-relative paths and could match
	// an unrelated directory. Only offer them when HOME is actually set.
	const homeSkillsCandidates = home
		? [join(home, "repos", "claude_skills")]
		: [];
	const homePluginsCandidates = home
		? [
				join(home, ".codex", "plugins", "cache", "claude-plugins-official"),
				join(home, ".claude", "plugins", "cache", "claude-plugins-official"),
				join(
					home,
					".claude",
					"plugins",
					"marketplaces",
					"claude-plugins-official",
				),
				join(home, "repos", "claude-plugins-official"),
			]
		: [];

	const claudeSkillsPath = firstExisting(
		[
			process.env.CLAUDE_SKILLS_MARKETPLACE_PATH,
			...homeSkillsCandidates,
			join(parent, "claude_skills"),
			join(grandparent, "claude_skills"),
			join(repoRoot, "..", "claude_skills"),
		].filter(Boolean),
	);
	const claudePluginsOfficialPath = firstExisting(
		[
			process.env.CLAUDE_PLUGINS_OFFICIAL_MARKETPLACE_PATH,
			...homePluginsCandidates,
			join(parent, "claude-plugins-official"),
			join(grandparent, "claude-plugins-official"),
			join(repoRoot, "..", "claude-plugins-official"),
		].filter(Boolean),
	);

	// Both checkouts are required; a single missing one marks the fixtures unavailable.
	const available = Boolean(claudeSkillsPath && claudePluginsOfficialPath);
	return {
		available,
		skipReason: available ? null : "Claude marketplace fixture repos not found",
		fixtures: available
			? {
					claudeSkillsPath,
					claudePluginsOfficialPath,
				}
			: null,
	};
}

View file

@ -0,0 +1,66 @@
import assert from "node:assert/strict";
import { afterEach, test } from "vitest";
import { loadGatewayConfigFromEnv } from "../memory-embeddings-llm-gateway.js";
const KEYS = [
"SF_LLM_GATEWAY_KEY",
"SF_LLM_GATEWAY_URL",
"SF_LLM_GATEWAY_EMBED_MODEL",
"SF_LLM_GATEWAY_RERANK_MODEL",
"LLM_GATEWAY_API_KEY",
"LLM_GATEWAY_BEARER_KEY",
"LLM_GATEWAY_BASE_URL",
"LLM_MUX_API_KEY",
"LLM_MUX_BASE_URL",
];
function withCleanGatewayEnv(fn) {
const original = Object.fromEntries(
KEYS.map((key) => [key, process.env[key]]),
);
for (const key of KEYS) delete process.env[key];
afterEach(() => {
for (const key of KEYS) {
if (original[key] === undefined) delete process.env[key];
else process.env[key] = original[key];
}
});
fn();
}
test("loadGatewayConfigFromEnv accepts SF-prefixed configuration", () => {
	withCleanGatewayEnv(() => {
		// All four SF-prefixed variables are set explicitly; the loader must
		// hand each of them back unchanged.
		Object.assign(process.env, {
			SF_LLM_GATEWAY_KEY: "sf-key",
			SF_LLM_GATEWAY_URL: "https://example.test/v1",
			SF_LLM_GATEWAY_EMBED_MODEL: "embed-model",
			SF_LLM_GATEWAY_RERANK_MODEL: "rerank-model",
		});
		const expected = {
			url: "https://example.test/v1",
			apiKey: "sf-key",
			embeddingModel: "embed-model",
			rerankModel: "rerank-model",
		};
		assert.deepEqual(loadGatewayConfigFromEnv(), expected);
	});
});
test("loadGatewayConfigFromEnv accepts llm-gateway shell aliases", () => {
	withCleanGatewayEnv(() => {
		// Only alias names are set; the model ids are left unset so the
		// expected config carries the default model ids.
		Object.assign(process.env, {
			LLM_GATEWAY_BEARER_KEY: "gateway-key",
			LLM_GATEWAY_BASE_URL: "https://llm-gateway.test/v1",
		});
		const expected = {
			url: "https://llm-gateway.test/v1",
			apiKey: "gateway-key",
			embeddingModel: "Qwen/Qwen3-Embedding-4B",
			rerankModel: "Qwen/Qwen3-Reranker-0.6B",
		};
		assert.deepEqual(loadGatewayConfigFromEnv(), expected);
	});
});
test("loadGatewayConfigFromEnv returns null without any gateway key", () => {
	withCleanGatewayEnv(() => {
		// No key variable of any name is present inside the cleaned environment.
		const config = loadGatewayConfigFromEnv();
		assert.equal(config, null);
	});
});

View file

@ -8,13 +8,13 @@ import { fileURLToPath } from "node:url";
import { test } from "vitest";
const __dirname = dirname(fileURLToPath(import.meta.url));
const autoTsPath = join(
const autoJsPath = join(
__dirname,
"..",
"resources",
"extensions",
"sf",
"auto.ts",
"auto.js",
);
const loaderTsPath = join(__dirname, "..", "loader.ts");
@ -26,26 +26,26 @@ test("loader.ts sets SF_PKG_ROOT env var", () => {
);
});
test("auto.ts resume uses SF_PKG_ROOT for resource-loader import, not bare relative path", () => {
const autoSrc = readFileSync(autoTsPath, "utf-8");
test("auto.js resume uses SF_PKG_ROOT for resource-loader import, not bare relative path", () => {
const autoSrc = readFileSync(autoJsPath, "utf-8");
// Must reference SF_PKG_ROOT to build an absolute path
assert.ok(
autoSrc.includes("process.env.SF_PKG_ROOT"),
"auto.ts must use SF_PKG_ROOT to resolve resource-loader.js from deployed extension path",
"auto.js must use SF_PKG_ROOT to resolve resource-loader.js from deployed extension path",
);
// The import must use the computed variable (resourceLoaderPath), not a hardcoded relative path.
assert.ok(
autoSrc.includes("await import(resourceLoaderPath)"),
"auto.ts resource-loader import must use the computed resourceLoaderPath variable, not a hardcoded relative path",
"auto.js resource-loader import must use the computed resourceLoaderPath variable, not a hardcoded relative path",
);
// The resourceLoaderPath must be constructed from SF_PKG_ROOT via pathToFileURL
// (raw filesystem paths break on Windows with ERR_UNSUPPORTED_ESM_URL_SCHEME)
assert.ok(
autoSrc.includes("pathToFileURL(join(pkgRoot,"),
"auto.ts must convert the constructed path to a file URL for cross-platform import()",
"auto.js must convert the constructed path to a file URL for cross-platform import()",
);
});

View file

@ -497,7 +497,6 @@ test("refresh failures keep the workspace locked and expose the failed bridge-re
test("fresh sf --web browser onboarding stays locked on failed validation and unlocks after a successful retry", async (_t) => {
if (process.platform === "win32") {
return; // skip: "runtime launch test uses POSIX browser-open stubs";
return;
}
const tempRoot = mkdtempSync(join(tmpdir(), "sf-web-onboarding-runtime-"));
@ -591,7 +590,7 @@ test("fresh sf --web browser onboarding stays locked on failed validation and un
providerId: "openai",
apiKey: "valid-demo-key",
}),
signal: AbortSignal.timeout(60_000),
signal: AbortSignal.timeout(120_000),
});
assert.equal(
validValidation.status,
@ -617,4 +616,4 @@ test("fresh sf --web browser onboarding stays locked on failed validation and un
const bootAfterPayload = (await bootAfter.json()) as any;
assert.equal(bootAfterPayload.onboarding.locked, false);
assert.equal(bootAfterPayload.onboarding.lockReason, null);
});
}, 180_000);

View file

@ -174,38 +174,62 @@ test("executeTavilySearch sends POST to Tavily API and produces CachedSearchResu
// =============================================================================
test("resolveSearchProvider returns 'tavily' when TAVILY_API_KEY is set and BRAVE_API_KEY is not", (_t) => {
const origTavily = process.env.TAVILY_API_KEY;
const origBrave = process.env.BRAVE_API_KEY;
const keys = [
"TAVILY_API_KEY",
"BRAVE_API_KEY",
"MINIMAX_API_KEY",
"MINIMAX_CODE_PLAN_KEY",
"MINIMAX_CODING_API_KEY",
"SERPER_API_KEY",
"EXA_API_KEY",
"OLLAMA_API_KEY",
];
const originals: Record<string, string | undefined> = {};
for (const key of keys) {
originals[key] = process.env[key];
delete process.env[key];
}
process.env.TAVILY_API_KEY = "tvly-test-key";
delete process.env.BRAVE_API_KEY;
afterEach(() => {
if (origTavily !== undefined) process.env.TAVILY_API_KEY = origTavily;
else delete process.env.TAVILY_API_KEY;
if (origBrave !== undefined) process.env.BRAVE_API_KEY = origBrave;
else delete process.env.BRAVE_API_KEY;
for (const key of keys) {
if (originals[key] !== undefined) process.env[key] = originals[key];
else delete process.env[key];
}
});
const provider = resolveSearchProvider();
const provider = resolveSearchProvider("auto");
assert.equal(provider, "tavily");
});
test("resolveSearchProvider returns 'brave' when only BRAVE_API_KEY is set", (_t) => {
const origTavily = process.env.TAVILY_API_KEY;
const origBrave = process.env.BRAVE_API_KEY;
const keys = [
"TAVILY_API_KEY",
"BRAVE_API_KEY",
"MINIMAX_API_KEY",
"MINIMAX_CODE_PLAN_KEY",
"MINIMAX_CODING_API_KEY",
"SERPER_API_KEY",
"EXA_API_KEY",
"OLLAMA_API_KEY",
];
const originals: Record<string, string | undefined> = {};
for (const key of keys) {
originals[key] = process.env[key];
delete process.env[key];
}
delete process.env.TAVILY_API_KEY;
process.env.BRAVE_API_KEY = "BSA-test-key";
afterEach(() => {
if (origTavily !== undefined) process.env.TAVILY_API_KEY = origTavily;
else delete process.env.TAVILY_API_KEY;
if (origBrave !== undefined) process.env.BRAVE_API_KEY = origBrave;
else delete process.env.BRAVE_API_KEY;
for (const key of keys) {
if (originals[key] !== undefined) process.env[key] = originals[key];
else delete process.env[key];
}
});
const provider = resolveSearchProvider();
const provider = resolveSearchProvider("auto");
assert.equal(provider, "brave");
});