fix: wire sf memory to llm gateway aliases
This commit is contained in:
parent
d75ebfe7c3
commit
305b4869ac
8 changed files with 208 additions and 37 deletions
|
|
@ -211,9 +211,12 @@ async function handleSearch(ctx, parsed) {
|
|||
ctx.ui.notify("No matches.", "info");
|
||||
return;
|
||||
}
|
||||
const usingEmbeddings = !!process.env.SF_LLM_GATEWAY_KEY;
|
||||
const usingRerank =
|
||||
usingEmbeddings && !!process.env.SF_LLM_GATEWAY_RERANK_MODEL;
|
||||
const { loadGatewayConfigFromEnv } = await import(
|
||||
"./memory-embeddings-llm-gateway.js"
|
||||
);
|
||||
const gatewayConfig = loadGatewayConfigFromEnv();
|
||||
const usingEmbeddings = !!gatewayConfig;
|
||||
const usingRerank = !!gatewayConfig?.rerankModel;
|
||||
const rankLabel = usingRerank
|
||||
? "embedding+rerank-ranked"
|
||||
: usingEmbeddings
|
||||
|
|
|
|||
|
|
@ -30,15 +30,32 @@ const ENV_KEY = "SF_LLM_GATEWAY_KEY";
|
|||
const ENV_URL = "SF_LLM_GATEWAY_URL";
|
||||
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
|
||||
const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL";
|
||||
/** Embedding model id used when SF_LLM_GATEWAY_EMBED_MODEL is unset or blank. */
const DEFAULT_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
/** Rerank model id used when SF_LLM_GATEWAY_RERANK_MODEL is unset or blank. */
const DEFAULT_RERANK_MODEL = "Qwen/Qwen3-Reranker-0.6B";

/**
 * Environment variables that may carry the gateway bearer token, in priority
 * order: the SF-specific name first, then the generic gateway/mux names.
 */
const KEY_ALIASES = [
  ENV_KEY,
  "LLM_GATEWAY_API_KEY",
  "LLM_GATEWAY_BEARER_KEY",
  "LLM_MUX_API_KEY",
];

/** Environment variables that may carry the gateway base URL, in priority order. */
const URL_ALIASES = [ENV_URL, "LLM_GATEWAY_BASE_URL", "LLM_MUX_BASE_URL"];
|
||||
/**
 * Return the first non-blank value among the given environment variables.
 *
 * Names are checked in array order, so earlier entries take precedence.
 * Values are trimmed; unset, empty, and whitespace-only variables are skipped.
 *
 * @param {readonly string[]} keys Environment variable names, highest priority first.
 * @returns {string} The trimmed value, or "" when none of the variables holds one.
 */
function firstEnvValue(keys) {
  for (const key of keys) {
    const value = process.env[key]?.trim();
    if (value) return value;
  }
  return "";
}
|
||||
/**
 * Read gateway config from env.
 *
 * The bearer token is taken from the first non-blank variable in KEY_ALIASES
 * and the base URL from the first non-blank variable in URL_ALIASES. Returns
 * null when none of the key aliases is set — the gateway path is opt-in and
 * silently absent otherwise.
 *
 * Blank (empty or whitespace-only) values are treated as unset, so the URL
 * and both model ids fall back to their defaults. Note that `rerankModel` is
 * therefore always populated on a non-null result.
 *
 * @returns {{ url: string, apiKey: string, embeddingModel: string, rerankModel: string } | null}
 */
export function loadGatewayConfigFromEnv() {
  const apiKey = firstEnvValue(KEY_ALIASES);
  if (!apiKey) return null;
  const url =
    firstEnvValue(URL_ALIASES) || "https://llm-gateway.centralcloud.com/v1";
  const embeddingModel =
    firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL;
  const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL;
  return { url, apiKey, embeddingModel, rerankModel };
}
|
||||
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.
|
||||
|
|
|
|||
|
|
@ -123,9 +123,9 @@ These are runtime-only; SF reads them at startup, never persists them, never log
|
|||
|
||||
| Variable | Purpose | Default |
|
||||
|---|---|---|
|
||||
| `SF_LLM_GATEWAY_KEY` | Bearer token for the inference-fabric llm-gateway. **When unset, embeddings are disabled** and `getRelevantMemoriesRanked` falls back to static (confidence × hit-count) ranking. | (unset) |
|
||||
| `SF_LLM_GATEWAY_URL` | OpenAI-compatible endpoint base, including `/v1`. | `https://llm-gateway.centralcloud.com/v1` |
|
||||
| `SF_LLM_GATEWAY_EMBED_MODEL` | Embedding model id served by the gateway. | `qwen/qwen3-embedding-4b` |
|
||||
| `SF_LLM_GATEWAY_RERANK_MODEL` | Rerank model id. When unset OR no rerank worker is online, rerank silently degrades and the cosine pass alone ranks results. | (unset) |
|
||||
| `SF_LLM_GATEWAY_KEY` | Bearer token for the inference-fabric llm-gateway. Also accepts `LLM_GATEWAY_API_KEY`, `LLM_GATEWAY_BEARER_KEY`, and `LLM_MUX_API_KEY`. **When unset, embeddings are disabled** and `getRelevantMemoriesRanked` falls back to static (confidence × hit-count) ranking. | (unset) |
|
||||
| `SF_LLM_GATEWAY_URL` | OpenAI-compatible endpoint base, including `/v1`. Also accepts `LLM_GATEWAY_BASE_URL` and `LLM_MUX_BASE_URL`. | `https://llm-gateway.centralcloud.com/v1` |
|
||||
| `SF_LLM_GATEWAY_EMBED_MODEL` | Embedding model id served by the gateway. | `Qwen/Qwen3-Embedding-4B` |
|
||||
| `SF_LLM_GATEWAY_RERANK_MODEL` | Rerank model id. When the gateway key is set, SF defaults to `Qwen/Qwen3-Reranker-0.6B`; if no rerank worker is online, rerank silently degrades and the cosine pass alone ranks results. | `Qwen/Qwen3-Reranker-0.6B` |
|
||||
|
||||
Once `SF_LLM_GATEWAY_KEY` (or one of the accepted key aliases listed in the table above) is set, the agent_end hook opportunistically backfills embeddings for any memories without vectors (50 per turn, 16 per batch). `/sf memory search "<query>"` lights up the embedding-ranked path; without a key it shows static rank.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
import { existsSync } from "node:fs";
|
||||
import { dirname, join, resolve } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
/**
 * Return the first candidate path that exists on disk.
 *
 * @param {string[]} paths Candidate paths, highest priority first.
 * @returns {string | null} The first existing path, or null when none exists.
 */
function firstExisting(paths) {
  return paths.find((candidate) => existsSync(candidate)) ?? null;
}
|
||||
|
||||
/**
 * Locate local checkouts of the two Claude marketplace fixture repos
 * (`claude_skills` and `claude-plugins-official`).
 *
 * For each repo the candidates are tried in order: the explicit env override
 * (`CLAUDE_SKILLS_MARKETPLACE_PATH` / `CLAUDE_PLUGINS_OFFICIAL_MARKETPLACE_PATH`),
 * locations under the user's home directory, then directories derived from
 * `testDir` and from this module's own location.
 *
 * @param {string} testDir Directory of the calling test; ancestors three and
 *   four levels up are searched for sibling checkouts.
 * @returns {{
 *   available: boolean,
 *   skipReason: string | null,
 *   fixtures: { claudeSkillsPath: string, claudePluginsOfficialPath: string } | null,
 * }} `available` is true only when both repos were found; otherwise
 *   `fixtures` is null and `skipReason` explains why.
 */
export function getMarketplaceFixtures(testDir) {
  // HOME is normally unset on Windows, so fall back to USERPROFILE. An empty
  // value must not be joined: join("", "repos", …) yields a cwd-relative path
  // that could match an unrelated directory.
  const home = process.env.HOME || process.env.USERPROFILE || "";
  const underHome = (...segments) =>
    home ? join(home, ...segments) : undefined;

  // Five directory levels above this module.
  const repoRoot = resolve(
    dirname(fileURLToPath(import.meta.url)),
    "..",
    "..",
    "..",
    "..",
    "..",
  );
  const parent = resolve(testDir, "..", "..", "..");
  const grandparent = resolve(parent, "..");

  // filter(Boolean) drops an unset env override and skipped home candidates.
  const claudeSkillsPath = firstExisting(
    [
      process.env.CLAUDE_SKILLS_MARKETPLACE_PATH,
      underHome("repos", "claude_skills"),
      join(parent, "claude_skills"),
      join(grandparent, "claude_skills"),
      join(repoRoot, "..", "claude_skills"),
    ].filter(Boolean),
  );

  const claudePluginsOfficialPath = firstExisting(
    [
      process.env.CLAUDE_PLUGINS_OFFICIAL_MARKETPLACE_PATH,
      underHome(".codex", "plugins", "cache", "claude-plugins-official"),
      underHome(".claude", "plugins", "cache", "claude-plugins-official"),
      underHome(".claude", "plugins", "marketplaces", "claude-plugins-official"),
      underHome("repos", "claude-plugins-official"),
      join(parent, "claude-plugins-official"),
      join(grandparent, "claude-plugins-official"),
      join(repoRoot, "..", "claude-plugins-official"),
    ].filter(Boolean),
  );

  const available = Boolean(claudeSkillsPath && claudePluginsOfficialPath);
  return {
    available,
    skipReason: available ? null : "Claude marketplace fixture repos not found",
    fixtures: available
      ? {
          claudeSkillsPath,
          claudePluginsOfficialPath,
        }
      : null,
  };
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
import { loadGatewayConfigFromEnv } from "../memory-embeddings-llm-gateway.js";
|
||||
|
||||
/** Gateway-related environment variables that are cleared around each test body. */
const KEYS = [
  "SF_LLM_GATEWAY_KEY",
  "SF_LLM_GATEWAY_URL",
  "SF_LLM_GATEWAY_EMBED_MODEL",
  "SF_LLM_GATEWAY_RERANK_MODEL",
  "LLM_GATEWAY_API_KEY",
  "LLM_GATEWAY_BEARER_KEY",
  "LLM_GATEWAY_BASE_URL",
  "LLM_MUX_API_KEY",
  "LLM_MUX_BASE_URL",
];

/**
 * Run `fn` with every variable in KEYS removed from `process.env`, then put
 * the previous values back.
 *
 * Restoration happens in a `finally` block so it runs exactly once per call,
 * even when `fn` throws (e.g. a failed assertion), and does not rely on
 * registering a test-runner hook from inside a running test.
 *
 * @param {() => void} fn Synchronous test body to run against the clean env.
 */
function withCleanGatewayEnv(fn) {
  const original = Object.fromEntries(
    KEYS.map((key) => [key, process.env[key]]),
  );
  for (const key of KEYS) delete process.env[key];
  try {
    fn();
  } finally {
    for (const key of KEYS) {
      // Assigning undefined would store the string "undefined", so delete instead.
      if (original[key] === undefined) delete process.env[key];
      else process.env[key] = original[key];
    }
  }
}
|
||||
|
||||
// Explicit SF_-prefixed variables are used verbatim for all four fields.
test("loadGatewayConfigFromEnv accepts SF-prefixed configuration", () => {
  withCleanGatewayEnv(() => {
    process.env.SF_LLM_GATEWAY_KEY = "sf-key";
    process.env.SF_LLM_GATEWAY_URL = "https://example.test/v1";
    process.env.SF_LLM_GATEWAY_EMBED_MODEL = "embed-model";
    process.env.SF_LLM_GATEWAY_RERANK_MODEL = "rerank-model";

    assert.deepEqual(loadGatewayConfigFromEnv(), {
      url: "https://example.test/v1",
      apiKey: "sf-key",
      embeddingModel: "embed-model",
      rerankModel: "rerank-model",
    });
  });
});

// Alias variables supply key and URL; both model ids fall back to defaults.
test("loadGatewayConfigFromEnv accepts llm-gateway shell aliases", () => {
  withCleanGatewayEnv(() => {
    process.env.LLM_GATEWAY_BEARER_KEY = "gateway-key";
    process.env.LLM_GATEWAY_BASE_URL = "https://llm-gateway.test/v1";

    assert.deepEqual(loadGatewayConfigFromEnv(), {
      url: "https://llm-gateway.test/v1",
      apiKey: "gateway-key",
      embeddingModel: "Qwen/Qwen3-Embedding-4B",
      rerankModel: "Qwen/Qwen3-Reranker-0.6B",
    });
  });
});

// With no key under any alias the gateway is treated as not configured.
test("loadGatewayConfigFromEnv returns null without any gateway key", () => {
  withCleanGatewayEnv(() => {
    assert.equal(loadGatewayConfigFromEnv(), null);
  });
});
|
||||
|
|
@ -8,13 +8,13 @@ import { fileURLToPath } from "node:url";
|
|||
import { test } from "vitest";
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this file's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Extension source whose text is inspected by the SF_PKG_ROOT test.
const autoJsPath = join(
  __dirname,
  "..",
  "resources",
  "extensions",
  "sf",
  "auto.js",
);
// Loader source, one directory above this test file.
const loaderTsPath = join(__dirname, "..", "loader.ts");
|
||||
|
||||
|
|
@ -26,26 +26,26 @@ test("loader.ts sets SF_PKG_ROOT env var", () => {
|
|||
);
|
||||
});
|
||||
|
||||
// Static check on the text of auto.js: the resource-loader import must be
// resolved from SF_PKG_ROOT rather than a relative specifier.
test("auto.js resume uses SF_PKG_ROOT for resource-loader import, not bare relative path", () => {
  const autoSrc = readFileSync(autoJsPath, "utf-8");

  // Must reference SF_PKG_ROOT to build an absolute path
  assert.ok(
    autoSrc.includes("process.env.SF_PKG_ROOT"),
    "auto.js must use SF_PKG_ROOT to resolve resource-loader.js from deployed extension path",
  );

  // The import must use the computed variable (resourceLoaderPath), not a hardcoded relative path.
  assert.ok(
    autoSrc.includes("await import(resourceLoaderPath)"),
    "auto.js resource-loader import must use the computed resourceLoaderPath variable, not a hardcoded relative path",
  );

  // The resourceLoaderPath must be constructed from SF_PKG_ROOT via pathToFileURL
  // (raw filesystem paths break on Windows with ERR_UNSUPPORTED_ESM_URL_SCHEME)
  assert.ok(
    autoSrc.includes("pathToFileURL(join(pkgRoot,"),
    "auto.js must convert the constructed path to a file URL for cross-platform import()",
  );
});
|
||||
|
||||
|
|
|
|||
|
|
@ -497,7 +497,6 @@ test("refresh failures keep the workspace locked and expose the failed bridge-re
|
|||
test("fresh sf --web browser onboarding stays locked on failed validation and unlocks after a successful retry", async (_t) => {
|
||||
if (process.platform === "win32") {
|
||||
return; // skip: "runtime launch test uses POSIX browser-open stubs";
|
||||
return;
|
||||
}
|
||||
|
||||
const tempRoot = mkdtempSync(join(tmpdir(), "sf-web-onboarding-runtime-"));
|
||||
|
|
@ -591,7 +590,7 @@ test("fresh sf --web browser onboarding stays locked on failed validation and un
|
|||
providerId: "openai",
|
||||
apiKey: "valid-demo-key",
|
||||
}),
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
signal: AbortSignal.timeout(120_000),
|
||||
});
|
||||
assert.equal(
|
||||
validValidation.status,
|
||||
|
|
@ -617,4 +616,4 @@ test("fresh sf --web browser onboarding stays locked on failed validation and un
|
|||
const bootAfterPayload = (await bootAfter.json()) as any;
|
||||
assert.equal(bootAfterPayload.onboarding.locked, false);
|
||||
assert.equal(bootAfterPayload.onboarding.lockReason, null);
|
||||
});
|
||||
}, 180_000);
|
||||
|
|
|
|||
|
|
@ -174,38 +174,62 @@ test("executeTavilySearch sends POST to Tavily API and produces CachedSearchResu
|
|||
// =============================================================================
|
||||
|
||||
test("resolveSearchProvider returns 'tavily' when TAVILY_API_KEY is set and BRAVE_API_KEY is not", (_t) => {
  // Provider API keys cleared for the duration of the test so that only the
  // key set below is visible. NOTE(review): presumably this is every key
  // resolveSearchProvider consults — confirm against its implementation.
  const keys = [
    "TAVILY_API_KEY",
    "BRAVE_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CODE_PLAN_KEY",
    "MINIMAX_CODING_API_KEY",
    "SERPER_API_KEY",
    "EXA_API_KEY",
    "OLLAMA_API_KEY",
  ];
  const originals: Record<string, string | undefined> = {};
  for (const key of keys) {
    originals[key] = process.env[key];
    delete process.env[key];
  }

  // try/finally restores the environment exactly once, even when the
  // assertion throws, without registering a runner hook mid-test.
  try {
    process.env.TAVILY_API_KEY = "tvly-test-key";

    const provider = resolveSearchProvider("auto");
    assert.equal(provider, "tavily");
  } finally {
    for (const key of keys) {
      if (originals[key] !== undefined) process.env[key] = originals[key];
      else delete process.env[key];
    }
  }
});
|
||||
|
||||
test("resolveSearchProvider returns 'brave' when only BRAVE_API_KEY is set", (_t) => {
  // Provider API keys cleared for the duration of the test so that only the
  // key set below is visible. NOTE(review): presumably this is every key
  // resolveSearchProvider consults — confirm against its implementation.
  const keys = [
    "TAVILY_API_KEY",
    "BRAVE_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CODE_PLAN_KEY",
    "MINIMAX_CODING_API_KEY",
    "SERPER_API_KEY",
    "EXA_API_KEY",
    "OLLAMA_API_KEY",
  ];
  const originals: Record<string, string | undefined> = {};
  for (const key of keys) {
    originals[key] = process.env[key];
    delete process.env[key];
  }

  // try/finally restores the environment exactly once, even when the
  // assertion throws, without registering a runner hook mid-test.
  try {
    process.env.BRAVE_API_KEY = "BSA-test-key";

    const provider = resolveSearchProvider("auto");
    assert.equal(provider, "brave");
  } finally {
    for (const key of keys) {
      if (originals[key] !== undefined) process.env[key] = originals[key];
      else delete process.env[key];
    }
  }
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue