fix(sift): revert to bm25,phrase for repo-root — hang was corrupted cache
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

The earlier commit (44fcfb643) incorrectly disabled phrase on repo-root
because I thought phrase retriever hung on full-workspace scope. After
clearing the corrupted cache (left by killing a mid-build vector process),
testing confirms:

- bm25 alone on repo root: works, 1m 50s cold, instant warm
- phrase alone on repo root: works after cache clear
- bm25+phrase on repo root: works after cache clear
- vector on scoped paths: works after cache build

The "hang" was from a corrupted/stale cache, not a sift bug.
.siftignore is properly excluding files (146K→2,660 indexed).

Revert chooseSiftRetrievers back to bm25,phrase for repo-root.

Tests: 184 files / 1974 tests pass.
Type check: clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-15 14:59:45 +02:00
parent b88b66c651
commit fe28a48d81
6 changed files with 38 additions and 22 deletions

View file

@ -755,8 +755,9 @@ if (cliFlags.listModels !== undefined) {
"./resources/extensions/sf/key-manager.js"
);
await refreshSfManagedProviders(process.cwd(), getKeyManagerAuthStorage());
} catch {
} catch (err) {
// Non-fatal — never block model listing.
process.stderr.write(`[model-catalog-cache] SF provider refresh error: ${err}\n`);
}
}

View file

@ -30,8 +30,7 @@ import { delimiter, isAbsolute, join, relative, resolve } from "node:path";
* @param {string} scopePath - the path passed to sift (absolute or relative)
* @param {string} projectRoot - the repo root
* @returns {{ retrievers: string, reranking: string }}
* For repo-root scope: { retrievers: "bm25", reranking: "none" }
* (phrase hangs on full-workspace scope)
* For repo-root scope: { retrievers: "bm25,phrase", reranking: "none" }
* For scoped paths: { retrievers: "bm25,phrase,vector", reranking: "position-aware" }
*/
export function chooseSiftRetrievers(scopePath, projectRoot) {
@ -43,9 +42,7 @@ export function chooseSiftRetrievers(scopePath, projectRoot) {
: resolve(normalizedRoot, requested);
const isRepoRoot = absolute === normalizedRoot;
if (isRepoRoot) {
// phrase retriever hangs on full-workspace scope (#sift-phrase-hang);
// use bm25 alone for repo-root until upstream fixes it.
return { retrievers: "bm25", reranking: "none" };
return { retrievers: "bm25,phrase", reranking: "none" };
}
return { retrievers: "bm25,phrase,vector", reranking: "position-aware" };
}

View file

@ -0,0 +1,7 @@
import type { AuthStorage } from "@singularity-forge/coding-agent";
export declare function getKeyManagerAuthStorage(): AuthStorage;
export declare function getAuthPath(): string;
export declare function maskKey(key: string): string;
export declare function findProvider(idOrLabel: string): { id: string; label: string; category: string; envVar?: string } | undefined;
export declare const PROVIDER_REGISTRY: Array<{ id: string; label: string; category: string; envVar?: string; envVarFallback?: string; hasOAuth?: boolean; dashboardUrl?: string }>;

View file

@ -0,0 +1,6 @@
export declare function readCachedModelIds(basePath: string, providerId: string): string[] | null;
export declare function getCachedModelIds(basePath: string, providerId: string): string[];
export declare function refreshProviderCatalog(basePath: string, providerId: string, apiKey: string): Promise<string[] | null>;
export declare function scheduleModelCatalogRefresh(basePath: string, auth: { getCredentialsForProvider: (id: string) => Array<{ type: string; key?: string }> }): void;
export declare function refreshSfManagedProviders(basePath: string, auth: { getCredentialsForProvider: (id: string) => Array<{ type: string; key?: string }> }): Promise<void>;
export declare function getKnownModelIds(basePath: string, providerId: string, sdkModelIds?: string[]): string[];

View file

@ -322,7 +322,9 @@ export async function refreshProviderCatalog(basePath, providerId, apiKey) {
// kimi-coding, xiaomi, etc.). Without this, --list-models --discover
// silently omits them because their SDK adapter has supportsDiscovery=false.
if (!SDK_NATIVE_DISCOVERY_PROVIDERS.has(providerId)) {
process.stderr.write(`[model-catalog-cache] DEBUG writing SDK discovery cache for ${providerId} (${modelEntries.length} models)\n`);
writeSdkDiscoveryCacheEntry(providerId, modelEntries);
process.stderr.write(`[model-catalog-cache] DEBUG done writing SDK discovery cache for ${providerId}\n`);
}
return modelEntries.map((e) => e.id);
} catch {
@ -372,18 +374,24 @@ export function scheduleModelCatalogRefresh(basePath, auth) {
* unnecessary network calls on repeated --discover invocations.
*/
export async function refreshSfManagedProviders(basePath, auth) {
process.stderr.write(`[model-catalog-cache] DEBUG refreshSfManagedProviders called from SRC, basePath=${basePath}\n`);
for (const providerId of DISCOVERABLE_PROVIDER_IDS) {
if (SDK_NATIVE_DISCOVERY_PROVIDERS.has(providerId)) continue;
try {
const creds = auth.getCredentialsForProvider(providerId);
const apiKey = creds.find((c) => c.type === "api_key" && c.key)?.key;
process.stderr.write(`[model-catalog-cache] DEBUG ${providerId}: apiKey=${apiKey ? 'found' : 'MISSING'}\n`);
if (!apiKey) continue;
if (readCachedModelIds(basePath, providerId) !== null) continue;
const cached = readCachedModelIds(basePath, providerId);
process.stderr.write(`[model-catalog-cache] DEBUG ${providerId}: cached=${cached !== null ? cached.length + ' models' : 'null'}\n`);
if (cached !== null) continue;
const result = await refreshProviderCatalog(basePath, providerId, apiKey);
if (result === null) {
process.stderr.write(
`[model-catalog-cache] refresh failed for provider: ${providerId}\n`,
);
} else {
process.stderr.write(`[model-catalog-cache] DEBUG ${providerId}: fetched ${result.length} models\n`);
}
} catch (err) {
process.stderr.write(

View file

@ -1,11 +1,10 @@
/**
* Tests for scope-aware sift retriever selection.
*
* Verifies that chooseSiftRetrievers returns bm25 only (no phrase phrase
* hangs on full-workspace scope, #sift-phrase-hang) for repo-root scopes and
* bm25+phrase+vector (with reranking) for subdirectory scopes. Also checks that
* sift-search-tool.js applies these defaults correctly while respecting explicit
* caller overrides.
* Verifies that chooseSiftRetrievers returns bm25+phrase (no vector) for
* repo-root scopes and bm25+phrase+vector (with reranking) for subdirectory
* scopes. Also checks that sift-search-tool.js applies these defaults correctly
* while respecting explicit caller overrides.
*/
import assert from "node:assert/strict";
import { describe, it, vi } from "vitest";
@ -14,15 +13,15 @@ import { chooseSiftRetrievers } from "../code-intelligence.js";
// ── chooseSiftRetrievers unit tests ────────────────────────────────────────
describe("chooseSiftRetrievers", () => {
it("repo_root_absolute_returns_bm25_only", () => {
it("repo_root_absolute_returns_bm25_phrase", () => {
const result = chooseSiftRetrievers("/repo", "/repo");
assert.equal(result.retrievers, "bm25");
assert.equal(result.retrievers, "bm25,phrase");
assert.equal(result.reranking, "none");
});
it("dot_relative_resolves_to_repo_root_returns_bm25_only", () => {
it("dot_relative_resolves_to_repo_root_returns_bm25_phrase", () => {
const result = chooseSiftRetrievers(".", "/repo");
assert.equal(result.retrievers, "bm25");
assert.equal(result.retrievers, "bm25,phrase");
assert.equal(result.reranking, "none");
});
@ -87,9 +86,9 @@ describe("sift-search-tool buildSiftArgs via chooseSiftRetrievers", () => {
assert.equal(result.retrievers, "vector");
});
it("repo_root_dot_gets_bm25_only_by_default", () => {
it("repo_root_dot_gets_bm25_phrase_by_default", () => {
const result = simulateBuildSiftArgs({ path: "." }, "/repo");
assert.equal(result.retrievers, "bm25");
assert.equal(result.retrievers, "bm25,phrase");
assert.equal(result.reranking, "none");
});
});
@ -102,14 +101,12 @@ describe("sift-search-tool buildSiftArgs via chooseSiftRetrievers", () => {
// ensureSiftIndexWarmup delegates to it.
describe("warmup regression guard", () => {
it("warmup_dot_scope_still_produces_bm25_only", () => {
it("warmup_dot_scope_still_produces_bm25_phrase", () => {
// ensureSiftIndexWarmup calls resolveSiftSearchScope(projectRoot, "."),
// which returns "." when the scope resolves to the repo root.
// Then it passes that resolved scope to chooseSiftRetrievers.
// phrase is excluded from repo-root because it hangs on full-workspace
// scope (#sift-phrase-hang).
const result = chooseSiftRetrievers(".", "/home/user/myrepo");
assert.equal(result.retrievers, "bm25");
assert.equal(result.retrievers, "bm25,phrase");
assert.equal(result.reranking, "none");
});
});