Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
The sentence-transformers/all-MiniLM-L6-v2 embedding model inference hangs indefinitely during sift search, causing:
- Warmup to never complete (TTL expired 62+ min ago)
- All page-index-hybrid searches to time out
- The search cache to become stale

Fix: Restrict warmup and search to bm25+phrase retrievers with no ML reranking. This gives fast lexical results while avoiding the hanging embedding inference path. Also expose --retrievers and --reranking params in the sift_search tool so callers can override them per query if needed.

Closes #vector-hang-fix

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
859 lines
26 KiB
JavaScript
859 lines
26 KiB
JavaScript
/**
 * Optional code-intelligence backends for SF.
 *
 * Sift is the live code retrieval path. CODEBASE.md stays the durable fallback
 * when the live index is unavailable, cold, or degraded.
 */
|
|
import { spawn, spawnSync } from "node:child_process";
|
|
import {
|
|
existsSync,
|
|
mkdirSync,
|
|
readdirSync,
|
|
readFileSync,
|
|
statSync,
|
|
writeFileSync,
|
|
} from "node:fs";
|
|
import { delimiter, isAbsolute, join, relative, resolve } from "node:path";
|
|
import { getErrorMessage } from "./error-utils.js";
|
|
|
|
// File name of the Sift executable; the Windows build carries an `.exe` suffix.
const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift";

// Default freshness window for the warmup marker: six hours, in milliseconds.
const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 3_600_000;

// Default query sent to Sift by the warmup search.
const DEFAULT_SIFT_WARMUP_QUERY =
  "repo architecture source tests entrypoints configuration";

// Default `--limit` passed to the warmup search.
const DEFAULT_SIFT_WARMUP_LIMIT = 1;

// Default `--retriever-timeout-ms` passed to the warmup search.
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000;

// Default hard cap, in seconds, applied to the warmup process.
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 600;

// Seconds passed as `--kill-after` to the timeout wrapper; also added to the
// hard cap when deciding whether a "warming" marker has expired.
const SIFT_WARMUP_KILL_GRACE_SEC = 10;

// Default wall-clock budget, in milliseconds, for the synchronous health probe.
const DEFAULT_SIFT_HEALTH_TIMEOUT_MS = 60_000;

// Process-wide memo of health probe results, keyed by the probe's cache key.
const SIFT_HEALTH_CACHE = new Map();
|
|
// Path fragments that should never show up inside a repo-local Sift manifest.
// Each pattern matches a directory segment at the start of a path or after a
// `/` or `\` separator. Order matters: the cache inspection reports the first
// pattern that matches a manifest.
const SIFT_CACHE_POLLUTION_PATTERNS = [
  { label: ".claude worktrees", pattern: /(?:^|[/\\])\.claude[/\\]/ },
  { label: ".git internals", pattern: /(?:^|[/\\])\.git[/\\]/ },
  { label: "dist-test output", pattern: /(?:^|[/\\])dist-test[/\\]/ },
  { label: "node_modules", pattern: /(?:^|[/\\])node_modules[/\\]/ },
  {
    // Build output of any single workspace package: packages/<name>/dist/.
    label: "package dist output",
    pattern: /(?:^|[/\\])packages[/\\][^/\\]+[/\\]dist[/\\]/,
  },
];
|
|
/**
 * Compute the repo-local Sift runtime directory paths without touching disk.
 *
 * @param {string} projectRoot - Repository root the paths are derived from.
 * @returns {{ searchCache: string, tmpDir: string }} Paths under
 *   `<projectRoot>/.sf/runtime/sift`.
 */
export function resolveSiftWarmupRuntimeDirs(projectRoot) {
  const underSiftRuntime = (leaf) =>
    join(projectRoot, ".sf", "runtime", "sift", leaf);
  const searchCache = underSiftRuntime("search-cache");
  const tmpDir = underSiftRuntime("tmp");
  return { searchCache, tmpDir };
}
|
|
/**
 * Create the repo-local Sift runtime directories if they are missing.
 *
 * Purpose: keep Sift's search database scoped to the current repository instead
 * of sharing a process-global cache across unrelated projects.
 *
 * Consumer: Sift warmup, status probes, `sift_search`, and `codebase_search`.
 *
 * @param {string} projectRoot - Repository root that owns the runtime dirs.
 * @returns {{ searchCache: string, tmpDir: string }} The directory paths, both
 *   guaranteed to exist once this returns.
 */
export function ensureSiftRuntimeDirs(projectRoot) {
  const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
  for (const dir of [runtimeDirs.searchCache, runtimeDirs.tmpDir]) {
    // `recursive` creates missing parents and tolerates existing directories.
    mkdirSync(dir, { recursive: true });
  }
  return runtimeDirs;
}
|
|
/**
 * Build the environment handed to Sift child processes.
 *
 * Copies `env` and overrides `SIFT_SEARCH_CACHE` and `TMPDIR` with the
 * repo-local runtime directories. The directories are not created here.
 *
 * @param {string} projectRoot - Repository root that owns the runtime dirs.
 * @param {object} env - Base environment to extend; it is not mutated.
 * @returns {object} A new environment object.
 */
export function buildSiftEnv(projectRoot, env) {
  const { searchCache, tmpDir } = resolveSiftWarmupRuntimeDirs(projectRoot);
  return { ...env, SIFT_SEARCH_CACHE: searchCache, TMPDIR: tmpDir };
}
|
|
/**
 * Resolve a Sift search scope to the form Sift's local ignore matcher expects.
 *
 * Purpose: preserve `.siftignore` semantics by running Sift from the repository
 * root with repo-relative scopes instead of absolute paths.
 *
 * Consumer: Sift warmup, `sift_search`, and `codebase_search`.
 *
 * @param {string} projectRoot - Repository root; resolved to an absolute path.
 * @param {unknown} scope - Requested scope. Non-strings and blank strings mean
 *   the repository root.
 * @returns {string} `"."` for the root itself, a repo-relative path for scopes
 *   inside the root, or the trimmed request unchanged when it points outside.
 */
export function resolveSiftSearchScope(projectRoot, scope) {
  const normalizedRoot = resolve(projectRoot);
  const requested =
    typeof scope === "string" && scope.trim() ? scope.trim() : ".";
  const absolute = isAbsolute(requested)
    ? resolve(requested)
    : resolve(normalizedRoot, requested);
  const rel = relative(normalizedRoot, absolute);
  if (!rel) return ".";
  // Only a leading ".." path segment means "outside the root". A plain
  // startsWith("..") would also reject in-repo names such as "..cache".
  const escapesRoot = /^\.\.(?:[\\/]|$)/.test(rel);
  // `relative` returns an absolute path when no relative route exists
  // (for example a different drive on Windows).
  if (!escapesRoot && !isAbsolute(rel)) return rel;
  return requested;
}
|
|
/**
 * Turn a project root into an absolute, normalized path.
 *
 * @param {string} projectRoot - Absolute path, or one relative to the current
 *   working directory.
 * @returns {string} The absolute path produced by `path.resolve`.
 */
function normalizeProjectRoot(projectRoot) {
  const absoluteRoot = resolve(projectRoot);
  return absoluteRoot;
}
|
|
/**
 * Report whether a command can be located.
 *
 * @param {string | undefined | null} command - Command name or path.
 * @param {object} [env=process.env] - Environment passed to the lookup.
 * @returns {boolean} `false` for an empty command, otherwise whether
 *   `lookupExecutable` found it.
 */
function commandExists(command, env = process.env) {
  return command ? lookupExecutable(command, env) !== null : false;
}
|
|
/**
 * Locate a command either as a direct path or by scanning the PATH variable.
 *
 * Only regular files (after following symlinks) count as a match, so a
 * directory that happens to share the command's name is never returned.
 *
 * @param {string} command - Command name, or a path containing a separator.
 * @param {object} [env=process.env] - Environment whose PATH is searched.
 * @returns {string | null} The command itself when it is a path to a file, the
 *   first matching `<dir>/<command>` from PATH, or `null` when nothing matches.
 */
function lookupExecutable(command, env = process.env) {
  const isRegularFile = (candidate) => {
    try {
      return statSync(candidate).isFile();
    } catch {
      // Missing or unreadable entries are simply not a match.
      return false;
    }
  };
  if (/[\\/]/.test(command) && isRegularFile(command)) return command;
  let pathValue = env.PATH;
  if (pathValue == null && process.platform === "win32") {
    // Windows usually spells the variable "Path". `process.env` hides that by
    // being case-insensitive, but a plain copy such as `{ ...process.env }`
    // is not, so look the key up ignoring case.
    const pathKey = Object.keys(env).find(
      (name) => name.toUpperCase() === "PATH",
    );
    if (pathKey !== undefined) pathValue = env[pathKey];
  }
  for (const dir of (pathValue ?? "").split(delimiter).filter(Boolean)) {
    const candidate = join(dir, command);
    if (isRegularFile(candidate)) return candidate;
  }
  return null;
}
|
|
/**
 * Decide the hard wall-clock cap, in whole seconds, for the warmup process.
 *
 * Precedence: `SF_SIFT_HARD_TIMEOUT_DISABLE=1` disables the cap; otherwise an
 * explicit `override` wins; otherwise `SF_SIFT_HARD_TIMEOUT_SEC` is used
 * (`0` disables); otherwise the module default applies.
 *
 * @param {object} env - Environment to read the `SF_SIFT_HARD_TIMEOUT_*` keys from.
 * @param {number | null | undefined} override - Caller-supplied cap. Any value
 *   other than `undefined` that is not a positive finite number disables the cap.
 * @returns {number | null} Cap in seconds (always >= 1), or `null` when disabled.
 */
function resolveSiftWarmupHardTimeoutSec(env, override) {
  if (env.SF_SIFT_HARD_TIMEOUT_DISABLE === "1") return null;
  if (override !== undefined) {
    if (!Number.isFinite(override) || override <= 0) return null;
    // Clamp to one second: flooring a sub-second value would yield 0, which
    // GNU timeout interprets as "no timeout" while callers report a cap.
    return Math.max(1, Math.floor(override));
  }
  const raw = env.SF_SIFT_HARD_TIMEOUT_SEC?.trim();
  if (raw) {
    const parsed = Number.parseInt(raw, 10);
    if (parsed === 0) return null;
    if (Number.isFinite(parsed) && parsed > 0) return parsed;
    // Unparseable or negative values fall through to the default.
  }
  return DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC;
}
|
|
/**
 * Find a `timeout`-style wrapper binary to enforce the warmup hard cap.
 *
 * No wrapper is used on Windows. On macOS `gtimeout` is preferred over
 * `timeout`; elsewhere the order is reversed.
 *
 * @param {object} env - Environment whose PATH is searched for the wrapper.
 * @param {number} timeoutSec - Hard cap in seconds to pass to the wrapper.
 * @returns {{ binary: string, wrapperArgs: string[], timeoutSec: number } | null}
 *   The wrapper and the arguments that go before the wrapped command, or
 *   `null` when no wrapper is available.
 */
function resolveSiftWarmupTimeoutWrapper(env, timeoutSec) {
  if (process.platform === "win32") return null;
  const preference =
    process.platform === "darwin"
      ? ["gtimeout", "timeout"]
      : ["timeout", "gtimeout"];
  for (const name of preference) {
    const binary = lookupExecutable(name, env);
    if (!binary) continue;
    const wrapperArgs = [
      `--kill-after=${SIFT_WARMUP_KILL_GRACE_SEC}`,
      String(timeoutSec),
    ];
    return { binary, wrapperArgs, timeoutSec };
  }
  return null;
}
|
|
/**
 * Resolve which Sift binary to run.
 *
 * A non-blank `SIFT_PATH` is returned trimmed and as-is, without checking that
 * it exists. Otherwise PATH is searched for the platform binary name and, when
 * that name is not plain `sift`, for `sift` as well.
 *
 * @param {object} [env=process.env] - Environment providing `SIFT_PATH` and PATH.
 * @returns {string | null} The binary path, or `null` when none is found.
 */
export function resolveSiftBinary(env = process.env) {
  const fromEnv = env.SIFT_PATH?.trim();
  if (fromEnv) return fromEnv;
  const platformMatch = lookupExecutable(SIFT_BINARY_NAME, env);
  if (platformMatch != null) return platformMatch;
  if (SIFT_BINARY_NAME === "sift") return null;
  return lookupExecutable("sift", env);
}
|
|
/**
 * Read the health probe timeout from `SF_SIFT_HEALTH_TIMEOUT_MS`.
 *
 * @param {object} env - Environment to read the setting from.
 * @returns {number} The parsed value when it is a positive integer, otherwise
 *   the module default.
 */
function resolveSiftHealthTimeoutMs(env) {
  const configured = env.SF_SIFT_HEALTH_TIMEOUT_MS?.trim();
  if (configured) {
    const millis = Number.parseInt(configured, 10);
    if (Number.isFinite(millis) && millis > 0) return millis;
  }
  return DEFAULT_SIFT_HEALTH_TIMEOUT_MS;
}
|
|
/**
 * Pick the scope searched by the health probe.
 *
 * @param {string} projectRoot - Repository root to look inside.
 * @returns {string} The first of `src`, `packages`, `tests` that exists under
 *   the root, or `"."` when none of them does.
 */
function resolveSiftHealthProbePath(projectRoot) {
  const preferred = ["src", "packages", "tests"];
  const present = preferred.find((name) => existsSync(join(projectRoot, name)));
  return present ?? ".";
}
|
|
/**
 * Run a bounded, scoped `sift search` to check that Sift responds here.
 *
 * Results are memoized in `SIFT_HEALTH_CACHE` for the life of the process,
 * keyed by the root, the binary, and the env settings that affect the probe.
 * With `SF_SIFT_HEALTHCHECK_DISABLE=1` no process is spawned and a passing
 * result is recorded.
 *
 * @param {string} projectRoot - Repository root; also the probe's cwd.
 * @param {string} binaryPath - Sift binary to execute.
 * @param {object} env - Environment for settings and the child process.
 * @returns {object} Probe result with `ok`, `probePath`, `timeoutMs`,
 *   `searchCache`, `tmpDir`, `reason`, and, when a process was run, `status`,
 *   `signal`, and `stderr`.
 */
function runSiftHealthProbe(projectRoot, binaryPath, env) {
  const root = normalizeProjectRoot(projectRoot);
  const timeoutMs = resolveSiftHealthTimeoutMs(env);
  const probePath = resolveSiftHealthProbePath(root);
  const cacheKey = [
    root,
    binaryPath,
    env.SIFT_PATH ?? "",
    env.SF_SIFT_HEALTH_TIMEOUT_MS ?? "",
    env.SF_SIFT_HEALTHCHECK_DISABLE ?? "",
  ].join("\0");
  if (SIFT_HEALTH_CACHE.has(cacheKey)) return SIFT_HEALTH_CACHE.get(cacheKey);

  const { searchCache, tmpDir } = ensureSiftRuntimeDirs(root);
  const remember = (outcome) => {
    SIFT_HEALTH_CACHE.set(cacheKey, outcome);
    return outcome;
  };

  if (env.SF_SIFT_HEALTHCHECK_DISABLE === "1") {
    return remember({
      ok: true,
      probePath,
      timeoutMs,
      searchCache,
      tmpDir,
      reason: "sift health probe disabled",
    });
  }

  // The retriever timeout is capped at one second; the process itself is
  // bounded by `timeoutMs` through spawnSync's `timeout` option.
  const probeArgs = [
    "search",
    "--json",
    "--strategy",
    "bm25",
    "--limit",
    "1",
    "--retriever-timeout-ms",
    String(Math.min(timeoutMs, 1_000)),
    probePath,
    "function",
  ];
  const result = spawnSync(binaryPath, probeArgs, {
    cwd: root,
    env: buildSiftEnv(root, env),
    encoding: "utf-8",
    maxBuffer: 1024 * 1024,
    timeout: timeoutMs,
  });

  const ok = result.status === 0;
  let reason;
  if (ok) {
    reason = `sift scoped health probe passed for ${probePath}`;
  } else if (result.error?.code === "ETIMEDOUT" || result.signal) {
    // Any terminating signal is reported as a timeout.
    reason = `sift scoped health probe timed out after ${timeoutMs}ms for ${probePath}`;
  } else if (result.error) {
    reason = `sift scoped health probe failed: ${result.error.message}`;
  } else {
    const detail = String(result.stderr || "").trim();
    reason = detail
      ? `sift scoped health probe failed: ${detail.slice(0, 300)}`
      : `sift scoped health probe exited ${result.status ?? "unknown"}`;
  }

  return remember({
    ok,
    probePath,
    timeoutMs,
    searchCache,
    tmpDir,
    status: result.status,
    signal: result.signal,
    stderr: result.stderr,
    reason,
  });
}
|
|
/**
 * Collect regular files under a directory, depth-first, up to a cap.
 *
 * Directories that cannot be read are skipped silently. Entries that are
 * neither directories nor regular files are ignored.
 *
 * @param {string} root - Directory to start from.
 * @param {number} [maxFiles=32] - Maximum number of paths to return.
 * @returns {string[]} File paths in traversal order; empty when `root` is
 *   missing or unreadable.
 */
function listFilesCapped(root, maxFiles = 32) {
  const found = [];
  const isFull = () => found.length >= maxFiles;

  function walk(directory) {
    if (isFull()) return;
    let children;
    try {
      children = readdirSync(directory, { withFileTypes: true });
    } catch {
      return;
    }
    for (const child of children) {
      if (isFull()) break;
      const childPath = join(directory, child.name);
      if (child.isDirectory()) {
        walk(childPath);
      } else if (child.isFile()) {
        found.push(childPath);
      }
    }
  }

  walk(root);
  return found;
}
|
|
/**
 * Scan repo-local Sift manifests for paths that should have been ignored.
 *
 * Reads at most 16 files under `<searchCache>/artifacts/manifests`, records at
 * most one sample per manifest (the first pollution pattern that matches), and
 * stops after five samples.
 *
 * @param {string} projectRoot - Repository root that owns the search cache.
 * @returns {{ inspected: boolean, polluted: boolean, samples: Array<{ label: string, sample: string }> }}
 *   `inspected` is whether the manifest directory exists; each sample is a
 *   printable-ASCII excerpt around the match.
 */
function inspectSiftCache(projectRoot) {
  const { searchCache } = resolveSiftWarmupRuntimeDirs(projectRoot);
  const manifestRoot = join(searchCache, "artifacts", "manifests");
  const samples = [];

  for (const manifestPath of listFilesCapped(manifestRoot, 16)) {
    let contents;
    try {
      contents = readFileSync(manifestPath).toString("utf-8");
    } catch {
      continue;
    }
    for (const { label, pattern } of SIFT_CACHE_POLLUTION_PATTERNS) {
      const hit = contents.match(pattern);
      if (!hit) continue;
      const at = hit.index ?? 0;
      // Excerpt: 80 characters before the match start through 160 after it,
      // with non-printable runs collapsed to single spaces.
      const sample = contents
        .slice(Math.max(0, at - 80), Math.min(contents.length, at + 160))
        .replace(/[^\x20-\x7E]+/g, " ")
        .trim();
      samples.push({ label, sample });
      break;
    }
    if (samples.length >= 5) break;
  }

  return {
    inspected: existsSync(manifestRoot),
    polluted: samples.length > 0,
    samples,
  };
}
|
|
/**
 * Summarize what is on disk under the repo-local Sift `artifacts` directory.
 *
 * At most 512 files are sampled, so the count and byte total are lower bounds
 * whenever `artifactCountCapped` is true.
 *
 * @param {string} projectRoot - Repository root that owns the search cache.
 * @returns {{ artifactCount: number, artifactCountCapped: boolean, artifactSampleLimit: number, latestArtifactAt: string | null, cacheBytes: number }}
 *   `latestArtifactAt` is the newest sampled mtime as an ISO-8601 string, or
 *   `null` when no file could be stat'ed.
 */
function inspectSiftWarmupArtifacts(projectRoot) {
  const artifactSampleLimit = 512;
  const { searchCache } = resolveSiftWarmupRuntimeDirs(projectRoot);
  const sampled = listFilesCapped(
    join(searchCache, "artifacts"),
    artifactSampleLimit,
  );

  let cacheBytes = 0;
  let latestArtifactAt = null;
  for (const artifactPath of sampled) {
    try {
      const { size, mtimeMs } = statSync(artifactPath);
      cacheBytes += size;
      // ISO-8601 strings from toISOString() sort chronologically as text.
      const modifiedAt = new Date(mtimeMs).toISOString();
      if (latestArtifactAt === null || modifiedAt > latestArtifactAt) {
        latestArtifactAt = modifiedAt;
      }
    } catch {
      // Best-effort observability only; marker reconciliation must not fail
      // because a cache file changed while we were inspecting it.
    }
  }

  return {
    artifactCount: sampled.length,
    artifactCountCapped: sampled.length >= artifactSampleLimit,
    artifactSampleLimit,
    latestArtifactAt,
    cacheBytes,
  };
}
|
|
/**
 * Rewrite a warmup marker into a terminal state.
 *
 * The status becomes `"completed"` when any cache artifact exists and
 * `"stale"` otherwise. The marker also records the finish time, the reason,
 * and a snapshot of the artifact statistics.
 *
 * @param {string} projectRoot - Repository root that owns the search cache.
 * @param {string} markerPath - Marker file to overwrite.
 * @param {object} parsed - Marker contents to carry forward.
 * @param {string} reason - Why the warmup is considered finished.
 * @returns {object | null} The written marker plus `markerPath`, or `null`
 *   when the marker could not be written.
 */
function finalizeSiftWarmupMarker(projectRoot, markerPath, parsed, reason) {
  const {
    artifactCount,
    artifactCountCapped,
    artifactSampleLimit,
    latestArtifactAt,
    cacheBytes,
  } = inspectSiftWarmupArtifacts(projectRoot);
  const reconciled = {
    ...parsed,
    schemaVersion: 3,
    status: artifactCount > 0 ? "completed" : "stale",
    finishedAt: new Date().toISOString(),
    terminalReason: reason,
    artifactCount,
    artifactCountCapped,
    artifactSampleLimit,
    latestArtifactAt,
    cacheBytes,
  };
  try {
    const serialized = `${JSON.stringify(reconciled, null, 2)}\n`;
    writeFileSync(markerPath, serialized, "utf-8");
  } catch {
    return null;
  }
  return { ...reconciled, markerPath };
}
|
|
/**
 * Determine whether Sift is usable for this repository.
 *
 * Checks run in this order: disabled by preference, binary missing, explicit
 * `SIFT_PATH` not resolvable, warmup still running, health probe failing,
 * cache polluted, and finally configured.
 *
 * @param {string} projectRoot - Repository root.
 * @param {object} [prefs] - Codebase preferences; `indexer_backend: "none"` disables Sift.
 * @param {object} [env=process.env] - Environment for binary lookup and probing.
 * @returns {object} Detection record with `backend`, `status`
 *   (`disabled` | `missing` | `warming` | `degraded` | `configured`), `reason`,
 *   and, depending on the status, binary, cache, probe, and marker details.
 */
export function detectSift(projectRoot, prefs, env = process.env) {
  if (prefs?.indexer_backend === "none") {
    return {
      backend: "sift",
      status: "disabled",
      reason: "codebase.indexer_backend is none",
    };
  }

  const explicit = env.SIFT_PATH?.trim();
  const binaryPath = resolveSiftBinary(env) ?? undefined;
  if (!binaryPath) {
    return {
      backend: "sift",
      status: "missing",
      reason:
        "sift binary not found on PATH; set SIFT_PATH or install rupurt/sift.",
    };
  }
  if (explicit && !commandExists(explicit, env)) {
    return {
      backend: "sift",
      status: "missing",
      command: explicit,
      binaryPath: explicit,
      reason: "SIFT_PATH is set but does not resolve to an executable file.",
    };
  }

  // Shared prefix for every reason reported from here on.
  const origin = explicit
    ? "sift binary resolved from SIFT_PATH"
    : "sift binary found on PATH";
  // Builds a record with the common fields first, then status-specific ones.
  const describe = (status, dirs, probePath, extra) => ({
    backend: "sift",
    status,
    command: binaryPath,
    binaryPath,
    searchCache: dirs.searchCache,
    tmpDir: dirs.tmpDir,
    probePath,
    ...extra,
  });

  const warmup = readSiftWarmupMarker(projectRoot);
  if (warmup?.status === "warming") {
    // No probe is run while warmup is in flight.
    const dirs = ensureSiftRuntimeDirs(projectRoot);
    return describe("warming", dirs, warmup.scope ?? ".", {
      reason: `${origin}; repo-local Sift index warmup is still running`,
      markerPath: warmup.markerPath,
    });
  }

  const health = runSiftHealthProbe(projectRoot, binaryPath, env);
  if (!health.ok) {
    return describe("degraded", health, health.probePath, {
      reason: `${origin} but ${health.reason}`,
    });
  }

  const cacheInspection = inspectSiftCache(projectRoot);
  if (cacheInspection.polluted) {
    return describe("degraded", health, health.probePath, {
      cacheInspection,
      reason: `${origin} but repo-local Sift cache contains ignored/generated paths`,
    });
  }

  return describe("configured", health, health.probePath, {
    cacheInspection,
    reason: `${origin}; ${health.reason}`,
  });
}
|
|
/**
 * Decide whether an existing warmup marker makes a new warmup unnecessary.
 *
 * A marker is fresh when its file mtime is younger than `ttlMs` and either
 * (schema 3) it has a non-empty string `scope` and is not a "warming" marker
 * whose pid is gone, or (schema 2) its second-to-last argument is `"."`.
 * Any read or parse error counts as not fresh.
 *
 * @param {string} markerPath - Marker file to inspect.
 * @param {number} now - Current time in epoch milliseconds.
 * @param {number} ttlMs - Maximum marker age in milliseconds.
 * @returns {boolean} Whether the marker is fresh.
 */
function isFreshMarker(markerPath, now, ttlMs) {
  try {
    const { mtimeMs } = statSync(markerPath);
    if (now - mtimeMs >= ttlMs) return false;

    const marker = JSON.parse(readFileSync(markerPath, "utf-8"));
    if (marker.schemaVersion !== 3) {
      return (
        marker.schemaVersion === 2 &&
        Array.isArray(marker.args) &&
        marker.args.at(-2) === "."
      );
    }

    const warmupDied =
      marker.status === "warming" && marker.pid && !isProcessAlive(marker.pid);
    if (warmupDied) return false;
    return typeof marker.scope === "string" && marker.scope.length > 0;
  } catch {
    return false;
  }
}
|
|
/**
 * Read the warmup marker and return it only while a warmup is still running.
 *
 * A schema-3 "warming" marker is finalized (rewritten to a terminal state)
 * and treated as absent when its pid is gone, its `startedAt` is unparseable,
 * or it is older than the hard timeout plus kill grace (at least 60 seconds).
 *
 * @param {string} projectRoot - Repository root that owns the marker.
 * @returns {object | null} The marker plus `markerPath` while warmup is in
 *   flight; `null` otherwise, including on any read or parse error.
 */
function readSiftWarmupMarker(projectRoot) {
  const markerPath = join(
    projectRoot,
    ".sf",
    "runtime",
    "sift-index-warmup.json",
  );
  try {
    if (!existsSync(markerPath)) return null;
    const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
    if (parsed.schemaVersion !== 3) return null;
    if (parsed.status !== "warming") return null;

    const retire = (reason) => {
      finalizeSiftWarmupMarker(projectRoot, markerPath, parsed, reason);
      return null;
    };

    if (parsed.pid && !isProcessAlive(parsed.pid)) {
      return retire(`warmup pid ${parsed.pid} is no longer running`);
    }

    const started = Date.parse(parsed.startedAt);
    if (!Number.isFinite(started)) {
      return retire("warmup marker has invalid startedAt");
    }

    // A missing or non-numeric cap falls back to the default. Without the
    // finite check a corrupt value makes the expiry NaN, and `now > NaN` is
    // always false, so a pid-less marker would stay "warming" forever.
    const recordedCapSec = Number(
      parsed.hardTimeoutSec ?? DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC,
    );
    const hardTimeoutSec = Number.isFinite(recordedCapSec)
      ? recordedCapSec
      : DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC;
    const expiresAt =
      started +
      Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000;
    if (Date.now() > expiresAt) {
      return retire("warmup marker exceeded hard timeout window");
    }

    return { ...parsed, markerPath };
  } catch {
    return null;
  }
}
|
|
/**
 * Check whether a process with the given pid currently exists.
 *
 * Uses signal 0, which performs the existence and permission checks without
 * delivering a signal.
 *
 * @param {number | string} pid - Process id; numeric strings are accepted.
 * @returns {boolean} `true` when the process exists, including when it exists
 *   but may not be signalled by this user; `false` otherwise and for values
 *   that are not positive integers.
 */
function isProcessAlive(pid) {
  const numericPid = Number(pid);
  // Zero and negative pids address process groups, not a single process, so
  // they can never identify the warmup child.
  if (!Number.isInteger(numericPid) || numericPid <= 0) return false;
  try {
    process.kill(numericPid, 0);
    return true;
  } catch (err) {
    // EPERM: the process exists but belongs to someone we cannot signal.
    // Everything else (notably ESRCH) means there is no such process.
    return err?.code === "EPERM";
  }
}
|
|
/**
 * Start a detached, repo-local Sift index warmup unless one is not needed.
 *
 * Nothing is started when the effective indexer is not Sift, a warmup is
 * already running, Sift is unavailable, or a fresh marker exists (unless
 * `options.force` is set). Otherwise a "warming" marker is written, the
 * search is spawned detached (behind a `timeout` wrapper when a hard cap
 * applies and a wrapper binary exists), and the marker is rewritten with the
 * child's pid.
 *
 * @param {string} projectRoot - Repository root; used as the child's cwd.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [options] - Optional overrides: `env`, `now`, `ttlMs`,
 *   `force`, `scope`, `limit`, `retrieverTimeoutMs`, `query`,
 *   `hardTimeoutSec`, and `spawnFn` (replacement for `spawn`).
 * @returns {object} `{ status, reason }` with `status` one of `skipped`,
 *   `unavailable`, `started`, or `error`; `started` and `error` also carry
 *   `command`, `args`, and `markerPath`.
 */
export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
  const env = options.env ?? process.env;
  const backendName = resolveEffectiveCodebaseIndexerBackendName(
    projectRoot,
    prefs,
    env,
  );
  if (backendName !== "sift") {
    return {
      status: "skipped",
      reason: `effective codebase indexer is ${backendName}`,
    };
  }
  // Detection here only needs the binary and marker state, so the
  // synchronous health probe is switched off.
  const detection = detectSift(projectRoot, prefs, {
    ...env,
    SF_SIFT_HEALTHCHECK_DISABLE: "1",
  });
  if (detection.status === "warming") {
    return {
      status: "skipped",
      reason: "sift index warmup is already running",
      markerPath: detection.markerPath,
    };
  }
  if (
    !["configured", "degraded"].includes(detection.status) ||
    !detection.binaryPath
  ) {
    return {
      status: "unavailable",
      reason: detection.reason,
    };
  }
  const markerPath = join(
    projectRoot,
    ".sf",
    "runtime",
    "sift-index-warmup.json",
  );
  const now = options.now ?? Date.now();
  const ttlMs = options.ttlMs ?? DEFAULT_SIFT_WARMUP_TTL_MS;
  if (!options.force && isFreshMarker(markerPath, now, ttlMs)) {
    return {
      status: "skipped",
      reason: "recent sift warmup marker exists",
      markerPath,
    };
  }
  const scope = resolveSiftSearchScope(projectRoot, options.scope ?? ".");
  // ── Vector retriever hang workaround ─────────────────────────────────────
  // When the embedding model (sentence-transformers/all-MiniLM-L6-v2) hangs
  // during inference, page-index-hybrid with vector retriever stalls forever.
  // Restrict retrievers to bm25+phrase and disable ML reranking so warmup
  // completes without the vector path (#vector-hang-fix).
  const siftArgs = [
    "search",
    "--json",
    "--strategy",
    "page-index-hybrid",
    "--limit",
    String(options.limit ?? DEFAULT_SIFT_WARMUP_LIMIT),
    "--retriever-timeout-ms",
    String(
      options.retrieverTimeoutMs ?? DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS,
    ),
    "--retrievers",
    "bm25,phrase",
    "--reranking",
    "none",
    scope,
    options.query ?? DEFAULT_SIFT_WARMUP_QUERY,
  ];
  const hardTimeoutSec = resolveSiftWarmupHardTimeoutSec(
    env,
    options.hardTimeoutSec,
  );
  const wrapper =
    hardTimeoutSec !== null
      ? resolveSiftWarmupTimeoutWrapper(env, hardTimeoutSec)
      : null;
  const command = wrapper ? wrapper.binary : detection.binaryPath;
  const args = wrapper
    ? [...wrapper.wrapperArgs, detection.binaryPath, ...siftArgs]
    : siftArgs;
  const startedReason = wrapper
    ? `sift page-index-hybrid warmup started (hard cap ${wrapper.timeoutSec}s via ${wrapper.binary})`
    : hardTimeoutSec === null
      ? "sift page-index-hybrid warmup started (hard cap disabled)"
      : "sift page-index-hybrid warmup started (no timeout(1)/gtimeout on PATH; running unbounded)";
  try {
    const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
    ensureSiftRuntimeDirs(projectRoot);
    const childEnv = buildSiftEnv(projectRoot, env);
    const marker = {
      schemaVersion: 3,
      status: "warming",
      startedAt: new Date(now).toISOString(),
      command,
      cwd: projectRoot,
      args,
      scope,
      siftBinary: detection.binaryPath,
      hardTimeoutSec: wrapper?.timeoutSec ?? null,
      searchCache: runtimeDirs.searchCache,
      tmpDir: runtimeDirs.tmpDir,
    };
    // Written before spawning so a concurrent caller sees "warming" at once.
    writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
    const child = (options.spawnFn ?? spawn)(command, args, {
      cwd: projectRoot,
      env: childEnv,
      stdio: "ignore",
      detached: true,
    });
    // A failed spawn is reported asynchronously through the child's "error"
    // event; with no listener Node raises it as an uncaught exception.
    // Record the failure on the marker instead. The optional call keeps
    // minimal `spawnFn` test doubles working.
    child.on?.("error", (err) => {
      finalizeSiftWarmupMarker(
        projectRoot,
        markerPath,
        marker,
        `warmup spawn failed: ${getErrorMessage(err)}`,
      );
    });
    marker.pid = child.pid ?? null;
    writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
    // Let the parent exit without waiting for the warmup.
    child.unref();
    return {
      status: "started",
      reason: startedReason,
      command,
      args,
      markerPath,
    };
  } catch (err) {
    return {
      status: "error",
      reason: getErrorMessage(err),
      command,
      args,
      markerPath,
    };
  }
}
|
|
/**
 * Build the Sift-specific bullet lines for the code-intelligence context block.
 *
 * The guidance depends on the detection status: disabled, configured, warming,
 * degraded, or (anything else) not available.
 *
 * @param {string} projectRoot - Repository root.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [env=process.env] - Environment used for detection.
 * @returns {string[]} Markdown bullet lines.
 */
function buildSiftContextLines(projectRoot, prefs, env = process.env) {
  const detection = detectSift(projectRoot, prefs, env);
  const { status, binaryPath } = detection;

  if (status === "disabled") {
    return [
      "- Codebase indexer: disabled by `codebase.indexer_backend: none`.",
    ];
  }

  // Repeated verbatim by every branch in which the binary is installed.
  const cacheLine = `- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`;

  if (status === "configured" && binaryPath) {
    return [
      `- Sift: configured as local CLI \`${binaryPath}\`.`,
      cacheLine,
      "- Use Sift with explicit, narrow paths after quick `grep`/`find`/`ls` orientation; avoid root-scope searches unless status proves they are responsive.",
      "- Tool: `sift_search` exposes the full Sift CLI surface — prefer direct `bm25`, `path-hybrid`, or `page-index-hybrid` with a scoped `path`.",
      "- Tool: `codebase_search` is the platform-level wrapper — use it only with a scoped `scope` when possible.",
      "- Strategy guide: `page-index-hybrid` (strongest recall + structural reranking), `path-hybrid` (filename/path-heavy), `bm25` (fast lexical-only), `vector` (semantic-only).",
      "- If Sift is slow, empty, or times out, continue with native `grep`/`find`/`ls`, `lsp`, scout, and `.sf/CODEBASE.md` only as fallback context.",
    ];
  }

  if (status === "warming" && binaryPath) {
    return [
      `- Sift: installed at \`${binaryPath}\`; repo-local index warmup is running.`,
      cacheLine,
      "- Use grep/find/ls and lsp for broad orientation while warmup runs. Use `.sf/CODEBASE.md` only as fallback context. Use narrow `sift_search` paths if needed; broad root-scope Sift may still be cold.",
    ];
  }

  if (status === "degraded" && binaryPath) {
    return [
      `- Sift: installed at \`${binaryPath}\` but degraded for this repo: ${detection.reason}.`,
      cacheLine,
      "- Do not use broad Sift/codebase_search as the first exploration step. Prefer native `grep`/`find`/`ls`, lsp, and narrow `sift_search` only after reducing scope. Use `.sf/CODEBASE.md` only as fallback context.",
    ];
  }

  return [
    "- Sift: not available. This is optional; continue with native `grep`/`find`/`ls`, `lsp`, scout, and `.sf/CODEBASE.md` only as fallback context.",
    "- To enable later: install `rupurt/sift` on PATH or set `SIFT_PATH` to the sift binary.",
  ];
}
|
|
/**
 * Context lines used when the codebase indexer is switched off.
 *
 * @returns {string[]} A single bullet pointing agents at the native tools.
 */
function buildNoCodebaseIndexerContextLines() {
  const disabledNotice =
    "- Codebase indexer: disabled by `codebase.indexer_backend: none`; continue with native `grep`/`find`/`ls`, `lsp`, scout, and `.sf/CODEBASE.md` only as fallback context.";
  return [disabledNotice];
}
|
|
/**
 * Map codebase preferences to a backend name.
 *
 * @param {object} [prefs] - Codebase preferences.
 * @returns {"none" | "sift"} `"none"` only when `prefs.indexer_backend` is
 *   exactly `"none"`; `"sift"` in every other case.
 */
export function resolveCodebaseIndexerBackendName(prefs) {
  return prefs?.indexer_backend === "none" ? "none" : "sift";
}
|
|
/**
 * Resolve the backend name that is actually in effect for a project.
 *
 * The project root and environment are accepted but currently unused; only
 * the preference decides.
 *
 * @param {string} _projectRoot - Unused.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [_env=process.env] - Unused.
 * @returns {"none" | "sift"} `"none"` only when `prefs.indexer_backend` is
 *   exactly `"none"`.
 */
export function resolveEffectiveCodebaseIndexerBackendName(
  _projectRoot,
  prefs,
  _env = process.env,
) {
  const optedOut = prefs?.indexer_backend === "none";
  return optedOut ? "none" : "sift";
}
|
|
/**
 * Look up a codebase indexer backend by name or from preferences.
 *
 * @param {string | object | null | undefined} prefsOrName - A backend name, or
 *   preferences from which the name is resolved.
 * @returns {object} The registered backend, or the Sift backend when the name
 *   is not registered.
 */
export function getCodebaseIndexerBackend(prefsOrName) {
  const name =
    typeof prefsOrName === "string"
      ? prefsOrName
      : resolveCodebaseIndexerBackendName(prefsOrName);
  // Own-property check: a bare `registry[name]` follows the prototype chain,
  // so names like "constructor" or "toString" would yield Object.prototype
  // members instead of falling back to Sift.
  const registered = Object.hasOwn(CODEBASE_INDEXER_BACKENDS, name)
    ? CODEBASE_INDEXER_BACKENDS[name]
    : undefined;
  return registered ?? SIFT_CODEBASE_INDEXER_BACKEND;
}
|
|
/**
 * Run detection for whichever codebase indexer backend is in effect.
 *
 * @param {string} projectRoot - Repository root.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [env=process.env] - Environment passed to the backend.
 * @returns {object} The selected backend's detection record.
 */
export function detectCodebaseIndexer(projectRoot, prefs, env = process.env) {
  const effectiveName = resolveEffectiveCodebaseIndexerBackendName(
    projectRoot,
    prefs,
    env,
  );
  const backend = getCodebaseIndexerBackend(effectiveName);
  return backend.detect(projectRoot, prefs, env);
}
|
|
/**
 * Render the status report of whichever codebase indexer backend is in effect.
 *
 * @param {string} projectRoot - Repository root.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [env=process.env] - Environment passed to the backend.
 * @returns {string} The selected backend's status text.
 */
export function formatCodebaseIndexerStatus(
  projectRoot,
  prefs,
  env = process.env,
) {
  const effectiveName = resolveEffectiveCodebaseIndexerBackendName(
    projectRoot,
    prefs,
    env,
  );
  const backend = getCodebaseIndexerBackend(effectiveName);
  return backend.formatStatus(projectRoot, prefs, env);
}
|
|
/**
 * Assemble the "[PROJECT CODE INTELLIGENCE]" block for agent context.
 *
 * The block is a fixed heading and retrieval policy line followed by the
 * effective backend's own context lines.
 *
 * @param {string} projectRoot - Repository root.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [env=process.env] - Environment passed to the backend.
 * @returns {string} The block, prefixed with two newlines.
 */
export function buildCodeIntelligenceContextBlock(
  projectRoot,
  prefs,
  env = process.env,
) {
  const effectiveName = resolveEffectiveCodebaseIndexerBackendName(
    projectRoot,
    prefs,
    env,
  );
  const backendLines = getCodebaseIndexerBackend(
    effectiveName,
  ).buildContextLines(projectRoot, prefs, env);
  const block = [
    "[PROJECT CODE INTELLIGENCE]",
    "",
    "- Live code retrieval should use Sift when healthy. Use `.sf/CODEBASE.md` only as durable fallback context when Sift is unavailable, cold, degraded, or explicitly needed as a generated overview.",
    ...backendLines,
  ].join("\n");
  return `\n\n${block}`;
}
|
|
/**
 * Render a human-readable Sift status report.
 *
 * The report lists the detection fields that are present, the cache integrity
 * verdict when the cache was inspected, an "Operational" summary for the
 * current status, and fixed usage notes.
 *
 * @param {string} projectRoot - Repository root.
 * @param {object} [prefs] - Codebase preferences.
 * @param {object} [env=process.env] - Environment used for detection.
 * @returns {string} Newline-joined status text.
 */
export function formatSiftStatus(projectRoot, prefs, env = process.env) {
  const detection = detectSift(projectRoot, prefs, env);
  const lines = ["Sift Status", ""];
  lines.push(`Status: ${detection.status}`);
  lines.push(`Reason: ${detection.reason}`);
  if (detection.command) lines.push(`Command: ${detection.command}`);
  if (detection.binaryPath) lines.push(`Binary: ${detection.binaryPath}`);
  if (detection.searchCache)
    lines.push(`Search cache: ${detection.searchCache}`);
  if (detection.tmpDir) lines.push(`Temp dir: ${detection.tmpDir}`);
  if (detection.probePath)
    lines.push(`Health probe scope: ${detection.probePath}`);
  if (detection.markerPath)
    lines.push(`Warmup marker: ${detection.markerPath}`);

  const cachePolluted = Boolean(detection.cacheInspection?.polluted);
  if (cachePolluted) {
    lines.push(
      "Cache integrity: polluted - ignored/generated paths were found in repo-local Sift manifests.",
    );
    for (const sample of detection.cacheInspection.samples ?? []) {
      lines.push(`Cache sample (${sample.label}): ${sample.sample}`);
    }
    lines.push(
      "Action: remove .sf/runtime/sift/search-cache and warm Sift again from the repo root.",
    );
  } else if (detection.cacheInspection?.inspected) {
    lines.push(
      "Cache integrity: ok - no ignored/generated path samples found in inspected manifests.",
    );
  }

  if (detection.status === "configured" && detection.command) {
    lines.push(
      `Operational: ${commandExists(detection.command, env) ? "yes - scoped health probe passed" : "no - configured command is missing"}`,
    );
  } else if (detection.status === "warming" && detection.command) {
    lines.push(
      "Operational: warming - binary exists and repo-local index warmup is running. Give Sift time on CPU before broad searches.",
    );
  } else if (detection.status === "degraded" && detection.command) {
    // Detection reports "degraded" for two causes. Cache pollution is only
    // checked after the probe succeeded, so blaming the probe there would
    // point the reader at the wrong fix.
    lines.push(
      cachePolluted
        ? "Operational: degraded - binary exists, but the repo-local Sift cache contains ignored/generated paths. Rebuild the cache or use fallback search."
        : "Operational: degraded - binary exists, but the bounded scoped health probe failed. Use narrow paths or fallback search.",
    );
  } else {
    lines.push(
      "Operational: no - install rupurt/sift on PATH or set SIFT_PATH.",
    );
  }

  lines.push("");
  lines.push(
    "Sift is optional. SF falls back to native grep/find/ls, lsp, scout, and CODEBASE.md only as fallback context when it is unavailable.",
  );
  lines.push(
    'When configured, agents should use `sift search --json <path> "<query>"`; `page-index-hybrid` is the strongest direct-search preset and `path-hybrid` is best for path-heavy queries.',
  );
  lines.push(
    "SF runs Sift warmup with a project-scoped SIFT_SEARCH_CACHE under .sf/runtime/sift/ while leaving model cache shared.",
  );
  return lines.join("\n");
}
|
|
/**
 * Status report shown when the codebase indexer is switched off.
 *
 * @returns {string} Fixed, newline-joined status text.
 */
function formatNoCodebaseIndexerStatus() {
  const details = [
    "Status: disabled",
    "Reason: codebase.indexer_backend is none",
    "Operational: no - optional codebase indexer disabled.",
  ];
  const footer =
    "SF will use native grep/find/ls, lsp, scout, and CODEBASE.md only as fallback context for codebase orientation.";
  return ["Codebase Indexer Status", "", ...details, "", footer].join("\n");
}
|
|
/**
 * Backend descriptor for Sift: wires the Sift detection, status, and context
 * functions into the shared backend shape.
 */
export const SIFT_CODEBASE_INDEXER_BACKEND = {
  name: "sift",
  label: "Sift",
  detect: detectSift,
  formatStatus: formatSiftStatus,
  buildContextLines: buildSiftContextLines,
};
|
|
/**
 * Backend descriptor used when the indexer is disabled: detection always
 * reports `disabled`, and the status and context come from the "no indexer"
 * formatters.
 */
export const NO_CODEBASE_INDEXER_BACKEND = {
  name: "none",
  label: "None",
  detect() {
    return {
      backend: "none",
      status: "disabled",
      reason: "codebase.indexer_backend is none",
    };
  },
  formatStatus: formatNoCodebaseIndexerStatus,
  buildContextLines: buildNoCodebaseIndexerContextLines,
};
|
|
/**
 * Registry of codebase indexer backends, keyed by the backend name that
 * preference resolution produces.
 */
export const CODEBASE_INDEXER_BACKENDS = {
  sift: SIFT_CODEBASE_INDEXER_BACKEND,
  none: NO_CODEBASE_INDEXER_BACKEND,
};
|