fix: harden sift codebase indexing
This commit is contained in:
parent
5b9355fa74
commit
ee836142ed
13 changed files with 544 additions and 53 deletions
|
|
@ -8,7 +8,7 @@ You are a scout. Quickly investigate a codebase and return structured findings t
|
|||
|
||||
Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. These keep exploration inside SF's tool surface and use native backends where available.
|
||||
|
||||
Use `codebase_search` as your PRIMARY tool for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"). It uses Sift-backed hybrid BM25/vector retrieval and is significantly more effective than grep for navigating unfamiliar logic. Use `sift_search` when you need agentic multi-turn research, explicit strategy selection (e.g. `page-index-hybrid`, `path-hybrid`), or planner configuration. Use exact text search (`grep`) only when you already have a specific identifier or filename in mind. You are still the scout role; Sift is the powerful primitive you should lead with for exploration.
|
||||
Use `grep`, `find`, and `ls` for broad orientation first. Use `codebase_search` for conceptual, behavioral, or architectural discovery only with a narrow scope and when the project code-intelligence status says Sift is healthy enough for this repo. Use `sift_search` when you need explicit strategy selection (e.g. `bm25`, `path-hybrid`, `page-index-hybrid`) and a scoped path. If Sift is degraded, slow, or empty, fall back to grep/find/ls and direct reads. Each repo has its own Sift cache under `.sf/runtime/sift/`.
|
||||
|
||||
Your output will be passed to an agent who has NOT seen the files you explored.
|
||||
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ through these tiers IN ORDER. Skip a tier only when it has been demonstrably
|
|||
exhausted, not just because the next tier is faster.
|
||||
|
||||
Tier 1 — Code lookup:
|
||||
- sift / codebase_search for symbols, patterns, prior usages
|
||||
- grep/find/ls for broad orientation; scoped sift / codebase_search for symbols, patterns, prior usages when Sift status is healthy for the repo
|
||||
- Read source files (Read tool, file paths from PLAN/CODEBASE)
|
||||
- Inspect .sf/DECISIONS.md, .sf/KNOWLEDGE.md, docs/design-docs/, docs/records/
|
||||
- Check tests for documented behavior
|
||||
|
|
|
|||
|
|
@ -2,7 +2,11 @@ export const PROJECT_RAG_MCP_SERVER_NAME: string;
|
|||
export function detectProjectRag(projectRoot: string, prefs: Record<string, unknown>, env?: NodeJS.ProcessEnv): unknown;
|
||||
export function resolveProjectRagBinary(env?: NodeJS.ProcessEnv): string | null;
|
||||
export function resolveSiftBinary(env?: NodeJS.ProcessEnv): string | null;
|
||||
export function detectSift(_projectRoot: string, prefs: Record<string, unknown>, env?: NodeJS.ProcessEnv): unknown;
|
||||
export function resolveSiftWarmupRuntimeDirs(projectRoot: string): { searchCache: string; tmpDir: string };
|
||||
export function ensureSiftRuntimeDirs(projectRoot: string): { searchCache: string; tmpDir: string };
|
||||
export function buildSiftEnv(projectRoot: string, env: NodeJS.ProcessEnv): NodeJS.ProcessEnv;
|
||||
export function resolveSiftSearchScope(projectRoot: string, scope?: string): string;
|
||||
export function detectSift(projectRoot: string, prefs: Record<string, unknown>, env?: NodeJS.ProcessEnv): unknown;
|
||||
export function ensureSiftIndexWarmup(projectRoot: string, prefs: Record<string, unknown>, options?: Record<string, unknown>): Promise<unknown>;
|
||||
export function resolveProjectRagBuildJobs(env?: NodeJS.ProcessEnv): number;
|
||||
export function findProjectRagSourceDir(projectRoot: string, env?: NodeJS.ProcessEnv): string | null;
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
* accelerators for local code retrieval.
|
||||
*/
|
||||
import { spawn, spawnSync } from "node:child_process";
|
||||
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync, } from "node:fs";
|
||||
import { delimiter, join, resolve } from "node:path";
|
||||
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "node:fs";
|
||||
import { delimiter, isAbsolute, join, relative, resolve } from "node:path";
|
||||
export const PROJECT_RAG_MCP_SERVER_NAME = "project-rag";
|
||||
const PROJECT_RAG_BINARY_NAME = process.platform === "win32" ? "project-rag.exe" : "project-rag";
|
||||
const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift";
|
||||
|
|
@ -22,8 +22,17 @@ const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000;
|
|||
const DEFAULT_SIFT_WARMUP_QUERY = "repo architecture source tests entrypoints configuration";
|
||||
const DEFAULT_SIFT_WARMUP_LIMIT = 1;
|
||||
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000;
|
||||
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 30;
|
||||
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 600;
|
||||
const SIFT_WARMUP_KILL_GRACE_SEC = 10;
|
||||
const DEFAULT_SIFT_HEALTH_TIMEOUT_MS = 60_000;
|
||||
const SIFT_HEALTH_CACHE = new Map();
|
||||
const SIFT_CACHE_POLLUTION_PATTERNS = [
|
||||
{ label: ".claude worktrees", pattern: /(?:^|[/\\])\.claude[/\\]/ },
|
||||
{ label: ".git internals", pattern: /(?:^|[/\\])\.git[/\\]/ },
|
||||
{ label: "dist-test output", pattern: /(?:^|[/\\])dist-test[/\\]/ },
|
||||
{ label: "node_modules", pattern: /(?:^|[/\\])node_modules[/\\]/ },
|
||||
{ label: "package dist output", pattern: /(?:^|[/\\])packages[/\\][^/\\]+[/\\]dist[/\\]/ },
|
||||
];
|
||||
export function resolveSiftWarmupRuntimeDirs(projectRoot) {
|
||||
const runtimeRoot = join(projectRoot, ".sf", "runtime", "sift");
|
||||
return {
|
||||
|
|
@ -31,6 +40,20 @@ export function resolveSiftWarmupRuntimeDirs(projectRoot) {
|
|||
tmpDir: join(runtimeRoot, "tmp"),
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Ensure the repo-local Sift runtime directories exist.
|
||||
*
|
||||
* Purpose: keep Sift's search database scoped to the current repository instead
|
||||
* of sharing a process-global cache across unrelated projects.
|
||||
*
|
||||
* Consumer: Sift warmup, status probes, `sift_search`, and `codebase_search`.
|
||||
*/
|
||||
export function ensureSiftRuntimeDirs(projectRoot) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
mkdirSync(dirs.searchCache, { recursive: true });
|
||||
mkdirSync(dirs.tmpDir, { recursive: true });
|
||||
return dirs;
|
||||
}
|
||||
export function buildSiftEnv(projectRoot, env) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
return {
|
||||
|
|
@ -39,6 +62,27 @@ export function buildSiftEnv(projectRoot, env) {
|
|||
TMPDIR: dirs.tmpDir,
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Resolve a Sift search scope to the form Sift's local ignore matcher expects.
|
||||
*
|
||||
* Purpose: preserve `.siftignore` semantics by running Sift from the repository
|
||||
* root with repo-relative scopes instead of absolute paths.
|
||||
*
|
||||
* Consumer: Sift warmup, `sift_search`, and `codebase_search`.
|
||||
*/
|
||||
export function resolveSiftSearchScope(projectRoot, scope) {
|
||||
const normalizedRoot = normalizeProjectRoot(projectRoot);
|
||||
const requested = typeof scope === "string" && scope.trim() ? scope.trim() : ".";
|
||||
const absolute = isAbsolute(requested)
|
||||
? resolve(requested)
|
||||
: resolve(normalizedRoot, requested);
|
||||
const rel = relative(normalizedRoot, absolute);
|
||||
if (!rel)
|
||||
return ".";
|
||||
if (!rel.startsWith("..") && !isAbsolute(rel))
|
||||
return rel;
|
||||
return requested;
|
||||
}
|
||||
function readJsonConfig(configPath) {
|
||||
if (!existsSync(configPath))
|
||||
return {};
|
||||
|
|
@ -195,7 +239,156 @@ export function resolveSiftBinary(env = process.env) {
|
|||
return (lookupExecutable(SIFT_BINARY_NAME, env) ??
|
||||
(SIFT_BINARY_NAME === "sift" ? null : lookupExecutable("sift", env)));
|
||||
}
|
||||
export function detectSift(_projectRoot, prefs, env = process.env) {
|
||||
function resolveSiftHealthTimeoutMs(env) {
|
||||
const raw = env.SF_SIFT_HEALTH_TIMEOUT_MS?.trim();
|
||||
if (!raw)
|
||||
return DEFAULT_SIFT_HEALTH_TIMEOUT_MS;
|
||||
const parsed = Number.parseInt(raw, 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_SIFT_HEALTH_TIMEOUT_MS;
|
||||
}
|
||||
function resolveSiftHealthProbePath(projectRoot) {
|
||||
for (const candidate of ["src", "packages", "tests"]) {
|
||||
const absolute = join(projectRoot, candidate);
|
||||
if (existsSync(absolute))
|
||||
return candidate;
|
||||
}
|
||||
return ".";
|
||||
}
|
||||
function runSiftHealthProbe(projectRoot, binaryPath, env) {
|
||||
const normalizedRoot = normalizeProjectRoot(projectRoot);
|
||||
const timeoutMs = resolveSiftHealthTimeoutMs(env);
|
||||
const probePath = resolveSiftHealthProbePath(normalizedRoot);
|
||||
const cacheKey = [
|
||||
normalizedRoot,
|
||||
binaryPath,
|
||||
env.SIFT_PATH ?? "",
|
||||
env.SF_SIFT_HEALTH_TIMEOUT_MS ?? "",
|
||||
env.SF_SIFT_HEALTHCHECK_DISABLE ?? "",
|
||||
].join("\0");
|
||||
if (SIFT_HEALTH_CACHE.has(cacheKey))
|
||||
return SIFT_HEALTH_CACHE.get(cacheKey);
|
||||
const dirs = ensureSiftRuntimeDirs(normalizedRoot);
|
||||
if (env.SF_SIFT_HEALTHCHECK_DISABLE === "1") {
|
||||
const skipped = {
|
||||
ok: true,
|
||||
probePath,
|
||||
timeoutMs,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
reason: "sift health probe disabled",
|
||||
};
|
||||
SIFT_HEALTH_CACHE.set(cacheKey, skipped);
|
||||
return skipped;
|
||||
}
|
||||
const result = spawnSync(binaryPath, [
|
||||
"search",
|
||||
"--json",
|
||||
"--strategy",
|
||||
"bm25",
|
||||
"--limit",
|
||||
"1",
|
||||
"--retriever-timeout-ms",
|
||||
String(Math.min(timeoutMs, 1_000)),
|
||||
probePath,
|
||||
"function",
|
||||
], {
|
||||
cwd: normalizedRoot,
|
||||
env: buildSiftEnv(normalizedRoot, env),
|
||||
encoding: "utf-8",
|
||||
maxBuffer: 1024 * 1024,
|
||||
timeout: timeoutMs,
|
||||
});
|
||||
const probe = {
|
||||
ok: result.status === 0,
|
||||
probePath,
|
||||
timeoutMs,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
status: result.status,
|
||||
signal: result.signal,
|
||||
stderr: result.stderr,
|
||||
reason: "",
|
||||
};
|
||||
if (probe.ok) {
|
||||
probe.reason = `sift scoped health probe passed for ${probePath}`;
|
||||
}
|
||||
else if (result.error?.code === "ETIMEDOUT" || result.signal) {
|
||||
probe.reason = `sift scoped health probe timed out after ${timeoutMs}ms for ${probePath}`;
|
||||
}
|
||||
else if (result.error) {
|
||||
probe.reason = `sift scoped health probe failed: ${result.error.message}`;
|
||||
}
|
||||
else {
|
||||
const detail = String(result.stderr || "").trim();
|
||||
probe.reason = detail
|
||||
? `sift scoped health probe failed: ${detail.slice(0, 300)}`
|
||||
: `sift scoped health probe exited ${result.status ?? "unknown"}`;
|
||||
}
|
||||
SIFT_HEALTH_CACHE.set(cacheKey, probe);
|
||||
return probe;
|
||||
}
|
||||
function listFilesCapped(root, maxFiles = 32) {
|
||||
const files = [];
|
||||
const visit = (dir) => {
|
||||
if (files.length >= maxFiles)
|
||||
return;
|
||||
let entries = [];
|
||||
try {
|
||||
entries = readdirSync(dir, { withFileTypes: true });
|
||||
}
|
||||
catch {
|
||||
return;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
if (files.length >= maxFiles)
|
||||
return;
|
||||
const path = join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
visit(path);
|
||||
}
|
||||
else if (entry.isFile()) {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
};
|
||||
visit(root);
|
||||
return files;
|
||||
}
|
||||
function inspectSiftCache(projectRoot) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
const manifestRoot = join(dirs.searchCache, "artifacts", "manifests");
|
||||
const samples = [];
|
||||
for (const manifest of listFilesCapped(manifestRoot, 16)) {
|
||||
let text = "";
|
||||
try {
|
||||
text = readFileSync(manifest).toString("utf-8");
|
||||
}
|
||||
catch {
|
||||
continue;
|
||||
}
|
||||
for (const { label, pattern } of SIFT_CACHE_POLLUTION_PATTERNS) {
|
||||
const match = text.match(pattern);
|
||||
if (match) {
|
||||
const start = Math.max(0, (match.index ?? 0) - 80);
|
||||
const end = Math.min(text.length, (match.index ?? 0) + 160);
|
||||
const sample = text
|
||||
.slice(start, end)
|
||||
.replace(/[^\x20-\x7E]+/g, " ")
|
||||
.trim();
|
||||
samples.push({ label, sample });
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (samples.length >= 5)
|
||||
break;
|
||||
}
|
||||
return {
|
||||
inspected: existsSync(manifestRoot),
|
||||
polluted: samples.length > 0,
|
||||
samples,
|
||||
};
|
||||
}
|
||||
export function detectSift(projectRoot, prefs, env = process.env) {
|
||||
if (prefs?.indexer_backend === "none") {
|
||||
return {
|
||||
backend: "sift",
|
||||
|
|
@ -221,14 +414,58 @@ export function detectSift(_projectRoot, prefs, env = process.env) {
|
|||
reason: "SIFT_PATH is set but does not resolve to an executable file.",
|
||||
};
|
||||
}
|
||||
const warmup = readSiftWarmupMarker(projectRoot);
|
||||
if (warmup?.status === "warming") {
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "warming",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
probePath: warmup.scope ?? ".",
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"}; repo-local Sift index warmup is still running`,
|
||||
markerPath: warmup.markerPath,
|
||||
};
|
||||
}
|
||||
const health = runSiftHealthProbe(projectRoot, binaryPath, env);
|
||||
if (!health.ok) {
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "degraded",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
searchCache: health.searchCache,
|
||||
tmpDir: health.tmpDir,
|
||||
probePath: health.probePath,
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"} but ${health.reason}`,
|
||||
};
|
||||
}
|
||||
const cacheInspection = inspectSiftCache(projectRoot);
|
||||
if (cacheInspection.polluted) {
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "degraded",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
searchCache: health.searchCache,
|
||||
tmpDir: health.tmpDir,
|
||||
probePath: health.probePath,
|
||||
cacheInspection,
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"} but repo-local Sift cache contains ignored/generated paths`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "configured",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
reason: explicit
|
||||
? "sift binary resolved from SIFT_PATH"
|
||||
: "sift binary found on PATH",
|
||||
searchCache: health.searchCache,
|
||||
tmpDir: health.tmpDir,
|
||||
probePath: health.probePath,
|
||||
cacheInspection,
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"}; ${health.reason}`,
|
||||
};
|
||||
}
|
||||
function isFreshMarker(markerPath, now, ttlMs) {
|
||||
|
|
@ -237,6 +474,11 @@ function isFreshMarker(markerPath, now, ttlMs) {
|
|||
if (now - stat.mtimeMs >= ttlMs)
|
||||
return false;
|
||||
const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
if (parsed.schemaVersion === 3) {
|
||||
if (parsed.status === "warming" && parsed.pid && !isProcessAlive(parsed.pid))
|
||||
return false;
|
||||
return typeof parsed.scope === "string" && parsed.scope.length > 0;
|
||||
}
|
||||
return (parsed.schemaVersion === 2 &&
|
||||
Array.isArray(parsed.args) &&
|
||||
parsed.args.at(-2) === ".");
|
||||
|
|
@ -245,6 +487,38 @@ function isFreshMarker(markerPath, now, ttlMs) {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
function readSiftWarmupMarker(projectRoot) {
|
||||
const markerPath = join(projectRoot, ".sf", "runtime", "sift-index-warmup.json");
|
||||
try {
|
||||
if (!existsSync(markerPath))
|
||||
return null;
|
||||
const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
if (parsed.schemaVersion !== 3)
|
||||
return null;
|
||||
if (parsed.status !== "warming")
|
||||
return null;
|
||||
if (parsed.pid && !isProcessAlive(parsed.pid))
|
||||
return null;
|
||||
const started = Date.parse(parsed.startedAt);
|
||||
const hardTimeoutSec = Number(parsed.hardTimeoutSec ?? DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC);
|
||||
const expiresAt = started + Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000;
|
||||
if (!Number.isFinite(started) || Date.now() > expiresAt)
|
||||
return null;
|
||||
return { ...parsed, markerPath };
|
||||
}
|
||||
catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
function isProcessAlive(pid) {
|
||||
try {
|
||||
process.kill(Number(pid), 0);
|
||||
return true;
|
||||
}
|
||||
catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
||||
const env = options.env ?? process.env;
|
||||
const backendName = resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, env);
|
||||
|
|
@ -254,8 +528,18 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
reason: `effective codebase indexer is ${backendName}`,
|
||||
};
|
||||
}
|
||||
const detection = detectSift(projectRoot, prefs, env);
|
||||
if (detection.status !== "configured" || !detection.binaryPath) {
|
||||
const detection = detectSift(projectRoot, prefs, {
|
||||
...env,
|
||||
SF_SIFT_HEALTHCHECK_DISABLE: "1",
|
||||
});
|
||||
if (detection.status === "warming") {
|
||||
return {
|
||||
status: "skipped",
|
||||
reason: "sift index warmup is already running",
|
||||
markerPath: detection.markerPath,
|
||||
};
|
||||
}
|
||||
if (!["configured", "degraded"].includes(detection.status) || !detection.binaryPath) {
|
||||
return {
|
||||
status: "unavailable",
|
||||
reason: detection.reason,
|
||||
|
|
@ -271,6 +555,7 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
markerPath,
|
||||
};
|
||||
}
|
||||
const scope = resolveSiftSearchScope(projectRoot, options.scope ?? ".");
|
||||
const siftArgs = [
|
||||
"search",
|
||||
"--json",
|
||||
|
|
@ -280,7 +565,7 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
String(options.limit ?? DEFAULT_SIFT_WARMUP_LIMIT),
|
||||
"--retriever-timeout-ms",
|
||||
String(options.retrieverTimeoutMs ?? DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS),
|
||||
".",
|
||||
scope,
|
||||
options.query ?? DEFAULT_SIFT_WARMUP_QUERY,
|
||||
];
|
||||
const hardTimeoutSec = resolveSiftWarmupHardTimeoutSec(env, options.hardTimeoutSec);
|
||||
|
|
@ -298,27 +583,30 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
: "sift page-index-hybrid warmup started (no timeout(1)/gtimeout on PATH; running unbounded)";
|
||||
try {
|
||||
const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
mkdirSync(join(projectRoot, ".sf", "runtime"), { recursive: true });
|
||||
mkdirSync(runtimeDirs.searchCache, { recursive: true });
|
||||
mkdirSync(runtimeDirs.tmpDir, { recursive: true });
|
||||
ensureSiftRuntimeDirs(projectRoot);
|
||||
const childEnv = buildSiftEnv(projectRoot, env);
|
||||
writeFileSync(markerPath, `${JSON.stringify({
|
||||
schemaVersion: 2,
|
||||
const marker = {
|
||||
schemaVersion: 3,
|
||||
status: "warming",
|
||||
startedAt: new Date(now).toISOString(),
|
||||
command,
|
||||
cwd: projectRoot,
|
||||
args,
|
||||
scope,
|
||||
siftBinary: detection.binaryPath,
|
||||
hardTimeoutSec: wrapper?.timeoutSec ?? null,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
tmpDir: runtimeDirs.tmpDir,
|
||||
}, null, 2)}\n`, "utf-8");
|
||||
};
|
||||
writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
|
||||
const child = (options.spawnFn ?? spawn)(command, args, {
|
||||
cwd: projectRoot,
|
||||
env: childEnv,
|
||||
stdio: "ignore",
|
||||
detached: true,
|
||||
});
|
||||
marker.pid = child.pid ?? null;
|
||||
writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
|
||||
child.unref();
|
||||
return {
|
||||
status: "started",
|
||||
|
|
@ -507,17 +795,23 @@ function buildSiftContextLines(projectRoot, prefs, env = process.env) {
|
|||
}
|
||||
else if (detection.status === "configured" && detection.binaryPath) {
|
||||
lines.push(`- Sift: configured as local CLI \`${detection.binaryPath}\`.`);
|
||||
lines.push("- Use Sift for broad code retrieval before manual file-by-file reading, " +
|
||||
"especially conceptual queries, exact identifiers, approximate file/path intent, and synthesis-ready snippets.");
|
||||
lines.push("- Tool: `sift_search` exposes the full Sift CLI surface — use it for agentic multi-turn search, " +
|
||||
"explicit strategy selection, and planner configuration.");
|
||||
lines.push("- Tool: `codebase_search` is the platform-level wrapper — use it for simple conceptual queries.");
|
||||
lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`);
|
||||
lines.push("- Use Sift with explicit, narrow paths after quick `grep`/`find`/`ls` orientation; avoid root-scope searches unless status proves they are responsive.");
|
||||
lines.push("- Tool: `sift_search` exposes the full Sift CLI surface — prefer direct `bm25`, `path-hybrid`, or `page-index-hybrid` with a scoped `path`.");
|
||||
lines.push("- Tool: `codebase_search` is the platform-level wrapper — use it only with a scoped `scope` when possible.");
|
||||
lines.push("- Strategy guide: `page-index-hybrid` (strongest recall + structural reranking), " +
|
||||
"`path-hybrid` (filename/path-heavy), `bm25` (fast lexical-only), `vector` (semantic-only).");
|
||||
lines.push("- Agent mode: enable `agent: true` on `sift_search` for multi-turn research. " +
|
||||
"Use `agentMode: 'graph'` for disconnected code regions and `plannerStrategy: 'model-driven'` for LLM-guided planning.");
|
||||
lines.push("- SF runs Sift warmup with a project-scoped `SIFT_SEARCH_CACHE` under `.sf/runtime/sift/` while leaving model cache shared; " +
|
||||
"if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.");
|
||||
lines.push("- If Sift is slow, empty, or times out, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.");
|
||||
}
|
||||
else if (detection.status === "warming" && detection.binaryPath) {
|
||||
lines.push(`- Sift: installed at \`${detection.binaryPath}\`; repo-local index warmup is running.`);
|
||||
lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`);
|
||||
lines.push("- Use grep/find/ls and `.sf/CODEBASE.md` for broad orientation while warmup runs. Use narrow `sift_search` paths if needed; broad root-scope Sift may still be cold.");
|
||||
}
|
||||
else if (detection.status === "degraded" && detection.binaryPath) {
|
||||
lines.push(`- Sift: installed at \`${detection.binaryPath}\` but degraded for this repo: ${detection.reason}.`);
|
||||
lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`);
|
||||
lines.push("- Do not use broad Sift/codebase_search as the first exploration step. Prefer native `grep`/`find`/`ls`, `.sf/CODEBASE.md`, and narrow `sift_search` only after reducing scope.");
|
||||
}
|
||||
else {
|
||||
lines.push("- Sift: not available. This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.");
|
||||
|
|
@ -537,7 +831,7 @@ export function resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, e
|
|||
if (prefs?.indexer_backend)
|
||||
return prefs.indexer_backend;
|
||||
const sift = detectSift(projectRoot, prefs, env);
|
||||
if (sift.status === "configured")
|
||||
if (["configured", "warming", "degraded"].includes(sift.status))
|
||||
return "sift";
|
||||
return "projectRag";
|
||||
}
|
||||
|
|
@ -606,8 +900,32 @@ export function formatSiftStatus(projectRoot, prefs, env = process.env) {
|
|||
lines.push(`Command: ${detection.command}`);
|
||||
if (detection.binaryPath)
|
||||
lines.push(`Binary: ${detection.binaryPath}`);
|
||||
if (detection.searchCache)
|
||||
lines.push(`Search cache: ${detection.searchCache}`);
|
||||
if (detection.tmpDir)
|
||||
lines.push(`Temp dir: ${detection.tmpDir}`);
|
||||
if (detection.probePath)
|
||||
lines.push(`Health probe scope: ${detection.probePath}`);
|
||||
if (detection.markerPath)
|
||||
lines.push(`Warmup marker: ${detection.markerPath}`);
|
||||
if (detection.cacheInspection?.polluted) {
|
||||
lines.push("Cache integrity: polluted - ignored/generated paths were found in repo-local Sift manifests.");
|
||||
for (const sample of detection.cacheInspection.samples ?? []) {
|
||||
lines.push(`Cache sample (${sample.label}): ${sample.sample}`);
|
||||
}
|
||||
lines.push("Action: remove .sf/runtime/sift/search-cache and warm Sift again from the repo root.");
|
||||
}
|
||||
else if (detection.cacheInspection?.inspected) {
|
||||
lines.push("Cache integrity: ok - no ignored/generated path samples found in inspected manifests.");
|
||||
}
|
||||
if (detection.status === "configured" && detection.command) {
|
||||
lines.push(`Operational: ${commandExists(detection.command, env) ? "yes" : "no - configured command is missing"}`);
|
||||
lines.push(`Operational: ${commandExists(detection.command, env) ? "yes - scoped health probe passed" : "no - configured command is missing"}`);
|
||||
}
|
||||
else if (detection.status === "warming" && detection.command) {
|
||||
lines.push("Operational: warming - binary exists and repo-local index warmup is running. Give Sift time on CPU before broad searches.");
|
||||
}
|
||||
else if (detection.status === "degraded" && detection.command) {
|
||||
lines.push("Operational: degraded - binary exists, but the bounded scoped health probe failed. Use narrow paths or fallback search.");
|
||||
}
|
||||
else {
|
||||
lines.push("Operational: no - install rupurt/sift on PATH or set SIFT_PATH.");
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ Before anything else, form a diagnosis: What is the core challenge? What is brok
|
|||
- **Measure coverage**: find untested critical paths
|
||||
- **Scan for dead code, stubs, and commented-out features** — abandoned attempts are signals
|
||||
- **Discover needed skills**: identify repo languages, frameworks, data stores, external services, build tools, and domain-specific competencies. Check installed skills first; record installed, missing, and potentially useful skills in `.sf/CODEBASE.md` and `.sf/PM-STRATEGY.md`.
|
||||
- **Use code intelligence**: use `codebase_search` (or Project RAG tools if configured) as your PRIMARY exploration method for conceptual, behavioral, or architectural discovery before manually reading files. Use `sift_search` for agentic multi-turn research or explicit strategy selection. Fall back to `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, and `lsp` only for exact matches or structural navigation.
|
||||
- **Use code intelligence**: start with `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when the `PROJECT CODE INTELLIGENCE` block says Sift is healthy enough for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use Project RAG tools first for broad retrieval if Project RAG is configured.
|
||||
- Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. Fall back to shell `rg`, `find`, `ast-grep`, or `ls -la` only when the native/in-process tool surface is insufficient.
|
||||
|
||||
### Step 2: Check library and ecosystem facts
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ After reflection is confirmed, decide the approach based on the actual scope —
|
|||
|
||||
Before asking your first question, do a mandatory investigation pass. This is not optional.
|
||||
|
||||
1. **Scout the codebase** — use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
1. **Scout the codebase** — start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when the `PROJECT CODE INTELLIGENCE` block says Sift is healthy enough for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
2. **Check library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) as the default for any GitHub-hosted library or framework the user mentioned. Fall back to `resolve_library` / `get_library_docs` (Context7) for npm/pypi/crates packages DeepWiki doesn't have. **Context7 free tier is capped at 1000 req/month — spend those on cases DeepWiki can't cover.** Get current facts about capabilities, constraints, API shapes, version-specific behavior.
|
||||
3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the user referenced external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough.
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ Apply `pm-planning` skill thinking throughout: use Working Backwards to anchor o
|
|||
### Before your first question round
|
||||
|
||||
Do a lightweight targeted investigation so your questions are grounded in reality:
|
||||
- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP search tools for broad concept, symbol, schema, and git-history lookup before manually reading files
|
||||
- Check the roadmap context above (if present) to understand what surrounds this milestone
|
||||
- **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier).
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ Your goal is **not** to center the discussion on tech stack trivia, naming conve
|
|||
### Before your first question round
|
||||
|
||||
Do a lightweight targeted investigation so your questions are grounded in reality:
|
||||
- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- Check the roadmap context above to understand what surrounds this slice — what comes before, what depends on it
|
||||
- **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier).
|
||||
- Identify the 3–5 biggest behavioural unknowns: things where the user's answer will materially change what gets built
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ Never fabricate or simulate user input during this discussion. Never generate fa
|
|||
|
||||
- Check library docs **DeepWiki first** (`ask_question` / `read_wiki_structure` / `read_wiki_contents`) for any GitHub-hosted library or framework — AI-indexed, no free-tier cap. Fall back to Context7 (`resolve_library` / `get_library_docs`) for npm/pypi/crates packages DeepWiki doesn't cover. Context7 free tier is 1000 req/month — don't spend those on cases DeepWiki covers.
|
||||
- Do web searches (`search-the-web`) to verify the landscape — what solutions exist, what's changed recently, what's the current best practice. Use `freshness` for recency-sensitive queries, `domain` to target specific sites. Use `fetch_page` to read the full content of promising URLs when snippets aren't enough. **Budget:** You have a limited number of web searches per turn (typically 3-5). Prefer DeepWiki → Context7 → web search for docs; use `search_and_read` for one-shot topic research. Do NOT repeat the same or similar queries. Distribute searches across turns rather than clustering them.
|
||||
- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
|
||||
Don't go deep — just enough that your next question reflects what's actually true rather than what you assume.
|
||||
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ Templates showing the expected format for each artifact type are in:
|
|||
|
||||
**Code navigation:** Use `lsp` for definition, type_definition, implementation, references, incoming_calls, outgoing_calls, hover, signature, symbols, rename, code_actions, format, and diagnostics. Falls back gracefully if no server is available. Never `grep` for a symbol definition when `lsp` can resolve it semantically. Never shell out to prettier/rustfmt/gofmt when `lsp format` is available. After editing code, use `lsp diagnostics` to verify no type errors were introduced.
|
||||
|
||||
**Codebase exploration:** For conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"), use `codebase_search` first. Its hybrid BM25+Vector retrieval is significantly more effective than grep for navigating unfamiliar logic. For Sift-specific features — agentic multi-turn search, explicit strategy selection, or planner configuration — use `sift_search`. Strategy guide: `page-index-hybrid` (strongest recall + structural reranking, default), `path-hybrid` (filename/path-heavy queries), `bm25` (fast lexical-only), `vector` (semantic-only). Enable `agent: true` with `agentMode: 'graph'` for deep multi-turn research across disconnected code regions, or `plannerStrategy: 'model-driven'` for LLM-guided planning. Use in-process SF tools like `grep` for exact text matches when you already have a specific identifier, and `find`/`ls` for literal filesystem discovery. Use `lsp` for structural navigation (definitions, references). Use `.sf/CODEBASE.md` for durable orientation. If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Never read files one-by-one to "explore" — search first, then read what's relevant.
|
||||
**Codebase exploration:** Start broad orientation with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp`. Use `codebase_search` for conceptual, behavioral, or architectural discovery only after choosing a narrow scope and checking the `PROJECT CODE INTELLIGENCE` block; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. For Sift-specific features — explicit strategy selection or planner configuration — use `sift_search` with a scoped `path`. Strategy guide: `bm25` (fast lexical), `path-hybrid` (filename/path-heavy queries), `page-index-hybrid` (stronger recall + reranking), `vector` (semantic-only). Each repo uses its own Sift cache under `.sf/runtime/sift/`; do not rely on a shared/global Sift database. Use `lsp` for structural navigation (definitions, references). If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Never read files one-by-one to "explore" — search first, then read what's relevant.
|
||||
|
||||
**Swarm dispatch:** Let the system decide whether swarming fits before dispatching multiple execution subagents. Use a 2-3 worker same-model swarm only when the work splits into independent shards with explicit file/directory ownership, shard-local verification, low conflict risk, and clear wall-clock savings. Do not swarm shared-interface edits, lockfiles, migrations, single-failure debugging, or sequence-dependent work. The parent agent remains coordinator: assign ownership, synthesize results, inspect dirty files, resolve conflicts, and run final verification.
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,133 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { chmodSync, existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
import {
|
||||
buildSiftEnv,
|
||||
detectSift,
|
||||
ensureSiftRuntimeDirs,
|
||||
resolveSiftSearchScope,
|
||||
resolveSiftWarmupRuntimeDirs,
|
||||
resolveEffectiveCodebaseIndexerBackendName,
|
||||
} from "../code-intelligence.js";
|
||||
|
||||
// Temp directories created by the helpers below; every test run registers
// its throwaway dirs here and afterEach deletes them again.
const tmpRoots = [];

afterEach(() => {
  // Drain the registry so the next test starts from a clean slate, removing
  // each registered directory tree as we go.
  while (tmpRoots.length > 0) {
    const dir = tmpRoots.pop();
    rmSync(dir, { recursive: true, force: true });
  }
});
|
||||
|
||||
/**
 * Create a disposable project directory containing a single `src/index.js`
 * source file, register it for afterEach cleanup, and return its root path.
 */
function makeProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-sift-project-"));
  tmpRoots.push(projectRoot);
  const srcDir = join(projectRoot, "src");
  mkdirSync(srcDir, { recursive: true });
  writeFileSync(join(srcDir, "index.js"), "export const value = 1;\n");
  return projectRoot;
}
|
||||
|
||||
/**
 * Write an executable shell script that stands in for the real `sift`
 * binary, register its directory for cleanup, and return both paths.
 */
function makeFakeSift(script) {
  const binDir = mkdtempSync(join(tmpdir(), "sf-sift-bin-"));
  tmpRoots.push(binDir);
  const binPath = join(binDir, "sift");
  writeFileSync(binPath, script);
  // Must be executable so the detector can actually spawn it.
  chmodSync(binPath, 0o755);
  return { dir: binDir, bin: binPath };
}
|
||||
|
||||
test("buildSiftEnv_uses_project_scoped_cache_from_project_root", () => {
|
||||
const projectRoot = makeProject();
|
||||
|
||||
const env = buildSiftEnv(projectRoot, { PATH: "/usr/bin" });
|
||||
|
||||
assert.equal(env.SIFT_SEARCH_CACHE, join(projectRoot, ".sf", "runtime", "sift", "search-cache"));
|
||||
assert.equal(env.TMPDIR, join(projectRoot, ".sf", "runtime", "sift", "tmp"));
|
||||
});
|
||||
|
||||
test("resolveSiftSearchScope_normalizes_project_absolute_paths_to_relative", () => {
|
||||
const projectRoot = makeProject();
|
||||
|
||||
assert.equal(resolveSiftSearchScope(projectRoot), ".");
|
||||
assert.equal(resolveSiftSearchScope(projectRoot, projectRoot), ".");
|
||||
assert.equal(resolveSiftSearchScope(projectRoot, join(projectRoot, "src")), "src");
|
||||
assert.equal(resolveSiftSearchScope(projectRoot, "src"), "src");
|
||||
});
|
||||
|
||||
test("ensureSiftRuntimeDirs_creates_repo_local_cache_directories", () => {
|
||||
const projectRoot = makeProject();
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
|
||||
assert.equal(dirs.searchCache, join(projectRoot, ".sf", "runtime", "sift", "search-cache"));
|
||||
assert.ok(existsSync(dirs.searchCache));
|
||||
assert.ok(existsSync(dirs.tmpDir));
|
||||
});
|
||||
|
||||
test("detectSift_when_probe_times_out_reports_degraded_not_configured", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nsleep 5\n");
|
||||
|
||||
const result = detectSift(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "50",
|
||||
});
|
||||
|
||||
assert.equal(result.status, "degraded");
|
||||
assert.equal(result.binaryPath, bin);
|
||||
assert.match(result.reason, /health probe timed out/i);
|
||||
});
|
||||
|
||||
test("resolveEffectiveCodebaseIndexerBackendName_when_sift_is_cold_still_selects_sift", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nsleep 5\n");
|
||||
|
||||
const result = resolveEffectiveCodebaseIndexerBackendName(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "50",
|
||||
});
|
||||
|
||||
assert.equal(result, "sift");
|
||||
});
|
||||
|
||||
test("detectSift_when_probe_succeeds_reports_project_cache", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
|
||||
const result = detectSift(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
|
||||
});
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
|
||||
assert.equal(result.status, "configured");
|
||||
assert.equal(result.binaryPath, bin);
|
||||
assert.equal(result.searchCache, dirs.searchCache);
|
||||
assert.equal(result.tmpDir, dirs.tmpDir);
|
||||
});
|
||||
|
||||
test("detectSift_when_cache_manifest_contains_ignored_paths_reports_degraded", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
const manifestDir = join(dirs.searchCache, "artifacts", "manifests");
|
||||
mkdirSync(manifestDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(manifestDir, "bad"),
|
||||
`./src/index.js\n${projectRoot}/.claude/worktrees/agent/src/index.js\n`,
|
||||
);
|
||||
|
||||
const result = detectSift(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
|
||||
});
|
||||
|
||||
assert.equal(result.status, "degraded");
|
||||
assert.match(result.reason, /cache contains ignored\/generated paths/i);
|
||||
assert.equal(result.cacheInspection.polluted, true);
|
||||
});
|
||||
|
|
@ -11,7 +11,7 @@
|
|||
*/
|
||||
import { execFile } from "node:child_process";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { resolveSiftBinary } from "../code-intelligence.js";
|
||||
import { buildSiftEnv, ensureSiftRuntimeDirs, resolveSiftBinary, resolveSiftSearchScope } from "../code-intelligence.js";
|
||||
|
||||
const KNOWN_STRATEGIES = [
|
||||
"hybrid",
|
||||
|
|
@ -34,7 +34,8 @@ const DEFAULT_TIMEOUT_MS = 60_000;
|
|||
/**
|
||||
* Build the sift CLI argument list from tool parameters.
|
||||
*/
|
||||
function buildSiftArgs(params) {
|
||||
function buildSiftArgs(params, projectRoot = process.cwd()) {
|
||||
const scope = resolveSiftSearchScope(projectRoot, params.path);
|
||||
const args = [
|
||||
"search",
|
||||
"--json",
|
||||
|
|
@ -57,7 +58,7 @@ function buildSiftArgs(params) {
|
|||
}
|
||||
|
||||
// Path and query are positional
|
||||
args.push(params.path ?? ".", params.query);
|
||||
args.push(scope, params.query);
|
||||
return args;
|
||||
}
|
||||
|
||||
|
|
@ -104,13 +105,16 @@ function parseSiftOutput(rawStdout, rawStderr) {
|
|||
/**
|
||||
* Execute a sift search with the given parameters.
|
||||
*/
|
||||
function runSift(binaryPath, args, timeoutMs) {
|
||||
function runSift(binaryPath, args, timeoutMs, projectRoot) {
|
||||
return new Promise((resolve, reject) => {
|
||||
ensureSiftRuntimeDirs(projectRoot);
|
||||
const child = execFile(
|
||||
binaryPath,
|
||||
args,
|
||||
{
|
||||
cwd: projectRoot,
|
||||
encoding: "utf-8",
|
||||
env: buildSiftEnv(projectRoot, process.env),
|
||||
maxBuffer: 16 * 1024 * 1024,
|
||||
timeout: timeoutMs,
|
||||
},
|
||||
|
|
@ -154,7 +158,7 @@ export function registerSiftSearchTool(pi) {
|
|||
}),
|
||||
path: Type.Optional(
|
||||
Type.String({
|
||||
description: "Directory or file path to search within. Default: current directory ('.').",
|
||||
description: "Directory or file path to search within. Default: repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.",
|
||||
default: ".",
|
||||
}),
|
||||
),
|
||||
|
|
@ -236,14 +240,17 @@ export function registerSiftSearchTool(pi) {
|
|||
};
|
||||
}
|
||||
|
||||
const args = buildSiftArgs(params);
|
||||
const projectRoot = process.cwd();
|
||||
const args = buildSiftArgs(params, projectRoot);
|
||||
const scope = args.at(-2) ?? ".";
|
||||
const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
||||
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
const { stdout, stderr } = await runSift(binaryPath, args, timeoutMs);
|
||||
const { stdout, stderr } = await runSift(binaryPath, args, timeoutMs, projectRoot);
|
||||
const elapsedMs = Date.now() - startedAt;
|
||||
const result = parseSiftOutput(stdout, stderr);
|
||||
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
|
||||
// Telemetry: log query outcomes for tuning
|
||||
const { logInfo } = await import("../workflow-logger.js");
|
||||
|
|
@ -251,14 +258,18 @@ export function registerSiftSearchTool(pi) {
|
|||
query: params.query,
|
||||
strategy: params.strategy ?? DEFAULT_STRATEGY,
|
||||
agent: params.agent ?? false,
|
||||
path: scope,
|
||||
hitCount: result.hits.length,
|
||||
elapsedMs,
|
||||
binary: binaryPath,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
});
|
||||
|
||||
const lines = [
|
||||
`Sift search: "${params.query}"`,
|
||||
`Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`,
|
||||
`Scope: ${scope}`,
|
||||
`Search cache: ${runtimeDirs.searchCache}`,
|
||||
`Hits: ${result.hits.length} | Elapsed: ${elapsedMs}ms`,
|
||||
"",
|
||||
];
|
||||
|
|
@ -288,6 +299,7 @@ export function registerSiftSearchTool(pi) {
|
|||
elapsedMs,
|
||||
hitCount: result.hits.length,
|
||||
hits: result.hits,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import { StringEnum } from "@singularity-forge/pi-ai";
|
|||
import { getMarkdownTheme, } from "@singularity-forge/pi-coding-agent";
|
||||
import { Container, Markdown, Spacer, Text } from "@singularity-forge/pi-tui";
|
||||
import { CmuxClient, shellEscape } from "../cmux/index.js";
|
||||
import { buildSiftEnv, resolveSiftBinary } from "../sf/code-intelligence.js";
|
||||
import { buildSiftEnv, ensureSiftRuntimeDirs, resolveSiftBinary, resolveSiftSearchScope } from "../sf/code-intelligence.js";
|
||||
import { loadEffectiveSFPreferences } from "../sf/preferences.js";
|
||||
import { formatTokenCount } from "../shared/mod.js";
|
||||
import { getCurrentPhase } from "../shared/sf-phase-state.js";
|
||||
|
|
@ -1789,7 +1789,7 @@ export default function (pi) {
|
|||
description: "Natural-language query describing what to explore (e.g. 'find where the write gate tool_call hooks are registered')",
|
||||
}),
|
||||
scope: Type.Optional(Type.String({
|
||||
description: "Path to search within. Defaults to the current working directory. Use the active worktree for isolation.",
|
||||
description: "Path to search within. Defaults to repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.",
|
||||
})),
|
||||
strategy: Type.Optional(Type.String({
|
||||
description: "Search strategy: 'path-hybrid' (default), 'page-index-hybrid', 'bm25', or 'path'",
|
||||
|
|
@ -1802,22 +1802,23 @@ export default function (pi) {
|
|||
name: "codebase_search",
|
||||
label: "Code Search",
|
||||
description: [
|
||||
"Perform Sift-backed hybrid (BM25 + Vector) retrieval over a codebase scope.",
|
||||
" Use this as your PRIMARY exploration tool for conceptual, behavioral, or cross-cutting questions",
|
||||
"Perform Sift-backed hybrid (BM25 + Vector) retrieval over a scoped codebase path.",
|
||||
" Use this for conceptual, behavioral, or cross-cutting questions only after choosing a narrow scope",
|
||||
" (e.g. 'how is X handled?', 'where is the logic for Y?', 'find examples of Z').",
|
||||
" It is significantly more effective than grep for discovering unfamiliar logic and architecture.",
|
||||
" If Sift status is degraded or the scope is broad, prefer grep/find/ls and retry with a narrower scope.",
|
||||
].join(""),
|
||||
promptGuidelines: [
|
||||
"Use codebase_search BEFORE grep when exploring unfamiliar areas or conceptual patterns.",
|
||||
"Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns.",
|
||||
" page-index-hybrid (default): Use for 'How' and 'Why' questions (logic, implementation, reasoning).",
|
||||
" path-hybrid: Use for 'Where' questions (architecture, directory structure, file location).",
|
||||
" Keep scope narrow enough to avoid root-level Sift timeouts; each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.",
|
||||
" Be descriptive in your query: include function names, types, or intent (e.g. 'auth middleware validation').",
|
||||
" This tool is read-only and optimized for evidence gathering before you plan or edit.",
|
||||
],
|
||||
parameters: CodebaseSearchParams,
|
||||
renderCall(args, theme) {
|
||||
const query = typeof args.query === "string" ? args.query : "";
|
||||
const scope = typeof args.scope === "string" ? args.scope : process.cwd();
|
||||
const scope = resolveSiftSearchScope(process.cwd(), typeof args.scope === "string" ? args.scope : undefined);
|
||||
const strategy = typeof args.strategy === "string" ? args.strategy : "page-index-hybrid";
|
||||
const preview = query.length > 90 ? `${query.slice(0, 89).trimEnd()}…` : query;
|
||||
const scopeLabel = scope.length > 70
|
||||
|
|
@ -1860,7 +1861,8 @@ export default function (pi) {
|
|||
return new Text(rendered, 0, 0);
|
||||
},
|
||||
async execute(_toolCallId, params, signal) {
|
||||
const scope = params.scope ?? process.cwd();
|
||||
const projectRoot = process.cwd();
|
||||
const scope = resolveSiftSearchScope(projectRoot, params.scope);
|
||||
const strategy = params.strategy ?? "page-index-hybrid";
|
||||
const query = params.query;
|
||||
const timeoutMs = typeof params.timeoutMs === "number" &&
|
||||
|
|
@ -1868,14 +1870,33 @@ export default function (pi) {
|
|||
? Math.max(1_000, params.timeoutMs)
|
||||
: CODEBASE_SEARCH_TIMEOUT_MS;
|
||||
const siftBin = resolveSiftBinary();
|
||||
if (!siftBin) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "codebase_search unavailable: sift binary not found. Use grep/find/ls or set SIFT_PATH.",
|
||||
},
|
||||
],
|
||||
details: {
|
||||
operation: "codebase_search",
|
||||
exitCode: 127,
|
||||
query,
|
||||
scope,
|
||||
strategy,
|
||||
timeoutMs,
|
||||
},
|
||||
};
|
||||
}
|
||||
const args = buildCodebaseSearchArgs(strategy, query, scope);
|
||||
const stderr = [];
|
||||
const stdout = [];
|
||||
let wasAborted = false;
|
||||
let timedOut = false;
|
||||
const childEnv = buildSiftEnv(scope, process.env);
|
||||
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
const childEnv = buildSiftEnv(projectRoot, process.env);
|
||||
const proc = spawn(siftBin, args, {
|
||||
cwd: scope,
|
||||
cwd: projectRoot,
|
||||
env: childEnv,
|
||||
shell: false,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
|
|
@ -1951,6 +1972,7 @@ export default function (pi) {
|
|||
scope,
|
||||
strategy,
|
||||
timeoutMs,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -1977,6 +1999,7 @@ export default function (pi) {
|
|||
scope,
|
||||
strategy,
|
||||
timeoutMs,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -1995,6 +2018,7 @@ export default function (pi) {
|
|||
exitCode,
|
||||
siftBin,
|
||||
timeoutMs,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue