fix: harden sift codebase indexing
This commit is contained in:
parent
5b9355fa74
commit
ee836142ed
13 changed files with 544 additions and 53 deletions
|
|
@ -8,7 +8,7 @@ You are a scout. Quickly investigate a codebase and return structured findings t
|
|||
|
||||
Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. These keep exploration inside SF's tool surface and use native backends where available.
|
||||
|
||||
Use `codebase_search` as your PRIMARY tool for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"). It uses Sift-backed hybrid BM25/vector retrieval and is significantly more effective than grep for navigating unfamiliar logic. Use `sift_search` when you need agentic multi-turn research, explicit strategy selection (e.g. `page-index-hybrid`, `path-hybrid`), or planner configuration. Use exact text search (`grep`) only when you already have a specific identifier or filename in mind. You are still the scout role; Sift is the powerful primitive you should lead with for exploration.
|
||||
Use `grep`, `find`, and `ls` for broad orientation first. Use `codebase_search` for conceptual, behavioral, or architectural discovery only with a narrow scope and when the project code-intelligence status says Sift is healthy enough for this repo. Use `sift_search` when you need explicit strategy selection (e.g. `bm25`, `path-hybrid`, `page-index-hybrid`) and a scoped path. If Sift is degraded, slow, or empty, fall back to grep/find/ls and direct reads. Each repo has its own Sift cache under `.sf/runtime/sift/`.
|
||||
|
||||
Your output will be passed to an agent who has NOT seen the files you explored.
|
||||
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ through these tiers IN ORDER. Skip a tier only when it has been demonstrably
|
|||
exhausted, not just because the next tier is faster.
|
||||
|
||||
Tier 1 — Code lookup:
|
||||
- sift / codebase_search for symbols, patterns, prior usages
|
||||
- grep/find/ls for broad orientation; scoped sift / codebase_search for symbols, patterns, prior usages when Sift status is healthy for the repo
|
||||
- Read source files (Read tool, file paths from PLAN/CODEBASE)
|
||||
- Inspect .sf/DECISIONS.md, .sf/KNOWLEDGE.md, docs/design-docs/, docs/records/
|
||||
- Check tests for documented behavior
|
||||
|
|
|
|||
|
|
@ -2,7 +2,11 @@ export const PROJECT_RAG_MCP_SERVER_NAME: string;
|
|||
export function detectProjectRag(projectRoot: string, prefs: Record<string, unknown>, env?: NodeJS.ProcessEnv): unknown;
|
||||
export function resolveProjectRagBinary(env?: NodeJS.ProcessEnv): string | null;
|
||||
export function resolveSiftBinary(env?: NodeJS.ProcessEnv): string | null;
|
||||
export function detectSift(_projectRoot: string, prefs: Record<string, unknown>, env?: NodeJS.ProcessEnv): unknown;
|
||||
export function resolveSiftWarmupRuntimeDirs(projectRoot: string): { searchCache: string; tmpDir: string };
|
||||
export function ensureSiftRuntimeDirs(projectRoot: string): { searchCache: string; tmpDir: string };
|
||||
export function buildSiftEnv(projectRoot: string, env: NodeJS.ProcessEnv): NodeJS.ProcessEnv;
|
||||
export function resolveSiftSearchScope(projectRoot: string, scope?: string): string;
|
||||
export function detectSift(projectRoot: string, prefs: Record<string, unknown>, env?: NodeJS.ProcessEnv): unknown;
|
||||
export function ensureSiftIndexWarmup(projectRoot: string, prefs: Record<string, unknown>, options?: Record<string, unknown>): Promise<unknown>;
|
||||
export function resolveProjectRagBuildJobs(env?: NodeJS.ProcessEnv): number;
|
||||
export function findProjectRagSourceDir(projectRoot: string, env?: NodeJS.ProcessEnv): string | null;
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
* accelerators for local code retrieval.
|
||||
*/
|
||||
import { spawn, spawnSync } from "node:child_process";
|
||||
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync, } from "node:fs";
|
||||
import { delimiter, join, resolve } from "node:path";
|
||||
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "node:fs";
|
||||
import { delimiter, isAbsolute, join, relative, resolve } from "node:path";
|
||||
export const PROJECT_RAG_MCP_SERVER_NAME = "project-rag";
|
||||
const PROJECT_RAG_BINARY_NAME = process.platform === "win32" ? "project-rag.exe" : "project-rag";
|
||||
const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift";
|
||||
|
|
@ -22,8 +22,17 @@ const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000;
|
|||
const DEFAULT_SIFT_WARMUP_QUERY = "repo architecture source tests entrypoints configuration";
|
||||
const DEFAULT_SIFT_WARMUP_LIMIT = 1;
|
||||
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000;
|
||||
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 30;
|
||||
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 600;
|
||||
const SIFT_WARMUP_KILL_GRACE_SEC = 10;
|
||||
const DEFAULT_SIFT_HEALTH_TIMEOUT_MS = 60_000;
|
||||
const SIFT_HEALTH_CACHE = new Map();
|
||||
const SIFT_CACHE_POLLUTION_PATTERNS = [
|
||||
{ label: ".claude worktrees", pattern: /(?:^|[/\\])\.claude[/\\]/ },
|
||||
{ label: ".git internals", pattern: /(?:^|[/\\])\.git[/\\]/ },
|
||||
{ label: "dist-test output", pattern: /(?:^|[/\\])dist-test[/\\]/ },
|
||||
{ label: "node_modules", pattern: /(?:^|[/\\])node_modules[/\\]/ },
|
||||
{ label: "package dist output", pattern: /(?:^|[/\\])packages[/\\][^/\\]+[/\\]dist[/\\]/ },
|
||||
];
|
||||
export function resolveSiftWarmupRuntimeDirs(projectRoot) {
|
||||
const runtimeRoot = join(projectRoot, ".sf", "runtime", "sift");
|
||||
return {
|
||||
|
|
@ -31,6 +40,20 @@ export function resolveSiftWarmupRuntimeDirs(projectRoot) {
|
|||
tmpDir: join(runtimeRoot, "tmp"),
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Ensure the repo-local Sift runtime directories exist.
|
||||
*
|
||||
* Purpose: keep Sift's search database scoped to the current repository instead
|
||||
* of sharing a process-global cache across unrelated projects.
|
||||
*
|
||||
* Consumer: Sift warmup, status probes, `sift_search`, and `codebase_search`.
|
||||
*/
|
||||
export function ensureSiftRuntimeDirs(projectRoot) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
mkdirSync(dirs.searchCache, { recursive: true });
|
||||
mkdirSync(dirs.tmpDir, { recursive: true });
|
||||
return dirs;
|
||||
}
|
||||
export function buildSiftEnv(projectRoot, env) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
return {
|
||||
|
|
@ -39,6 +62,27 @@ export function buildSiftEnv(projectRoot, env) {
|
|||
TMPDIR: dirs.tmpDir,
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Resolve a Sift search scope to the form Sift's local ignore matcher expects.
|
||||
*
|
||||
* Purpose: preserve `.siftignore` semantics by running Sift from the repository
|
||||
* root with repo-relative scopes instead of absolute paths.
|
||||
*
|
||||
* Consumer: Sift warmup, `sift_search`, and `codebase_search`.
|
||||
*/
|
||||
export function resolveSiftSearchScope(projectRoot, scope) {
|
||||
const normalizedRoot = normalizeProjectRoot(projectRoot);
|
||||
const requested = typeof scope === "string" && scope.trim() ? scope.trim() : ".";
|
||||
const absolute = isAbsolute(requested)
|
||||
? resolve(requested)
|
||||
: resolve(normalizedRoot, requested);
|
||||
const rel = relative(normalizedRoot, absolute);
|
||||
if (!rel)
|
||||
return ".";
|
||||
if (!rel.startsWith("..") && !isAbsolute(rel))
|
||||
return rel;
|
||||
return requested;
|
||||
}
|
||||
function readJsonConfig(configPath) {
|
||||
if (!existsSync(configPath))
|
||||
return {};
|
||||
|
|
@ -195,7 +239,156 @@ export function resolveSiftBinary(env = process.env) {
|
|||
return (lookupExecutable(SIFT_BINARY_NAME, env) ??
|
||||
(SIFT_BINARY_NAME === "sift" ? null : lookupExecutable("sift", env)));
|
||||
}
|
||||
export function detectSift(_projectRoot, prefs, env = process.env) {
|
||||
function resolveSiftHealthTimeoutMs(env) {
|
||||
const raw = env.SF_SIFT_HEALTH_TIMEOUT_MS?.trim();
|
||||
if (!raw)
|
||||
return DEFAULT_SIFT_HEALTH_TIMEOUT_MS;
|
||||
const parsed = Number.parseInt(raw, 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_SIFT_HEALTH_TIMEOUT_MS;
|
||||
}
|
||||
function resolveSiftHealthProbePath(projectRoot) {
|
||||
for (const candidate of ["src", "packages", "tests"]) {
|
||||
const absolute = join(projectRoot, candidate);
|
||||
if (existsSync(absolute))
|
||||
return candidate;
|
||||
}
|
||||
return ".";
|
||||
}
|
||||
function runSiftHealthProbe(projectRoot, binaryPath, env) {
|
||||
const normalizedRoot = normalizeProjectRoot(projectRoot);
|
||||
const timeoutMs = resolveSiftHealthTimeoutMs(env);
|
||||
const probePath = resolveSiftHealthProbePath(normalizedRoot);
|
||||
const cacheKey = [
|
||||
normalizedRoot,
|
||||
binaryPath,
|
||||
env.SIFT_PATH ?? "",
|
||||
env.SF_SIFT_HEALTH_TIMEOUT_MS ?? "",
|
||||
env.SF_SIFT_HEALTHCHECK_DISABLE ?? "",
|
||||
].join("\0");
|
||||
if (SIFT_HEALTH_CACHE.has(cacheKey))
|
||||
return SIFT_HEALTH_CACHE.get(cacheKey);
|
||||
const dirs = ensureSiftRuntimeDirs(normalizedRoot);
|
||||
if (env.SF_SIFT_HEALTHCHECK_DISABLE === "1") {
|
||||
const skipped = {
|
||||
ok: true,
|
||||
probePath,
|
||||
timeoutMs,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
reason: "sift health probe disabled",
|
||||
};
|
||||
SIFT_HEALTH_CACHE.set(cacheKey, skipped);
|
||||
return skipped;
|
||||
}
|
||||
const result = spawnSync(binaryPath, [
|
||||
"search",
|
||||
"--json",
|
||||
"--strategy",
|
||||
"bm25",
|
||||
"--limit",
|
||||
"1",
|
||||
"--retriever-timeout-ms",
|
||||
String(Math.min(timeoutMs, 1_000)),
|
||||
probePath,
|
||||
"function",
|
||||
], {
|
||||
cwd: normalizedRoot,
|
||||
env: buildSiftEnv(normalizedRoot, env),
|
||||
encoding: "utf-8",
|
||||
maxBuffer: 1024 * 1024,
|
||||
timeout: timeoutMs,
|
||||
});
|
||||
const probe = {
|
||||
ok: result.status === 0,
|
||||
probePath,
|
||||
timeoutMs,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
status: result.status,
|
||||
signal: result.signal,
|
||||
stderr: result.stderr,
|
||||
reason: "",
|
||||
};
|
||||
if (probe.ok) {
|
||||
probe.reason = `sift scoped health probe passed for ${probePath}`;
|
||||
}
|
||||
else if (result.error?.code === "ETIMEDOUT" || result.signal) {
|
||||
probe.reason = `sift scoped health probe timed out after ${timeoutMs}ms for ${probePath}`;
|
||||
}
|
||||
else if (result.error) {
|
||||
probe.reason = `sift scoped health probe failed: ${result.error.message}`;
|
||||
}
|
||||
else {
|
||||
const detail = String(result.stderr || "").trim();
|
||||
probe.reason = detail
|
||||
? `sift scoped health probe failed: ${detail.slice(0, 300)}`
|
||||
: `sift scoped health probe exited ${result.status ?? "unknown"}`;
|
||||
}
|
||||
SIFT_HEALTH_CACHE.set(cacheKey, probe);
|
||||
return probe;
|
||||
}
|
||||
function listFilesCapped(root, maxFiles = 32) {
|
||||
const files = [];
|
||||
const visit = (dir) => {
|
||||
if (files.length >= maxFiles)
|
||||
return;
|
||||
let entries = [];
|
||||
try {
|
||||
entries = readdirSync(dir, { withFileTypes: true });
|
||||
}
|
||||
catch {
|
||||
return;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
if (files.length >= maxFiles)
|
||||
return;
|
||||
const path = join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
visit(path);
|
||||
}
|
||||
else if (entry.isFile()) {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
};
|
||||
visit(root);
|
||||
return files;
|
||||
}
|
||||
function inspectSiftCache(projectRoot) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
const manifestRoot = join(dirs.searchCache, "artifacts", "manifests");
|
||||
const samples = [];
|
||||
for (const manifest of listFilesCapped(manifestRoot, 16)) {
|
||||
let text = "";
|
||||
try {
|
||||
text = readFileSync(manifest).toString("utf-8");
|
||||
}
|
||||
catch {
|
||||
continue;
|
||||
}
|
||||
for (const { label, pattern } of SIFT_CACHE_POLLUTION_PATTERNS) {
|
||||
const match = text.match(pattern);
|
||||
if (match) {
|
||||
const start = Math.max(0, (match.index ?? 0) - 80);
|
||||
const end = Math.min(text.length, (match.index ?? 0) + 160);
|
||||
const sample = text
|
||||
.slice(start, end)
|
||||
.replace(/[^\x20-\x7E]+/g, " ")
|
||||
.trim();
|
||||
samples.push({ label, sample });
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (samples.length >= 5)
|
||||
break;
|
||||
}
|
||||
return {
|
||||
inspected: existsSync(manifestRoot),
|
||||
polluted: samples.length > 0,
|
||||
samples,
|
||||
};
|
||||
}
|
||||
export function detectSift(projectRoot, prefs, env = process.env) {
|
||||
if (prefs?.indexer_backend === "none") {
|
||||
return {
|
||||
backend: "sift",
|
||||
|
|
@ -221,14 +414,58 @@ export function detectSift(_projectRoot, prefs, env = process.env) {
|
|||
reason: "SIFT_PATH is set but does not resolve to an executable file.",
|
||||
};
|
||||
}
|
||||
const warmup = readSiftWarmupMarker(projectRoot);
|
||||
if (warmup?.status === "warming") {
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "warming",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
probePath: warmup.scope ?? ".",
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"}; repo-local Sift index warmup is still running`,
|
||||
markerPath: warmup.markerPath,
|
||||
};
|
||||
}
|
||||
const health = runSiftHealthProbe(projectRoot, binaryPath, env);
|
||||
if (!health.ok) {
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "degraded",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
searchCache: health.searchCache,
|
||||
tmpDir: health.tmpDir,
|
||||
probePath: health.probePath,
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"} but ${health.reason}`,
|
||||
};
|
||||
}
|
||||
const cacheInspection = inspectSiftCache(projectRoot);
|
||||
if (cacheInspection.polluted) {
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "degraded",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
searchCache: health.searchCache,
|
||||
tmpDir: health.tmpDir,
|
||||
probePath: health.probePath,
|
||||
cacheInspection,
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"} but repo-local Sift cache contains ignored/generated paths`,
|
||||
};
|
||||
}
|
||||
return {
|
||||
backend: "sift",
|
||||
status: "configured",
|
||||
command: binaryPath,
|
||||
binaryPath,
|
||||
reason: explicit
|
||||
? "sift binary resolved from SIFT_PATH"
|
||||
: "sift binary found on PATH",
|
||||
searchCache: health.searchCache,
|
||||
tmpDir: health.tmpDir,
|
||||
probePath: health.probePath,
|
||||
cacheInspection,
|
||||
reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"}; ${health.reason}`,
|
||||
};
|
||||
}
|
||||
function isFreshMarker(markerPath, now, ttlMs) {
|
||||
|
|
@ -237,6 +474,11 @@ function isFreshMarker(markerPath, now, ttlMs) {
|
|||
if (now - stat.mtimeMs >= ttlMs)
|
||||
return false;
|
||||
const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
if (parsed.schemaVersion === 3) {
|
||||
if (parsed.status === "warming" && parsed.pid && !isProcessAlive(parsed.pid))
|
||||
return false;
|
||||
return typeof parsed.scope === "string" && parsed.scope.length > 0;
|
||||
}
|
||||
return (parsed.schemaVersion === 2 &&
|
||||
Array.isArray(parsed.args) &&
|
||||
parsed.args.at(-2) === ".");
|
||||
|
|
@ -245,6 +487,38 @@ function isFreshMarker(markerPath, now, ttlMs) {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
function readSiftWarmupMarker(projectRoot) {
|
||||
const markerPath = join(projectRoot, ".sf", "runtime", "sift-index-warmup.json");
|
||||
try {
|
||||
if (!existsSync(markerPath))
|
||||
return null;
|
||||
const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
if (parsed.schemaVersion !== 3)
|
||||
return null;
|
||||
if (parsed.status !== "warming")
|
||||
return null;
|
||||
if (parsed.pid && !isProcessAlive(parsed.pid))
|
||||
return null;
|
||||
const started = Date.parse(parsed.startedAt);
|
||||
const hardTimeoutSec = Number(parsed.hardTimeoutSec ?? DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC);
|
||||
const expiresAt = started + Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000;
|
||||
if (!Number.isFinite(started) || Date.now() > expiresAt)
|
||||
return null;
|
||||
return { ...parsed, markerPath };
|
||||
}
|
||||
catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
function isProcessAlive(pid) {
|
||||
try {
|
||||
process.kill(Number(pid), 0);
|
||||
return true;
|
||||
}
|
||||
catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
||||
const env = options.env ?? process.env;
|
||||
const backendName = resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, env);
|
||||
|
|
@ -254,8 +528,18 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
reason: `effective codebase indexer is ${backendName}`,
|
||||
};
|
||||
}
|
||||
const detection = detectSift(projectRoot, prefs, env);
|
||||
if (detection.status !== "configured" || !detection.binaryPath) {
|
||||
const detection = detectSift(projectRoot, prefs, {
|
||||
...env,
|
||||
SF_SIFT_HEALTHCHECK_DISABLE: "1",
|
||||
});
|
||||
if (detection.status === "warming") {
|
||||
return {
|
||||
status: "skipped",
|
||||
reason: "sift index warmup is already running",
|
||||
markerPath: detection.markerPath,
|
||||
};
|
||||
}
|
||||
if (!["configured", "degraded"].includes(detection.status) || !detection.binaryPath) {
|
||||
return {
|
||||
status: "unavailable",
|
||||
reason: detection.reason,
|
||||
|
|
@ -271,6 +555,7 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
markerPath,
|
||||
};
|
||||
}
|
||||
const scope = resolveSiftSearchScope(projectRoot, options.scope ?? ".");
|
||||
const siftArgs = [
|
||||
"search",
|
||||
"--json",
|
||||
|
|
@ -280,7 +565,7 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
String(options.limit ?? DEFAULT_SIFT_WARMUP_LIMIT),
|
||||
"--retriever-timeout-ms",
|
||||
String(options.retrieverTimeoutMs ?? DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS),
|
||||
".",
|
||||
scope,
|
||||
options.query ?? DEFAULT_SIFT_WARMUP_QUERY,
|
||||
];
|
||||
const hardTimeoutSec = resolveSiftWarmupHardTimeoutSec(env, options.hardTimeoutSec);
|
||||
|
|
@ -298,27 +583,30 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
: "sift page-index-hybrid warmup started (no timeout(1)/gtimeout on PATH; running unbounded)";
|
||||
try {
|
||||
const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
mkdirSync(join(projectRoot, ".sf", "runtime"), { recursive: true });
|
||||
mkdirSync(runtimeDirs.searchCache, { recursive: true });
|
||||
mkdirSync(runtimeDirs.tmpDir, { recursive: true });
|
||||
ensureSiftRuntimeDirs(projectRoot);
|
||||
const childEnv = buildSiftEnv(projectRoot, env);
|
||||
writeFileSync(markerPath, `${JSON.stringify({
|
||||
schemaVersion: 2,
|
||||
const marker = {
|
||||
schemaVersion: 3,
|
||||
status: "warming",
|
||||
startedAt: new Date(now).toISOString(),
|
||||
command,
|
||||
cwd: projectRoot,
|
||||
args,
|
||||
scope,
|
||||
siftBinary: detection.binaryPath,
|
||||
hardTimeoutSec: wrapper?.timeoutSec ?? null,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
tmpDir: runtimeDirs.tmpDir,
|
||||
}, null, 2)}\n`, "utf-8");
|
||||
};
|
||||
writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
|
||||
const child = (options.spawnFn ?? spawn)(command, args, {
|
||||
cwd: projectRoot,
|
||||
env: childEnv,
|
||||
stdio: "ignore",
|
||||
detached: true,
|
||||
});
|
||||
marker.pid = child.pid ?? null;
|
||||
writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
|
||||
child.unref();
|
||||
return {
|
||||
status: "started",
|
||||
|
|
@ -507,17 +795,23 @@ function buildSiftContextLines(projectRoot, prefs, env = process.env) {
|
|||
}
|
||||
else if (detection.status === "configured" && detection.binaryPath) {
|
||||
lines.push(`- Sift: configured as local CLI \`${detection.binaryPath}\`.`);
|
||||
lines.push("- Use Sift for broad code retrieval before manual file-by-file reading, " +
|
||||
"especially conceptual queries, exact identifiers, approximate file/path intent, and synthesis-ready snippets.");
|
||||
lines.push("- Tool: `sift_search` exposes the full Sift CLI surface — use it for agentic multi-turn search, " +
|
||||
"explicit strategy selection, and planner configuration.");
|
||||
lines.push("- Tool: `codebase_search` is the platform-level wrapper — use it for simple conceptual queries.");
|
||||
lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`);
|
||||
lines.push("- Use Sift with explicit, narrow paths after quick `grep`/`find`/`ls` orientation; avoid root-scope searches unless status proves they are responsive.");
|
||||
lines.push("- Tool: `sift_search` exposes the full Sift CLI surface — prefer direct `bm25`, `path-hybrid`, or `page-index-hybrid` with a scoped `path`.");
|
||||
lines.push("- Tool: `codebase_search` is the platform-level wrapper — use it only with a scoped `scope` when possible.");
|
||||
lines.push("- Strategy guide: `page-index-hybrid` (strongest recall + structural reranking), " +
|
||||
"`path-hybrid` (filename/path-heavy), `bm25` (fast lexical-only), `vector` (semantic-only).");
|
||||
lines.push("- Agent mode: enable `agent: true` on `sift_search` for multi-turn research. " +
|
||||
"Use `agentMode: 'graph'` for disconnected code regions and `plannerStrategy: 'model-driven'` for LLM-guided planning.");
|
||||
lines.push("- SF runs Sift warmup with a project-scoped `SIFT_SEARCH_CACHE` under `.sf/runtime/sift/` while leaving model cache shared; " +
|
||||
"if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.");
|
||||
lines.push("- If Sift is slow, empty, or times out, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.");
|
||||
}
|
||||
else if (detection.status === "warming" && detection.binaryPath) {
|
||||
lines.push(`- Sift: installed at \`${detection.binaryPath}\`; repo-local index warmup is running.`);
|
||||
lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`);
|
||||
lines.push("- Use grep/find/ls and `.sf/CODEBASE.md` for broad orientation while warmup runs. Use narrow `sift_search` paths if needed; broad root-scope Sift may still be cold.");
|
||||
}
|
||||
else if (detection.status === "degraded" && detection.binaryPath) {
|
||||
lines.push(`- Sift: installed at \`${detection.binaryPath}\` but degraded for this repo: ${detection.reason}.`);
|
||||
lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`);
|
||||
lines.push("- Do not use broad Sift/codebase_search as the first exploration step. Prefer native `grep`/`find`/`ls`, `.sf/CODEBASE.md`, and narrow `sift_search` only after reducing scope.");
|
||||
}
|
||||
else {
|
||||
lines.push("- Sift: not available. This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.");
|
||||
|
|
@ -537,7 +831,7 @@ export function resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, e
|
|||
if (prefs?.indexer_backend)
|
||||
return prefs.indexer_backend;
|
||||
const sift = detectSift(projectRoot, prefs, env);
|
||||
if (sift.status === "configured")
|
||||
if (["configured", "warming", "degraded"].includes(sift.status))
|
||||
return "sift";
|
||||
return "projectRag";
|
||||
}
|
||||
|
|
@ -606,8 +900,32 @@ export function formatSiftStatus(projectRoot, prefs, env = process.env) {
|
|||
lines.push(`Command: ${detection.command}`);
|
||||
if (detection.binaryPath)
|
||||
lines.push(`Binary: ${detection.binaryPath}`);
|
||||
if (detection.searchCache)
|
||||
lines.push(`Search cache: ${detection.searchCache}`);
|
||||
if (detection.tmpDir)
|
||||
lines.push(`Temp dir: ${detection.tmpDir}`);
|
||||
if (detection.probePath)
|
||||
lines.push(`Health probe scope: ${detection.probePath}`);
|
||||
if (detection.markerPath)
|
||||
lines.push(`Warmup marker: ${detection.markerPath}`);
|
||||
if (detection.cacheInspection?.polluted) {
|
||||
lines.push("Cache integrity: polluted - ignored/generated paths were found in repo-local Sift manifests.");
|
||||
for (const sample of detection.cacheInspection.samples ?? []) {
|
||||
lines.push(`Cache sample (${sample.label}): ${sample.sample}`);
|
||||
}
|
||||
lines.push("Action: remove .sf/runtime/sift/search-cache and warm Sift again from the repo root.");
|
||||
}
|
||||
else if (detection.cacheInspection?.inspected) {
|
||||
lines.push("Cache integrity: ok - no ignored/generated path samples found in inspected manifests.");
|
||||
}
|
||||
if (detection.status === "configured" && detection.command) {
|
||||
lines.push(`Operational: ${commandExists(detection.command, env) ? "yes" : "no - configured command is missing"}`);
|
||||
lines.push(`Operational: ${commandExists(detection.command, env) ? "yes - scoped health probe passed" : "no - configured command is missing"}`);
|
||||
}
|
||||
else if (detection.status === "warming" && detection.command) {
|
||||
lines.push("Operational: warming - binary exists and repo-local index warmup is running. Give Sift time on CPU before broad searches.");
|
||||
}
|
||||
else if (detection.status === "degraded" && detection.command) {
|
||||
lines.push("Operational: degraded - binary exists, but the bounded scoped health probe failed. Use narrow paths or fallback search.");
|
||||
}
|
||||
else {
|
||||
lines.push("Operational: no - install rupurt/sift on PATH or set SIFT_PATH.");
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ Before anything else, form a diagnosis: What is the core challenge? What is brok
|
|||
- **Measure coverage**: find untested critical paths
|
||||
- **Scan for dead code, stubs, and commented-out features** — abandoned attempts are signals
|
||||
- **Discover needed skills**: identify repo languages, frameworks, data stores, external services, build tools, and domain-specific competencies. Check installed skills first; record installed, missing, and potentially useful skills in `.sf/CODEBASE.md` and `.sf/PM-STRATEGY.md`.
|
||||
- **Use code intelligence**: use `codebase_search` (or Project RAG tools if configured) as your PRIMARY exploration method for conceptual, behavioral, or architectural discovery before manually reading files. Use `sift_search` for agentic multi-turn research or explicit strategy selection. Fall back to `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, and `lsp` only for exact matches or structural navigation.
|
||||
- **Use code intelligence**: start with `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when the `PROJECT CODE INTELLIGENCE` block says Sift is healthy enough for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use Project RAG tools first for broad retrieval if Project RAG is configured.
|
||||
- Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. Fall back to shell `rg`, `find`, `ast-grep`, or `ls -la` only when the native/in-process tool surface is insufficient.
|
||||
|
||||
### Step 2: Check library and ecosystem facts
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ After reflection is confirmed, decide the approach based on the actual scope —
|
|||
|
||||
Before asking your first question, do a mandatory investigation pass. This is not optional.
|
||||
|
||||
1. **Scout the codebase** — use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
1. **Scout the codebase** — start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when the `PROJECT CODE INTELLIGENCE` block says Sift is healthy enough for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
2. **Check library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) as the default for any GitHub-hosted library or framework the user mentioned. Fall back to `resolve_library` / `get_library_docs` (Context7) for npm/pypi/crates packages DeepWiki doesn't have. **Context7 free tier is capped at 1000 req/month — spend those on cases DeepWiki can't cover.** Get current facts about capabilities, constraints, API shapes, version-specific behavior.
|
||||
3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the user referenced external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough.
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ Apply `pm-planning` skill thinking throughout: use Working Backwards to anchor o
|
|||
### Before your first question round
|
||||
|
||||
Do a lightweight targeted investigation so your questions are grounded in reality:
|
||||
- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP search tools for broad concept, symbol, schema, and git-history lookup before manually reading files
|
||||
- Check the roadmap context above (if present) to understand what surrounds this milestone
|
||||
- **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier).
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ Your goal is **not** to center the discussion on tech stack trivia, naming conve
|
|||
### Before your first question round
|
||||
|
||||
Do a lightweight targeted investigation so your questions are grounded in reality:
|
||||
- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer.
|
||||
- Check the roadmap context above to understand what surrounds this slice — what comes before, what depends on it
|
||||
- **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier).
|
||||
- Identify the 3–5 biggest behavioural unknowns: things where the user's answer will materially change what gets built
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ Never fabricate or simulate user input during this discussion. Never generate fa
|
|||
|
||||
- Check library docs **DeepWiki first** (`ask_question` / `read_wiki_structure` / `read_wiki_contents`) for any GitHub-hosted library or framework — AI-indexed, no free-tier cap. Fall back to Context7 (`resolve_library` / `get_library_docs`) for npm/pypi/crates packages DeepWiki doesn't cover. Context7 free tier is 1000 req/month — don't spend those on cases DeepWiki covers.
|
||||
- Do web searches (`search-the-web`) to verify the landscape — what solutions exist, what's changed recently, what's the current best practice. Use `freshness` for recency-sensitive queries, `domain` to target specific sites. Use `fetch_page` to read the full content of promising URLs when snippets aren't enough. **Budget:** You have a limited number of web searches per turn (typically 3-5). Prefer DeepWiki → Context7 → web search for docs; use `search_and_read` for one-shot topic research. Do NOT repeat the same or similar queries. Distribute searches across turns rather than clustering them.
|
||||
- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes.
|
||||
|
||||
Don't go deep — just enough that your next question reflects what's actually true rather than what you assume.
|
||||
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ Templates showing the expected format for each artifact type are in:
|
|||
|
||||
**Code navigation:** Use `lsp` for definition, type_definition, implementation, references, incoming_calls, outgoing_calls, hover, signature, symbols, rename, code_actions, format, and diagnostics. Falls back gracefully if no server is available. Never `grep` for a symbol definition when `lsp` can resolve it semantically. Never shell out to prettier/rustfmt/gofmt when `lsp format` is available. After editing code, use `lsp diagnostics` to verify no type errors were introduced.
|
||||
|
||||
**Codebase exploration:** For conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"), use `codebase_search` first. Its hybrid BM25+Vector retrieval is significantly more effective than grep for navigating unfamiliar logic. For Sift-specific features — agentic multi-turn search, explicit strategy selection, or planner configuration — use `sift_search`. Strategy guide: `page-index-hybrid` (strongest recall + structural reranking, default), `path-hybrid` (filename/path-heavy queries), `bm25` (fast lexical-only), `vector` (semantic-only). Enable `agent: true` with `agentMode: 'graph'` for deep multi-turn research across disconnected code regions, or `plannerStrategy: 'model-driven'` for LLM-guided planning. Use in-process SF tools like `grep` for exact text matches when you already have a specific identifier, and `find`/`ls` for literal filesystem discovery. Use `lsp` for structural navigation (definitions, references). Use `.sf/CODEBASE.md` for durable orientation. If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Never read files one-by-one to "explore" — search first, then read what's relevant.
|
||||
**Codebase exploration:** Start broad orientation with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp`. Use `codebase_search` for conceptual, behavioral, or architectural discovery only after choosing a narrow scope and checking the `PROJECT CODE INTELLIGENCE` block; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. For Sift-specific features — explicit strategy selection or planner configuration — use `sift_search` with a scoped `path`. Strategy guide: `bm25` (fast lexical), `path-hybrid` (filename/path-heavy queries), `page-index-hybrid` (stronger recall + reranking), `vector` (semantic-only). Each repo uses its own Sift cache under `.sf/runtime/sift/`; do not rely on a shared/global Sift database. Use `lsp` for structural navigation (definitions, references). If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Never read files one-by-one to "explore" — search first, then read what's relevant.
|
||||
|
||||
**Swarm dispatch:** Let the system decide whether swarming fits before dispatching multiple execution subagents. Use a 2-3 worker same-model swarm only when the work splits into independent shards with explicit file/directory ownership, shard-local verification, low conflict risk, and clear wall-clock savings. Do not swarm shared-interface edits, lockfiles, migrations, single-failure debugging, or sequence-dependent work. The parent agent remains coordinator: assign ownership, synthesize results, inspect dirty files, resolve conflicts, and run final verification.
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,133 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { chmodSync, existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
import {
|
||||
buildSiftEnv,
|
||||
detectSift,
|
||||
ensureSiftRuntimeDirs,
|
||||
resolveSiftSearchScope,
|
||||
resolveSiftWarmupRuntimeDirs,
|
||||
resolveEffectiveCodebaseIndexerBackendName,
|
||||
} from "../code-intelligence.js";
|
||||
|
||||
// Temp directories created by the helpers below; every test run registers
// its throwaway dirs here and afterEach deletes them again.
const tmpRoots = [];

afterEach(() => {
  // Drain the registry so the next test starts from a clean slate, removing
  // each registered directory tree as we go.
  while (tmpRoots.length > 0) {
    const dir = tmpRoots.pop();
    rmSync(dir, { recursive: true, force: true });
  }
});
|
||||
|
||||
/**
 * Create a disposable project directory containing a single `src/index.js`
 * source file, register it for afterEach cleanup, and return its root path.
 */
function makeProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-sift-project-"));
  tmpRoots.push(projectRoot);
  const srcDir = join(projectRoot, "src");
  mkdirSync(srcDir, { recursive: true });
  writeFileSync(join(srcDir, "index.js"), "export const value = 1;\n");
  return projectRoot;
}
|
||||
|
||||
/**
 * Write an executable shell script that stands in for the real `sift`
 * binary, register its directory for cleanup, and return both paths.
 */
function makeFakeSift(script) {
  const binDir = mkdtempSync(join(tmpdir(), "sf-sift-bin-"));
  tmpRoots.push(binDir);
  const binPath = join(binDir, "sift");
  writeFileSync(binPath, script);
  // Must be executable so the detector can actually spawn it.
  chmodSync(binPath, 0o755);
  return { dir: binDir, bin: binPath };
}
|
||||
|
||||
test("buildSiftEnv_uses_project_scoped_cache_from_project_root", () => {
|
||||
const projectRoot = makeProject();
|
||||
|
||||
const env = buildSiftEnv(projectRoot, { PATH: "/usr/bin" });
|
||||
|
||||
assert.equal(env.SIFT_SEARCH_CACHE, join(projectRoot, ".sf", "runtime", "sift", "search-cache"));
|
||||
assert.equal(env.TMPDIR, join(projectRoot, ".sf", "runtime", "sift", "tmp"));
|
||||
});
|
||||
|
||||
test("resolveSiftSearchScope_normalizes_project_absolute_paths_to_relative", () => {
|
||||
const projectRoot = makeProject();
|
||||
|
||||
assert.equal(resolveSiftSearchScope(projectRoot), ".");
|
||||
assert.equal(resolveSiftSearchScope(projectRoot, projectRoot), ".");
|
||||
assert.equal(resolveSiftSearchScope(projectRoot, join(projectRoot, "src")), "src");
|
||||
assert.equal(resolveSiftSearchScope(projectRoot, "src"), "src");
|
||||
});
|
||||
|
||||
test("ensureSiftRuntimeDirs_creates_repo_local_cache_directories", () => {
|
||||
const projectRoot = makeProject();
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
|
||||
assert.equal(dirs.searchCache, join(projectRoot, ".sf", "runtime", "sift", "search-cache"));
|
||||
assert.ok(existsSync(dirs.searchCache));
|
||||
assert.ok(existsSync(dirs.tmpDir));
|
||||
});
|
||||
|
||||
test("detectSift_when_probe_times_out_reports_degraded_not_configured", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nsleep 5\n");
|
||||
|
||||
const result = detectSift(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "50",
|
||||
});
|
||||
|
||||
assert.equal(result.status, "degraded");
|
||||
assert.equal(result.binaryPath, bin);
|
||||
assert.match(result.reason, /health probe timed out/i);
|
||||
});
|
||||
|
||||
test("resolveEffectiveCodebaseIndexerBackendName_when_sift_is_cold_still_selects_sift", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nsleep 5\n");
|
||||
|
||||
const result = resolveEffectiveCodebaseIndexerBackendName(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "50",
|
||||
});
|
||||
|
||||
assert.equal(result, "sift");
|
||||
});
|
||||
|
||||
test("detectSift_when_probe_succeeds_reports_project_cache", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
|
||||
const result = detectSift(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
|
||||
});
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
|
||||
assert.equal(result.status, "configured");
|
||||
assert.equal(result.binaryPath, bin);
|
||||
assert.equal(result.searchCache, dirs.searchCache);
|
||||
assert.equal(result.tmpDir, dirs.tmpDir);
|
||||
});
|
||||
|
||||
test("detectSift_when_cache_manifest_contains_ignored_paths_reports_degraded", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
const manifestDir = join(dirs.searchCache, "artifacts", "manifests");
|
||||
mkdirSync(manifestDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(manifestDir, "bad"),
|
||||
`./src/index.js\n${projectRoot}/.claude/worktrees/agent/src/index.js\n`,
|
||||
);
|
||||
|
||||
const result = detectSift(projectRoot, {}, {
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
|
||||
});
|
||||
|
||||
assert.equal(result.status, "degraded");
|
||||
assert.match(result.reason, /cache contains ignored\/generated paths/i);
|
||||
assert.equal(result.cacheInspection.polluted, true);
|
||||
});
|
||||
|
|
@ -11,7 +11,7 @@
|
|||
*/
|
||||
import { execFile } from "node:child_process";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { resolveSiftBinary } from "../code-intelligence.js";
|
||||
import { buildSiftEnv, ensureSiftRuntimeDirs, resolveSiftBinary, resolveSiftSearchScope } from "../code-intelligence.js";
|
||||
|
||||
const KNOWN_STRATEGIES = [
|
||||
"hybrid",
|
||||
|
|
@ -34,7 +34,8 @@ const DEFAULT_TIMEOUT_MS = 60_000;
|
|||
/**
|
||||
* Build the sift CLI argument list from tool parameters.
|
||||
*/
|
||||
function buildSiftArgs(params) {
|
||||
function buildSiftArgs(params, projectRoot = process.cwd()) {
|
||||
const scope = resolveSiftSearchScope(projectRoot, params.path);
|
||||
const args = [
|
||||
"search",
|
||||
"--json",
|
||||
|
|
@ -57,7 +58,7 @@ function buildSiftArgs(params) {
|
|||
}
|
||||
|
||||
// Path and query are positional
|
||||
args.push(params.path ?? ".", params.query);
|
||||
args.push(scope, params.query);
|
||||
return args;
|
||||
}
|
||||
|
||||
|
|
@ -104,13 +105,16 @@ function parseSiftOutput(rawStdout, rawStderr) {
|
|||
/**
|
||||
* Execute a sift search with the given parameters.
|
||||
*/
|
||||
function runSift(binaryPath, args, timeoutMs) {
|
||||
function runSift(binaryPath, args, timeoutMs, projectRoot) {
|
||||
return new Promise((resolve, reject) => {
|
||||
ensureSiftRuntimeDirs(projectRoot);
|
||||
const child = execFile(
|
||||
binaryPath,
|
||||
args,
|
||||
{
|
||||
cwd: projectRoot,
|
||||
encoding: "utf-8",
|
||||
env: buildSiftEnv(projectRoot, process.env),
|
||||
maxBuffer: 16 * 1024 * 1024,
|
||||
timeout: timeoutMs,
|
||||
},
|
||||
|
|
@ -154,7 +158,7 @@ export function registerSiftSearchTool(pi) {
|
|||
}),
|
||||
path: Type.Optional(
|
||||
Type.String({
|
||||
description: "Directory or file path to search within. Default: current directory ('.').",
|
||||
description: "Directory or file path to search within. Default: repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.",
|
||||
default: ".",
|
||||
}),
|
||||
),
|
||||
|
|
@ -236,14 +240,17 @@ export function registerSiftSearchTool(pi) {
|
|||
};
|
||||
}
|
||||
|
||||
const args = buildSiftArgs(params);
|
||||
const projectRoot = process.cwd();
|
||||
const args = buildSiftArgs(params, projectRoot);
|
||||
const scope = args.at(-2) ?? ".";
|
||||
const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
||||
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
const { stdout, stderr } = await runSift(binaryPath, args, timeoutMs);
|
||||
const { stdout, stderr } = await runSift(binaryPath, args, timeoutMs, projectRoot);
|
||||
const elapsedMs = Date.now() - startedAt;
|
||||
const result = parseSiftOutput(stdout, stderr);
|
||||
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
|
||||
// Telemetry: log query outcomes for tuning
|
||||
const { logInfo } = await import("../workflow-logger.js");
|
||||
|
|
@ -251,14 +258,18 @@ export function registerSiftSearchTool(pi) {
|
|||
query: params.query,
|
||||
strategy: params.strategy ?? DEFAULT_STRATEGY,
|
||||
agent: params.agent ?? false,
|
||||
path: scope,
|
||||
hitCount: result.hits.length,
|
||||
elapsedMs,
|
||||
binary: binaryPath,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
});
|
||||
|
||||
const lines = [
|
||||
`Sift search: "${params.query}"`,
|
||||
`Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`,
|
||||
`Scope: ${scope}`,
|
||||
`Search cache: ${runtimeDirs.searchCache}`,
|
||||
`Hits: ${result.hits.length} | Elapsed: ${elapsedMs}ms`,
|
||||
"",
|
||||
];
|
||||
|
|
@ -288,6 +299,7 @@ export function registerSiftSearchTool(pi) {
|
|||
elapsedMs,
|
||||
hitCount: result.hits.length,
|
||||
hits: result.hits,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import { StringEnum } from "@singularity-forge/pi-ai";
|
|||
import { getMarkdownTheme, } from "@singularity-forge/pi-coding-agent";
|
||||
import { Container, Markdown, Spacer, Text } from "@singularity-forge/pi-tui";
|
||||
import { CmuxClient, shellEscape } from "../cmux/index.js";
|
||||
import { buildSiftEnv, resolveSiftBinary } from "../sf/code-intelligence.js";
|
||||
import { buildSiftEnv, ensureSiftRuntimeDirs, resolveSiftBinary, resolveSiftSearchScope } from "../sf/code-intelligence.js";
|
||||
import { loadEffectiveSFPreferences } from "../sf/preferences.js";
|
||||
import { formatTokenCount } from "../shared/mod.js";
|
||||
import { getCurrentPhase } from "../shared/sf-phase-state.js";
|
||||
|
|
@ -1789,7 +1789,7 @@ export default function (pi) {
|
|||
description: "Natural-language query describing what to explore (e.g. 'find where the write gate tool_call hooks are registered')",
|
||||
}),
|
||||
scope: Type.Optional(Type.String({
|
||||
description: "Path to search within. Defaults to the current working directory. Use the active worktree for isolation.",
|
||||
description: "Path to search within. Defaults to repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.",
|
||||
})),
|
||||
strategy: Type.Optional(Type.String({
|
||||
description: "Search strategy: 'path-hybrid' (default), 'page-index-hybrid', 'bm25', or 'path'",
|
||||
|
|
@ -1802,22 +1802,23 @@ export default function (pi) {
|
|||
name: "codebase_search",
|
||||
label: "Code Search",
|
||||
description: [
|
||||
"Perform Sift-backed hybrid (BM25 + Vector) retrieval over a codebase scope.",
|
||||
" Use this as your PRIMARY exploration tool for conceptual, behavioral, or cross-cutting questions",
|
||||
"Perform Sift-backed hybrid (BM25 + Vector) retrieval over a scoped codebase path.",
|
||||
" Use this for conceptual, behavioral, or cross-cutting questions only after choosing a narrow scope",
|
||||
" (e.g. 'how is X handled?', 'where is the logic for Y?', 'find examples of Z').",
|
||||
" It is significantly more effective than grep for discovering unfamiliar logic and architecture.",
|
||||
" If Sift status is degraded or the scope is broad, prefer grep/find/ls and retry with a narrower scope.",
|
||||
].join(""),
|
||||
promptGuidelines: [
|
||||
"Use codebase_search BEFORE grep when exploring unfamiliar areas or conceptual patterns.",
|
||||
"Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns.",
|
||||
" page-index-hybrid (default): Use for 'How' and 'Why' questions (logic, implementation, reasoning).",
|
||||
" path-hybrid: Use for 'Where' questions (architecture, directory structure, file location).",
|
||||
" Keep scope narrow enough to avoid root-level Sift timeouts; each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.",
|
||||
" Be descriptive in your query: include function names, types, or intent (e.g. 'auth middleware validation').",
|
||||
" This tool is read-only and optimized for evidence gathering before you plan or edit.",
|
||||
],
|
||||
parameters: CodebaseSearchParams,
|
||||
renderCall(args, theme) {
|
||||
const query = typeof args.query === "string" ? args.query : "";
|
||||
const scope = typeof args.scope === "string" ? args.scope : process.cwd();
|
||||
const scope = resolveSiftSearchScope(process.cwd(), typeof args.scope === "string" ? args.scope : undefined);
|
||||
const strategy = typeof args.strategy === "string" ? args.strategy : "page-index-hybrid";
|
||||
const preview = query.length > 90 ? `${query.slice(0, 89).trimEnd()}…` : query;
|
||||
const scopeLabel = scope.length > 70
|
||||
|
|
@ -1860,7 +1861,8 @@ export default function (pi) {
|
|||
return new Text(rendered, 0, 0);
|
||||
},
|
||||
async execute(_toolCallId, params, signal) {
|
||||
const scope = params.scope ?? process.cwd();
|
||||
const projectRoot = process.cwd();
|
||||
const scope = resolveSiftSearchScope(projectRoot, params.scope);
|
||||
const strategy = params.strategy ?? "page-index-hybrid";
|
||||
const query = params.query;
|
||||
const timeoutMs = typeof params.timeoutMs === "number" &&
|
||||
|
|
@ -1868,14 +1870,33 @@ export default function (pi) {
|
|||
? Math.max(1_000, params.timeoutMs)
|
||||
: CODEBASE_SEARCH_TIMEOUT_MS;
|
||||
const siftBin = resolveSiftBinary();
|
||||
if (!siftBin) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "codebase_search unavailable: sift binary not found. Use grep/find/ls or set SIFT_PATH.",
|
||||
},
|
||||
],
|
||||
details: {
|
||||
operation: "codebase_search",
|
||||
exitCode: 127,
|
||||
query,
|
||||
scope,
|
||||
strategy,
|
||||
timeoutMs,
|
||||
},
|
||||
};
|
||||
}
|
||||
const args = buildCodebaseSearchArgs(strategy, query, scope);
|
||||
const stderr = [];
|
||||
const stdout = [];
|
||||
let wasAborted = false;
|
||||
let timedOut = false;
|
||||
const childEnv = buildSiftEnv(scope, process.env);
|
||||
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
const childEnv = buildSiftEnv(projectRoot, process.env);
|
||||
const proc = spawn(siftBin, args, {
|
||||
cwd: scope,
|
||||
cwd: projectRoot,
|
||||
env: childEnv,
|
||||
shell: false,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
|
|
@ -1951,6 +1972,7 @@ export default function (pi) {
|
|||
scope,
|
||||
strategy,
|
||||
timeoutMs,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -1977,6 +1999,7 @@ export default function (pi) {
|
|||
scope,
|
||||
strategy,
|
||||
timeoutMs,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -1995,6 +2018,7 @@ export default function (pi) {
|
|||
exitCode,
|
||||
siftBin,
|
||||
timeoutMs,
|
||||
searchCache: runtimeDirs.searchCache,
|
||||
},
|
||||
};
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue