From ee836142ed2d1c7a9d5086b5afc6a2e985d0e660 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 5 May 2026 13:29:28 +0200 Subject: [PATCH] fix: harden sift codebase indexing --- src/resources/agents/scout.md | 2 +- .../extensions/sf/bootstrap/system-context.js | 2 +- .../extensions/sf/code-intelligence.d.ts | 6 +- .../extensions/sf/code-intelligence.js | 372 ++++++++++++++++-- .../extensions/sf/prompts/discuss-headless.md | 2 +- .../extensions/sf/prompts/discuss.md | 2 +- .../sf/prompts/guided-discuss-milestone.md | 2 +- .../sf/prompts/guided-discuss-slice.md | 2 +- src/resources/extensions/sf/prompts/queue.md | 2 +- src/resources/extensions/sf/prompts/system.md | 2 +- .../sf/tests/code-intelligence-sift.test.mjs | 133 +++++++ .../extensions/sf/tools/sift-search-tool.js | 26 +- src/resources/extensions/subagent/index.js | 44 ++- 13 files changed, 544 insertions(+), 53 deletions(-) create mode 100644 src/resources/extensions/sf/tests/code-intelligence-sift.test.mjs diff --git a/src/resources/agents/scout.md b/src/resources/agents/scout.md index c06b9ee63..b8c4ebcdd 100644 --- a/src/resources/agents/scout.md +++ b/src/resources/agents/scout.md @@ -8,7 +8,7 @@ You are a scout. Quickly investigate a codebase and return structured findings t Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. These keep exploration inside SF's tool surface and use native backends where available. -Use `codebase_search` as your PRIMARY tool for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"). It uses Sift-backed hybrid BM25/vector retrieval and is significantly more effective than grep for navigating unfamiliar logic. Use `sift_search` when you need agentic multi-turn research, explicit strategy selection (e.g. `page-index-hybrid`, `path-hybrid`), or planner configuration. Use exact text search (`grep`) only when you already have a specific identifier or filename in mind. 
You are still the scout role; Sift is the powerful primitive you should lead with for exploration. +Use `grep`, `find`, and `ls` for broad orientation first. Use `codebase_search` for conceptual, behavioral, or architectural discovery only with a narrow scope and when the project code-intelligence status says Sift is healthy enough for this repo. Use `sift_search` when you need explicit strategy selection (e.g. `bm25`, `path-hybrid`, `page-index-hybrid`) and a scoped path. If Sift is degraded, slow, or empty, fall back to grep/find/ls and direct reads. Each repo has its own Sift cache under `.sf/runtime/sift/`. Your output will be passed to an agent who has NOT seen the files you explored. diff --git a/src/resources/extensions/sf/bootstrap/system-context.js b/src/resources/extensions/sf/bootstrap/system-context.js index 392da9134..282bc9de3 100644 --- a/src/resources/extensions/sf/bootstrap/system-context.js +++ b/src/resources/extensions/sf/bootstrap/system-context.js @@ -112,7 +112,7 @@ through these tiers IN ORDER. Skip a tier only when it has been demonstrably exhausted, not just because the next tier is faster. 
Tier 1 — Code lookup: - - sift / codebase_search for symbols, patterns, prior usages + - grep/find/ls for broad orientation; scoped sift / codebase_search for symbols, patterns, prior usages when Sift status is healthy for the repo - Read source files (Read tool, file paths from PLAN/CODEBASE) - Inspect .sf/DECISIONS.md, .sf/KNOWLEDGE.md, docs/design-docs/, docs/records/ - Check tests for documented behavior diff --git a/src/resources/extensions/sf/code-intelligence.d.ts b/src/resources/extensions/sf/code-intelligence.d.ts index ea97f587c..f3e54d4f2 100644 --- a/src/resources/extensions/sf/code-intelligence.d.ts +++ b/src/resources/extensions/sf/code-intelligence.d.ts @@ -2,7 +2,11 @@ export const PROJECT_RAG_MCP_SERVER_NAME: string; export function detectProjectRag(projectRoot: string, prefs: Record, env?: NodeJS.ProcessEnv): unknown; export function resolveProjectRagBinary(env?: NodeJS.ProcessEnv): string | null; export function resolveSiftBinary(env?: NodeJS.ProcessEnv): string | null; -export function detectSift(_projectRoot: string, prefs: Record, env?: NodeJS.ProcessEnv): unknown; +export function resolveSiftWarmupRuntimeDirs(projectRoot: string): { searchCache: string; tmpDir: string }; +export function ensureSiftRuntimeDirs(projectRoot: string): { searchCache: string; tmpDir: string }; +export function buildSiftEnv(projectRoot: string, env: NodeJS.ProcessEnv): NodeJS.ProcessEnv; +export function resolveSiftSearchScope(projectRoot: string, scope?: string): string; +export function detectSift(projectRoot: string, prefs: Record, env?: NodeJS.ProcessEnv): unknown; export function ensureSiftIndexWarmup(projectRoot: string, prefs: Record, options?: Record): Promise; export function resolveProjectRagBuildJobs(env?: NodeJS.ProcessEnv): number; export function findProjectRagSourceDir(projectRoot: string, env?: NodeJS.ProcessEnv): string | null; diff --git a/src/resources/extensions/sf/code-intelligence.js b/src/resources/extensions/sf/code-intelligence.js index 
bac56ce80..fca6c252b 100644 --- a/src/resources/extensions/sf/code-intelligence.js +++ b/src/resources/extensions/sf/code-intelligence.js @@ -5,8 +5,8 @@ * accelerators for local code retrieval. */ import { spawn, spawnSync } from "node:child_process"; -import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync, } from "node:fs"; -import { delimiter, join, resolve } from "node:path"; +import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "node:fs"; +import { delimiter, isAbsolute, join, relative, resolve } from "node:path"; export const PROJECT_RAG_MCP_SERVER_NAME = "project-rag"; const PROJECT_RAG_BINARY_NAME = process.platform === "win32" ? "project-rag.exe" : "project-rag"; const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift"; @@ -22,8 +22,17 @@ const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000; const DEFAULT_SIFT_WARMUP_QUERY = "repo architecture source tests entrypoints configuration"; const DEFAULT_SIFT_WARMUP_LIMIT = 1; const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000; -const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 30; +const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 600; const SIFT_WARMUP_KILL_GRACE_SEC = 10; +const DEFAULT_SIFT_HEALTH_TIMEOUT_MS = 60_000; +const SIFT_HEALTH_CACHE = new Map(); +const SIFT_CACHE_POLLUTION_PATTERNS = [ + { label: ".claude worktrees", pattern: /(?:^|[/\\])\.claude[/\\]/ }, + { label: ".git internals", pattern: /(?:^|[/\\])\.git[/\\]/ }, + { label: "dist-test output", pattern: /(?:^|[/\\])dist-test[/\\]/ }, + { label: "node_modules", pattern: /(?:^|[/\\])node_modules[/\\]/ }, + { label: "package dist output", pattern: /(?:^|[/\\])packages[/\\][^/\\]+[/\\]dist[/\\]/ }, +]; export function resolveSiftWarmupRuntimeDirs(projectRoot) { const runtimeRoot = join(projectRoot, ".sf", "runtime", "sift"); return { @@ -31,6 +40,20 @@ export function resolveSiftWarmupRuntimeDirs(projectRoot) { tmpDir: join(runtimeRoot, "tmp"), }; } +/** + * Ensure the repo-local 
Sift runtime directories exist. + * + * Purpose: keep Sift's search database scoped to the current repository instead + * of sharing a process-global cache across unrelated projects. + * + * Consumer: Sift warmup, status probes, `sift_search`, and `codebase_search`. + */ +export function ensureSiftRuntimeDirs(projectRoot) { + const dirs = resolveSiftWarmupRuntimeDirs(projectRoot); + mkdirSync(dirs.searchCache, { recursive: true }); + mkdirSync(dirs.tmpDir, { recursive: true }); + return dirs; +} export function buildSiftEnv(projectRoot, env) { const dirs = resolveSiftWarmupRuntimeDirs(projectRoot); return { @@ -39,6 +62,27 @@ export function buildSiftEnv(projectRoot, env) { TMPDIR: dirs.tmpDir, }; } +/** + * Resolve a Sift search scope to the form Sift's local ignore matcher expects. + * + * Purpose: preserve `.siftignore` semantics by running Sift from the repository + * root with repo-relative scopes instead of absolute paths. + * + * Consumer: Sift warmup, `sift_search`, and `codebase_search`. + */ +export function resolveSiftSearchScope(projectRoot, scope) { + const normalizedRoot = normalizeProjectRoot(projectRoot); + const requested = typeof scope === "string" && scope.trim() ? scope.trim() : "."; + const absolute = isAbsolute(requested) + ? resolve(requested) + : resolve(normalizedRoot, requested); + const rel = relative(normalizedRoot, absolute); + if (!rel) + return "."; + if (!rel.startsWith("..") && !isAbsolute(rel)) + return rel; + return requested; +} function readJsonConfig(configPath) { if (!existsSync(configPath)) return {}; @@ -195,7 +239,156 @@ export function resolveSiftBinary(env = process.env) { return (lookupExecutable(SIFT_BINARY_NAME, env) ?? (SIFT_BINARY_NAME === "sift" ? 
null : lookupExecutable("sift", env))); } -export function detectSift(_projectRoot, prefs, env = process.env) { +function resolveSiftHealthTimeoutMs(env) { + const raw = env.SF_SIFT_HEALTH_TIMEOUT_MS?.trim(); + if (!raw) + return DEFAULT_SIFT_HEALTH_TIMEOUT_MS; + const parsed = Number.parseInt(raw, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_SIFT_HEALTH_TIMEOUT_MS; +} +function resolveSiftHealthProbePath(projectRoot) { + for (const candidate of ["src", "packages", "tests"]) { + const absolute = join(projectRoot, candidate); + if (existsSync(absolute)) + return candidate; + } + return "."; +} +function runSiftHealthProbe(projectRoot, binaryPath, env) { + const normalizedRoot = normalizeProjectRoot(projectRoot); + const timeoutMs = resolveSiftHealthTimeoutMs(env); + const probePath = resolveSiftHealthProbePath(normalizedRoot); + const cacheKey = [ + normalizedRoot, + binaryPath, + env.SIFT_PATH ?? "", + env.SF_SIFT_HEALTH_TIMEOUT_MS ?? "", + env.SF_SIFT_HEALTHCHECK_DISABLE ?? 
"", + ].join("\0"); + if (SIFT_HEALTH_CACHE.has(cacheKey)) + return SIFT_HEALTH_CACHE.get(cacheKey); + const dirs = ensureSiftRuntimeDirs(normalizedRoot); + if (env.SF_SIFT_HEALTHCHECK_DISABLE === "1") { + const skipped = { + ok: true, + probePath, + timeoutMs, + searchCache: dirs.searchCache, + tmpDir: dirs.tmpDir, + reason: "sift health probe disabled", + }; + SIFT_HEALTH_CACHE.set(cacheKey, skipped); + return skipped; + } + const result = spawnSync(binaryPath, [ + "search", + "--json", + "--strategy", + "bm25", + "--limit", + "1", + "--retriever-timeout-ms", + String(Math.min(timeoutMs, 1_000)), + probePath, + "function", + ], { + cwd: normalizedRoot, + env: buildSiftEnv(normalizedRoot, env), + encoding: "utf-8", + maxBuffer: 1024 * 1024, + timeout: timeoutMs, + }); + const probe = { + ok: result.status === 0, + probePath, + timeoutMs, + searchCache: dirs.searchCache, + tmpDir: dirs.tmpDir, + status: result.status, + signal: result.signal, + stderr: result.stderr, + reason: "", + }; + if (probe.ok) { + probe.reason = `sift scoped health probe passed for ${probePath}`; + } + else if (result.error?.code === "ETIMEDOUT" || result.signal) { + probe.reason = `sift scoped health probe timed out after ${timeoutMs}ms for ${probePath}`; + } + else if (result.error) { + probe.reason = `sift scoped health probe failed: ${result.error.message}`; + } + else { + const detail = String(result.stderr || "").trim(); + probe.reason = detail + ? `sift scoped health probe failed: ${detail.slice(0, 300)}` + : `sift scoped health probe exited ${result.status ?? 
"unknown"}`; + } + SIFT_HEALTH_CACHE.set(cacheKey, probe); + return probe; +} +function listFilesCapped(root, maxFiles = 32) { + const files = []; + const visit = (dir) => { + if (files.length >= maxFiles) + return; + let entries = []; + try { + entries = readdirSync(dir, { withFileTypes: true }); + } + catch { + return; + } + for (const entry of entries) { + if (files.length >= maxFiles) + return; + const path = join(dir, entry.name); + if (entry.isDirectory()) { + visit(path); + } + else if (entry.isFile()) { + files.push(path); + } + } + }; + visit(root); + return files; +} +function inspectSiftCache(projectRoot) { + const dirs = resolveSiftWarmupRuntimeDirs(projectRoot); + const manifestRoot = join(dirs.searchCache, "artifacts", "manifests"); + const samples = []; + for (const manifest of listFilesCapped(manifestRoot, 16)) { + let text = ""; + try { + text = readFileSync(manifest).toString("utf-8"); + } + catch { + continue; + } + for (const { label, pattern } of SIFT_CACHE_POLLUTION_PATTERNS) { + const match = text.match(pattern); + if (match) { + const start = Math.max(0, (match.index ?? 0) - 80); + const end = Math.min(text.length, (match.index ?? 
0) + 160); + const sample = text + .slice(start, end) + .replace(/[^\x20-\x7E]+/g, " ") + .trim(); + samples.push({ label, sample }); + break; + } + } + if (samples.length >= 5) + break; + } + return { + inspected: existsSync(manifestRoot), + polluted: samples.length > 0, + samples, + }; +} +export function detectSift(projectRoot, prefs, env = process.env) { if (prefs?.indexer_backend === "none") { return { backend: "sift", @@ -221,14 +414,58 @@ export function detectSift(_projectRoot, prefs, env = process.env) { reason: "SIFT_PATH is set but does not resolve to an executable file.", }; } + const warmup = readSiftWarmupMarker(projectRoot); + if (warmup?.status === "warming") { + const dirs = ensureSiftRuntimeDirs(projectRoot); + return { + backend: "sift", + status: "warming", + command: binaryPath, + binaryPath, + searchCache: dirs.searchCache, + tmpDir: dirs.tmpDir, + probePath: warmup.scope ?? ".", + reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"}; repo-local Sift index warmup is still running`, + markerPath: warmup.markerPath, + }; + } + const health = runSiftHealthProbe(projectRoot, binaryPath, env); + if (!health.ok) { + return { + backend: "sift", + status: "degraded", + command: binaryPath, + binaryPath, + searchCache: health.searchCache, + tmpDir: health.tmpDir, + probePath: health.probePath, + reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"} but ${health.reason}`, + }; + } + const cacheInspection = inspectSiftCache(projectRoot); + if (cacheInspection.polluted) { + return { + backend: "sift", + status: "degraded", + command: binaryPath, + binaryPath, + searchCache: health.searchCache, + tmpDir: health.tmpDir, + probePath: health.probePath, + cacheInspection, + reason: `${explicit ? 
"sift binary resolved from SIFT_PATH" : "sift binary found on PATH"} but repo-local Sift cache contains ignored/generated paths`, + }; + } return { backend: "sift", status: "configured", command: binaryPath, binaryPath, - reason: explicit - ? "sift binary resolved from SIFT_PATH" - : "sift binary found on PATH", + searchCache: health.searchCache, + tmpDir: health.tmpDir, + probePath: health.probePath, + cacheInspection, + reason: `${explicit ? "sift binary resolved from SIFT_PATH" : "sift binary found on PATH"}; ${health.reason}`, }; } function isFreshMarker(markerPath, now, ttlMs) { @@ -237,6 +474,11 @@ function isFreshMarker(markerPath, now, ttlMs) { if (now - stat.mtimeMs >= ttlMs) return false; const parsed = JSON.parse(readFileSync(markerPath, "utf-8")); + if (parsed.schemaVersion === 3) { + if (parsed.status === "warming" && parsed.pid && !isProcessAlive(parsed.pid)) + return false; + return typeof parsed.scope === "string" && parsed.scope.length > 0; + } return (parsed.schemaVersion === 2 && Array.isArray(parsed.args) && parsed.args.at(-2) === "."); @@ -245,6 +487,38 @@ function isFreshMarker(markerPath, now, ttlMs) { return false; } } +function readSiftWarmupMarker(projectRoot) { + const markerPath = join(projectRoot, ".sf", "runtime", "sift-index-warmup.json"); + try { + if (!existsSync(markerPath)) + return null; + const parsed = JSON.parse(readFileSync(markerPath, "utf-8")); + if (parsed.schemaVersion !== 3) + return null; + if (parsed.status !== "warming") + return null; + if (parsed.pid && !isProcessAlive(parsed.pid)) + return null; + const started = Date.parse(parsed.startedAt); + const hardTimeoutSec = Number(parsed.hardTimeoutSec ?? 
DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC); + const expiresAt = started + Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000; + if (!Number.isFinite(started) || Date.now() > expiresAt) + return null; + return { ...parsed, markerPath }; + } + catch { + return null; + } +} +function isProcessAlive(pid) { + try { + process.kill(Number(pid), 0); + return true; + } + catch { + return false; + } +} export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) { const env = options.env ?? process.env; const backendName = resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, env); @@ -254,8 +528,18 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) { reason: `effective codebase indexer is ${backendName}`, }; } - const detection = detectSift(projectRoot, prefs, env); - if (detection.status !== "configured" || !detection.binaryPath) { + const detection = detectSift(projectRoot, prefs, { + ...env, + SF_SIFT_HEALTHCHECK_DISABLE: "1", + }); + if (detection.status === "warming") { + return { + status: "skipped", + reason: "sift index warmup is already running", + markerPath: detection.markerPath, + }; + } + if (!["configured", "degraded"].includes(detection.status) || !detection.binaryPath) { return { status: "unavailable", reason: detection.reason, @@ -271,6 +555,7 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) { markerPath, }; } + const scope = resolveSiftSearchScope(projectRoot, options.scope ?? "."); const siftArgs = [ "search", "--json", @@ -280,7 +565,7 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) { String(options.limit ?? DEFAULT_SIFT_WARMUP_LIMIT), "--retriever-timeout-ms", String(options.retrieverTimeoutMs ?? DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS), - ".", + scope, options.query ?? 
DEFAULT_SIFT_WARMUP_QUERY, ]; const hardTimeoutSec = resolveSiftWarmupHardTimeoutSec(env, options.hardTimeoutSec); @@ -298,27 +583,30 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) { : "sift page-index-hybrid warmup started (no timeout(1)/gtimeout on PATH; running unbounded)"; try { const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot); - mkdirSync(join(projectRoot, ".sf", "runtime"), { recursive: true }); - mkdirSync(runtimeDirs.searchCache, { recursive: true }); - mkdirSync(runtimeDirs.tmpDir, { recursive: true }); + ensureSiftRuntimeDirs(projectRoot); const childEnv = buildSiftEnv(projectRoot, env); - writeFileSync(markerPath, `${JSON.stringify({ - schemaVersion: 2, + const marker = { + schemaVersion: 3, + status: "warming", startedAt: new Date(now).toISOString(), command, cwd: projectRoot, args, + scope, siftBinary: detection.binaryPath, hardTimeoutSec: wrapper?.timeoutSec ?? null, searchCache: runtimeDirs.searchCache, tmpDir: runtimeDirs.tmpDir, - }, null, 2)}\n`, "utf-8"); + }; + writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8"); const child = (options.spawnFn ?? spawn)(command, args, { cwd: projectRoot, env: childEnv, stdio: "ignore", detached: true, }); + marker.pid = child.pid ?? 
null; + writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8"); child.unref(); return { status: "started", @@ -507,17 +795,23 @@ function buildSiftContextLines(projectRoot, prefs, env = process.env) { } else if (detection.status === "configured" && detection.binaryPath) { lines.push(`- Sift: configured as local CLI \`${detection.binaryPath}\`.`); - lines.push("- Use Sift for broad code retrieval before manual file-by-file reading, " + - "especially conceptual queries, exact identifiers, approximate file/path intent, and synthesis-ready snippets."); - lines.push("- Tool: `sift_search` exposes the full Sift CLI surface — use it for agentic multi-turn search, " + - "explicit strategy selection, and planner configuration."); - lines.push("- Tool: `codebase_search` is the platform-level wrapper — use it for simple conceptual queries."); + lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`); + lines.push("- Use Sift with explicit, narrow paths after quick `grep`/`find`/`ls` orientation; avoid root-scope searches unless status proves they are responsive."); + lines.push("- Tool: `sift_search` exposes the full Sift CLI surface — prefer direct `bm25`, `path-hybrid`, or `page-index-hybrid` with a scoped `path`."); + lines.push("- Tool: `codebase_search` is the platform-level wrapper — use it only with a scoped `scope` when possible."); lines.push("- Strategy guide: `page-index-hybrid` (strongest recall + structural reranking), " + "`path-hybrid` (filename/path-heavy), `bm25` (fast lexical-only), `vector` (semantic-only)."); - lines.push("- Agent mode: enable `agent: true` on `sift_search` for multi-turn research. 
" + - "Use `agentMode: 'graph'` for disconnected code regions and `plannerStrategy: 'model-driven'` for LLM-guided planning."); - lines.push("- SF runs Sift warmup with a project-scoped `SIFT_SEARCH_CACHE` under `.sf/runtime/sift/` while leaving model cache shared; " + - "if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout."); + lines.push("- If Sift is slow, empty, or times out, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout."); + } + else if (detection.status === "warming" && detection.binaryPath) { + lines.push(`- Sift: installed at \`${detection.binaryPath}\`; repo-local index warmup is running.`); + lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`); + lines.push("- Use grep/find/ls and `.sf/CODEBASE.md` for broad orientation while warmup runs. Use narrow `sift_search` paths if needed; broad root-scope Sift may still be cold."); + } + else if (detection.status === "degraded" && detection.binaryPath) { + lines.push(`- Sift: installed at \`${detection.binaryPath}\` but degraded for this repo: ${detection.reason}.`); + lines.push(`- Sift cache: project-scoped at \`${detection.searchCache}\`; do not use a shared/global Sift search database for this repo.`); + lines.push("- Do not use broad Sift/codebase_search as the first exploration step. Prefer native `grep`/`find`/`ls`, `.sf/CODEBASE.md`, and narrow `sift_search` only after reducing scope."); } else { lines.push("- Sift: not available. 
This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout."); @@ -537,7 +831,7 @@ export function resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, e if (prefs?.indexer_backend) return prefs.indexer_backend; const sift = detectSift(projectRoot, prefs, env); - if (sift.status === "configured") + if (["configured", "warming", "degraded"].includes(sift.status)) return "sift"; return "projectRag"; } @@ -606,8 +900,32 @@ export function formatSiftStatus(projectRoot, prefs, env = process.env) { lines.push(`Command: ${detection.command}`); if (detection.binaryPath) lines.push(`Binary: ${detection.binaryPath}`); + if (detection.searchCache) + lines.push(`Search cache: ${detection.searchCache}`); + if (detection.tmpDir) + lines.push(`Temp dir: ${detection.tmpDir}`); + if (detection.probePath) + lines.push(`Health probe scope: ${detection.probePath}`); + if (detection.markerPath) + lines.push(`Warmup marker: ${detection.markerPath}`); + if (detection.cacheInspection?.polluted) { + lines.push("Cache integrity: polluted - ignored/generated paths were found in repo-local Sift manifests."); + for (const sample of detection.cacheInspection.samples ?? []) { + lines.push(`Cache sample (${sample.label}): ${sample.sample}`); + } + lines.push("Action: remove .sf/runtime/sift/search-cache and warm Sift again from the repo root."); + } + else if (detection.cacheInspection?.inspected) { + lines.push("Cache integrity: ok - no ignored/generated path samples found in inspected manifests."); + } if (detection.status === "configured" && detection.command) { - lines.push(`Operational: ${commandExists(detection.command, env) ? "yes" : "no - configured command is missing"}`); + lines.push(`Operational: ${commandExists(detection.command, env) ? 
"yes - scoped health probe passed" : "no - configured command is missing"}`); + } + else if (detection.status === "warming" && detection.command) { + lines.push("Operational: warming - binary exists and repo-local index warmup is running. Give Sift time on CPU before broad searches."); + } + else if (detection.status === "degraded" && detection.command) { + lines.push("Operational: degraded - binary exists, but the bounded scoped health probe failed. Use narrow paths or fallback search."); } else { lines.push("Operational: no - install rupurt/sift on PATH or set SIFT_PATH."); diff --git a/src/resources/extensions/sf/prompts/discuss-headless.md b/src/resources/extensions/sf/prompts/discuss-headless.md index 594470708..37e50f9ef 100644 --- a/src/resources/extensions/sf/prompts/discuss-headless.md +++ b/src/resources/extensions/sf/prompts/discuss-headless.md @@ -76,7 +76,7 @@ Before anything else, form a diagnosis: What is the core challenge? What is brok - **Measure coverage**: find untested critical paths - **Scan for dead code, stubs, and commented-out features** — abandoned attempts are signals - **Discover needed skills**: identify repo languages, frameworks, data stores, external services, build tools, and domain-specific competencies. Check installed skills first; record installed, missing, and potentially useful skills in `.sf/CODEBASE.md` and `.sf/PM-STRATEGY.md`. -- **Use code intelligence**: use `codebase_search` (or Project RAG tools if configured) as your PRIMARY exploration method for conceptual, behavioral, or architectural discovery before manually reading files. Use `sift_search` for agentic multi-turn research or explicit strategy selection. Fall back to `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, and `lsp` only for exact matches or structural navigation. +- **Use code intelligence**: start with `.sf/CODEBASE.md`, in-process `grep`/`find`/`ls`, and `lsp` for broad orientation. 
Use `codebase_search` or `sift_search` only with a scoped path and only when the `PROJECT CODE INTELLIGENCE` block says Sift is healthy enough for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use Project RAG tools first for broad retrieval if Project RAG is configured. - Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. Fall back to shell `rg`, `find`, `ast-grep`, or `ls -la` only when the native/in-process tool surface is insufficient. ### Step 2: Check library and ecosystem facts diff --git a/src/resources/extensions/sf/prompts/discuss.md b/src/resources/extensions/sf/prompts/discuss.md index 2ff9f6f04..ed42cb894 100644 --- a/src/resources/extensions/sf/prompts/discuss.md +++ b/src/resources/extensions/sf/prompts/discuss.md @@ -34,7 +34,7 @@ After reflection is confirmed, decide the approach based on the actual scope — Before asking your first question, do a mandatory investigation pass. This is not optional. -1. **Scout the codebase** — use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes. +1. **Scout the codebase** — start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when the `PROJECT CODE INTELLIGENCE` block says Sift is healthy enough for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. 
Understand what already exists, what patterns are established, what constraints current code imposes. 2. **Check library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) as the default for any GitHub-hosted library or framework the user mentioned. Fall back to `resolve_library` / `get_library_docs` (Context7) for npm/pypi/crates packages DeepWiki doesn't have. **Context7 free tier is capped at 1000 req/month — spend those on cases DeepWiki can't cover.** Get current facts about capabilities, constraints, API shapes, version-specific behavior. 3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the user referenced external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough. diff --git a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md index 5c3491a6f..cf29a89db 100644 --- a/src/resources/extensions/sf/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/sf/prompts/guided-discuss-milestone.md @@ -15,7 +15,7 @@ Apply `pm-planning` skill thinking throughout: use Working Backwards to anchor o ### Before your first question round Do a lightweight targeted investigation so your questions are grounded in reality: -- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. +- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. 
Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. - If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP search tools for broad concept, symbol, schema, and git-history lookup before manually reading files - Check the roadmap context above (if present) to understand what surrounds this milestone - **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier). diff --git a/src/resources/extensions/sf/prompts/guided-discuss-slice.md b/src/resources/extensions/sf/prompts/guided-discuss-slice.md index 369ed97d0..afd57ee8a 100644 --- a/src/resources/extensions/sf/prompts/guided-discuss-slice.md +++ b/src/resources/extensions/sf/prompts/guided-discuss-slice.md @@ -11,7 +11,7 @@ Your goal is **not** to center the discussion on tech stack trivia, naming conve ### Before your first question round Do a lightweight targeted investigation so your questions are grounded in reality: -- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. +- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. 
Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. - Check the roadmap context above to understand what surrounds this slice — what comes before, what depends on it - **Library docs — DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) for any GitHub-hosted library. Fall back to `resolve_library` / `get_library_docs` (Context7) only when DeepWiki doesn't have it (Context7 is capped at 1000 req/month free tier). - Identify the 3–5 biggest behavioural unknowns: things where the user's answer will materially change what gets built diff --git a/src/resources/extensions/sf/prompts/queue.md b/src/resources/extensions/sf/prompts/queue.md index e26395eb2..fafb8de8a 100644 --- a/src/resources/extensions/sf/prompts/queue.md +++ b/src/resources/extensions/sf/prompts/queue.md @@ -26,7 +26,7 @@ Never fabricate or simulate user input during this discussion. Never generate fa - Check library docs **DeepWiki first** (`ask_question` / `read_wiki_structure` / `read_wiki_contents`) for any GitHub-hosted library or framework — AI-indexed, no free-tier cap. Fall back to Context7 (`resolve_library` / `get_library_docs`) for npm/pypi/crates packages DeepWiki doesn't cover. Context7 free tier is 1000 req/month — don't spend those on cases DeepWiki covers. - Do web searches (`search-the-web`) to verify the landscape — what solutions exist, what's changed recently, what's the current best practice. Use `freshness` for recency-sensitive queries, `domain` to target specific sites. Use `fetch_page` to read the full content of promising URLs when snippets aren't enough. **Budget:** You have a limited number of web searches per turn (typically 3-5). 
Prefer DeepWiki → Context7 → web search for docs; use `search_and_read` for one-shot topic research. Do NOT repeat the same or similar queries. Distribute searches across turns rather than clustering them. -- Scout the codebase: use `codebase_search` for conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"); use `sift_search` for agentic multi-turn research or explicit strategy selection; use in-process `grep`, `find`, `ls`, and `lsp` for exact identifier matches or structural navigation. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes. +- Scout the codebase: start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` or `sift_search` only with a scoped path and only when Sift is healthy for this repo; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. Use `scout` for broad unfamiliar areas that need a separate explorer. Understand what already exists, what patterns are established, what constraints current code imposes. Don't go deep — just enough that your next question reflects what's actually true rather than what you assume. diff --git a/src/resources/extensions/sf/prompts/system.md b/src/resources/extensions/sf/prompts/system.md index 64c109804..ec3e525a4 100644 --- a/src/resources/extensions/sf/prompts/system.md +++ b/src/resources/extensions/sf/prompts/system.md @@ -161,7 +161,7 @@ Templates showing the expected format for each artifact type are in: **Code navigation:** Use `lsp` for definition, type_definition, implementation, references, incoming_calls, outgoing_calls, hover, signature, symbols, rename, code_actions, format, and diagnostics. Falls back gracefully if no server is available. Never `grep` for a symbol definition when `lsp` can resolve it semantically. 
Never shell out to prettier/rustfmt/gofmt when `lsp format` is available. After editing code, use `lsp diagnostics` to verify no type errors were introduced. -**Codebase exploration:** For conceptual, behavioral, or architectural discovery (e.g. "how does X work?", "where is Y handled?"), use `codebase_search` first. Its hybrid BM25+Vector retrieval is significantly more effective than grep for navigating unfamiliar logic. For Sift-specific features — agentic multi-turn search, explicit strategy selection, or planner configuration — use `sift_search`. Strategy guide: `page-index-hybrid` (strongest recall + structural reranking, default), `path-hybrid` (filename/path-heavy queries), `bm25` (fast lexical-only), `vector` (semantic-only). Enable `agent: true` with `agentMode: 'graph'` for deep multi-turn research across disconnected code regions, or `plannerStrategy: 'model-driven'` for LLM-guided planning. Use in-process SF tools like `grep` for exact text matches when you already have a specific identifier, and `find`/`ls` for literal filesystem discovery. Use `lsp` for structural navigation (definitions, references). Use `.sf/CODEBASE.md` for durable orientation. If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Never read files one-by-one to "explore" — search first, then read what's relevant. +**Codebase exploration:** Start with in-process `grep`, `find`, `ls`, `.sf/CODEBASE.md`, and `lsp` for broad orientation. Use `codebase_search` for conceptual, behavioral, or architectural discovery only after choosing a narrow scope and checking the `PROJECT CODE INTELLIGENCE` block; if Sift is degraded, slow, empty, or timing out, keep using grep/find/ls and direct reads. For Sift-specific features — explicit strategy selection or planner configuration — use `sift_search` with a scoped `path`. 
Strategy guide: `bm25` (fast lexical), `path-hybrid` (filename/path-heavy queries), `page-index-hybrid` (stronger recall + reranking), `vector` (semantic-only). Each repo uses its own Sift cache under `.sf/runtime/sift/`; do not rely on a shared/global Sift database. Use `lsp` for structural navigation (definitions, references). If the `PROJECT CODE INTELLIGENCE` block says Project RAG is configured, use its MCP tools for broad hybrid semantic + BM25 code retrieval before manual file-by-file reading. Never read files one-by-one to "explore" — search first, then read what's relevant. **Swarm dispatch:** Let the system decide whether swarming fits before dispatching multiple execution subagents. Use a 2-3 worker same-model swarm only when the work splits into independent shards with explicit file/directory ownership, shard-local verification, low conflict risk, and clear wall-clock savings. Do not swarm shared-interface edits, lockfiles, migrations, single-failure debugging, or sequence-dependent work. The parent agent remains coordinator: assign ownership, synthesize results, inspect dirty files, resolve conflicts, and run final verification. 
diff --git a/src/resources/extensions/sf/tests/code-intelligence-sift.test.mjs b/src/resources/extensions/sf/tests/code-intelligence-sift.test.mjs new file mode 100644 index 000000000..b8e7432b2 --- /dev/null +++ b/src/resources/extensions/sf/tests/code-intelligence-sift.test.mjs @@ -0,0 +1,133 @@ +import assert from "node:assert/strict"; +import { chmodSync, existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; + +import { + buildSiftEnv, + detectSift, + ensureSiftRuntimeDirs, + resolveSiftSearchScope, + resolveSiftWarmupRuntimeDirs, + resolveEffectiveCodebaseIndexerBackendName, +} from "../code-intelligence.js"; + +const tmpRoots = []; + +afterEach(() => { + for (const dir of tmpRoots.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-sift-project-")); + tmpRoots.push(root); + mkdirSync(join(root, "src"), { recursive: true }); + writeFileSync(join(root, "src", "index.js"), "export const value = 1;\n"); + return root; +} + +function makeFakeSift(script) { + const dir = mkdtempSync(join(tmpdir(), "sf-sift-bin-")); + tmpRoots.push(dir); + const bin = join(dir, "sift"); + writeFileSync(bin, script); + chmodSync(bin, 0o755); + return { dir, bin }; +} + +test("buildSiftEnv_uses_project_scoped_cache_from_project_root", () => { + const projectRoot = makeProject(); + + const env = buildSiftEnv(projectRoot, { PATH: "/usr/bin" }); + + assert.equal(env.SIFT_SEARCH_CACHE, join(projectRoot, ".sf", "runtime", "sift", "search-cache")); + assert.equal(env.TMPDIR, join(projectRoot, ".sf", "runtime", "sift", "tmp")); +}); + +test("resolveSiftSearchScope_normalizes_project_absolute_paths_to_relative", () => { + const projectRoot = makeProject(); + + assert.equal(resolveSiftSearchScope(projectRoot), "."); + assert.equal(resolveSiftSearchScope(projectRoot, 
projectRoot), "."); + assert.equal(resolveSiftSearchScope(projectRoot, join(projectRoot, "src")), "src"); + assert.equal(resolveSiftSearchScope(projectRoot, "src"), "src"); +}); + +test("ensureSiftRuntimeDirs_creates_repo_local_cache_directories", () => { + const projectRoot = makeProject(); + const dirs = ensureSiftRuntimeDirs(projectRoot); + + assert.equal(dirs.searchCache, join(projectRoot, ".sf", "runtime", "sift", "search-cache")); + assert.ok(existsSync(dirs.searchCache)); + assert.ok(existsSync(dirs.tmpDir)); +}); + +test("detectSift_when_probe_times_out_reports_degraded_not_configured", () => { + const projectRoot = makeProject(); + const { bin } = makeFakeSift("#!/bin/sh\nsleep 5\n"); + + const result = detectSift(projectRoot, {}, { + PATH: process.env.PATH ?? "", + SIFT_PATH: bin, + SF_SIFT_HEALTH_TIMEOUT_MS: "50", + }); + + assert.equal(result.status, "degraded"); + assert.equal(result.binaryPath, bin); + assert.match(result.reason, /health probe timed out/i); +}); + +test("resolveEffectiveCodebaseIndexerBackendName_when_sift_is_cold_still_selects_sift", () => { + const projectRoot = makeProject(); + const { bin } = makeFakeSift("#!/bin/sh\nsleep 5\n"); + + const result = resolveEffectiveCodebaseIndexerBackendName(projectRoot, {}, { + PATH: process.env.PATH ?? "", + SIFT_PATH: bin, + SF_SIFT_HEALTH_TIMEOUT_MS: "50", + }); + + assert.equal(result, "sift"); +}); + +test("detectSift_when_probe_succeeds_reports_project_cache", () => { + const projectRoot = makeProject(); + const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n"); + + const result = detectSift(projectRoot, {}, { + PATH: process.env.PATH ?? 
"", + SIFT_PATH: bin, + SF_SIFT_HEALTH_TIMEOUT_MS: "1000", + }); + const dirs = resolveSiftWarmupRuntimeDirs(projectRoot); + + assert.equal(result.status, "configured"); + assert.equal(result.binaryPath, bin); + assert.equal(result.searchCache, dirs.searchCache); + assert.equal(result.tmpDir, dirs.tmpDir); +}); + +test("detectSift_when_cache_manifest_contains_ignored_paths_reports_degraded", () => { + const projectRoot = makeProject(); + const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n"); + const dirs = ensureSiftRuntimeDirs(projectRoot); + const manifestDir = join(dirs.searchCache, "artifacts", "manifests"); + mkdirSync(manifestDir, { recursive: true }); + writeFileSync( + join(manifestDir, "bad"), + `./src/index.js\n${projectRoot}/.claude/worktrees/agent/src/index.js\n`, + ); + + const result = detectSift(projectRoot, {}, { + PATH: process.env.PATH ?? "", + SIFT_PATH: bin, + SF_SIFT_HEALTH_TIMEOUT_MS: "1000", + }); + + assert.equal(result.status, "degraded"); + assert.match(result.reason, /cache contains ignored\/generated paths/i); + assert.equal(result.cacheInspection.polluted, true); +}); diff --git a/src/resources/extensions/sf/tools/sift-search-tool.js b/src/resources/extensions/sf/tools/sift-search-tool.js index 25212a891..3554df475 100644 --- a/src/resources/extensions/sf/tools/sift-search-tool.js +++ b/src/resources/extensions/sf/tools/sift-search-tool.js @@ -11,7 +11,7 @@ */ import { execFile } from "node:child_process"; import { Type } from "@sinclair/typebox"; -import { resolveSiftBinary } from "../code-intelligence.js"; +import { buildSiftEnv, ensureSiftRuntimeDirs, resolveSiftBinary, resolveSiftSearchScope } from "../code-intelligence.js"; const KNOWN_STRATEGIES = [ "hybrid", @@ -34,7 +34,8 @@ const DEFAULT_TIMEOUT_MS = 60_000; /** * Build the sift CLI argument list from tool parameters. 
*/ -function buildSiftArgs(params) { +function buildSiftArgs(params, projectRoot = process.cwd()) { + const scope = resolveSiftSearchScope(projectRoot, params.path); const args = [ "search", "--json", @@ -57,7 +58,7 @@ function buildSiftArgs(params) { } // Path and query are positional - args.push(params.path ?? ".", params.query); + args.push(scope, params.query); return args; } @@ -104,13 +105,16 @@ function parseSiftOutput(rawStdout, rawStderr) { /** * Execute a sift search with the given parameters. */ -function runSift(binaryPath, args, timeoutMs) { +function runSift(binaryPath, args, timeoutMs, projectRoot) { return new Promise((resolve, reject) => { + ensureSiftRuntimeDirs(projectRoot); const child = execFile( binaryPath, args, { + cwd: projectRoot, encoding: "utf-8", + env: buildSiftEnv(projectRoot, process.env), maxBuffer: 16 * 1024 * 1024, timeout: timeoutMs, }, @@ -154,7 +158,7 @@ export function registerSiftSearchTool(pi) { }), path: Type.Optional( Type.String({ - description: "Directory or file path to search within. Default: current directory ('.').", + description: "Directory or file path to search within. Default: repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.", default: ".", }), ), @@ -236,14 +240,17 @@ export function registerSiftSearchTool(pi) { }; } - const args = buildSiftArgs(params); + const projectRoot = process.cwd(); + const args = buildSiftArgs(params, projectRoot); + const scope = args.at(-2) ?? "."; const timeoutMs = params.timeoutMs ?? 
DEFAULT_TIMEOUT_MS; const startedAt = Date.now(); try { - const { stdout, stderr } = await runSift(binaryPath, args, timeoutMs); + const { stdout, stderr } = await runSift(binaryPath, args, timeoutMs, projectRoot); const elapsedMs = Date.now() - startedAt; const result = parseSiftOutput(stdout, stderr); + const runtimeDirs = ensureSiftRuntimeDirs(projectRoot); // Telemetry: log query outcomes for tuning const { logInfo } = await import("../workflow-logger.js"); @@ -251,14 +258,18 @@ export function registerSiftSearchTool(pi) { query: params.query, strategy: params.strategy ?? DEFAULT_STRATEGY, agent: params.agent ?? false, + path: scope, hitCount: result.hits.length, elapsedMs, binary: binaryPath, + searchCache: runtimeDirs.searchCache, }); const lines = [ `Sift search: "${params.query}"`, `Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`, + `Scope: ${scope}`, + `Search cache: ${runtimeDirs.searchCache}`, `Hits: ${result.hits.length} | Elapsed: ${elapsedMs}ms`, "", ]; @@ -288,6 +299,7 @@ export function registerSiftSearchTool(pi) { elapsedMs, hitCount: result.hits.length, hits: result.hits, + searchCache: runtimeDirs.searchCache, }, }; } catch (err) { diff --git a/src/resources/extensions/subagent/index.js b/src/resources/extensions/subagent/index.js index 1aa6fd9d3..56e32f811 100644 --- a/src/resources/extensions/subagent/index.js +++ b/src/resources/extensions/subagent/index.js @@ -22,7 +22,7 @@ import { StringEnum } from "@singularity-forge/pi-ai"; import { getMarkdownTheme, } from "@singularity-forge/pi-coding-agent"; import { Container, Markdown, Spacer, Text } from "@singularity-forge/pi-tui"; import { CmuxClient, shellEscape } from "../cmux/index.js"; -import { buildSiftEnv, resolveSiftBinary } from "../sf/code-intelligence.js"; +import { buildSiftEnv, ensureSiftRuntimeDirs, resolveSiftBinary, resolveSiftSearchScope } from 
"../sf/code-intelligence.js"; import { loadEffectiveSFPreferences } from "../sf/preferences.js"; import { formatTokenCount } from "../shared/mod.js"; import { getCurrentPhase } from "../shared/sf-phase-state.js"; @@ -1789,7 +1789,7 @@ export default function (pi) { description: "Natural-language query describing what to explore (e.g. 'find where the write gate tool_call hooks are registered')", }), scope: Type.Optional(Type.String({ - description: "Path to search within. Defaults to the current working directory. Use the active worktree for isolation.", + description: "Path to search within. Defaults to repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.", })), strategy: Type.Optional(Type.String({ description: "Search strategy: 'path-hybrid' (default), 'page-index-hybrid', 'bm25', or 'path'", @@ -1802,22 +1802,23 @@ export default function (pi) { name: "codebase_search", label: "Code Search", description: [ - "Perform Sift-backed hybrid (BM25 + Vector) retrieval over a codebase scope.", - " Use this as your PRIMARY exploration tool for conceptual, behavioral, or cross-cutting questions", + "Perform Sift-backed hybrid (BM25 + Vector) retrieval over a scoped codebase path.", + " Use this for conceptual, behavioral, or cross-cutting questions only after choosing a narrow scope", " (e.g. 
'how is X handled?', 'where is the logic for Y?', 'find examples of Z').", - " It is significantly more effective than grep for discovering unfamiliar logic and architecture.", + " If Sift status is degraded or the scope is broad, prefer grep/find/ls and retry with a narrower scope.", ].join(""), promptGuidelines: [ - "Use codebase_search BEFORE grep when exploring unfamiliar areas or conceptual patterns.", + "Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns.", " page-index-hybrid (default): Use for 'How' and 'Why' questions (logic, implementation, reasoning).", " path-hybrid: Use for 'Where' questions (architecture, directory structure, file location).", + " Keep scope narrow enough to avoid root-level Sift timeouts; each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.", " Be descriptive in your query: include function names, types, or intent (e.g. 'auth middleware validation').", " This tool is read-only and optimized for evidence gathering before you plan or edit.", ], parameters: CodebaseSearchParams, renderCall(args, theme) { const query = typeof args.query === "string" ? args.query : ""; - const scope = typeof args.scope === "string" ? args.scope : process.cwd(); + const scope = resolveSiftSearchScope(process.cwd(), typeof args.scope === "string" ? args.scope : undefined); const strategy = typeof args.strategy === "string" ? args.strategy : "page-index-hybrid"; const preview = query.length > 90 ? `${query.slice(0, 89).trimEnd()}…` : query; const scopeLabel = scope.length > 70 @@ -1860,7 +1861,8 @@ export default function (pi) { return new Text(rendered, 0, 0); }, async execute(_toolCallId, params, signal) { - const scope = params.scope ?? process.cwd(); + const projectRoot = process.cwd(); + const scope = resolveSiftSearchScope(projectRoot, params.scope); const strategy = params.strategy ?? 
"page-index-hybrid"; const query = params.query; const timeoutMs = typeof params.timeoutMs === "number" && @@ -1868,14 +1870,33 @@ export default function (pi) { ? Math.max(1_000, params.timeoutMs) : CODEBASE_SEARCH_TIMEOUT_MS; const siftBin = resolveSiftBinary(); + if (!siftBin) { + return { + content: [ + { + type: "text", + text: "codebase_search unavailable: sift binary not found. Use grep/find/ls or set SIFT_PATH.", + }, + ], + details: { + operation: "codebase_search", + exitCode: 127, + query, + scope, + strategy, + timeoutMs, + }, + }; + } const args = buildCodebaseSearchArgs(strategy, query, scope); const stderr = []; const stdout = []; let wasAborted = false; let timedOut = false; - const childEnv = buildSiftEnv(scope, process.env); + const runtimeDirs = ensureSiftRuntimeDirs(projectRoot); + const childEnv = buildSiftEnv(projectRoot, process.env); const proc = spawn(siftBin, args, { - cwd: scope, + cwd: projectRoot, env: childEnv, shell: false, stdio: ["ignore", "pipe", "pipe"], @@ -1951,6 +1972,7 @@ export default function (pi) { scope, strategy, timeoutMs, + searchCache: runtimeDirs.searchCache, }, }; } @@ -1977,6 +1999,7 @@ export default function (pi) { scope, strategy, timeoutMs, + searchCache: runtimeDirs.searchCache, }, }; } @@ -1995,6 +2018,7 @@ export default function (pi) { exitCode, siftBin, timeoutMs, + searchCache: runtimeDirs.searchCache, }, }; },