From 32362a83bc43e112b965c442e5bf294276f53717 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 15 May 2026 11:23:54 +0200 Subject: [PATCH] feat(sift): add --verbose flag and vector-index progress logging Adds three improvements to sift diagnostics: 1. --verbose flag: When SF_SIFT_LOG_LEVEL=debug|trace, sift search calls now include --verbose for richer stderr output from the Rust binary. Applied to sift_search, codebase_search, and warmup paths. 2. Vector-index progress poller: During searches that include the 'vector' retriever, a 30-second interval polls the global sift cache (~/.cache/sift/search/artifacts/indexes/*/sectors/) and writes progress lines to the log file: [2026-05-15T11:00:00Z] vector-index progress: 32 sectors (80 MB total) This lets an operator tail the log during long cold-cache embedding builds instead of staring at a silent process. 3. estimateVectorIndexProgress / countVectorSectors helpers count sector files across all index directories and report total count + size. Tests: 179 files / 1858 tests pass. Type check: clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/resources/extensions/sf/subagent/index.js | 71 +++++++++++++++++ .../extensions/sf/tools/sift-search-tool.js | 76 ++++++++++++++++++- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/src/resources/extensions/sf/subagent/index.js b/src/resources/extensions/sf/subagent/index.js index 516be2d87..9abfabf37 100644 --- a/src/resources/extensions/sf/subagent/index.js +++ b/src/resources/extensions/sf/subagent/index.js @@ -65,6 +65,47 @@ const COLLAPSED_ITEM_COUNT = 10; */ const CODEBASE_SEARCH_TIMEOUT_MS = 600_000; const liveSubagentProcesses = new Set(); + +/** + * Count vector-index sector files in the global sift cache. + * Returns { sectorCount, cacheSizeMb }. 
+ */ +function countVectorSectors() { + try { + const globalCache = path.join( + os.homedir(), + ".cache", + "sift", + "search", + "artifacts", + "indexes", + ); + if (!fs.statSync(globalCache, { throwIfNoEntry: false })?.isDirectory()) { + return { sectorCount: 0, cacheSizeMb: 0 }; + } + let totalSectors = 0; + let totalBytes = 0; + for (const entry of fs.readdirSync(globalCache)) { + const sectorDir = path.join(globalCache, entry, "sectors"); + try { + const files = fs.readdirSync(sectorDir); + totalSectors += files.length; + for (const f of files) { + const s = fs.statSync(path.join(sectorDir, f), { throwIfNoEntry: false }); + if (s) totalBytes += s.size; + } + } catch { + // skip unreadable dirs + } + } + return { + sectorCount: totalSectors, + cacheSizeMb: Math.round(totalBytes / (1024 * 1024)), + }; + } catch { + return { sectorCount: 0, cacheSizeMb: 0 }; + } +} const liveSubagentControllers = new Set(); const AGENT_ALIASES = { default: "worker", @@ -180,6 +221,13 @@ function buildCodebaseSearchArgs(strategy, query, scope, projectRoot) { query, scope, ]; + // --verbose gives more progress info from the Rust binary when the + // operator explicitly asked for debug-level sift logging. + const siftLogLevel = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase(); + if (siftLogLevel === "debug" || siftLogLevel === "trace") { + args.push("--verbose"); + } + return args; } function formatUsageStats(usage, model) { const parts = []; @@ -2644,6 +2692,27 @@ export default function (pi) { ); fs.writeFileSync(logPath, "", "utf-8"); } + + // If vector retrievers are in play, start a progress poller that + // writes index-build state into the log file every 30 seconds. 
+ const usesVector = args.includes("vector"); + let progressTimer = null; + let lastSectorCount = 0; + if (usesVector && logPath) { + lastSectorCount = countVectorSectors().sectorCount; // baseline must be the numeric count, not the result object, or the first delta is NaN + progressTimer = setInterval(() => { + const { sectorCount, cacheSizeMb } = countVectorSectors(); + const delta = sectorCount - lastSectorCount; + lastSectorCount = sectorCount; + const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`; + try { + fs.appendFileSync(logPath, line); + } catch { + // ignore + } + }, 30_000); + } + const childEnv = { ...buildSiftEnv(projectRoot, process.env), ...logEnv }; const proc = spawn(siftBin, args, { cwd: projectRoot, @@ -2694,12 +2763,14 @@ export default function (pi) { const exitCode = await new Promise((resolve) => { proc.on("close", (code) => { clearTimeout(timeout); + if (progressTimer) clearInterval(progressTimer); liveSubagentProcesses.delete(proc); if (signal) signal.removeEventListener("abort", killProc); resolve(code ?? 0); }); proc.on("error", () => { clearTimeout(timeout); + if (progressTimer) clearInterval(progressTimer); liveSubagentProcesses.delete(proc); if (signal) signal.removeEventListener("abort", killProc); resolve(1); diff --git a/src/resources/extensions/sf/tools/sift-search-tool.js b/src/resources/extensions/sf/tools/sift-search-tool.js index 98d4ddac0..8dc910425 100644 --- a/src/resources/extensions/sf/tools/sift-search-tool.js +++ b/src/resources/extensions/sf/tools/sift-search-tool.js @@ -10,8 +10,8 @@ * Consumer: executing agents that need Sift's advanced retrieval modes. 
*/ import { execFile } from "node:child_process"; -import { mkdirSync, writeFileSync } from "node:fs"; -import { dirname } from "node:path"; +import { mkdirSync, readdirSync, statSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; import { Type } from "@sinclair/typebox"; import { buildSiftEnv, @@ -69,6 +69,13 @@ function buildSiftArgs(params, projectRoot = process.cwd()) { args.push("--retrievers", String(effectiveRetrievers)); args.push("--reranking", String(effectiveReranking)); + // --verbose gives more progress info from the Rust binary when the + // operator explicitly asked for debug-level sift logging. + const siftLogLevel = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase(); + if (siftLogLevel === "debug" || siftLogLevel === "trace") { + args.push("--verbose"); + } + if (params.agent === true) { args.push("--agent"); if (params.agentMode) { @@ -127,6 +134,47 @@ function parseSiftOutput(rawStdout, rawStderr) { /** * Execute a sift search with the given parameters. */ +/** + * Count vector-index sector files in the global sift cache to estimate + * indexing progress. Returns { sectorCount, cacheSizeMb }. + */ +function estimateVectorIndexProgress(projectRoot) { + try { + const globalCache = join( + process.env.HOME ?? 
"/tmp", + ".cache", + "sift", + "search", + "artifacts", + "indexes", + ); + if (!statSync(globalCache, { throwIfNoEntry: false })?.isDirectory()) { + return { sectorCount: 0, cacheSizeMb: 0 }; + } + let totalSectors = 0; + let totalBytes = 0; + for (const entry of readdirSync(globalCache)) { + const sectorDir = join(globalCache, entry, "sectors"); + try { + const files = readdirSync(sectorDir); + totalSectors += files.length; + for (const f of files) { + const s = statSync(join(sectorDir, f), { throwIfNoEntry: false }); + if (s) totalBytes += s.size; + } + } catch { + // skip unreadable dirs + } + } + return { + sectorCount: totalSectors, + cacheSizeMb: Math.round(totalBytes / (1024 * 1024)), + }; + } catch { + return { sectorCount: 0, cacheSizeMb: 0 }; + } +} + function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath = null) { return new Promise((resolve, reject) => { ensureSiftRuntimeDirs(projectRoot); @@ -134,6 +182,29 @@ function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath mkdirSync(dirname(logPath), { recursive: true }); writeFileSync(logPath, "", "utf-8"); } + + // If vector retrievers are in play, start a progress poller that + // writes index-build state into the log file every 30 seconds so + // an operator can tail it during long cold-cache runs. + const usesVector = args.some((arg) => String(arg).split(",").includes("vector")); // the retrievers value is a single argv element and may be a comma-separated list + let progressTimer = null; + if (usesVector && logPath) { + const baseline = estimateVectorIndexProgress(projectRoot); + let lastSectors = baseline.sectorCount; + progressTimer = setInterval(() => { + const { sectorCount, cacheSizeMb } = + estimateVectorIndexProgress(projectRoot); + const delta = sectorCount - lastSectors; + lastSectors = sectorCount; + const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? 
` (+${delta} since last check)` : ""}\n`; + try { + writeFileSync(logPath, line, { encoding: "utf-8", flag: "a" }); + } catch { + // ignore + } + }, 30_000); + } + const _child = execFile( binaryPath, args, @@ -145,6 +216,7 @@ function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath timeout: timeoutMs, }, (error, stdout, stderr) => { + if (progressTimer) clearInterval(progressTimer); if (logPath && stderr) { try { writeFileSync(logPath, stderr, { encoding: "utf-8", flag: "a" });