feat(sift): add --verbose flag and vector-index progress logging
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

Adds three improvements to sift diagnostics:

1. --verbose flag: When SF_SIFT_LOG_LEVEL=debug|trace, sift search
   calls now include --verbose for richer stderr output from the Rust
   binary. Applied to sift_search, codebase_search, and warmup paths.

2. Vector-index progress poller: During searches that include the
   'vector' retriever, a 30-second interval polls the global sift cache
   (~/.cache/sift/search/artifacts/indexes/*/sectors/) and writes
   progress lines to the log file:
     [2026-05-15T11:00:00Z] vector-index progress: 32 sectors (80 MB total)
   This lets an operator tail the log during long cold-cache embedding
   builds instead of staring at a silent process.

3. estimateVectorIndexProgress / countVectorSectors helpers count sector
   files across all index directories and report total count + size.

Tests: 179 files / 1858 tests pass.
Type check: clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-15 11:23:54 +02:00
parent 9b42404149
commit 32362a83bc
2 changed files with 145 additions and 2 deletions

View file

@ -65,6 +65,47 @@ const COLLAPSED_ITEM_COUNT = 10;
*/
const CODEBASE_SEARCH_TIMEOUT_MS = 600_000;
const liveSubagentProcesses = new Set();
/**
 * Tally the vector-index sector files stored in the global sift cache
 * (`<home>/.cache/sift/search/artifacts/indexes/<index>/sectors`).
 *
 * Every entry found in a `sectors` directory counts as one sector; sizes are
 * summed for the entries that can still be stat'ed. Index directories whose
 * `sectors` folder cannot be read are skipped, and any unexpected failure
 * yields the all-zero result.
 *
 * @returns {{ sectorCount: number, cacheSizeMb: number }} Number of sector
 *   entries across all indexes and their combined size, rounded to whole MiB.
 */
function countVectorSectors() {
  const BYTES_PER_MB = 1024 * 1024;
  const nothingFound = () => ({ sectorCount: 0, cacheSizeMb: 0 });

  try {
    const indexesRoot = path.join(
      os.homedir(),
      ".cache",
      "sift",
      "search",
      "artifacts",
      "indexes",
    );
    const rootInfo = fs.statSync(indexesRoot, { throwIfNoEntry: false });
    if (!rootInfo?.isDirectory()) {
      return nothingFound();
    }

    const tally = { sectors: 0, bytes: 0 };
    fs.readdirSync(indexesRoot).forEach((indexName) => {
      const sectorsPath = path.join(indexesRoot, indexName, "sectors");
      try {
        const sectorNames = fs.readdirSync(sectorsPath);
        tally.sectors += sectorNames.length;
        sectorNames.forEach((sectorName) => {
          // A sector may vanish between readdir and stat; it is still counted
          // but contributes no bytes.
          const info = fs.statSync(path.join(sectorsPath, sectorName), {
            throwIfNoEntry: false,
          });
          if (info) {
            tally.bytes += info.size;
          }
        });
      } catch {
        // skip unreadable dirs
      }
    });

    return {
      sectorCount: tally.sectors,
      cacheSizeMb: Math.round(tally.bytes / BYTES_PER_MB),
    };
  } catch {
    return nothingFound();
  }
}
const liveSubagentControllers = new Set();
const AGENT_ALIASES = {
default: "worker",
@ -180,6 +221,13 @@ function buildCodebaseSearchArgs(strategy, query, scope, projectRoot) {
query,
scope,
];
// --verbose gives more progress info from the Rust binary when the
// operator explicitly asked for debug-level sift logging.
const siftLogLevel = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase();
if (siftLogLevel === "debug" || siftLogLevel === "trace") {
args.push("--verbose");
}
return args;
}
function formatUsageStats(usage, model) {
const parts = [];
@ -2644,6 +2692,27 @@ export default function (pi) {
);
fs.writeFileSync(logPath, "", "utf-8");
}
// If vector retrievers are in play, start a progress poller that
// writes index-build state into the log file every 30 seconds.
const usesVector = args.includes("vector");
let progressTimer = null;
let lastSectorCount = 0;
if (usesVector && logPath) {
  // countVectorSectors() returns { sectorCount, cacheSizeMb }; keep only the
  // numeric count as the baseline. Storing the whole object made the first
  // delta NaN, which silently dropped the "(+N since last check)" suffix.
  lastSectorCount = countVectorSectors().sectorCount;
  progressTimer = setInterval(() => {
    const { sectorCount, cacheSizeMb } = countVectorSectors();
    const delta = sectorCount - lastSectorCount;
    lastSectorCount = sectorCount;
    const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`;
    try {
      fs.appendFileSync(logPath, line);
    } catch {
      // Progress logging is best-effort; a failed append must not
      // interfere with the search itself.
    }
  }, 30_000);
}
const childEnv = { ...buildSiftEnv(projectRoot, process.env), ...logEnv };
const proc = spawn(siftBin, args, {
cwd: projectRoot,
@ -2694,12 +2763,14 @@ export default function (pi) {
const exitCode = await new Promise((resolve) => {
proc.on("close", (code) => {
clearTimeout(timeout);
if (progressTimer) clearInterval(progressTimer);
liveSubagentProcesses.delete(proc);
if (signal) signal.removeEventListener("abort", killProc);
resolve(code ?? 0);
});
proc.on("error", () => {
clearTimeout(timeout);
if (progressTimer) clearInterval(progressTimer);
liveSubagentProcesses.delete(proc);
if (signal) signal.removeEventListener("abort", killProc);
resolve(1);

View file

@ -10,8 +10,8 @@
* Consumer: executing agents that need Sift's advanced retrieval modes.
*/
import { execFile } from "node:child_process";
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname } from "node:path";
import { mkdirSync, readdirSync, statSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { Type } from "@sinclair/typebox";
import {
buildSiftEnv,
@ -69,6 +69,13 @@ function buildSiftArgs(params, projectRoot = process.cwd()) {
args.push("--retrievers", String(effectiveRetrievers));
args.push("--reranking", String(effectiveReranking));
// --verbose gives more progress info from the Rust binary when the
// operator explicitly asked for debug-level sift logging.
const siftLogLevel = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase();
if (siftLogLevel === "debug" || siftLogLevel === "trace") {
args.push("--verbose");
}
if (params.agent === true) {
args.push("--agent");
if (params.agentMode) {
@ -127,6 +134,47 @@ function parseSiftOutput(rawStdout, rawStderr) {
/**
 * Estimate vector-index build progress by counting sector files in the
 * global sift cache
 * (`<home>/.cache/sift/search/artifacts/indexes/<index>/sectors`).
 *
 * The home directory is taken from HOME, then USERPROFILE (so the lookup
 * also works on Windows, where HOME is normally unset), and finally falls
 * back to "/tmp". Index directories whose `sectors` folder cannot be read
 * are skipped; any unexpected failure yields the all-zero result.
 *
 * @param {string} [projectRoot] - Not used by the estimate (the cache is
 *   global rather than per project); kept so existing call sites keep working.
 * @returns {{ sectorCount: number, cacheSizeMb: number }} Number of sector
 *   entries across all indexes and their combined size, rounded to whole MiB.
 */
function estimateVectorIndexProgress(projectRoot) {
  try {
    const homeDir = process.env.HOME || process.env.USERPROFILE || "/tmp";
    const globalCache = join(
      homeDir,
      ".cache",
      "sift",
      "search",
      "artifacts",
      "indexes",
    );
    if (!statSync(globalCache, { throwIfNoEntry: false })?.isDirectory()) {
      return { sectorCount: 0, cacheSizeMb: 0 };
    }
    let totalSectors = 0;
    let totalBytes = 0;
    for (const entry of readdirSync(globalCache)) {
      const sectorDir = join(globalCache, entry, "sectors");
      try {
        const files = readdirSync(sectorDir);
        totalSectors += files.length;
        for (const f of files) {
          // A sector may disappear between readdir and stat; it is still
          // counted but contributes no bytes.
          const s = statSync(join(sectorDir, f), { throwIfNoEntry: false });
          if (s) totalBytes += s.size;
        }
      } catch {
        // skip unreadable dirs
      }
    }
    return {
      sectorCount: totalSectors,
      cacheSizeMb: Math.round(totalBytes / (1024 * 1024)),
    };
  } catch {
    return { sectorCount: 0, cacheSizeMb: 0 };
  }
}
/**
 * Execute a sift search with the given parameters.
 */
function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath = null) {
return new Promise((resolve, reject) => {
ensureSiftRuntimeDirs(projectRoot);
@ -134,6 +182,29 @@ function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath
mkdirSync(dirname(logPath), { recursive: true });
writeFileSync(logPath, "", "utf-8");
}
// If vector retrievers are in play, start a progress poller that
// writes index-build state into the log file every 30 seconds so
// an operator can tail it during long cold-cache runs.
const usesVector = args.includes("vector");
let progressTimer = null;
if (usesVector && logPath) {
const baseline = estimateVectorIndexProgress(projectRoot);
let lastSectors = baseline.sectorCount;
progressTimer = setInterval(() => {
const { sectorCount, cacheSizeMb } =
estimateVectorIndexProgress(projectRoot);
const delta = sectorCount - lastSectors;
lastSectors = sectorCount;
const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`;
try {
writeFileSync(logPath, line, { encoding: "utf-8", flag: "a" });
} catch {
// ignore
}
}, 30_000);
}
const _child = execFile(
binaryPath,
args,
@ -145,6 +216,7 @@ function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath
timeout: timeoutMs,
},
(error, stdout, stderr) => {
if (progressTimer) clearInterval(progressTimer);
if (logPath && stderr) {
try {
writeFileSync(logPath, stderr, { encoding: "utf-8", flag: "a" });