feat(sift): add --verbose flag and vector-index progress logging
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Adds three improvements to sift diagnostics:
1. --verbose flag: When SF_SIFT_LOG_LEVEL=debug|trace, sift search
calls now include --verbose for richer stderr output from the Rust
binary. Applied to sift_search, codebase_search, and warmup paths.
2. Vector-index progress poller: During searches that include the
'vector' retriever, a 30-second interval polls the global sift cache
(~/.cache/sift/search/artifacts/indexes/*/sectors/) and writes
progress lines to the log file:
[2026-05-15T11:00:00Z] vector-index progress: 32 sectors (80 MB total)
This lets an operator tail the log during long cold-cache embedding
builds instead of staring at a silent process.
3. estimateVectorIndexProgress / countVectorSectors helpers count sector
files across all index directories and report total count + size.
Tests: 179 files / 1858 tests pass.
Type check: clean.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
9b42404149
commit
32362a83bc
2 changed files with 145 additions and 2 deletions
|
|
@ -65,6 +65,47 @@ const COLLAPSED_ITEM_COUNT = 10;
|
|||
*/
|
||||
const CODEBASE_SEARCH_TIMEOUT_MS = 600_000;
|
||||
const liveSubagentProcesses = new Set();
|
||||
|
||||
/**
 * Count vector-index sector files in the global sift cache.
 *
 * Walks `<home>/.cache/sift/search/artifacts/indexes/<index>/sectors/` for
 * every entry under `indexes` and totals the regular files found there.
 * Only entries that stat as regular files contribute, so the reported count
 * and the reported size always describe the same set of files (sub-directories
 * and entries that vanish between readdir and stat are ignored).
 *
 * Best-effort: any filesystem error yields zeros (or skips the offending
 * entry) instead of throwing, because this only feeds progress logging.
 *
 * @returns {{ sectorCount: number, cacheSizeMb: number }} Number of sector
 *   files and their combined size in MiB, rounded to the nearest integer.
 */
function countVectorSectors() {
  try {
    const indexesDir = path.join(
      os.homedir(),
      ".cache",
      "sift",
      "search",
      "artifacts",
      "indexes",
    );
    if (!fs.statSync(indexesDir, { throwIfNoEntry: false })?.isDirectory()) {
      return { sectorCount: 0, cacheSizeMb: 0 };
    }

    let sectorCount = 0;
    let totalBytes = 0;
    for (const indexName of fs.readdirSync(indexesDir)) {
      const sectorDir = path.join(indexesDir, indexName, "sectors");
      let sectorNames;
      try {
        sectorNames = fs.readdirSync(sectorDir);
      } catch {
        // Missing or unreadable sectors directory (or a stray file): skip it.
        continue;
      }
      for (const sectorName of sectorNames) {
        let stats;
        try {
          stats = fs.statSync(path.join(sectorDir, sectorName), {
            throwIfNoEntry: false,
          });
        } catch {
          // One unreadable entry must not discard the rest of the directory.
          continue;
        }
        if (!stats?.isFile()) continue;
        sectorCount += 1;
        totalBytes += stats.size;
      }
    }

    return {
      sectorCount,
      cacheSizeMb: Math.round(totalBytes / (1024 * 1024)),
    };
  } catch {
    return { sectorCount: 0, cacheSizeMb: 0 };
  }
}
|
||||
const liveSubagentControllers = new Set();
|
||||
const AGENT_ALIASES = {
|
||||
default: "worker",
|
||||
|
|
@ -180,6 +221,13 @@ function buildCodebaseSearchArgs(strategy, query, scope, projectRoot) {
|
|||
query,
|
||||
scope,
|
||||
];
|
||||
// --verbose gives more progress info from the Rust binary when the
|
||||
// operator explicitly asked for debug-level sift logging.
|
||||
const siftLogLevel = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase();
|
||||
if (siftLogLevel === "debug" || siftLogLevel === "trace") {
|
||||
args.push("--verbose");
|
||||
}
|
||||
return args;
|
||||
}
|
||||
function formatUsageStats(usage, model) {
|
||||
const parts = [];
|
||||
|
|
@ -2644,6 +2692,27 @@ export default function (pi) {
|
|||
);
|
||||
fs.writeFileSync(logPath, "", "utf-8");
|
||||
}
|
||||
|
||||
// If vector retrievers are in play, start a progress poller that
|
||||
// writes index-build state into the log file every 30 seconds.
|
||||
const usesVector = args.includes("vector");
|
||||
let progressTimer = null;
|
||||
let lastSectorCount = 0;
|
||||
if (usesVector && logPath) {
  // countVectorSectors() returns { sectorCount, cacheSizeMb }. Keep only the
  // numeric count as the baseline; storing the whole object would make the
  // first delta NaN and suppress the first "+N since last check" suffix.
  lastSectorCount = countVectorSectors().sectorCount;
  progressTimer = setInterval(() => {
    const { sectorCount, cacheSizeMb } = countVectorSectors();
    const delta = sectorCount - lastSectorCount;
    lastSectorCount = sectorCount;
    const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`;
    try {
      fs.appendFileSync(logPath, line);
    } catch {
      // Progress logging is best-effort; never let it disturb the search.
    }
  }, 30_000);
}
|
||||
|
||||
const childEnv = { ...buildSiftEnv(projectRoot, process.env), ...logEnv };
|
||||
const proc = spawn(siftBin, args, {
|
||||
cwd: projectRoot,
|
||||
|
|
@ -2694,12 +2763,14 @@ export default function (pi) {
|
|||
const exitCode = await new Promise((resolve) => {
|
||||
proc.on("close", (code) => {
|
||||
clearTimeout(timeout);
|
||||
if (progressTimer) clearInterval(progressTimer);
|
||||
liveSubagentProcesses.delete(proc);
|
||||
if (signal) signal.removeEventListener("abort", killProc);
|
||||
resolve(code ?? 0);
|
||||
});
|
||||
proc.on("error", () => {
|
||||
clearTimeout(timeout);
|
||||
if (progressTimer) clearInterval(progressTimer);
|
||||
liveSubagentProcesses.delete(proc);
|
||||
if (signal) signal.removeEventListener("abort", killProc);
|
||||
resolve(1);
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@
|
|||
* Consumer: executing agents that need Sift's advanced retrieval modes.
|
||||
*/
|
||||
import { execFile } from "node:child_process";
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
import { mkdirSync, readdirSync, statSync, writeFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import {
|
||||
buildSiftEnv,
|
||||
|
|
@ -69,6 +69,13 @@ function buildSiftArgs(params, projectRoot = process.cwd()) {
|
|||
args.push("--retrievers", String(effectiveRetrievers));
|
||||
args.push("--reranking", String(effectiveReranking));
|
||||
|
||||
// --verbose gives more progress info from the Rust binary when the
|
||||
// operator explicitly asked for debug-level sift logging.
|
||||
const siftLogLevel = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase();
|
||||
if (siftLogLevel === "debug" || siftLogLevel === "trace") {
|
||||
args.push("--verbose");
|
||||
}
|
||||
|
||||
if (params.agent === true) {
|
||||
args.push("--agent");
|
||||
if (params.agentMode) {
|
||||
|
|
@ -127,6 +134,47 @@ function parseSiftOutput(rawStdout, rawStderr) {
|
|||
/**
 * Count vector-index sector files in the global sift cache to estimate
 * indexing progress.
 *
 * Walks `<home>/.cache/sift/search/artifacts/indexes/<index>/sectors/` for
 * every entry under `indexes`. The home directory is taken from `HOME`, then
 * `USERPROFILE` (so Windows hosts, where `HOME` is normally unset, do not
 * silently scan `/tmp`), and only then falls back to `/tmp`.
 *
 * Only entries that stat as regular files contribute, so the reported count
 * and size always describe the same set of files. Best-effort: filesystem
 * errors yield zeros (or skip the offending entry) instead of throwing.
 *
 * @param {string} [projectRoot] - Not read by this function; the scan always
 *   targets the per-user global cache. Kept so existing call sites keep working.
 * @returns {{ sectorCount: number, cacheSizeMb: number }} Number of sector
 *   files and their combined size in MiB, rounded to the nearest integer.
 */
function estimateVectorIndexProgress(projectRoot) {
  try {
    const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? "/tmp";
    const indexesDir = join(
      homeDir,
      ".cache",
      "sift",
      "search",
      "artifacts",
      "indexes",
    );
    if (!statSync(indexesDir, { throwIfNoEntry: false })?.isDirectory()) {
      return { sectorCount: 0, cacheSizeMb: 0 };
    }

    let sectorCount = 0;
    let totalBytes = 0;
    for (const indexName of readdirSync(indexesDir)) {
      const sectorDir = join(indexesDir, indexName, "sectors");
      let sectorNames;
      try {
        sectorNames = readdirSync(sectorDir);
      } catch {
        // Missing or unreadable sectors directory (or a stray file): skip it.
        continue;
      }
      for (const sectorName of sectorNames) {
        let stats;
        try {
          stats = statSync(join(sectorDir, sectorName), {
            throwIfNoEntry: false,
          });
        } catch {
          // One unreadable entry must not discard the rest of the directory.
          continue;
        }
        if (!stats?.isFile()) continue;
        sectorCount += 1;
        totalBytes += stats.size;
      }
    }

    return {
      sectorCount,
      cacheSizeMb: Math.round(totalBytes / (1024 * 1024)),
    };
  } catch {
    return { sectorCount: 0, cacheSizeMb: 0 };
  }
}

/**
 * Execute a sift search with the given parameters.
 */
|
||||
|
||||
function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath = null) {
|
||||
return new Promise((resolve, reject) => {
|
||||
ensureSiftRuntimeDirs(projectRoot);
|
||||
|
|
@ -134,6 +182,29 @@ function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath
|
|||
mkdirSync(dirname(logPath), { recursive: true });
|
||||
writeFileSync(logPath, "", "utf-8");
|
||||
}
|
||||
|
||||
// If vector retrievers are in play, start a progress poller that
|
||||
// writes index-build state into the log file every 30 seconds so
|
||||
// an operator can tail it during long cold-cache runs.
|
||||
const usesVector = args.includes("vector");
|
||||
let progressTimer = null;
|
||||
if (usesVector && logPath) {
|
||||
const baseline = estimateVectorIndexProgress(projectRoot);
|
||||
let lastSectors = baseline.sectorCount;
|
||||
progressTimer = setInterval(() => {
|
||||
const { sectorCount, cacheSizeMb } =
|
||||
estimateVectorIndexProgress(projectRoot);
|
||||
const delta = sectorCount - lastSectors;
|
||||
lastSectors = sectorCount;
|
||||
const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`;
|
||||
try {
|
||||
writeFileSync(logPath, line, { encoding: "utf-8", flag: "a" });
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}, 30_000);
|
||||
}
|
||||
|
||||
const _child = execFile(
|
||||
binaryPath,
|
||||
args,
|
||||
|
|
@ -145,6 +216,7 @@ function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath
|
|||
timeout: timeoutMs,
|
||||
},
|
||||
(error, stdout, stderr) => {
|
||||
if (progressTimer) clearInterval(progressTimer);
|
||||
if (logPath && stderr) {
|
||||
try {
|
||||
writeFileSync(logPath, stderr, { encoding: "utf-8", flag: "a" });
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue