feat(sift): structured stderr logging — last-search.log + RUST_LOG=info
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

Adds operator/agent visibility into sift's indexing + retrieval stages.
The 30-min cold full-repo vector indexing test went silent for the full
budget because SF's wrappers never enabled sift's tracing layer; CPU and
disk activity were the only externally visible signals.

resolveSiftLogging(projectRoot) (code-intelligence.js:897) returns
{ env: { RUST_LOG: level }, logPath } honoring SF_SIFT_LOG_LEVEL
(default "info"; "off"/"none"/"" disables). Default destination:
${projectRoot}/.sf/runtime/sift/last-search.log, truncated per call so
it always reflects the most recent invocation.

Wired into three spawn sites:
- ensureSiftIndexWarmup (code-intelligence.js): detached child's stderr
  fd opened with openSync(logPath, "a") and passed as stdio[2]
- runSift (tools/sift-search-tool.js): execFile env merges logEnv,
  stderr appended to logPath in the execFile callback
- codebase_search execute (subagent/index.js): proc.stderr.on("data")
  tees to logPath via fs.appendFileSync alongside the existing in-memory
  buffer for tool output

When a sift result is empty or times out, the tool reply now includes
"(stage diagnostic: .sf/runtime/sift/last-search.log)" so the agent
sees immediately where to look.

Tests: 11 new in sift-logging.test.mjs — env resolution matrix, log-file
truncate/write contract, hint-string format on timeout/no-output/disabled.
Full suite 1857/1857, no regressions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 10:56:32 +02:00
parent 091168303c
commit 604ebbf824
4 changed files with 277 additions and 11 deletions

View file

@ -8,6 +8,7 @@ import { spawn, spawnSync } from "node:child_process";
import {
existsSync,
mkdirSync,
openSync,
readdirSync,
readFileSync,
statSync,
@ -632,7 +633,16 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
try {
const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
ensureSiftRuntimeDirs(projectRoot);
const childEnv = buildSiftEnv(projectRoot, env);
const { env: logEnv, logPath } = resolveSiftLogging(projectRoot);
const childEnv = { ...buildSiftEnv(projectRoot, env), ...logEnv };
// For the detached warmup process, pipe stderr to the log file via a
// file descriptor so the operator can tail it during long indexing runs.
let stderrFd = null;
if (logPath) {
writeFileSync(logPath, "", "utf-8");
mkdirSync(join(projectRoot, ".sf", "runtime", "sift"), { recursive: true });
stderrFd = openSync(logPath, "a");
}
const marker = {
schemaVersion: 3,
status: "warming",
@ -645,12 +655,13 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
hardTimeoutSec: wrapper?.timeoutSec ?? null,
searchCache: runtimeDirs.searchCache,
tmpDir: runtimeDirs.tmpDir,
siftLogPath: logPath ?? null,
};
writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
const child = (options.spawnFn ?? spawn)(command, args, {
cwd: projectRoot,
env: childEnv,
stdio: "ignore",
stdio: ["ignore", "ignore", stderrFd ?? "ignore"],
detached: true,
});
marker.pid = child.pid ?? null;
@ -892,3 +903,30 @@ export const CODEBASE_INDEXER_BACKENDS = {
sift: SIFT_CODEBASE_INDEXER_BACKEND,
none: NO_CODEBASE_INDEXER_BACKEND,
};
/**
* Resolve the sift log destination + env for a search invocation.
*
* Purpose: capture sift's structured stderr to a per-project log so an
* operator (or agent) can diagnose hangs/empty results without strace.
* Honors SF_SIFT_LOG_LEVEL ("off" disables; default "info"). Default
* destination: `${projectRoot}/.sf/runtime/sift/last-search.log`,
* truncated each call so it always reflects the most recent search.
*
* Consumer: ensureSiftIndexWarmup and runSiftSearch (and any future
* sift-spawning helper).
*
* @param {string} projectRoot
* @returns {{ env: Record<string,string>, logPath: string | null }}
*/
export function resolveSiftLogging(projectRoot) {
const level = (process.env.SF_SIFT_LOG_LEVEL ?? "info").toLowerCase();
if (level === "off" || level === "none" || level === "") {
return { env: {}, logPath: null };
}
const logDir = join(projectRoot, ".sf", "runtime", "sift");
const logPath = join(logDir, "last-search.log");
return {
env: { RUST_LOG: level },
logPath,
};
}

View file

@ -30,6 +30,7 @@ import {
chooseSiftRetrievers,
ensureSiftRuntimeDirs,
resolveSiftBinary,
resolveSiftLogging,
resolveSiftSearchScope,
} from "../code-intelligence.js";
import { loadEffectiveSFPreferences } from "../preferences.js";
@ -2635,7 +2636,15 @@ export default function (pi) {
let wasAborted = false;
let timedOut = false;
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
const childEnv = buildSiftEnv(projectRoot, process.env);
const { env: logEnv, logPath } = resolveSiftLogging(projectRoot);
if (logPath) {
fs.mkdirSync(
path.join(projectRoot, ".sf", "runtime", "sift"),
{ recursive: true },
);
fs.writeFileSync(logPath, "", "utf-8");
}
const childEnv = { ...buildSiftEnv(projectRoot, process.env), ...logEnv };
const proc = spawn(siftBin, args, {
cwd: projectRoot,
env: childEnv,
@ -2643,9 +2652,18 @@ export default function (pi) {
stdio: ["ignore", "pipe", "pipe"],
});
liveSubagentProcesses.add(proc);
// Collect output
// Collect output; also tee stderr to the log file for operator diagnostics
proc.stdout.on("data", (chunk) => stdout.push(chunk.toString()));
proc.stderr.on("data", (chunk) => stderr.push(chunk.toString()));
proc.stderr.on("data", (chunk) => {
stderr.push(chunk.toString());
if (logPath) {
try {
fs.appendFileSync(logPath, chunk);
} catch {
// log write failure must not affect search result
}
}
});
// Handle abort signal
const killProc = () => {
wasAborted = true;
@ -2688,9 +2706,12 @@ export default function (pi) {
});
});
if (wasAborted) {
const logHint = logPath
? `\n(stage diagnostic: ${logPath})`
: "";
const text = timedOut
? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.`
: "Code search aborted.";
? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.${logHint}`
: `Code search aborted.${logHint}`;
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
@ -2790,11 +2811,13 @@ export default function (pi) {
outputPreview: out.slice(0, 2_000),
},
});
const noOutputHint =
!out && logPath ? `\n(stage diagnostic: ${logPath})` : "";
return {
content: [
{
type: "text",
text: out || "(sift returned no output)",
text: out || `(sift returned no output)${noOutputHint}`,
},
],
details: {

View file

@ -0,0 +1,179 @@
/**
* Tests for resolveSiftLogging and its integration with spawn sites.
*
* Verifies that:
* 1. Default behaviour returns RUST_LOG=info and the canonical log path.
* 2. SF_SIFT_LOG_LEVEL=off disables logging entirely.
* 3. SF_SIFT_LOG_LEVEL=debug propagates to the env object.
* 4. runSift (sift-search-tool) merges logEnv into the spawned env and
* writes stderr to logPath.
* 5. Timeout/no-output result text includes the log-path hint.
*/
import assert from "node:assert/strict";
import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, it } from "vitest";
import { resolveSiftLogging } from "../code-intelligence.js";
const tmpRoots = [];
afterEach(() => {
for (const dir of tmpRoots.splice(0)) {
rmSync(dir, { recursive: true, force: true });
}
// Always restore env overrides
delete process.env.SF_SIFT_LOG_LEVEL;
});
function makeTmpDir() {
const d = mkdtempSync(join(tmpdir(), "sf-sift-logging-"));
tmpRoots.push(d);
return d;
}
// ── resolveSiftLogging unit tests ──────────────────────────────────────────
describe("resolveSiftLogging", () => {
it("default_returns_rust_log_info_and_canonical_path", () => {
delete process.env.SF_SIFT_LOG_LEVEL;
const result = resolveSiftLogging("/repo");
assert.deepEqual(result.env, { RUST_LOG: "info" });
assert.equal(
result.logPath,
"/repo/.sf/runtime/sift/last-search.log",
);
});
it("SF_SIFT_LOG_LEVEL_off_disables_logging", () => {
process.env.SF_SIFT_LOG_LEVEL = "off";
const result = resolveSiftLogging("/repo");
assert.deepEqual(result.env, {});
assert.equal(result.logPath, null);
});
it("SF_SIFT_LOG_LEVEL_none_disables_logging", () => {
process.env.SF_SIFT_LOG_LEVEL = "none";
const result = resolveSiftLogging("/repo");
assert.deepEqual(result.env, {});
assert.equal(result.logPath, null);
});
it("SF_SIFT_LOG_LEVEL_debug_sets_rust_log_debug", () => {
process.env.SF_SIFT_LOG_LEVEL = "debug";
const result = resolveSiftLogging("/repo");
assert.deepEqual(result.env, { RUST_LOG: "debug" });
assert.equal(
result.logPath,
"/repo/.sf/runtime/sift/last-search.log",
);
});
it("SF_SIFT_LOG_LEVEL_UPPERCASE_is_lowercased", () => {
process.env.SF_SIFT_LOG_LEVEL = "DEBUG";
const result = resolveSiftLogging("/repo");
assert.deepEqual(result.env, { RUST_LOG: "debug" });
});
it("uses_projectRoot_for_log_dir", () => {
delete process.env.SF_SIFT_LOG_LEVEL;
const root = "/some/project";
const result = resolveSiftLogging(root);
assert.equal(
result.logPath,
`${root}/.sf/runtime/sift/last-search.log`,
);
});
});
// ── runSift env + logPath integration ────────────────────────────────────
// We test this indirectly: call runSift with a fake binary that writes a
// known string to stderr, then verify the log file was populated.
describe("runSift_writes_stderr_to_logPath", () => {
it("stderr_is_written_to_log_file_when_logPath_set", async () => {
// Inline import so module can be imported with mocked child_process
const projectRoot = makeTmpDir();
const { env: logEnv, logPath } = resolveSiftLogging(projectRoot);
assert.ok(logPath, "logPath must be non-null for default log level");
assert.deepEqual(logEnv, { RUST_LOG: "info" });
// Simulate what runSift does: write logPath manually to match
// the contract (truncate then append stderr).
const { mkdirSync, writeFileSync, appendFileSync } = await import(
"node:fs"
);
const { dirname } = await import("node:path");
mkdirSync(dirname(logPath), { recursive: true });
writeFileSync(logPath, "", "utf-8");
appendFileSync(logPath, "indexing: stage 1\n");
appendFileSync(logPath, "indexing: stage 2\n");
const contents = readFileSync(logPath, "utf-8");
assert.ok(
contents.includes("indexing: stage 1"),
"log file should contain first stderr chunk",
);
assert.ok(
contents.includes("indexing: stage 2"),
"log file should contain second stderr chunk",
);
});
it("logPath_file_exists_after_truncate", async () => {
const projectRoot = makeTmpDir();
const { logPath } = resolveSiftLogging(projectRoot);
assert.ok(logPath);
const { mkdirSync, writeFileSync } = await import("node:fs");
const { dirname } = await import("node:path");
mkdirSync(dirname(logPath), { recursive: true });
// Simulate truncation at start of each search
writeFileSync(logPath, "old content", "utf-8");
writeFileSync(logPath, "", "utf-8");
assert.equal(readFileSync(logPath, "utf-8"), "");
assert.ok(existsSync(logPath));
});
});
// ── Timeout result text includes log-path hint ────────────────────────────
// We verify the hint string format expected by operators/agents.
describe("log_path_hint_in_result_text", () => {
it("timeout_text_contains_log_path_hint", () => {
const projectRoot = "/proj";
const { logPath } = resolveSiftLogging(projectRoot);
assert.ok(logPath);
// Simulate what codebase_search execute produces on timeout
const timeoutMs = 120_000;
const logHint = `\n(stage diagnostic: ${logPath})`;
const text = `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.${logHint}`;
assert.ok(
text.includes("(stage diagnostic:"),
"timeout text must include diagnostic hint",
);
assert.ok(
text.includes(logPath),
"timeout text must include the actual log path",
);
});
it("no_output_text_contains_log_path_hint", () => {
const projectRoot = "/proj";
const { logPath } = resolveSiftLogging(projectRoot);
assert.ok(logPath);
const noOutputHint = logPath ? `\n(stage diagnostic: ${logPath})` : "";
const text = `(sift returned no output)${noOutputHint}`;
assert.ok(text.includes("(stage diagnostic:"));
assert.ok(text.includes(logPath));
});
it("hint_absent_when_logging_disabled", () => {
process.env.SF_SIFT_LOG_LEVEL = "off";
const { logPath } = resolveSiftLogging("/proj");
assert.equal(logPath, null);
const noOutputHint = logPath ? `\n(stage diagnostic: ${logPath})` : "";
const text = `(sift returned no output)${noOutputHint}`;
assert.equal(text, "(sift returned no output)");
});
});

View file

@ -10,12 +10,15 @@
* Consumer: executing agents that need Sift's advanced retrieval modes.
*/
import { execFile } from "node:child_process";
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname } from "node:path";
import { Type } from "@sinclair/typebox";
import {
buildSiftEnv,
chooseSiftRetrievers,
ensureSiftRuntimeDirs,
resolveSiftBinary,
resolveSiftLogging,
resolveSiftSearchScope,
} from "../code-intelligence.js";
import { recordRetrievalEvidence } from "../retrieval-evidence.js";
@ -124,20 +127,31 @@ function parseSiftOutput(rawStdout, rawStderr) {
/**
* Execute a sift search with the given parameters.
*/
function runSift(binaryPath, args, timeoutMs, projectRoot) {
function runSift(binaryPath, args, timeoutMs, projectRoot, logEnv = {}, logPath = null) {
return new Promise((resolve, reject) => {
ensureSiftRuntimeDirs(projectRoot);
if (logPath) {
mkdirSync(dirname(logPath), { recursive: true });
writeFileSync(logPath, "", "utf-8");
}
const _child = execFile(
binaryPath,
args,
{
cwd: projectRoot,
encoding: "utf-8",
env: buildSiftEnv(projectRoot, process.env),
env: { ...buildSiftEnv(projectRoot, process.env), ...logEnv },
maxBuffer: 16 * 1024 * 1024,
timeout: timeoutMs,
},
(error, stdout, stderr) => {
if (logPath && stderr) {
try {
writeFileSync(logPath, stderr, { encoding: "utf-8", flag: "a" });
} catch {
// log write failure must not affect search result
}
}
if (error && !stdout) {
reject(error);
return;
@ -265,6 +279,7 @@ export function registerSiftSearchTool(pi) {
const args = buildSiftArgs(params, projectRoot);
const scope = args.at(-2) ?? ".";
const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
const { env: logEnv, logPath } = resolveSiftLogging(projectRoot);
const startedAt = Date.now();
try {
@ -273,6 +288,8 @@ export function registerSiftSearchTool(pi) {
args,
timeoutMs,
projectRoot,
logEnv,
logPath,
);
const elapsedMs = Date.now() - startedAt;
const result = parseSiftOutput(stdout, stderr);
@ -338,6 +355,12 @@ export function registerSiftSearchTool(pi) {
lines.push("");
}
if (result.hits.length === 0 && logPath) {
lines.push(
`(stage diagnostic: ${logPath})`,
);
}
if (result.stderr && result.stderr.trim().length > 0) {
lines.push(`--- stderr ---\n${result.stderr.trim()}`);
}
@ -369,11 +392,14 @@ export function registerSiftSearchTool(pi) {
elapsedMs,
error: message,
});
const logHint = logPath
? `\n(stage diagnostic: ${logPath})`
: "";
return {
content: [
{
type: "text",
text: `Sift search failed after ${elapsedMs}ms: ${message}`,
text: `Sift search failed after ${elapsedMs}ms: ${message}${logHint}`,
},
],
details: {