fix(sf): cap sift warmup with timeout(1) wall-clock wrapper

Orphaned sift warmups can spin past --retriever-timeout-ms (a per-page
timeout, not wall-clock) and burn CPU indefinitely after the launcher
exits — observed a 95-min, 98% CPU orphan. Wrap the detached spawn in
timeout(1) / gtimeout when present (SIGTERM at the cap, SIGKILL 10s
later); fall back to raw spawn elsewhere. Default cap 1800s, override
via SF_SIFT_HARD_TIMEOUT_SEC, disable via SF_SIFT_HARD_TIMEOUT_DISABLE=1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 08:29:02 +02:00
parent f5ea1cb6c0
commit f4dd66d4ed
2 changed files with 156 additions and 6 deletions

View file

@ -131,6 +131,7 @@ interface SiftIndexWarmupOptions {
query?: string;
limit?: number;
retrieverTimeoutMs?: number;
hardTimeoutSec?: number;
spawnFn?: typeof spawn;
now?: number;
}
@ -140,6 +141,8 @@ const DEFAULT_SIFT_WARMUP_QUERY =
"repo architecture source tests entrypoints configuration";
// Result limit for the warmup search — presumably the fallback for
// `options.limit`; TODO confirm against the argument-building code.
const DEFAULT_SIFT_WARMUP_LIMIT = 1;
// Retriever timeout in milliseconds — presumably the fallback for
// `options.retrieverTimeoutMs`. NOTE(review): the commit message describes the
// retriever timeout as per-page, not wall-clock, so it does not bound the run.
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000;
// Wall-clock cap (seconds) returned by resolveSiftWarmupHardTimeoutSec when
// neither an explicit override nor SF_SIFT_HARD_TIMEOUT_SEC supplies one.
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 1800;
// Value passed to the timeout wrapper's `--kill-after=` flag (seconds).
const SIFT_WARMUP_KILL_GRACE_SEC = 10;
function readJsonConfig(configPath: string): McpConfigFile {
if (!existsSync(configPath)) return {};
@ -271,6 +274,56 @@ function lookupExecutable(
return null;
}
/**
 * A timeout wrapper found on PATH, plus the arguments that must precede the
 * wrapped command when spawning through it.
 */
interface ResolvedHardTimeout {
/** Path of the wrapper executable, as returned by `lookupExecutable`. */
binary: string;
/** Wrapper-only arguments: the `--kill-after=<grace>` flag, then the cap in seconds. */
wrapperArgs: string[];
/** Wall-clock cap, in seconds, that the wrapper was configured with. */
timeoutSec: number;
}
/**
 * Resolve the wall-clock cap, in whole seconds, for a sift warmup run.
 *
 * Precedence, highest first:
 *  1. `SF_SIFT_HARD_TIMEOUT_DISABLE=1` — cap disabled, returns `null`.
 *  2. `override` — a positive finite number is used; anything else
 *     (0, negative, NaN, Infinity) disables the cap.
 *  3. `SF_SIFT_HARD_TIMEOUT_SEC` — `0` disables the cap, a positive number is
 *     used, and anything unparseable falls through to the default.
 *  4. `DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC`.
 *
 * Positive values are floored to whole seconds but never below 1.
 *
 * @param env - Environment to read the `SF_SIFT_HARD_TIMEOUT_*` variables from.
 * @param override - Explicit cap in seconds; takes precedence over the env value.
 * @returns A positive integer number of seconds, or `null` when the cap is disabled.
 */
function resolveSiftWarmupHardTimeoutSec(
  env: NodeJS.ProcessEnv,
  override?: number,
): number | null {
  if (env.SF_SIFT_HARD_TIMEOUT_DISABLE === "1") return null;

  // Never return 0 for a positive request: GNU timeout(1) treats a duration of
  // 0 as "no timeout", so a floored sub-second cap would silently run
  // unbounded while being reported as an active hard cap.
  const toWholeSeconds = (seconds: number): number =>
    Math.max(1, Math.floor(seconds));

  if (override !== undefined) {
    return Number.isFinite(override) && override > 0
      ? toWholeSeconds(override)
      : null;
  }

  const raw = env.SF_SIFT_HARD_TIMEOUT_SEC?.trim();
  if (raw) {
    // Number() rejects trailing garbage ("10m" -> NaN) where parseInt would
    // quietly yield 10, and keeps fractions ("0.5") from collapsing to 0 and
    // being mistaken for an explicit "disable".
    const parsed = Number(raw);
    if (parsed === 0) return null;
    if (Number.isFinite(parsed) && parsed > 0) return toWholeSeconds(parsed);
  }
  return DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC;
}
/**
 * Find a timeout wrapper executable and build the arguments that must precede
 * the wrapped command.
 *
 * Returns `null` on win32 and when neither candidate can be resolved via
 * `lookupExecutable`. On darwin `gtimeout` is tried before `timeout`; on every
 * other platform the order is reversed.
 *
 * @param env - Environment handed to `lookupExecutable` for the search.
 * @param timeoutSec - Cap in seconds, passed to the wrapper as its duration.
 * @returns The resolved wrapper description, or `null` if none is available.
 */
function resolveSiftWarmupTimeoutWrapper(
  env: NodeJS.ProcessEnv,
  timeoutSec: number,
): ResolvedHardTimeout | null {
  const { platform } = process;
  if (platform === "win32") return null;

  const preferenceOrder = ["timeout", "gtimeout"];
  if (platform === "darwin") preferenceOrder.reverse();

  for (const name of preferenceOrder) {
    const resolvedPath = lookupExecutable(name, env);
    if (!resolvedPath) continue;

    const killAfterFlag = `--kill-after=${SIFT_WARMUP_KILL_GRACE_SEC}`;
    return {
      binary: resolvedPath,
      wrapperArgs: [killAfterFlag, String(timeoutSec)],
      timeoutSec,
    };
  }
  return null;
}
export function resolveProjectRagBinary(
env: NodeJS.ProcessEnv = process.env,
): string | null {
@ -401,7 +454,7 @@ export function ensureSiftIndexWarmup(
};
}
const args = [
const siftArgs = [
"search",
"--json",
"--strategy",
@ -417,6 +470,24 @@ export function ensureSiftIndexWarmup(
options.query ?? DEFAULT_SIFT_WARMUP_QUERY,
];
const hardTimeoutSec = resolveSiftWarmupHardTimeoutSec(
env,
options.hardTimeoutSec,
);
const wrapper =
hardTimeoutSec !== null
? resolveSiftWarmupTimeoutWrapper(env, hardTimeoutSec)
: null;
const command = wrapper ? wrapper.binary : detection.binaryPath;
const args = wrapper
? [...wrapper.wrapperArgs, detection.binaryPath, ...siftArgs]
: siftArgs;
const startedReason = wrapper
? `sift page-index-hybrid warmup started (hard cap ${wrapper.timeoutSec}s via ${wrapper.binary})`
: hardTimeoutSec === null
? "sift page-index-hybrid warmup started (hard cap disabled)"
: "sift page-index-hybrid warmup started (no timeout(1)/gtimeout on PATH; running unbounded)";
try {
mkdirSync(join(projectRoot, ".sf", "runtime"), { recursive: true });
writeFileSync(
@ -425,9 +496,11 @@ export function ensureSiftIndexWarmup(
{
schemaVersion: 2,
startedAt: new Date(now).toISOString(),
command: detection.binaryPath,
command,
cwd: projectRoot,
args,
siftBinary: detection.binaryPath,
hardTimeoutSec: wrapper?.timeoutSec ?? null,
},
null,
2,
@ -435,7 +508,7 @@ export function ensureSiftIndexWarmup(
"utf-8",
);
const child = (options.spawnFn ?? spawn)(detection.binaryPath, args, {
const child = (options.spawnFn ?? spawn)(command, args, {
cwd: projectRoot,
env,
stdio: "ignore",
@ -444,8 +517,8 @@ export function ensureSiftIndexWarmup(
child.unref();
return {
status: "started",
reason: "sift page-index-hybrid warmup started",
command: detection.binaryPath,
reason: startedReason,
command,
args,
markerPath,
};
@ -453,7 +526,7 @@ export function ensureSiftIndexWarmup(
return {
status: "error",
reason: err instanceof Error ? err.message : String(err),
command: detection.binaryPath,
command,
args,
markerPath,
};

View file

@ -447,6 +447,83 @@ test("ensureSiftIndexWarmup skips recent marker and explicit non-sift backends",
}
});
// With a `timeout` file present on PATH and an explicit 42s cap, the warmup
// must be spawned through the wrapper, and the marker file must record both
// the wrapper and the real sift binary.
test("ensureSiftIndexWarmup wraps sift with timeout(1) when available", () => {
  const projectRoot = makeProject();
  try {
    const siftPath = writeFakeSiftBinary(projectRoot);
    const binDir = join(projectRoot, "bin");
    const timeoutPath = join(binDir, "timeout");
    writeFileSync(timeoutPath, "", "utf-8");

    // Records every spawn request instead of launching a process.
    const spawned: Array<{ command: string; args: string[] }> = [];
    const recordingSpawn = ((command: string, args: string[]) => {
      spawned.push({ command, args });
      return { unref() {} };
    }) as unknown as typeof import("node:child_process").spawn;

    const outcome = ensureSiftIndexWarmup(projectRoot, undefined, {
      env: { PATH: binDir },
      spawnFn: recordingSpawn,
      hardTimeoutSec: 42,
      force: true,
      now: Date.parse("2026-05-02T12:00:00.000Z"),
    });

    assert.equal(outcome.status, "started");
    assert.equal(spawned.length, 1);

    const invocation = spawned[0];
    assert.equal(invocation.command, timeoutPath);
    // Wrapper arguments come first, then the real binary, then sift's own args.
    const wrapperPrefix = invocation.args.slice(0, 3);
    assert.deepEqual(wrapperPrefix, ["--kill-after=10", "42", siftPath]);
    assert.equal(invocation.args[3], "search");
    assert.match(outcome.reason, /hard cap 42s/);

    const markerFile = join(
      projectRoot,
      ".sf",
      "runtime",
      "sift-index-warmup.json",
    );
    const marker = JSON.parse(readFileSync(markerFile, "utf-8"));
    assert.equal(marker.command, timeoutPath);
    assert.equal(marker.siftBinary, siftPath);
    assert.equal(marker.hardTimeoutSec, 42);
  } finally {
    cleanup(projectRoot);
  }
});
// Even with a `timeout` file on PATH, SF_SIFT_HARD_TIMEOUT_DISABLE=1 must make
// the warmup spawn the sift binary directly, with no wrapper arguments.
test("ensureSiftIndexWarmup honors SF_SIFT_HARD_TIMEOUT_DISABLE", () => {
  const projectRoot = makeProject();
  try {
    const siftPath = writeFakeSiftBinary(projectRoot);
    const binDir = join(projectRoot, "bin");
    writeFileSync(join(binDir, "timeout"), "", "utf-8");

    // Records every spawn request instead of launching a process.
    const spawned: Array<{ command: string; args: string[] }> = [];
    const recordingSpawn = ((command: string, args: string[]) => {
      spawned.push({ command, args });
      return { unref() {} };
    }) as unknown as typeof import("node:child_process").spawn;

    const disabledEnv = {
      PATH: binDir,
      SF_SIFT_HARD_TIMEOUT_DISABLE: "1",
    };
    const outcome = ensureSiftIndexWarmup(projectRoot, undefined, {
      env: disabledEnv,
      spawnFn: recordingSpawn,
      force: true,
      now: Date.parse("2026-05-02T12:00:00.000Z"),
    });

    assert.equal(outcome.status, "started");
    assert.equal(spawned.length, 1);

    const invocation = spawned[0];
    assert.equal(invocation.command, siftPath);
    assert.equal(invocation.args[0], "search");
    assert.match(outcome.reason, /hard cap disabled/);
  } finally {
    cleanup(projectRoot);
  }
});
test("ensureSiftIndexWarmup ignores stale absolute-path warmup markers", () => {
const projectRoot = makeProject();
try {