fix: reconcile sift warmup observability

This commit is contained in:
Mikael Hugo 2026-05-06 06:22:09 +02:00
parent 76b218762b
commit a95e2947df
5 changed files with 251 additions and 3 deletions

View file

@ -49,6 +49,7 @@ export interface HeadlessHeartbeatContext {
eventDelta?: number;
toolCallDelta?: number;
openToolCount?: number;
openToolDetails?: string[];
activeUnit?: string;
activeModel?: string;
lastEventType?: string;
@ -549,7 +550,9 @@ export function formatHeadlessHeartbeat(ctx: HeadlessHeartbeatContext): string {
activityParts.length > 0 ? `; activity=${activityParts.join(", ")}` : "";
const openTools =
ctx.openToolCount && ctx.openToolCount > 0
? `; openTools=${ctx.openToolCount}`
? ctx.openToolDetails?.length
? `; openTools=${ctx.openToolCount}[${ctx.openToolDetails.join(", ")}]`
: `; openTools=${ctx.openToolCount}`
: "";
const unit = ctx.activeUnit ? `; unit=${ctx.activeUnit}` : "";
const model = ctx.activeModel ? `; model=${ctx.activeModel}` : "";

View file

@ -70,6 +70,7 @@ import {
formatThinkingStart,
handleExtensionUIRequest,
startSupervisedStdinReader,
summarizeToolArgs,
} from "./headless-ui.js";
import { getProjectSessionsDir } from "./project-sessions.js";
import {
@ -814,6 +815,10 @@ async function runHeadlessOnce(
// Verbose text-mode state
const toolStartTimes = new Map<string, number>();
const openToolInfoByCallId = new Map<
string,
{ toolName: string; args: unknown; startedAt: number }
>();
let lastCostData:
| { costUsd: number; inputTokens: number; outputTokens: number }
| undefined;
@ -1036,6 +1041,29 @@ async function runHeadlessOnce(
}
}
function formatOpenToolAge(ms: number): string {
const seconds = Math.max(0, Math.floor(ms / 1000));
const minutes = Math.floor(seconds / 60);
const remainder = seconds % 60;
if (minutes > 0) return `${minutes}m${remainder}s`;
return `${remainder}s`;
}
function summarizeOpenHeadlessTools(now: number): string[] {
const entries = [...openToolInfoByCallId.values()];
const shown = entries.slice(0, 3).map((info) => {
const name = info.toolName || "unknown";
const args = summarizeToolArgs(info.toolName, info.args);
const argPart = args
? `:${args.length > 48 ? `${args.slice(0, 45)}...` : args}`
: "";
return `${name}${argPart} ${formatOpenToolAge(now - info.startedAt)}`;
});
const hidden = entries.length - shown.length;
if (hidden > 0) shown.push(`+${hidden} more`);
return shown;
}
// Client started flag — replaces old stdinWriter null-check
let clientStarted = false;
// Adapter for AnswerInjector — wraps client.sendUIResponse in a writeToStdin-compatible callback
@ -1137,6 +1165,7 @@ async function runHeadlessOnce(
eventDelta: totalEvents - lastHeartbeatEventCount,
toolCallDelta: toolCallCount - lastHeartbeatToolCallCount,
openToolCount: toolStartTimes.size,
openToolDetails: summarizeOpenHeadlessTools(now),
activeUnit: activeHeadlessUnit,
activeModel: activeHeadlessModel,
lastEventType: lastEvent?.type,
@ -1157,6 +1186,13 @@ async function runHeadlessOnce(
if (eventType === "tool_execution_start") {
const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? "");
const toolName = String(eventObj.toolName ?? "");
if (toolCallId) {
openToolInfoByCallId.set(toolCallId, {
toolName,
args: eventObj.args,
startedAt: Date.now(),
});
}
if (toolCallId && isInteractiveHeadlessTool(toolName)) {
interactiveToolCallIds.add(toolCallId);
}
@ -1168,6 +1204,16 @@ async function runHeadlessOnce(
if (traceActive && toolCallId && toolName) {
handleToolStart(toolName, toolCallId);
}
} else if (eventType === "tool_execution_update") {
const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? "");
if (toolCallId) {
const existing = openToolInfoByCallId.get(toolCallId);
openToolInfoByCallId.set(toolCallId, {
toolName: String(eventObj.toolName ?? existing?.toolName ?? ""),
args: eventObj.args ?? existing?.args,
startedAt: existing?.startedAt ?? Date.now(),
});
}
} else if (eventType === "tool_execution_end") {
const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? "");
if (toolCallId) {
@ -1175,6 +1221,7 @@ async function runHeadlessOnce(
lastInteractiveToolEndTime = Date.now();
}
interactiveToolCallIds.delete(toolCallId);
openToolInfoByCallId.delete(toolCallId);
}
// Close the tool span if tracing is active
if (traceActive && toolCallId) {

View file

@ -287,6 +287,60 @@ function inspectSiftCache(projectRoot) {
samples,
};
}
function inspectSiftWarmupArtifacts(projectRoot) {
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
const artifactsRoot = join(dirs.searchCache, "artifacts");
const artifactSampleLimit = 512;
const files = listFilesCapped(artifactsRoot, artifactSampleLimit);
let latestArtifactAt = null;
let totalBytes = 0;
for (const file of files) {
try {
const stat = statSync(file);
totalBytes += stat.size;
const mtime = new Date(stat.mtimeMs).toISOString();
if (!latestArtifactAt || mtime > latestArtifactAt) {
latestArtifactAt = mtime;
}
} catch {
// Best-effort observability only; marker reconciliation must not fail
// because a cache file changed while we were inspecting it.
}
}
return {
artifactCount: files.length,
artifactCountCapped: files.length >= artifactSampleLimit,
artifactSampleLimit,
latestArtifactAt,
cacheBytes: totalBytes,
};
}
function finalizeSiftWarmupMarker(projectRoot, markerPath, parsed, reason) {
const artifacts = inspectSiftWarmupArtifacts(projectRoot);
const status = artifacts.artifactCount > 0 ? "completed" : "stale";
const reconciled = {
...parsed,
schemaVersion: 3,
status,
finishedAt: new Date().toISOString(),
terminalReason: reason,
artifactCount: artifacts.artifactCount,
artifactCountCapped: artifacts.artifactCountCapped,
artifactSampleLimit: artifacts.artifactSampleLimit,
latestArtifactAt: artifacts.latestArtifactAt,
cacheBytes: artifacts.cacheBytes,
};
try {
writeFileSync(
markerPath,
`${JSON.stringify(reconciled, null, 2)}\n`,
"utf-8",
);
} catch {
return null;
}
return { ...reconciled, markerPath };
}
export function detectSift(projectRoot, prefs, env = process.env) {
if (prefs?.indexer_backend === "none") {
return {
@ -403,7 +457,15 @@ function readSiftWarmupMarker(projectRoot) {
const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
if (parsed.schemaVersion !== 3) return null;
if (parsed.status !== "warming") return null;
if (parsed.pid && !isProcessAlive(parsed.pid)) return null;
if (parsed.pid && !isProcessAlive(parsed.pid)) {
finalizeSiftWarmupMarker(
projectRoot,
markerPath,
parsed,
`warmup pid ${parsed.pid} is no longer running`,
);
return null;
}
const started = Date.parse(parsed.startedAt);
const hardTimeoutSec = Number(
parsed.hardTimeoutSec ?? DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC,
@ -411,7 +473,24 @@ function readSiftWarmupMarker(projectRoot) {
const expiresAt =
started +
Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000;
if (!Number.isFinite(started) || Date.now() > expiresAt) return null;
if (!Number.isFinite(started)) {
finalizeSiftWarmupMarker(
projectRoot,
markerPath,
parsed,
"warmup marker has invalid startedAt",
);
return null;
}
if (Date.now() > expiresAt) {
finalizeSiftWarmupMarker(
projectRoot,
markerPath,
parsed,
"warmup marker exceeded hard timeout window",
);
return null;
}
return { ...parsed, markerPath };
} catch {
return null;

View file

@ -4,6 +4,7 @@ import {
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
@ -138,6 +139,111 @@ test("detectSift_when_probe_succeeds_reports_project_cache", () => {
assert.equal(result.tmpDir, dirs.tmpDir);
});
test("detectSift_when_warmup_pid_is_dead_marks_marker_completed_when_cache_exists", () => {
const projectRoot = makeProject();
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
const dirs = ensureSiftRuntimeDirs(projectRoot);
const artifactDir = join(dirs.searchCache, "artifacts", "indexes", "repo");
mkdirSync(artifactDir, { recursive: true });
writeFileSync(join(artifactDir, "index.bin"), "indexed");
const markerPath = join(
projectRoot,
".sf",
"runtime",
"sift-index-warmup.json",
);
writeFileSync(
markerPath,
JSON.stringify(
{
schemaVersion: 3,
status: "warming",
startedAt: new Date().toISOString(),
command: bin,
cwd: projectRoot,
args: ["search", "."],
scope: ".",
siftBinary: bin,
hardTimeoutSec: 600,
searchCache: dirs.searchCache,
tmpDir: dirs.tmpDir,
pid: 99999999,
},
null,
2,
),
);
const result = detectSift(
projectRoot,
{},
{
PATH: process.env.PATH ?? "",
SIFT_PATH: bin,
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
},
);
const marker = JSON.parse(readFileSync(markerPath, "utf-8"));
assert.equal(result.status, "configured");
assert.equal(marker.status, "completed");
assert.match(marker.terminalReason, /pid 99999999 is no longer running/);
assert.equal(marker.artifactCount, 1);
assert.equal(marker.artifactCountCapped, false);
assert.equal(typeof marker.finishedAt, "string");
assert.equal(typeof marker.latestArtifactAt, "string");
});
test("detectSift_when_warmup_pid_is_dead_marks_marker_stale_without_cache", () => {
const projectRoot = makeProject();
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
const dirs = ensureSiftRuntimeDirs(projectRoot);
const markerPath = join(
projectRoot,
".sf",
"runtime",
"sift-index-warmup.json",
);
writeFileSync(
markerPath,
JSON.stringify(
{
schemaVersion: 3,
status: "warming",
startedAt: new Date().toISOString(),
command: bin,
cwd: projectRoot,
args: ["search", "."],
scope: ".",
siftBinary: bin,
hardTimeoutSec: 600,
searchCache: dirs.searchCache,
tmpDir: dirs.tmpDir,
pid: 99999999,
},
null,
2,
),
);
detectSift(
projectRoot,
{},
{
PATH: process.env.PATH ?? "",
SIFT_PATH: bin,
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
},
);
const marker = JSON.parse(readFileSync(markerPath, "utf-8"));
assert.equal(marker.status, "stale");
assert.match(marker.terminalReason, /pid 99999999 is no longer running/);
assert.equal(marker.artifactCount, 0);
assert.equal(marker.artifactCountCapped, false);
assert.equal(marker.latestArtifactAt, null);
});
test("detectSift_when_cache_manifest_contains_ignored_paths_reports_degraded", () => {
const projectRoot = makeProject();
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");

View file

@ -56,6 +56,19 @@ describe("formatProgress", () => {
assert.ok(result.includes("activity=no new events"));
assert.ok(!result.includes("openTools="));
});
it("shows active tool identity when a quiet wait has an open tool", () => {
const result = formatHeadlessHeartbeat({
elapsedMs: 300_000,
quietMs: 180_000,
totalEvents: 10,
toolCallCount: 2,
openToolCount: 1,
openToolDetails: ["subagent:review M010 2m4s"],
});
assert.ok(result.includes("openTools=1[subagent:review M010 2m4s]"));
});
});
describe("tool_execution_start", () => {