fix: reconcile sift warmup observability
This commit is contained in:
parent
76b218762b
commit
a95e2947df
5 changed files with 251 additions and 3 deletions
|
|
@ -49,6 +49,7 @@ export interface HeadlessHeartbeatContext {
|
|||
eventDelta?: number;
|
||||
toolCallDelta?: number;
|
||||
openToolCount?: number;
|
||||
openToolDetails?: string[];
|
||||
activeUnit?: string;
|
||||
activeModel?: string;
|
||||
lastEventType?: string;
|
||||
|
|
@ -549,7 +550,9 @@ export function formatHeadlessHeartbeat(ctx: HeadlessHeartbeatContext): string {
|
|||
activityParts.length > 0 ? `; activity=${activityParts.join(", ")}` : "";
|
||||
const openTools =
|
||||
ctx.openToolCount && ctx.openToolCount > 0
|
||||
? `; openTools=${ctx.openToolCount}`
|
||||
? ctx.openToolDetails?.length
|
||||
? `; openTools=${ctx.openToolCount}[${ctx.openToolDetails.join(", ")}]`
|
||||
: `; openTools=${ctx.openToolCount}`
|
||||
: "";
|
||||
const unit = ctx.activeUnit ? `; unit=${ctx.activeUnit}` : "";
|
||||
const model = ctx.activeModel ? `; model=${ctx.activeModel}` : "";
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ import {
|
|||
formatThinkingStart,
|
||||
handleExtensionUIRequest,
|
||||
startSupervisedStdinReader,
|
||||
summarizeToolArgs,
|
||||
} from "./headless-ui.js";
|
||||
import { getProjectSessionsDir } from "./project-sessions.js";
|
||||
import {
|
||||
|
|
@ -814,6 +815,10 @@ async function runHeadlessOnce(
|
|||
|
||||
// Verbose text-mode state
|
||||
const toolStartTimes = new Map<string, number>();
|
||||
const openToolInfoByCallId = new Map<
|
||||
string,
|
||||
{ toolName: string; args: unknown; startedAt: number }
|
||||
>();
|
||||
let lastCostData:
|
||||
| { costUsd: number; inputTokens: number; outputTokens: number }
|
||||
| undefined;
|
||||
|
|
@ -1036,6 +1041,29 @@ async function runHeadlessOnce(
|
|||
}
|
||||
}
|
||||
|
||||
function formatOpenToolAge(ms: number): string {
|
||||
const seconds = Math.max(0, Math.floor(ms / 1000));
|
||||
const minutes = Math.floor(seconds / 60);
|
||||
const remainder = seconds % 60;
|
||||
if (minutes > 0) return `${minutes}m${remainder}s`;
|
||||
return `${remainder}s`;
|
||||
}
|
||||
|
||||
function summarizeOpenHeadlessTools(now: number): string[] {
|
||||
const entries = [...openToolInfoByCallId.values()];
|
||||
const shown = entries.slice(0, 3).map((info) => {
|
||||
const name = info.toolName || "unknown";
|
||||
const args = summarizeToolArgs(info.toolName, info.args);
|
||||
const argPart = args
|
||||
? `:${args.length > 48 ? `${args.slice(0, 45)}...` : args}`
|
||||
: "";
|
||||
return `${name}${argPart} ${formatOpenToolAge(now - info.startedAt)}`;
|
||||
});
|
||||
const hidden = entries.length - shown.length;
|
||||
if (hidden > 0) shown.push(`+${hidden} more`);
|
||||
return shown;
|
||||
}
|
||||
|
||||
// Client started flag — replaces old stdinWriter null-check
|
||||
let clientStarted = false;
|
||||
// Adapter for AnswerInjector — wraps client.sendUIResponse in a writeToStdin-compatible callback
|
||||
|
|
@ -1137,6 +1165,7 @@ async function runHeadlessOnce(
|
|||
eventDelta: totalEvents - lastHeartbeatEventCount,
|
||||
toolCallDelta: toolCallCount - lastHeartbeatToolCallCount,
|
||||
openToolCount: toolStartTimes.size,
|
||||
openToolDetails: summarizeOpenHeadlessTools(now),
|
||||
activeUnit: activeHeadlessUnit,
|
||||
activeModel: activeHeadlessModel,
|
||||
lastEventType: lastEvent?.type,
|
||||
|
|
@ -1157,6 +1186,13 @@ async function runHeadlessOnce(
|
|||
if (eventType === "tool_execution_start") {
|
||||
const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? "");
|
||||
const toolName = String(eventObj.toolName ?? "");
|
||||
if (toolCallId) {
|
||||
openToolInfoByCallId.set(toolCallId, {
|
||||
toolName,
|
||||
args: eventObj.args,
|
||||
startedAt: Date.now(),
|
||||
});
|
||||
}
|
||||
if (toolCallId && isInteractiveHeadlessTool(toolName)) {
|
||||
interactiveToolCallIds.add(toolCallId);
|
||||
}
|
||||
|
|
@ -1168,6 +1204,16 @@ async function runHeadlessOnce(
|
|||
if (traceActive && toolCallId && toolName) {
|
||||
handleToolStart(toolName, toolCallId);
|
||||
}
|
||||
} else if (eventType === "tool_execution_update") {
|
||||
const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? "");
|
||||
if (toolCallId) {
|
||||
const existing = openToolInfoByCallId.get(toolCallId);
|
||||
openToolInfoByCallId.set(toolCallId, {
|
||||
toolName: String(eventObj.toolName ?? existing?.toolName ?? ""),
|
||||
args: eventObj.args ?? existing?.args,
|
||||
startedAt: existing?.startedAt ?? Date.now(),
|
||||
});
|
||||
}
|
||||
} else if (eventType === "tool_execution_end") {
|
||||
const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? "");
|
||||
if (toolCallId) {
|
||||
|
|
@ -1175,6 +1221,7 @@ async function runHeadlessOnce(
|
|||
lastInteractiveToolEndTime = Date.now();
|
||||
}
|
||||
interactiveToolCallIds.delete(toolCallId);
|
||||
openToolInfoByCallId.delete(toolCallId);
|
||||
}
|
||||
// Close the tool span if tracing is active
|
||||
if (traceActive && toolCallId) {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,60 @@ function inspectSiftCache(projectRoot) {
|
|||
samples,
|
||||
};
|
||||
}
|
||||
function inspectSiftWarmupArtifacts(projectRoot) {
|
||||
const dirs = resolveSiftWarmupRuntimeDirs(projectRoot);
|
||||
const artifactsRoot = join(dirs.searchCache, "artifacts");
|
||||
const artifactSampleLimit = 512;
|
||||
const files = listFilesCapped(artifactsRoot, artifactSampleLimit);
|
||||
let latestArtifactAt = null;
|
||||
let totalBytes = 0;
|
||||
for (const file of files) {
|
||||
try {
|
||||
const stat = statSync(file);
|
||||
totalBytes += stat.size;
|
||||
const mtime = new Date(stat.mtimeMs).toISOString();
|
||||
if (!latestArtifactAt || mtime > latestArtifactAt) {
|
||||
latestArtifactAt = mtime;
|
||||
}
|
||||
} catch {
|
||||
// Best-effort observability only; marker reconciliation must not fail
|
||||
// because a cache file changed while we were inspecting it.
|
||||
}
|
||||
}
|
||||
return {
|
||||
artifactCount: files.length,
|
||||
artifactCountCapped: files.length >= artifactSampleLimit,
|
||||
artifactSampleLimit,
|
||||
latestArtifactAt,
|
||||
cacheBytes: totalBytes,
|
||||
};
|
||||
}
|
||||
function finalizeSiftWarmupMarker(projectRoot, markerPath, parsed, reason) {
|
||||
const artifacts = inspectSiftWarmupArtifacts(projectRoot);
|
||||
const status = artifacts.artifactCount > 0 ? "completed" : "stale";
|
||||
const reconciled = {
|
||||
...parsed,
|
||||
schemaVersion: 3,
|
||||
status,
|
||||
finishedAt: new Date().toISOString(),
|
||||
terminalReason: reason,
|
||||
artifactCount: artifacts.artifactCount,
|
||||
artifactCountCapped: artifacts.artifactCountCapped,
|
||||
artifactSampleLimit: artifacts.artifactSampleLimit,
|
||||
latestArtifactAt: artifacts.latestArtifactAt,
|
||||
cacheBytes: artifacts.cacheBytes,
|
||||
};
|
||||
try {
|
||||
writeFileSync(
|
||||
markerPath,
|
||||
`${JSON.stringify(reconciled, null, 2)}\n`,
|
||||
"utf-8",
|
||||
);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
return { ...reconciled, markerPath };
|
||||
}
|
||||
export function detectSift(projectRoot, prefs, env = process.env) {
|
||||
if (prefs?.indexer_backend === "none") {
|
||||
return {
|
||||
|
|
@ -403,7 +457,15 @@ function readSiftWarmupMarker(projectRoot) {
|
|||
const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
if (parsed.schemaVersion !== 3) return null;
|
||||
if (parsed.status !== "warming") return null;
|
||||
if (parsed.pid && !isProcessAlive(parsed.pid)) return null;
|
||||
if (parsed.pid && !isProcessAlive(parsed.pid)) {
|
||||
finalizeSiftWarmupMarker(
|
||||
projectRoot,
|
||||
markerPath,
|
||||
parsed,
|
||||
`warmup pid ${parsed.pid} is no longer running`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
const started = Date.parse(parsed.startedAt);
|
||||
const hardTimeoutSec = Number(
|
||||
parsed.hardTimeoutSec ?? DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC,
|
||||
|
|
@ -411,7 +473,24 @@ function readSiftWarmupMarker(projectRoot) {
|
|||
const expiresAt =
|
||||
started +
|
||||
Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000;
|
||||
if (!Number.isFinite(started) || Date.now() > expiresAt) return null;
|
||||
if (!Number.isFinite(started)) {
|
||||
finalizeSiftWarmupMarker(
|
||||
projectRoot,
|
||||
markerPath,
|
||||
parsed,
|
||||
"warmup marker has invalid startedAt",
|
||||
);
|
||||
return null;
|
||||
}
|
||||
if (Date.now() > expiresAt) {
|
||||
finalizeSiftWarmupMarker(
|
||||
projectRoot,
|
||||
markerPath,
|
||||
parsed,
|
||||
"warmup marker exceeded hard timeout window",
|
||||
);
|
||||
return null;
|
||||
}
|
||||
return { ...parsed, markerPath };
|
||||
} catch {
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import {
|
|||
existsSync,
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
|
|
@ -138,6 +139,111 @@ test("detectSift_when_probe_succeeds_reports_project_cache", () => {
|
|||
assert.equal(result.tmpDir, dirs.tmpDir);
|
||||
});
|
||||
|
||||
test("detectSift_when_warmup_pid_is_dead_marks_marker_completed_when_cache_exists", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
const artifactDir = join(dirs.searchCache, "artifacts", "indexes", "repo");
|
||||
mkdirSync(artifactDir, { recursive: true });
|
||||
writeFileSync(join(artifactDir, "index.bin"), "indexed");
|
||||
const markerPath = join(
|
||||
projectRoot,
|
||||
".sf",
|
||||
"runtime",
|
||||
"sift-index-warmup.json",
|
||||
);
|
||||
writeFileSync(
|
||||
markerPath,
|
||||
JSON.stringify(
|
||||
{
|
||||
schemaVersion: 3,
|
||||
status: "warming",
|
||||
startedAt: new Date().toISOString(),
|
||||
command: bin,
|
||||
cwd: projectRoot,
|
||||
args: ["search", "."],
|
||||
scope: ".",
|
||||
siftBinary: bin,
|
||||
hardTimeoutSec: 600,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
pid: 99999999,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
|
||||
const result = detectSift(
|
||||
projectRoot,
|
||||
{},
|
||||
{
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
|
||||
},
|
||||
);
|
||||
const marker = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
|
||||
assert.equal(result.status, "configured");
|
||||
assert.equal(marker.status, "completed");
|
||||
assert.match(marker.terminalReason, /pid 99999999 is no longer running/);
|
||||
assert.equal(marker.artifactCount, 1);
|
||||
assert.equal(marker.artifactCountCapped, false);
|
||||
assert.equal(typeof marker.finishedAt, "string");
|
||||
assert.equal(typeof marker.latestArtifactAt, "string");
|
||||
});
|
||||
|
||||
test("detectSift_when_warmup_pid_is_dead_marks_marker_stale_without_cache", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
const dirs = ensureSiftRuntimeDirs(projectRoot);
|
||||
const markerPath = join(
|
||||
projectRoot,
|
||||
".sf",
|
||||
"runtime",
|
||||
"sift-index-warmup.json",
|
||||
);
|
||||
writeFileSync(
|
||||
markerPath,
|
||||
JSON.stringify(
|
||||
{
|
||||
schemaVersion: 3,
|
||||
status: "warming",
|
||||
startedAt: new Date().toISOString(),
|
||||
command: bin,
|
||||
cwd: projectRoot,
|
||||
args: ["search", "."],
|
||||
scope: ".",
|
||||
siftBinary: bin,
|
||||
hardTimeoutSec: 600,
|
||||
searchCache: dirs.searchCache,
|
||||
tmpDir: dirs.tmpDir,
|
||||
pid: 99999999,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
|
||||
detectSift(
|
||||
projectRoot,
|
||||
{},
|
||||
{
|
||||
PATH: process.env.PATH ?? "",
|
||||
SIFT_PATH: bin,
|
||||
SF_SIFT_HEALTH_TIMEOUT_MS: "1000",
|
||||
},
|
||||
);
|
||||
const marker = JSON.parse(readFileSync(markerPath, "utf-8"));
|
||||
|
||||
assert.equal(marker.status, "stale");
|
||||
assert.match(marker.terminalReason, /pid 99999999 is no longer running/);
|
||||
assert.equal(marker.artifactCount, 0);
|
||||
assert.equal(marker.artifactCountCapped, false);
|
||||
assert.equal(marker.latestArtifactAt, null);
|
||||
});
|
||||
|
||||
test("detectSift_when_cache_manifest_contains_ignored_paths_reports_degraded", () => {
|
||||
const projectRoot = makeProject();
|
||||
const { bin } = makeFakeSift("#!/bin/sh\nprintf '{\"hits\":[]}\\n'\n");
|
||||
|
|
|
|||
|
|
@ -56,6 +56,19 @@ describe("formatProgress", () => {
|
|||
assert.ok(result.includes("activity=no new events"));
|
||||
assert.ok(!result.includes("openTools="));
|
||||
});
|
||||
|
||||
it("shows active tool identity when a quiet wait has an open tool", () => {
|
||||
const result = formatHeadlessHeartbeat({
|
||||
elapsedMs: 300_000,
|
||||
quietMs: 180_000,
|
||||
totalEvents: 10,
|
||||
toolCallCount: 2,
|
||||
openToolCount: 1,
|
||||
openToolDetails: ["subagent:review M010 2m4s"],
|
||||
});
|
||||
|
||||
assert.ok(result.includes("openTools=1[subagent:review M010 2m4s]"));
|
||||
});
|
||||
});
|
||||
|
||||
describe("tool_execution_start", () => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue