fix: wire continue-here context-pressure monitor to send wrap-up signal at 70% (#916)
This commit is contained in:
parent
25292f8840
commit
f5e9b00f47
2 changed files with 171 additions and 11 deletions
|
|
@ -81,6 +81,7 @@ import {
|
|||
initMetrics, resetMetrics, snapshotUnitMetrics, getLedger,
|
||||
getProjectTotals, formatCost, formatTokenCount,
|
||||
} from "./metrics.js";
|
||||
import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js";
|
||||
import { join } from "node:path";
|
||||
import { sep as pathSep } from "node:path";
|
||||
import { homedir } from "node:os";
|
||||
|
|
@ -412,6 +413,8 @@ let originalModelProvider: string | null = null;
|
|||
let unitTimeoutHandle: ReturnType<typeof setTimeout> | null = null;
|
||||
let wrapupWarningHandle: ReturnType<typeof setTimeout> | null = null;
|
||||
let idleWatchdogHandle: ReturnType<typeof setInterval> | null = null;
|
||||
/** Context-pressure continue-here monitor — fires once when context usage >= 70% */
|
||||
let continueHereHandle: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
/** Dispatch gap watchdog — detects when the state machine stalls between units.
|
||||
* After handleAgentEnd completes, if auto-mode is still active but no new unit
|
||||
|
|
@ -589,6 +592,10 @@ function clearUnitTimeout(): void {
|
|||
clearInterval(idleWatchdogHandle);
|
||||
idleWatchdogHandle = null;
|
||||
}
|
||||
if (continueHereHandle) {
|
||||
clearInterval(continueHereHandle);
|
||||
continueHereHandle = null;
|
||||
}
|
||||
inFlightTools.clear();
|
||||
clearDispatchGapWatchdog();
|
||||
}
|
||||
|
|
@ -600,6 +607,17 @@ function clearDispatchGapWatchdog(): void {
|
|||
}
|
||||
}
|
||||
|
||||
/** Build snapshot metric opts, enriching with continueHereFired from the runtime record. */
|
||||
function buildSnapshotOpts(unitType: string, unitId: string): { continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number } & Record<string, unknown> {
|
||||
const runtime = currentUnit ? readUnitRuntimeRecord(basePath, unitType, unitId) : null;
|
||||
return {
|
||||
promptCharCount: lastPromptCharCount,
|
||||
baselineCharCount: lastBaselineCharCount,
|
||||
...(currentUnitRouting ?? {}),
|
||||
...(runtime?.continueHereFired ? { continueHereFired: true } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a watchdog that fires if no new unit is dispatched within DISPATCH_GAP_TIMEOUT_MS
|
||||
* after handleAgentEnd completes. This catches the case where the dispatch chain silently
|
||||
|
|
@ -1762,7 +1780,7 @@ export async function handleAgentEnd(
|
|||
const hookStartedAt = Date.now();
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
const hookActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
if (hookActivityFile) {
|
||||
try {
|
||||
|
|
@ -2409,7 +2427,7 @@ async function dispatchNextUnit(
|
|||
// Save final session before stopping
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
}
|
||||
|
||||
|
|
@ -2454,7 +2472,7 @@ async function dispatchNextUnit(
|
|||
if (!mid || !midTitle) {
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
}
|
||||
const noMilestoneReason = !mid
|
||||
|
|
@ -2472,7 +2490,7 @@ async function dispatchNextUnit(
|
|||
if (state.phase === "complete") {
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
}
|
||||
// Clear completed-units.json for the finished milestone so it doesn't grow unbounded.
|
||||
|
|
@ -2542,7 +2560,7 @@ async function dispatchNextUnit(
|
|||
if (state.phase === "blocked") {
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
}
|
||||
const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
|
||||
|
|
@ -2653,7 +2671,7 @@ async function dispatchNextUnit(
|
|||
if (dispatchResult.action === "stop") {
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
}
|
||||
await stopAuto(ctx, pi, dispatchResult.reason);
|
||||
|
|
@ -2875,7 +2893,7 @@ async function dispatchNextUnit(
|
|||
if (lifetimeCount > MAX_LIFETIME_DISPATCHES) {
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
}
|
||||
saveActivityLog(ctx, basePath, unitType, unitId);
|
||||
const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
|
||||
|
|
@ -2889,7 +2907,7 @@ async function dispatchNextUnit(
|
|||
if (prevCount >= MAX_UNIT_DISPATCHES) {
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
}
|
||||
saveActivityLog(ctx, basePath, unitType, unitId);
|
||||
|
||||
|
|
@ -3047,7 +3065,7 @@ async function dispatchNextUnit(
|
|||
// The session still holds the previous unit's data (newSession hasn't fired yet).
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
const activityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
|
||||
|
||||
// Fire-and-forget memory extraction from completed unit
|
||||
|
|
@ -3429,7 +3447,7 @@ async function dispatchNextUnit(
|
|||
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
}
|
||||
saveActivityLog(ctx, basePath, unitType, unitId);
|
||||
|
||||
|
|
@ -3455,7 +3473,7 @@ async function dispatchNextUnit(
|
|||
timeoutAt: Date.now(),
|
||||
});
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
|
||||
}
|
||||
saveActivityLog(ctx, basePath, unitType, unitId);
|
||||
|
||||
|
|
@ -3469,6 +3487,67 @@ async function dispatchNextUnit(
|
|||
await pauseAuto(ctx, pi);
|
||||
}, hardTimeoutMs);
|
||||
|
||||
// ── Continue-here context-pressure monitor ────────────────────────────
|
||||
// Polls context usage every 15s. When usage hits the continue-here
|
||||
// threshold (70%), sends a one-shot wrap-up signal so the agent finishes
|
||||
// gracefully and the next unit gets a fresh session. This is softer than
|
||||
// context_pause_threshold which hard-pauses auto-mode entirely.
|
||||
if (continueHereHandle) {
|
||||
clearInterval(continueHereHandle);
|
||||
continueHereHandle = null;
|
||||
}
|
||||
const executorContextWindow = resolveExecutorContextWindow(
|
||||
ctx.modelRegistry as Parameters<typeof resolveExecutorContextWindow>[0],
|
||||
prefs as Parameters<typeof resolveExecutorContextWindow>[1],
|
||||
ctx.model?.contextWindow,
|
||||
);
|
||||
const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent;
|
||||
continueHereHandle = setInterval(() => {
|
||||
if (!active || !currentUnit || !cmdCtx) return;
|
||||
// One-shot guard: skip if already fired for this unit
|
||||
const runtime = readUnitRuntimeRecord(basePath, unitType, unitId);
|
||||
if (runtime?.continueHereFired) return;
|
||||
|
||||
const contextUsage = cmdCtx.getContextUsage();
|
||||
if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return;
|
||||
|
||||
// Fire once — mark runtime record and send wrap-up message
|
||||
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit!.startedAt, {
|
||||
continueHereFired: true,
|
||||
});
|
||||
|
||||
if (verbose) {
|
||||
ctx.ui.notify(
|
||||
`Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`,
|
||||
"info",
|
||||
);
|
||||
}
|
||||
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "gsd-auto-wrapup",
|
||||
display: verbose,
|
||||
content: [
|
||||
"**CONTEXT BUDGET WARNING — wrap up this unit now.**",
|
||||
`Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`,
|
||||
"The next unit needs a fresh context to work effectively. Wrap up now:",
|
||||
"1. Finish any in-progress file writes",
|
||||
"2. Write or update the required durable artifacts (summary, checkboxes)",
|
||||
"3. Mark task state on disk correctly",
|
||||
"4. Leave precise resume notes if anything remains unfinished",
|
||||
"Do NOT start new sub-tasks or investigations.",
|
||||
].join("\n"),
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
|
||||
// Clear the interval after firing — no need to keep polling
|
||||
if (continueHereHandle) {
|
||||
clearInterval(continueHereHandle);
|
||||
continueHereHandle = null;
|
||||
}
|
||||
}, 15_000);
|
||||
|
||||
// Inject prompt — verify auto-mode still active (guards against race with timeout/pause)
|
||||
if (!active) return;
|
||||
pi.sendMessage(
|
||||
|
|
|
|||
|
|
@ -201,4 +201,85 @@ describe("continue-here", () => {
|
|||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("context-pressure monitor integration", () => {
|
||||
it("should fire wrap-up when context >= threshold and mark continueHereFired", async () => {
|
||||
const { writeUnitRuntimeRecord, readUnitRuntimeRecord, clearUnitRuntimeRecord } = await import("../unit-runtime.js");
|
||||
const fs = await import("node:fs");
|
||||
const path = await import("node:path");
|
||||
const os = await import("node:os");
|
||||
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "continue-here-monitor-"));
|
||||
try {
|
||||
// Simulate the monitor's one-shot logic:
|
||||
// 1. Write initial runtime record (continueHereFired=false)
|
||||
const startedAt = Date.now();
|
||||
writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, {
|
||||
phase: "dispatched",
|
||||
wrapupWarningSent: false,
|
||||
});
|
||||
|
||||
const budget = computeBudgets(128_000);
|
||||
const threshold = budget.continueThresholdPercent;
|
||||
|
||||
// Simulate the monitor poll: context at 75% (above threshold)
|
||||
const contextPercent = 75;
|
||||
const runtime = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
|
||||
assert.ok(runtime, "runtime record should exist");
|
||||
assert.equal(runtime!.continueHereFired, false, "initially false");
|
||||
|
||||
// Check: should fire
|
||||
const shouldFire = !runtime!.continueHereFired
|
||||
&& contextPercent >= threshold;
|
||||
assert.ok(shouldFire, "should fire when context >= threshold and not yet fired");
|
||||
|
||||
// Mark as fired (what the monitor does)
|
||||
writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, {
|
||||
continueHereFired: true,
|
||||
});
|
||||
|
||||
// Verify one-shot: second poll should NOT fire
|
||||
const runtime2 = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
|
||||
assert.ok(runtime2, "runtime record should still exist");
|
||||
assert.equal(runtime2!.continueHereFired, true, "should be marked as fired");
|
||||
|
||||
const shouldFireAgain = !runtime2!.continueHereFired
|
||||
&& contextPercent >= threshold;
|
||||
assert.equal(shouldFireAgain, false, "must not fire again — one-shot guard");
|
||||
|
||||
// Clean up
|
||||
clearUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
|
||||
} finally {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("should not fire when context is below threshold", () => {
|
||||
const budget = computeBudgets(200_000);
|
||||
const threshold = budget.continueThresholdPercent;
|
||||
|
||||
// Simulate monitor poll with context at 50%
|
||||
const contextPercent = 50;
|
||||
const continueHereFired = false;
|
||||
const shouldFire = !continueHereFired && contextPercent >= threshold;
|
||||
assert.equal(shouldFire, false, "50% should not trigger continue-here");
|
||||
});
|
||||
|
||||
it("should not fire when contextUsage is null/undefined", () => {
|
||||
const budget = computeBudgets(128_000);
|
||||
const threshold = budget.continueThresholdPercent;
|
||||
|
||||
// Simulate the full guard chain from the monitor
|
||||
const usageUndefined = undefined as { percent: number | null } | undefined;
|
||||
const shouldFire1 = usageUndefined != null
|
||||
&& usageUndefined.percent != null
|
||||
&& usageUndefined.percent >= threshold;
|
||||
assert.equal(shouldFire1, false, "undefined usage must not fire");
|
||||
|
||||
const usageNullPercent: { percent: number | null } = { percent: null };
|
||||
const shouldFire2 = usageNullPercent.percent != null
|
||||
&& usageNullPercent.percent >= threshold;
|
||||
assert.equal(shouldFire2, false, "null percent must not fire");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue