fix: wire continue-here context-pressure monitor to send wrap-up signal at 70% (#916)

This commit is contained in:
Jeremy McSpadden 2026-03-17 12:44:12 -05:00 committed by GitHub
parent 25292f8840
commit f5e9b00f47
2 changed files with 171 additions and 11 deletions

View file

@ -81,6 +81,7 @@ import {
initMetrics, resetMetrics, snapshotUnitMetrics, getLedger,
getProjectTotals, formatCost, formatTokenCount,
} from "./metrics.js";
import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js";
import { join } from "node:path";
import { sep as pathSep } from "node:path";
import { homedir } from "node:os";
@ -412,6 +413,8 @@ let originalModelProvider: string | null = null;
let unitTimeoutHandle: ReturnType<typeof setTimeout> | null = null;
let wrapupWarningHandle: ReturnType<typeof setTimeout> | null = null;
let idleWatchdogHandle: ReturnType<typeof setInterval> | null = null;
/** Context-pressure continue-here monitor — fires once when context usage >= 70% */
let continueHereHandle: ReturnType<typeof setInterval> | null = null;
/** Dispatch gap watchdog detects when the state machine stalls between units.
* After handleAgentEnd completes, if auto-mode is still active but no new unit
@ -589,6 +592,10 @@ function clearUnitTimeout(): void {
clearInterval(idleWatchdogHandle);
idleWatchdogHandle = null;
}
if (continueHereHandle) {
clearInterval(continueHereHandle);
continueHereHandle = null;
}
inFlightTools.clear();
clearDispatchGapWatchdog();
}
@ -600,6 +607,17 @@ function clearDispatchGapWatchdog(): void {
}
}
/** Build snapshot metric opts, enriching with continueHereFired from the runtime record. */
function buildSnapshotOpts(unitType: string, unitId: string): { continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number } & Record<string, unknown> {
const runtime = currentUnit ? readUnitRuntimeRecord(basePath, unitType, unitId) : null;
return {
promptCharCount: lastPromptCharCount,
baselineCharCount: lastBaselineCharCount,
...(currentUnitRouting ?? {}),
...(runtime?.continueHereFired ? { continueHereFired: true } : {}),
};
}
/**
* Start a watchdog that fires if no new unit is dispatched within DISPATCH_GAP_TIMEOUT_MS
* after handleAgentEnd completes. This catches the case where the dispatch chain silently
@ -1762,7 +1780,7 @@ export async function handleAgentEnd(
const hookStartedAt = Date.now();
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
const hookActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
if (hookActivityFile) {
try {
@ -2409,7 +2427,7 @@ async function dispatchNextUnit(
// Save final session before stopping
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
}
@ -2454,7 +2472,7 @@ async function dispatchNextUnit(
if (!mid || !midTitle) {
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
}
const noMilestoneReason = !mid
@ -2472,7 +2490,7 @@ async function dispatchNextUnit(
if (state.phase === "complete") {
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
}
// Clear completed-units.json for the finished milestone so it doesn't grow unbounded.
@ -2542,7 +2560,7 @@ async function dispatchNextUnit(
if (state.phase === "blocked") {
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
}
const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
@ -2653,7 +2671,7 @@ async function dispatchNextUnit(
if (dispatchResult.action === "stop") {
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
}
await stopAuto(ctx, pi, dispatchResult.reason);
@ -2875,7 +2893,7 @@ async function dispatchNextUnit(
if (lifetimeCount > MAX_LIFETIME_DISPATCHES) {
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
}
saveActivityLog(ctx, basePath, unitType, unitId);
const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
@ -2889,7 +2907,7 @@ async function dispatchNextUnit(
if (prevCount >= MAX_UNIT_DISPATCHES) {
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
}
saveActivityLog(ctx, basePath, unitType, unitId);
@ -3047,7 +3065,7 @@ async function dispatchNextUnit(
// The session still holds the previous unit's data (newSession hasn't fired yet).
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
const activityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
// Fire-and-forget memory extraction from completed unit
@ -3429,7 +3447,7 @@ async function dispatchNextUnit(
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
}
saveActivityLog(ctx, basePath, unitType, unitId);
@ -3455,7 +3473,7 @@ async function dispatchNextUnit(
timeoutAt: Date.now(),
});
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
}
saveActivityLog(ctx, basePath, unitType, unitId);
@ -3469,6 +3487,67 @@ async function dispatchNextUnit(
await pauseAuto(ctx, pi);
}, hardTimeoutMs);
// ── Continue-here context-pressure monitor ────────────────────────────
// Polls context usage every 15s. When usage hits the continue-here
// threshold (70%), sends a one-shot wrap-up signal so the agent finishes
// gracefully and the next unit gets a fresh session. This is softer than
// context_pause_threshold which hard-pauses auto-mode entirely.
if (continueHereHandle) {
clearInterval(continueHereHandle);
continueHereHandle = null;
}
const executorContextWindow = resolveExecutorContextWindow(
ctx.modelRegistry as Parameters<typeof resolveExecutorContextWindow>[0],
prefs as Parameters<typeof resolveExecutorContextWindow>[1],
ctx.model?.contextWindow,
);
const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent;
continueHereHandle = setInterval(() => {
if (!active || !currentUnit || !cmdCtx) return;
// One-shot guard: skip if already fired for this unit
const runtime = readUnitRuntimeRecord(basePath, unitType, unitId);
if (runtime?.continueHereFired) return;
const contextUsage = cmdCtx.getContextUsage();
if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return;
// Fire once — mark runtime record and send wrap-up message
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit!.startedAt, {
continueHereFired: true,
});
if (verbose) {
ctx.ui.notify(
`Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`,
"info",
);
}
pi.sendMessage(
{
customType: "gsd-auto-wrapup",
display: verbose,
content: [
"**CONTEXT BUDGET WARNING — wrap up this unit now.**",
`Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`,
"The next unit needs a fresh context to work effectively. Wrap up now:",
"1. Finish any in-progress file writes",
"2. Write or update the required durable artifacts (summary, checkboxes)",
"3. Mark task state on disk correctly",
"4. Leave precise resume notes if anything remains unfinished",
"Do NOT start new sub-tasks or investigations.",
].join("\n"),
},
{ triggerTurn: true },
);
// Clear the interval after firing — no need to keep polling
if (continueHereHandle) {
clearInterval(continueHereHandle);
continueHereHandle = null;
}
}, 15_000);
// Inject prompt — verify auto-mode still active (guards against race with timeout/pause)
if (!active) return;
pi.sendMessage(

View file

@ -201,4 +201,85 @@ describe("continue-here", () => {
}
});
});
describe("context-pressure monitor integration", () => {
it("should fire wrap-up when context >= threshold and mark continueHereFired", async () => {
const { writeUnitRuntimeRecord, readUnitRuntimeRecord, clearUnitRuntimeRecord } = await import("../unit-runtime.js");
const fs = await import("node:fs");
const path = await import("node:path");
const os = await import("node:os");
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "continue-here-monitor-"));
try {
// Simulate the monitor's one-shot logic:
// 1. Write initial runtime record (continueHereFired=false)
const startedAt = Date.now();
writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, {
phase: "dispatched",
wrapupWarningSent: false,
});
const budget = computeBudgets(128_000);
const threshold = budget.continueThresholdPercent;
// Simulate the monitor poll: context at 75% (above threshold)
const contextPercent = 75;
const runtime = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
assert.ok(runtime, "runtime record should exist");
assert.equal(runtime!.continueHereFired, false, "initially false");
// Check: should fire
const shouldFire = !runtime!.continueHereFired
&& contextPercent >= threshold;
assert.ok(shouldFire, "should fire when context >= threshold and not yet fired");
// Mark as fired (what the monitor does)
writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, {
continueHereFired: true,
});
// Verify one-shot: second poll should NOT fire
const runtime2 = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
assert.ok(runtime2, "runtime record should still exist");
assert.equal(runtime2!.continueHereFired, true, "should be marked as fired");
const shouldFireAgain = !runtime2!.continueHereFired
&& contextPercent >= threshold;
assert.equal(shouldFireAgain, false, "must not fire again — one-shot guard");
// Clean up
clearUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
} finally {
fs.rmSync(tmpDir, { recursive: true, force: true });
}
});
it("should not fire when context is below threshold", () => {
const budget = computeBudgets(200_000);
const threshold = budget.continueThresholdPercent;
// Simulate monitor poll with context at 50%
const contextPercent = 50;
const continueHereFired = false;
const shouldFire = !continueHereFired && contextPercent >= threshold;
assert.equal(shouldFire, false, "50% should not trigger continue-here");
});
it("should not fire when contextUsage is null/undefined", () => {
const budget = computeBudgets(128_000);
const threshold = budget.continueThresholdPercent;
// Simulate the full guard chain from the monitor
const usageUndefined = undefined as { percent: number | null } | undefined;
const shouldFire1 = usageUndefined != null
&& usageUndefined.percent != null
&& usageUndefined.percent >= threshold;
assert.equal(shouldFire1, false, "undefined usage must not fire");
const usageNullPercent: { percent: number | null } = { percent: null };
const shouldFire2 = usageNullPercent.percent != null
&& usageNullPercent.percent >= threshold;
assert.equal(shouldFire2, false, "null percent must not fire");
});
});
});