From f5e9b00f472d7a538d80bae2a9014ebbfcc0415f Mon Sep 17 00:00:00 2001 From: Jeremy McSpadden Date: Tue, 17 Mar 2026 12:44:12 -0500 Subject: [PATCH] fix: wire continue-here context-pressure monitor to send wrap-up signal at 70% (#916) --- src/resources/extensions/gsd/auto.ts | 101 ++++++++++++++++-- .../gsd/tests/continue-here.test.ts | 81 ++++++++++++++ 2 files changed, 171 insertions(+), 11 deletions(-) diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 3616de016..35cebd1ab 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -81,6 +81,7 @@ import { initMetrics, resetMetrics, snapshotUnitMetrics, getLedger, getProjectTotals, formatCost, formatTokenCount, } from "./metrics.js"; +import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js"; import { join } from "node:path"; import { sep as pathSep } from "node:path"; import { homedir } from "node:os"; @@ -412,6 +413,8 @@ let originalModelProvider: string | null = null; let unitTimeoutHandle: ReturnType | null = null; let wrapupWarningHandle: ReturnType | null = null; let idleWatchdogHandle: ReturnType | null = null; +/** Context-pressure continue-here monitor — fires once when context usage >= 70% */ +let continueHereHandle: ReturnType | null = null; /** Dispatch gap watchdog — detects when the state machine stalls between units. * After handleAgentEnd completes, if auto-mode is still active but no new unit @@ -589,6 +592,10 @@ function clearUnitTimeout(): void { clearInterval(idleWatchdogHandle); idleWatchdogHandle = null; } + if (continueHereHandle) { + clearInterval(continueHereHandle); + continueHereHandle = null; + } inFlightTools.clear(); clearDispatchGapWatchdog(); } @@ -600,6 +607,17 @@ function clearDispatchGapWatchdog(): void { } } +/** Build snapshot metric opts, enriching with continueHereFired from the runtime record. */ +function buildSnapshotOpts(unitType: string, unitId: string): { continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number } & Record { + const runtime = currentUnit ? readUnitRuntimeRecord(basePath, unitType, unitId) : null; + return { + promptCharCount: lastPromptCharCount, + baselineCharCount: lastBaselineCharCount, + ...(currentUnitRouting ?? {}), + ...(runtime?.continueHereFired ? { continueHereFired: true } : {}), + }; +} + /** * Start a watchdog that fires if no new unit is dispatched within DISPATCH_GAP_TIMEOUT_MS * after handleAgentEnd completes. This catches the case where the dispatch chain silently @@ -1762,7 +1780,7 @@ export async function handleAgentEnd( const hookStartedAt = Date.now(); if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); const hookActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); if (hookActivityFile) { try { @@ -2409,7 +2427,7 @@ async function dispatchNextUnit( // Save final session before stopping if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); } @@ -2454,7 +2472,7 @@ async function dispatchNextUnit( if (!mid || !midTitle) { if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); } const noMilestoneReason = !mid @@ -2472,7 +2490,7 @@ async function dispatchNextUnit( if (state.phase === "complete") { if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); } // Clear completed-units.json for the finished milestone so it doesn't grow unbounded. @@ -2542,7 +2560,7 @@ async function dispatchNextUnit( if (state.phase === "blocked") { if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); } const blockerMsg = `Blocked: ${state.blockers.join(", ")}`; @@ -2653,7 +2671,7 @@ async function dispatchNextUnit( if (dispatchResult.action === "stop") { if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); } await stopAuto(ctx, pi, dispatchResult.reason); @@ -2875,7 +2893,7 @@ async function dispatchNextUnit( if (lifetimeCount > MAX_LIFETIME_DISPATCHES) { if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } saveActivityLog(ctx, basePath, unitType, unitId); const expected = diagnoseExpectedArtifact(unitType, unitId, basePath); @@ -2889,7 +2907,7 @@ async function dispatchNextUnit( if (prevCount >= MAX_UNIT_DISPATCHES) { if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } saveActivityLog(ctx, basePath, unitType, unitId); @@ -3047,7 +3065,7 @@ async function dispatchNextUnit( // The session still holds the previous unit's data (newSession hasn't fired yet). if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); const activityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); // Fire-and-forget memory extraction from completed unit @@ -3429,7 +3447,7 @@ async function dispatchNextUnit( if (currentUnit) { const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } saveActivityLog(ctx, basePath, unitType, unitId); @@ -3455,7 +3473,7 @@ async function dispatchNextUnit( timeoutAt: Date.now(), }); const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } saveActivityLog(ctx, basePath, unitType, unitId); @@ -3469,6 +3487,67 @@ async function dispatchNextUnit( await pauseAuto(ctx, pi); }, hardTimeoutMs); + // ── Continue-here context-pressure monitor ──────────────────────────── + // Polls context usage every 15s. When usage hits the continue-here + // threshold (70%), sends a one-shot wrap-up signal so the agent finishes + // gracefully and the next unit gets a fresh session. This is softer than + // context_pause_threshold which hard-pauses auto-mode entirely. + if (continueHereHandle) { + clearInterval(continueHereHandle); + continueHereHandle = null; + } + const executorContextWindow = resolveExecutorContextWindow( + ctx.modelRegistry as Parameters[0], + prefs as Parameters[1], + ctx.model?.contextWindow, + ); + const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent; + continueHereHandle = setInterval(() => { + if (!active || !currentUnit || !cmdCtx) return; + // One-shot guard: skip if already fired for this unit + const runtime = readUnitRuntimeRecord(basePath, unitType, unitId); + if (runtime?.continueHereFired) return; + + const contextUsage = cmdCtx.getContextUsage(); + if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return; + + // Fire once — mark runtime record and send wrap-up message + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit!.startedAt, { + continueHereFired: true, + }); + + if (verbose) { + ctx.ui.notify( + `Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`, + "info", + ); + } + + pi.sendMessage( + { + customType: "gsd-auto-wrapup", + display: verbose, + content: [ + "**CONTEXT BUDGET WARNING — wrap up this unit now.**", + `Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`, + "The next unit needs a fresh context to work effectively. Wrap up now:", + "1. Finish any in-progress file writes", + "2. Write or update the required durable artifacts (summary, checkboxes)", + "3. Mark task state on disk correctly", + "4. Leave precise resume notes if anything remains unfinished", + "Do NOT start new sub-tasks or investigations.", + ].join("\n"), + }, + { triggerTurn: true }, + ); + + // Clear the interval after firing — no need to keep polling + if (continueHereHandle) { + clearInterval(continueHereHandle); + continueHereHandle = null; + } + }, 15_000); + // Inject prompt — verify auto-mode still active (guards against race with timeout/pause) if (!active) return; pi.sendMessage( diff --git a/src/resources/extensions/gsd/tests/continue-here.test.ts b/src/resources/extensions/gsd/tests/continue-here.test.ts index 6edcbfde1..eb31e084f 100644 --- a/src/resources/extensions/gsd/tests/continue-here.test.ts +++ b/src/resources/extensions/gsd/tests/continue-here.test.ts @@ -201,4 +201,85 @@ describe("continue-here", () => { } }); }); + + describe("context-pressure monitor integration", () => { + it("should fire wrap-up when context >= threshold and mark continueHereFired", async () => { + const { writeUnitRuntimeRecord, readUnitRuntimeRecord, clearUnitRuntimeRecord } = await import("../unit-runtime.js"); + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "continue-here-monitor-")); + try { + // Simulate the monitor's one-shot logic: + // 1. Write initial runtime record (continueHereFired=false) + const startedAt = Date.now(); + writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, { + phase: "dispatched", + wrapupWarningSent: false, + }); + + const budget = computeBudgets(128_000); + const threshold = budget.continueThresholdPercent; + + // Simulate the monitor poll: context at 75% (above threshold) + const contextPercent = 75; + const runtime = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01"); + assert.ok(runtime, "runtime record should exist"); + assert.equal(runtime!.continueHereFired, false, "initially false"); + + // Check: should fire + const shouldFire = !runtime!.continueHereFired + && contextPercent >= threshold; + assert.ok(shouldFire, "should fire when context >= threshold and not yet fired"); + + // Mark as fired (what the monitor does) + writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, { + continueHereFired: true, + }); + + // Verify one-shot: second poll should NOT fire + const runtime2 = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01"); + assert.ok(runtime2, "runtime record should still exist"); + assert.equal(runtime2!.continueHereFired, true, "should be marked as fired"); + + const shouldFireAgain = !runtime2!.continueHereFired + && contextPercent >= threshold; + assert.equal(shouldFireAgain, false, "must not fire again — one-shot guard"); + + // Clean up + clearUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it("should not fire when context is below threshold", () => { + const budget = computeBudgets(200_000); + const threshold = budget.continueThresholdPercent; + + // Simulate monitor poll with context at 50% + const contextPercent = 50; + const continueHereFired = false; + const shouldFire = !continueHereFired && contextPercent >= threshold; + assert.equal(shouldFire, false, "50% should not trigger continue-here"); + }); + + it("should not fire when contextUsage is null/undefined", () => { + const budget = computeBudgets(128_000); + const threshold = budget.continueThresholdPercent; + + // Simulate the full guard chain from the monitor + const usageUndefined = undefined as { percent: number | null } | undefined; + const shouldFire1 = usageUndefined != null + && usageUndefined.percent != null + && usageUndefined.percent >= threshold; + assert.equal(shouldFire1, false, "undefined usage must not fire"); + + const usageNullPercent: { percent: number | null } = { percent: null }; + const shouldFire2 = usageNullPercent.percent != null + && usageNullPercent.percent >= threshold; + assert.equal(shouldFire2, false, "null percent must not fire"); + }); + }); });