fix: wire continue-here context-pressure monitor to send wrap-up signal at 70% (#916)

2026-03-17 12:44:12 -05:00 · 2026-03-17 12:44:12 -05:00 · f5e9b00f47
commit f5e9b00f47
parent 25292f8840
2 changed files with 171 additions and 11 deletions
--- a/src/resources/extensions/gsd/auto.ts
+++ b/src/resources/extensions/gsd/auto.ts
@ -81,6 +81,7 @@ import {
  initMetrics, resetMetrics, snapshotUnitMetrics, getLedger,
  getProjectTotals, formatCost, formatTokenCount,
 } from "./metrics.js";
+import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js";
 import { join } from "node:path";
 import { sep as pathSep } from "node:path";
 import { homedir } from "node:os";
@ -412,6 +413,8 @@ let originalModelProvider: string | null = null;
 let unitTimeoutHandle: ReturnType<typeof setTimeout> | null = null;
 let wrapupWarningHandle: ReturnType<typeof setTimeout> | null = null;
 let idleWatchdogHandle: ReturnType<typeof setInterval> | null = null;
+/** Context-pressure continue-here monitor — fires once when context usage >= 70% */
+let continueHereHandle: ReturnType<typeof setInterval> | null = null;

 /** Dispatch gap watchdog — detects when the state machine stalls between units.
 *  After handleAgentEnd completes, if auto-mode is still active but no new unit
@ -589,6 +592,10 @@ function clearUnitTimeout(): void {
    clearInterval(idleWatchdogHandle);
    idleWatchdogHandle = null;
  }
+  if (continueHereHandle) {
+    clearInterval(continueHereHandle);
+    continueHereHandle = null;
+  }
  inFlightTools.clear();
  clearDispatchGapWatchdog();
 }
@ -600,6 +607,17 @@ function clearDispatchGapWatchdog(): void {
  }
 }

+/** Assemble the options bag for snapshotUnitMetrics; continueHereFired is added only when the unit's runtime record has it set. */
+function buildSnapshotOpts(unitType: string, unitId: string): { continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number } & Record<string, unknown> {
+  // The runtime record is only consulted while a unit is in flight.
+  const record = currentUnit ? readUnitRuntimeRecord(basePath, unitType, unitId) : null;
+  const firedFlag = record?.continueHereFired ? { continueHereFired: true } : {};
+  const routing = currentUnitRouting ?? {};
+  // Spread order matters: routing fields may override the char counts, the fired flag goes last.
+  const sizes = { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount };
+  return { ...sizes, ...routing, ...firedFlag };
+}
+
 /**
 * Start a watchdog that fires if no new unit is dispatched within DISPATCH_GAP_TIMEOUT_MS
 * after handleAgentEnd completes. This catches the case where the dispatch chain silently
@ -1762,7 +1780,7 @@ export async function handleAgentEnd(
      const hookStartedAt = Date.now();
      if (currentUnit) {
        const modelId = ctx.model?.id ?? "unknown";
-        snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+        snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
        const hookActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
        if (hookActivityFile) {
          try {
@ -2409,7 +2427,7 @@ async function dispatchNextUnit(
    // Save final session before stopping
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
      saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
    }

@ -2454,7 +2472,7 @@ async function dispatchNextUnit(
  if (!mid || !midTitle) {
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
      saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
    }
    const noMilestoneReason = !mid
@ -2472,7 +2490,7 @@ async function dispatchNextUnit(
  if (state.phase === "complete") {
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
      saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
    }
    // Clear completed-units.json for the finished milestone so it doesn't grow unbounded.
@ -2542,7 +2560,7 @@ async function dispatchNextUnit(
  if (state.phase === "blocked") {
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
      saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
    }
    const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
@ -2653,7 +2671,7 @@ async function dispatchNextUnit(
  if (dispatchResult.action === "stop") {
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
      saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
    }
    await stopAuto(ctx, pi, dispatchResult.reason);
@ -2875,7 +2893,7 @@ async function dispatchNextUnit(
  if (lifetimeCount > MAX_LIFETIME_DISPATCHES) {
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
    }
    saveActivityLog(ctx, basePath, unitType, unitId);
    const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
@ -2889,7 +2907,7 @@ async function dispatchNextUnit(
  if (prevCount >= MAX_UNIT_DISPATCHES) {
    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
    }
    saveActivityLog(ctx, basePath, unitType, unitId);

@ -3047,7 +3065,7 @@ async function dispatchNextUnit(
  // The session still holds the previous unit's data (newSession hasn't fired yet).
  if (currentUnit) {
    const modelId = ctx.model?.id ?? "unknown";
-    snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+    snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
    const activityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);

    // Fire-and-forget memory extraction from completed unit
@ -3429,7 +3447,7 @@ async function dispatchNextUnit(

    if (currentUnit) {
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
    }
    saveActivityLog(ctx, basePath, unitType, unitId);

@ -3455,7 +3473,7 @@ async function dispatchNextUnit(
        timeoutAt: Date.now(),
      });
      const modelId = ctx.model?.id ?? "unknown";
-      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
+      snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, buildSnapshotOpts(currentUnit.type, currentUnit.id));
    }
    saveActivityLog(ctx, basePath, unitType, unitId);

@ -3469,6 +3487,67 @@ async function dispatchNextUnit(
    await pauseAuto(ctx, pi);
  }, hardTimeoutMs);

+  // ── Continue-here context-pressure monitor ────────────────────────────
+  // Polls context usage every 15s. When usage hits the continue-here
+  // threshold (70%), sends a one-shot wrap-up signal so the agent finishes
+  // gracefully and the next unit gets a fresh session. This is softer than
+  // context_pause_threshold which hard-pauses auto-mode entirely.
+  if (continueHereHandle) {
+    clearInterval(continueHereHandle);
+    continueHereHandle = null;
+  }
+  const executorContextWindow = resolveExecutorContextWindow(
+    ctx.modelRegistry as Parameters<typeof resolveExecutorContextWindow>[0],
+    prefs as Parameters<typeof resolveExecutorContextWindow>[1],
+    ctx.model?.contextWindow,
+  );
+  const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent;
+  continueHereHandle = setInterval(() => {
+    if (!active || !currentUnit || !cmdCtx) return;
+    // One-shot guard: the fired flag lives in the unit runtime record — skip if already set
+    const runtime = readUnitRuntimeRecord(basePath, unitType, unitId);
+    if (runtime?.continueHereFired) return;
+
+    const contextUsage = cmdCtx.getContextUsage();
+    if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return;
+
+    // Fire once — persist the marker first; if this write throws, the poller is still alive and retries
+    writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
+      continueHereFired: true,
+    });
+
+    // Stop polling before notifying: if notify/sendMessage throws, the timer must not stay alive
+    if (continueHereHandle) {
+      clearInterval(continueHereHandle);
+      continueHereHandle = null;
+    }
+
+    if (verbose) {
+      ctx.ui.notify(
+        `Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`,
+        "info",
+      );
+    }
+
+    pi.sendMessage(
+      {
+        customType: "gsd-auto-wrapup",
+        display: verbose,
+        content: [
+          "**CONTEXT BUDGET WARNING — wrap up this unit now.**",
+          `Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`,
+          "The next unit needs a fresh context to work effectively. Wrap up now:",
+          "1. Finish any in-progress file writes",
+          "2. Write or update the required durable artifacts (summary, checkboxes)",
+          "3. Mark task state on disk correctly",
+          "4. Leave precise resume notes if anything remains unfinished",
+          "Do NOT start new sub-tasks or investigations.",
+        ].join("\n"),
+      },
+      { triggerTurn: true },
+    );
+  }, 15_000);
+
  // Inject prompt — verify auto-mode still active (guards against race with timeout/pause)
  if (!active) return;
  pi.sendMessage(
--- a/src/resources/extensions/gsd/tests/continue-here.test.ts
+++ b/src/resources/extensions/gsd/tests/continue-here.test.ts
@ -201,4 +201,85 @@ describe("continue-here", () => {
      }
    });
  });
+
+  describe("context-pressure monitor integration", () => {
+    // These cases replay the monitor's guard chain by hand rather than driving
+    // the real polling interval, so they pin only the threshold comparison and
+    // the one-shot bookkeeping stored in the unit runtime record.
+    const UNIT_TYPE = "execute-task";
+    const UNIT_ID = "M001/S01/T01";
+
+    it("should fire wrap-up when context >= threshold and mark continueHereFired", async () => {
+      const runtimeStore = await import("../unit-runtime.js");
+      const fs = await import("node:fs");
+      const path = await import("node:path");
+      const os = await import("node:os");
+
+      const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), "continue-here-monitor-"));
+      try {
+        // Seed an initial record in the "dispatched" phase. The fired flag is not
+        // passed here and is expected to read back as false.
+        const startedAt = Date.now();
+        runtimeStore.writeUnitRuntimeRecord(scratchDir, UNIT_TYPE, UNIT_ID, startedAt, {
+          phase: "dispatched",
+          wrapupWarningSent: false,
+        });
+
+        const { continueThresholdPercent: threshold } = computeBudgets(128_000);
+        // Simulated poll reading; the assertions below require it to reach the threshold.
+        const contextPercent = 75;
+
+        // First poll: record present, flag unset, so the guard lets the signal through.
+        const beforeFire = runtimeStore.readUnitRuntimeRecord(scratchDir, UNIT_TYPE, UNIT_ID);
+        assert.ok(beforeFire, "runtime record should exist");
+        assert.equal(beforeFire!.continueHereFired, false, "initially false");
+
+        const firesFirstTime = contextPercent >= threshold && !beforeFire!.continueHereFired;
+        assert.ok(firesFirstTime, "should fire when context >= threshold and not yet fired");
+
+        // Persist the one-shot marker, which is what the monitor does when it fires.
+        runtimeStore.writeUnitRuntimeRecord(scratchDir, UNIT_TYPE, UNIT_ID, startedAt, {
+          continueHereFired: true,
+        });
+
+        // Second poll: same context pressure, but the persisted marker blocks a repeat.
+        const afterFire = runtimeStore.readUnitRuntimeRecord(scratchDir, UNIT_TYPE, UNIT_ID);
+        assert.ok(afterFire, "runtime record should still exist");
+        assert.equal(afterFire!.continueHereFired, true, "should be marked as fired");
+
+        const firesSecondTime = contextPercent >= threshold && !afterFire!.continueHereFired;
+        assert.equal(firesSecondTime, false, "must not fire again — one-shot guard");
+
+        // Drop the record explicitly; the directory itself is removed in finally.
+        runtimeStore.clearUnitRuntimeRecord(scratchDir, UNIT_TYPE, UNIT_ID);
+      } finally {
+        fs.rmSync(scratchDir, { recursive: true, force: true });
+      }
+    });
+
+    it("should not fire when context is below threshold", () => {
+      const { continueThresholdPercent: threshold } = computeBudgets(200_000);
+
+      // A poll reading of 50% with the fired flag still unset must stay quiet:
+      // the threshold comparison alone has to reject it.
+      const contextPercent = 50;
+      const alreadyFired = false;
+      const fires = contextPercent >= threshold && !alreadyFired;
+      assert.equal(fires, false, "50% should not trigger continue-here");
+    });
+
+    it("should not fire when contextUsage is null/undefined", () => {
+      const { continueThresholdPercent: threshold } = computeBudgets(128_000);
+
+      // Mirrors the monitor's null-tolerant guard: missing usage or a null
+      // percent must short-circuit before the threshold comparison.
+      const wouldFire = (usage: { percent: number | null } | undefined): boolean =>
+        usage != null
+        && usage.percent != null
+        && usage.percent >= threshold;
+
+      assert.equal(wouldFire(undefined), false, "undefined usage must not fire");
+      assert.equal(wouldFire({ percent: null }), false, "null percent must not fire");
+    });
+  });
 });