refactor(gsd): replace stuck counter with sliding-window detection (#1618)

Replace the crude sameUnitCount counter with a sliding window (size 6) that detects three stuck patterns: 1. Same error repeated twice in a row → stuck immediately 2. Same unit derived 3 consecutive times → stuck (was 5, now faster) 3. Oscillation pattern A→B→A→B → stuck (previously undetected) Graduated recovery preserved: first detection triggers cache invalidation + retry, second detection triggers hard stop. Exported detectStuck() function with 8 unit tests covering all rules plus edge cases (truncation, priority, non-triggers). Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 10:41:35 -06:00 · 2026-03-20 10:41:35 -06:00 · da0a8f31f7
commit da0a8f31f7
parent 5dbfc86882
2 changed files with 250 additions and 91 deletions
--- a/src/resources/extensions/gsd/auto-loop.ts
+++ b/src/resources/extensions/gsd/auto-loop.ts
@ -133,6 +133,63 @@ export function _setActiveSession(_session: AutoSession | null): void {
  // No-op — kept for test backward compatibility
 }

+// ─── detectStuck ─────────────────────────────────────────────────────────────
+
+type WindowEntry = { key: string; error?: string };
+
+/**
+ * Analyze a sliding window of recent unit dispatches for stuck patterns.
+ * Returns a signal with reason if stuck, null otherwise.
+ *
+ * Rule 1: Same error string twice in a row → stuck immediately.
+ * Rule 2: Same unit key 3+ consecutive times → stuck (preserves prior behavior).
+ * Rule 3: Oscillation A→B→A→B in last 4 entries → stuck.
+ */
+export function detectStuck(
+  window: readonly WindowEntry[],
+): { stuck: true; reason: string } | null {
+  if (window.length < 2) return null;
+
+  const last = window[window.length - 1];
+  const prev = window[window.length - 2];
+
+  // Rule 1: Same error repeated consecutively
+  if (last.error && prev.error && last.error === prev.error) {
+    return {
+      stuck: true,
+      reason: `Same error repeated: ${last.error.slice(0, 200)}`,
+    };
+  }
+
+  // Rule 2: Same unit 3+ consecutive times
+  if (window.length >= 3) {
+    const lastThree = window.slice(-3);
+    if (lastThree.every((u) => u.key === last.key)) {
+      return {
+        stuck: true,
+        reason: `${last.key} derived 3 consecutive times without progress`,
+      };
+    }
+  }
+
+  // Rule 3: Oscillation (A→B→A→B in last 4)
+  if (window.length >= 4) {
+    const w = window.slice(-4);
+    if (
+      w[0].key === w[2].key &&
+      w[1].key === w[3].key &&
+      w[0].key !== w[1].key
+    ) {
+      return {
+        stuck: true,
+        reason: `Oscillation detected: ${w[0].key} ↔ ${w[1].key}`,
+      };
+    }
+  }
+
+  return null;
+}
+
 // ─── runUnit ─────────────────────────────────────────────────────────────────

 /**
@ -601,8 +658,10 @@ export async function autoLoop(
 ): Promise<void> {
  debugLog("autoLoop", { phase: "enter" });
  let iteration = 0;
-  let lastDerivedUnit = "";
-  let sameUnitCount = 0;
+  // ── Sliding-window stuck detection ──
+  const recentUnits: Array<{ key: string; error?: string }> = [];
+  const STUCK_WINDOW_SIZE = 6;
+  let stuckRecoveryAttempts = 0;

  let consecutiveErrors = 0;

@ -750,8 +809,8 @@ export async function autoLoop(
        s.unitDispatchCount.clear();
        s.unitRecoveryCount.clear();
        s.unitLifetimeDispatches.clear();
-        lastDerivedUnit = "";
-        sameUnitCount = 0;
+        recentUnits.length = 0;
+        stuckRecoveryAttempts = 0;

        // Worktree lifecycle on milestone transition — merge current, enter next
        deps.resolver.mergeAndExit(s.currentMilestoneId!, ctx.ui);
@ -1077,71 +1136,79 @@ export async function autoLoop(
      let prompt = dispatchResult.prompt;
      const pauseAfterUatDispatch = dispatchResult.pauseAfterDispatch ?? false;

-      // ── Same-unit stuck counter with graduated recovery ──
+      // ── Sliding-window stuck detection with graduated recovery ──
      const derivedKey = `${unitType}/${unitId}`;
-      if (derivedKey === lastDerivedUnit && !s.pendingVerificationRetry) {
-        sameUnitCount++;
-        debugLog("autoLoop", {
-          phase: "stuck-check",
-          unitType,
-          unitId,
-          sameUnitCount,
-        });

-        if (sameUnitCount === 3) {
-          // Level 1: try verifying the artifact — maybe it was written but not detected
-          const artifactExists = deps.verifyExpectedArtifact(
+      if (!s.pendingVerificationRetry) {
+        recentUnits.push({ key: derivedKey });
+        if (recentUnits.length > STUCK_WINDOW_SIZE) recentUnits.shift();
+
+        const stuckSignal = detectStuck(recentUnits);
+        if (stuckSignal) {
+          debugLog("autoLoop", {
+            phase: "stuck-check",
            unitType,
            unitId,
-            s.basePath,
-          );
-          if (artifactExists) {
-            debugLog("autoLoop", {
-              phase: "stuck-recovery",
-              level: 1,
-              action: "artifact-found",
-            });
+            reason: stuckSignal.reason,
+            recoveryAttempts: stuckRecoveryAttempts,
+          });
+
+          if (stuckRecoveryAttempts === 0) {
+            // Level 1: try verifying the artifact, then cache invalidation + retry
+            stuckRecoveryAttempts++;
+            const artifactExists = deps.verifyExpectedArtifact(
+              unitType,
+              unitId,
+              s.basePath,
+            );
+            if (artifactExists) {
+              debugLog("autoLoop", {
+                phase: "stuck-recovery",
+                level: 1,
+                action: "artifact-found",
+              });
+              ctx.ui.notify(
+                `Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`,
+                "info",
+              );
+              deps.invalidateAllCaches();
+              continue;
+            }
            ctx.ui.notify(
-              `Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`,
-              "info",
+              `Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Invalidating caches and retrying.`,
+              "warning",
            );
            deps.invalidateAllCaches();
-            continue;
+          } else {
+            // Level 2: hard stop — genuinely stuck
+            debugLog("autoLoop", {
+              phase: "stuck-detected",
+              unitType,
+              unitId,
+              reason: stuckSignal.reason,
+            });
+            await deps.stopAuto(
+              ctx,
+              pi,
+              `Stuck: ${stuckSignal.reason}`,
+            );
+            ctx.ui.notify(
+              `Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}. The expected artifact was not written.`,
+              "error",
+            );
+            break;
+          }
+        } else {
+          // Progress detected — reset recovery counter
+          if (stuckRecoveryAttempts > 0) {
+            debugLog("autoLoop", {
+              phase: "stuck-counter-reset",
+              from: recentUnits[recentUnits.length - 2]?.key ?? "",
+              to: derivedKey,
+            });
+            stuckRecoveryAttempts = 0;
          }
-          ctx.ui.notify(
-            `Stuck on ${unitType} ${unitId} (attempt ${sameUnitCount}). Invalidating caches and retrying.`,
-            "warning",
-          );
-          deps.invalidateAllCaches();
-        } else if (sameUnitCount === 5) {
-          // Level 2: hard stop — genuinely stuck
-          debugLog("autoLoop", {
-            phase: "stuck-detected",
-            unitType,
-            unitId,
-            sameUnitCount,
-          });
-          await deps.stopAuto(
-            ctx,
-            pi,
-            `Stuck: ${unitType} ${unitId} derived ${sameUnitCount} consecutive times without progress`,
-          );
-          ctx.ui.notify(
-            `Stuck on ${unitType} ${unitId} — deriveState returns the same unit after ${sameUnitCount} attempts. The expected artifact was not written.`,
-            "error",
-          );
-          break;
        }
-      } else {
-        if (derivedKey !== lastDerivedUnit) {
-          debugLog("autoLoop", {
-            phase: "stuck-counter-reset",
-            from: lastDerivedUnit,
-            to: derivedKey,
-          });
-        }
-        lastDerivedUnit = derivedKey;
-        sameUnitCount = 0;
      }

      // Pre-dispatch hooks
@ -1374,6 +1441,23 @@ export async function autoLoop(
        status: unitResult.status,
      });

+      // Tag the most recent window entry with error info for stuck detection
+      if (unitResult.status === "error" || unitResult.status === "cancelled") {
+        const lastEntry = recentUnits[recentUnits.length - 1];
+        if (lastEntry) {
+          lastEntry.error = `${unitResult.status}:${unitType}/${unitId}`;
+        }
+      } else if (unitResult.event?.messages?.length) {
+        const lastMsg = unitResult.event.messages[unitResult.event.messages.length - 1];
+        const msgStr = typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg);
+        if (/error|fail|exception/i.test(msgStr)) {
+          const lastEntry = recentUnits[recentUnits.length - 1];
+          if (lastEntry) {
+            lastEntry.error = msgStr.slice(0, 200);
+          }
+        }
+      }
+
      if (unitResult.status === "cancelled") {
        ctx.ui.notify(
          `Session creation timed out or was cancelled for ${unitType} ${unitId}. Will retry.`,
--- a/src/resources/extensions/gsd/tests/auto-loop.test.ts
+++ b/src/resources/extensions/gsd/tests/auto-loop.test.ts
@ -7,6 +7,7 @@ import {
  resolveAgentEnd,
  runUnit,
  autoLoop,
+  detectStuck,
  _resetPendingResolve,
  _setActiveSession,
  isSessionSwitchInFlight,
@ -1042,7 +1043,7 @@ test("handleAgentEnd in auto.ts is a thin wrapper calling resolveAgentEnd", () =

 // ── Stuck counter tests ──────────────────────────────────────────────────────

-test("stuck counter: stops when deriveState returns same unit 5 consecutive times", async () => {
+test("stuck detection: stops when sliding window detects same unit 3 consecutive times", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
@ -1077,20 +1078,15 @@ test("stuck counter: stops when deriveState returns same unit 5 consecutive time

  const loopPromise = autoLoop(ctx, pi, s, deps);

-  // The loop will dispatch the same unit each iteration. On iteration 1, sameUnitCount
-  // starts at 0 and the unit key is set. On iterations 2-5, sameUnitCount increments.
-  // At sameUnitCount=5 (iteration 6), stopAuto is called.
-  // Each iteration requires resolving an agent_end event.
-  // But the stuck counter fires BEFORE runUnit, so we only need to resolve 4 times
-  // (iterations 1-4 each run a unit, iteration 5 increments to 5 and stops).
+  // Sliding window: iteration 1 pushes [A], iteration 2 pushes [A,A],
+  // iteration 3 pushes [A,A,A] → Rule 2 fires (3 consecutive) → Level 1 recovery.
+  // Level 1 invalidates caches and continues. Iteration 4 pushes [A,A,A,A] →
+  // Rule 2 fires again → Level 2 hard stop.
+  // Iterations 1-3 each run a unit (3 resolves needed). Iteration 3 triggers
+  // Level 1 (cache invalidation + continue). Iteration 4 triggers Level 2 (stop
+  // before runUnit), so no 4th resolve needed.

-  // Actually: iteration 1 sets lastDerivedUnit (sameUnitCount=0).
-  // Iteration 2: derivedKey === lastDerivedUnit → sameUnitCount=1.
-  // Iteration 3: sameUnitCount=2. Iteration 4: sameUnitCount=3.
-  // Iteration 5: sameUnitCount=4. Iteration 6: sameUnitCount=5 → stop.
-  // So we need to resolve 5 agent_end events (iterations 1-5 each run a unit).
-
-  for (let i = 0; i < 5; i++) {
+  for (let i = 0; i < 3; i++) {
    await new Promise((r) => setTimeout(r, 30));
    resolveAgentEnd(makeEvent());
  }
@ -1105,17 +1101,13 @@ test("stuck counter: stops when deriveState returns same unit 5 consecutive time
    stopReason.includes("Stuck"),
    `stop reason should mention 'Stuck', got: ${stopReason}`,
  );
-  assert.ok(
-    stopReason.includes("execute-task"),
-    "stop reason should include unitType",
-  );
  assert.ok(
    stopReason.includes("M001/S01/T01"),
    "stop reason should include unitId",
  );
 });

-test("stuck counter: resets when deriveState returns a different unit", async () => {
+test("stuck detection: window resets recovery when deriveState returns a different unit", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
@ -1176,10 +1168,11 @@ test("stuck counter: resets when deriveState returns a different unit", async ()

  await loopPromise;

-  // The counter should have reset when T02 was derived — no stuck stop
+  // Level 1 recovery fires on iteration 3 (cache invalidation + continue),
+  // then iteration 4 derives T02 — no Level 2 hard stop.
  assert.ok(
    !stopCalled,
-    "stopAuto should NOT have been called — counter reset on unit change",
+    "stopAuto should NOT have been called — different unit broke stuck pattern",
  );
  assert.ok(
    deriveCallCount >= 4,
@ -1187,7 +1180,7 @@ test("stuck counter: resets when deriveState returns a different unit", async ()
  );
 });

-test("stuck counter: does not increment during verification retry", async () => {
+test("stuck detection: does not push to window during verification retry", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
@ -1249,10 +1242,10 @@ test("stuck counter: does not increment during verification retry", async () =>
  await loopPromise;

  // Even though same unit was derived 4 times, verification retries should
-  // not count, so stuck counter should not have fired
+  // not push to the sliding window, so stuck detection should not have fired
  assert.ok(
    !stopReason.includes("Stuck"),
-    `stuck counter should not fire during verification retries, got: ${stopReason}`,
+    `stuck detection should not fire during verification retries, got: ${stopReason}`,
  );
  assert.equal(
    verifyCallCount,
@ -1261,24 +1254,106 @@ test("stuck counter: does not increment during verification retry", async () =>
  );
 });

-test("stuck counter: logs debug output with stuck-detected phase", () => {
-  // Structural test: verify the auto-loop.ts source contains both
-  // stuck-detected and stuck-counter-reset debug log phases
+// ── detectStuck unit tests ────────────────────────────────────────────────────
+
+test("detectStuck: returns null for fewer than 2 entries", () => {
+  assert.equal(detectStuck([]), null);
+  assert.equal(detectStuck([{ key: "A" }]), null);
+});
+
+test("detectStuck: Rule 1 — same error twice in a row", () => {
+  const result = detectStuck([
+    { key: "A", error: "ENOENT: file not found" },
+    { key: "A", error: "ENOENT: file not found" },
+  ]);
+  assert.ok(result?.stuck, "should detect same error repeated");
+  assert.ok(result?.reason.includes("Same error repeated"));
+});
+
+test("detectStuck: Rule 1 — different errors do not trigger", () => {
+  const result = detectStuck([
+    { key: "A", error: "ENOENT: file not found" },
+    { key: "A", error: "EACCES: permission denied" },
+  ]);
+  assert.equal(result, null);
+});
+
+test("detectStuck: Rule 2 — same unit 3 consecutive times", () => {
+  const result = detectStuck([
+    { key: "execute-task/M001/S01/T01" },
+    { key: "execute-task/M001/S01/T01" },
+    { key: "execute-task/M001/S01/T01" },
+  ]);
+  assert.ok(result?.stuck);
+  assert.ok(result?.reason.includes("3 consecutive times"));
+});
+
+test("detectStuck: Rule 2 — 2 consecutive does not trigger", () => {
+  assert.equal(detectStuck([
+    { key: "A" },
+    { key: "A" },
+  ]), null);
+});
+
+test("detectStuck: Rule 3 — oscillation A→B→A→B", () => {
+  const result = detectStuck([
+    { key: "A" },
+    { key: "B" },
+    { key: "A" },
+    { key: "B" },
+  ]);
+  assert.ok(result?.stuck);
+  assert.ok(result?.reason.includes("Oscillation"));
+});
+
+test("detectStuck: Rule 3 — non-oscillation pattern A→B→C→B", () => {
+  assert.equal(detectStuck([
+    { key: "A" },
+    { key: "B" },
+    { key: "C" },
+    { key: "B" },
+  ]), null);
+});
+
+test("detectStuck: Rule 1 takes priority over Rule 2 when both match", () => {
+  const result = detectStuck([
+    { key: "A", error: "test error" },
+    { key: "A", error: "test error" },
+    { key: "A", error: "test error" },
+  ]);
+  assert.ok(result?.stuck);
+  // Rule 1 fires first
+  assert.ok(result?.reason.includes("Same error repeated"));
+});
+
+test("detectStuck: truncates long error strings", () => {
+  const longError = "x".repeat(500);
+  const result = detectStuck([
+    { key: "A", error: longError },
+    { key: "A", error: longError },
+  ]);
+  assert.ok(result?.stuck);
+  assert.ok(result!.reason.length < 300, "reason should be truncated");
+});
+
+test("stuck detection: logs debug output with stuck-detected phase", () => {
+  // Structural test: verify the auto-loop.ts source contains
+  // stuck-detected and stuck-counter-reset debug log phases, plus detectStuck
  const src = readFileSync(
    resolve(import.meta.dirname, "..", "auto-loop.ts"),
    "utf-8",
  );
  assert.ok(
    src.includes('"stuck-detected"'),
-    "auto-loop.ts must log phase: 'stuck-detected' when stuck counter fires",
+    "auto-loop.ts must log phase: 'stuck-detected' when stuck detection fires",
  );
  assert.ok(
    src.includes('"stuck-counter-reset"'),
-    "auto-loop.ts must log phase: 'stuck-counter-reset' when counter resets on new unit",
+    "auto-loop.ts must log phase: 'stuck-counter-reset' when recovery resets on new unit",
  );
  assert.ok(
-    src.includes("sameUnitCount"),
-    "auto-loop.ts must track sameUnitCount for stuck detection",
+    src.includes("detectStuck"),
+    "auto-loop.ts must use detectStuck for sliding window analysis",
  );
 });