Merge pull request #4042 from mastertyko/fix/3760-forensics-session-aware-loops

fix(gsd): scope stuck-loop forensics to auto sessions
2026-04-12 08:46:35 -05:00 · 2026-04-12 08:46:35 -05:00 · 7c45b5abf2
commit 7c45b5abf2
parent 343dc8a675 e987734559
4 changed files with 99 additions and 8 deletions
--- a/src/resources/extensions/gsd/auto.ts
+++ b/src/resources/extensions/gsd/auto.ts
@ -549,11 +549,13 @@ function buildSnapshotOpts(
  _unitType: string,
  _unitId: string,
 ): {
+  autoSessionKey?: string;
  continueHereFired?: boolean;
  promptCharCount?: number;
  baselineCharCount?: number;
 } & Record<string, unknown> {
  return {
+    ...(s.autoStartTime > 0 ? { autoSessionKey: String(s.autoStartTime) } : {}),
    promptCharCount: s.lastPromptCharCount,
    baselineCharCount: s.lastBaselineCharCount,
    ...(s.currentUnitRouting ?? {}),
--- a/src/resources/extensions/gsd/forensics.ts
+++ b/src/resources/extensions/gsd/forensics.ts
@ -650,19 +650,33 @@ function getDbCompletionCounts(): DbCompletionCounts | null {
 * Exported for testability.
 */
 export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void {
-  // First, collect unique startedAt values per type/id key
-  const dispatchMap = new Map<string, Set<number>>();
+  // First, collect unique startedAt values per type/id key, bucketed by
+  // autoSessionKey when available so cross-session recovery does not look
+  // like a within-session stuck loop.
+  const dispatchMap = new Map<string, Map<string, Set<number>>>();
  for (const u of units) {
    const key = `${u.type}/${u.id}`;
-    let starts = dispatchMap.get(key);
+    let sessionBuckets = dispatchMap.get(key);
+    if (!sessionBuckets) {
+      sessionBuckets = new Map();
+      dispatchMap.set(key, sessionBuckets);
+    }
+
+    const sessionKey = u.autoSessionKey ?? "__legacy__";
+    let starts = sessionBuckets.get(sessionKey);
    if (!starts) {
      starts = new Set();
-      dispatchMap.set(key, starts);
+      sessionBuckets.set(sessionKey, starts);
    }
    starts.add(u.startedAt);
  }
-  for (const [key, starts] of dispatchMap) {
-    const count = starts.size;
+
+  for (const [key, sessionBuckets] of dispatchMap) {
+    const hasSessionAwareData = Array.from(sessionBuckets.keys()).some((sessionKey) => sessionKey !== "__legacy__");
+    const count = hasSessionAwareData
+      ? Math.max(...Array.from(sessionBuckets.values(), (starts) => starts.size))
+      : (sessionBuckets.get("__legacy__")?.size ?? 0);
+
    if (count > 1) {
      const [unitType, ...idParts] = key.split("/");
      anomalies.push({
@ -671,7 +685,9 @@ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomal
        unitType,
        unitId: idParts.join("/"),
        summary: `Unit ${key} was dispatched ${count} times`,
-        details: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`,
+        details: hasSessionAwareData
+          ? `Repeated dispatch within the same auto session suggests the unit completed but its artifacts were not verified, or the state machine kept returning it. Cross-session recovery runs are ignored.`
+          : `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`,
      });
    }
  }
--- a/src/resources/extensions/gsd/metrics.ts
+++ b/src/resources/extensions/gsd/metrics.ts
@ -41,6 +41,7 @@ export interface UnitMetrics {
  model: string;           // model ID used
  startedAt: number;       // ms timestamp
  finishedAt: number;      // ms timestamp
+  autoSessionKey?: string; // identifies one auto-mode run across pause/resume
  tokens: TokenCounts;
  cost: number;            // total USD cost
  toolCalls: number;
@ -133,7 +134,16 @@ export function snapshotUnitMetrics(
  unitId: string,
  startedAt: number,
  model: string,
-  opts?: { tier?: string; modelDowngraded?: boolean; contextWindowTokens?: number; truncationSections?: number; continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number },
+  opts?: {
+    tier?: string;
+    modelDowngraded?: boolean;
+    contextWindowTokens?: number;
+    truncationSections?: number;
+    continueHereFired?: boolean;
+    promptCharCount?: number;
+    baselineCharCount?: number;
+    autoSessionKey?: string;
+  },
 ): UnitMetrics | null {
  if (!ledger) return null;

@ -181,6 +191,7 @@ export function snapshotUnitMetrics(
    model,
    startedAt,
    finishedAt: Date.now(),
+    ...(opts?.autoSessionKey ? { autoSessionKey: opts.autoSessionKey } : {}),
    tokens,
    cost,
    toolCalls,
--- a/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts
+++ b/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts
@ -101,3 +101,65 @@ test("#1943 detectStuckLoops ignores watchdog duplicates but flags real re-dispa
  assert.equal(anomalies.length, 1, `expected 1 anomaly (for the 3x dispatched task), got ${anomalies.length}`);
  assert.ok(anomalies[0].summary.includes("3 times"));
 });
+
+test("#3760 detectStuckLoops ignores cross-session recovery re-dispatches", () => { // regression: one dispatch of the same unit in each of two auto sessions must not be reported
+  const anomalies: ForensicAnomaly[] = []; // output list that detectStuckLoops pushes anomalies into
+
+  const units: UnitMetrics[] = [ // the same type/id twice, but tagged with different autoSessionKey values
+    makeUnit({
+      type: "plan-slice",
+      id: "M001/S02",
+      startedAt: 1000,
+      finishedAt: 2000,
+      autoSessionKey: "session-a", // first dispatch is bucketed under session-a
+    }),
+    makeUnit({
+      type: "plan-slice",
+      id: "M001/S02",
+      startedAt: 5000, // distinct startedAt, so it is a separate dispatch rather than a duplicate record
+      finishedAt: 6000,
+      autoSessionKey: "session-b", // second dispatch is bucketed under a different session
+    }),
+  ];
+
+  detectStuckLoops(units, anomalies);
+
+  assert.equal(anomalies.length, 0, "cross-session recovery should not be flagged as a stuck loop"); // each session bucket holds a single start, so the per-session maximum is 1 and nothing is flagged
+});
+
+test("#3760 detectStuckLoops still flags repeated dispatches within one auto session", () => { // counterpart check: repeats inside a single session must still produce an anomaly
+  const anomalies: ForensicAnomaly[] = []; // output list that detectStuckLoops pushes anomalies into
+
+  const units: UnitMetrics[] = [ // three dispatches of one unit: two in session-a, one in session-b
+    makeUnit({
+      type: "complete-slice",
+      id: "M011/S02",
+      startedAt: 1000,
+      finishedAt: 2000,
+      autoSessionKey: "session-a", // first dispatch in session-a
+    }),
+    makeUnit({
+      type: "complete-slice",
+      id: "M011/S02",
+      startedAt: 5000, // distinct startedAt, so session-a now holds two starts
+      finishedAt: 6000,
+      autoSessionKey: "session-a", // second dispatch in the same session
+    }),
+    makeUnit({
+      type: "complete-slice",
+      id: "M011/S02",
+      startedAt: 9000,
+      finishedAt: 10000,
+      autoSessionKey: "session-b", // lone dispatch in another session; must not raise the count to 3
+    }),
+  ];
+
+  detectStuckLoops(units, anomalies);
+
+  assert.equal(anomalies.length, 1, "within-session retries should still be flagged"); // one anomaly per type/id key, not one per session
+  assert.ok(anomalies[0].summary.includes("2 times"), `summary should reflect the worst same-session loop: ${anomalies[0].summary}`); // reported count is the largest single-session bucket (2), not the total across sessions (3)
+  assert.ok(
+    anomalies[0].details.includes("Cross-session recovery runs are ignored"), // wording used only when session-aware data is present
+    `details should explain the session-aware rule: ${anomalies[0].details}`,
+  );
+});