From e98773455973221f2a668dc3fc7c410a1b1264b2 Mon Sep 17 00:00:00 2001 From: mastertyko <11311479+mastertyko@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:00:01 +0200 Subject: [PATCH] fix(gsd): scope stuck-loop forensics to auto sessions --- src/resources/extensions/gsd/auto.ts | 2 + src/resources/extensions/gsd/forensics.ts | 30 ++++++--- src/resources/extensions/gsd/metrics.ts | 13 +++- .../gsd/tests/forensics-stuck-loops.test.ts | 62 +++++++++++++++++++ 4 files changed, 99 insertions(+), 8 deletions(-) diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 1b8d4fd47..a39d05a5f 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -549,11 +549,13 @@ function buildSnapshotOpts( _unitType: string, _unitId: string, ): { + autoSessionKey?: string; continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number; } & Record<string, unknown> { return { + ...(s.autoStartTime > 0 ? { autoSessionKey: String(s.autoStartTime) } : {}), promptCharCount: s.lastPromptCharCount, baselineCharCount: s.lastBaselineCharCount, ...(s.currentUnitRouting ?? {}), diff --git a/src/resources/extensions/gsd/forensics.ts b/src/resources/extensions/gsd/forensics.ts index ba2746f8b..76be923d8 100644 --- a/src/resources/extensions/gsd/forensics.ts +++ b/src/resources/extensions/gsd/forensics.ts @@ -650,19 +650,33 @@ function getDbCompletionCounts(): DbCompletionCounts | null { * Exported for testability. */ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void { - // First, collect unique startedAt values per type/id key - const dispatchMap = new Map<string, Set<number>>(); + // First, collect unique startedAt values per type/id key, bucketed by + // autoSessionKey when available so cross-session recovery does not look + // like a within-session stuck loop. 
+ const dispatchMap = new Map<string, Map<string, Set<number>>>(); for (const u of units) { const key = `${u.type}/${u.id}`; - let starts = dispatchMap.get(key); + let sessionBuckets = dispatchMap.get(key); + if (!sessionBuckets) { + sessionBuckets = new Map(); + dispatchMap.set(key, sessionBuckets); + } + + const sessionKey = u.autoSessionKey ?? "__legacy__"; + let starts = sessionBuckets.get(sessionKey); if (!starts) { starts = new Set(); - dispatchMap.set(key, starts); + sessionBuckets.set(sessionKey, starts); } starts.add(u.startedAt); } - for (const [key, starts] of dispatchMap) { - const count = starts.size; + + for (const [key, sessionBuckets] of dispatchMap) { + const hasSessionAwareData = Array.from(sessionBuckets.keys()).some((sessionKey) => sessionKey !== "__legacy__"); + const count = hasSessionAwareData + ? Math.max(...Array.from(sessionBuckets.values(), (starts) => starts.size)) + : (sessionBuckets.get("__legacy__")?.size ?? 0); + if (count > 1) { const [unitType, ...idParts] = key.split("/"); anomalies.push({ @@ -671,7 +685,9 @@ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomal unitType, unitId: idParts.join("/"), summary: `Unit ${key} was dispatched ${count} times`, - details: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`, + details: hasSessionAwareData + ? `Repeated dispatch within the same auto session suggests the unit completed but its artifacts were not verified, or the state machine kept returning it. 
Cross-session recovery runs are ignored.` + : `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`, }); } } diff --git a/src/resources/extensions/gsd/metrics.ts b/src/resources/extensions/gsd/metrics.ts index a29d4f39d..85f3484bb 100644 --- a/src/resources/extensions/gsd/metrics.ts +++ b/src/resources/extensions/gsd/metrics.ts @@ -41,6 +41,7 @@ export interface UnitMetrics { model: string; // model ID used startedAt: number; // ms timestamp finishedAt: number; // ms timestamp + autoSessionKey?: string; // identifies one auto-mode run across pause/resume tokens: TokenCounts; cost: number; // total USD cost toolCalls: number; @@ -133,7 +134,16 @@ export function snapshotUnitMetrics( unitId: string, startedAt: number, model: string, - opts?: { tier?: string; modelDowngraded?: boolean; contextWindowTokens?: number; truncationSections?: number; continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number }, + opts?: { + tier?: string; + modelDowngraded?: boolean; + contextWindowTokens?: number; + truncationSections?: number; + continueHereFired?: boolean; + promptCharCount?: number; + baselineCharCount?: number; + autoSessionKey?: string; + }, ): UnitMetrics | null { if (!ledger) return null; @@ -181,6 +191,7 @@ export function snapshotUnitMetrics( model, startedAt, finishedAt: Date.now(), + ...(opts?.autoSessionKey ? 
{ autoSessionKey: opts.autoSessionKey } : {}), tokens, cost, toolCalls, diff --git a/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts b/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts index 555570bab..8b03e0cf7 100644 --- a/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts +++ b/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts @@ -101,3 +101,65 @@ test("#1943 detectStuckLoops ignores watchdog duplicates but flags real re-dispa assert.equal(anomalies.length, 1, `expected 1 anomaly (for the 3x dispatched task), got ${anomalies.length}`); assert.ok(anomalies[0].summary.includes("3 times")); }); + +test("#3760 detectStuckLoops ignores cross-session recovery re-dispatches", () => { + const anomalies: ForensicAnomaly[] = []; + + const units: UnitMetrics[] = [ + makeUnit({ + type: "plan-slice", + id: "M001/S02", + startedAt: 1000, + finishedAt: 2000, + autoSessionKey: "session-a", + }), + makeUnit({ + type: "plan-slice", + id: "M001/S02", + startedAt: 5000, + finishedAt: 6000, + autoSessionKey: "session-b", + }), + ]; + + detectStuckLoops(units, anomalies); + + assert.equal(anomalies.length, 0, "cross-session recovery should not be flagged as a stuck loop"); +}); + +test("#3760 detectStuckLoops still flags repeated dispatches within one auto session", () => { + const anomalies: ForensicAnomaly[] = []; + + const units: UnitMetrics[] = [ + makeUnit({ + type: "complete-slice", + id: "M011/S02", + startedAt: 1000, + finishedAt: 2000, + autoSessionKey: "session-a", + }), + makeUnit({ + type: "complete-slice", + id: "M011/S02", + startedAt: 5000, + finishedAt: 6000, + autoSessionKey: "session-a", + }), + makeUnit({ + type: "complete-slice", + id: "M011/S02", + startedAt: 9000, + finishedAt: 10000, + autoSessionKey: "session-b", + }), + ]; + + detectStuckLoops(units, anomalies); + + assert.equal(anomalies.length, 1, "within-session retries should still be flagged"); + assert.ok(anomalies[0].summary.includes("2 
times"), `summary should reflect the worst same-session loop: ${anomalies[0].summary}`); + assert.ok( + anomalies[0].details.includes("Cross-session recovery runs are ignored"), + `details should explain the session-aware rule: ${anomalies[0].details}`, + ); +});