Merge pull request #4042 from mastertyko/fix/3760-forensics-session-aware-loops

fix(gsd): scope stuck-loop forensics to auto sessions
This commit is contained in:
Jeremy McSpadden 2026-04-12 08:46:35 -05:00 committed by GitHub
commit 7c45b5abf2
4 changed files with 99 additions and 8 deletions

View file

@ -549,11 +549,13 @@ function buildSnapshotOpts(
_unitType: string,
_unitId: string,
): {
autoSessionKey?: string;
continueHereFired?: boolean;
promptCharCount?: number;
baselineCharCount?: number;
} & Record<string, unknown> {
return {
...(s.autoStartTime > 0 ? { autoSessionKey: String(s.autoStartTime) } : {}),
promptCharCount: s.lastPromptCharCount,
baselineCharCount: s.lastBaselineCharCount,
...(s.currentUnitRouting ?? {}),

View file

@ -650,19 +650,33 @@ function getDbCompletionCounts(): DbCompletionCounts | null {
* Exported for testability.
*/
export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void {
// First, collect unique startedAt values per type/id key
const dispatchMap = new Map<string, Set<number>>();
// First, collect unique startedAt values per type/id key, bucketed by
// autoSessionKey when available so cross-session recovery does not look
// like a within-session stuck loop.
const dispatchMap = new Map<string, Map<string, Set<number>>>();
for (const u of units) {
const key = `${u.type}/${u.id}`;
let starts = dispatchMap.get(key);
let sessionBuckets = dispatchMap.get(key);
if (!sessionBuckets) {
sessionBuckets = new Map();
dispatchMap.set(key, sessionBuckets);
}
const sessionKey = u.autoSessionKey ?? "__legacy__";
let starts = sessionBuckets.get(sessionKey);
if (!starts) {
starts = new Set();
dispatchMap.set(key, starts);
sessionBuckets.set(sessionKey, starts);
}
starts.add(u.startedAt);
}
for (const [key, starts] of dispatchMap) {
const count = starts.size;
for (const [key, sessionBuckets] of dispatchMap) {
const hasSessionAwareData = Array.from(sessionBuckets.keys()).some((sessionKey) => sessionKey !== "__legacy__");
const count = hasSessionAwareData
? Math.max(...Array.from(sessionBuckets.values(), (starts) => starts.size))
: (sessionBuckets.get("__legacy__")?.size ?? 0);
if (count > 1) {
const [unitType, ...idParts] = key.split("/");
anomalies.push({
@ -671,7 +685,9 @@ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomal
unitType,
unitId: idParts.join("/"),
summary: `Unit ${key} was dispatched ${count} times`,
details: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`,
details: hasSessionAwareData
? `Repeated dispatch within the same auto session suggests the unit completed but its artifacts were not verified, or the state machine kept returning it. Cross-session recovery runs are ignored.`
: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`,
});
}
}

View file

@ -41,6 +41,7 @@ export interface UnitMetrics {
model: string; // model ID used
startedAt: number; // ms timestamp
finishedAt: number; // ms timestamp
autoSessionKey?: string; // identifies one auto-mode run across pause/resume
tokens: TokenCounts;
cost: number; // total USD cost
toolCalls: number;
@ -133,7 +134,16 @@ export function snapshotUnitMetrics(
unitId: string,
startedAt: number,
model: string,
opts?: { tier?: string; modelDowngraded?: boolean; contextWindowTokens?: number; truncationSections?: number; continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number },
opts?: {
tier?: string;
modelDowngraded?: boolean;
contextWindowTokens?: number;
truncationSections?: number;
continueHereFired?: boolean;
promptCharCount?: number;
baselineCharCount?: number;
autoSessionKey?: string;
},
): UnitMetrics | null {
if (!ledger) return null;
@ -181,6 +191,7 @@ export function snapshotUnitMetrics(
model,
startedAt,
finishedAt: Date.now(),
...(opts?.autoSessionKey ? { autoSessionKey: opts.autoSessionKey } : {}),
tokens,
cost,
toolCalls,

View file

@ -101,3 +101,65 @@ test("#1943 detectStuckLoops ignores watchdog duplicates but flags real re-dispa
assert.equal(anomalies.length, 1, `expected 1 anomaly (for the 3x dispatched task), got ${anomalies.length}`);
assert.ok(anomalies[0].summary.includes("3 times"));
});
// Regression test for #3760: one dispatch of the same unit in each of two
// different auto sessions must not be reported as a stuck loop.
test("#3760 detectStuckLoops ignores cross-session recovery re-dispatches", () => {
  // Identity shared by both dispatches; only timing and session key differ.
  const sameUnit = { type: "plan-slice", id: "M001/S02" } as const;

  const firstRun = makeUnit({
    ...sameUnit,
    startedAt: 1000,
    finishedAt: 2000,
    autoSessionKey: "session-a",
  });
  const recoveryRun = makeUnit({
    ...sameUnit,
    startedAt: 5000,
    finishedAt: 6000,
    autoSessionKey: "session-b",
  });

  const found: ForensicAnomaly[] = [];
  const ledger: UnitMetrics[] = [firstRun, recoveryRun];
  detectStuckLoops(ledger, found);

  assert.equal(found.length, 0, "cross-session recovery should not be flagged as a stuck loop");
});
// Regression test for #3760: two dispatches inside "session-a" plus one in
// "session-b" must yield exactly one anomaly, whose count is the largest
// per-session dispatch count (2), not the overall total (3).
test("#3760 detectStuckLoops still flags repeated dispatches within one auto session", () => {
  // Identity shared by every dispatch in this scenario.
  const sameUnit = { type: "complete-slice", id: "M011/S02" } as const;

  // Each row is [startedAt, finishedAt, autoSessionKey].
  const runs: Array<[number, number, string]> = [
    [1000, 2000, "session-a"],
    [5000, 6000, "session-a"],
    [9000, 10000, "session-b"],
  ];
  const ledger: UnitMetrics[] = runs.map(([startedAt, finishedAt, autoSessionKey]) =>
    makeUnit({ ...sameUnit, startedAt, finishedAt, autoSessionKey }),
  );

  const found: ForensicAnomaly[] = [];
  detectStuckLoops(ledger, found);

  assert.equal(found.length, 1, "within-session retries should still be flagged");

  const flagged = found[0];
  assert.ok(
    flagged.summary.includes("2 times"),
    `summary should reflect the worst same-session loop: ${flagged.summary}`,
  );
  assert.ok(
    flagged.details.includes("Cross-session recovery runs are ignored"),
    `details should explain the session-aware rule: ${flagged.details}`,
  );
});