Merge pull request #4042 from mastertyko/fix/3760-forensics-session-aware-loops
fix(gsd): scope stuck-loop forensics to auto sessions
This commit is contained in:
commit
7c45b5abf2
4 changed files with 99 additions and 8 deletions
|
|
@ -549,11 +549,13 @@ function buildSnapshotOpts(
|
|||
_unitType: string,
|
||||
_unitId: string,
|
||||
): {
|
||||
autoSessionKey?: string;
|
||||
continueHereFired?: boolean;
|
||||
promptCharCount?: number;
|
||||
baselineCharCount?: number;
|
||||
} & Record<string, unknown> {
|
||||
return {
|
||||
...(s.autoStartTime > 0 ? { autoSessionKey: String(s.autoStartTime) } : {}),
|
||||
promptCharCount: s.lastPromptCharCount,
|
||||
baselineCharCount: s.lastBaselineCharCount,
|
||||
...(s.currentUnitRouting ?? {}),
|
||||
|
|
|
|||
|
|
@ -650,19 +650,33 @@ function getDbCompletionCounts(): DbCompletionCounts | null {
|
|||
* Exported for testability.
|
||||
*/
|
||||
export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void {
|
||||
// First, collect unique startedAt values per type/id key
|
||||
const dispatchMap = new Map<string, Set<number>>();
|
||||
// First, collect unique startedAt values per type/id key, bucketed by
|
||||
// autoSessionKey when available so cross-session recovery does not look
|
||||
// like a within-session stuck loop.
|
||||
const dispatchMap = new Map<string, Map<string, Set<number>>>();
|
||||
for (const u of units) {
|
||||
const key = `${u.type}/${u.id}`;
|
||||
let starts = dispatchMap.get(key);
|
||||
let sessionBuckets = dispatchMap.get(key);
|
||||
if (!sessionBuckets) {
|
||||
sessionBuckets = new Map();
|
||||
dispatchMap.set(key, sessionBuckets);
|
||||
}
|
||||
|
||||
const sessionKey = u.autoSessionKey ?? "__legacy__";
|
||||
let starts = sessionBuckets.get(sessionKey);
|
||||
if (!starts) {
|
||||
starts = new Set();
|
||||
dispatchMap.set(key, starts);
|
||||
sessionBuckets.set(sessionKey, starts);
|
||||
}
|
||||
starts.add(u.startedAt);
|
||||
}
|
||||
for (const [key, starts] of dispatchMap) {
|
||||
const count = starts.size;
|
||||
|
||||
for (const [key, sessionBuckets] of dispatchMap) {
|
||||
const hasSessionAwareData = Array.from(sessionBuckets.keys()).some((sessionKey) => sessionKey !== "__legacy__");
|
||||
const count = hasSessionAwareData
|
||||
? Math.max(...Array.from(sessionBuckets.values(), (starts) => starts.size))
|
||||
: (sessionBuckets.get("__legacy__")?.size ?? 0);
|
||||
|
||||
if (count > 1) {
|
||||
const [unitType, ...idParts] = key.split("/");
|
||||
anomalies.push({
|
||||
|
|
@ -671,7 +685,9 @@ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomal
|
|||
unitType,
|
||||
unitId: idParts.join("/"),
|
||||
summary: `Unit ${key} was dispatched ${count} times`,
|
||||
details: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`,
|
||||
details: hasSessionAwareData
|
||||
? `Repeated dispatch within the same auto session suggests the unit completed but its artifacts were not verified, or the state machine kept returning it. Cross-session recovery runs are ignored.`
|
||||
: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ export interface UnitMetrics {
|
|||
model: string; // model ID used
|
||||
startedAt: number; // ms timestamp
|
||||
finishedAt: number; // ms timestamp
|
||||
autoSessionKey?: string; // identifies one auto-mode run across pause/resume
|
||||
tokens: TokenCounts;
|
||||
cost: number; // total USD cost
|
||||
toolCalls: number;
|
||||
|
|
@ -133,7 +134,16 @@ export function snapshotUnitMetrics(
|
|||
unitId: string,
|
||||
startedAt: number,
|
||||
model: string,
|
||||
opts?: { tier?: string; modelDowngraded?: boolean; contextWindowTokens?: number; truncationSections?: number; continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number },
|
||||
opts?: {
|
||||
tier?: string;
|
||||
modelDowngraded?: boolean;
|
||||
contextWindowTokens?: number;
|
||||
truncationSections?: number;
|
||||
continueHereFired?: boolean;
|
||||
promptCharCount?: number;
|
||||
baselineCharCount?: number;
|
||||
autoSessionKey?: string;
|
||||
},
|
||||
): UnitMetrics | null {
|
||||
if (!ledger) return null;
|
||||
|
||||
|
|
@ -181,6 +191,7 @@ export function snapshotUnitMetrics(
|
|||
model,
|
||||
startedAt,
|
||||
finishedAt: Date.now(),
|
||||
...(opts?.autoSessionKey ? { autoSessionKey: opts.autoSessionKey } : {}),
|
||||
tokens,
|
||||
cost,
|
||||
toolCalls,
|
||||
|
|
|
|||
|
|
@ -101,3 +101,65 @@ test("#1943 detectStuckLoops ignores watchdog duplicates but flags real re-dispa
|
|||
assert.equal(anomalies.length, 1, `expected 1 anomaly (for the 3x dispatched task), got ${anomalies.length}`);
|
||||
assert.ok(anomalies[0].summary.includes("3 times"));
|
||||
});
|
||||
|
||||
test("#3760 detectStuckLoops ignores cross-session recovery re-dispatches", () => {
|
||||
const anomalies: ForensicAnomaly[] = [];
|
||||
|
||||
const units: UnitMetrics[] = [
|
||||
makeUnit({
|
||||
type: "plan-slice",
|
||||
id: "M001/S02",
|
||||
startedAt: 1000,
|
||||
finishedAt: 2000,
|
||||
autoSessionKey: "session-a",
|
||||
}),
|
||||
makeUnit({
|
||||
type: "plan-slice",
|
||||
id: "M001/S02",
|
||||
startedAt: 5000,
|
||||
finishedAt: 6000,
|
||||
autoSessionKey: "session-b",
|
||||
}),
|
||||
];
|
||||
|
||||
detectStuckLoops(units, anomalies);
|
||||
|
||||
assert.equal(anomalies.length, 0, "cross-session recovery should not be flagged as a stuck loop");
|
||||
});
|
||||
|
||||
test("#3760 detectStuckLoops still flags repeated dispatches within one auto session", () => {
|
||||
const anomalies: ForensicAnomaly[] = [];
|
||||
|
||||
const units: UnitMetrics[] = [
|
||||
makeUnit({
|
||||
type: "complete-slice",
|
||||
id: "M011/S02",
|
||||
startedAt: 1000,
|
||||
finishedAt: 2000,
|
||||
autoSessionKey: "session-a",
|
||||
}),
|
||||
makeUnit({
|
||||
type: "complete-slice",
|
||||
id: "M011/S02",
|
||||
startedAt: 5000,
|
||||
finishedAt: 6000,
|
||||
autoSessionKey: "session-a",
|
||||
}),
|
||||
makeUnit({
|
||||
type: "complete-slice",
|
||||
id: "M011/S02",
|
||||
startedAt: 9000,
|
||||
finishedAt: 10000,
|
||||
autoSessionKey: "session-b",
|
||||
}),
|
||||
];
|
||||
|
||||
detectStuckLoops(units, anomalies);
|
||||
|
||||
assert.equal(anomalies.length, 1, "within-session retries should still be flagged");
|
||||
assert.ok(anomalies[0].summary.includes("2 times"), `summary should reflect the worst same-session loop: ${anomalies[0].summary}`);
|
||||
assert.ok(
|
||||
anomalies[0].details.includes("Cross-session recovery runs are ignored"),
|
||||
`details should explain the session-aware rule: ${anomalies[0].details}`,
|
||||
);
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue