refactor(gsd): replace stuck counter with sliding-window detection (#1618)
Replace the crude sameUnitCount counter with a sliding window (size 6) that detects three stuck patterns: 1. Same error repeated twice in a row → stuck immediately 2. Same unit derived 3 consecutive times → stuck (was 5, now faster) 3. Oscillation pattern A→B→A→B → stuck (previously undetected) Graduated recovery preserved: first detection triggers cache invalidation + retry, second detection triggers hard stop. Exported detectStuck() function with 8 unit tests covering all rules plus edge cases (truncation, priority, non-triggers). Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5dbfc86882
commit
da0a8f31f7
2 changed files with 250 additions and 91 deletions
|
|
@ -133,6 +133,63 @@ export function _setActiveSession(_session: AutoSession | null): void {
|
|||
// No-op — kept for test backward compatibility
|
||||
}
|
||||
|
||||
// ─── detectStuck ─────────────────────────────────────────────────────────────
|
||||
|
||||
type WindowEntry = { key: string; error?: string };
|
||||
|
||||
/**
|
||||
* Analyze a sliding window of recent unit dispatches for stuck patterns.
|
||||
* Returns a signal with reason if stuck, null otherwise.
|
||||
*
|
||||
* Rule 1: Same error string twice in a row → stuck immediately.
|
||||
* Rule 2: Same unit key 3+ consecutive times → stuck (preserves prior behavior).
|
||||
* Rule 3: Oscillation A→B→A→B in last 4 entries → stuck.
|
||||
*/
|
||||
export function detectStuck(
|
||||
window: readonly WindowEntry[],
|
||||
): { stuck: true; reason: string } | null {
|
||||
if (window.length < 2) return null;
|
||||
|
||||
const last = window[window.length - 1];
|
||||
const prev = window[window.length - 2];
|
||||
|
||||
// Rule 1: Same error repeated consecutively
|
||||
if (last.error && prev.error && last.error === prev.error) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `Same error repeated: ${last.error.slice(0, 200)}`,
|
||||
};
|
||||
}
|
||||
|
||||
// Rule 2: Same unit 3+ consecutive times
|
||||
if (window.length >= 3) {
|
||||
const lastThree = window.slice(-3);
|
||||
if (lastThree.every((u) => u.key === last.key)) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `${last.key} derived 3 consecutive times without progress`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 3: Oscillation (A→B→A→B in last 4)
|
||||
if (window.length >= 4) {
|
||||
const w = window.slice(-4);
|
||||
if (
|
||||
w[0].key === w[2].key &&
|
||||
w[1].key === w[3].key &&
|
||||
w[0].key !== w[1].key
|
||||
) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `Oscillation detected: ${w[0].key} ↔ ${w[1].key}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// ─── runUnit ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
@ -601,8 +658,10 @@ export async function autoLoop(
|
|||
): Promise<void> {
|
||||
debugLog("autoLoop", { phase: "enter" });
|
||||
let iteration = 0;
|
||||
let lastDerivedUnit = "";
|
||||
let sameUnitCount = 0;
|
||||
// ── Sliding-window stuck detection ──
|
||||
const recentUnits: Array<{ key: string; error?: string }> = [];
|
||||
const STUCK_WINDOW_SIZE = 6;
|
||||
let stuckRecoveryAttempts = 0;
|
||||
|
||||
let consecutiveErrors = 0;
|
||||
|
||||
|
|
@ -750,8 +809,8 @@ export async function autoLoop(
|
|||
s.unitDispatchCount.clear();
|
||||
s.unitRecoveryCount.clear();
|
||||
s.unitLifetimeDispatches.clear();
|
||||
lastDerivedUnit = "";
|
||||
sameUnitCount = 0;
|
||||
recentUnits.length = 0;
|
||||
stuckRecoveryAttempts = 0;
|
||||
|
||||
// Worktree lifecycle on milestone transition — merge current, enter next
|
||||
deps.resolver.mergeAndExit(s.currentMilestoneId!, ctx.ui);
|
||||
|
|
@ -1077,71 +1136,79 @@ export async function autoLoop(
|
|||
let prompt = dispatchResult.prompt;
|
||||
const pauseAfterUatDispatch = dispatchResult.pauseAfterDispatch ?? false;
|
||||
|
||||
// ── Same-unit stuck counter with graduated recovery ──
|
||||
// ── Sliding-window stuck detection with graduated recovery ──
|
||||
const derivedKey = `${unitType}/${unitId}`;
|
||||
if (derivedKey === lastDerivedUnit && !s.pendingVerificationRetry) {
|
||||
sameUnitCount++;
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-check",
|
||||
unitType,
|
||||
unitId,
|
||||
sameUnitCount,
|
||||
});
|
||||
|
||||
if (sameUnitCount === 3) {
|
||||
// Level 1: try verifying the artifact — maybe it was written but not detected
|
||||
const artifactExists = deps.verifyExpectedArtifact(
|
||||
if (!s.pendingVerificationRetry) {
|
||||
recentUnits.push({ key: derivedKey });
|
||||
if (recentUnits.length > STUCK_WINDOW_SIZE) recentUnits.shift();
|
||||
|
||||
const stuckSignal = detectStuck(recentUnits);
|
||||
if (stuckSignal) {
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-check",
|
||||
unitType,
|
||||
unitId,
|
||||
s.basePath,
|
||||
);
|
||||
if (artifactExists) {
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-recovery",
|
||||
level: 1,
|
||||
action: "artifact-found",
|
||||
});
|
||||
reason: stuckSignal.reason,
|
||||
recoveryAttempts: stuckRecoveryAttempts,
|
||||
});
|
||||
|
||||
if (stuckRecoveryAttempts === 0) {
|
||||
// Level 1: try verifying the artifact, then cache invalidation + retry
|
||||
stuckRecoveryAttempts++;
|
||||
const artifactExists = deps.verifyExpectedArtifact(
|
||||
unitType,
|
||||
unitId,
|
||||
s.basePath,
|
||||
);
|
||||
if (artifactExists) {
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-recovery",
|
||||
level: 1,
|
||||
action: "artifact-found",
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`,
|
||||
"info",
|
||||
);
|
||||
deps.invalidateAllCaches();
|
||||
continue;
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`,
|
||||
"info",
|
||||
`Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Invalidating caches and retrying.`,
|
||||
"warning",
|
||||
);
|
||||
deps.invalidateAllCaches();
|
||||
continue;
|
||||
} else {
|
||||
// Level 2: hard stop — genuinely stuck
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-detected",
|
||||
unitType,
|
||||
unitId,
|
||||
reason: stuckSignal.reason,
|
||||
});
|
||||
await deps.stopAuto(
|
||||
ctx,
|
||||
pi,
|
||||
`Stuck: ${stuckSignal.reason}`,
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}. The expected artifact was not written.`,
|
||||
"error",
|
||||
);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Progress detected — reset recovery counter
|
||||
if (stuckRecoveryAttempts > 0) {
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-counter-reset",
|
||||
from: recentUnits[recentUnits.length - 2]?.key ?? "",
|
||||
to: derivedKey,
|
||||
});
|
||||
stuckRecoveryAttempts = 0;
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Stuck on ${unitType} ${unitId} (attempt ${sameUnitCount}). Invalidating caches and retrying.`,
|
||||
"warning",
|
||||
);
|
||||
deps.invalidateAllCaches();
|
||||
} else if (sameUnitCount === 5) {
|
||||
// Level 2: hard stop — genuinely stuck
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-detected",
|
||||
unitType,
|
||||
unitId,
|
||||
sameUnitCount,
|
||||
});
|
||||
await deps.stopAuto(
|
||||
ctx,
|
||||
pi,
|
||||
`Stuck: ${unitType} ${unitId} derived ${sameUnitCount} consecutive times without progress`,
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`Stuck on ${unitType} ${unitId} — deriveState returns the same unit after ${sameUnitCount} attempts. The expected artifact was not written.`,
|
||||
"error",
|
||||
);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (derivedKey !== lastDerivedUnit) {
|
||||
debugLog("autoLoop", {
|
||||
phase: "stuck-counter-reset",
|
||||
from: lastDerivedUnit,
|
||||
to: derivedKey,
|
||||
});
|
||||
}
|
||||
lastDerivedUnit = derivedKey;
|
||||
sameUnitCount = 0;
|
||||
}
|
||||
|
||||
// Pre-dispatch hooks
|
||||
|
|
@ -1374,6 +1441,23 @@ export async function autoLoop(
|
|||
status: unitResult.status,
|
||||
});
|
||||
|
||||
// Tag the most recent window entry with error info for stuck detection
|
||||
if (unitResult.status === "error" || unitResult.status === "cancelled") {
|
||||
const lastEntry = recentUnits[recentUnits.length - 1];
|
||||
if (lastEntry) {
|
||||
lastEntry.error = `${unitResult.status}:${unitType}/${unitId}`;
|
||||
}
|
||||
} else if (unitResult.event?.messages?.length) {
|
||||
const lastMsg = unitResult.event.messages[unitResult.event.messages.length - 1];
|
||||
const msgStr = typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg);
|
||||
if (/error|fail|exception/i.test(msgStr)) {
|
||||
const lastEntry = recentUnits[recentUnits.length - 1];
|
||||
if (lastEntry) {
|
||||
lastEntry.error = msgStr.slice(0, 200);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (unitResult.status === "cancelled") {
|
||||
ctx.ui.notify(
|
||||
`Session creation timed out or was cancelled for ${unitType} ${unitId}. Will retry.`,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import {
|
|||
resolveAgentEnd,
|
||||
runUnit,
|
||||
autoLoop,
|
||||
detectStuck,
|
||||
_resetPendingResolve,
|
||||
_setActiveSession,
|
||||
isSessionSwitchInFlight,
|
||||
|
|
@ -1042,7 +1043,7 @@ test("handleAgentEnd in auto.ts is a thin wrapper calling resolveAgentEnd", () =
|
|||
|
||||
// ── Stuck counter tests ──────────────────────────────────────────────────────
|
||||
|
||||
test("stuck counter: stops when deriveState returns same unit 5 consecutive times", async () => {
|
||||
test("stuck detection: stops when sliding window detects same unit 3 consecutive times", async () => {
|
||||
_resetPendingResolve();
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
|
|
@ -1077,20 +1078,15 @@ test("stuck counter: stops when deriveState returns same unit 5 consecutive time
|
|||
|
||||
const loopPromise = autoLoop(ctx, pi, s, deps);
|
||||
|
||||
// The loop will dispatch the same unit each iteration. On iteration 1, sameUnitCount
|
||||
// starts at 0 and the unit key is set. On iterations 2-5, sameUnitCount increments.
|
||||
// At sameUnitCount=5 (iteration 6), stopAuto is called.
|
||||
// Each iteration requires resolving an agent_end event.
|
||||
// But the stuck counter fires BEFORE runUnit, so we only need to resolve 4 times
|
||||
// (iterations 1-4 each run a unit, iteration 5 increments to 5 and stops).
|
||||
// Sliding window: iteration 1 pushes [A], iteration 2 pushes [A,A],
|
||||
// iteration 3 pushes [A,A,A] → Rule 2 fires (3 consecutive) → Level 1 recovery.
|
||||
// Level 1 invalidates caches and continues. Iteration 4 pushes [A,A,A,A] →
|
||||
// Rule 2 fires again → Level 2 hard stop.
|
||||
// Iterations 1-3 each run a unit (3 resolves needed). Iteration 3 triggers
|
||||
// Level 1 (cache invalidation + continue). Iteration 4 triggers Level 2 (stop
|
||||
// before runUnit), so no 4th resolve needed.
|
||||
|
||||
// Actually: iteration 1 sets lastDerivedUnit (sameUnitCount=0).
|
||||
// Iteration 2: derivedKey === lastDerivedUnit → sameUnitCount=1.
|
||||
// Iteration 3: sameUnitCount=2. Iteration 4: sameUnitCount=3.
|
||||
// Iteration 5: sameUnitCount=4. Iteration 6: sameUnitCount=5 → stop.
|
||||
// So we need to resolve 5 agent_end events (iterations 1-5 each run a unit).
|
||||
|
||||
for (let i = 0; i < 5; i++) {
|
||||
for (let i = 0; i < 3; i++) {
|
||||
await new Promise((r) => setTimeout(r, 30));
|
||||
resolveAgentEnd(makeEvent());
|
||||
}
|
||||
|
|
@ -1105,17 +1101,13 @@ test("stuck counter: stops when deriveState returns same unit 5 consecutive time
|
|||
stopReason.includes("Stuck"),
|
||||
`stop reason should mention 'Stuck', got: ${stopReason}`,
|
||||
);
|
||||
assert.ok(
|
||||
stopReason.includes("execute-task"),
|
||||
"stop reason should include unitType",
|
||||
);
|
||||
assert.ok(
|
||||
stopReason.includes("M001/S01/T01"),
|
||||
"stop reason should include unitId",
|
||||
);
|
||||
});
|
||||
|
||||
test("stuck counter: resets when deriveState returns a different unit", async () => {
|
||||
test("stuck detection: window resets recovery when deriveState returns a different unit", async () => {
|
||||
_resetPendingResolve();
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
|
|
@ -1176,10 +1168,11 @@ test("stuck counter: resets when deriveState returns a different unit", async ()
|
|||
|
||||
await loopPromise;
|
||||
|
||||
// The counter should have reset when T02 was derived — no stuck stop
|
||||
// Level 1 recovery fires on iteration 3 (cache invalidation + continue),
|
||||
// then iteration 4 derives T02 — no Level 2 hard stop.
|
||||
assert.ok(
|
||||
!stopCalled,
|
||||
"stopAuto should NOT have been called — counter reset on unit change",
|
||||
"stopAuto should NOT have been called — different unit broke stuck pattern",
|
||||
);
|
||||
assert.ok(
|
||||
deriveCallCount >= 4,
|
||||
|
|
@ -1187,7 +1180,7 @@ test("stuck counter: resets when deriveState returns a different unit", async ()
|
|||
);
|
||||
});
|
||||
|
||||
test("stuck counter: does not increment during verification retry", async () => {
|
||||
test("stuck detection: does not push to window during verification retry", async () => {
|
||||
_resetPendingResolve();
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
|
|
@ -1249,10 +1242,10 @@ test("stuck counter: does not increment during verification retry", async () =>
|
|||
await loopPromise;
|
||||
|
||||
// Even though same unit was derived 4 times, verification retries should
|
||||
// not count, so stuck counter should not have fired
|
||||
// not push to the sliding window, so stuck detection should not have fired
|
||||
assert.ok(
|
||||
!stopReason.includes("Stuck"),
|
||||
`stuck counter should not fire during verification retries, got: ${stopReason}`,
|
||||
`stuck detection should not fire during verification retries, got: ${stopReason}`,
|
||||
);
|
||||
assert.equal(
|
||||
verifyCallCount,
|
||||
|
|
@ -1261,24 +1254,106 @@ test("stuck counter: does not increment during verification retry", async () =>
|
|||
);
|
||||
});
|
||||
|
||||
test("stuck counter: logs debug output with stuck-detected phase", () => {
|
||||
// Structural test: verify the auto-loop.ts source contains both
|
||||
// stuck-detected and stuck-counter-reset debug log phases
|
||||
// ── detectStuck unit tests ────────────────────────────────────────────────────
|
||||
|
||||
test("detectStuck: returns null for fewer than 2 entries", () => {
|
||||
assert.equal(detectStuck([]), null);
|
||||
assert.equal(detectStuck([{ key: "A" }]), null);
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 1 — same error twice in a row", () => {
|
||||
const result = detectStuck([
|
||||
{ key: "A", error: "ENOENT: file not found" },
|
||||
{ key: "A", error: "ENOENT: file not found" },
|
||||
]);
|
||||
assert.ok(result?.stuck, "should detect same error repeated");
|
||||
assert.ok(result?.reason.includes("Same error repeated"));
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 1 — different errors do not trigger", () => {
|
||||
const result = detectStuck([
|
||||
{ key: "A", error: "ENOENT: file not found" },
|
||||
{ key: "A", error: "EACCES: permission denied" },
|
||||
]);
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 2 — same unit 3 consecutive times", () => {
|
||||
const result = detectStuck([
|
||||
{ key: "execute-task/M001/S01/T01" },
|
||||
{ key: "execute-task/M001/S01/T01" },
|
||||
{ key: "execute-task/M001/S01/T01" },
|
||||
]);
|
||||
assert.ok(result?.stuck);
|
||||
assert.ok(result?.reason.includes("3 consecutive times"));
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 2 — 2 consecutive does not trigger", () => {
|
||||
assert.equal(detectStuck([
|
||||
{ key: "A" },
|
||||
{ key: "A" },
|
||||
]), null);
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 3 — oscillation A→B→A→B", () => {
|
||||
const result = detectStuck([
|
||||
{ key: "A" },
|
||||
{ key: "B" },
|
||||
{ key: "A" },
|
||||
{ key: "B" },
|
||||
]);
|
||||
assert.ok(result?.stuck);
|
||||
assert.ok(result?.reason.includes("Oscillation"));
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 3 — non-oscillation pattern A→B→C→B", () => {
|
||||
assert.equal(detectStuck([
|
||||
{ key: "A" },
|
||||
{ key: "B" },
|
||||
{ key: "C" },
|
||||
{ key: "B" },
|
||||
]), null);
|
||||
});
|
||||
|
||||
test("detectStuck: Rule 1 takes priority over Rule 2 when both match", () => {
|
||||
const result = detectStuck([
|
||||
{ key: "A", error: "test error" },
|
||||
{ key: "A", error: "test error" },
|
||||
{ key: "A", error: "test error" },
|
||||
]);
|
||||
assert.ok(result?.stuck);
|
||||
// Rule 1 fires first
|
||||
assert.ok(result?.reason.includes("Same error repeated"));
|
||||
});
|
||||
|
||||
test("detectStuck: truncates long error strings", () => {
|
||||
const longError = "x".repeat(500);
|
||||
const result = detectStuck([
|
||||
{ key: "A", error: longError },
|
||||
{ key: "A", error: longError },
|
||||
]);
|
||||
assert.ok(result?.stuck);
|
||||
assert.ok(result!.reason.length < 300, "reason should be truncated");
|
||||
});
|
||||
|
||||
test("stuck detection: logs debug output with stuck-detected phase", () => {
|
||||
// Structural test: verify the auto-loop.ts source contains
|
||||
// stuck-detected and stuck-counter-reset debug log phases, plus detectStuck
|
||||
const src = readFileSync(
|
||||
resolve(import.meta.dirname, "..", "auto-loop.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes('"stuck-detected"'),
|
||||
"auto-loop.ts must log phase: 'stuck-detected' when stuck counter fires",
|
||||
"auto-loop.ts must log phase: 'stuck-detected' when stuck detection fires",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes('"stuck-counter-reset"'),
|
||||
"auto-loop.ts must log phase: 'stuck-counter-reset' when counter resets on new unit",
|
||||
"auto-loop.ts must log phase: 'stuck-counter-reset' when recovery resets on new unit",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes("sameUnitCount"),
|
||||
"auto-loop.ts must track sameUnitCount for stuck detection",
|
||||
src.includes("detectStuck"),
|
||||
"auto-loop.ts must use detectStuck for sliding window analysis",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue