fix(auto): break infinite skip loop on repeatedly-skipped completed units
When deriveState() keeps returning the same already-completed unit, the idempotency skip paths in dispatchNextUnit recursively call themselves forever. The existing MAX_SKIP_DEPTH (20) breaker yields to the UI but then re-enters the same loop; the hard lifetime counter (unitLifetimeDispatches) is never reached because skip paths return before touching it. Root cause: no per-unit counter on the skip-only path. Fix: - Add unitConsecutiveSkips map + MAX_CONSECUTIVE_SKIPS = 3 - Both skip paths (completedKeySet hit, and fallback artifact-exists) increment the counter on each skip of the same idempotencyKey - When the counter exceeds MAX_CONSECUTIVE_SKIPS, evict the key from completedKeySet and persisted storage, invalidate state, and let deriveState reconcile on the next real dispatch - Counter resets to 0 for a given key whenever a real dispatch proceeds (i.e., past both skip paths) - Counter fully cleared at all 4 existing clear sites (stopAuto, startAuto, crash recovery, pause/resume) Export _getUnitConsecutiveSkips / _resetUnitConsecutiveSkips / MAX_CONSECUTIVE_SKIPS for testability (same pattern as doctor-proactive.ts resetProactiveHealing). Tests: auto-skip-loop.test.ts — counter mechanics, threshold bounds, eviction round-trip, per-key isolation (10 assertions). Closes #728
This commit is contained in:
parent
1a85853fd8
commit
742cd70c9b
2 changed files with 180 additions and 0 deletions
|
|
@ -211,6 +211,11 @@ const MAX_LIFETIME_DISPATCHES = 6;
|
|||
/** Tracks recovery attempt count per unit for backoff and diagnostics. */
|
||||
const unitRecoveryCount = new Map<string, number>();
|
||||
|
||||
/** Track consecutive skips per unit — catches infinite skip loops where deriveState
|
||||
* keeps returning the same already-completed unit. Reset on any real dispatch. */
|
||||
const unitConsecutiveSkips = new Map<string, number>();
|
||||
const MAX_CONSECUTIVE_SKIPS = 3;
|
||||
|
||||
/** Persisted completed-unit keys — survives restarts. Loaded from .gsd/completed-units.json. */
|
||||
const completedKeySet = new Set<string>();
|
||||
|
||||
|
|
@ -624,6 +629,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
|
|||
stepMode = false;
|
||||
unitDispatchCount.clear();
|
||||
unitRecoveryCount.clear();
|
||||
unitConsecutiveSkips.clear();
|
||||
inFlightTools.clear();
|
||||
lastBudgetAlertLevel = 0;
|
||||
unitLifetimeDispatches.clear();
|
||||
|
|
@ -714,6 +720,7 @@ export async function startAuto(
|
|||
basePath = base;
|
||||
unitDispatchCount.clear();
|
||||
unitLifetimeDispatches.clear();
|
||||
unitConsecutiveSkips.clear();
|
||||
// Re-initialize metrics in case ledger was lost during pause
|
||||
if (!getLedger()) initMetrics(base);
|
||||
// Ensure milestone ID is set on git service for integration branch resolution
|
||||
|
|
@ -992,6 +999,7 @@ export async function startAuto(
|
|||
basePath = base;
|
||||
unitDispatchCount.clear();
|
||||
unitRecoveryCount.clear();
|
||||
unitConsecutiveSkips.clear();
|
||||
lastBudgetAlertLevel = 0;
|
||||
unitLifetimeDispatches.clear();
|
||||
completedKeySet.clear();
|
||||
|
|
@ -1919,6 +1927,7 @@ async function dispatchNextUnit(
|
|||
// Reset stuck detection for new milestone
|
||||
unitDispatchCount.clear();
|
||||
unitRecoveryCount.clear();
|
||||
unitConsecutiveSkips.clear();
|
||||
unitLifetimeDispatches.clear();
|
||||
// Clear completed-units.json for the finished milestone
|
||||
try {
|
||||
|
|
@ -2286,6 +2295,26 @@ async function dispatchNextUnit(
|
|||
// Cross-validate: does the expected artifact actually exist?
|
||||
const artifactExists = verifyExpectedArtifact(unitType, unitId, basePath);
|
||||
if (artifactExists) {
|
||||
// Guard against infinite skip loops: if deriveState keeps returning the
|
||||
// same completed unit, consecutive skips will trip this breaker. Evict the
|
||||
// key so the next dispatch forces full reconciliation instead of looping.
|
||||
const skipCount = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
|
||||
unitConsecutiveSkips.set(idempotencyKey, skipCount);
|
||||
if (skipCount > MAX_CONSECUTIVE_SKIPS) {
|
||||
unitConsecutiveSkips.delete(idempotencyKey);
|
||||
completedKeySet.delete(idempotencyKey);
|
||||
removePersistedKey(basePath, idempotencyKey);
|
||||
invalidateStateCache();
|
||||
ctx.ui.notify(
|
||||
`Skip loop detected: ${unitType} ${unitId} skipped ${skipCount} times without advancing. Evicting completion record and forcing reconciliation.`,
|
||||
"warning",
|
||||
);
|
||||
_skipDepth++;
|
||||
await new Promise(r => setTimeout(r, 50));
|
||||
await dispatchNextUnit(ctx, pi);
|
||||
_skipDepth = Math.max(0, _skipDepth - 1);
|
||||
return;
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Skipping ${unitType} ${unitId} — already completed in a prior session. Advancing.`,
|
||||
"info",
|
||||
|
|
@ -2315,6 +2344,24 @@ async function dispatchNextUnit(
|
|||
persistCompletedKey(basePath, idempotencyKey);
|
||||
completedKeySet.add(idempotencyKey);
|
||||
invalidateStateCache();
|
||||
// Same consecutive-skip guard as the idempotency path above.
|
||||
const skipCount2 = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
|
||||
unitConsecutiveSkips.set(idempotencyKey, skipCount2);
|
||||
if (skipCount2 > MAX_CONSECUTIVE_SKIPS) {
|
||||
unitConsecutiveSkips.delete(idempotencyKey);
|
||||
completedKeySet.delete(idempotencyKey);
|
||||
removePersistedKey(basePath, idempotencyKey);
|
||||
invalidateStateCache();
|
||||
ctx.ui.notify(
|
||||
`Skip loop detected: ${unitType} ${unitId} skipped ${skipCount2} times without advancing. Evicting completion record and forcing reconciliation.`,
|
||||
"warning",
|
||||
);
|
||||
_skipDepth++;
|
||||
await new Promise(r => setTimeout(r, 50));
|
||||
await dispatchNextUnit(ctx, pi);
|
||||
_skipDepth = Math.max(0, _skipDepth - 1);
|
||||
return;
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Skipping ${unitType} ${unitId} — artifact exists but completion key was missing. Repaired and advancing.`,
|
||||
"info",
|
||||
|
|
@ -2330,6 +2377,8 @@ async function dispatchNextUnit(
|
|||
// Pattern A→B→A→B would reset retryCount every time; this map catches it.
|
||||
const dispatchKey = `${unitType}/${unitId}`;
|
||||
const prevCount = unitDispatchCount.get(dispatchKey) ?? 0;
|
||||
// Real dispatch reached — clear the consecutive-skip counter for this unit.
|
||||
unitConsecutiveSkips.delete(dispatchKey);
|
||||
|
||||
debugLog("dispatch-unit", {
|
||||
type: unitType,
|
||||
|
|
@ -3275,6 +3324,14 @@ export {
|
|||
buildLoopRemediationSteps,
|
||||
} from "./auto-recovery.js";
|
||||
|
||||
/**
|
||||
* Test-only: expose skip-loop state for unit tests.
|
||||
* Not part of the public API.
|
||||
*/
|
||||
export function _getUnitConsecutiveSkips(): Map<string, number> { return unitConsecutiveSkips; }
|
||||
export function _resetUnitConsecutiveSkips(): void { unitConsecutiveSkips.clear(); }
|
||||
export { MAX_CONSECUTIVE_SKIPS };
|
||||
|
||||
/**
|
||||
* Dispatch a hook unit directly, bypassing normal pre-dispatch hooks.
|
||||
* Used for manual hook triggers via /gsd run-hook.
|
||||
|
|
|
|||
123
src/resources/extensions/gsd/tests/auto-skip-loop.test.ts
Normal file
123
src/resources/extensions/gsd/tests/auto-skip-loop.test.ts
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
/**
|
||||
* auto-skip-loop.test.ts — Tests for the consecutive-skip loop breaker.
|
||||
*
|
||||
* Regression for #728: auto-mode infinite skip loop on previously completed
|
||||
* plan-slice units when deriveState keeps returning the same unit.
|
||||
*
|
||||
* The skip paths in dispatchNextUnit track consecutive skips per unit via
|
||||
* unitConsecutiveSkips. When the same unit is skipped > MAX_CONSECUTIVE_SKIPS
|
||||
* times without a real dispatch in between, the completion record is evicted
|
||||
* so deriveState can reconcile.
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
import {
|
||||
_getUnitConsecutiveSkips,
|
||||
_resetUnitConsecutiveSkips,
|
||||
MAX_CONSECUTIVE_SKIPS,
|
||||
} from "../auto.ts";
|
||||
import { persistCompletedKey, removePersistedKey, loadPersistedKeys } from "../auto-recovery.ts";
|
||||
import { createTestContext } from "./test-helpers.ts";
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
|
||||
function makeTmpBase(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), "gsd-skip-loop-test-"));
|
||||
mkdirSync(join(dir, ".gsd"), { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
// ─── Counter starts at zero ────────────────────────────────────────────
|
||||
console.log("\n=== skip loop counter: initial state ===");
|
||||
{
|
||||
_resetUnitConsecutiveSkips();
|
||||
const map = _getUnitConsecutiveSkips();
|
||||
assertEq(map.size, 0, "counter map starts empty after reset");
|
||||
}
|
||||
|
||||
// ─── Counter increments correctly ────────────────────────────────────
|
||||
console.log("\n=== skip loop counter: increments on repeated calls ===");
|
||||
{
|
||||
_resetUnitConsecutiveSkips();
|
||||
const map = _getUnitConsecutiveSkips();
|
||||
const key = "plan-slice/M001/S04";
|
||||
|
||||
for (let i = 1; i <= MAX_CONSECUTIVE_SKIPS; i++) {
|
||||
const prev = map.get(key) ?? 0;
|
||||
map.set(key, prev + 1);
|
||||
}
|
||||
|
||||
assertEq(map.get(key), MAX_CONSECUTIVE_SKIPS, `counter reaches MAX_CONSECUTIVE_SKIPS (${MAX_CONSECUTIVE_SKIPS})`);
|
||||
}
|
||||
|
||||
// ─── Threshold constant is sane ──────────────────────────────────────
|
||||
console.log("\n=== skip loop counter: threshold is reasonable ===");
|
||||
{
|
||||
assertTrue(MAX_CONSECUTIVE_SKIPS >= 3, "threshold allows a few legitimate skips");
|
||||
assertTrue(MAX_CONSECUTIVE_SKIPS <= 10, "threshold catches loops quickly");
|
||||
}
|
||||
|
||||
// ─── Reset clears all keys ────────────────────────────────────────────
|
||||
console.log("\n=== skip loop counter: reset clears all keys ===");
|
||||
{
|
||||
_resetUnitConsecutiveSkips();
|
||||
const map = _getUnitConsecutiveSkips();
|
||||
map.set("plan-slice/M001/S01", 2);
|
||||
map.set("plan-slice/M001/S02", 1);
|
||||
assertEq(map.size, 2, "map has 2 entries before reset");
|
||||
|
||||
_resetUnitConsecutiveSkips();
|
||||
assertEq(_getUnitConsecutiveSkips().size, 0, "map empty after reset");
|
||||
}
|
||||
|
||||
// ─── Eviction path: persistCompletedKey + removePersistedKey round-trip
|
||||
// (simulates what the loop-breaker does) ───────────────────────────
|
||||
console.log("\n=== skip loop counter: eviction removes persisted key ===");
|
||||
{
|
||||
_resetUnitConsecutiveSkips();
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
const key = "plan-slice/M001/S04";
|
||||
const keySet = new Set<string>();
|
||||
|
||||
persistCompletedKey(base, key);
|
||||
loadPersistedKeys(base, keySet);
|
||||
assertTrue(keySet.has(key), "key persisted before eviction");
|
||||
|
||||
// Simulate loop-breaker eviction
|
||||
keySet.delete(key);
|
||||
removePersistedKey(base, key);
|
||||
const keySet2 = new Set<string>();
|
||||
loadPersistedKeys(base, keySet2);
|
||||
assertTrue(!keySet2.has(key), "key absent after eviction");
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Counter resets per-key, not globally ─────────────────────────────
|
||||
console.log("\n=== skip loop counter: per-key isolation ===");
|
||||
{
|
||||
_resetUnitConsecutiveSkips();
|
||||
const map = _getUnitConsecutiveSkips();
|
||||
map.set("plan-slice/M001/S04", MAX_CONSECUTIVE_SKIPS + 1);
|
||||
map.set("plan-slice/M001/S05", 1);
|
||||
|
||||
// Deleting S04 (eviction) should not affect S05
|
||||
map.delete("plan-slice/M001/S04");
|
||||
assertTrue(!map.has("plan-slice/M001/S04"), "S04 evicted");
|
||||
assertEq(map.get("plan-slice/M001/S05"), 1, "S05 counter unaffected");
|
||||
}
|
||||
|
||||
_resetUnitConsecutiveSkips();
|
||||
report();
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue