diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 87ef155f4..d3cc18967 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -211,6 +211,11 @@ const MAX_LIFETIME_DISPATCHES = 6; /** Tracks recovery attempt count per unit for backoff and diagnostics. */ const unitRecoveryCount = new Map(); +/** Track consecutive skips per unit — catches infinite skip loops where deriveState + * keeps returning the same already-completed unit. Reset on any real dispatch. */ +const unitConsecutiveSkips = new Map(); +const MAX_CONSECUTIVE_SKIPS = 3; + /** Persisted completed-unit keys — survives restarts. Loaded from .gsd/completed-units.json. */ const completedKeySet = new Set(); @@ -624,6 +629,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi stepMode = false; unitDispatchCount.clear(); unitRecoveryCount.clear(); + unitConsecutiveSkips.clear(); inFlightTools.clear(); lastBudgetAlertLevel = 0; unitLifetimeDispatches.clear(); @@ -714,6 +720,7 @@ export async function startAuto( basePath = base; unitDispatchCount.clear(); unitLifetimeDispatches.clear(); + unitConsecutiveSkips.clear(); // Re-initialize metrics in case ledger was lost during pause if (!getLedger()) initMetrics(base); // Ensure milestone ID is set on git service for integration branch resolution @@ -992,6 +999,7 @@ export async function startAuto( basePath = base; unitDispatchCount.clear(); unitRecoveryCount.clear(); + unitConsecutiveSkips.clear(); lastBudgetAlertLevel = 0; unitLifetimeDispatches.clear(); completedKeySet.clear(); @@ -1919,6 +1927,7 @@ async function dispatchNextUnit( // Reset stuck detection for new milestone unitDispatchCount.clear(); unitRecoveryCount.clear(); + unitConsecutiveSkips.clear(); unitLifetimeDispatches.clear(); // Clear completed-units.json for the finished milestone try { @@ -2286,6 +2295,26 @@ async function dispatchNextUnit( // Cross-validate: does the expected artifact actually exist? const artifactExists = verifyExpectedArtifact(unitType, unitId, basePath); if (artifactExists) { + // Guard against infinite skip loops: if deriveState keeps returning the + // same completed unit, consecutive skips will trip this breaker. Evict the + // key so the next dispatch forces full reconciliation instead of looping. + const skipCount = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1; + unitConsecutiveSkips.set(idempotencyKey, skipCount); + if (skipCount > MAX_CONSECUTIVE_SKIPS) { + unitConsecutiveSkips.delete(idempotencyKey); + completedKeySet.delete(idempotencyKey); + removePersistedKey(basePath, idempotencyKey); + invalidateStateCache(); + ctx.ui.notify( + `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount} times without advancing. Evicting completion record and forcing reconciliation.`, + "warning", + ); + _skipDepth++; + await new Promise(r => setTimeout(r, 50)); + await dispatchNextUnit(ctx, pi); + _skipDepth = Math.max(0, _skipDepth - 1); + return; + } ctx.ui.notify( `Skipping ${unitType} ${unitId} — already completed in a prior session. Advancing.`, "info", @@ -2315,6 +2344,24 @@ async function dispatchNextUnit( persistCompletedKey(basePath, idempotencyKey); completedKeySet.add(idempotencyKey); invalidateStateCache(); + // Same consecutive-skip guard as the idempotency path above. + const skipCount2 = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1; + unitConsecutiveSkips.set(idempotencyKey, skipCount2); + if (skipCount2 > MAX_CONSECUTIVE_SKIPS) { + unitConsecutiveSkips.delete(idempotencyKey); + completedKeySet.delete(idempotencyKey); + removePersistedKey(basePath, idempotencyKey); + invalidateStateCache(); + ctx.ui.notify( + `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount2} times without advancing. Evicting completion record and forcing reconciliation.`, + "warning", + ); + _skipDepth++; + await new Promise(r => setTimeout(r, 50)); + await dispatchNextUnit(ctx, pi); + _skipDepth = Math.max(0, _skipDepth - 1); + return; + } ctx.ui.notify( `Skipping ${unitType} ${unitId} — artifact exists but completion key was missing. Repaired and advancing.`, "info", @@ -2330,6 +2377,8 @@ async function dispatchNextUnit( // Pattern A→B→A→B would reset retryCount every time; this map catches it. const dispatchKey = `${unitType}/${unitId}`; const prevCount = unitDispatchCount.get(dispatchKey) ?? 0; + // Real dispatch reached — clear the consecutive-skip counter for this unit. + unitConsecutiveSkips.delete(dispatchKey); debugLog("dispatch-unit", { type: unitType, @@ -3275,6 +3324,14 @@ export { buildLoopRemediationSteps, } from "./auto-recovery.js"; +/** + * Test-only: expose skip-loop state for unit tests. + * Not part of the public API. + */ +export function _getUnitConsecutiveSkips(): Map { return unitConsecutiveSkips; } +export function _resetUnitConsecutiveSkips(): void { unitConsecutiveSkips.clear(); } +export { MAX_CONSECUTIVE_SKIPS }; + /** * Dispatch a hook unit directly, bypassing normal pre-dispatch hooks. * Used for manual hook triggers via /gsd run-hook. diff --git a/src/resources/extensions/gsd/tests/auto-skip-loop.test.ts b/src/resources/extensions/gsd/tests/auto-skip-loop.test.ts new file mode 100644 index 000000000..b50526115 --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-skip-loop.test.ts @@ -0,0 +1,123 @@ +/** + * auto-skip-loop.test.ts — Tests for the consecutive-skip loop breaker. + * + * Regression for #728: auto-mode infinite skip loop on previously completed + * plan-slice units when deriveState keeps returning the same unit. + * + * The skip paths in dispatchNextUnit track consecutive skips per unit via + * unitConsecutiveSkips. When the same unit is skipped > MAX_CONSECUTIVE_SKIPS + * times without a real dispatch in between, the completion record is evicted + * so deriveState can reconcile. + */ + +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { + _getUnitConsecutiveSkips, + _resetUnitConsecutiveSkips, + MAX_CONSECUTIVE_SKIPS, +} from "../auto.ts"; +import { persistCompletedKey, removePersistedKey, loadPersistedKeys } from "../auto-recovery.ts"; +import { createTestContext } from "./test-helpers.ts"; + +const { assertEq, assertTrue, report } = createTestContext(); + +function makeTmpBase(): string { + const dir = mkdtempSync(join(tmpdir(), "gsd-skip-loop-test-")); + mkdirSync(join(dir, ".gsd"), { recursive: true }); + return dir; +} + +async function main(): Promise { + // ─── Counter starts at zero ──────────────────────────────────────────── + console.log("\n=== skip loop counter: initial state ==="); + { + _resetUnitConsecutiveSkips(); + const map = _getUnitConsecutiveSkips(); + assertEq(map.size, 0, "counter map starts empty after reset"); + } + + // ─── Counter increments correctly ──────────────────────────────────── + console.log("\n=== skip loop counter: increments on repeated calls ==="); + { + _resetUnitConsecutiveSkips(); + const map = _getUnitConsecutiveSkips(); + const key = "plan-slice/M001/S04"; + + for (let i = 1; i <= MAX_CONSECUTIVE_SKIPS; i++) { + const prev = map.get(key) ?? 0; + map.set(key, prev + 1); + } + + assertEq(map.get(key), MAX_CONSECUTIVE_SKIPS, `counter reaches MAX_CONSECUTIVE_SKIPS (${MAX_CONSECUTIVE_SKIPS})`); + } + + // ─── Threshold constant is sane ────────────────────────────────────── + console.log("\n=== skip loop counter: threshold is reasonable ==="); + { + assertTrue(MAX_CONSECUTIVE_SKIPS >= 3, "threshold allows a few legitimate skips"); + assertTrue(MAX_CONSECUTIVE_SKIPS <= 10, "threshold catches loops quickly"); + } + + // ─── Reset clears all keys ──────────────────────────────────────────── + console.log("\n=== skip loop counter: reset clears all keys ==="); + { + _resetUnitConsecutiveSkips(); + const map = _getUnitConsecutiveSkips(); + map.set("plan-slice/M001/S01", 2); + map.set("plan-slice/M001/S02", 1); + assertEq(map.size, 2, "map has 2 entries before reset"); + + _resetUnitConsecutiveSkips(); + assertEq(_getUnitConsecutiveSkips().size, 0, "map empty after reset"); + } + + // ─── Eviction path: persistCompletedKey + removePersistedKey round-trip + // (simulates what the loop-breaker does) ─────────────────────────── + console.log("\n=== skip loop counter: eviction removes persisted key ==="); + { + _resetUnitConsecutiveSkips(); + const base = makeTmpBase(); + try { + const key = "plan-slice/M001/S04"; + const keySet = new Set(); + + persistCompletedKey(base, key); + loadPersistedKeys(base, keySet); + assertTrue(keySet.has(key), "key persisted before eviction"); + + // Simulate loop-breaker eviction + keySet.delete(key); + removePersistedKey(base, key); + const keySet2 = new Set(); + loadPersistedKeys(base, keySet2); + assertTrue(!keySet2.has(key), "key absent after eviction"); + } finally { + rmSync(base, { recursive: true, force: true }); + } + } + + // ─── Counter resets per-key, not globally ───────────────────────────── + console.log("\n=== skip loop counter: per-key isolation ==="); + { + _resetUnitConsecutiveSkips(); + const map = _getUnitConsecutiveSkips(); + map.set("plan-slice/M001/S04", MAX_CONSECUTIVE_SKIPS + 1); + map.set("plan-slice/M001/S05", 1); + + // Deleting S04 (eviction) should not affect S05 + map.delete("plan-slice/M001/S04"); + assertTrue(!map.has("plan-slice/M001/S04"), "S04 evicted"); + assertEq(map.get("plan-slice/M001/S05"), 1, "S05 counter unaffected"); + } + + _resetUnitConsecutiveSkips(); + report(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +});