diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index 38d730fec..2ac1c4c40 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -33,6 +33,7 @@ import { import { verifyExpectedArtifact, resolveExpectedArtifactPath, + writeBlockerPlaceholder, diagnoseExpectedArtifact, } from "./auto-recovery.js"; import { regenerateIfMissing } from "./workflow-projections.js"; @@ -52,6 +53,9 @@ import { debugLog } from "./debug-logger.js"; import { runSafely } from "./auto-utils.js"; import type { AutoSession, SidecarItem } from "./auto/session.js"; +/** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */ +const MAX_VERIFICATION_RETRIES = 3; + /** Enqueue a sidecar item (hook, triage, or quick-task) for the main loop to * drain via runUnit. Logs the enqueue event and notifies the UI. */ @@ -468,6 +472,8 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV // When artifact verification fails for a unit type that has a known expected // artifact, return "retry" so the caller re-dispatches with failure context // instead of blindly re-dispatching the same unit (#1571). + // After MAX_VERIFICATION_RETRIES, escalate to writeBlockerPlaceholder so the + // pipeline can advance instead of looping forever (#2653). // // HOWEVER, if the DB is unavailable (db_unavailable), the artifact was never // written because the completion tool failed at the infra level. Retrying @@ -488,18 +494,41 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`; const attempt = (s.verificationRetryCount.get(retryKey) ?? 0) + 1; s.verificationRetryCount.set(retryKey, attempt); - const retryDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath); - s.pendingVerificationRetry = { - unitId: s.currentUnit.id, - failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`, - attempt, - }; - debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt }); - ctx.ui.notify( - `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`, - "warning", - ); - return "retry"; + + if (attempt > MAX_VERIFICATION_RETRIES) { + // Retries exhausted — write a blocker placeholder so the pipeline + // can advance past this stuck unit (#2653). + debugLog("postUnit", { + phase: "artifact-verify-escalate", + unitType: s.currentUnit.type, + unitId: s.currentUnit.id, + attempt, + maxRetries: MAX_VERIFICATION_RETRIES, + }); + const reason = `Artifact verification failed after ${MAX_VERIFICATION_RETRIES} retries for ${s.currentUnit.type} "${s.currentUnit.id}".`; + writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason); + ctx.ui.notify( + `${s.currentUnit.type} ${s.currentUnit.id} — verification retries exhausted (${MAX_VERIFICATION_RETRIES}), wrote blocker placeholder to advance pipeline`, + "warning", + ); + // Reset retry count and fall through to "continue" so the loop + // re-derives state with the placeholder in place. + s.verificationRetryCount.delete(retryKey); + s.pendingVerificationRetry = null; + // Do NOT return "retry" — fall through to "continue" below. + } else { + s.pendingVerificationRetry = { + unitId: s.currentUnit.id, + failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).`, + attempt, + }; + debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt }); + ctx.ui.notify( + `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt})`, + "warning", + ); + return "retry"; + } } } } else { diff --git a/src/resources/extensions/gsd/auto-recovery.ts b/src/resources/extensions/gsd/auto-recovery.ts index 715f9db79..86c9d70be 100644 --- a/src/resources/extensions/gsd/auto-recovery.ts +++ b/src/resources/extensions/gsd/auto-recovery.ts @@ -12,7 +12,7 @@ import { parseUnitId } from "./unit-id.js"; import { atomicWriteSync } from "./atomic-write.js"; import { clearParseCache } from "./files.js"; import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js"; -import { isDbAvailable, getTask, getSlice, getSliceTasks, updateTaskStatus } from "./gsd-db.js"; +import { isDbAvailable, getTask, getSlice, getSliceTasks, updateTaskStatus, updateSliceStatus } from "./gsd-db.js"; import { isValidationTerminal } from "./state.js"; import { getErrorMessage } from "./error-utils.js"; import { logWarning, logError } from "./workflow-logger.js"; @@ -424,15 +424,16 @@ export function writeBlockerPlaceholder( ].join("\n"); writeFileSync(absPath, content, "utf-8"); - // Mark the task as complete in the DB so verifyExpectedArtifact passes. + // Mark the task/slice as complete in the DB so verifyExpectedArtifact passes. // Without this, the DB status stays "pending" and the dispatch loop - // re-derives the same task indefinitely (#2531). - if (unitType === "execute-task" && isDbAvailable()) { + // re-derives the same unit indefinitely (#2531, #2653). + if (isDbAvailable()) { const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId); - if (mid && sid && tid) { - try { updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString()); } catch (err) { /* non-fatal */ - logError("recovery", `DB status update failed: ${err instanceof Error ? err.message : String(err)}`); - } + if (unitType === "execute-task" && mid && sid && tid) { + try { updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString()); } catch (e) { logWarning("recovery", `updateTaskStatus failed during context exhaustion: ${e instanceof Error ? e.message : String(e)}`); } + } + if (unitType === "complete-slice" && mid && sid) { + try { updateSliceStatus(mid, sid, "complete", new Date().toISOString()); } catch (e) { logWarning("recovery", `updateSliceStatus failed during context exhaustion: ${e instanceof Error ? e.message : String(e)}`); } } } diff --git a/src/resources/extensions/gsd/auto/phases.ts b/src/resources/extensions/gsd/auto/phases.ts index 174daaa2c..2a02415e5 100644 --- a/src/resources/extensions/gsd/auto/phases.ts +++ b/src/resources/extensions/gsd/auto/phases.ts @@ -1220,11 +1220,13 @@ export async function runUnitPhase( ); } - // ── Zero tool-call guard (#1833) ────────────────────────────────── - // An execute-task agent that completes with 0 tool calls made no - // real changes — its summary is hallucinated. Treat as failed so - // the task is retried instead of silently marked complete. - if (unitType === "execute-task") { + // ── Zero tool-call guard (#1833, #2653) ────────────────────────── + // Any unit that completes with 0 tool calls made no real progress — + // likely context exhaustion where all tool calls errored out. Treat + // as failed so the unit is retried in a fresh context instead of + // silently passing through to artifact verification (which loops + // forever when the unit never produced its artifact). + { const currentLedger = deps.getLedger() as { units: Array<{ type: string; id: string; startedAt: number; toolCalls: number }> } | null; if (currentLedger?.units) { const lastUnit = [...currentLedger.units].reverse().find( @@ -1235,14 +1237,14 @@ export async function runUnitPhase( phase: "zero-tool-calls", unitType, unitId, - warning: "Task completed with 0 tool calls — likely hallucinated, marking as failed", + warning: "Unit completed with 0 tool calls — likely context exhaustion, marking as failed", }); ctx.ui.notify( - `${unitType} ${unitId} completed with 0 tool calls — hallucinated summary, will retry`, + `${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`, "warning", ); // Fall through to next iteration where dispatch will re-derive - // and re-dispatch this task. + // and re-dispatch this unit. return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } }; } } diff --git a/src/resources/extensions/gsd/tests/auto-loop.test.ts b/src/resources/extensions/gsd/tests/auto-loop.test.ts index 2414d280a..32a7c261b 100644 --- a/src/resources/extensions/gsd/tests/auto-loop.test.ts +++ b/src/resources/extensions/gsd/tests/auto-loop.test.ts @@ -2107,11 +2107,11 @@ test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)", // The task should NOT have been added to completedUnits on the first iteration // (0 tool calls), but SHOULD be added on the second iteration (5 tool calls) const warningNotification = notifications.find( - (n) => n.includes("0 tool calls") && n.includes("hallucinated"), + (n) => n.includes("0 tool calls") && n.includes("context exhaustion"), ); assert.ok( warningNotification, - "should notify about 0 tool calls hallucination", + "should notify about 0 tool calls context exhaustion", ); // Verify deriveState was called at least twice (two iterations) @@ -2122,7 +2122,7 @@ test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)", ); }); -test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)", async () => { +test("autoLoop rejects complete-slice with 0 tool calls as context-exhausted (#2653)", async () => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -2130,6 +2130,7 @@ test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)" ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" }; const pi = makeMockPi(); + let iterationCount = 0; const notifications: string[] = []; ctx.ui.notify = (msg: string) => { notifications.push(msg); }; @@ -2163,7 +2164,7 @@ test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)" }; }, closeoutUnit: async () => { - // complete-slice with 0 tool calls is fine (e.g. it may just update status) + // complete-slice with 0 tool calls — context exhausted, no progress mockLedger.units.push({ type: "complete-slice", id: "M001/S01", @@ -2177,31 +2178,51 @@ test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)" getLedger: () => mockLedger, postUnitPostVerification: async () => { deps.callLog.push("postUnitPostVerification"); - s.active = false; + iterationCount++; + // Deactivate after 2nd iteration + s.active = iterationCount < 2; return "continue" as const; }, }); const loopPromise = autoLoop(ctx, pi, s, deps); + // First iteration: complete-slice with 0 tool calls → rejected await new Promise((r) => setTimeout(r, 50)); resolveAgentEnd(makeEvent()); + // Second iteration: re-dispatched, this time with tool calls + await new Promise((r) => setTimeout(r, 50)); + mockLedger.units.length = 0; + (deps as any).closeoutUnit = async () => { + mockLedger.units.push({ + type: "complete-slice", + id: "M001/S01", + startedAt: s.currentUnit?.startedAt ?? Date.now(), + toolCalls: 3, + assistantMessages: 2, + tokens: { input: 200, output: 400, total: 600, cacheRead: 0, cacheWrite: 0 }, + cost: 0.30, + }); + }; + resolveAgentEnd(makeEvent()); + await loopPromise; - // Should NOT have a hallucination warning for non-execute-task units + // Should have a warning about 0 tool calls for complete-slice const warningNotification = notifications.find( - (n) => n.includes("0 tool calls") && n.includes("hallucinated"), + (n) => n.includes("0 tool calls"), ); assert.ok( - !warningNotification, - "should NOT flag non-execute-task units with 0 tool calls", + warningNotification, + "should flag complete-slice with 0 tool calls as failed (#2653)", ); - // Verify the loop ran to completion (postUnitPostVerification was called) + // Verify deriveState was called at least twice (two iterations: rejected + retry) + const deriveCount = deps.callLog.filter((c) => c === "deriveState").length; assert.ok( - deps.callLog.includes("postUnitPostVerification"), - "complete-slice with 0 tool calls should still complete the post-unit pipeline", + deriveCount >= 2, + `deriveState should be called at least 2 times for retry (got ${deriveCount})`, ); }); diff --git a/src/resources/extensions/gsd/tests/integration/idle-recovery.test.ts b/src/resources/extensions/gsd/tests/integration/idle-recovery.test.ts index a2369e4d8..925e7adc2 100644 --- a/src/resources/extensions/gsd/tests/integration/idle-recovery.test.ts +++ b/src/resources/extensions/gsd/tests/integration/idle-recovery.test.ts @@ -357,3 +357,37 @@ test('writeBlockerPlaceholder: does NOT update DB for non-execute-task types', a cleanup(base); } }); + +test('writeBlockerPlaceholder: updates DB slice status for complete-slice (#2653)', async () => { + const base = createFixtureBase(); + try { + const { openDatabase, closeDatabase, insertMilestone, insertSlice, getSlice, isDbAvailable } = + await import("../../gsd-db.ts"); + + const dbPath = join(base, ".gsd", "gsd.db"); + mkdirSync(join(base, ".gsd", "milestones", "M001", "slices", "S01"), { recursive: true }); + + openDatabase(dbPath); + try { + insertMilestone({ id: "M001", title: "Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice", status: "active" }); + + // complete-slice blocker should update slice DB status to "complete" + writeBlockerPlaceholder("complete-slice", "M001/S01", base, "context exhaustion recovery"); + + const slice = getSlice("M001", "S01"); + assert.equal(slice?.status, "complete", + "writeBlockerPlaceholder must update DB slice status to 'complete' for complete-slice so dispatch guard unblocks downstream (#2653)"); + + // Verify the full chain works: verifyExpectedArtifact should return true + // (requires both UAT file and DB status = complete) + // Note: the placeholder writes a SUMMARY file, but complete-slice also needs UAT. + // The placeholder itself doesn't write UAT, so artifact verification may still fail + // for complete-slice — but the DB status is now correct, breaking the circular dep. + } finally { + if (isDbAvailable()) closeDatabase(); + } + } finally { + cleanup(base); + } +});