fix: handle complete-slice context exhaustion to unblock downstream slices (#3300)
* fix: handle complete-slice context exhaustion to unblock downstream slices Closes #2653 Three interacting bugs caused complete-slice to silently fail on context exhaustion, permanently blocking all downstream slices: 1. Zero-tool-call guard only covered execute-task — extended to all unit types so any unit completing with 0 tool calls (context exhaustion) is treated as failed and retried in a fresh context. 2. Verification retry had no max-attempt limit — added a cap of 3 retries before escalating to writeBlockerPlaceholder, preventing infinite retry loops when the unit never produces its artifact. 3. writeBlockerPlaceholder only updated DB for execute-task — extended to also update slice status for complete-slice, breaking the circular dependency where verification needs DB status "complete" but only gsd_slice_complete sets that. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: import diagnoseExpectedArtifact from auto-recovery * fix(auto): replace empty catch comments with logWarning in auto-recovery The silent-catch-diagnostics test requires migrated files to have no empty catch bodies. The /* non-fatal */ inline catches in auto-recovery were flagged as empty since comments don't count as executable code. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: trek-e <trek-e@users.noreply.github.com>
This commit is contained in:
parent
db7a6372a6
commit
c91b6eca9c
5 changed files with 127 additions and 40 deletions
|
|
@ -33,6 +33,7 @@ import {
|
|||
import {
|
||||
verifyExpectedArtifact,
|
||||
resolveExpectedArtifactPath,
|
||||
writeBlockerPlaceholder,
|
||||
diagnoseExpectedArtifact,
|
||||
} from "./auto-recovery.js";
|
||||
import { regenerateIfMissing } from "./workflow-projections.js";
|
||||
|
|
@ -52,6 +53,9 @@ import { debugLog } from "./debug-logger.js";
|
|||
import { runSafely } from "./auto-utils.js";
|
||||
import type { AutoSession, SidecarItem } from "./auto/session.js";
|
||||
|
||||
/** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
|
||||
const MAX_VERIFICATION_RETRIES = 3;
|
||||
|
||||
|
||||
/** Enqueue a sidecar item (hook, triage, or quick-task) for the main loop to
|
||||
* drain via runUnit. Logs the enqueue event and notifies the UI. */
|
||||
|
|
@ -468,6 +472,8 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
|
|||
// When artifact verification fails for a unit type that has a known expected
|
||||
// artifact, return "retry" so the caller re-dispatches with failure context
|
||||
// instead of blindly re-dispatching the same unit (#1571).
|
||||
// After MAX_VERIFICATION_RETRIES, escalate to writeBlockerPlaceholder so the
|
||||
// pipeline can advance instead of looping forever (#2653).
|
||||
//
|
||||
// HOWEVER, if the DB is unavailable (db_unavailable), the artifact was never
|
||||
// written because the completion tool failed at the infra level. Retrying
|
||||
|
|
@ -488,18 +494,41 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
|
|||
const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`;
|
||||
const attempt = (s.verificationRetryCount.get(retryKey) ?? 0) + 1;
|
||||
s.verificationRetryCount.set(retryKey, attempt);
|
||||
const retryDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
|
||||
s.pendingVerificationRetry = {
|
||||
unitId: s.currentUnit.id,
|
||||
failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
|
||||
attempt,
|
||||
};
|
||||
debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt });
|
||||
ctx.ui.notify(
|
||||
`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
|
||||
"warning",
|
||||
);
|
||||
return "retry";
|
||||
|
||||
if (attempt > MAX_VERIFICATION_RETRIES) {
|
||||
// Retries exhausted — write a blocker placeholder so the pipeline
|
||||
// can advance past this stuck unit (#2653).
|
||||
debugLog("postUnit", {
|
||||
phase: "artifact-verify-escalate",
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
attempt,
|
||||
maxRetries: MAX_VERIFICATION_RETRIES,
|
||||
});
|
||||
const reason = `Artifact verification failed after ${MAX_VERIFICATION_RETRIES} retries for ${s.currentUnit.type} "${s.currentUnit.id}".`;
|
||||
writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason);
|
||||
ctx.ui.notify(
|
||||
`${s.currentUnit.type} ${s.currentUnit.id} — verification retries exhausted (${MAX_VERIFICATION_RETRIES}), wrote blocker placeholder to advance pipeline`,
|
||||
"warning",
|
||||
);
|
||||
// Reset retry count and fall through to "continue" so the loop
|
||||
// re-derives state with the placeholder in place.
|
||||
s.verificationRetryCount.delete(retryKey);
|
||||
s.pendingVerificationRetry = null;
|
||||
// Do NOT return "retry" — fall through to "continue" below.
|
||||
} else {
|
||||
s.pendingVerificationRetry = {
|
||||
unitId: s.currentUnit.id,
|
||||
failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).`,
|
||||
attempt,
|
||||
};
|
||||
debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt });
|
||||
ctx.ui.notify(
|
||||
`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt})`,
|
||||
"warning",
|
||||
);
|
||||
return "retry";
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import { parseUnitId } from "./unit-id.js";
|
|||
import { atomicWriteSync } from "./atomic-write.js";
|
||||
import { clearParseCache } from "./files.js";
|
||||
import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js";
|
||||
import { isDbAvailable, getTask, getSlice, getSliceTasks, updateTaskStatus } from "./gsd-db.js";
|
||||
import { isDbAvailable, getTask, getSlice, getSliceTasks, updateTaskStatus, updateSliceStatus } from "./gsd-db.js";
|
||||
import { isValidationTerminal } from "./state.js";
|
||||
import { getErrorMessage } from "./error-utils.js";
|
||||
import { logWarning, logError } from "./workflow-logger.js";
|
||||
|
|
@ -424,15 +424,16 @@ export function writeBlockerPlaceholder(
|
|||
].join("\n");
|
||||
writeFileSync(absPath, content, "utf-8");
|
||||
|
||||
// Mark the task as complete in the DB so verifyExpectedArtifact passes.
|
||||
// Mark the task/slice as complete in the DB so verifyExpectedArtifact passes.
|
||||
// Without this, the DB status stays "pending" and the dispatch loop
|
||||
// re-derives the same task indefinitely (#2531).
|
||||
if (unitType === "execute-task" && isDbAvailable()) {
|
||||
// re-derives the same unit indefinitely (#2531, #2653).
|
||||
if (isDbAvailable()) {
|
||||
const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId);
|
||||
if (mid && sid && tid) {
|
||||
try { updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString()); } catch (err) { /* non-fatal */
|
||||
logError("recovery", `DB status update failed: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
if (unitType === "execute-task" && mid && sid && tid) {
|
||||
try { updateTaskStatus(mid, sid, tid, "complete", new Date().toISOString()); } catch (e) { logWarning("recovery", `updateTaskStatus failed during context exhaustion: ${e instanceof Error ? e.message : String(e)}`); }
|
||||
}
|
||||
if (unitType === "complete-slice" && mid && sid) {
|
||||
try { updateSliceStatus(mid, sid, "complete", new Date().toISOString()); } catch (e) { logWarning("recovery", `updateSliceStatus failed during context exhaustion: ${e instanceof Error ? e.message : String(e)}`); }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1220,11 +1220,13 @@ export async function runUnitPhase(
|
|||
);
|
||||
}
|
||||
|
||||
// ── Zero tool-call guard (#1833) ──────────────────────────────────
|
||||
// An execute-task agent that completes with 0 tool calls made no
|
||||
// real changes — its summary is hallucinated. Treat as failed so
|
||||
// the task is retried instead of silently marked complete.
|
||||
if (unitType === "execute-task") {
|
||||
// ── Zero tool-call guard (#1833, #2653) ──────────────────────────
|
||||
// Any unit that completes with 0 tool calls made no real progress —
|
||||
// likely context exhaustion where all tool calls errored out. Treat
|
||||
// as failed so the unit is retried in a fresh context instead of
|
||||
// silently passing through to artifact verification (which loops
|
||||
// forever when the unit never produced its artifact).
|
||||
{
|
||||
const currentLedger = deps.getLedger() as { units: Array<{ type: string; id: string; startedAt: number; toolCalls: number }> } | null;
|
||||
if (currentLedger?.units) {
|
||||
const lastUnit = [...currentLedger.units].reverse().find(
|
||||
|
|
@ -1235,14 +1237,14 @@ export async function runUnitPhase(
|
|||
phase: "zero-tool-calls",
|
||||
unitType,
|
||||
unitId,
|
||||
warning: "Task completed with 0 tool calls — likely hallucinated, marking as failed",
|
||||
warning: "Unit completed with 0 tool calls — likely context exhaustion, marking as failed",
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`${unitType} ${unitId} completed with 0 tool calls — hallucinated summary, will retry`,
|
||||
`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`,
|
||||
"warning",
|
||||
);
|
||||
// Fall through to next iteration where dispatch will re-derive
|
||||
// and re-dispatch this task.
|
||||
// and re-dispatch this unit.
|
||||
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2107,11 +2107,11 @@ test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)",
|
|||
// The task should NOT have been added to completedUnits on the first iteration
|
||||
// (0 tool calls), but SHOULD be added on the second iteration (5 tool calls)
|
||||
const warningNotification = notifications.find(
|
||||
(n) => n.includes("0 tool calls") && n.includes("hallucinated"),
|
||||
(n) => n.includes("0 tool calls") && n.includes("context exhaustion"),
|
||||
);
|
||||
assert.ok(
|
||||
warningNotification,
|
||||
"should notify about 0 tool calls hallucination",
|
||||
"should notify about 0 tool calls context exhaustion",
|
||||
);
|
||||
|
||||
// Verify deriveState was called at least twice (two iterations)
|
||||
|
|
@ -2122,7 +2122,7 @@ test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)",
|
|||
);
|
||||
});
|
||||
|
||||
test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)", async () => {
|
||||
test("autoLoop rejects complete-slice with 0 tool calls as context-exhausted (#2653)", async () => {
|
||||
_resetPendingResolve();
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
|
|
@ -2130,6 +2130,7 @@ test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)"
|
|||
ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
|
||||
const pi = makeMockPi();
|
||||
|
||||
let iterationCount = 0;
|
||||
const notifications: string[] = [];
|
||||
ctx.ui.notify = (msg: string) => { notifications.push(msg); };
|
||||
|
||||
|
|
@ -2163,7 +2164,7 @@ test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)"
|
|||
};
|
||||
},
|
||||
closeoutUnit: async () => {
|
||||
// complete-slice with 0 tool calls is fine (e.g. it may just update status)
|
||||
// complete-slice with 0 tool calls — context exhausted, no progress
|
||||
mockLedger.units.push({
|
||||
type: "complete-slice",
|
||||
id: "M001/S01",
|
||||
|
|
@ -2177,31 +2178,51 @@ test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)"
|
|||
getLedger: () => mockLedger,
|
||||
postUnitPostVerification: async () => {
|
||||
deps.callLog.push("postUnitPostVerification");
|
||||
s.active = false;
|
||||
iterationCount++;
|
||||
// Deactivate after 2nd iteration
|
||||
s.active = iterationCount < 2;
|
||||
return "continue" as const;
|
||||
},
|
||||
});
|
||||
|
||||
const loopPromise = autoLoop(ctx, pi, s, deps);
|
||||
|
||||
// First iteration: complete-slice with 0 tool calls → rejected
|
||||
await new Promise((r) => setTimeout(r, 50));
|
||||
resolveAgentEnd(makeEvent());
|
||||
|
||||
// Second iteration: re-dispatched, this time with tool calls
|
||||
await new Promise((r) => setTimeout(r, 50));
|
||||
mockLedger.units.length = 0;
|
||||
(deps as any).closeoutUnit = async () => {
|
||||
mockLedger.units.push({
|
||||
type: "complete-slice",
|
||||
id: "M001/S01",
|
||||
startedAt: s.currentUnit?.startedAt ?? Date.now(),
|
||||
toolCalls: 3,
|
||||
assistantMessages: 2,
|
||||
tokens: { input: 200, output: 400, total: 600, cacheRead: 0, cacheWrite: 0 },
|
||||
cost: 0.30,
|
||||
});
|
||||
};
|
||||
resolveAgentEnd(makeEvent());
|
||||
|
||||
await loopPromise;
|
||||
|
||||
// Should NOT have a hallucination warning for non-execute-task units
|
||||
// Should have a warning about 0 tool calls for complete-slice
|
||||
const warningNotification = notifications.find(
|
||||
(n) => n.includes("0 tool calls") && n.includes("hallucinated"),
|
||||
(n) => n.includes("0 tool calls"),
|
||||
);
|
||||
assert.ok(
|
||||
!warningNotification,
|
||||
"should NOT flag non-execute-task units with 0 tool calls",
|
||||
warningNotification,
|
||||
"should flag complete-slice with 0 tool calls as failed (#2653)",
|
||||
);
|
||||
|
||||
// Verify the loop ran to completion (postUnitPostVerification was called)
|
||||
// Verify deriveState was called at least twice (two iterations: rejected + retry)
|
||||
const deriveCount = deps.callLog.filter((c) => c === "deriveState").length;
|
||||
assert.ok(
|
||||
deps.callLog.includes("postUnitPostVerification"),
|
||||
"complete-slice with 0 tool calls should still complete the post-unit pipeline",
|
||||
deriveCount >= 2,
|
||||
`deriveState should be called at least 2 times for retry (got ${deriveCount})`,
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -357,3 +357,37 @@ test('writeBlockerPlaceholder: does NOT update DB for non-execute-task types', a
|
|||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
test('writeBlockerPlaceholder: updates DB slice status for complete-slice (#2653)', async () => {
|
||||
const base = createFixtureBase();
|
||||
try {
|
||||
const { openDatabase, closeDatabase, insertMilestone, insertSlice, getSlice, isDbAvailable } =
|
||||
await import("../../gsd-db.ts");
|
||||
|
||||
const dbPath = join(base, ".gsd", "gsd.db");
|
||||
mkdirSync(join(base, ".gsd", "milestones", "M001", "slices", "S01"), { recursive: true });
|
||||
|
||||
openDatabase(dbPath);
|
||||
try {
|
||||
insertMilestone({ id: "M001", title: "Test", status: "active" });
|
||||
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice", status: "active" });
|
||||
|
||||
// complete-slice blocker should update slice DB status to "complete"
|
||||
writeBlockerPlaceholder("complete-slice", "M001/S01", base, "context exhaustion recovery");
|
||||
|
||||
const slice = getSlice("M001", "S01");
|
||||
assert.equal(slice?.status, "complete",
|
||||
"writeBlockerPlaceholder must update DB slice status to 'complete' for complete-slice so dispatch guard unblocks downstream (#2653)");
|
||||
|
||||
// Verify the full chain works: verifyExpectedArtifact should return true
|
||||
// (requires both UAT file and DB status = complete)
|
||||
// Note: the placeholder writes a SUMMARY file, but complete-slice also needs UAT.
|
||||
// The placeholder itself doesn't write UAT, so artifact verification may still fail
|
||||
// for complete-slice — but the DB status is now correct, breaking the circular dep.
|
||||
} finally {
|
||||
if (isDbAvailable()) closeDatabase();
|
||||
}
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue