fix(auto-mode): prevent false milestone merge after complete-milestone failure (#4175)

When complete-milestone failed verification, auto-mode could end up merging
the worktree to main anyway and emit a metadata-only merge warning, creating
a misleading near-complete signal while the SUMMARY was never actually written.

The blocker-placeholder path for complete-milestone wrote a stub SUMMARY
without updating DB status, and stopAuto's SUMMARY-presence check treated
the stub as a legitimate completion signal.

- auto-post-unit.ts: skip blocker placeholder and pause auto-mode on
  complete-milestone verification retry exhaustion.
- auto-timeout-recovery.ts: same guard for the idle/hard timeout path.
- auto.ts: make stopAuto Step 4 DB-authoritative (getMilestone.status ===
  "complete") with SUMMARY-presence fallback only for DB-unavailable
  legacy projects.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeremy 2026-04-14 07:22:56 -05:00
parent 3a529f7a95
commit 9957152024
3 changed files with 67 additions and 14 deletions

View file

@ -612,6 +612,30 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
s.verificationRetryCount.set(retryKey, attempt);
if (attempt > MAX_VERIFICATION_RETRIES) {
// #4175: For complete-milestone, a blocker placeholder is harmful —
// the stub SUMMARY has no recovery value (milestone is terminal),
// it does not update DB status (so deriveState never advances),
// and it fools stopAuto's presence check into merging a milestone
// that was never legitimately completed. Pause auto-mode with a
// clear single failure signal and preserve the worktree branch.
if (s.currentUnit.type === "complete-milestone") {
debugLog("postUnit", {
phase: "artifact-verify-pause-complete-milestone",
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
attempt,
maxRetries: MAX_VERIFICATION_RETRIES,
});
s.verificationRetryCount.delete(retryKey);
s.pendingVerificationRetry = null;
ctx.ui.notify(
`Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /gsd auto once blockers are resolved.`,
"error",
);
await pauseAuto(ctx, pi);
return "dispatched";
}
// Retries exhausted — write a blocker placeholder so the pipeline
// can advance past this stuck unit (#2653).
debugLog("postUnit", {

View file

@ -230,6 +230,23 @@ export async function recoverTimedOutUnit(
return "recovered";
}
// #4175: For complete-milestone, never write a blocker placeholder — a stub
// SUMMARY has no recovery value (milestone is terminal), it does not update
// DB status, and downstream merge paths can treat the stub as a legitimate
// completion signal. Pause instead so the worktree branch is preserved.
if (unitType === "complete-milestone") {
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
phase: "paused",
recoveryAttempts: recoveryAttempts + 1,
lastRecoveryReason: reason,
});
ctx.ui.notify(
`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s) — worktree branch preserved. Re-run /gsd auto once blockers are resolved.`,
"error",
);
return "paused";
}
// Retries exhausted — write a blocker placeholder and advance the pipeline
// instead of silently stalling.
const placeholder = writeBlockerPlaceholder(

View file

@ -186,7 +186,7 @@ import {
deregisterSigtermHandler as _deregisterSigtermHandler,
detectWorkingTreeActivity,
} from "./auto-supervisor.js";
import { isDbAvailable } from "./gsd-db.js";
import { isDbAvailable, getMilestone } from "./gsd-db.js";
import { countPendingCaptures } from "./captures.js";
import { clearCmuxSidebar, logCmuxEvent, syncCmuxSidebar } from "../cmux/index.js";
@ -753,24 +753,36 @@ export async function stopAuto(
: { notify: () => {} };
const resolver = buildResolver();
// Check if the milestone is complete — SUMMARY file is the authoritative signal.
// Check if the milestone is complete. DB status is the authoritative
// signal — only a successful gsd_complete_milestone call flips it to
// "complete" (tools/complete-milestone.ts). SUMMARY file presence is
// NOT sufficient: a blocker placeholder stub or a partial write can
// leave a file behind without the milestone actually being done,
// which previously caused stopAuto to merge a failed milestone and
// emit a misleading metadata-only merge warning (#4175).
// DB-unavailable projects fall back to SUMMARY-file presence.
let milestoneComplete = false;
try {
const summaryPath = resolveMilestoneFile(
s.originalBasePath || s.basePath,
s.currentMilestoneId,
"SUMMARY",
);
if (!summaryPath) {
// Also check in the worktree path (SUMMARY may not be synced yet)
const wtSummaryPath = resolveMilestoneFile(
s.basePath,
if (isDbAvailable()) {
const dbRow = getMilestone(s.currentMilestoneId);
milestoneComplete = dbRow?.status === "complete";
} else {
const summaryPath = resolveMilestoneFile(
s.originalBasePath || s.basePath,
s.currentMilestoneId,
"SUMMARY",
);
milestoneComplete = wtSummaryPath !== null;
} else {
milestoneComplete = true;
if (!summaryPath) {
// Also check in the worktree path (SUMMARY may not be synced yet)
const wtSummaryPath = resolveMilestoneFile(
s.basePath,
s.currentMilestoneId,
"SUMMARY",
);
milestoneComplete = wtSummaryPath !== null;
} else {
milestoneComplete = true;
}
}
} catch (err) {
// Non-fatal — fall through to preserveBranch path