Merge pull request #4178 from jeremymcs/fix/4175-complete-milestone-false-merge

fix(auto-mode): prevent false milestone merge after complete-milestone failure (#4175)
This commit is contained in:
Jeremy McSpadden 2026-04-14 07:53:41 -05:00 committed by GitHub
commit bdef500c85
6 changed files with 301 additions and 36 deletions

View file

@ -632,6 +632,30 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
s.verificationRetryCount.set(retryKey, attempt);
if (attempt > MAX_VERIFICATION_RETRIES) {
// #4175: For complete-milestone, a blocker placeholder is harmful —
// the stub SUMMARY has no recovery value (milestone is terminal),
// it does not update DB status (so deriveState never advances),
// and it fools stopAuto's presence check into merging a milestone
// that was never legitimately completed. Pause auto-mode with a
// clear single failure signal and preserve the worktree branch.
if (s.currentUnit.type === "complete-milestone") {
debugLog("postUnit", {
phase: "artifact-verify-pause-complete-milestone",
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
attempt,
maxRetries: MAX_VERIFICATION_RETRIES,
});
s.verificationRetryCount.delete(retryKey);
s.pendingVerificationRetry = null;
ctx.ui.notify(
`Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /gsd auto once blockers are resolved.`,
"error",
);
await pauseAuto(ctx, pi);
return "dispatched";
}
// Retries exhausted — write a blocker placeholder so the pipeline
// can advance past this stuck unit (#2653).
debugLog("postUnit", {

View file

@ -230,6 +230,23 @@ export async function recoverTimedOutUnit(
return "recovered";
}
// #4175: For complete-milestone, never write a blocker placeholder — a stub
// SUMMARY has no recovery value (milestone is terminal), it does not update
// DB status, and downstream merge paths can treat the stub as a legitimate
// completion signal. Pause instead so the worktree branch is preserved.
if (unitType === "complete-milestone") {
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
phase: "paused",
recoveryAttempts: recoveryAttempts + 1,
lastRecoveryReason: reason,
});
ctx.ui.notify(
`Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s) — worktree branch preserved. Re-run /gsd auto once blockers are resolved.`,
"error",
);
return "paused";
}
// Retries exhausted — write a blocker placeholder and advance the pipeline
// instead of silently stalling.
const placeholder = writeBlockerPlaceholder(

View file

@ -187,7 +187,7 @@ import {
deregisterSigtermHandler as _deregisterSigtermHandler,
detectWorkingTreeActivity,
} from "./auto-supervisor.js";
import { isDbAvailable } from "./gsd-db.js";
import { isDbAvailable, getMilestone } from "./gsd-db.js";
import { countPendingCaptures } from "./captures.js";
import { clearCmuxSidebar, logCmuxEvent, syncCmuxSidebar } from "../cmux/index.js";
@ -761,24 +761,36 @@ export async function stopAuto(
: { notify: () => {} };
const resolver = buildResolver();
// Check if the milestone is complete — SUMMARY file is the authoritative signal.
// Check if the milestone is complete. DB status is the authoritative
// signal — only a successful gsd_complete_milestone call flips it to
// "complete" (tools/complete-milestone.ts). SUMMARY file presence is
// NOT sufficient: a blocker placeholder stub or a partial write can
// leave a file behind without the milestone actually being done,
// which previously caused stopAuto to merge a failed milestone and
// emit a misleading metadata-only merge warning (#4175).
// DB-unavailable projects fall back to SUMMARY-file presence.
let milestoneComplete = false;
try {
const summaryPath = resolveMilestoneFile(
s.originalBasePath || s.basePath,
s.currentMilestoneId,
"SUMMARY",
);
if (!summaryPath) {
// Also check in the worktree path (SUMMARY may not be synced yet)
const wtSummaryPath = resolveMilestoneFile(
s.basePath,
if (isDbAvailable()) {
const dbRow = getMilestone(s.currentMilestoneId);
milestoneComplete = dbRow?.status === "complete";
} else {
const summaryPath = resolveMilestoneFile(
s.originalBasePath || s.basePath,
s.currentMilestoneId,
"SUMMARY",
);
milestoneComplete = wtSummaryPath !== null;
} else {
milestoneComplete = true;
if (!summaryPath) {
// Also check in the worktree path (SUMMARY may not be synced yet)
const wtSummaryPath = resolveMilestoneFile(
s.basePath,
s.currentMilestoneId,
"SUMMARY",
);
milestoneComplete = wtSummaryPath !== null;
} else {
milestoneComplete = true;
}
}
} catch (err) {
// Non-fatal — fall through to preserveBranch path

View file

@ -386,6 +386,10 @@ function buildCompletenessSet(basePath: string, milestones: MilestoneRow[]) {
const completeMilestoneIds = new Set<string>();
const parkedMilestoneIds = new Set<string>();
// DB-authoritative: a milestone is only "complete" when its DB row says so.
// SUMMARY-file presence is NOT a completion signal here — an orphan SUMMARY
// (crashed complete-milestone turn, partial merge, manual edit) must not
// flip derived state to complete and cascade into a false auto-merge (#4179).
for (const m of milestones) {
const parkedFile = resolveMilestoneFile(basePath, m.id, "PARKED");
if (parkedFile || m.status === 'parked') {
@ -396,11 +400,6 @@ function buildCompletenessSet(basePath: string, milestones: MilestoneRow[]) {
completeMilestoneIds.add(m.id);
continue;
}
const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY");
if (summaryFile) {
completeMilestoneIds.add(m.id);
continue;
}
}
return { completeMilestoneIds, parkedMilestoneIds };
}
@ -429,18 +428,22 @@ async function buildRegistryAndFindActive(
if (isGhostMilestone(basePath, m.id)) continue;
}
const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY");
if (completeMilestoneIds.has(m.id) || (summaryFile !== null)) {
// DB-authoritative completeness (#4179): only trust completeMilestoneIds,
// which is itself derived from DB status. SUMMARY-file presence alone must
// not imply completion. The summary file may still be consulted below as a
// title source for legitimately-complete milestones whose DB row has no title.
if (completeMilestoneIds.has(m.id)) {
let title = stripMilestonePrefix(m.title) || m.id;
if (summaryFile && !m.title) {
const summaryContent = await loadFile(summaryFile);
if (summaryContent) {
title = parseSummary(summaryContent).title || m.id;
if (!m.title) {
const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY");
if (summaryFile) {
const summaryContent = await loadFile(summaryFile);
if (summaryContent) {
title = parseSummary(summaryContent).title || m.id;
}
}
}
registry.push({ id: m.id, title, status: 'complete' });
completeMilestoneIds.add(m.id);
continue;
}
@ -481,7 +484,14 @@ async function buildRegistryAndFindActive(
const validationContent = validationFile ? await loadFile(validationFile) : null;
const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false;
if (!validationTerminal || (validationTerminal && !summaryFile)) {
// DB-authoritative (#4179): completeness is already decided by
// completeMilestoneIds above. If we reached this branch, the DB says
// the milestone is NOT complete — so any SUMMARY file on disk is an
// orphan (crashed complete-milestone, partial merge, manual edit) and
// must not short-circuit this path. When validation is terminal, fall
// through to the default active-push below so `complete-milestone` can
// re-run idempotently.
if (!validationTerminal) {
activeMilestone = { id: m.id, title };
activeMilestoneSlices = slices;
activeMilestoneFound = true;

View file

@ -0,0 +1,142 @@
/**
* complete-milestone-false-merge.test.ts Regression test for #4175.
*
* Before the fix, a failed complete-milestone unit could leave a stub
* SUMMARY blocker placeholder on disk. stopAuto's SUMMARY-presence check
* then treated the milestone as complete and merged the worktree branch
* into main emitting a misleading metadata-only merge warning for a
* milestone that was never legitimately finished.
*
* The fix has three cooperating parts:
* 1. stopAuto uses DB status (authoritative) instead of SUMMARY presence
* when the project DB is available.
* 2. postUnitPreVerification pauses auto-mode for complete-milestone
* after retries are exhausted instead of writing a blocker placeholder.
* 3. recoverTimedOutUnit pauses for complete-milestone instead of
* writing a blocker placeholder.
*
* This test guards all three via source inspection so a future refactor
* cannot silently reintroduce the false-merge path.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join } from "node:path";
const gsdDir = join(import.meta.dirname, "..");
const autoSrc = readFileSync(join(gsdDir, "auto.ts"), "utf-8");
const postUnitSrc = readFileSync(join(gsdDir, "auto-post-unit.ts"), "utf-8");
const timeoutSrc = readFileSync(join(gsdDir, "auto-timeout-recovery.ts"), "utf-8");
test("#4175: stopAuto uses DB status as the authoritative milestone-complete signal", () => {
const step4Idx = autoSrc.indexOf("Step 4: Auto-worktree exit");
assert.ok(step4Idx !== -1, "Step 4 comment exists in stopAuto");
const step5Idx = autoSrc.indexOf("Step 5:", step4Idx);
const step4Block = autoSrc.slice(step4Idx, step5Idx);
assert.ok(
step4Block.includes("isDbAvailable()"),
"Step 4 should branch on isDbAvailable() so DB is consulted when present",
);
assert.ok(
step4Block.includes("getMilestone(s.currentMilestoneId)"),
"Step 4 should read authoritative milestone status via getMilestone()",
);
assert.ok(
/status\s*===\s*"complete"/.test(step4Block),
'Step 4 should compare the DB row status to "complete"',
);
});
test("#4175: stopAuto imports getMilestone from gsd-db", () => {
assert.ok(
/import\s*\{[^}]*\bgetMilestone\b[^}]*\}\s*from\s*"\.\/gsd-db\.js"/.test(autoSrc),
"auto.ts should import getMilestone from ./gsd-db.js",
);
});
test("#4175: stopAuto still falls back to SUMMARY presence when DB is unavailable", () => {
const step4Idx = autoSrc.indexOf("Step 4: Auto-worktree exit");
const step5Idx = autoSrc.indexOf("Step 5:", step4Idx);
const step4Block = autoSrc.slice(step4Idx, step5Idx);
assert.ok(
step4Block.includes("resolveMilestoneFile"),
"Step 4 should keep SUMMARY-file resolution for DB-unavailable projects",
);
assert.ok(
step4Block.includes("preserveBranch"),
"Step 4 should still preserve branch for incomplete milestones (fallback path)",
);
});
test("#4175: postUnitPreVerification pauses complete-milestone after retries exhausted", () => {
// The pause branch must live inside the retries-exhausted block, above the
// writeBlockerPlaceholder call — otherwise the stub SUMMARY is still written.
const retriesExhaustedIdx = postUnitSrc.indexOf(
"if (attempt > MAX_VERIFICATION_RETRIES)",
);
assert.ok(
retriesExhaustedIdx !== -1,
"retries-exhausted guard exists in postUnitPreVerification",
);
const blockerCallIdx = postUnitSrc.indexOf("writeBlockerPlaceholder", retriesExhaustedIdx);
assert.ok(
blockerCallIdx !== -1,
"blocker placeholder call still exists for non-milestone units",
);
const exhaustedBlock = postUnitSrc.slice(retriesExhaustedIdx, blockerCallIdx);
assert.ok(
/s\.currentUnit\.type\s*===\s*"complete-milestone"/.test(exhaustedBlock),
"retries-exhausted block should specifically handle complete-milestone",
);
assert.ok(
/pauseAuto\s*\(\s*ctx\s*,\s*pi\s*\)/.test(exhaustedBlock),
"complete-milestone path should call pauseAuto instead of falling through",
);
// The pause branch must return so execution never reaches writeBlockerPlaceholder.
assert.ok(
/return\s+"dispatched"\s*;/.test(exhaustedBlock),
"complete-milestone pause branch should return before the placeholder call",
);
});
test("#4175: recoverTimedOutUnit pauses complete-milestone instead of writing a blocker placeholder", () => {
// The complete-milestone pause branch must sit immediately above the
// "retries exhausted" writeBlockerPlaceholder call so a failed
// complete-milestone never produces a stub SUMMARY. Anchor on the
// comment that precedes that specific placeholder call rather than the
// function's earlier writeBlockerPlaceholder use sites or its import.
// Use lastIndexOf so we find the final retries-exhausted block in
// recoverTimedOutUnit, not an earlier helper with the same comment.
const exhaustedAnchor = "Retries exhausted — write a blocker placeholder";
const exhaustedIdx = timeoutSrc.lastIndexOf(exhaustedAnchor);
assert.ok(
exhaustedIdx !== -1,
"retries-exhausted blocker-placeholder path still exists for non-milestone units",
);
const guardIdx = timeoutSrc.lastIndexOf(
'unitType === "complete-milestone"',
exhaustedIdx,
);
assert.ok(
guardIdx !== -1,
"complete-milestone guard should appear above the retries-exhausted placeholder call",
);
const guardBlock = timeoutSrc.slice(guardIdx, exhaustedIdx);
assert.ok(
/return\s+"paused"\s*;/.test(guardBlock),
"complete-milestone guard should return 'paused' before the placeholder call",
);
// The guard itself must not call writeBlockerPlaceholder.
assert.ok(
!guardBlock.includes("writeBlockerPlaceholder"),
"complete-milestone guard must not write a blocker placeholder",
);
});

View file

@ -307,27 +307,87 @@ describe('derive-state-helpers', () => {
}
});
// ─── buildCompletenessSet: SUMMARY-on-disk marks complete ───────────
test('buildCompletenessSet: milestone with SUMMARY on disk treated as complete', async () => {
// ─── buildCompletenessSet: DB status is authoritative ──────────────
test('buildCompletenessSet: DB status=complete marks milestone complete', async () => {
const base = createFixtureBase();
try {
// M001 has summary on disk but DB status is still 'active'
writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT);
writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Summary\n\nDone.');
// M002 is the real active milestone
writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nActive.');
openDatabase(':memory:');
insertMilestone({ id: 'M001', title: 'First', status: 'active' });
insertMilestone({ id: 'M001', title: 'First', status: 'complete' });
insertMilestone({ id: 'M002', title: 'Second', status: 'active' });
invalidateStateCache();
const state = await deriveStateFromDb(base);
// M001 should be complete (summary on disk), M002 should be active
const m1 = state.registry.find(e => e.id === 'M001');
assert.equal(m1?.status, 'complete', 'summary-disk: M001 marked complete via disk SUMMARY');
assert.equal(state.activeMilestone?.id, 'M002', 'summary-disk: M002 is active');
assert.equal(m1?.status, 'complete', 'DB status=complete → registry entry complete');
assert.equal(state.activeMilestone?.id, 'M002', 'M002 is the active milestone');
} finally {
closeDatabase();
cleanup(base);
}
});
// ─── Regression #4179: orphan SUMMARY must NOT flip DB-active milestone ───
// A crashed complete-milestone turn (or stale/manual SUMMARY.md) can leave
// a milestone SUMMARY on disk while the DB row still reads 'active'. The
// read-side of state derivation must NOT treat the orphan SUMMARY as a
// completion signal, or the auto-loop advances and merges work that was
// never actually finished (same failure class as #4175, read-side twin).
test('buildCompletenessSet (#4179): orphan SUMMARY on disk does not mark DB-active milestone complete', async () => {
const base = createFixtureBase();
try {
writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT);
writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Orphan Summary\n\nLeft over from crashed turn.');
openDatabase(':memory:');
insertMilestone({ id: 'M001', title: 'First', status: 'active' });
// Slice still in-flight — auto should resume, not merge.
insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] });
insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] });
insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'In-flight', status: 'pending' });
invalidateStateCache();
const state = await deriveStateFromDb(base);
const m1 = state.registry.find(e => e.id === 'M001');
assert.notEqual(m1?.status, 'complete', 'orphan SUMMARY must not mark milestone complete');
assert.equal(m1?.status, 'active', 'M001 remains active — DB is authoritative');
assert.equal(state.activeMilestone?.id, 'M001', 'M001 is still the active milestone');
assert.notEqual(state.phase, 'completing-milestone', 'must not short-circuit into completion');
} finally {
closeDatabase();
cleanup(base);
}
});
// Regression #4179 (companion): DB-active milestone with all slices done +
// validation terminal + orphan SUMMARY must still flow through completing-milestone
// (re-runs complete-milestone), not be reported as already-complete.
test('buildRegistryAndFindActive (#4179): orphan SUMMARY with validation-terminal falls through to completing-milestone', async () => {
const base = createFixtureBase();
try {
writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT);
writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT);
writeFile(base, 'milestones/M001/slices/S02/S02-PLAN.md', PLAN_CONTENT);
writeFile(base, 'milestones/M001/M001-VALIDATION.md', '---\nverdict: passed\n---\n# Validation\nAll good.');
writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Orphan Summary\n\nLeft over.');
openDatabase(':memory:');
insertMilestone({ id: 'M001', title: 'First', status: 'active' });
insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'complete', risk: 'low', depends: [] });
insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'complete', risk: 'low', depends: ['S01'] });
invalidateStateCache();
const state = await deriveStateFromDb(base);
const m1 = state.registry.find(e => e.id === 'M001');
assert.equal(m1?.status, 'active', 'M001 stays active despite orphan SUMMARY + validation-terminal');
assert.equal(state.activeMilestone?.id, 'M001', 'M001 is still the active milestone');
assert.equal(state.phase, 'completing-milestone', 'phase flows through completing-milestone (re-run)');
} finally {
closeDatabase();
cleanup(base);