diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index cb146679d..f59334f82 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -632,6 +632,30 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV s.verificationRetryCount.set(retryKey, attempt); if (attempt > MAX_VERIFICATION_RETRIES) { + // #4175: For complete-milestone, a blocker placeholder is harmful — + // the stub SUMMARY has no recovery value (milestone is terminal), + // it does not update DB status (so deriveState never advances), + // and it fools stopAuto's presence check into merging a milestone + // that was never legitimately completed. Pause auto-mode with a + // clear single failure signal and preserve the worktree branch. + if (s.currentUnit.type === "complete-milestone") { + debugLog("postUnit", { + phase: "artifact-verify-pause-complete-milestone", + unitType: s.currentUnit.type, + unitId: s.currentUnit.id, + attempt, + maxRetries: MAX_VERIFICATION_RETRIES, + }); + s.verificationRetryCount.delete(retryKey); + s.pendingVerificationRetry = null; + ctx.ui.notify( + `Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /gsd auto once blockers are resolved.`, + "error", + ); + await pauseAuto(ctx, pi); + return "dispatched"; + } + // Retries exhausted — write a blocker placeholder so the pipeline // can advance past this stuck unit (#2653). debugLog("postUnit", { diff --git a/src/resources/extensions/gsd/auto-timeout-recovery.ts b/src/resources/extensions/gsd/auto-timeout-recovery.ts index 4d62a9fec..fa385fc1b 100644 --- a/src/resources/extensions/gsd/auto-timeout-recovery.ts +++ b/src/resources/extensions/gsd/auto-timeout-recovery.ts @@ -230,6 +230,23 @@ export async function recoverTimedOutUnit( return "recovered"; } + // #4175: For complete-milestone, never write a blocker placeholder — a stub + // SUMMARY has no recovery value (milestone is terminal), it does not update + // DB status, and downstream merge paths can treat the stub as a legitimate + // completion signal. Pause instead so the worktree branch is preserved. + if (unitType === "complete-milestone") { + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "paused", + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + }); + ctx.ui.notify( + `Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s) — worktree branch preserved. Re-run /gsd auto once blockers are resolved.`, + "error", + ); + return "paused"; + } + // Retries exhausted — write a blocker placeholder and advance the pipeline // instead of silently stalling. const placeholder = writeBlockerPlaceholder( diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 344055951..666341dbf 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -187,7 +187,7 @@ import { deregisterSigtermHandler as _deregisterSigtermHandler, detectWorkingTreeActivity, } from "./auto-supervisor.js"; -import { isDbAvailable } from "./gsd-db.js"; +import { isDbAvailable, getMilestone } from "./gsd-db.js"; import { countPendingCaptures } from "./captures.js"; import { clearCmuxSidebar, logCmuxEvent, syncCmuxSidebar } from "../cmux/index.js"; @@ -761,24 +761,36 @@ export async function stopAuto( : { notify: () => {} }; const resolver = buildResolver(); - // Check if the milestone is complete — SUMMARY file is the authoritative signal. + // Check if the milestone is complete. DB status is the authoritative + // signal — only a successful gsd_complete_milestone call flips it to + // "complete" (tools/complete-milestone.ts). SUMMARY file presence is + // NOT sufficient: a blocker placeholder stub or a partial write can + // leave a file behind without the milestone actually being done, + // which previously caused stopAuto to merge a failed milestone and + // emit a misleading metadata-only merge warning (#4175). + // DB-unavailable projects fall back to SUMMARY-file presence. let milestoneComplete = false; try { - const summaryPath = resolveMilestoneFile( - s.originalBasePath || s.basePath, - s.currentMilestoneId, - "SUMMARY", - ); - if (!summaryPath) { - // Also check in the worktree path (SUMMARY may not be synced yet) - const wtSummaryPath = resolveMilestoneFile( - s.basePath, + if (isDbAvailable()) { + const dbRow = getMilestone(s.currentMilestoneId); + milestoneComplete = dbRow?.status === "complete"; + } else { + const summaryPath = resolveMilestoneFile( + s.originalBasePath || s.basePath, s.currentMilestoneId, "SUMMARY", ); - milestoneComplete = wtSummaryPath !== null; - } else { - milestoneComplete = true; + if (!summaryPath) { + // Also check in the worktree path (SUMMARY may not be synced yet) + const wtSummaryPath = resolveMilestoneFile( + s.basePath, + s.currentMilestoneId, + "SUMMARY", + ); + milestoneComplete = wtSummaryPath !== null; + } else { + milestoneComplete = true; + } } } catch (err) { // Non-fatal — fall through to preserveBranch path diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index ec6e36210..8e5340d18 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -386,6 +386,10 @@ function buildCompletenessSet(basePath: string, milestones: MilestoneRow[]) { const completeMilestoneIds = new Set(); const parkedMilestoneIds = new Set(); + // DB-authoritative: a milestone is only "complete" when its DB row says so. + // SUMMARY-file presence is NOT a completion signal here — an orphan SUMMARY + // (crashed complete-milestone turn, partial merge, manual edit) must not + // flip derived state to complete and cascade into a false auto-merge (#4179). for (const m of milestones) { const parkedFile = resolveMilestoneFile(basePath, m.id, "PARKED"); if (parkedFile || m.status === 'parked') { @@ -396,11 +400,6 @@ function buildCompletenessSet(basePath: string, milestones: MilestoneRow[]) { completeMilestoneIds.add(m.id); continue; } - const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); - if (summaryFile) { - completeMilestoneIds.add(m.id); - continue; - } } return { completeMilestoneIds, parkedMilestoneIds }; } @@ -429,18 +428,22 @@ async function buildRegistryAndFindActive( if (isGhostMilestone(basePath, m.id)) continue; } - const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); - - if (completeMilestoneIds.has(m.id) || (summaryFile !== null)) { + // DB-authoritative completeness (#4179): only trust completeMilestoneIds, + // which is itself derived from DB status. SUMMARY-file presence alone must + // not imply completion. The summary file may still be consulted below as a + // title source for legitimately-complete milestones whose DB row has no title. + if (completeMilestoneIds.has(m.id)) { let title = stripMilestonePrefix(m.title) || m.id; - if (summaryFile && !m.title) { - const summaryContent = await loadFile(summaryFile); - if (summaryContent) { - title = parseSummary(summaryContent).title || m.id; + if (!m.title) { + const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); + if (summaryFile) { + const summaryContent = await loadFile(summaryFile); + if (summaryContent) { + title = parseSummary(summaryContent).title || m.id; + } } } registry.push({ id: m.id, title, status: 'complete' }); - completeMilestoneIds.add(m.id); continue; } @@ -481,7 +484,14 @@ async function buildRegistryAndFindActive( const validationContent = validationFile ? await loadFile(validationFile) : null; const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; - if (!validationTerminal || (validationTerminal && !summaryFile)) { + // DB-authoritative (#4179): completeness is already decided by + // completeMilestoneIds above. If we reached this branch, the DB says + // the milestone is NOT complete — so any SUMMARY file on disk is an + // orphan (crashed complete-milestone, partial merge, manual edit) and + // must not short-circuit this path. When validation is terminal, fall + // through to the default active-push below so `complete-milestone` can + // re-run idempotently. + if (!validationTerminal) { activeMilestone = { id: m.id, title }; activeMilestoneSlices = slices; activeMilestoneFound = true; diff --git a/src/resources/extensions/gsd/tests/complete-milestone-false-merge.test.ts b/src/resources/extensions/gsd/tests/complete-milestone-false-merge.test.ts new file mode 100644 index 000000000..3c7e40996 --- /dev/null +++ b/src/resources/extensions/gsd/tests/complete-milestone-false-merge.test.ts @@ -0,0 +1,142 @@ +/** + * complete-milestone-false-merge.test.ts — Regression test for #4175. + * + * Before the fix, a failed complete-milestone unit could leave a stub + * SUMMARY blocker placeholder on disk. stopAuto's SUMMARY-presence check + * then treated the milestone as complete and merged the worktree branch + * into main — emitting a misleading metadata-only merge warning for a + * milestone that was never legitimately finished. + * + * The fix has three cooperating parts: + * 1. stopAuto uses DB status (authoritative) instead of SUMMARY presence + * when the project DB is available. + * 2. postUnitPreVerification pauses auto-mode for complete-milestone + * after retries are exhausted instead of writing a blocker placeholder. + * 3. recoverTimedOutUnit pauses for complete-milestone instead of + * writing a blocker placeholder. + * + * This test guards all three via source inspection so a future refactor + * cannot silently reintroduce the false-merge path. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const gsdDir = join(import.meta.dirname, ".."); +const autoSrc = readFileSync(join(gsdDir, "auto.ts"), "utf-8"); +const postUnitSrc = readFileSync(join(gsdDir, "auto-post-unit.ts"), "utf-8"); +const timeoutSrc = readFileSync(join(gsdDir, "auto-timeout-recovery.ts"), "utf-8"); + +test("#4175: stopAuto uses DB status as the authoritative milestone-complete signal", () => { + const step4Idx = autoSrc.indexOf("Step 4: Auto-worktree exit"); + assert.ok(step4Idx !== -1, "Step 4 comment exists in stopAuto"); + const step5Idx = autoSrc.indexOf("Step 5:", step4Idx); + const step4Block = autoSrc.slice(step4Idx, step5Idx); + + assert.ok( + step4Block.includes("isDbAvailable()"), + "Step 4 should branch on isDbAvailable() so DB is consulted when present", + ); + assert.ok( + step4Block.includes("getMilestone(s.currentMilestoneId)"), + "Step 4 should read authoritative milestone status via getMilestone()", + ); + assert.ok( + /status\s*===\s*"complete"/.test(step4Block), + 'Step 4 should compare the DB row status to "complete"', + ); +}); + +test("#4175: stopAuto imports getMilestone from gsd-db", () => { + assert.ok( + /import\s*\{[^}]*\bgetMilestone\b[^}]*\}\s*from\s*"\.\/gsd-db\.js"/.test(autoSrc), + "auto.ts should import getMilestone from ./gsd-db.js", + ); +}); + +test("#4175: stopAuto still falls back to SUMMARY presence when DB is unavailable", () => { + const step4Idx = autoSrc.indexOf("Step 4: Auto-worktree exit"); + const step5Idx = autoSrc.indexOf("Step 5:", step4Idx); + const step4Block = autoSrc.slice(step4Idx, step5Idx); + + assert.ok( + step4Block.includes("resolveMilestoneFile"), + "Step 4 should keep SUMMARY-file resolution for DB-unavailable projects", + ); + assert.ok( + step4Block.includes("preserveBranch"), + "Step 4 should still preserve branch for incomplete milestones (fallback path)", + ); +}); + +test("#4175: postUnitPreVerification pauses complete-milestone after retries exhausted", () => { + // The pause branch must live inside the retries-exhausted block, above the + // writeBlockerPlaceholder call — otherwise the stub SUMMARY is still written. + const retriesExhaustedIdx = postUnitSrc.indexOf( + "if (attempt > MAX_VERIFICATION_RETRIES)", + ); + assert.ok( + retriesExhaustedIdx !== -1, + "retries-exhausted guard exists in postUnitPreVerification", + ); + + const blockerCallIdx = postUnitSrc.indexOf("writeBlockerPlaceholder", retriesExhaustedIdx); + assert.ok( + blockerCallIdx !== -1, + "blocker placeholder call still exists for non-milestone units", + ); + + const exhaustedBlock = postUnitSrc.slice(retriesExhaustedIdx, blockerCallIdx); + + assert.ok( + /s\.currentUnit\.type\s*===\s*"complete-milestone"/.test(exhaustedBlock), + "retries-exhausted block should specifically handle complete-milestone", + ); + assert.ok( + /pauseAuto\s*\(\s*ctx\s*,\s*pi\s*\)/.test(exhaustedBlock), + "complete-milestone path should call pauseAuto instead of falling through", + ); + // The pause branch must return so execution never reaches writeBlockerPlaceholder. + assert.ok( + /return\s+"dispatched"\s*;/.test(exhaustedBlock), + "complete-milestone pause branch should return before the placeholder call", + ); +}); + +test("#4175: recoverTimedOutUnit pauses complete-milestone instead of writing a blocker placeholder", () => { + // The complete-milestone pause branch must sit immediately above the + // "retries exhausted" writeBlockerPlaceholder call so a failed + // complete-milestone never produces a stub SUMMARY. Anchor on the + // comment that precedes that specific placeholder call rather than the + // function's earlier writeBlockerPlaceholder use sites or its import. + // Use lastIndexOf so we find the final retries-exhausted block in + // recoverTimedOutUnit, not an earlier helper with the same comment. + const exhaustedAnchor = "Retries exhausted — write a blocker placeholder"; + const exhaustedIdx = timeoutSrc.lastIndexOf(exhaustedAnchor); + assert.ok( + exhaustedIdx !== -1, + "retries-exhausted blocker-placeholder path still exists for non-milestone units", + ); + + const guardIdx = timeoutSrc.lastIndexOf( + 'unitType === "complete-milestone"', + exhaustedIdx, + ); + assert.ok( + guardIdx !== -1, + "complete-milestone guard should appear above the retries-exhausted placeholder call", + ); + + const guardBlock = timeoutSrc.slice(guardIdx, exhaustedIdx); + assert.ok( + /return\s+"paused"\s*;/.test(guardBlock), + "complete-milestone guard should return 'paused' before the placeholder call", + ); + // The guard itself must not call writeBlockerPlaceholder. + assert.ok( + !guardBlock.includes("writeBlockerPlaceholder"), + "complete-milestone guard must not write a blocker placeholder", + ); +}); diff --git a/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts b/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts index 035e5efb2..c4f7f5ddb 100644 --- a/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts +++ b/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts @@ -307,27 +307,87 @@ describe('derive-state-helpers', () => { } }); - // ─── buildCompletenessSet: SUMMARY-on-disk marks complete ─────────── - test('buildCompletenessSet: milestone with SUMMARY on disk treated as complete', async () => { + // ─── buildCompletenessSet: DB status is authoritative ────────────── + test('buildCompletenessSet: DB status=complete marks milestone complete', async () => { const base = createFixtureBase(); try { - // M001 has summary on disk but DB status is still 'active' writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Summary\n\nDone.'); - // M002 is the real active milestone writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nActive.'); openDatabase(':memory:'); - insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + insertMilestone({ id: 'M001', title: 'First', status: 'complete' }); insertMilestone({ id: 'M002', title: 'Second', status: 'active' }); invalidateStateCache(); const state = await deriveStateFromDb(base); - // M001 should be complete (summary on disk), M002 should be active const m1 = state.registry.find(e => e.id === 'M001'); - assert.equal(m1?.status, 'complete', 'summary-disk: M001 marked complete via disk SUMMARY'); - assert.equal(state.activeMilestone?.id, 'M002', 'summary-disk: M002 is active'); + assert.equal(m1?.status, 'complete', 'DB status=complete → registry entry complete'); + assert.equal(state.activeMilestone?.id, 'M002', 'M002 is the active milestone'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── Regression #4179: orphan SUMMARY must NOT flip DB-active milestone ─── + // A crashed complete-milestone turn (or stale/manual SUMMARY.md) can leave + // a milestone SUMMARY on disk while the DB row still reads 'active'. The + // read-side of state derivation must NOT treat the orphan SUMMARY as a + // completion signal, or the auto-loop advances and merges work that was + // never actually finished (same failure class as #4175, read-side twin). + test('buildCompletenessSet (#4179): orphan SUMMARY on disk does not mark DB-active milestone complete', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Orphan Summary\n\nLeft over from crashed turn.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + // Slice still in-flight — auto should resume, not merge. + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'In-flight', status: 'pending' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + const m1 = state.registry.find(e => e.id === 'M001'); + assert.notEqual(m1?.status, 'complete', 'orphan SUMMARY must not mark milestone complete'); + assert.equal(m1?.status, 'active', 'M001 remains active — DB is authoritative'); + assert.equal(state.activeMilestone?.id, 'M001', 'M001 is still the active milestone'); + assert.notEqual(state.phase, 'completing-milestone', 'must not short-circuit into completion'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // Regression #4179 (companion): DB-active milestone with all slices done + + // validation terminal + orphan SUMMARY must still flow through completing-milestone + // (re-runs complete-milestone), not be reported as already-complete. + test('buildRegistryAndFindActive (#4179): orphan SUMMARY with validation-terminal falls through to completing-milestone', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S02/S02-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/M001-VALIDATION.md', '---\nverdict: passed\n---\n# Validation\nAll good.'); + writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Orphan Summary\n\nLeft over.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'complete', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'complete', risk: 'low', depends: ['S01'] }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + const m1 = state.registry.find(e => e.id === 'M001'); + assert.equal(m1?.status, 'active', 'M001 stays active despite orphan SUMMARY + validation-terminal'); + assert.equal(state.activeMilestone?.id, 'M001', 'M001 is still the active milestone'); + assert.equal(state.phase, 'completing-milestone', 'phase flows through completing-milestone (re-run)'); } finally { closeDatabase(); cleanup(base);