fix(gsd): block milestone completion when verification fails (#2500)

Closes #2499
This commit is contained in:
Jeremy McSpadden 2026-03-25 10:25:54 -05:00 committed by GitHub
parent 9574c5796d
commit ddbf3105a3
4 changed files with 124 additions and 7 deletions

View file

@ -853,6 +853,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
promptGuidelines: [
"Use gsd_complete_milestone when all slices in a milestone are finished and the milestone needs to be recorded.",
"All slices in the milestone must have status 'complete' — the handler validates this before proceeding.",
"verificationPassed must be explicitly set to true — the handler rejects completion if verification did not pass.",
"On success, returns summaryPath where the MILESTONE-SUMMARY.md was written.",
],
parameters: Type.Object({
@ -868,6 +869,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
lessonsLearned: Type.Array(Type.String(), { description: "Lessons learned during the milestone" }),
followUps: Type.Optional(Type.String({ description: "Follow-up items for future milestones" })),
deviations: Type.Optional(Type.String({ description: "Deviations from the original plan" })),
verificationPassed: Type.Boolean({ description: "Must be true — confirms that code change verification, success criteria, and definition of done checks all passed before completion" }),
}),
execute: milestoneCompleteExecute,
};

View file

@ -17,18 +17,31 @@ All relevant context has been preloaded below — the roadmap, all slice summari
Then:
1. Use the **Milestone Summary** output template from the inlined context above
2. {{skillActivation}}
3. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.gsd/'` (or the equivalent for the integration branch). If no non-`.gsd/` files appear in the diff, the milestone produced only planning artifacts and no actual code. In that case, do NOT mark the milestone as passing verification — document the gap clearly in the summary and state that implementation is missing.
4. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. List any criterion that was NOT met.
5. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly.
3. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.gsd/'` (or the equivalent for the integration branch). If no non-`.gsd/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**.
4. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
5. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**.
6. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
### Verification Gate — STOP if verification failed
**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 7.**
**Failure path** (verification failed):
- Do NOT call `gsd_complete_milestone` — the milestone must not be marked as complete.
- Do NOT update `.gsd/PROJECT.md` to reflect completion.
- Do NOT update `.gsd/REQUIREMENTS.md` to mark requirements as validated.
- Write a clear summary of what failed and why to help the next attempt.
- Say: "Milestone {{milestoneId}} verification FAILED — not complete." and stop.
**Success path** (all verifications passed — continue with steps 711):
7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
8. Update `.gsd/REQUIREMENTS.md` if any requirement status transitions were validated in step 6.
9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state.
10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.
11. Do not commit manually — the system auto-commits your changes after this unit completes.
- Say: "Milestone {{milestoneId}} complete."
**Important:** Do NOT skip the code change verification, success criteria, or definition of done verification (steps 3-5). The milestone summary must reflect actual verified outcomes, not assumed success. If any criterion was not met or no code changes exist, document it clearly in the summary and do not mark the milestone as passing verification.
**Important:** Do NOT skip the code change verification, success criteria, or definition of done verification (steps 3-5). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run.
**File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories.
When done, say: "Milestone {{milestoneId}} complete."

View file

@ -115,6 +115,102 @@ describe("complete-milestone", () => {
assert.ok(prompt.includes("Milestone M002 complete"), "prompt contains completion sentinel for M002");
});
test("prompt contains verification gate that blocks completion on failure", () => {
const prompt = loadPromptFromWorktree("complete-milestone", {
workingDirectory: "/tmp/test-project",
milestoneId: "M001",
milestoneTitle: "Gate Test",
roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md",
inlinedContext: "context",
});
// Verification gate section must exist
assert.ok(
prompt.includes("Verification Gate"),
"prompt contains 'Verification Gate' section",
);
// Failure path must block gsd_complete_milestone
assert.ok(
prompt.includes("Do NOT call `gsd_complete_milestone`"),
"failure path explicitly blocks calling the completion tool",
);
// Failure path must have its own sentinel distinct from success
assert.ok(
prompt.includes("verification FAILED"),
"failure path outputs a FAILED sentinel",
);
// verificationPassed parameter must be referenced
assert.ok(
prompt.includes("verificationPassed"),
"prompt references verificationPassed parameter",
);
});
test("handleCompleteMilestone rejects when verificationPassed is false", async () => {
const { handleCompleteMilestone } = await import("../tools/complete-milestone.ts");
const base = createFixtureBase();
try {
const result = await handleCompleteMilestone({
milestoneId: "M001",
title: "Test Milestone",
oneLiner: "Test",
narrative: "Test narrative",
successCriteriaResults: "None met",
definitionOfDoneResults: "Incomplete",
requirementOutcomes: "None validated",
keyDecisions: [],
keyFiles: [],
lessonsLearned: [],
followUps: "",
deviations: "",
verificationPassed: false,
}, base);
assert.ok("error" in result, "returns error when verificationPassed is false");
assert.ok(
(result as { error: string }).error.includes("verification did not pass"),
"error message mentions verification did not pass",
);
} finally {
cleanup(base);
}
});
test("handleCompleteMilestone rejects when verificationPassed is omitted", async () => {
const { handleCompleteMilestone } = await import("../tools/complete-milestone.ts");
const base = createFixtureBase();
try {
// Simulate omitted verificationPassed (undefined coerced via any)
const params: any = {
milestoneId: "M001",
title: "Test Milestone",
oneLiner: "Test",
narrative: "Test narrative",
successCriteriaResults: "Results",
definitionOfDoneResults: "Done results",
requirementOutcomes: "Outcomes",
keyDecisions: [],
keyFiles: [],
lessonsLearned: [],
followUps: "",
deviations: "",
// verificationPassed intentionally omitted
};
const result = await handleCompleteMilestone(params, base);
assert.ok("error" in result, "returns error when verificationPassed is omitted");
assert.ok(
(result as { error: string }).error.includes("verification did not pass"),
"error message mentions verification did not pass",
);
} finally {
cleanup(base);
}
});
test("diagnoseExpectedArtifact logic for complete-milestone", async () => {
// Import the path helpers used by diagnoseExpectedArtifact
const { relMilestoneFile } = await import("../paths.ts");

View file

@ -31,6 +31,7 @@ export interface CompleteMilestoneParams {
lessonsLearned: string[];
followUps: string;
deviations: string;
verificationPassed: boolean;
}
export interface CompleteMilestoneResult {
@ -108,6 +109,11 @@ export async function handleCompleteMilestone(
return { error: "title is required and must be a non-empty string" };
}
// ── Verify that verification passed ─────────────────────────────────────
if (params.verificationPassed !== true) {
return { error: "verification did not pass — milestone completion blocked. verificationPassed must be explicitly set to true after all verification steps succeed" };
}
// ── Verify all slices are complete ───────────────────────────────────────
const slices = getMilestoneSlices(params.milestoneId);
if (slices.length === 0) {