From 722dfc96cb33005eef1738fffaff3a0c315c0e44 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Wed, 8 Apr 2026 20:07:46 -0500 Subject: [PATCH] fix(gsd): align prompt contracts and validation flow --- src/resources/extensions/gsd/auto-prompts.ts | 11 ++++---- .../gsd/prompts/guided-discuss-milestone.md | 2 +- .../gsd/prompts/validate-milestone.md | 8 +++--- .../gsd/tests/prompt-contracts.test.ts | 9 ++++++- .../gsd/tests/validate-milestone.test.ts | 26 +++++++++++++++++++ 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 43b61bd2e..5e8bff3c4 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -1592,7 +1592,7 @@ export async function buildValidateMilestonePrompt( logWarning("prompt", `buildValidateMilestonePrompt verification classes lookup failed: ${err instanceof Error ? err.message : String(err)}`); } - // Inline all slice summaries and UAT results + // Inline all slice summaries and assessment results let valSliceIds: string[] = []; try { const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); @@ -1617,10 +1617,10 @@ export async function buildValidateMilestonePrompt( const summaryRel = relSliceFile(base, mid, sid, "SUMMARY"); inlined.push(await inlineFile(summaryPath, summaryRel, `${sid} Summary`)); - const uatPath = resolveSliceFile(base, mid, sid, "UAT"); - const uatRel = relSliceFile(base, mid, sid, "UAT"); - const uatInline = await inlineFileOptional(uatPath, uatRel, `${sid} UAT Result`); - if (uatInline) inlined.push(uatInline); + const assessmentPath = resolveSliceFile(base, mid, sid, "ASSESSMENT"); + const assessmentRel = relSliceFile(base, mid, sid, "ASSESSMENT"); + const assessmentInline = await inlineFileOptional(assessmentPath, assessmentRel, `${sid} Assessment`); + if (assessmentInline) inlined.push(assessmentInline); } // Aggregate unresolved follow-ups and known 
limitations across slices @@ -2150,4 +2150,3 @@ export async function buildRewriteDocsPrompt( overridesPath: relGsdRootFile("OVERRIDES"), }); } - diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md index 41547e0c0..efa3cda62 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md @@ -92,7 +92,7 @@ Before moving to the wrap-up gate, verify you have covered: - header: "Depth Check" - question: "Did I capture the depth right?" - options: "Yes, you got it (Recommended)", "Not quite — let me clarify" -- **The question ID must contain `depth_verification`** (e.g. `depth_verification_confirm`) — this enables the write-gate downstream. +- **The question ID must contain `depth_verification` and the milestone id** (e.g. `depth_verification_{{milestoneId}}_confirm`) — this enables the write-gate downstream and keeps verification scoped to the milestone being discussed. **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text: "Did I capture that correctly? If not, tell me what I missed." Wait for explicit confirmation before proceeding. **The same non-bypassable gate applies to the plain-text path** — if the user does not respond, gives an ambiguous answer, or does not explicitly confirm, you MUST re-ask. Never rationalize past a missing confirmation. diff --git a/src/resources/extensions/gsd/prompts/validate-milestone.md b/src/resources/extensions/gsd/prompts/validate-milestone.md index dec0f2af2..277cf7173 100644 --- a/src/resources/extensions/gsd/prompts/validate-milestone.md +++ b/src/resources/extensions/gsd/prompts/validate-milestone.md @@ -14,7 +14,7 @@ This is remediation round {{remediationRound}}. 
If this is round 0, this is the ## Context -All relevant context has been preloaded below — the roadmap, all slice summaries, UAT results, requirements, decisions, and project context are inlined. Start working immediately without re-reading these files. +All relevant context has been preloaded below — the roadmap, all slice summaries, assessment results, requirements, decisions, and project context are inlined. Start working immediately without re-reading these files. {{inlinedContext}} @@ -30,8 +30,8 @@ Prompt: "Review milestone {{milestoneId}} requirements coverage. Working directo **Reviewer B — Cross-Slice Integration** Prompt: "Review milestone {{milestoneId}} cross-slice integration. Working directory: {{workingDirectory}}. Read `{{roadmapPath}}` and find the boundary map (produces/consumes contracts). For each boundary, check that the producing slice's SUMMARY confirms it produced the artifact, and the consuming slice's SUMMARY confirms it consumed it. Output a markdown table: Boundary | Producer Summary | Consumer Summary | Status. End with a one-line verdict: PASS if all boundaries honored, NEEDS-ATTENTION if any gaps." -**Reviewer C — UAT & Acceptance Criteria** -Prompt: "Review milestone {{milestoneId}} UAT and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for UAT-RESULT files in each slice directory. Verify each acceptance criterion maps to either a passing UAT result or clear SUMMARY evidence. Output a checklist: [ ] Criterion | Evidence. End with a one-line verdict: PASS if all criteria met, NEEDS-ATTENTION if gaps exist." +**Reviewer C — Assessment & Acceptance Criteria** +Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. 
Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Output a checklist: [ ] Criterion | Evidence. End with a one-line verdict: PASS if all criteria met, NEEDS-ATTENTION if gaps exist." ### Step 2 — Synthesize Findings @@ -59,7 +59,7 @@ reviewers: 3 ## Reviewer B — Cross-Slice Integration -## Reviewer C — UAT & Acceptance Criteria +## Reviewer C — Assessment & Acceptance Criteria ## Synthesis diff --git a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts index b62772bea..7c1092641 100644 --- a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts @@ -51,6 +51,12 @@ test("guided discussion prompts avoid wrap-up prompts after every round", () => assert.doesNotMatch(slicePrompt, /I think I have a solid picture of this slice\. Ready to wrap up/i); }); +test("guided milestone discussion scopes depth verification to the milestone id", () => { + const prompt = readPrompt("guided-discuss-milestone"); + assert.match(prompt, /depth_verification_\{\{milestoneId\}\}/, "depth verification id should include the milestone id"); + /* Match the legacy sentence verbatim (backtick + closing paren before the dash); a pattern the old prompt never contained would make this negative check vacuous. */ assert.doesNotMatch(prompt, /`depth_verification_confirm`\) — this enables the write-gate downstream/i, "legacy global depth gate wording should be gone"); +}); + test("guided-resume-task prompt preserves recovery state until work is superseded", () => { const prompt = readPrompt("guided-resume-task"); assert.match(prompt, /Do \*\*not\*\* delete the continue file immediately/i); @@ -188,7 +194,8 @@ test("validate-milestone prompt dispatches parallel reviewers", () => { assert.match(prompt, /Reviewer C/); assert.match(prompt, /Requirements Coverage/); assert.match(prompt, /Cross-Slice Integration/); - assert.match(prompt, /UAT/); + assert.match(prompt, /Assessment & Acceptance Criteria/); + assert.match(prompt, /assessment evidence/i); }); // ─── Prompt migration: 
replan-slice → gsd_replan_slice ──────────────── diff --git a/src/resources/extensions/gsd/tests/validate-milestone.test.ts b/src/resources/extensions/gsd/tests/validate-milestone.test.ts index 633862b83..569abd796 100644 --- a/src/resources/extensions/gsd/tests/validate-milestone.test.ts +++ b/src/resources/extensions/gsd/tests/validate-milestone.test.ts @@ -9,6 +9,7 @@ import { deriveState, isValidationTerminal } from "../state.ts"; import { resolveExpectedArtifactPath, diagnoseExpectedArtifact } from "../auto-artifact-paths.ts"; import { verifyExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.ts"; import { resolveDispatch, type DispatchContext } from "../auto-dispatch.ts"; +import { buildValidateMilestonePrompt } from "../auto-prompts.ts"; import type { GSDState } from "../types.ts"; import { clearPathCache } from "../paths.ts"; import { clearParseCache } from "../files.ts"; @@ -57,6 +58,12 @@ function writeSliceSummary(base: string, mid: string, sid: string, content: stri writeFileSync(join(dir, `${sid}-SUMMARY.md`), content); } +/** Test fixture: writes `content` to `<sid>-ASSESSMENT.md` under `.gsd/milestones/<mid>/slices/<sid>`, creating the directory if it does not exist. */ function writeSliceAssessment(base: string, mid: string, sid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid, "slices", sid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${sid}-ASSESSMENT.md`), content); +} + const ALL_DONE_ROADMAP = `# M001: Test Milestone ## Vision @@ -192,6 +199,25 @@ test("deriveState returns complete when both VALIDATION and SUMMARY exist", asyn } }); +/* Seeds a slice with both a UAT spec and an ASSESSMENT file, then checks that only the assessment content appears in the built prompt. */ test("buildValidateMilestonePrompt inlines ASSESSMENT evidence instead of UAT spec", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + const dir = join(base, ".gsd", "milestones", "M001"); + writeFileSync(join(dir, "M001-CONTEXT.md"), CONTEXT_FILE); + writeSliceSummary(base, "M001", "S01", "# S01 Summary\nDelivered."); + /* Decoy: this UAT spec sits next to the assessment and must not show up in the prompt. */ writeFileSync(join(dir, "slices", "S01", "S01-UAT.md"), "# UAT Spec\nDo the thing.\n"); + writeSliceAssessment(base, 
"M001", "S01", "---\nverdict: PASS\n---\n# Assessment\nEvidence captured."); + + const prompt = await buildValidateMilestonePrompt("M001", "Test Milestone", base); + assert.match(prompt, /S01 Assessment/i, "prompt should inline assessment evidence"); + assert.match(prompt, /verdict: PASS/i, "prompt should include the assessment verdict"); + assert.doesNotMatch(prompt, /UAT Spec/i, "prompt should not inline the raw UAT spec as evidence"); + } finally { + cleanup(base); + } +}); + // ─── Dispatch rule ──────────────────────────────────────────────────────── test("dispatch rule matches validating-milestone phase", async () => {