From 09d62e01d148e777707453a06787baab8e692823 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Mon, 16 Mar 2026 16:59:48 -0600 Subject: [PATCH] feat(gsd): implement validate-milestone phase and dispatch Add a `validating-milestone` phase that runs BEFORE `completing-milestone` to reconcile planned work against delivered work. The validator checks success criteria, slice deliverables, cross-slice integration, and requirement coverage before allowing milestone completion. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/auto-dispatch.ts | 39 ++- src/resources/extensions/gsd/auto-prompts.ts | 73 ++++ src/resources/extensions/gsd/auto-recovery.ts | 15 + src/resources/extensions/gsd/complexity.ts | 1 + src/resources/extensions/gsd/doctor.ts | 14 + src/resources/extensions/gsd/preferences.ts | 4 +- .../gsd/prompts/validate-milestone.md | 101 +++--- src/resources/extensions/gsd/state.ts | 53 ++- .../gsd/tests/auto-preflight.test.ts | 1 + .../gsd/tests/complete-milestone.test.ts | 9 +- .../gsd/tests/derive-state-db.test.ts | 1 + .../gsd/tests/derive-state-deps.test.ts | 9 + .../gsd/tests/derive-state-draft.test.ts | 8 + .../extensions/gsd/tests/derive-state.test.ts | 14 + .../integration-mixed-milestones.test.ts | 8 + .../tests/migrate-writer-integration.test.ts | 10 +- .../gsd/tests/queue-reorder-e2e.test.ts | 1 + .../gsd/tests/validate-milestone.test.ts | 316 ++++++++++++++++++ src/resources/extensions/gsd/types.ts | 3 +- 19 files changed, 605 insertions(+), 75 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/validate-milestone.test.ts diff --git a/src/resources/extensions/gsd/auto-dispatch.ts b/src/resources/extensions/gsd/auto-dispatch.ts index a280a37c8..4f7258b09 100644 --- a/src/resources/extensions/gsd/auto-dispatch.ts +++ b/src/resources/extensions/gsd/auto-dispatch.ts @@ -14,9 +14,11 @@ import type { GSDPreferences } from "./preferences.js"; import type { UatType } from "./files.js"; import { loadFile, 
extractUatType, loadActiveOverrides } from "./files.js"; import { - resolveMilestoneFile, resolveSliceFile, - relSliceFile, + resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, + relSliceFile, buildMilestoneFileName, } from "./paths.js"; +import { existsSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; import { buildResearchMilestonePrompt, buildPlanMilestonePrompt, @@ -25,6 +27,7 @@ import { buildExecuteTaskPrompt, buildCompleteSlicePrompt, buildCompleteMilestonePrompt, + buildValidateMilestonePrompt, buildReplanSlicePrompt, buildRunUatPrompt, buildReassessRoadmapPrompt, @@ -254,6 +257,38 @@ const DISPATCH_RULES: DispatchRule[] = [ }; }, }, + { + name: "validating-milestone → validate-milestone", + match: async ({ state, mid, midTitle, basePath, prefs }) => { + if (state.phase !== "validating-milestone") return null; + // Skip preference: write a minimal pass-through VALIDATION file + if (prefs?.phases?.skip_milestone_validation) { + const mDir = resolveMilestonePath(basePath, mid); + if (mDir) { + if (!existsSync(mDir)) mkdirSync(mDir, { recursive: true }); + const validationPath = join(mDir, buildMilestoneFileName(mid, "VALIDATION")); + const content = [ + "---", + "verdict: pass", + "remediation_round: 0", + "---", + "", + "# Milestone Validation (skipped by preference)", + "", + "Milestone validation was skipped via `skip_milestone_validation` preference.", + ].join("\n"); + writeFileSync(validationPath, content, "utf-8"); + } + return { action: "skip" }; + } + return { + action: "dispatch", + unitType: "validate-milestone", + unitId: mid, + prompt: await buildValidateMilestonePrompt(mid, midTitle, basePath), + }; + }, + }, { name: "completing-milestone → complete-milestone", match: async ({ state, mid, midTitle, basePath }) => { diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index ad389c2f7..9d7b93824 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts 
/**
 * Build the prompt for the `validate-milestone` unit.
 *
 * Everything the validator needs is preloaded into a single "Inlined Context"
 * section, in this order: the milestone roadmap, each slice's SUMMARY (and its
 * UAT-RESULT when one exists), the previous VALIDATION report when one exists,
 * the requirements / decisions / project blocks (skipped when the inline level
 * is "minimal"), the project knowledge file, and the milestone CONTEXT file.
 *
 * @param mid      Milestone ID.
 * @param midTitle Milestone title, passed through to the template.
 * @param base     Project base path; handed to the template as `workingDirectory`.
 * @param level    Optional inline level; `resolveInlineLevel()` is used when omitted.
 * @returns The rendered `validate-milestone` prompt.
 */
export async function buildValidateMilestonePrompt(
  mid: string, midTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
  const inlineLevel = level ?? resolveInlineLevel();
  const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
  const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");

  const inlined: string[] = [];
  inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));

  // Inline all slice summaries and UAT results
  const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null;
  if (roadmapContent) {
    const roadmap = parseRoadmap(roadmapContent);
    // A slice ID listed more than once in the roadmap is inlined only once.
    const seenSlices = new Set<string>();
    for (const slice of roadmap.slices) {
      if (seenSlices.has(slice.id)) continue;
      seenSlices.add(slice.id);
      const summaryPath = resolveSliceFile(base, mid, slice.id, "SUMMARY");
      const summaryRel = relSliceFile(base, mid, slice.id, "SUMMARY");
      inlined.push(await inlineFile(summaryPath, summaryRel, `${slice.id} Summary`));

      // UAT results are optional: nothing is pushed when the helper returns a falsy value.
      const uatPath = resolveSliceFile(base, mid, slice.id, "UAT-RESULT");
      const uatRel = relSliceFile(base, mid, slice.id, "UAT-RESULT");
      const uatInline = await inlineFileOptional(uatPath, uatRel, `${slice.id} UAT Result`);
      if (uatInline) inlined.push(uatInline);
    }
  }

  // Inline existing VALIDATION file if this is a re-validation round
  const validationPath = resolveMilestoneFile(base, mid, "VALIDATION");
  const validationRel = relMilestoneFile(base, mid, "VALIDATION");
  const validationContent = validationPath ? await loadFile(validationPath) : null;
  let remediationRound = 0;
  if (validationContent) {
    // The new round is the previous round + 1. A previous report with no
    // `remediation_round:` field is treated as round 0, so this run is round 1.
    const roundMatch = validationContent.match(/remediation_round:\s*(\d+)/);
    remediationRound = roundMatch ? parseInt(roundMatch[1], 10) + 1 : 1;
    inlined.push(`### Previous Validation (re-validation round ${remediationRound})\nSource: \`${validationRel}\`\n\n${validationContent.trim()}`);
  }

  // Inline root GSD files
  if (inlineLevel !== "minimal") {
    const requirementsInline = await inlineRequirementsFromDb(base);
    if (requirementsInline) inlined.push(requirementsInline);
    const decisionsInline = await inlineDecisionsFromDb(base, mid);
    if (decisionsInline) inlined.push(decisionsInline);
    const projectInline = await inlineProjectFromDb(base);
    if (projectInline) inlined.push(projectInline);
  }
  // Project knowledge is inlined at every inline level.
  const knowledgeInline = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInline) inlined.push(knowledgeInline);
  // Inline milestone context file
  const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
  const contextRel = relMilestoneFile(base, mid, "CONTEXT");
  const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context");
  if (contextInline) inlined.push(contextInline);

  const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;

  // NOTE(review): the validation path is joined onto `base` while the roadmap
  // path is left relative — presumably intentional; confirm against the template.
  const validationOutputPath = join(base, `${relMilestonePath(base, mid)}/${mid}-VALIDATION.md`);
  const roadmapOutputPath = `${relMilestonePath(base, mid)}/${mid}-ROADMAP.md`;

  return loadPrompt("validate-milestone", {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    roadmapPath: roadmapOutputPath,
    inlinedContext,
    validationPath: validationOutputPath,
    remediationRound: String(remediationRound),
  });
}
b/src/resources/extensions/gsd/auto-recovery.ts @@ -83,6 +83,10 @@ export function resolveExpectedArtifactPath(unitType: string, unitId: string, ba const dir = resolveSlicePath(base, mid, sid!); return dir ? join(dir, buildSliceFileName(sid!, "SUMMARY")) : null; } + case "validate-milestone": { + const dir = resolveMilestonePath(base, mid); + return dir ? join(dir, buildMilestoneFileName(mid, "VALIDATION")) : null; + } case "complete-milestone": { const dir = resolveMilestonePath(base, mid); return dir ? join(dir, buildMilestoneFileName(mid, "SUMMARY")) : null; @@ -244,6 +248,8 @@ export function diagnoseExpectedArtifact(unitType: string, unitId: string, base: return `${relSliceFile(base, mid!, sid!, "ASSESSMENT")} (roadmap reassessment)`; case "run-uat": return `${relSliceFile(base, mid!, sid!, "UAT-RESULT")} (UAT result)`; + case "validate-milestone": + return `${relMilestoneFile(base, mid!, "VALIDATION")} (milestone validation report)`; case "complete-milestone": return `${relMilestoneFile(base, mid!, "SUMMARY")} (milestone summary)`; default: @@ -537,6 +543,15 @@ export function buildLoopRemediationSteps(unitType: string, unitId: string, base ` 4. Resume auto-mode`, ].join("\n"); } + case "validate-milestone": { + if (!mid) break; + const artifactRel = relMilestoneFile(base, mid, "VALIDATION"); + return [ + ` 1. Write ${artifactRel} with verdict: pass`, + ` 2. Run \`gsd doctor\``, + ` 3. 
Resume auto-mode`, + ].join("\n"); + } default: break; } diff --git a/src/resources/extensions/gsd/complexity.ts b/src/resources/extensions/gsd/complexity.ts index 7fac93a73..c27c388be 100644 --- a/src/resources/extensions/gsd/complexity.ts +++ b/src/resources/extensions/gsd/complexity.ts @@ -87,6 +87,7 @@ const UNIT_TYPE_TIERS: Record = { "execute-task": "standard", "replan-slice": "heavy", "reassess-roadmap": "heavy", + "validate-milestone": "heavy", "complete-milestone": "standard", }; diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index ca28fbce0..cf26589ad 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -24,6 +24,7 @@ export type DoctorIssueCode = | "all_tasks_done_roadmap_not_checked" | "slice_checked_missing_summary" | "slice_checked_missing_uat" + | "all_slices_done_missing_milestone_validation" | "all_slices_done_missing_milestone_summary" | "task_done_must_haves_not_verified" | "active_requirement_missing_owner" @@ -1255,6 +1256,19 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; } } + // Milestone-level check: all slices done but no validation file + if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "VALIDATION") && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { + issues.push({ + severity: "info", + code: "all_slices_done_missing_milestone_validation", + scope: "milestone", + unitId: milestoneId, + message: `All slices are done but ${milestoneId}-VALIDATION.md is missing — milestone is in validating-milestone phase`, + file: relMilestoneFile(basePath, milestoneId, "VALIDATION"), + fixable: false, + }); + } + // Milestone-level check: all slices done but no milestone summary if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { issues.push({ diff --git a/src/resources/extensions/gsd/preferences.ts b/src/resources/extensions/gsd/preferences.ts 
index 686a5f72d..65e77d13d 100644 --- a/src/resources/extensions/gsd/preferences.ts +++ b/src/resources/extensions/gsd/preferences.ts @@ -688,6 +688,7 @@ export function resolveProfileDefaults(profile: TokenProfile): Partial 0, prior validation found issues and remediation slices were added and executed — verify those remediation slices resolved the issues. All relevant context has been preloaded below — the roadmap, all slice summaries, UAT results, requirements, decisions, and project context are inlined. Start working immediately without re-reading these files. {{inlinedContext}} -If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during validation, without relaxing required verification or artifact rules. +## Validation Steps -Then: +1. For each **success criterion** in `{{roadmapPath}}`, check whether slice summaries and UAT results provide evidence that it was met. Record pass/fail per criterion. +2. For each **slice** in the roadmap, verify its demo/deliverable claim against its summary. Flag any slice whose summary does not substantiate its claimed output. +3. Check **cross-slice integration points** — do boundary map entries (produces/consumes) align with what was actually built? +4. Check **requirement coverage** — are all active requirements addressed by at least one slice? +5. Determine a verdict: + - `pass` — all criteria met, all slices delivered, no gaps + - `needs-attention` — minor gaps that do not block completion (document them) + - `needs-remediation` — material gaps found; add remediation slices to the roadmap -### Step 1: Audit Success Criteria +## Output -Enumerate each success criterion from the roadmap's `## Success Criteria` section. For each criterion, map it to concrete evidence from slice summaries, UAT results, or observable behavior. 
+Write `{{validationPath}}` with this structure: -Format each criterion as: +```markdown +--- +verdict: <pass|needs-attention|needs-remediation> +remediation_round: {{remediationRound}} +--- -- `Criterion text` — **MET** — evidence: {{specific slice summary, UAT result, test output, or observable behavior}} -- `Criterion text` — **NOT MET** — gap: {{what's missing and why}} +# Milestone Validation: {{milestoneId}} -Every criterion must have a definitive verdict. Do not mark a criterion as MET without specific evidence. +## Success Criteria Checklist +- [x] Criterion 1 — evidence: ... +- [ ] Criterion 2 — gap: ... -### Step 2: Inventory Deferred Work +## Slice Delivery Audit +| Slice | Claimed | Delivered | Status | +|-------|---------|-----------|--------| +| S01 | ... | ... | pass | -Scan ALL slice summaries for: -- `Known Limitations` sections -- `Follow-ups` sections -- `Deviations` sections +## Cross-Slice Integration +(any boundary mismatches) -Scan ALL UAT results for: -- `Not Proven By This UAT` sections -- Any PARTIAL or FAIL verdicts +## Requirement Coverage +(any unaddressed requirements) -Check: -- `.gsd/REQUIREMENTS.md` for Active requirements not yet Validated -- `.gsd/CAPTURES.md` for unresolved deferred captures +## Verdict Rationale +(why this verdict was chosen) -Collect every item into a single inventory. Do not skip items because they seem minor — the classification step handles prioritization.
+## Remediation Plan +(only if verdict is needs-remediation — list new slices to add to the roadmap) +``` -### Step 3: Classify Each Gap - -For every unmet criterion and every deferred work item, classify it as one of: - -- **auto-remediable** — can be fixed by adding a new slice (missing feature, unfixed bug, untested path, incomplete integration) -- **human-required** — needs Lex's input (design decision, external service dependency, manual verification, judgment call, ambiguous requirement) -- **acceptable** — known limitation that's OK to ship (documented trade-off, explicitly scoped for a future milestone, minor rough edge with no user impact) - -Be conservative with **auto-remediable**. Only classify a gap as auto-remediable if you're confident a slice can resolve it without human judgment. When in doubt, classify as **human-required**. - -### Step 4: Act on Gaps - -**If this is remediation round 0 AND auto-remediable gaps exist:** - -1. Define remediation slices to address auto-remediable gaps. Follow the exact roadmap slice format: - `- [ ] **S0X: Title** \`risk:medium\` \`depends:[]\`` - Include a brief description of what each slice must accomplish. -2. Append these slices to `{{roadmapPath}}` after existing slices (do not modify completed slices). -3. Update the boundary map in the roadmap if the new slices introduce new integration points. -4. Set verdict to `needs-remediation`. - -**If this is remediation round 1 or higher:** - -Do NOT add more slices. At this point either: -- All remaining gaps are acceptable — set verdict to `pass` -- Remaining gaps need Lex's input — set verdict to `needs-attention` - -Never add remediation slices after round 0. If round 0 remediation didn't close the gaps, escalate. 
- -**If no auto-remediable gaps exist (any round):** - -- If all criteria are MET and deferred items are acceptable or human-required only — set verdict to `pass` (with human-required items noted) -- If human-required items are blocking — set verdict to `needs-attention` - -### Step 5: Write Validation Report - -Write `{{validationPath}}` using the milestone-validation template. Fill all frontmatter fields and every section. The report must be a complete record of the validation — a future agent reading only this file should understand what was checked, what passed, and what remains. +If verdict is `needs-remediation`: +- Add new slices to `{{roadmapPath}}` with unchecked `[ ]` status +- These slices will be planned and executed before validation re-runs **You MUST write `{{validationPath}}` before finishing.** -When done, say: "Milestone {{milestoneId}} validated." +When done, say: "Milestone {{milestoneId}} validation complete — verdict: <verdict>." diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 725f92e2f..33a16277c 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -53,6 +53,19 @@ export function isMilestoneComplete(roadmap: Roadmap): boolean { return roadmap.slices.length > 0 && roadmap.slices.every(s => s.done); } +/** + * Check whether a VALIDATION file's verdict is terminal (pass or needs-attention). + * A non-terminal verdict (needs-remediation) means validation must re-run + * after remediation slices are executed.
/**
 * Check whether a VALIDATION file's verdict is terminal (`pass` or
 * `needs-attention`). Any other verdict (for example `needs-remediation`), a
 * missing `verdict` key, or missing frontmatter is non-terminal, meaning
 * validation must run again.
 *
 * The verdict is read only from the YAML frontmatter block that opens the file
 * (`---` on the first line, closed by the next `---` line). Both LF and CRLF
 * line endings are accepted, and a verdict wrapped in single or double quotes
 * is unwrapped before comparison.
 *
 * @param validationContent Full text of the VALIDATION file.
 * @returns `true` when the frontmatter verdict is `pass` or `needs-attention`.
 */
export function isValidationTerminal(validationContent: string): boolean {
  // `\r?` lets a file written with Windows line endings parse instead of
  // looking permanently unvalidated.
  const frontmatter = validationContent.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!frontmatter) return false;

  // Anchor the key to the start of a line (multiline flag) so that keys which
  // merely end in "verdict" — e.g. `previous_verdict:` — are not picked up.
  const verdictLine = (frontmatter[1] ?? '').match(/^[ \t]*verdict:\s*(\S+)/m);
  if (!verdictLine) return false;

  // YAML allows the scalar to be quoted: `verdict: "pass"`.
  const verdict = (verdictLine[1] ?? '').replace(/^["']|["']$/g, '');
  return verdict === 'pass' || verdict === 'needs-attention';
}
resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION"); + const validationContent = validationFile ? await cachedLoadFile(validationFile) : null; + const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; const sliceProgress = { done: activeRoadmap.slices.length, total: activeRoadmap.slices.length, }; + + if (!validationTerminal) { + return { + activeMilestone, + activeSlice: null, + activeTask: null, + phase: 'validating-milestone', + recentDecisions: [], + blockers: [], + nextAction: `Validate milestone ${activeMilestone.id} before completion.`, + registry, + requirements, + progress: { + milestones: milestoneProgress, + slices: sliceProgress, + }, + }; + } + return { activeMilestone, activeSlice: null, diff --git a/src/resources/extensions/gsd/tests/auto-preflight.test.ts b/src/resources/extensions/gsd/tests/auto-preflight.test.ts index b89b675ef..eb421646c 100644 --- a/src/resources/extensions/gsd/tests/auto-preflight.test.ts +++ b/src/resources/extensions/gsd/tests/auto-preflight.test.ts @@ -17,6 +17,7 @@ writeFileSync(join(gsd, "milestones", "M001", "slices", "S01", "S01-PLAN.md"), ` writeFileSync(join(gsd, "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md"), `---\nid: T01\nparent: S01\nmilestone: M001\nprovides: []\nrequires: []\naffects: []\nkey_files: []\nkey_decisions: []\npatterns_established: []\nobservability_surfaces: []\ndrill_down_paths: []\nduration: 5m\nverification_result: passed\ncompleted_at: 2026-03-09T00:00:00Z\n---\n\n# T01: Old Task\n\n**Done**\n\n## What Happened\nDone.\n\n## Diagnostics\n- log\n`); writeFileSync(join(gsd, "milestones", "M001", "slices", "S01", "S01-SUMMARY.md"), `---\nid: S01\nparent: M001\nmilestone: M001\nprovides: []\nrequires: []\naffects: []\nkey_files: []\nkey_decisions: []\npatterns_established: []\nobservability_surfaces: []\ndrill_down_paths: []\nduration: 5m\nverification_result: passed\ncompleted_at: 2026-03-09T00:00:00Z\n---\n\n# S01: Old 
/**
 * Test fixture: write `<mid>-VALIDATION.md` under `.gsd/milestones/<mid>/`
 * (creating the directory when needed) with the given frontmatter verdict.
 * The verdict defaults to "pass"; `remediation_round` is always 0.
 */
function writeMilestoneValidation(base: string, mid: string, verdict: string = "pass"): void {
  const milestoneDir = join(base, ".gsd", "milestones", mid);
  const validationFile = join(milestoneDir, `${mid}-VALIDATION.md`);
  const body = `---\nverdict: ${verdict}\nremediation_round: 0\n---\n\n# Validation\nValidated.`;

  mkdirSync(milestoneDir, { recursive: true });
  writeFileSync(validationFile, body);
}
/**
 * Test fixture: drop a passing `<mid>-VALIDATION.md` into the milestone's
 * directory under `.gsd/milestones/`, creating the directory if it is missing.
 */
function writeMilestoneValidation(base: string, mid: string): void {
  const target = join(base, '.gsd', 'milestones', mid);
  mkdirSync(target, { recursive: true });

  const fileName = `${mid}-VALIDATION.md`;
  writeFileSync(
    join(target, fileName),
    `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`,
  );
}
/**
 * Test fixture: ensure `.gsd/milestones/<mid>/` exists and write a
 * `<mid>-VALIDATION.md` whose frontmatter verdict is `pass`.
 */
function writeMilestoneValidation(base: string, mid: string): void {
  const segments = [base, '.gsd', 'milestones', mid];
  const milestonePath = join(...segments);
  mkdirSync(milestonePath, { recursive: true });
  const reportPath = join(milestonePath, `${mid}-VALIDATION.md`);
  writeFileSync(reportPath, `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`);
}
/**
 * Test fixture: write the milestone's VALIDATION report.
 *
 * Creates `.gsd/milestones/<mid>/` when absent and writes `<mid>-VALIDATION.md`
 * with `verdict` in the frontmatter (default 'pass') and `remediation_round: 0`.
 */
function writeMilestoneValidation(base: string, mid: string, verdict: string = 'pass'): void {
  const milestoneRoot = join(base, '.gsd', 'milestones', mid);
  mkdirSync(milestoneRoot, { recursive: true });

  const report = `---\nverdict: ${verdict}\nremediation_round: 0\n---\n\n# Validation\nValidated.`;
  writeFileSync(join(milestoneRoot, `${mid}-VALIDATION.md`), report);
}
/**
 * Test fixture: mark a milestone as validated by writing a `pass` verdict to
 * `.gsd/milestones/<mid>/<mid>-VALIDATION.md` (the directory is created on demand).
 */
function writeMilestoneValidation(base: string, mid: string): void {
  const folder = join(base, '.gsd', 'milestones', mid);
  const destination = join(folder, `${mid}-VALIDATION.md`);

  mkdirSync(folder, { recursive: true });
  writeFileSync(destination, `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`);
}
`); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001: Legacy Feature Summary **One-liner summary** @@ -265,6 +272,7 @@ Everything worked. Did it. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001: Legacy Feature Summary **One-liner summary** diff --git a/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts b/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts index bb6233b74..f86dae777 100644 --- a/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts +++ b/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts @@ -263,12 +263,12 @@ async function main(): Promise { // No REQUIREMENTS.md since empty requirements assertTrue(!existsSync(join(base, '.gsd', 'REQUIREMENTS.md')), 'complete: REQUIREMENTS.md NOT written (empty)'); - // deriveState: all slices done, all tasks done — needs milestone summary for 'complete' - // Without milestone summary, it should be 'completing-milestone' or 'summarizing' + // deriveState: all slices done, all tasks done — needs validation then milestone summary + // Without VALIDATION file, it should be 'validating-milestone' const state = await deriveState(base); - // All slices are done in roadmap. Milestone summary doesn't exist. - // deriveState should return 'completing-milestone' since all slices done but no milestone summary. - assertEq(state.phase, 'completing-milestone', 'complete: deriveState phase is completing-milestone'); + // All slices are done in roadmap. No VALIDATION or SUMMARY exists. + // deriveState should return 'validating-milestone' since validation gate precedes completion. 
+ assertEq(state.phase, 'validating-milestone', 'complete: deriveState phase is validating-milestone'); assertTrue(state.activeMilestone !== null, 'complete: deriveState has activeMilestone'); assertEq(state.activeMilestone!.id, 'M001', 'complete: deriveState activeMilestone is M001'); diff --git a/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts b/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts index 1077e70b1..b9140c561 100644 --- a/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts +++ b/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts @@ -58,6 +58,7 @@ function writeCompleteMilestone(base: string, mid: string): void { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`); writeFileSync(join(dir, `${mid}-SUMMARY.md`), `# ${mid} Summary\n\nComplete.`); } diff --git a/src/resources/extensions/gsd/tests/validate-milestone.test.ts b/src/resources/extensions/gsd/tests/validate-milestone.test.ts new file mode 100644 index 000000000..d0e0f4c2d --- /dev/null +++ b/src/resources/extensions/gsd/tests/validate-milestone.test.ts @@ -0,0 +1,316 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, writeFileSync, existsSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { randomUUID } from "node:crypto"; + +import { deriveState, isValidationTerminal } from "../state.ts"; +import { resolveExpectedArtifactPath, verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.ts"; +import { resolveDispatch, type DispatchContext } from "../auto-dispatch.ts"; +import type { GSDState } from "../types.ts"; +import { clearPathCache } from "../paths.ts"; +import { clearParseCache } from "../files.ts"; + +// ─── Helpers 
────────────────────────────────────────────────────────────── + +function makeTmpBase(): string { + const base = join(tmpdir(), `gsd-val-test-${randomUUID()}`); + mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + clearPathCache(); + clearParseCache(); + try { rmSync(base, { recursive: true, force: true }); } catch { /* */ } +} + +function writeRoadmap(base: string, mid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-ROADMAP.md`), content); +} + +function writeMilestoneSummary(base: string, mid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); +} + +function writeValidation(base: string, mid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), content); +} + +function writeSlicePlan(base: string, mid: string, sid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid, "slices", sid); + mkdirSync(join(dir, "tasks"), { recursive: true }); + writeFileSync(join(dir, `${sid}-PLAN.md`), content); +} + +function writeSliceSummary(base: string, mid: string, sid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid, "slices", sid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${sid}-SUMMARY.md`), content); +} + +const ALL_DONE_ROADMAP = `# M001: Test Milestone + +## Vision +Test + +## Success Criteria +- It works + +## Slices + +- [x] **S01: First slice** \`risk:low\` \`depends:[]\` + > After this: it works + +## Boundary Map + +| From | To | Produces | Consumes | +|------|-----|----------|----------| +| S01 | terminal | output | nothing | +`; + +const 
CONTEXT_FILE = `--- +id: M001 +title: Test Milestone +--- + +# Context +Test context. +`; + +// ─── isValidationTerminal ───────────────────────────────────────────────── + +test("isValidationTerminal returns true for verdict: pass", () => { + const content = "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), true); +}); + +test("isValidationTerminal returns true for verdict: needs-attention", () => { + const content = "---\nverdict: needs-attention\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), true); +}); + +test("isValidationTerminal returns false for verdict: needs-remediation", () => { + const content = "---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), false); +}); + +test("isValidationTerminal returns false for missing frontmatter", () => { + const content = "# Validation\nNo frontmatter here."; + assert.equal(isValidationTerminal(content), false); +}); + +test("isValidationTerminal returns false for missing verdict field", () => { + const content = "---\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), false); +}); + +// ─── deriveState: validating-milestone ──────────────────────────────────── + +test("deriveState returns validating-milestone when all slices done and no VALIDATION file", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + // Write CONTEXT so milestone has a title + const dir = join(base, ".gsd", "milestones", "M001"); + writeFileSync(join(dir, "M001-CONTEXT.md"), CONTEXT_FILE); + + const state = await deriveState(base); + assert.equal(state.phase, "validating-milestone"); + assert.equal(state.activeMilestone?.id, "M001"); + assert.equal(state.activeSlice, null); + } finally { + cleanup(base); + } +}); + +test("deriveState returns completing-milestone when VALIDATION exists 
with terminal verdict", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + writeValidation(base, "M001", "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nAll good."); + + const state = await deriveState(base); + assert.equal(state.phase, "completing-milestone"); + assert.equal(state.activeMilestone?.id, "M001"); + } finally { + cleanup(base); + } +}); + +test("deriveState returns validating-milestone when VALIDATION exists with needs-remediation verdict", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + writeValidation(base, "M001", "---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation\nNeeds fixes."); + + const state = await deriveState(base); + assert.equal(state.phase, "validating-milestone"); + assert.equal(state.activeMilestone?.id, "M001"); + } finally { + cleanup(base); + } +}); + +test("deriveState returns complete when both VALIDATION and SUMMARY exist", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + writeValidation(base, "M001", "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed."); + writeMilestoneSummary(base, "M001", "# Summary\nDone."); + + const state = await deriveState(base); + assert.equal(state.phase, "complete"); + } finally { + cleanup(base); + } +}); + +// ─── Dispatch rule ──────────────────────────────────────────────────────── + +test("dispatch rule matches validating-milestone phase", async () => { + const state: GSDState = { + activeMilestone: { id: "M001", title: "Test" }, + activeSlice: null, + activeTask: null, + phase: "validating-milestone", + recentDecisions: [], + blockers: [], + nextAction: "Validate milestone M001.", + registry: [{ id: "M001", title: "Test", status: "active" }], + progress: { milestones: { done: 0, total: 1 } }, + }; + + const base = makeTmpBase(); + try { + // Set up minimal milestone structure for 
the prompt builder + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + + const ctx: DispatchContext = { + basePath: base, + mid: "M001", + midTitle: "Test", + state, + prefs: undefined, + }; + const result = await resolveDispatch(ctx); + assert.equal(result.action, "dispatch"); + if (result.action === "dispatch") { + assert.equal(result.unitType, "validate-milestone"); + assert.equal(result.unitId, "M001"); + } + } finally { + cleanup(base); + } +}); + +test("dispatch rule skips when skip_milestone_validation preference is set", async () => { + const state: GSDState = { + activeMilestone: { id: "M001", title: "Test" }, + activeSlice: null, + activeTask: null, + phase: "validating-milestone", + recentDecisions: [], + blockers: [], + nextAction: "Validate milestone M001.", + registry: [{ id: "M001", title: "Test", status: "active" }], + progress: { milestones: { done: 0, total: 1 } }, + }; + + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + + const ctx: DispatchContext = { + basePath: base, + mid: "M001", + midTitle: "Test", + state, + prefs: { phases: { skip_milestone_validation: true } }, + }; + const result = await resolveDispatch(ctx); + assert.equal(result.action, "skip"); + + // Verify the VALIDATION file was written + const validationPath = join(base, ".gsd", "milestones", "M001", "M001-VALIDATION.md"); + assert.ok(existsSync(validationPath), "VALIDATION file should be written on skip"); + } finally { + cleanup(base); + } +}); + +// ─── Artifact resolution & verification ─────────────────────────────────── + +test("resolveExpectedArtifactPath returns VALIDATION path for validate-milestone", () => { + const base = makeTmpBase(); + try { + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + const result = resolveExpectedArtifactPath("validate-milestone", "M001", base); + assert.ok(result); + assert.ok(result!.includes("VALIDATION")); + } finally { + cleanup(base); + } +}); + 
+test("verifyExpectedArtifact passes when VALIDATION.md exists", () => { + const base = makeTmpBase(); + try { + writeValidation(base, "M001", "---\nverdict: pass\n---\n# Val"); + clearPathCache(); + clearParseCache(); + const result = verifyExpectedArtifact("validate-milestone", "M001", base); + assert.equal(result, true); + } finally { + cleanup(base); + } +}); + +test("verifyExpectedArtifact fails when VALIDATION.md is missing", () => { + const base = makeTmpBase(); + try { + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + clearPathCache(); + clearParseCache(); + const result = verifyExpectedArtifact("validate-milestone", "M001", base); + assert.equal(result, false); + } finally { + cleanup(base); + } +}); + +// ─── diagnoseExpectedArtifact ───────────────────────────────────────────── + +test("diagnoseExpectedArtifact returns validation path for validate-milestone", () => { + const base = makeTmpBase(); + try { + const result = diagnoseExpectedArtifact("validate-milestone", "M001", base); + assert.ok(result); + assert.ok(result!.includes("VALIDATION")); + assert.ok(result!.includes("milestone validation report")); + } finally { + cleanup(base); + } +}); + +// ─── buildLoopRemediationSteps ──────────────────────────────────────────── + +test("buildLoopRemediationSteps returns steps for validate-milestone", () => { + const base = makeTmpBase(); + try { + const result = buildLoopRemediationSteps("validate-milestone", "M001", base); + assert.ok(result); + assert.ok(result!.includes("VALIDATION")); + assert.ok(result!.includes("verdict: pass")); + assert.ok(result!.includes("gsd doctor")); + } finally { + cleanup(base); + } +}); diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index 49da86004..add4f09d7 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -5,7 +5,7 @@ // ─── Enums & Literal Unions ──────────────────────────────────────────────── export 
type RiskLevel = 'low' | 'medium' | 'high'; -export type Phase = 'pre-planning' | 'needs-discussion' | 'discussing' | 'researching' | 'planning' | 'executing' | 'verifying' | 'summarizing' | 'advancing' | 'completing-milestone' | 'replanning-slice' | 'complete' | 'paused' | 'blocked'; +export type Phase = 'pre-planning' | 'needs-discussion' | 'discussing' | 'researching' | 'planning' | 'executing' | 'verifying' | 'summarizing' | 'advancing' | 'validating-milestone' | 'completing-milestone' | 'replanning-slice' | 'complete' | 'paused' | 'blocked'; export type ContinueStatus = 'in_progress' | 'interrupted' | 'compacted'; // ─── Roadmap (Milestone-level) ───────────────────────────────────────────── @@ -264,6 +264,7 @@ export interface PhaseSkipPreferences { skip_research?: boolean; skip_reassess?: boolean; skip_slice_research?: boolean; + skip_milestone_validation?: boolean; } export interface NotificationPreferences {