diff --git a/docs-internal/ADR-003-pipeline-simplification.md b/docs-internal/ADR-003-pipeline-simplification.md index ddc31f609..917927eea 100644 --- a/docs-internal/ADR-003-pipeline-simplification.md +++ b/docs-internal/ADR-003-pipeline-simplification.md @@ -217,18 +217,18 @@ For the same 4-slice, 3-task milestone: #### 5. Replace validate-milestone with mechanical verification -**Current:** An LLM session re-reads the ROADMAP and all slice summaries, checks success criteria against delivery evidence, and writes a VALIDATION.md with a verdict. It also inlines UAT-RESULT artifacts from slices with `uat_dispatch` enabled. +**Current:** An LLM session re-reads the ROADMAP and all slice summaries, checks success criteria against delivery evidence, and writes a VALIDATION.md with a verdict. It also inlines UAT artifacts from slices with `uat_dispatch` enabled. **New:** The system mechanically aggregates verification results from all tasks and slices. The canonical verification data sources are: 1. **`T##-VERIFY.json`** files (written by `writeVerificationJSON()` in `verification-evidence.ts`) — machine-readable per-task verification results with command, exit code, verdict, duration, and blocking status. -2. **`S##-UAT-RESULT.md`** files (when `uat_dispatch` is enabled) — human or artifact-driven UAT outcomes. +2. **`S##-UAT.md`** files (when `uat_dispatch` is enabled) — human or artifact-driven UAT outcomes. 3. **Task summary frontmatter** `verification_result` field — a human-readable pass/fail string (not structured, used as a secondary signal). -The aggregator reads `T##-VERIFY.json` as the primary source of truth, supplements with UAT-RESULT artifacts, and produces a deterministic VALIDATION.md. +The aggregator reads `T##-VERIFY.json` as the primary source of truth, supplements with UAT artifacts, and produces a deterministic VALIDATION.md. **What changes:** -- A new `aggregateMilestoneVerification()` function collects `T##-VERIFY.json` files and `S##-UAT-RESULT.md` files across all slices. +- A new `aggregateMilestoneVerification()` function collects `T##-VERIFY.json` files and `S##-UAT.md` files across all slices. - The function produces a VALIDATION.md with per-task and per-slice pass/fail status, UAT evidence, and an overall verdict. - The LLM-driven validate-milestone session is removed from the default pipeline. - The validate-milestone template is retained for explicit dispatch (users who want LLM-driven validation can run `/gsd dispatch validate`). @@ -254,8 +254,8 @@ async function aggregateMilestoneVerification(base: string, mid: string): Promis } } - // Secondary source: S##-UAT-RESULT.md (when uat_dispatch enabled) - const uatResultFile = resolveSliceFile(base, mid, slice.id, "UAT-RESULT"); + // Secondary source: S##-UAT.md (when uat_dispatch enabled) + const uatResultFile = resolveSliceFile(base, mid, slice.id, "UAT"); if (uatResultFile) { const uatContent = await loadFile(uatResultFile); if (uatContent) uatResults.push({ sliceId: slice.id, content: uatContent }); @@ -476,7 +476,7 @@ async function mechanicalSliceCompletion(base: string, mid: string, sid: string) #### Mechanical milestone validation -See `aggregateMilestoneVerification()` above (Section 5). Reads `T##-VERIFY.json` and `S##-UAT-RESULT.md` as canonical sources. +See `aggregateMilestoneVerification()` above (Section 5). Reads `T##-VERIFY.json` and `S##-UAT.md` as canonical sources. #### Mechanical milestone summary @@ -547,7 +547,7 @@ At current Opus pricing ($15/MTok input, $75/MTok output — as of March 2026), | `auto-prompts.ts` — plan-milestone exploration | ~30 | Research instructions merged in | | `auto-prompts.ts` — plan-slice reassessment + exploration | ~25 | Reassessment + exploration preamble | | `auto-post-unit.ts` — `mechanicalSliceCompletion()` | ~80 | Structured frontmatter aggregation, UAT generation, artifact writes | -| `auto-verification.ts` — `aggregateMilestoneVerification()` | ~60 | T##-VERIFY.json + UAT-RESULT aggregation | +| `auto-verification.ts` — `aggregateMilestoneVerification()` | ~60 | T##-VERIFY.json + UAT aggregation | | `auto-unit-closeout.ts` — `generateMilestoneSummary()` | ~60 | Mechanical summary generation | | **Total added** | **~255** | | @@ -694,7 +694,7 @@ The mechanical summary quality might be insufficient for complex slices. 13. Implement `mechanicalRequirementsUpdate()` and `appendNewDecisions()` ### Phase 3: Mechanical milestone validation + completion -14. Implement `aggregateMilestoneVerification()` reading `T##-VERIFY.json` and `S##-UAT-RESULT.md` +14. Implement `aggregateMilestoneVerification()` reading `T##-VERIFY.json` and `S##-UAT.md` 15. Implement `generateMilestoneSummary()` from slice summary aggregation 16. Wire into post-unit processing: after last slice completion, run mechanical validation + summary 17. Make reassess-roadmap opt-in via `reassess_after_slice` preference (default: false) @@ -723,14 +723,14 @@ The mechanical summary quality might be insufficient for complex slices. 3. ✅ Token savings double-counting (eliminated sessions + re-ingestion) — **fixed**: removed overlap, noted savings are not additive 4. ✅ Context inlining change (file paths vs inline) underanalyzed — **fixed**: expanded to dedicated risk section with enforcement strategy, phased rollout, and interaction with budget engine 5. ✅ Budget engine interaction not discussed — **fixed**: addressed in context inlining section -6. ✅ `aggregateMilestoneVerification()` reads wrong data source — **fixed**: now reads `T##-VERIFY.json` as primary source, supplemented by `S##-UAT-RESULT.md` +6. ✅ `aggregateMilestoneVerification()` reads wrong data source — **fixed**: now reads `T##-VERIFY.json` as primary source, supplemented by `S##-UAT.md` 7. ✅ Phase ordering creates heavy intermediate state (Phase 1 without Phase 4) — **fixed**: Phase 1 now includes targeted inlining reduction for planning sessions 8. ✅ ADR number conflict — **fixed**: confirmed no ADR-003 exists in `docs/` (the referenced file doesn't exist in current git) **OpenAI Codex** identified 6 issues: 1. ✅ HIGH: Folding completion into execute-task breaks verification-retry model — **fixed**: moved completion to post-gate mechanical processing instead of executor prompt. Added Alternative D explaining why. 2. ✅ HIGH: Mechanical validation reads nonexistent `verification_evidence` frontmatter — **fixed**: now reads `T##-VERIFY.json` (canonical machine-readable source from `verification-evidence.ts`) -3. ✅ HIGH: Replacement validation drops UAT evidence — **fixed**: aggregator now reads both `T##-VERIFY.json` and `S##-UAT-RESULT.md` +3. ✅ HIGH: Replacement validation drops UAT evidence — **fixed**: aggregator now reads both `T##-VERIFY.json` and `S##-UAT.md` 4. ✅ HIGH: "State derivation stays unchanged" is false — **fixed**: explicitly documented that `deriveState()` phases are preserved, mechanical processing resolves them synchronously, fallback dispatch rules handle failures 5. ✅ MEDIUM: Folded completion omits REQUIREMENTS.md and KNOWLEDGE.md updates — **fixed**: mechanical completion handles REQUIREMENTS.md and DECISIONS.md; KNOWLEDGE.md addressed in Risk 5 6. ✅ MEDIUM: Session and token math inconsistent — **fixed**: complete rederivation with per-slice breakdown, corrected to 30 baseline sessions, noted profile variations diff --git a/src/resources/extensions/gsd/auto-artifact-paths.ts b/src/resources/extensions/gsd/auto-artifact-paths.ts index c296ad94a..41b72fe6e 100644 --- a/src/resources/extensions/gsd/auto-artifact-paths.ts +++ b/src/resources/extensions/gsd/auto-artifact-paths.ts @@ -53,7 +53,7 @@ export function resolveExpectedArtifactPath( } case "run-uat": { const dir = resolveSlicePath(base, mid, sid!); - return dir ? join(dir, buildSliceFileName(sid!, "UAT-RESULT")) : null; + return dir ? join(dir, buildSliceFileName(sid!, "UAT")) : null; } case "execute-task": { const tid = parts[2]; @@ -120,7 +120,7 @@ export function diagnoseExpectedArtifact( case "reassess-roadmap": return `${relSliceFile(base, mid!, sid!, "ASSESSMENT")} (roadmap reassessment)`; case "run-uat": - return `${relSliceFile(base, mid!, sid!, "UAT-RESULT")} (UAT result)`; + return `${relSliceFile(base, mid!, sid!, "UAT")} (UAT result)`; case "validate-milestone": return `${relMilestoneFile(base, mid!, "VALIDATION")} (milestone validation report)`; case "complete-milestone": diff --git a/src/resources/extensions/gsd/auto-dispatch.ts b/src/resources/extensions/gsd/auto-dispatch.ts index a84739d70..db88b5e7f 100644 --- a/src/resources/extensions/gsd/auto-dispatch.ts +++ b/src/resources/extensions/gsd/auto-dispatch.ts @@ -184,7 +184,7 @@ export const DISPATCH_RULES: DispatchRule[] = [ } for (const sliceId of completedSliceIds) { - const resultFile = resolveSliceFile(basePath, mid, sliceId, "UAT-RESULT"); + const resultFile = resolveSliceFile(basePath, mid, sliceId, "UAT"); if (!resultFile) continue; const content = await loadFile(resultFile); if (!content) continue; @@ -196,15 +196,9 @@ export const DISPATCH_RULES: DispatchRule[] = [ // produce PARTIAL when all automatable checks pass but human-only // checks remain — this should not block progression. const acceptableVerdicts: string[] = ["pass", "passed"]; - const uatFile = resolveSliceFile(basePath, mid, sliceId, "UAT"); - if (uatFile) { - const uatContent = await loadFile(uatFile); - if (uatContent) { - const uatType = extractUatType(uatContent); - if (uatType === "mixed" || uatType === "human-experience" || uatType === "live-runtime") { - acceptableVerdicts.push("partial"); - } - } + const uatType = extractUatType(content); + if (uatType === "mixed" || uatType === "human-experience" || uatType === "live-runtime") { + acceptableVerdicts.push("partial"); } if (verdict && !acceptableVerdicts.includes(verdict)) { diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index d683102dc..b710154f0 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -772,11 +772,8 @@ export async function checkNeedsRunUat( if (!uatFile) return null; const uatContent = await loadFile(uatFile); if (!uatContent) return null; - const uatResultFile = resolveSliceFile(base, mid, sid, "UAT-RESULT"); - if (uatResultFile) { - const hasResult = !!(await loadFile(uatResultFile)); - if (hasResult) return null; - } + // If the UAT file already contains a verdict, UAT has been run — skip + if (/verdict:\s*[\w-]+/i.test(uatContent)) return null; const uatType = extractUatType(uatContent) ?? "artifact-driven"; return { sliceId: sid, uatType }; } @@ -799,11 +796,8 @@ export async function checkNeedsRunUat( if (!uatFileFb) return null; const uatContentFb = await loadFile(uatFileFb); if (!uatContentFb) return null; - const uatResultFb = resolveSliceFile(base, mid, uatSid, "UAT-RESULT"); - if (uatResultFb) { - const hasResultFb = !!(await loadFile(uatResultFb)); - if (hasResultFb) return null; - } + // If the UAT file already contains a verdict, UAT has been run — skip + if (/verdict:\s*[\w-]+/i.test(uatContentFb)) return null; const uatTypeFb = extractUatType(uatContentFb) ?? "artifact-driven"; return { sliceId: uatSid, uatType: uatTypeFb }; } @@ -1349,8 +1343,8 @@ export async function buildValidateMilestonePrompt( const summaryRel = relSliceFile(base, mid, sid, "SUMMARY"); inlined.push(await inlineFile(summaryPath, summaryRel, `${sid} Summary`)); - const uatPath = resolveSliceFile(base, mid, sid, "UAT-RESULT"); - const uatRel = relSliceFile(base, mid, sid, "UAT-RESULT"); + const uatPath = resolveSliceFile(base, mid, sid, "UAT"); + const uatRel = relSliceFile(base, mid, sid, "UAT"); const uatInline = await inlineFileOptional(uatPath, uatRel, `${sid} UAT Result`); if (uatInline) inlined.push(uatInline); } @@ -1501,7 +1495,7 @@ export async function buildRunUatPrompt( const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`); - const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "UAT-RESULT")); + const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "UAT")); const uatType = extractUatType(uatContent) ?? "artifact-driven"; return loadPrompt("run-uat", { diff --git a/src/resources/extensions/gsd/auto-recovery.ts b/src/resources/extensions/gsd/auto-recovery.ts index 740eea825..a03b5887a 100644 --- a/src/resources/extensions/gsd/auto-recovery.ts +++ b/src/resources/extensions/gsd/auto-recovery.ts @@ -90,7 +90,7 @@ export function resolveExpectedArtifactPath( } case "run-uat": { const dir = resolveSlicePath(base, mid, sid!); - return dir ? join(dir, buildSliceFileName(sid!, "UAT-RESULT")) : null; + return dir ? join(dir, buildSliceFileName(sid!, "UAT")) : null; } case "execute-task": { const tid = parts[2]; @@ -503,7 +503,7 @@ export function diagnoseExpectedArtifact( case "reassess-roadmap": return `${relSliceFile(base, mid!, sid!, "ASSESSMENT")} (roadmap reassessment)`; case "run-uat": - return `${relSliceFile(base, mid!, sid!, "UAT-RESULT")} (UAT result)`; + return `${relSliceFile(base, mid!, sid!, "UAT")} (UAT result)`; case "validate-milestone": return `${relMilestoneFile(base, mid!, "VALIDATION")} (milestone validation report)`; case "complete-milestone": diff --git a/src/resources/extensions/gsd/prompts/forensics.md b/src/resources/extensions/gsd/prompts/forensics.md index 6be348c6e..f576d17c4 100644 --- a/src/resources/extensions/gsd/prompts/forensics.md +++ b/src/resources/extensions/gsd/prompts/forensics.md @@ -46,7 +46,7 @@ GSD extension source code is at: `{{gsdSourceDir}}` ├── milestones/{ID}/ — milestone artifacts │ ├── {ID}-ROADMAP.md, {ID}-RESEARCH.md, {ID}-CONTEXT.md, {ID}-SUMMARY.md │ └── slices/{SID}/ — slice artifacts -│ ├── {SID}-PLAN.md, {SID}-RESEARCH.md, {SID}-UAT-RESULT.md, {SID}-SUMMARY.md +│ ├── {SID}-PLAN.md, {SID}-RESEARCH.md, {SID}-UAT.md, {SID}-SUMMARY.md │ └── tasks/{TID}-PLAN.md, {TID}-SUMMARY.md └── worktrees/{milestoneId}/ — per-milestone worktree with replicated .gsd/ ``` diff --git a/src/resources/extensions/gsd/tests/auto-recovery.test.ts b/src/resources/extensions/gsd/tests/auto-recovery.test.ts index 4dc67b702..b533eaca4 100644 --- a/src/resources/extensions/gsd/tests/auto-recovery.test.ts +++ b/src/resources/extensions/gsd/tests/auto-recovery.test.ts @@ -112,7 +112,7 @@ test("resolveExpectedArtifactPath returns correct path for all slice-level types const uatResult = resolveExpectedArtifactPath("run-uat", "M001/S01", base); assert.ok(uatResult); - assert.ok(uatResult!.includes("UAT-RESULT")); + assert.ok(uatResult!.includes("UAT")); }); // ─── diagnoseExpectedArtifact ───────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/tests/run-uat.test.ts b/src/resources/extensions/gsd/tests/run-uat.test.ts index cff22ff0e..a6c6be294 100644 --- a/src/resources/extensions/gsd/tests/run-uat.test.ts +++ b/src/resources/extensions/gsd/tests/run-uat.test.ts @@ -171,7 +171,7 @@ test('(k) run-uat prompt template', () => { const milestoneId = 'M001'; const sliceId = 'S01'; const uatPath = '.gsd/milestones/M001/slices/S01/S01-UAT.md'; - const uatResultPath = '.gsd/milestones/M001/slices/S01/S01-UAT-RESULT.md'; + const uatResultPath = '.gsd/milestones/M001/slices/S01/S01-UAT.md'; const uatType = 'live-runtime'; const inlinedContext = ''; let promptResult: string | undefined; @@ -234,7 +234,7 @@ test('(k2) run-uat prompt references gsd_summary_save, not direct write', () => milestoneId: 'M001', sliceId: 'S01', uatPath: '.gsd/milestones/M001/slices/S01/S01-UAT.md', - uatResultPath: '.gsd/milestones/M001/slices/S01/S01-UAT-RESULT.md', + uatResultPath: '.gsd/milestones/M001/slices/S01/S01-UAT.md', uatType: 'artifact-driven', inlinedContext: '', }); @@ -265,14 +265,13 @@ test('(l) dispatch preconditions via resolveSliceFile', () => { 'resolveSliceFile(..., "UAT") returns non-null when UAT file exists (dispatch trigger state)', ); - const uatResultFilePath = resolveSliceFile(base, 'M001', 'S01', 'UAT-RESULT'); - assert.deepStrictEqual( - uatResultFilePath, - null, - 'resolveSliceFile(..., "UAT-RESULT") returns null when result file missing (dispatch trigger state)', + // UAT spec without a verdict line means UAT has not been run yet + const rawContent = readFileSync(uatFilePath!, 'utf-8'); + assert.ok( + !/verdict:\s*[\w-]+/i.test(rawContent), + 'UAT file without verdict indicates UAT has not been run (dispatch trigger state)', ); - const rawContent = readFileSync(uatFilePath!, 'utf-8'); assert.deepStrictEqual( extractUatType(rawContent), 'artifact-driven', @@ -286,13 +285,18 @@ test('(l) dispatch preconditions via resolveSliceFile', () => { test('test block at line 307', () => { const base = createFixtureBase(); try { - writeSliceFile(base, 'M001', 'S01', 'UAT', makeUatContent('artifact-driven')); - writeSliceFile(base, 'M001', 'S01', 'UAT-RESULT', '# UAT Result\n\nverdict: PASS\n'); + // Write UAT file with a verdict — simulates completed UAT + writeSliceFile(base, 'M001', 'S01', 'UAT', '# UAT Result\n\nverdict: PASS\n'); - const uatResultFilePath = resolveSliceFile(base, 'M001', 'S01', 'UAT-RESULT'); + const uatFilePath = resolveSliceFile(base, 'M001', 'S01', 'UAT'); assert.ok( - uatResultFilePath !== null, - 'resolveSliceFile(..., "UAT-RESULT") returns non-null when result file exists (idempotent skip state)', + uatFilePath !== null, + 'resolveSliceFile(..., "UAT") returns non-null when UAT file exists', + ); + const content = readFileSync(uatFilePath!, 'utf-8'); + assert.ok( + /verdict:\s*[\w-]+/i.test(content), + 'UAT file with verdict indicates UAT has been completed (idempotent skip state)', ); } finally { cleanup(base); @@ -390,7 +394,7 @@ test('(p) run-uat prompt allows PASS when human-only checks remain as NEEDS-HUMA milestoneId: 'M001', sliceId: 'S01', uatPath: '.gsd/milestones/M001/slices/S01/S01-UAT.md', - uatResultPath: '.gsd/milestones/M001/slices/S01/S01-UAT-RESULT.md', + uatResultPath: '.gsd/milestones/M001/slices/S01/S01-UAT.md', uatType: 'mixed', inlinedContext: '', }); @@ -432,7 +436,7 @@ test('(n) stale replay guard', async () => { ); writeSliceFile(base, 'M001', 'S01', 'UAT', makeUatContent('artifact-driven')); - writeSliceFile(base, 'M001', 'S01', 'UAT-RESULT', '---\nverdict: FAIL\n---\n'); + writeSliceFile(base, 'M001', 'S01', 'UAT', '---\nverdict: FAIL\n---\n'); const state = { activeMilestone: { id: 'M001', title: 'Test roadmap' }, @@ -449,7 +453,7 @@ test('(n) stale replay guard', async () => { assert.deepStrictEqual( result, null, - 'existing UAT-RESULT with FAIL verdict does not re-dispatch; verdict gate owns blocking', + 'existing UAT with FAIL verdict does not re-dispatch; verdict gate owns blocking', ); } finally { cleanup(base);