From 6b21ccbd543577f67a67f571e07688821063a1fa Mon Sep 17 00:00:00 2001 From: Jeremy Date: Tue, 14 Apr 2026 06:32:42 -0500 Subject: [PATCH] fix(auto): pause on validate-milestone needs-remediation without slices (#4094) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When validate-milestone wrote VALIDATION.md with verdict=needs-remediation but the agent failed to call gsd_reassess_roadmap to add remediation slices, state.ts re-derived phase: validating-milestone indefinitely because the existing #3596/#3670 guard treats needs-remediation as non-terminal regardless of whether new work was queued. The stuck detector only fired after 3 consecutive dispatches (~$3 + ~12 min wasted per incident). Reproduced on M022 and M024. Add a post-unit guard in runPostUnitVerification for validate-milestone units: if VALIDATION.md verdict is needs-remediation and no incomplete slices exist for the milestone (DB-authoritative via getMilestoneSlices, filesystem fallback via parseRoadmap), pause auto-mode immediately with a clear blocker. The legitimate re-validation flow is preserved — when remediation slices have been queued (any non-closed status), the guard returns continue and the existing state machine handles the work. Tests cover: pause on all-closed scenario, skipped-status handled as closed, continue when a queued remediation slice exists, continue on verdict=pass, and continue when no VALIDATION file is present. --- .../extensions/gsd/auto-verification.ts | 101 +++++++++- .../validate-milestone-stuck-guard.test.ts | 179 ++++++++++++++++++ 2 files changed, 277 insertions(+), 3 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/validate-milestone-stuck-guard.test.ts diff --git a/src/resources/extensions/gsd/auto-verification.ts b/src/resources/extensions/gsd/auto-verification.ts index 73595df46..943edf4c8 100644 --- a/src/resources/extensions/gsd/auto-verification.ts +++ b/src/resources/extensions/gsd/auto-verification.ts @@ -12,10 +12,15 @@ import type { ExtensionContext, ExtensionAPI } from "@gsd/pi-coding-agent"; import { mkdirSync, writeFileSync } from "node:fs"; -import { resolveSliceFile, resolveSlicePath } from "./paths.js"; +import { resolveSliceFile, resolveSlicePath, resolveMilestoneFile } from "./paths.js"; import { parseUnitId } from "./unit-id.js"; -import { isDbAvailable, getTask, getSliceTasks, type TaskRow } from "./gsd-db.js"; +import { isDbAvailable, getTask, getSliceTasks, getMilestoneSlices, type TaskRow } from "./gsd-db.js"; import { loadEffectiveGSDPreferences } from "./preferences.js"; +import { extractVerdict } from "./verdict-parser.js"; +import { isClosedStatus } from "./status-guards.js"; +import { loadFile } from "./files.js"; +import { parseRoadmap } from "./parsers-legacy.js"; +import { isMilestoneComplete } from "./state.js"; import { runVerificationGate, formatFailureContext, @@ -43,6 +48,88 @@ function isInfraVerificationFailure(stderr: string): boolean { ); } +/** + * Post-unit guard for `validate-milestone` units (#4094). + * + * When validate-milestone writes verdict=needs-remediation, the agent is + * expected to also call gsd_reassess_roadmap in the same turn to add + * remediation slices. If they don't, the state machine re-derives + * `phase: validating-milestone` indefinitely (all slices still complete + + * verdict still needs-remediation), wasting ~3 dispatches before the stuck + * detector fires. + * + * This guard fires immediately on the first occurrence: if VALIDATION.md + * verdict is needs-remediation and no incomplete slices exist for the + * milestone, pause the auto-loop with a clear blocker. + */ +async function runValidateMilestonePostCheck( + vctx: VerificationContext, + pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise, +): Promise { + const { s, ctx, pi } = vctx; + if (!s.currentUnit) return "continue"; + + const { milestone: mid } = parseUnitId(s.currentUnit.id); + if (!mid) return "continue"; + + const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION"); + if (!validationFile) return "continue"; + + const validationContent = await loadFile(validationFile); + if (!validationContent) return "continue"; + + const verdict = extractVerdict(validationContent); + if (verdict !== "needs-remediation") return "continue"; + + const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid); + + // If any non-closed slices exist, the agent successfully queued remediation + // work — proceed normally. The state machine will execute those slices and + // re-validate per the #3596/#3670 fix. + if (incompleteSliceCount > 0) return "continue"; + + ctx.ui.notify( + `Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`, + "error", + ); + process.stderr.write( + `validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` + + `The agent must call gsd_reassess_roadmap to add remediation slices before re-validation.\n`, + ); + await pauseAuto(ctx, pi); + return "pause"; +} + +/** + * Count slices for a milestone that are not in a closed status. + * DB-backed projects are authoritative (#4094 peer review); falls back to + * roadmap parsing only when the DB is unavailable. + */ +async function countIncompleteSlices(basePath: string, milestoneId: string): Promise { + if (isDbAvailable()) { + const slices = getMilestoneSlices(milestoneId); + if (slices.length === 0) { + // No DB rows — treat as "unknown", do not pause. + return 1; + } + return slices.filter((slice) => !isClosedStatus(slice.status)).length; + } + + // Filesystem fallback: parse the roadmap markdown. + try { + const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); + if (!roadmapFile) return 1; + const roadmapContent = await loadFile(roadmapFile); + if (!roadmapContent) return 1; + const roadmap = parseRoadmap(roadmapContent); + if (roadmap.slices.length === 0) return 1; + return isMilestoneComplete(roadmap) ? 0 : 1; + } catch { + // Parsing failures should not cause false-positive pauses. + return 1; + } +} + /** * Run the verification gate for the current execute-task unit. * Returns: @@ -56,7 +143,15 @@ export async function runPostUnitVerification( ): Promise { const { s, ctx, pi } = vctx; - if (!s.currentUnit || s.currentUnit.type !== "execute-task") { + if (!s.currentUnit) { + return "continue"; + } + + if (s.currentUnit.type === "validate-milestone") { + return await runValidateMilestonePostCheck(vctx, pauseAuto); + } + + if (s.currentUnit.type !== "execute-task") { return "continue"; } diff --git a/src/resources/extensions/gsd/tests/validate-milestone-stuck-guard.test.ts b/src/resources/extensions/gsd/tests/validate-milestone-stuck-guard.test.ts new file mode 100644 index 000000000..0c907df54 --- /dev/null +++ b/src/resources/extensions/gsd/tests/validate-milestone-stuck-guard.test.ts @@ -0,0 +1,179 @@ +// gsd-pi — Regression tests for the validate-milestone stuck-loop guard (#4094) + +import { describe, test, mock, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { tmpdir } from "node:os"; +import { mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; + +import { runPostUnitVerification, type VerificationContext } from "../auto-verification.ts"; +import { AutoSession } from "../auto/session.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, +} from "../gsd-db.ts"; +import { invalidateAllCaches } from "../cache.ts"; +import { _clearGsdRootCache } from "../paths.ts"; + +let tempDir: string; +let dbPath: string; +let originalCwd: string; + +function makeMockCtx() { + return { + ui: { + notify: mock.fn(), + setStatus: () => {}, + setWidget: () => {}, + setFooter: () => {}, + }, + model: { id: "test-model" }, + } as any; +} + +function makeMockPi() { + return { + sendMessage: mock.fn(), + setModel: mock.fn(async () => true), + } as any; +} + +function makeMockSession(basePath: string, unitType: string, unitId: string): AutoSession { + const s = new AutoSession(); + s.basePath = basePath; + s.active = true; + s.pendingVerificationRetry = null; + s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() }; + return s; +} + +function setupTestEnvironment(): void { + originalCwd = process.cwd(); + tempDir = join(tmpdir(), `validate-milestone-guard-${Date.now()}-${Math.random().toString(36).slice(2)}`); + mkdirSync(tempDir, { recursive: true }); + + const milestoneDir = join(tempDir, ".gsd", "milestones", "M001"); + mkdirSync(milestoneDir, { recursive: true }); + + process.chdir(tempDir); + _clearGsdRootCache(); + + dbPath = join(tempDir, ".gsd", "gsd.db"); + openDatabase(dbPath); + invalidateAllCaches(); +} + +function cleanupTestEnvironment(): void { + try { process.chdir(originalCwd); } catch { /* ignore */ } + try { closeDatabase(); } catch { /* ignore */ } + try { rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ } +} + +function writeValidationFile(verdict: string): void { + const path = join(tempDir, ".gsd", "milestones", "M001", "M001-VALIDATION.md"); + const content = `--- +verdict: ${verdict} +remediation_round: 1 +--- + +# Milestone Validation: M001 + +## Verdict Rationale +Test fixture +`; + writeFileSync(path, content, "utf-8"); + invalidateAllCaches(); +} + +describe("validate-milestone stuck-loop guard (#4094)", () => { + beforeEach(() => setupTestEnvironment()); + afterEach(() => cleanupTestEnvironment()); + + test("pauses when verdict=needs-remediation and all slices are closed", async () => { + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Slice 2", status: "done" }); + writeValidationFile("needs-remediation"); + + const ctx = makeMockCtx(); + const pi = makeMockPi(); + const pauseAutoMock = mock.fn(async () => {}); + const s = makeMockSession(tempDir, "validate-milestone", "M001"); + + const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock); + + assert.equal(result, "pause"); + assert.equal(pauseAutoMock.mock.callCount(), 1); + assert.equal(ctx.ui.notify.mock.callCount(), 1); + const notifyArgs = ctx.ui.notify.mock.calls[0].arguments; + assert.match(notifyArgs[0], /needs-remediation/); + assert.equal(notifyArgs[1], "error"); + }); + + test("treats skipped slices as closed", async () => { + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Slice 2", status: "skipped" }); + writeValidationFile("needs-remediation"); + + const ctx = makeMockCtx(); + const pi = makeMockPi(); + const pauseAutoMock = mock.fn(async () => {}); + const s = makeMockSession(tempDir, "validate-milestone", "M001"); + + const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock); + + assert.equal(result, "pause"); + assert.equal(pauseAutoMock.mock.callCount(), 1); + }); + + test("continues when verdict=needs-remediation but a queued remediation slice exists", async () => { + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Remediation", status: "queued" }); + writeValidationFile("needs-remediation"); + + const ctx = makeMockCtx(); + const pi = makeMockPi(); + const pauseAutoMock = mock.fn(async () => {}); + const s = makeMockSession(tempDir, "validate-milestone", "M001"); + + const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock); + + assert.equal(result, "continue"); + assert.equal(pauseAutoMock.mock.callCount(), 0); + }); + + test("continues when verdict is pass", async () => { + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" }); + writeValidationFile("pass"); + + const ctx = makeMockCtx(); + const pi = makeMockPi(); + const pauseAutoMock = mock.fn(async () => {}); + const s = makeMockSession(tempDir, "validate-milestone", "M001"); + + const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock); + + assert.equal(result, "continue"); + assert.equal(pauseAutoMock.mock.callCount(), 0); + }); + + test("continues when no VALIDATION file exists yet", async () => { + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" }); + + const ctx = makeMockCtx(); + const pi = makeMockPi(); + const pauseAutoMock = mock.fn(async () => {}); + const s = makeMockSession(tempDir, "validate-milestone", "M001"); + + const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock); + + assert.equal(result, "continue"); + assert.equal(pauseAutoMock.mock.callCount(), 0); + }); +});