fix(auto): pause on validate-milestone needs-remediation without slices (#4094)

When validate-milestone wrote VALIDATION.md with verdict=needs-remediation
but the agent failed to call gsd_reassess_roadmap to add remediation
slices, state.ts re-derived phase: validating-milestone indefinitely
because the existing #3596/#3670 guard treats needs-remediation as
non-terminal regardless of whether new work was queued. The stuck
detector only fired after 3 consecutive dispatches (~$3 + ~12 min wasted
per incident). Reproduced on M022 and M024.

Add a post-unit guard in runPostUnitVerification for validate-milestone
units: if VALIDATION.md verdict is needs-remediation and no incomplete
slices exist for the milestone (DB-authoritative via getMilestoneSlices,
filesystem fallback via parseRoadmap), pause auto-mode immediately with
a clear blocker. The legitimate re-validation flow is preserved — when
remediation slices have been queued (any non-closed status), the guard
returns continue and the existing state machine handles the work.

Tests cover: pause on all-closed scenario, skipped-status handled as
closed, continue when a queued remediation slice exists, continue on
verdict=pass, and continue when no VALIDATION file is present.
This commit is contained in:
Jeremy 2026-04-14 06:32:42 -05:00
parent 24f51fd76b
commit 6b21ccbd54
2 changed files with 277 additions and 3 deletions

View file

@ -12,10 +12,15 @@
import type { ExtensionContext, ExtensionAPI } from "@gsd/pi-coding-agent";
import { mkdirSync, writeFileSync } from "node:fs";
import { resolveSliceFile, resolveSlicePath } from "./paths.js";
import { resolveSliceFile, resolveSlicePath, resolveMilestoneFile } from "./paths.js";
import { parseUnitId } from "./unit-id.js";
import { isDbAvailable, getTask, getSliceTasks, type TaskRow } from "./gsd-db.js";
import { isDbAvailable, getTask, getSliceTasks, getMilestoneSlices, type TaskRow } from "./gsd-db.js";
import { loadEffectiveGSDPreferences } from "./preferences.js";
import { extractVerdict } from "./verdict-parser.js";
import { isClosedStatus } from "./status-guards.js";
import { loadFile } from "./files.js";
import { parseRoadmap } from "./parsers-legacy.js";
import { isMilestoneComplete } from "./state.js";
import {
runVerificationGate,
formatFailureContext,
@ -43,6 +48,88 @@ function isInfraVerificationFailure(stderr: string): boolean {
);
}
/**
* Post-unit guard for `validate-milestone` units (#4094).
*
* When validate-milestone writes verdict=needs-remediation, the agent is
* expected to also call gsd_reassess_roadmap in the same turn to add
* remediation slices. If they don't, the state machine re-derives
* `phase: validating-milestone` indefinitely (all slices still complete +
* verdict still needs-remediation), wasting ~3 dispatches before the stuck
* detector fires.
*
* This guard fires immediately on the first occurrence: if VALIDATION.md
* verdict is needs-remediation and no incomplete slices exist for the
* milestone, pause the auto-loop with a clear blocker.
*/
async function runValidateMilestonePostCheck(
vctx: VerificationContext,
pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>,
): Promise<VerificationResult> {
const { s, ctx, pi } = vctx;
if (!s.currentUnit) return "continue";
const { milestone: mid } = parseUnitId(s.currentUnit.id);
if (!mid) return "continue";
const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION");
if (!validationFile) return "continue";
const validationContent = await loadFile(validationFile);
if (!validationContent) return "continue";
const verdict = extractVerdict(validationContent);
if (verdict !== "needs-remediation") return "continue";
const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
// If any non-closed slices exist, the agent successfully queued remediation
// work — proceed normally. The state machine will execute those slices and
// re-validate per the #3596/#3670 fix.
if (incompleteSliceCount > 0) return "continue";
ctx.ui.notify(
`Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
"error",
);
process.stderr.write(
`validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
`The agent must call gsd_reassess_roadmap to add remediation slices before re-validation.\n`,
);
await pauseAuto(ctx, pi);
return "pause";
}
/**
* Count slices for a milestone that are not in a closed status.
* DB-backed projects are authoritative (#4094 peer review); falls back to
* roadmap parsing only when the DB is unavailable.
*/
async function countIncompleteSlices(basePath: string, milestoneId: string): Promise<number> {
if (isDbAvailable()) {
const slices = getMilestoneSlices(milestoneId);
if (slices.length === 0) {
// No DB rows — treat as "unknown", do not pause.
return 1;
}
return slices.filter((slice) => !isClosedStatus(slice.status)).length;
}
// Filesystem fallback: parse the roadmap markdown.
try {
const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
if (!roadmapFile) return 1;
const roadmapContent = await loadFile(roadmapFile);
if (!roadmapContent) return 1;
const roadmap = parseRoadmap(roadmapContent);
if (roadmap.slices.length === 0) return 1;
return isMilestoneComplete(roadmap) ? 0 : 1;
} catch {
// Parsing failures should not cause false-positive pauses.
return 1;
}
}
/**
* Run the verification gate for the current execute-task unit.
* Returns:
@ -56,7 +143,15 @@ export async function runPostUnitVerification(
): Promise<VerificationResult> {
const { s, ctx, pi } = vctx;
if (!s.currentUnit || s.currentUnit.type !== "execute-task") {
if (!s.currentUnit) {
return "continue";
}
if (s.currentUnit.type === "validate-milestone") {
return await runValidateMilestonePostCheck(vctx, pauseAuto);
}
if (s.currentUnit.type !== "execute-task") {
return "continue";
}

View file

@ -0,0 +1,179 @@
// gsd-pi — Regression tests for the validate-milestone stuck-loop guard (#4094)
import { describe, test, mock, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import { tmpdir } from "node:os";
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import { runPostUnitVerification, type VerificationContext } from "../auto-verification.ts";
import { AutoSession } from "../auto/session.ts";
import {
openDatabase,
closeDatabase,
insertMilestone,
insertSlice,
} from "../gsd-db.ts";
import { invalidateAllCaches } from "../cache.ts";
import { _clearGsdRootCache } from "../paths.ts";
let tempDir: string;
let dbPath: string;
let originalCwd: string;
function makeMockCtx() {
return {
ui: {
notify: mock.fn(),
setStatus: () => {},
setWidget: () => {},
setFooter: () => {},
},
model: { id: "test-model" },
} as any;
}
function makeMockPi() {
return {
sendMessage: mock.fn(),
setModel: mock.fn(async () => true),
} as any;
}
function makeMockSession(basePath: string, unitType: string, unitId: string): AutoSession {
const s = new AutoSession();
s.basePath = basePath;
s.active = true;
s.pendingVerificationRetry = null;
s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
return s;
}
function setupTestEnvironment(): void {
originalCwd = process.cwd();
tempDir = join(tmpdir(), `validate-milestone-guard-${Date.now()}-${Math.random().toString(36).slice(2)}`);
mkdirSync(tempDir, { recursive: true });
const milestoneDir = join(tempDir, ".gsd", "milestones", "M001");
mkdirSync(milestoneDir, { recursive: true });
process.chdir(tempDir);
_clearGsdRootCache();
dbPath = join(tempDir, ".gsd", "gsd.db");
openDatabase(dbPath);
invalidateAllCaches();
}
function cleanupTestEnvironment(): void {
try { process.chdir(originalCwd); } catch { /* ignore */ }
try { closeDatabase(); } catch { /* ignore */ }
try { rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
}
function writeValidationFile(verdict: string): void {
const path = join(tempDir, ".gsd", "milestones", "M001", "M001-VALIDATION.md");
const content = `---
verdict: ${verdict}
remediation_round: 1
---
# Milestone Validation: M001
## Verdict Rationale
Test fixture
`;
writeFileSync(path, content, "utf-8");
invalidateAllCaches();
}
describe("validate-milestone stuck-loop guard (#4094)", () => {
beforeEach(() => setupTestEnvironment());
afterEach(() => cleanupTestEnvironment());
test("pauses when verdict=needs-remediation and all slices are closed", async () => {
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
insertSlice({ id: "S02", milestoneId: "M001", title: "Slice 2", status: "done" });
writeValidationFile("needs-remediation");
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, "validate-milestone", "M001");
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
assert.equal(result, "pause");
assert.equal(pauseAutoMock.mock.callCount(), 1);
assert.equal(ctx.ui.notify.mock.callCount(), 1);
const notifyArgs = ctx.ui.notify.mock.calls[0].arguments;
assert.match(notifyArgs[0], /needs-remediation/);
assert.equal(notifyArgs[1], "error");
});
test("treats skipped slices as closed", async () => {
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
insertSlice({ id: "S02", milestoneId: "M001", title: "Slice 2", status: "skipped" });
writeValidationFile("needs-remediation");
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, "validate-milestone", "M001");
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
assert.equal(result, "pause");
assert.equal(pauseAutoMock.mock.callCount(), 1);
});
test("continues when verdict=needs-remediation but a queued remediation slice exists", async () => {
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
insertSlice({ id: "S02", milestoneId: "M001", title: "Remediation", status: "queued" });
writeValidationFile("needs-remediation");
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, "validate-milestone", "M001");
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
assert.equal(result, "continue");
assert.equal(pauseAutoMock.mock.callCount(), 0);
});
test("continues when verdict is pass", async () => {
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
writeValidationFile("pass");
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, "validate-milestone", "M001");
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
assert.equal(result, "continue");
assert.equal(pauseAutoMock.mock.callCount(), 0);
});
test("continues when no VALIDATION file exists yet", async () => {
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, "validate-milestone", "M001");
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
assert.equal(result, "continue");
assert.equal(pauseAutoMock.mock.callCount(), 0);
});
});