Merge pull request #4176 from jeremymcs/worktree-fix-4094-validate-milestone-loop
fix(auto): pause validate-milestone loop when needs-remediation has no slices (#4094)
This commit is contained in:
commit
cdcdb1459e
2 changed files with 277 additions and 3 deletions
|
|
@ -12,10 +12,15 @@
|
|||
|
||||
import type { ExtensionContext, ExtensionAPI } from "@gsd/pi-coding-agent";
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { resolveSliceFile, resolveSlicePath } from "./paths.js";
|
||||
import { resolveSliceFile, resolveSlicePath, resolveMilestoneFile } from "./paths.js";
|
||||
import { parseUnitId } from "./unit-id.js";
|
||||
import { isDbAvailable, getTask, getSliceTasks, type TaskRow } from "./gsd-db.js";
|
||||
import { isDbAvailable, getTask, getSliceTasks, getMilestoneSlices, type TaskRow } from "./gsd-db.js";
|
||||
import { loadEffectiveGSDPreferences } from "./preferences.js";
|
||||
import { extractVerdict } from "./verdict-parser.js";
|
||||
import { isClosedStatus } from "./status-guards.js";
|
||||
import { loadFile } from "./files.js";
|
||||
import { parseRoadmap } from "./parsers-legacy.js";
|
||||
import { isMilestoneComplete } from "./state.js";
|
||||
import {
|
||||
runVerificationGate,
|
||||
formatFailureContext,
|
||||
|
|
@ -43,6 +48,88 @@ function isInfraVerificationFailure(stderr: string): boolean {
|
|||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Post-unit guard for `validate-milestone` units (#4094).
|
||||
*
|
||||
* When validate-milestone writes verdict=needs-remediation, the agent is
|
||||
* expected to also call gsd_reassess_roadmap in the same turn to add
|
||||
* remediation slices. If they don't, the state machine re-derives
|
||||
* `phase: validating-milestone` indefinitely (all slices still complete +
|
||||
* verdict still needs-remediation), wasting ~3 dispatches before the stuck
|
||||
* detector fires.
|
||||
*
|
||||
* This guard fires immediately on the first occurrence: if VALIDATION.md
|
||||
* verdict is needs-remediation and no incomplete slices exist for the
|
||||
* milestone, pause the auto-loop with a clear blocker.
|
||||
*/
|
||||
async function runValidateMilestonePostCheck(
|
||||
vctx: VerificationContext,
|
||||
pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>,
|
||||
): Promise<VerificationResult> {
|
||||
const { s, ctx, pi } = vctx;
|
||||
if (!s.currentUnit) return "continue";
|
||||
|
||||
const { milestone: mid } = parseUnitId(s.currentUnit.id);
|
||||
if (!mid) return "continue";
|
||||
|
||||
const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION");
|
||||
if (!validationFile) return "continue";
|
||||
|
||||
const validationContent = await loadFile(validationFile);
|
||||
if (!validationContent) return "continue";
|
||||
|
||||
const verdict = extractVerdict(validationContent);
|
||||
if (verdict !== "needs-remediation") return "continue";
|
||||
|
||||
const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
|
||||
|
||||
// If any non-closed slices exist, the agent successfully queued remediation
|
||||
// work — proceed normally. The state machine will execute those slices and
|
||||
// re-validate per the #3596/#3670 fix.
|
||||
if (incompleteSliceCount > 0) return "continue";
|
||||
|
||||
ctx.ui.notify(
|
||||
`Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
|
||||
`The agent must call gsd_reassess_roadmap to add remediation slices before re-validation.\n`,
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
}
|
||||
|
||||
/**
|
||||
* Count slices for a milestone that are not in a closed status.
|
||||
* DB-backed projects are authoritative (#4094 peer review); falls back to
|
||||
* roadmap parsing only when the DB is unavailable.
|
||||
*/
|
||||
async function countIncompleteSlices(basePath: string, milestoneId: string): Promise<number> {
|
||||
if (isDbAvailable()) {
|
||||
const slices = getMilestoneSlices(milestoneId);
|
||||
if (slices.length === 0) {
|
||||
// No DB rows — treat as "unknown", do not pause.
|
||||
return 1;
|
||||
}
|
||||
return slices.filter((slice) => !isClosedStatus(slice.status)).length;
|
||||
}
|
||||
|
||||
// Filesystem fallback: parse the roadmap markdown.
|
||||
try {
|
||||
const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
|
||||
if (!roadmapFile) return 1;
|
||||
const roadmapContent = await loadFile(roadmapFile);
|
||||
if (!roadmapContent) return 1;
|
||||
const roadmap = parseRoadmap(roadmapContent);
|
||||
if (roadmap.slices.length === 0) return 1;
|
||||
return isMilestoneComplete(roadmap) ? 0 : 1;
|
||||
} catch {
|
||||
// Parsing failures should not cause false-positive pauses.
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the verification gate for the current execute-task unit.
|
||||
* Returns:
|
||||
|
|
@ -56,7 +143,15 @@ export async function runPostUnitVerification(
|
|||
): Promise<VerificationResult> {
|
||||
const { s, ctx, pi } = vctx;
|
||||
|
||||
if (!s.currentUnit || s.currentUnit.type !== "execute-task") {
|
||||
if (!s.currentUnit) {
|
||||
return "continue";
|
||||
}
|
||||
|
||||
if (s.currentUnit.type === "validate-milestone") {
|
||||
return await runValidateMilestonePostCheck(vctx, pauseAuto);
|
||||
}
|
||||
|
||||
if (s.currentUnit.type !== "execute-task") {
|
||||
return "continue";
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,179 @@
|
|||
// gsd-pi — Regression tests for the validate-milestone stuck-loop guard (#4094)
|
||||
|
||||
import { describe, test, mock, beforeEach, afterEach } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { tmpdir } from "node:os";
|
||||
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { runPostUnitVerification, type VerificationContext } from "../auto-verification.ts";
|
||||
import { AutoSession } from "../auto/session.ts";
|
||||
import {
|
||||
openDatabase,
|
||||
closeDatabase,
|
||||
insertMilestone,
|
||||
insertSlice,
|
||||
} from "../gsd-db.ts";
|
||||
import { invalidateAllCaches } from "../cache.ts";
|
||||
import { _clearGsdRootCache } from "../paths.ts";
|
||||
|
||||
let tempDir: string;
|
||||
let dbPath: string;
|
||||
let originalCwd: string;
|
||||
|
||||
function makeMockCtx() {
|
||||
return {
|
||||
ui: {
|
||||
notify: mock.fn(),
|
||||
setStatus: () => {},
|
||||
setWidget: () => {},
|
||||
setFooter: () => {},
|
||||
},
|
||||
model: { id: "test-model" },
|
||||
} as any;
|
||||
}
|
||||
|
||||
function makeMockPi() {
|
||||
return {
|
||||
sendMessage: mock.fn(),
|
||||
setModel: mock.fn(async () => true),
|
||||
} as any;
|
||||
}
|
||||
|
||||
function makeMockSession(basePath: string, unitType: string, unitId: string): AutoSession {
|
||||
const s = new AutoSession();
|
||||
s.basePath = basePath;
|
||||
s.active = true;
|
||||
s.pendingVerificationRetry = null;
|
||||
s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
|
||||
return s;
|
||||
}
|
||||
|
||||
function setupTestEnvironment(): void {
|
||||
originalCwd = process.cwd();
|
||||
tempDir = join(tmpdir(), `validate-milestone-guard-${Date.now()}-${Math.random().toString(36).slice(2)}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
const milestoneDir = join(tempDir, ".gsd", "milestones", "M001");
|
||||
mkdirSync(milestoneDir, { recursive: true });
|
||||
|
||||
process.chdir(tempDir);
|
||||
_clearGsdRootCache();
|
||||
|
||||
dbPath = join(tempDir, ".gsd", "gsd.db");
|
||||
openDatabase(dbPath);
|
||||
invalidateAllCaches();
|
||||
}
|
||||
|
||||
function cleanupTestEnvironment(): void {
|
||||
try { process.chdir(originalCwd); } catch { /* ignore */ }
|
||||
try { closeDatabase(); } catch { /* ignore */ }
|
||||
try { rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
}
|
||||
|
||||
function writeValidationFile(verdict: string): void {
|
||||
const path = join(tempDir, ".gsd", "milestones", "M001", "M001-VALIDATION.md");
|
||||
const content = `---
|
||||
verdict: ${verdict}
|
||||
remediation_round: 1
|
||||
---
|
||||
|
||||
# Milestone Validation: M001
|
||||
|
||||
## Verdict Rationale
|
||||
Test fixture
|
||||
`;
|
||||
writeFileSync(path, content, "utf-8");
|
||||
invalidateAllCaches();
|
||||
}
|
||||
|
||||
describe("validate-milestone stuck-loop guard (#4094)", () => {
|
||||
beforeEach(() => setupTestEnvironment());
|
||||
afterEach(() => cleanupTestEnvironment());
|
||||
|
||||
test("pauses when verdict=needs-remediation and all slices are closed", async () => {
|
||||
insertMilestone({ id: "M001" });
|
||||
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
|
||||
insertSlice({ id: "S02", milestoneId: "M001", title: "Slice 2", status: "done" });
|
||||
writeValidationFile("needs-remediation");
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, "validate-milestone", "M001");
|
||||
|
||||
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
|
||||
|
||||
assert.equal(result, "pause");
|
||||
assert.equal(pauseAutoMock.mock.callCount(), 1);
|
||||
assert.equal(ctx.ui.notify.mock.callCount(), 1);
|
||||
const notifyArgs = ctx.ui.notify.mock.calls[0].arguments;
|
||||
assert.match(notifyArgs[0], /needs-remediation/);
|
||||
assert.equal(notifyArgs[1], "error");
|
||||
});
|
||||
|
||||
test("treats skipped slices as closed", async () => {
|
||||
insertMilestone({ id: "M001" });
|
||||
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
|
||||
insertSlice({ id: "S02", milestoneId: "M001", title: "Slice 2", status: "skipped" });
|
||||
writeValidationFile("needs-remediation");
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, "validate-milestone", "M001");
|
||||
|
||||
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
|
||||
|
||||
assert.equal(result, "pause");
|
||||
assert.equal(pauseAutoMock.mock.callCount(), 1);
|
||||
});
|
||||
|
||||
test("continues when verdict=needs-remediation but a queued remediation slice exists", async () => {
|
||||
insertMilestone({ id: "M001" });
|
||||
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
|
||||
insertSlice({ id: "S02", milestoneId: "M001", title: "Remediation", status: "queued" });
|
||||
writeValidationFile("needs-remediation");
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, "validate-milestone", "M001");
|
||||
|
||||
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
|
||||
|
||||
assert.equal(result, "continue");
|
||||
assert.equal(pauseAutoMock.mock.callCount(), 0);
|
||||
});
|
||||
|
||||
test("continues when verdict is pass", async () => {
|
||||
insertMilestone({ id: "M001" });
|
||||
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
|
||||
writeValidationFile("pass");
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, "validate-milestone", "M001");
|
||||
|
||||
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
|
||||
|
||||
assert.equal(result, "continue");
|
||||
assert.equal(pauseAutoMock.mock.callCount(), 0);
|
||||
});
|
||||
|
||||
test("continues when no VALIDATION file exists yet", async () => {
|
||||
insertMilestone({ id: "M001" });
|
||||
insertSlice({ id: "S01", milestoneId: "M001", title: "Slice 1", status: "complete" });
|
||||
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, "validate-milestone", "M001");
|
||||
|
||||
const result = await runPostUnitVerification({ s, ctx, pi } as VerificationContext, pauseAutoMock);
|
||||
|
||||
assert.equal(result, "continue");
|
||||
assert.equal(pauseAutoMock.mock.callCount(), 0);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue