feat(gsd-uok): unify gate plane across pre/post validation checks

This commit is contained in:
Jeremy McSpadden 2026-04-14 20:41:03 -05:00
parent 76a85300ae
commit 00521b1418
7 changed files with 449 additions and 8 deletions

View file

@ -66,6 +66,8 @@ import { getSliceTasks } from "./gsd-db.js";
import { runPreExecutionChecks, type PreExecutionResult } from "./pre-execution-checks.js";
import { writePreExecutionEvidence } from "./verification-evidence.js";
import { ensureCodebaseMapFresh } from "./codebase-generator.js";
import { resolveUokFlags } from "./uok/flags.js";
import { UokGateRunner } from "./uok/gate-runner.js";
/** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
const MAX_VERIFICATION_RETRIES = 3;
@ -871,9 +873,10 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
) {
let preExecPauseNeeded = false;
await runSafely("postUnitPostVerification", "pre-execution-checks", async () => {
const prefs = loadEffectiveGSDPreferences()?.preferences;
const uokFlags = resolveUokFlags(prefs);
try {
// Check preferences — respect enhanced_verification and enhanced_verification_pre
const prefs = loadEffectiveGSDPreferences()?.preferences;
const enhancedEnabled = prefs?.enhanced_verification !== false; // default true
const preEnabled = prefs?.enhanced_verification_pre !== false; // default true
@ -908,6 +911,8 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
return;
}
const strictMode = prefs?.enhanced_verification_strict === true;
// Run pre-execution checks
const result: PreExecutionResult = await runPreExecutionChecks(tasks, s.basePath);
@ -931,6 +936,36 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
writePreExecutionEvidence(result, slicePath, mid, sid);
}
if (uokFlags.gates) {
const failedChecks = result.checks
.filter((check) => !check.passed)
.map((check) => `[${check.category}] ${check.target}: ${check.message}`);
const warnEscalated = result.status === "warn" && strictMode;
const blockingFailure = result.status === "fail" || warnEscalated;
const gateRunner = new UokGateRunner();
gateRunner.register({
id: "pre-execution-checks",
type: "input",
execute: async () => ({
outcome: blockingFailure ? "fail" : "pass",
failureClass: result.status === "fail" ? "input" : warnEscalated ? "policy" : "none",
rationale: blockingFailure
? `pre-execution checks ${result.status}${warnEscalated ? " (strict)" : ""}`
: "pre-execution checks passed",
findings: failedChecks.join("\n"),
}),
});
await gateRunner.run("pre-execution-checks", {
basePath: s.basePath,
traceId: `pre-execution:${s.currentUnit.id}`,
turnId: s.currentUnit.id,
milestoneId: mid,
sliceId: sid,
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
});
}
// Notify UI
if (result.status === "fail") {
const blockingCount = result.checks.filter(c => !c.passed && c.blocking).length;
@ -969,6 +1004,29 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
`Pre-execution checks error: ${errorMessage} — pausing for human review`,
"error",
);
if (uokFlags.gates && s.currentUnit) {
const { milestone: mid, slice: sid } = parseUnitId(s.currentUnit.id);
const gateRunner = new UokGateRunner();
gateRunner.register({
id: "pre-execution-checks",
type: "input",
execute: async () => ({
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-execution checks threw before completion",
findings: errorMessage,
}),
});
await gateRunner.run("pre-execution-checks", {
basePath: s.basePath,
traceId: `pre-execution:${s.currentUnit.id}`,
turnId: s.currentUnit.id,
milestoneId: mid ?? undefined,
sliceId: sid ?? undefined,
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
});
}
preExecPauseNeeded = true;
}
});

View file

@ -69,6 +69,37 @@ async function runValidateMilestonePostCheck(
pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>,
): Promise<VerificationResult> {
const { s, ctx, pi } = vctx;
const prefs = loadEffectiveGSDPreferences()?.preferences;
const uokFlags = resolveUokFlags(prefs);
/**
 * Record the result of the milestone-validation post-check as a UOK gate run
 * under the gate id `milestone-validation-post-check`.
 *
 * Does nothing when UOK gates are disabled or there is no current unit.
 * Persistence is best-effort: callers await this helper immediately before
 * control-flow actions (returning "continue", pausing auto-mode), so a failure
 * to write the gate row is reported to the UI instead of being thrown —
 * otherwise a persistence error would skip the pause that follows.
 *
 * @param outcome Gate outcome to record.
 * @param failureClass Failure classification to record alongside the outcome.
 * @param rationale Short explanation of why the outcome was chosen.
 * @param findings Optional detail text; defaults to an empty string.
 * @param milestoneId Milestone the gate run is attributed to, if known.
 */
const persistMilestoneValidationGate = async (
  outcome: "pass" | "fail" | "retry" | "manual-attention",
  failureClass: "none" | "verification" | "manual-attention",
  rationale: string,
  findings = "",
  milestoneId?: string,
): Promise<void> => {
  const unit = s.currentUnit;
  if (!uokFlags.gates || !unit) return;

  try {
    const gateRunner = new UokGateRunner();
    gateRunner.register({
      id: "milestone-validation-post-check",
      type: "verification",
      // The verdict is already decided by the caller; the gate only reports it.
      execute: async () => ({
        outcome,
        failureClass,
        rationale,
        findings,
      }),
    });
    await gateRunner.run("milestone-validation-post-check", {
      basePath: s.basePath,
      traceId: `validation-post-check:${unit.id}`,
      turnId: unit.id,
      milestoneId,
      unitType: unit.type,
      unitId: unit.id,
    });
  } catch (err) {
    // Never let gate bookkeeping change the post-check's control flow.
    const detail = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(
      `Failed to persist milestone validation gate result: ${detail}`,
      "error",
    );
  }
};
if (!s.currentUnit) return "continue";
const { milestone: mid } = parseUnitId(s.currentUnit.id);
@ -81,14 +112,32 @@ async function runValidateMilestonePostCheck(
if (!validationContent) return "continue";
const verdict = extractVerdict(validationContent);
if (verdict !== "needs-remediation") return "continue";
if (verdict !== "needs-remediation") {
await persistMilestoneValidationGate(
"pass",
"none",
`milestone validation verdict is ${verdict}; no remediation loop risk`,
"",
mid,
);
return "continue";
}
const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
// If any non-closed slices exist, the agent successfully queued remediation
// work — proceed normally. The state machine will execute those slices and
// re-validate per the #3596/#3670 fix.
if (incompleteSliceCount > 0) return "continue";
if (incompleteSliceCount > 0) {
await persistMilestoneValidationGate(
"pass",
"none",
`remediation slices present (${incompleteSliceCount}); validation can continue`,
"",
mid,
);
return "continue";
}
ctx.ui.notify(
`Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
@ -98,6 +147,13 @@ async function runValidateMilestonePostCheck(
`validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
`The agent must call gsd_reassess_roadmap to add remediation slices before re-validation.\n`,
);
await persistMilestoneValidationGate(
"manual-attention",
"manual-attention",
"needs-remediation verdict without queued remediation slices",
`No incomplete slices found for ${mid} while verdict=needs-remediation`,
mid,
);
await pauseAuto(ctx, pi);
return "pause";
}
@ -372,6 +428,43 @@ export async function runPostUnitVerification(
);
}
if (uokFlags.gates) {
const strictMode = prefs?.enhanced_verification_strict === true;
const warnEscalated = postExecResult.status === "warn" && strictMode;
const blockingFailure = postExecResult.status === "fail" || warnEscalated;
const findings = postExecResult.checks
.filter((check) => !check.passed)
.map((check) => `[${check.category}] ${check.target}: ${check.message}`)
.join("\n");
const gateRunner = new UokGateRunner();
gateRunner.register({
id: "post-execution-checks",
type: "artifact",
execute: async () => ({
outcome: blockingFailure ? "fail" : "pass",
failureClass: postExecResult.status === "fail"
? "artifact"
: warnEscalated
? "policy"
: "none",
rationale: blockingFailure
? `post-execution checks ${postExecResult.status}${warnEscalated ? " (strict)" : ""}`
: "post-execution checks passed",
findings,
}),
});
await gateRunner.run("post-execution-checks", {
basePath: s.basePath,
traceId: `verification:${s.currentUnit.id}`,
turnId: s.currentUnit.id,
milestoneId: mid,
sliceId: sid,
taskId: tid,
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
});
}
// Check for blocking failures
if (postExecResult.status === "fail") {
postExecBlockingFailure = true;

View file

@ -48,6 +48,8 @@ import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
import { ensurePlanV2Graph } from "../uok/plan-v2.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { resetEvidence } from "../safety/evidence-collector.js";
import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
@ -203,14 +205,60 @@ export async function runPreDispatch(
loopState: LoopState,
): Promise<PhaseResult<PreDispatchData>> {
const { ctx, pi, s, deps, prefs } = ic;
const uokFlags = resolveUokFlags(prefs);
/**
 * Record a pre-dispatch guard decision as a UOK gate run.
 *
 * Does nothing when UOK gates are disabled. Persistence is best-effort: this
 * helper is awaited right before stop/pause actions and inside the health
 * gate's try/catch, so a failure to write the gate row is logged rather than
 * thrown. Throwing here would otherwise skip the stop/pause that follows, or
 * be misreported by the surrounding catch as a health-gate failure.
 *
 * @param input.gateId Gate identifier to register and run.
 * @param input.gateType Gate type passed to the runner.
 * @param input.outcome Gate outcome to record.
 * @param input.failureClass Failure classification to record.
 * @param input.rationale Short explanation of the decision.
 * @param input.findings Optional detail text; recorded as "" when omitted.
 * @param input.milestoneId Optional milestone id; falls back to the session's
 *   current milestone id when omitted.
 */
const runPreDispatchGate = async (input: {
  gateId: string;
  gateType: string;
  outcome: "pass" | "fail" | "retry" | "manual-attention";
  failureClass: "none" | "policy" | "input" | "execution" | "artifact" | "verification" | "closeout" | "git" | "timeout" | "manual-attention" | "unknown";
  rationale: string;
  findings?: string;
  milestoneId?: string;
}): Promise<void> => {
  if (!uokFlags.gates) return;

  try {
    const gateRunner = new UokGateRunner();
    gateRunner.register({
      id: input.gateId,
      type: input.gateType,
      // The decision was already made by the caller; the gate only reports it.
      execute: async () => ({
        outcome: input.outcome,
        failureClass: input.failureClass,
        rationale: input.rationale,
        findings: input.findings ?? "",
      }),
    });
    await gateRunner.run(input.gateId, {
      basePath: s.basePath,
      traceId: `pre-dispatch:${ic.flowId}`,
      turnId: `iter-${ic.iteration}`,
      milestoneId: input.milestoneId ?? s.currentMilestoneId ?? undefined,
      unitType: "pre-dispatch",
      unitId: `iter-${ic.iteration}`,
    });
  } catch (err) {
    // Gate bookkeeping must never alter dispatch control flow.
    logWarning("engine", "Failed to persist pre-dispatch gate result", {
      gateId: input.gateId,
      error: err instanceof Error ? err.message : String(err),
    });
  }
};
// Resource version guard
const staleMsg = deps.checkResourcesStale(s.resourceVersionOnStart);
if (staleMsg) {
await runPreDispatchGate({
gateId: "resource-version-guard",
gateType: "policy",
outcome: "fail",
failureClass: "policy",
rationale: "resource version guard blocked dispatch",
findings: staleMsg,
});
await deps.stopAuto(ctx, pi, staleMsg);
debugLog("autoLoop", { phase: "exit", reason: "resources-stale" });
return { action: "break", reason: "resources-stale" };
}
await runPreDispatchGate({
gateId: "resource-version-guard",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "resource version guard passed",
});
deps.invalidateAllCaches();
s.lastPromptCharCount = undefined;
@ -226,6 +274,14 @@ export async function runPreDispatch(
);
}
if (!healthGate.proceed) {
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-dispatch health gate blocked dispatch",
findings: healthGate.reason,
});
ctx.ui.notify(
healthGate.reason || "Pre-dispatch health check failed — run /gsd doctor for details.",
"error",
@ -234,7 +290,23 @@ export async function runPreDispatch(
debugLog("autoLoop", { phase: "exit", reason: "health-gate-failed" });
return { action: "break", reason: "health-gate-failed" };
}
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "pass",
failureClass: "none",
rationale: "pre-dispatch health gate passed",
findings: healthGate.fixesApplied.length > 0 ? healthGate.fixesApplied.join(", ") : "",
});
} catch (e) {
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-dispatch health gate threw unexpectedly",
findings: String(e),
});
logWarning("engine", "Pre-dispatch health gate threw unexpectedly", { error: String(e) });
}
@ -257,10 +329,27 @@ export async function runPreDispatch(
const compiled = ensurePlanV2Graph(s.basePath, state);
if (!compiled.ok) {
const reason = compiled.reason ?? "Plan v2 compilation failed";
await runPreDispatchGate({
gateId: "plan-v2-gate",
gateType: "policy",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "plan v2 compile gate failed",
findings: reason,
milestoneId: state.activeMilestone?.id ?? undefined,
});
ctx.ui.notify(`Plan gate failed-closed: ${reason}`, "error");
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: "plan-v2-gate-failed" };
}
await runPreDispatchGate({
gateId: "plan-v2-gate",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "plan v2 compile gate passed",
milestoneId: state.activeMilestone?.id ?? undefined,
});
}
deps.syncCmuxSidebar(prefs, state);
let mid = state.activeMilestone?.id;

View file

@ -14,7 +14,7 @@ import { join } from "node:path";
import { runPostUnitVerification, type VerificationContext } from "../auto-verification.ts";
import { AutoSession } from "../auto/session.ts";
import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertTask } from "../gsd-db.ts";
import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertTask, _getAdapter } from "../gsd-db.ts";
import { invalidateAllCaches } from "../cache.ts";
import { _clearGsdRootCache } from "../paths.ts";
@ -140,6 +140,43 @@ function createBasicTask(): void {
});
}
/**
 * Seed a fixture for the post-execution failure scenario: milestone M001,
 * slice S01, a `src/broken.ts` file under the temp dir that imports from a
 * module path that is never created, and a pending task T01 that lists that
 * file as a key file.
 */
function createPostExecFailureTask(): void {
  const milestoneId = "M001";
  const sliceId = "S01";
  const brokenSource =
    "import { missing } from './does-not-exist.js';\nexport const ok = 1;\n";

  insertMilestone({ id: milestoneId });
  insertSlice({ id: sliceId, milestoneId, title: "Test Slice", risk: "low" });

  // Write the offending source file before registering the task that owns it.
  const srcDir = join(tempDir, "src");
  mkdirSync(srcDir, { recursive: true });
  const brokenPath = join(srcDir, "broken.ts");
  writeFileSync(brokenPath, brokenSource, "utf-8");

  const planning = {
    description: "Task that introduces an unresolved import in key files",
    estimate: "1h",
    files: ["src/broken.ts"],
    verify: "echo pass",
    inputs: [],
    expectedOutput: [],
    observabilityImpact: "",
  };
  insertTask({
    id: "T01",
    sliceId,
    milestoneId,
    title: "Task with broken import",
    status: "pending",
    keyFiles: ["src/broken.ts"],
    planning,
    sequence: 0,
  });
}
// ─── Tests ───────────────────────────────────────────────────────────────────
describe("Post-execution blocking failure retry bypass", () => {
@ -249,6 +286,47 @@ describe("Post-execution blocking failure retry bypass", () => {
// This test mainly confirms the wiring is correct
assert.equal(result, "continue");
});
// With UOK gates enabled in preferences, a post-execution check failure must
// both pause auto-mode and leave a `post-execution-checks` row in gate_runs.
test("uok gate runner persists post-execution gate failures when enabled", async () => {
// Fixture: task T01 whose key file imports from a path that does not exist.
createPostExecFailureTask();
writePreferences({
enhanced_verification: true,
enhanced_verification_post: true,
verification_auto_fix: true,
verification_max_retries: 2,
uok: {
enabled: true,
gates: { enabled: true },
},
});
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, { type: "execute-task", id: "M001/S01/T01" });
const vctx: VerificationContext = { s, ctx, pi };
const result = await runPostUnitVerification(vctx, pauseAutoMock);
// The run is expected to pause, invoking pauseAuto exactly once.
assert.equal(result, "pause");
assert.equal(pauseAutoMock.mock.callCount(), 1);
// Read back the most recent gate run recorded for this gate id.
const adapter = _getAdapter();
const row = adapter
?.prepare(
`SELECT gate_id, outcome, failure_class
FROM gate_runs
WHERE gate_id = 'post-execution-checks'
ORDER BY id DESC
LIMIT 1`,
)
.get() as { gate_id: string; outcome: string; failure_class: string } | undefined;
// `row` is also undefined when no adapter is available, which fails here too.
assert.ok(row, "post-execution gate run should be persisted when uok.gates is enabled");
assert.equal(row?.gate_id, "post-execution-checks");
assert.equal(row?.outcome, "fail");
assert.equal(row?.failure_class, "artifact");
});
});
describe("Post-execution retry behavior", () => {

View file

@ -17,7 +17,7 @@ import { join } from "node:path";
import { postUnitPostVerification, type PostUnitContext } from "../auto-post-unit.ts";
import { AutoSession } from "../auto/session.ts";
import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertTask } from "../gsd-db.ts";
import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertTask, _getAdapter } from "../gsd-db.ts";
import { invalidateAllCaches } from "../cache.ts";
import { _clearGsdRootCache } from "../paths.ts";
@ -454,4 +454,43 @@ describe("Pre-execution checks → pauseAuto wiring", () => {
"postUnitPostVerification should return 'continue' when pre-execution checks are disabled"
);
});
// With UOK gates enabled in preferences, failing pre-execution checks must
// stop the flow and leave a `pre-execution-checks` row in gate_runs.
test("uok gate runner persists pre-execution gate outcomes when enabled", async () => {
writePreferences({
enhanced_verification: true,
enhanced_verification_pre: true,
enhanced_verification_strict: true,
uok: {
enabled: true,
gates: { enabled: true },
},
});
createFailingTasks();
const ctx = makeMockCtx();
const pi = makeMockPi();
const pauseAutoMock = mock.fn(async () => {});
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
const result = await postUnitPostVerification(pctx);
assert.equal(result, "stopped");
// Read back the most recent gate run recorded for this gate id.
const adapter = _getAdapter();
const row = adapter
?.prepare(
`SELECT gate_id, outcome, failure_class
FROM gate_runs
WHERE gate_id = 'pre-execution-checks'
ORDER BY id DESC
LIMIT 1`,
)
.get() as { gate_id: string; outcome: string; failure_class: string } | undefined;
// `row` is also undefined when no adapter is available, which fails here too.
assert.ok(row, "pre-execution gate run should be persisted when uok.gates is enabled");
assert.equal(row?.gate_id, "pre-execution-checks");
assert.equal(row?.outcome, "fail");
// "input" (not "policy") is expected even though strict mode is enabled above.
assert.equal(row?.failure_class, "input");
});
});

View file

@ -112,4 +112,43 @@ describe("handleValidateMilestone write ordering (#2725)", () => {
).get();
assert.equal(row, undefined, "assessment row should be deleted after disk-write rollback");
});
// handleValidateMilestone must write a `milestone-validation-gates` row to
// gate_runs when gates are forced on through the options argument, carrying
// the caller-supplied trace and turn ids.
it("persists milestone validation gate_runs rows when UOK gates are enabled", async () => {
base = makeTmpBase();
const dbPath = join(base, ".gsd", "gsd.db");
openDatabase(dbPath);
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001" });
// Gates are enabled via the explicit option, not via preferences.
const result = await handleValidateMilestone(VALID_PARAMS, base, {
uokGatesEnabled: true,
traceId: "trace-val-1",
turnId: "turn-val-1",
});
assert.ok(!("error" in result), `unexpected error: ${"error" in result ? result.error : ""}`);
// Read back the most recent gate run recorded for this gate id.
const adapter = _getAdapter()!;
const row = adapter.prepare(
`SELECT gate_id, outcome, failure_class, trace_id, turn_id
FROM gate_runs
WHERE gate_id = 'milestone-validation-gates'
ORDER BY id DESC
LIMIT 1`,
).get() as
| {
gate_id: string;
outcome: string;
failure_class: string;
trace_id: string;
turn_id: string;
}
| undefined;
assert.ok(row, "milestone validation gate row should be persisted");
assert.equal(row?.gate_id, "milestone-validation-gates");
assert.equal(row?.outcome, "pass");
assert.equal(row?.failure_class, "none");
// The ids passed in the options must be stored verbatim.
assert.equal(row?.trace_id, "trace-val-1");
assert.equal(row?.turn_id, "turn-val-1");
});
});

View file

@ -23,6 +23,9 @@ import { invalidateStateCache } from "../state.js";
import { VALIDATION_VERDICTS, isValidMilestoneVerdict } from "../verdict-parser.js";
import { insertMilestoneValidationGates } from "../milestone-validation-gates.js";
import { logWarning } from "../workflow-logger.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { loadEffectiveGSDPreferences } from "../preferences.js";
import { resolveUokFlags } from "../uok/flags.js";
export interface ValidateMilestoneParams {
milestoneId: string;
@ -43,6 +46,12 @@ export interface ValidateMilestoneResult {
validationPath: string;
}
/**
 * Optional overrides for how `handleValidateMilestone` records its UOK gate run.
 */
export interface ValidateMilestoneOptions {
/** When set, takes precedence over the `gates` flag resolved from effective preferences. */
uokGatesEnabled?: boolean;
/** Trace id recorded on the gate run; defaults to `validate-milestone:<milestoneId>`. */
traceId?: string;
/** Turn id recorded on the gate run; defaults to `<milestoneId>:validate`. */
turnId?: string;
}
function renderValidationMarkdown(params: ValidateMilestoneParams): string {
let md = `---
verdict: ${params.verdict}
@ -81,6 +90,7 @@ ${params.verdictRationale}
export async function handleValidateMilestone(
params: ValidateMilestoneParams,
basePath: string,
opts?: ValidateMilestoneOptions,
): Promise<ValidateMilestoneResult | { error: string }> {
if (!params.milestoneId || typeof params.milestoneId !== "string" || params.milestoneId.trim() === "") {
return { error: "milestoneId is required and must be a non-empty string" };
@ -108,6 +118,8 @@ export async function handleValidateMilestone(
// rendering can regenerate. The inverse (file exists, no DB row) is
// harder to detect and recover from (#2725).
const validatedAt = new Date().toISOString();
const slices = getMilestoneSlices(params.milestoneId);
const gateSliceId = slices.length > 0 ? slices[0].id : "_milestone";
transaction(() => {
insertAssessment({
@ -123,11 +135,9 @@ export async function handleValidateMilestone(
// #2945 Bug 4: persist quality_gates records alongside the assessment.
// Previously only the assessment was written, leaving M002+ milestones
// with zero quality_gate records despite passing validation.
const slices = getMilestoneSlices(params.milestoneId);
const sliceId = slices.length > 0 ? slices[0].id : "_milestone";
insertMilestoneValidationGates(
params.milestoneId,
sliceId,
gateSliceId,
params.verdict,
validatedAt,
);
@ -147,6 +157,41 @@ export async function handleValidateMilestone(
clearPathCache();
clearParseCache();
const prefs = loadEffectiveGSDPreferences()?.preferences;
const gatesEnabled = opts?.uokGatesEnabled ?? resolveUokFlags(prefs).gates;
if (gatesEnabled) {
try {
const gateRunner = new UokGateRunner();
const nonPassVerdict = params.verdict !== "pass";
gateRunner.register({
id: "milestone-validation-gates",
type: "verification",
execute: async () => ({
outcome: nonPassVerdict ? "manual-attention" : "pass",
failureClass: nonPassVerdict ? "manual-attention" : "none",
rationale: `milestone validation verdict: ${params.verdict}`,
findings: nonPassVerdict
? [params.verdictRationale, params.remediationPlan ?? ""].filter(Boolean).join("\n")
: "",
}),
});
await gateRunner.run("milestone-validation-gates", {
basePath,
traceId: opts?.traceId ?? `validate-milestone:${params.milestoneId}`,
turnId: opts?.turnId ?? `${params.milestoneId}:validate`,
milestoneId: params.milestoneId,
sliceId: gateSliceId,
unitType: "validate-milestone",
unitId: params.milestoneId,
});
} catch (err) {
logWarning(
"tool",
`validate_milestone — failed to persist UOK gate result: ${(err as Error).message}`,
);
}
}
return {
milestoneId: params.milestoneId,
verdict: params.verdict,