Merge pull request #4084 from jeremymcs/claude/prompt-system-validation-gates-FK9NB

Introduce gate registry as single source of truth for quality gates
This commit is contained in:
Jeremy McSpadden 2026-04-12 21:54:17 -05:00 committed by GitHub
commit 6d0b8b4b27
19 changed files with 1303 additions and 54 deletions

View file

@ -787,7 +787,7 @@ const saveGateResultParams = {
projectDir: projectDirParam,
milestoneId: z.string().describe("Milestone ID (e.g. M001)"),
sliceId: z.string().describe("Slice ID (e.g. S01)"),
gateId: z.enum(["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"]).describe("Gate ID"),
gateId: z.enum(["Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "MV01", "MV02", "MV03", "MV04"]).describe("Gate ID"),
taskId: z.string().optional().describe("Task ID for task-scoped gates"),
verdict: z.enum(["pass", "flag", "omitted"]).describe("Gate verdict"),
rationale: z.string().describe("One-sentence justification"),

View file

@ -24,7 +24,13 @@ import { getLoadedSkills, type Skill } from "@gsd/pi-coding-agent";
import { join, basename } from "node:path";
import { existsSync } from "node:fs";
import { computeBudgets, resolveExecutorContextWindow, truncateAtSectionBoundary } from "./context-budget.js";
import { getPendingGates } from "./gsd-db.js";
import { getPendingGates, getPendingGatesForTurn } from "./gsd-db.js";
import {
GATE_REGISTRY,
assertGateCoverage,
getGatesForTurn,
type GateDefinition,
} from "./gate-registry.js";
import { formatDecisionsCompact, formatRequirementsCompact } from "./structured-data-formatter.js";
import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js";
import { logWarning } from "./workflow-logger.js";
@ -1395,6 +1401,17 @@ export async function buildExecuteTaskPrompt(
const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : "";
// Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the
// gates that plan-slice actually seeded for this task — tasks with no
// external dependencies legitimately skip Q5, tasks with no runtime
// load dimension skip Q6, etc.
const etPending = getPendingGatesForTurn(mid, sid, "execute-task", tid);
assertGateCoverage(etPending, "execute-task", { requireAll: false });
const gatesToClose = renderGatesToCloseBlock(
getGatesForTurn("execute-task"),
{ pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true },
);
return loadPrompt("execute-task", {
overridesSection,
runtimeContext,
@ -1412,6 +1429,7 @@ export async function buildExecuteTaskPrompt(
taskSummaryPath,
inlinedTemplates,
verificationBudget,
gatesToClose,
skillActivation: buildSkillActivationBlock({
base,
milestoneId: mid,
@ -1477,6 +1495,19 @@ export async function buildCompleteSlicePrompt(
const sliceSummaryPath = join(base, `${sliceRel}/${sid}-SUMMARY.md`);
const sliceUatPath = join(base, `${sliceRel}/${sid}-UAT.md`);
// Gates owned by complete-slice (e.g. Q8). Pull from the DB so the
// prompt only prompts for gates the plan actually seeded. The tool
// handler closes each gate based on the SUMMARY.md section content
// after the assistant calls gsd_complete_slice.
const csPending = getPendingGatesForTurn(mid, sid, "complete-slice");
// coverage check: every pending row must be owned by complete-slice.
// requireAll:false because a slice may have already closed some gates.
assertGateCoverage(csPending, "complete-slice", { requireAll: false });
const gatesToClose = renderGatesToCloseBlock(
getGatesForTurn("complete-slice"),
{ pending: new Set(csPending.map((g) => g.gate_id)), allowOmit: true },
);
return loadPrompt("complete-slice", {
workingDirectory: base,
milestoneId: mid, sliceId: sid, sliceTitle: sTitle,
@ -1485,6 +1516,7 @@ export async function buildCompleteSlicePrompt(
inlinedContext,
sliceSummaryPath,
sliceUatPath,
gatesToClose,
});
}
@ -1675,6 +1707,16 @@ export async function buildValidateMilestonePrompt(
const validationOutputPath = join(base, `${relMilestonePath(base, mid)}/${mid}-VALIDATION.md`);
const roadmapOutputPath = `${relMilestonePath(base, mid)}/${mid}-ROADMAP.md`;
// Every milestone validation turn owns MV01–MV04 unconditionally: the
// registry is the source of truth for which gates the validator must
// address, and the block below is what the template renders so the
// assistant can never accidentally skip one.
const mvGates = getGatesForTurn("validate-milestone");
const gatesToEvaluate = renderGatesToCloseBlock(mvGates, {
pending: new Set(mvGates.map((g) => g.id)),
allowOmit: false,
});
return loadPrompt("validate-milestone", {
workingDirectory: base,
milestoneId: mid,
@ -1683,6 +1725,7 @@ export async function buildValidateMilestonePrompt(
inlinedContext,
validationPath: validationOutputPath,
remediationRound: String(remediationRound),
gatesToEvaluate,
skillActivation: buildSkillActivationBlock({
base,
milestoneId: mid,
@ -1955,27 +1998,51 @@ export async function buildReactiveExecutePrompt(
}
// ─── Gate Evaluation ──────────────────────────────────────────────────────
//
// Gate definitions (question, guidance, owner turn) now live in
// gate-registry.ts so that prompt builders, dispatch rules, state
// derivation, and tool handlers all consult the same source of truth.
// See gate-registry.ts for the full ownership map.
const GATE_QUESTIONS: Record<string, { question: string; guidance: string }> = {
Q3: {
question: "How can this be exploited?",
guidance: [
"Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.",
"Map data exposure risks: PII, tokens, secrets accessible through this slice.",
"Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.",
"If none apply, return verdict 'omitted' with rationale explaining why.",
].join("\n"),
},
Q4: {
question: "What existing promises does this break?",
guidance: [
"List which existing requirements (R001, R003, etc.) are touched by this slice.",
"Identify what must be re-tested after shipping.",
"Flag decisions that should be revisited given the new scope.",
"If no existing requirements are affected, return verdict 'omitted'.",
].join("\n"),
},
};
/**
 * Render a "Gates to Close" markdown block listing the pending gates a turn
 * must address.
 *
 * Only definitions whose `id` is contained in `opts.pending` are rendered,
 * in the order they appear in `gates`. Each gate gets an H3 heading of the
 * form `### <id> — <promptSection>`, followed by its question and guidance.
 *
 * @param gates - Candidate gate definitions to render.
 * @param opts - `pending`: gate ids that are still pending (all other
 *   definitions are skipped). `allowOmit`: when true, each gate gets an extra
 *   paragraph telling the assistant that an empty section is recorded as
 *   `omitted`.
 * @returns The rendered block with trailing whitespace trimmed, or an empty
 *   string when no gate is applicable, so callers can drop the result
 *   straight into a template variable.
 */
function renderGatesToCloseBlock(
  gates: ReadonlyArray<GateDefinition>,
  opts: { pending: ReadonlySet<string>; allowOmit: boolean },
): string {
  const applicable = gates.filter((g) => opts.pending.has(g.id));
  if (applicable.length === 0) return "";

  const lines: string[] = [
    "## Gates to Close",
    "",
    "These quality gates are still pending for this unit. You MUST address every one before calling the closing tool — the handler closes the DB row based on whether the corresponding artifact section is present.",
    "",
  ];

  for (const def of applicable) {
    // Keep the id and the section title visually separate; without the
    // separator the heading reads e.g. "### Q8Operational Readiness".
    lines.push(`### ${def.id} — ${def.promptSection}`);
    lines.push("");
    lines.push(`**Question:** ${def.question}`);
    lines.push("");
    lines.push(def.guidance);
    if (opts.allowOmit) {
      lines.push("");
      lines.push(
        `If this gate genuinely does not apply to this unit, leave the **${def.promptSection}** section empty and the handler will record it as \`omitted\`. Otherwise, fill the section with concrete evidence.`,
      );
    }
    // Blank line between gates; the final one is removed by trimEnd().
    lines.push("");
  }

  return lines.join("\n").trimEnd();
}
export async function buildParallelResearchSlicesPrompt(
mid: string,
@ -2011,28 +2078,39 @@ export async function buildGateEvaluatePrompt(
mid: string, midTitle: string, sid: string, sTitle: string,
base: string,
): Promise<string> {
const pending = getPendingGates(mid, sid, "slice");
// Pull only the gates this turn actually owns (Q3/Q4). Filter via the
// registry so that scope:"slice" gates owned by other turns (Q8) can't
// leak into this prompt and can't block dispatch via silent skip.
const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate");
// Fails loudly if the pending list contains a gate id the registry
// doesn't own for this turn. Missing owned gates is allowed here —
// `gate-evaluate` is dispatched whenever *any* of its owned gates are
// pending, not only when all of them are.
assertGateCoverage(pending, "gate-evaluate", { requireAll: false });
// Load the slice plan for context
const planFile = resolveSliceFile(base, mid, sid, "PLAN");
const planContent = planFile ? (await loadFile(planFile)) ?? "(plan file empty)" : "(plan file not found)";
// Build per-gate subagent prompts
// Build per-gate subagent prompts from the pending rows. The loop below
// walks the registry definitions owned by `gate-evaluate`, filtered down
// to the pending ids, so every iteration has a definition by construction.
const pendingIds = new Set(pending.map((g) => g.gate_id));
const gateDefs = getGatesForTurn("gate-evaluate").filter((def) => pendingIds.has(def.id));
const subagentSections: string[] = [];
const gateListLines: string[] = [];
for (const gate of pending) {
const meta = GATE_QUESTIONS[gate.gate_id];
if (!meta) continue;
gateListLines.push(`- **${gate.gate_id}**: ${meta.question}`);
for (const def of gateDefs) {
gateListLines.push(`- **${def.id}**: ${def.question}`);
const subPrompt = [
`You are evaluating quality gate **${gate.gate_id}** for slice ${sid} (${sTitle}).`,
`You are evaluating quality gate **${def.id}** for slice ${sid} (${sTitle}).`,
"",
`## Question: ${meta.question}`,
`## Question: ${def.question}`,
"",
meta.guidance,
def.guidance,
"",
"## Slice Plan",
"",
@ -2044,14 +2122,14 @@ export async function buildGateEvaluatePrompt(
`Call the \`gsd_save_gate_result\` tool with:`,
`- \`milestoneId\`: "${mid}"`,
`- \`sliceId\`: "${sid}"`,
`- \`gateId\`: "${gate.gate_id}"`,
`- \`gateId\`: "${def.id}"`,
"- `verdict`: \"pass\" (no concerns), \"flag\" (concerns found), or \"omitted\" (not applicable)",
"- `rationale`: one-sentence justification",
"- `findings`: detailed markdown findings (or empty if omitted)",
].join("\n");
subagentSections.push([
`### ${gate.gate_id}: ${meta.question}`,
`### ${def.id}: ${def.question}`,
"",
"Use this as the prompt for a `subagent` call:",
"",

View file

@ -1026,12 +1026,12 @@ export function registerDbTools(pi: ExtensionAPI): void {
name: "gsd_save_gate_result",
label: "Save Gate Result",
description:
"Save the result of a quality gate evaluation (Q3-Q8) to the GSD database. " +
"Save the result of a quality gate evaluation (Q3-Q8 or MV01-MV04) to the GSD database. " +
"Called by gate evaluation sub-agents after analyzing a specific quality question.",
promptSnippet: "Save quality gate evaluation result (verdict, rationale, findings)",
promptGuidelines: [
"Use gsd_save_gate_result after evaluating a quality gate question.",
"gateId must be one of: Q3, Q4, Q5, Q6, Q7, Q8.",
"gateId must be one of: Q3, Q4, Q5, Q6, Q7, Q8, MV01, MV02, MV03, MV04.",
"verdict must be: pass (no concerns), flag (concerns found), or omitted (not applicable).",
"rationale should be a one-sentence justification for the verdict.",
"findings should contain detailed markdown analysis (or empty string if omitted).",
@ -1039,7 +1039,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
parameters: Type.Object({
milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }),
sliceId: Type.String({ description: "Slice ID (e.g. S01)" }),
gateId: Type.String({ description: "Gate ID: Q3, Q4, Q5, Q6, Q7, or Q8" }),
gateId: Type.String({ description: "Gate ID: Q3, Q4, Q5, Q6, Q7, Q8, MV01, MV02, MV03, or MV04" }),
taskId: Type.Optional(Type.String({ description: "Task ID for task-scoped gates (Q5/Q6/Q7)" })),
verdict: Type.String({ description: "pass, flag, or omitted" }),
rationale: Type.String({ description: "One-sentence justification" }),

View file

@ -0,0 +1,251 @@
/**
* GSD Gate Registry — single source of truth for quality-gate ownership.
*
* Each gate declares which workflow turn owns it, the scope at which it is
* persisted in the `quality_gates` table, and the question/guidance text used
* in the prompt that turn sends. The registry replaces the ad-hoc
* `GATE_QUESTIONS` table that used to live in `auto-prompts.ts`, and every
* layer of the prompt system (prompt builders, dispatch rules, state
* derivation, tool handlers) consults it so a pending gate can never be
* silently dropped.
*
* Design notes:
* - `GATE_REGISTRY` is exhaustiveness-checked against `GateId` via
* `satisfies Record<GateId, GateDefinition>`, so adding a new GateId
* without a registry entry is a compile error.
* - `getGatesForTurn(turn)` returns the definitions a turn owns.
* - `assertGateCoverage(pending, turn)` throws a GSDError if the pending
* list for a turn contains gates the turn does not own, or (unless
* `requireAll: false` is passed) if any gate owned by the turn is missing
* from the pending list.
*/
import { GSDError, GSD_PARSE_ERROR } from "./errors.js";
import type { GateId, GateRow, GateScope } from "./types.js";
/**
 * Which workflow turn is responsible for evaluating / closing a gate.
 *
 * Every `GateDefinition` names exactly one of these in its `ownerTurn`
 * field; lookups such as `getGatesForTurn` select gates by this value.
 */
export type OwnerTurn =
| "gate-evaluate"
| "execute-task"
| "complete-slice"
| "validate-milestone";
/**
 * Static description of one quality gate: its identity, where it is
 * persisted, which turn owns it, and the text shown to the assistant.
 */
export interface GateDefinition {
/** Gate identifier; also the key of this entry in `GATE_REGISTRY`. */
id: GateId;
/** Scope recorded for the gate ("slice", "task" or "milestone" in the registry below). */
scope: GateScope;
/** The single workflow turn responsible for evaluating / closing this gate. */
ownerTurn: OwnerTurn;
/** One-line question the assistant must answer. */
question: string;
/** Markdown guidance describing what a good answer looks like. */
guidance: string;
/** Section title used for this gate in the artifact the turn writes
 * (e.g. "Operational Readiness" for Q8 in the slice summary).
 * NOTE(review): this was documented as an H3 header, but the gate-section
 * validator in prompt-validation.ts matches it as a `## ` (H2) heading —
 * confirm which level the artifact templates actually use. */
promptSection: string;
}
/**
 * The gate registry: one `GateDefinition` per gate id, keyed by that id.
 *
 * Declaration order is meaningful — `Object.values(GATE_REGISTRY)` is used
 * to build the ordered gate list, so gates are rendered in the order they
 * are written here.
 *
 * `as const` keeps the literal types of every field, and
 * `satisfies Record<GateId, GateDefinition>` makes the compiler reject a
 * registry that is missing an entry for some `GateId` or whose entries do
 * not match `GateDefinition`.
 *
 * The `question` and `guidance` strings are prompt text sent to the
 * assistant; edit them with the same care as any other prompt.
 */
export const GATE_REGISTRY = {
// ── Slice-scoped gates owned by the gate-evaluate turn ──
Q3: {
id: "Q3",
scope: "slice",
ownerTurn: "gate-evaluate",
question: "How can this be exploited?",
guidance: [
"Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.",
"Map data exposure risks: PII, tokens, secrets accessible through this slice.",
"Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.",
"If none apply, return verdict 'omitted' with rationale explaining why.",
].join("\n"),
promptSection: "Abuse Surface",
},
Q4: {
id: "Q4",
scope: "slice",
ownerTurn: "gate-evaluate",
question: "What existing promises does this break?",
guidance: [
"List which existing requirements (R001, R003, etc.) are touched by this slice.",
"Identify what must be re-tested after shipping.",
"Flag decisions that should be revisited given the new scope.",
"If no existing requirements are affected, return verdict 'omitted'.",
].join("\n"),
promptSection: "Broken Promises",
},
// ── Task-scoped gates owned by the execute-task turn ──
Q5: {
id: "Q5",
scope: "task",
ownerTurn: "execute-task",
question: "What breaks when dependencies fail?",
guidance: [
"Enumerate the task's external dependencies (APIs, filesystem, network, subprocesses).",
"Describe the failure path for each: timeout, malformed response, connection loss.",
"Verify the implementation handles each failure or explicitly bubbles the error.",
"Return verdict 'omitted' only if the task has no external dependencies.",
].join("\n"),
promptSection: "Failure Modes",
},
Q6: {
id: "Q6",
scope: "task",
ownerTurn: "execute-task",
question: "What is the 10x load breakpoint?",
guidance: [
"Identify the resource that saturates first at 10x the expected load.",
"Describe the protection applied (pool sizing, rate limiting, pagination, caching).",
"Return verdict 'omitted' if the task has no runtime load dimension.",
].join("\n"),
promptSection: "Load Profile",
},
Q7: {
id: "Q7",
scope: "task",
ownerTurn: "execute-task",
question: "What negative tests protect this task?",
guidance: [
"List malformed inputs, error paths, and boundary conditions the tests cover.",
"Point to the specific test files or cases that assert each negative scenario.",
"Return verdict 'omitted' only if the task has no meaningful negative surface.",
].join("\n"),
promptSection: "Negative Tests",
},
// ── Slice-scoped gate owned by the complete-slice turn ──
Q8: {
id: "Q8",
scope: "slice",
ownerTurn: "complete-slice",
question: "How will ops know this slice is healthy or broken?",
guidance: [
"Describe the health signal (metric, log line, dashboard) that proves the slice works.",
"Describe the failure signal that triggers an alert or paging.",
"Document the recovery procedure and any monitoring gaps.",
"Return verdict 'omitted' only for slices with no runtime behavior at all.",
].join("\n"),
promptSection: "Operational Readiness",
},
// ── Milestone-scoped gates owned by the validate-milestone turn ──
MV01: {
id: "MV01",
scope: "milestone",
ownerTurn: "validate-milestone",
question: "Is every success criterion in the milestone roadmap satisfied?",
guidance: [
"Walk the success-criteria checklist from the milestone roadmap.",
"For each criterion, point to the slice / assessment / verification evidence that proves it.",
"Return verdict 'flag' if any criterion is unmet or unverifiable.",
].join("\n"),
promptSection: "Success Criteria Checklist",
},
MV02: {
id: "MV02",
scope: "milestone",
ownerTurn: "validate-milestone",
question: "Does every slice have a SUMMARY.md and a passing assessment?",
guidance: [
"Confirm every slice listed in the roadmap has a SUMMARY.md.",
"Confirm each slice has an ASSESSMENT verdict of 'pass' (or justified 'omitted').",
"Flag missing artifacts and slices with outstanding follow-ups or known limitations.",
].join("\n"),
promptSection: "Slice Delivery Audit",
},
MV03: {
id: "MV03",
scope: "milestone",
ownerTurn: "validate-milestone",
question: "Do the slices integrate end-to-end?",
guidance: [
"Trace at least one cross-slice flow proving the pieces compose.",
"Flag gaps where two slices were built in isolation with no integration evidence.",
].join("\n"),
promptSection: "Cross-Slice Integration",
},
MV04: {
id: "MV04",
scope: "milestone",
ownerTurn: "validate-milestone",
question: "Are all touched requirements covered and still coherent?",
guidance: [
"For each requirement advanced, validated, surfaced, or invalidated across the milestone's slices, confirm the milestone-level evidence matches.",
"Flag requirements that slices claim to advance but no artifact proves.",
].join("\n"),
promptSection: "Requirement Coverage",
},
} as const satisfies Record<GateId, GateDefinition>;
/** Literal type of the registry object, including every gate's literal field values. */
export type GateRegistry = typeof GATE_REGISTRY;
/**
 * All gate definitions as one flat list, in registry declaration order.
 * `Object.values` visits non-numeric string keys in insertion order, so
 * filtering this list per owner turn yields a stable, declaration-ordered
 * result. The cast widens the `as const` literal entries back to
 * `GateDefinition`.
 */
const ORDERED_GATES: readonly GateDefinition[] = Object.values(GATE_REGISTRY) as readonly GateDefinition[];
/**
 * Collect the definitions of every gate whose `ownerTurn` equals `turn`.
 *
 * @param turn - The workflow turn to look up.
 * @returns A fresh array in registry declaration order; empty when the turn
 *   owns no gates.
 */
export function getGatesForTurn(turn: OwnerTurn): GateDefinition[] {
  const owned: GateDefinition[] = [];
  for (const gate of ORDERED_GATES) {
    if (gate.ownerTurn === turn) {
      owned.push(gate);
    }
  }
  return owned;
}
/**
 * Build the set of gate ids owned by `turn`.
 *
 * @param turn - The workflow turn to look up.
 * @returns A new `Set` populated in the same order `getGatesForTurn` returns
 *   the definitions.
 */
export function getGateIdsForTurn(turn: OwnerTurn): Set<GateId> {
  const ownedIds = new Set<GateId>();
  for (const gate of getGatesForTurn(turn)) {
    ownedIds.add(gate.id);
  }
  return ownedIds;
}
/**
 * Look up a gate definition by id.
 *
 * Accepts an arbitrary string (for example a `gate_id` read back from the
 * database) rather than a `GateId`, so the lookup must not trust the key.
 *
 * @param id - Candidate gate id.
 * @returns The matching definition, or `undefined` when `id` is not a key
 *   declared in `GATE_REGISTRY`.
 */
export function getGateDefinition(id: string): GateDefinition | undefined {
  // Own-property check first: a bare index lookup would also resolve members
  // inherited from Object.prototype ("constructor", "toString", "__proto__"),
  // handing callers a non-definition value instead of undefined.
  if (!Object.prototype.hasOwnProperty.call(GATE_REGISTRY, id)) {
    return undefined;
  }
  return (GATE_REGISTRY as Record<string, GateDefinition>)[id];
}
/**
 * Resolve which workflow turn owns the given gate.
 *
 * @param id - Gate id to resolve.
 * @returns The `ownerTurn` recorded for that gate in the registry.
 * @throws GSDError (code `GSD_PARSE_ERROR`) when the registry has no entry
 *   for `id`.
 */
export function getOwnerTurn(id: GateId): OwnerTurn {
  const entry = GATE_REGISTRY[id];
  if (entry) {
    return entry.ownerTurn;
  }
  throw new GSDError(GSD_PARSE_ERROR, `gate-registry: unknown gate id "${id}"`);
}
/**
 * Check a turn's pending gate rows against the registry and throw instead
 * of silently skipping anything that does not line up.
 *
 * Two checks run, in this order:
 * 1. Ownership — every distinct `gate_id` in `pending` must resolve to a
 *    registry definition whose `ownerTurn` is `turn`. Ids with no definition
 *    and ids owned by another turn are reported together in one error.
 *    (Scoping the list — e.g. filtering by slice — is the caller's job.)
 * 2. Completeness — only when `options.requireAll` is true (the default):
 *    every gate the turn owns must be present in `pending`. Pass
 *    `requireAll: false` for turns such as `execute-task` where only a
 *    subset of gates may have been seeded (e.g. a task with no external
 *    dependencies has no Q5 row).
 *
 * @param pending - Pending rows; only `gate_id` is read.
 * @param turn - The turn the rows are supposed to belong to.
 * @param options - `requireAll` toggles the completeness check.
 * @throws GSDError (code `GSD_PARSE_ERROR`) when either check fails.
 */
export function assertGateCoverage(
  pending: ReadonlyArray<Pick<GateRow, "gate_id">>,
  turn: OwnerTurn,
  options: { requireAll?: boolean } = {},
): void {
  const mustCoverAll = options.requireAll ?? true;
  const ownedIds = getGateIdsForTurn(turn);
  const seenIds = new Set(pending.map((row) => row.gate_id));

  // Ownership: gather every id the turn has no business closing.
  const foreign = [...seenIds].flatMap((id): string[] => {
    const def = getGateDefinition(id);
    if (!def) return [id];
    return def.ownerTurn === turn ? [] : [`${id} (owned by ${def.ownerTurn}, not ${turn})`];
  });
  if (foreign.length > 0) {
    throw new GSDError(
      GSD_PARSE_ERROR,
      `assertGateCoverage: turn "${turn}" received pending gates it does not own: ${foreign.join(", ")}`,
    );
  }

  if (!mustCoverAll) return;

  // Completeness: every owned gate must show up among the pending rows.
  const absent: GateId[] = [...ownedIds].filter((id) => !seenIds.has(id));
  if (absent.length > 0) {
    throw new GSDError(
      GSD_PARSE_ERROR,
      `assertGateCoverage: turn "${turn}" is missing required gates: ${absent.join(", ")}`,
    );
  }
}

View file

@ -10,6 +10,7 @@ import { existsSync, copyFileSync, mkdirSync, realpathSync } from "node:fs";
import { dirname } from "node:path";
import type { Decision, Requirement, GateRow, GateId, GateScope, GateStatus, GateVerdict } from "./types.js";
import { GSDError, GSD_STALE_STATE } from "./errors.js";
import { getGateIdsForTurn, type OwnerTurn } from "./gate-registry.js";
import { logError, logWarning } from "./workflow-logger.js";
const _require = createRequire(import.meta.url);
@ -2302,3 +2303,53 @@ export function getPendingSliceGateCount(milestoneId: string, sliceId: string):
).get({ ":mid": milestoneId, ":sid": sliceId });
return row ? (row["cnt"] as number) : 0;
}
/**
 * Return pending gate rows owned by a specific workflow turn.
 *
 * Unlike `getPendingGates(..., scope)`, this filters by the registry's
 * `ownerTurn` metadata so callers can distinguish Q3/Q4 (owned by
 * gate-evaluate) from Q8 (owned by complete-slice) even though both are
 * scope:"slice". Pass `taskId` to narrow task-scoped results to one task.
 *
 * @param milestoneId - Value matched against `milestone_id`.
 * @param sliceId - Value matched against `slice_id`.
 * @param turn - Owner turn; its gate ids (from the registry) form the
 *   `gate_id IN (...)` filter.
 * @param taskId - Optional; when provided, only rows with this `task_id`
 *   are returned. When omitted no task filter is applied, so rows for
 *   every task in the slice can be returned.
 * @returns Matching rows with `status = 'pending'`, mapped through
 *   `rowToGate`. Returns an empty array when no database is open or the
 *   turn owns no gates. The query has no ORDER BY, so callers should not
 *   rely on row order.
 */
export function getPendingGatesForTurn(
milestoneId: string,
sliceId: string,
turn: OwnerTurn,
taskId?: string,
): GateRow[] {
// No open database: behave as if nothing is pending.
if (!currentDb) return [];
const ids = getGateIdsForTurn(turn);
// Guard against generating an empty `IN ()` list.
if (ids.size === 0) return [];
const idList = [...ids];
// One named placeholder per gate id (:gid0, :gid1, ...); only these
// generated names are interpolated into the SQL text, values are bound.
const placeholders = idList.map((_, i) => `:gid${i}`).join(",");
const params: Record<string, unknown> = {
":mid": milestoneId,
":sid": sliceId,
};
idList.forEach((id, i) => {
params[`:gid${i}`] = id;
});
let sql =
`SELECT * FROM quality_gates
WHERE milestone_id = :mid AND slice_id = :sid
AND status = 'pending'
AND gate_id IN (${placeholders})`;
// Explicit undefined check: the task filter is added for any provided
// string, including an empty one.
if (taskId !== undefined) {
sql += ` AND task_id = :tid`;
params[":tid"] = taskId;
}
return currentDb.prepare(sql).all(params).map(rowToGate);
}
/**
 * Number of pending gate rows owned by `turn` for the given slice.
 *
 * Thin wrapper over `getPendingGatesForTurn` (called without a task
 * filter) for callers that only need the count, such as state derivation
 * deciding whether a phase transition should pause.
 *
 * @returns The length of the pending-row list.
 */
export function getPendingGateCountForTurn(
  milestoneId: string,
  sliceId: string,
  turn: OwnerTurn,
): number {
  const pendingRows = getPendingGatesForTurn(milestoneId, sliceId, turn);
  return pendingRows.length;
}

View file

@ -6,19 +6,13 @@
* records in the DB. This module inserts milestone-level validation gates
* that correspond to the validation checks performed.
*
* Gate IDs for milestone validation:
* MV01 Success criteria checklist
* MV02 Slice delivery audit
* MV03 Cross-slice integration
* MV04 Requirement coverage
*
* These use the existing quality_gates table with scope "milestone".
* Gate IDs for milestone validation (MV01–MV04) are sourced from the
* gate registry so the definitions stay in lockstep with prompt builders,
* dispatch rules, and state derivation. See gate-registry.ts.
*/
import { _getAdapter } from "./gsd-db.js";
/** Milestone validation gate IDs. */
const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const;
import { getGatesForTurn } from "./gate-registry.js";
/**
* Insert milestone-level quality_gates records for a validation run.
@ -27,6 +21,9 @@ const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const;
* from the overall milestone validation verdict. Individual gate-level
* verdicts are not available (the handler receives a single verdict),
* so all gates share the overall verdict.
*
* Gate IDs come from the registry — adding/removing an MV-scoped gate
* in gate-registry.ts automatically flows through here.
*/
export function insertMilestoneValidationGates(
milestoneId: string,
@ -38,8 +35,9 @@ export function insertMilestoneValidationGates(
if (!db) return;
const gateVerdict = verdict === "pass" ? "pass" : "flag";
const milestoneGates = getGatesForTurn("validate-milestone");
for (const gateId of MILESTONE_GATE_IDS) {
for (const def of milestoneGates) {
db.prepare(
`INSERT OR REPLACE INTO quality_gates
(milestone_id, slice_id, gate_id, scope, task_id, status, verdict, rationale, findings, evaluated_at)
@ -47,9 +45,9 @@ export function insertMilestoneValidationGates(
).run({
":mid": milestoneId,
":sid": sliceId,
":gid": gateId,
":gid": def.id,
":verdict": gateVerdict,
":rationale": `Milestone validation verdict: ${verdict}`,
":rationale": `${def.promptSection} — milestone validation verdict: ${verdict}`,
":evaluated_at": evaluatedAt,
});
}

View file

@ -0,0 +1,157 @@
/**
* GSD Prompt Validation — validates enhanced context and turn output
* artifacts before writing.
*
* Implements R109 validation requirement: CONTEXT.md must have required
* sections before being written to disk. Additionally, per-turn validators
* check that artifacts produced by gate-owning turns contain the gate
* sections declared in gate-registry.ts, so a malformed summary/validation
* markdown file cannot silently drop a quality gate.
*/
import { getGatesForTurn, type OwnerTurn } from "./gate-registry.js";
/**
 * Outcome of a section-presence validation.
 *
 * Returned both by the enhanced-context validator and by the per-turn gate
 * section validators in this module.
 */
export interface ValidationResult {
/** True exactly when `missing` is empty. */
valid: boolean;
/** Human-readable labels of the required sections (or entries) that were not found. */
missing: string[];
}
/**
 * Validate that enhanced context content has all required sections.
 *
 * Required sections per R109 (each must be an H2 heading at line start):
 * - Scope: `## Scope`, `## Milestone Scope`, or `## Why This Milestone`
 * - Architectural Decisions: `## Architectural Decisions`
 * - Acceptance Criteria: `## Acceptance Criteria` or
 *   `## Final Integrated Acceptance`
 *
 * When the Architectural Decisions heading is present, its body (up to the
 * next `## ` heading or end of input) must also contain at least one
 * decision entry: a line starting with `### ` or with `**Decision`.
 *
 * @param content - The enhanced context markdown content.
 * @returns `valid` is true when nothing is missing; `missing` lists the
 *   absent sections in the order scope, architectural decisions, acceptance
 *   criteria, decision entry.
 */
export function validateEnhancedContext(content: string): ValidationResult {
  const missing: string[] = [];

  // Required section 1: Scope (multiple acceptable header variants).
  const hasScopeSection =
    /^## Scope\b/m.test(content) ||
    /^## Milestone Scope\b/m.test(content) ||
    /^## Why This Milestone\b/m.test(content);
  if (!hasScopeSection) {
    missing.push("Milestone Scope or Why This Milestone");
  }

  // Required section 2: Architectural Decisions. Keep the match object so the
  // body extraction below starts at the very heading that was detected.
  const decisionsHeading = /^## Architectural Decisions\b/m.exec(content);
  if (decisionsHeading === null) {
    missing.push("Architectural Decisions");
  }

  // Required section 3: Acceptance Criteria (multiple acceptable header variants).
  const hasAcceptanceCriteria =
    /^## Acceptance Criteria\b/m.test(content) ||
    /^## Final Integrated Acceptance\b/m.test(content);
  if (!hasAcceptanceCriteria) {
    missing.push("Acceptance Criteria");
  }

  // Additional validation: Architectural Decisions must have at least one entry.
  if (decisionsHeading !== null) {
    // Slice from the end of the matched heading. A plain substring search for
    // "## Architectural Decisions" would also hit inline mentions and the tail
    // of an "### Architectural Decisions" heading, inspecting the wrong text.
    const afterHeading = content.slice(decisionsHeading.index + decisionsHeading[0].length);
    const nextSection = afterHeading.search(/^## /m);
    const sectionContent = nextSection === -1 ? afterHeading : afterHeading.slice(0, nextSection);

    // Accepted entry formats:
    // - ### heading (subsection per decision)
    // - **Decision marker (inline decision format)
    const hasDecisionEntry = /^### /m.test(sectionContent) || /^\*\*Decision/m.test(sectionContent);
    if (!hasDecisionEntry) {
      missing.push("At least one architectural decision entry");
    }
  }

  return {
    valid: missing.length === 0,
    missing,
  };
}
// ─── Per-Turn Gate Section Validators ─────────────────────────────────────
//
// Each validator checks that the artifact written by a turn contains a
// heading for every gate owned by that turn. The registry is the source
// of truth for which sections must exist; adding a new gate automatically
// flows through via `getGatesForTurn(turn)`.
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result matches the input literally when embedded in a `RegExp` source.
 *
 * @param value - Arbitrary text.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a
 *   backslash; all other characters are left untouched.
 */
function escapeRegExp(value: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => `\\${ch}`);
}
/**
 * Check that `content` declares an H2 heading for every gate owned by `turn`.
 *
 * For each gate definition returned by `getGatesForTurn(turn)`, a line
 * starting with `##`, whitespace, and the gate's `promptSection` must exist
 * in the content (the section title is regex-escaped and must end on a word
 * boundary).
 *
 * Soft rule: only the heading is required — the section body is not
 * inspected here, so an empty section still counts as present.
 *
 * @param content - Markdown artifact to inspect.
 * @param turn - Turn whose owned gates define the required headings.
 * @returns `missing` holds one `"<id> (## <promptSection>)"` label per gate
 *   whose heading was not found, in registry order; `valid` is true when
 *   that list is empty.
 */
export function validateGateSections(
  content: string,
  turn: OwnerTurn,
): ValidationResult {
  const missing = getGatesForTurn(turn)
    .filter((gate) => {
      const heading = new RegExp(`^##\\s+${escapeRegExp(gate.promptSection)}\\b`, "m");
      return !heading.test(content);
    })
    .map((gate) => `${gate.id} (## ${gate.promptSection})`);

  return { valid: missing.length === 0, missing };
}
/**
 * Validate a slice SUMMARY.md against the gates owned by the
 * `complete-slice` turn (e.g. Q8 requires "## Operational Readiness").
 * Suitable for a tool handler's pre-write check or a post-unit validation
 * sweep.
 *
 * @param content - The summary markdown.
 * @returns The result of `validateGateSections` for `complete-slice`.
 */
export function validateSliceSummaryOutput(content: string): ValidationResult {
  const owner: OwnerTurn = "complete-slice";
  return validateGateSections(content, owner);
}
/**
 * Validate a task SUMMARY.md against the gates owned by the `execute-task`
 * turn.
 *
 * This function only sees the markdown content, so it requires an H2
 * heading for every execute-task gate declared in the registry; it does not
 * consult which gate rows are actually pending for the task.
 * NOTE(review): callers that want to skip gates that were never seeded for
 * a task must filter the returned `missing` list (or skip the call)
 * themselves.
 *
 * @param content - The task summary markdown.
 * @returns The result of `validateGateSections` for `execute-task`.
 */
export function validateTaskSummaryOutput(content: string): ValidationResult {
  const owner: OwnerTurn = "execute-task";
  return validateGateSections(content, owner);
}
/**
 * Validate a VALIDATION.md against the gates owned by the
 * `validate-milestone` turn: every MV gate declared in the registry must
 * have its H2 heading present.
 *
 * @param content - The validation markdown.
 * @returns The result of `validateGateSections` for `validate-milestone`.
 */
export function validateMilestoneValidationOutput(content: string): ValidationResult {
  const owner: OwnerTurn = "validate-milestone";
  return validateGateSections(content, owner);
}

View file

@ -16,6 +16,8 @@ All relevant context has been preloaded below — the slice plan, all task summa
{{inlinedContext}}
{{gatesToClose}}
**Match effort to complexity.** A simple slice with 1-2 tasks needs a brief summary and lightweight verification. A complex slice with 5 tasks across multiple subsystems needs thorough verification and a detailed summary. Scale the work below accordingly.
Then:
@ -23,7 +25,7 @@ Then:
2. {{skillActivation}}
3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first. Task artifacts use a **flat file layout** directly inside `tasks/` (for example `T01-SUMMARY.md`, `T02-SUMMARY.md`) rather than per-task subdirectories. If you need to count or re-read task summaries during verification, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` or `ls .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks/*-SUMMARY.md`. Never use `tasks/*/SUMMARY.md` — that glob expects subdirectories that do not exist.
4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections.
5. If the slice involved runtime behavior, fill the **Operational Readiness** section (Q8) in the slice summary: health signal, failure signal, recovery procedure, and monitoring gaps. Omit entirely for simple slices with no runtime concerns.
5. Address every gate listed in the **Gates to Close** section above — each gate maps to a specific slice-summary section the handler inspects (for example, Q8 maps to **Operational Readiness**: health signal, failure signal, recovery procedure, and monitoring gaps). Leaving a section empty records the gate as `omitted`.
6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_requirement_update` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database.
7. Prepare the slice completion content you will pass to `gsd_complete_slice` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts.
8. Draft the UAT content you will pass as `uatContent` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built.

View file

@ -22,6 +22,8 @@ A researcher explored the codebase and a planner decomposed the work — you are
{{slicePlanExcerpt}}
{{gatesToClose}}
## Backing Source Artifacts
- Slice plan: `{{planPath}}`
- Task plan source: `{{taskPlanPath}}`

View file

@ -18,6 +18,8 @@ All relevant context has been preloaded below — the roadmap, all slice summari
{{inlinedContext}}
{{gatesToEvaluate}}
## Execution Protocol
### Step 1 — Dispatch Parallel Reviewers

View file

@ -58,7 +58,7 @@ import {
insertSlice,
insertTask,
updateTaskStatus,
getPendingSliceGateCount,
getPendingGateCountForTurn,
type MilestoneRow,
type SliceRow,
type TaskRow,
@ -864,7 +864,18 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> {
}
}
const pendingGateCount = getPendingSliceGateCount(activeMilestone.id, activeSlice.id);
// ── Quality gate evaluation check ──────────────────────────────────
// Pause before execution only when gates owned by the `gate-evaluate`
// turn (Q3/Q4) are still pending. Q8 is also `scope:"slice"` but is
// owned by `complete-slice`, so it must NOT block the evaluating-gates
// phase — otherwise auto-loop stalls forever waiting for a gate that
// this turn never evaluates. See gate-registry.ts for the ownership map.
// Slices with zero gate rows (pre-feature or simple) skip straight through.
const pendingGateCount = getPendingGateCountForTurn(
activeMilestone.id,
activeSlice.id,
"gate-evaluate",
);
if (pendingGateCount > 0) {
return {
activeMilestone, activeSlice, activeTask: null,

View file

@ -0,0 +1,167 @@
/**
* complete-slice gate closure integration test.
*
* Pins the fix for the Q8-stall bug: complete-slice must close every gate
* owned by the complete-slice turn based on the content of the matching
* CompleteSliceParams field. Without this, Q8 stays pending forever and
* blocks state derivation on subsequent loops.
*/
import { describe, test, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import {
openDatabase,
closeDatabase,
insertMilestone,
insertSlice,
insertTask,
insertGateRow,
getGateResults,
} from "../gsd-db.ts";
import { handleCompleteSlice } from "../tools/complete-slice.ts";
import type { CompleteSliceParams } from "../types.ts";
/**
 * Build a CompleteSliceParams fixture for slice S01 of milestone M001.
 *
 * @param overrides - Fields that replace (or extend) the fixture defaults.
 * @returns The defaults with `overrides` applied on top.
 */
function makeValidSliceParams(overrides: Partial<CompleteSliceParams> = {}): CompleteSliceParams {
  const defaults: CompleteSliceParams = {
    // Identity
    sliceId: "S01",
    milestoneId: "M001",
    sliceTitle: "Test Slice",
    // Narrative sections
    oneLiner: "Implemented test slice",
    narrative: "Built and tested.",
    verification: "All tests pass.",
    deviations: "None.",
    knownLimitations: "None.",
    followUps: "None.",
    // List-valued sections
    keyFiles: ["src/foo.ts"],
    keyDecisions: [],
    patternsEstablished: [],
    observabilitySurfaces: [],
    provides: [],
    requirementsSurfaced: [],
    drillDownPaths: [],
    affects: [],
    requirementsAdvanced: [],
    requirementsValidated: [],
    requirementsInvalidated: [],
    filesModified: [],
    requires: [],
    uatContent: "## Smoke Test\n\nVerify happy path.",
  };
  return { ...defaults, ...overrides };
}
// Suite: handleCompleteSlice must move the seeded Q8 gate row out of
// `pending`, whatever the caller supplies for `operationalReadiness`.
describe("complete-slice closes complete-slice-owned gates", () => {
// Path of the per-test database file (inside its own temp directory).
let dbPath: string;
// Root of the per-test project directory passed to the handler.
let basePath: string;
beforeEach(() => {
// Fresh database in a new temp directory for every test.
dbPath = path.join(
fs.mkdtempSync(path.join(os.tmpdir(), "gsd-slice-gate-")),
"test.db",
);
openDatabase(dbPath);
// Separate temp directory acting as the project root.
basePath = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-slice-gate-handler-"));
// Despite the name, this is the slice's `tasks` directory; the recursive
// mkdir also creates the milestone and slice directories above it.
const sliceDir = path.join(
basePath, ".gsd", "milestones", "M001", "slices", "S01", "tasks",
);
fs.mkdirSync(sliceDir, { recursive: true });
// Minimal roadmap file listing S01 as an unchecked slice.
fs.writeFileSync(
path.join(basePath, ".gsd", "milestones", "M001", "M001-ROADMAP.md"),
[
"# M001: Test Milestone",
"",
"## Slices",
"",
'- [ ] **S01: Test Slice** `risk:medium` `depends:[]`',
" - After this: basic functionality works",
].join("\n"),
);
// DB rows: one milestone, one slice, and one already-complete task.
insertMilestone({ id: "M001" });
insertSlice({ id: "S01", milestoneId: "M001" });
insertTask({
id: "T01", sliceId: "S01", milestoneId: "M001",
status: "complete", title: "Task 1",
});
// Seed Q8 as pending — this is what plan-slice does today.
insertGateRow({
milestoneId: "M001", sliceId: "S01",
gateId: "Q8", scope: "slice",
});
});
afterEach(() => {
// Close the DB before deleting its directory, then remove both temp trees.
closeDatabase();
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
fs.rmSync(basePath, { recursive: true, force: true });
});
// Non-empty operationalReadiness: verdict `pass`, content copied to findings.
test("Q8 closes as 'pass' when operationalReadiness is populated", async () => {
const params = makeValidSliceParams({
operationalReadiness: [
"- Health signal: /health endpoint returns 200",
"- Failure signal: error rate alert in observability dashboard",
"- Recovery: systemd auto-restart",
].join("\n"),
});
const result = await handleCompleteSlice(params, basePath);
assert.ok(!("error" in result), `handler failed: ${(result as any).error}`);
const gates = getGateResults("M001", "S01", "slice");
const q8 = gates.find((g) => g.gate_id === "Q8");
assert.ok(q8, "Q8 row must exist after complete-slice");
assert.equal(q8.status, "complete");
assert.equal(q8.verdict, "pass");
assert.ok(
q8.findings.includes("Health signal"),
"Q8 findings must capture the operationalReadiness content",
);
});
// Empty-string operationalReadiness: gate still completes, verdict `omitted`.
test("Q8 closes as 'omitted' when operationalReadiness is empty", async () => {
const params = makeValidSliceParams({ operationalReadiness: "" });
const result = await handleCompleteSlice(params, basePath);
assert.ok(!("error" in result), `handler failed: ${(result as any).error}`);
const gates = getGateResults("M001", "S01", "slice");
const q8 = gates.find((g) => g.gate_id === "Q8");
assert.ok(q8, "Q8 row must exist after complete-slice");
assert.equal(q8.status, "complete");
assert.equal(q8.verdict, "omitted");
});
test("Q8 also closes when operationalReadiness is omitted entirely", async () => {
// A model that doesn't pass operationalReadiness at all must still
// move Q8 out of 'pending' — leaving it pending produces the stall.
const params = makeValidSliceParams();
const result = await handleCompleteSlice(params, basePath);
assert.ok(!("error" in result), `handler failed: ${(result as any).error}`);
const gates = getGateResults("M001", "S01", "slice");
const q8 = gates.find((g) => g.gate_id === "Q8");
assert.ok(q8);
assert.notEqual(q8.status, "pending", "Q8 must never remain pending after complete-slice");
assert.equal(q8.verdict, "omitted");
});
// The summary file written by the handler must carry the Q8 H2 heading and
// the supplied operationalReadiness text.
test("summary markdown contains Operational Readiness section", async () => {
const params = makeValidSliceParams({
operationalReadiness: "- Health signal: /health\n- Failure signal: alert",
});
const result = await handleCompleteSlice(params, basePath);
assert.ok(!("error" in result));
// Repeated as an `if` so `result.summaryPath` is only read on the success branch.
if (!("error" in result)) {
const summary = fs.readFileSync(result.summaryPath, "utf-8");
assert.match(summary, /^## Operational Readiness/m);
assert.match(summary, /Health signal: \/health/);
}
});
});

View file

@ -186,4 +186,31 @@ describe("evaluating-gates phase", () => {
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T01" });
assert.equal(getPendingSliceGateCount("M001", "S01"), 1);
});
test("Q8 (owned by complete-slice) does not block evaluating-gates phase", async () => {
// Regression: Q8 is stored with scope:"slice" but owned by the
// complete-slice turn. Before the gate registry landed, deriveState
// counted Q8 as a blocker for evaluating-gates while the gate-evaluate
// prompt silently dropped Q8 — an unrecoverable stall. After the
// registry change, deriveState filters by owner turn, so Q8 never
// blocks evaluating-gates.
planSlice(tmpDir);
await renderPlanFromDb(tmpDir, "M001", "S01");
// Seed three slice-scoped gate rows: Q3, Q4 and Q8.
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" });
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", scope: "slice" });
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q8", scope: "slice" });
// Record results for Q3 and Q4 only.
saveGateResult({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", verdict: "pass", rationale: "OK", findings: "" });
saveGateResult({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", verdict: "omitted", rationale: "N/A", findings: "" });
// Q8 deliberately left pending — it's complete-slice's problem.
// Drop any cached state so deriveState sees the rows written above.
invalidateStateCache();
const state = await deriveState(tmpDir);
assert.equal(
state.phase,
"executing",
`pending Q8 must not stall evaluating-gates — got phase=${state.phase}`,
);
});
});

View file

@ -0,0 +1,140 @@
/**
* Gate registry tests enforce that every declared GateId has a registry
* entry, that every owner-turn bucket is non-empty, and that coverage
* assertions fail loudly instead of silently skipping unknown gates.
*/
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import {
GATE_REGISTRY,
assertGateCoverage,
getGateDefinition,
getGateIdsForTurn,
getGatesForTurn,
getOwnerTurn,
type OwnerTurn,
} from "../gate-registry.ts";
import type { GateId } from "../types.ts";
/**
 * Authoritative list of GateIds as declared in types.ts.
 * Kept as a hand-written literal so the tests below compare the registry
 * against an independent source rather than against itself.
 */
const ALL_GATE_IDS: readonly GateId[] = [
  "Q3",
  "Q4",
  "Q5",
  "Q6",
  "Q7",
  "Q8",
  "MV01",
  "MV02",
  "MV03",
  "MV04",
];

/** Every owner turn the tests below expect to own at least one gate. */
const ALL_OWNER_TURNS: readonly OwnerTurn[] = [
  "gate-evaluate",
  "execute-task",
  "complete-slice",
  "validate-milestone",
];
// Suite: structural invariants of GATE_REGISTRY and its lookup helpers,
// checked against the hand-written ALL_GATE_IDS / ALL_OWNER_TURNS lists.
describe("gate-registry", () => {
// Each listed id must map to an entry whose `id` matches its key and whose
// question, guidance and promptSection are non-empty.
test("every declared GateId has a registry entry", () => {
for (const id of ALL_GATE_IDS) {
const def = GATE_REGISTRY[id];
assert.ok(def, `missing registry entry for gate ${id}`);
assert.equal(def.id, id);
assert.ok(def.question.length > 0, `${id} missing question`);
assert.ok(def.guidance.length > 0, `${id} missing guidance`);
assert.ok(def.promptSection.length > 0, `${id} missing promptSection`);
}
});
// Reverse direction: the registry must not contain keys absent from the list.
test("registry contains no extra gate entries", () => {
const registryIds = new Set(Object.keys(GATE_REGISTRY));
const declaredIds = new Set<string>(ALL_GATE_IDS);
for (const id of registryIds) {
assert.ok(declaredIds.has(id), `registry has unknown gate ${id}`);
}
});
// An owner turn with an empty bucket is treated as a registry mistake.
test("every owner turn owns at least one gate", () => {
for (const turn of ALL_OWNER_TURNS) {
const gates = getGatesForTurn(turn);
assert.ok(
gates.length > 0,
`owner turn "${turn}" has no gates — likely a registry mistake`,
);
}
});
// No gate id may be returned for two different turns.
test("owner turn buckets are disjoint", () => {
const seen = new Set<string>();
for (const turn of ALL_OWNER_TURNS) {
for (const def of getGatesForTurn(turn)) {
assert.ok(!seen.has(def.id), `gate ${def.id} claimed by two turns`);
seen.add(def.id);
}
}
// Every gate should appear in exactly one bucket.
assert.equal(seen.size, ALL_GATE_IDS.length);
});
// getOwnerTurn(id) and getGateIdsForTurn(turn) must agree with each other.
test("getOwnerTurn round-trips against GATE_REGISTRY", () => {
for (const id of ALL_GATE_IDS) {
const turn = getOwnerTurn(id);
const idsForTurn = getGateIdsForTurn(turn);
assert.ok(idsForTurn.has(id), `${id} not in ${turn} bucket`);
}
});
// Unknown ids yield `undefined` rather than throwing.
test("getGateDefinition returns undefined for unknown ids", () => {
assert.equal(getGateDefinition("Q99"), undefined);
assert.equal(getGateDefinition("not-a-gate"), undefined);
});
});
// Suite: assertGateCoverage(rows, turn, options) must throw on rows the turn
// does not own and, when `requireAll` is true, on owned gates that are absent.
describe("assertGateCoverage", () => {
test("throws when a row is owned by a different turn", () => {
// Q8 is owned by complete-slice, not gate-evaluate — this used to be
// silently dropped by the old `if (!meta) continue;` filter, causing
// the evaluating-gates phase to stall.
assert.throws(
() => assertGateCoverage([{ gate_id: "Q8" }], "gate-evaluate"),
// The message must name both the offending gate and the turn.
(err: Error) =>
err.message.includes("Q8") && err.message.includes("gate-evaluate"),
);
});
// An id that is not a GateId (cast to get past the type checker) must be
// rejected even with `requireAll: false`.
test("throws when a row has an unknown gate id", () => {
assert.throws(
() => assertGateCoverage([{ gate_id: "Q999" as GateId }], "gate-evaluate", { requireAll: false }),
(err: Error) => err.message.includes("Q999"),
);
});
test("throws when requireAll is true and an owned gate is missing", () => {
// gate-evaluate owns Q3 and Q4. Passing only Q3 should fail.
assert.throws(
() => assertGateCoverage([{ gate_id: "Q3" }], "gate-evaluate", { requireAll: true }),
(err: Error) => err.message.includes("Q4"),
);
});
test("passes when requireAll is false and only a subset is pending", () => {
// execute-task owns Q5/Q6/Q7, but a task with no external dependencies
// may only have Q7 seeded. That's still valid coverage.
assert.doesNotThrow(() =>
assertGateCoverage([{ gate_id: "Q7" }], "execute-task", { requireAll: false }),
);
});
// Full coverage of gate-evaluate's bucket (Q3 and Q4) satisfies `requireAll`.
test("passes when requireAll is true and every owned gate is pending", () => {
assert.doesNotThrow(() =>
assertGateCoverage(
[{ gate_id: "Q3" }, { gate_id: "Q4" }],
"gate-evaluate",
{ requireAll: true },
),
);
});
// With no rows and `requireAll: false` there is nothing to reject.
test("empty pending list passes when requireAll is false", () => {
assert.doesNotThrow(() =>
assertGateCoverage([], "complete-slice", { requireAll: false }),
);
});
});

View file

@ -0,0 +1,208 @@
/**
* Prompt-system gate coverage tests.
*
* These tests pin the invariants the plan file documents:
* 1. Every pending slice-scoped gate is routed to exactly one owner turn.
* Q8 (owned by complete-slice) MUST NOT leak into gate-evaluate and
* get silently dropped the way it used to before the registry landed.
* 2. getPendingGatesForTurn filters by the registry's owner turn, not
* just the DB scope column.
* 3. Output validators recognize artifacts that contain the required
* gate section headings, and flag ones that don't.
* 4. Prompt output produced by the validators reflects MV01-MV04.
*
* They also assert the VALIDATION.md renderer still produces headings
* matching the registry's promptSection strings, so future renderer
* edits that drift from the registry fail the suite loudly.
*/
import { describe, test, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import { mkdtempSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import {
openDatabase,
closeDatabase,
insertMilestone,
insertSlice,
insertTask,
insertGateRow,
getPendingGates,
getPendingGatesForTurn,
} from "../gsd-db.ts";
import {
GATE_REGISTRY,
getGatesForTurn,
type OwnerTurn,
} from "../gate-registry.ts";
import {
validateSliceSummaryOutput,
validateTaskSummaryOutput,
validateMilestoneValidationOutput,
validateGateSections,
} from "../prompt-validation.ts";
/**
 * Open a throwaway database seeded with one milestone (M001), one slice
 * (S01) and one task (T01).
 *
 * @returns The temporary directory that holds `gsd.db`, so the caller can
 *   delete it during teardown.
 */
function setupTestDb(): string {
  const workDir = mkdtempSync(join(tmpdir(), "prompt-gate-coverage-"));
  openDatabase(join(workDir, "gsd.db"));

  const milestoneId = "M001";
  const sliceId = "S01";

  insertMilestone({ id: milestoneId, title: "Test", status: "active" });
  insertSlice({
    milestoneId,
    id: sliceId,
    title: "Test Slice",
    status: "pending",
    risk: "medium",
    depends: [],
  });
  insertTask({
    id: "T01",
    sliceId,
    milestoneId,
    title: "Test Task",
    status: "pending",
  });

  return workDir;
}
// Suite: getPendingGatesForTurn must select rows by the registry's owner
// turn (and task id, when given), not merely by the stored `scope` value.
describe("getPendingGatesForTurn routes by owner turn, not scope column", () => {
// Temp directory returned by setupTestDb; removed after each test.
let tmpDir: string;
beforeEach(() => {
tmpDir = setupTestDb();
});
afterEach(() => {
// Close the DB before deleting the directory that contains it.
closeDatabase();
rmSync(tmpDir, { recursive: true, force: true });
});
test("Q8 stored as scope:'slice' is owned by complete-slice, not gate-evaluate", () => {
// Seed the three slice-scoped gates plan-slice writes today.
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" });
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", scope: "slice" });
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q8", scope: "slice" });
// getPendingGates(..., "slice") returns all three (unchanged).
const allSlicePending = getPendingGates("M001", "S01", "slice");
assert.equal(allSlicePending.length, 3);
// But the turn-aware helper routes them correctly.
// Sorted before comparing, so the result order is not part of the assertion.
const gateEval = getPendingGatesForTurn("M001", "S01", "gate-evaluate");
assert.deepEqual(gateEval.map((g) => g.gate_id).sort(), ["Q3", "Q4"]);
const completeSlice = getPendingGatesForTurn("M001", "S01", "complete-slice");
assert.deepEqual(completeSlice.map((g) => g.gate_id), ["Q8"]);
});
test("task-scoped gates are scoped to the requested task id", () => {
// T01 gets Q5 and Q6; T02 gets only Q5.
// NOTE(review): setupTestDb inserts a task row for T01 only — this relies on
// gate rows not requiring an existing task row for T02; confirm.
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T01" });
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q6", scope: "task", taskId: "T01" });
insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T02" });
// Each lookup must return only the rows for the task id passed in.
const t1 = getPendingGatesForTurn("M001", "S01", "execute-task", "T01");
assert.equal(t1.length, 2);
assert.ok(t1.every((g) => g.gate_id === "Q5" || g.gate_id === "Q6"));
const t2 = getPendingGatesForTurn("M001", "S01", "execute-task", "T02");
assert.equal(t2.length, 1);
assert.equal(t2[0].gate_id, "Q5");
});
});
// Suite: the per-turn validators must report a gate as missing exactly when
// its registry `promptSection` H2 heading is absent from the artifact.
describe("per-turn output validators", () => {
  // A slice summary without the Q8 heading is invalid, and the `missing`
  // entry names both the gate id and the section.
  test("validateSliceSummaryOutput flags missing Operational Readiness", () => {
    const md = `# S01: Test Slice\n\n## What Happened\nstuff\n\n## Verification\nstuff\n`;
    const result = validateSliceSummaryOutput(md);
    assert.equal(result.valid, false);
    assert.ok(result.missing.some((m) => m.includes("Q8")));
    assert.ok(result.missing.some((m) => m.includes("Operational Readiness")));
  });

  test("validateSliceSummaryOutput passes when Operational Readiness heading is present", () => {
    const md = `# S01\n\n## Operational Readiness\n- Health: /health\n- Failure: alert\n`;
    const result = validateSliceSummaryOutput(md);
    assert.equal(result.valid, true);
    assert.equal(result.missing.length, 0);
  });

  test("validateMilestoneValidationOutput requires all four MV headings", () => {
    // Missing Requirement Coverage.
    const md = [
      "# Milestone Validation: M001",
      "## Success Criteria Checklist",
      "ok",
      "## Slice Delivery Audit",
      "ok",
      "## Cross-Slice Integration",
      "ok",
    ].join("\n\n");
    const result = validateMilestoneValidationOutput(md);
    assert.equal(result.valid, false);
    assert.ok(result.missing.some((m) => m.includes("MV04")));
  });

  test("validateMilestoneValidationOutput passes for a complete VALIDATION.md", () => {
    const md = [
      "# Milestone Validation: M001",
      "## Success Criteria Checklist",
      "ok",
      "## Slice Delivery Audit",
      "ok",
      "## Cross-Slice Integration",
      "ok",
      "## Requirement Coverage",
      "ok",
    ].join("\n\n");
    const result = validateMilestoneValidationOutput(md);
    assert.equal(result.valid, true, `unexpected missing: ${result.missing.join(", ")}`);
  });

  // A task summary with none of the execute-task headings must list all
  // three task gates as missing.
  test("validateTaskSummaryOutput flags missing task-gate sections", () => {
    const md = `# T01\n\n## What Happened\nstuff\n\n## Verification\nstuff\n`;
    const result = validateTaskSummaryOutput(md);
    assert.equal(result.valid, false);
    const idsInMissing = result.missing.join(" ");
    assert.ok(idsInMissing.includes("Q5"));
    assert.ok(idsInMissing.includes("Q6"));
    assert.ok(idsInMissing.includes("Q7"));
  });

  // Previously titled "...when gate bucket is empty", which this test never
  // exercised: every real owner turn owns at least one gate. What it checks
  // is that an artifact generated from the registry's own promptSection
  // strings validates cleanly.
  test("validateGateSections passes when every owned section heading is present", () => {
    const fullMd = getGatesForTurn("validate-milestone")
      .map((g) => `## ${g.promptSection}\n\nstuff`)
      .join("\n\n");
    const result = validateGateSections(fullMd, "validate-milestone");
    assert.equal(result.valid, true);
    assert.deepEqual(result.missing, []);
  });
});
// Suite: registry `promptSection` strings must stay equal to the headings
// hard-coded below, which mirror the renderers named in each test.
describe("registry / renderer parity", () => {
test("MV promptSections match the validate-milestone renderer H2 headings", () => {
// Mirror the string literals from tools/validate-milestone.ts
// renderValidationMarkdown() so a rename there flips this test red.
const expectedHeadings = [
"Success Criteria Checklist",
"Slice Delivery Audit",
"Cross-Slice Integration",
"Requirement Coverage",
];
const registryHeadings = getGatesForTurn("validate-milestone").map((g) => g.promptSection);
// Both sides are sorted, so the comparison ignores ordering.
assert.deepEqual(registryHeadings.sort(), [...expectedHeadings].sort());
});
test("Q8 promptSection matches the complete-slice renderer H2 heading", () => {
// Mirror the slice-summary H2 introduced in tools/complete-slice.ts.
assert.equal(GATE_REGISTRY.Q8.promptSection, "Operational Readiness");
});
test("registry owner turns cover every turn gate-registry.ts declares", () => {
// Distinct ownerTurn values that appear on at least one registry entry.
const ownerTurns = new Set<OwnerTurn>(Object.values(GATE_REGISTRY).map((g) => g.ownerTurn));
assert.ok(ownerTurns.has("gate-evaluate"));
assert.ok(ownerTurns.has("execute-task"));
assert.ok(ownerTurns.has("complete-slice"));
assert.ok(ownerTurns.has("validate-milestone"));
});
});

View file

@ -21,7 +21,10 @@ import {
getMilestone,
updateSliceStatus,
setSliceSummaryMd,
saveGateResult,
getPendingGatesForTurn,
} from "../gsd-db.js";
import { getGatesForTurn } from "../gate-registry.js";
import { resolveSliceFile, resolveSlicePath, clearPathCache } from "../paths.js";
import { checkOwnership, sliceUnitKey } from "../unit-ownership.js";
import { saveFile, clearParseCache } from "../files.js";
@ -39,6 +42,23 @@ export interface CompleteSliceResult {
uatPath: string;
}
/**
 * Look up the CompleteSliceParams field that backs a complete-slice gate.
 *
 * The handler records the gate as `pass` when the returned text is
 * non-blank and as `omitted` otherwise. Must stay in lockstep with the
 * gates declared in gate-registry.ts under ownerTurn "complete-slice".
 *
 * @param id - Gate id to resolve (currently only "Q8" is mapped).
 * @param params - Parameters passed to the complete-slice tool.
 * @returns The field's value, or `undefined` for an unmapped id or an
 *   unset field.
 */
function sliceGateFieldForId(
  id: string,
  params: CompleteSliceParams,
): string | undefined {
  // Q8 is backed by the Operational Readiness text.
  if (id === "Q8") {
    return params.operationalReadiness;
  }
  return undefined;
}
/**
* Render slice summary markdown matching the template format.
* YAML frontmatter uses snake_case keys for parseSummary() compatibility.
@ -169,6 +189,10 @@ ${reqSurfaced}
${reqInvalidated}
## Operational Readiness
${params.operationalReadiness?.trim() || "None."}
## Deviations
${params.deviations || "None."}
@ -330,6 +354,45 @@ export async function handleCompleteSlice(
// Store rendered markdown in DB for D004 recovery
setSliceSummaryMd(params.milestoneId, params.sliceId, summaryMd, uatMd);
// ── Close gates owned by complete-slice (Q8) ───────────────────────────
// Each owned gate maps to a specific summary section via the registry.
// If the caller populated the corresponding field, record `pass`; if the
// field is empty, record `omitted`. Without this loop, Q8 would stay
// pending forever and block future state derivation (see gate-registry).
try {
const pendingGates = getPendingGatesForTurn(
params.milestoneId,
params.sliceId,
"complete-slice",
);
if (pendingGates.length > 0) {
const ownedDefs = new Map(getGatesForTurn("complete-slice").map((g) => [g.id, g] as const));
for (const row of pendingGates) {
const def = ownedDefs.get(row.gate_id);
if (!def) continue;
// Map gate id → param field it maps to. Keep the map local so
// adding a new complete-slice gate is a single place change.
const field = sliceGateFieldForId(def.id, params);
const hasContent = typeof field === "string" && field.trim().length > 0;
saveGateResult({
milestoneId: params.milestoneId,
sliceId: params.sliceId,
gateId: def.id,
verdict: hasContent ? "pass" : "omitted",
rationale: hasContent
? `${def.promptSection} section populated in slice summary`
: `${def.promptSection} section left empty — recorded as omitted`,
findings: hasContent ? (field as string).trim() : "",
});
}
}
} catch (gateErr) {
logWarning(
"tool",
`complete-slice gate close warning for ${params.milestoneId}/${params.sliceId}: ${(gateErr as Error).message}`,
);
}
// Invalidate all caches
invalidateStateCache();
clearPathCache();

View file

@ -24,7 +24,10 @@ import {
updateTaskStatus,
setTaskSummaryMd,
deleteVerificationEvidence,
saveGateResult,
getPendingGatesForTurn,
} from "../gsd-db.js";
import { getGatesForTurn } from "../gate-registry.js";
import { resolveSliceFile, resolveTasksDir, clearPathCache } from "../paths.js";
import { checkOwnership, taskUnitKey } from "../unit-ownership.js";
import { saveFile, clearParseCache } from "../files.js";
@ -44,6 +47,27 @@ export interface CompleteTaskResult {
import type { TaskRow } from "../gsd-db.js";
/**
 * Look up the CompleteTaskParams field that backs an execute-task gate.
 *
 * The handler records the gate as `pass` when the returned text is
 * non-blank and as `omitted` otherwise. Must stay in lockstep with the
 * gates declared in gate-registry.ts under ownerTurn "execute-task".
 *
 * @param id - Gate id to resolve ("Q5", "Q6" or "Q7").
 * @param params - Parameters passed to the complete-task tool.
 * @returns The field's value, or `undefined` for an unmapped id or an
 *   unset field.
 */
function taskGateFieldForId(
  id: string,
  params: CompleteTaskParams,
): string | undefined {
  if (id === "Q5") {
    return params.failureModes;
  }
  if (id === "Q6") {
    return params.loadProfile;
  }
  if (id === "Q7") {
    return params.negativeTests;
  }
  return undefined;
}
/**
* Normalize a list parameter that may arrive as a string (newline-delimited
* bullet list from the LLM) into a string array (#3361).
@ -236,6 +260,45 @@ export async function handleCompleteTask(
// Store rendered markdown in DB for D004 recovery
setTaskSummaryMd(params.milestoneId, params.sliceId, params.taskId, summaryMd);
// ── Close gates owned by execute-task (Q5/Q6/Q7) for this task ────────
// Each gate id maps to a specific params field via taskGateFieldForId.
// When the model populates the field, record `pass`; when it's empty,
// record `omitted`. Task-scoped rows are filtered by taskId so a single
// task's completion doesn't touch sibling tasks' gate rows.
try {
const pendingGates = getPendingGatesForTurn(
params.milestoneId,
params.sliceId,
"execute-task",
params.taskId,
);
if (pendingGates.length > 0) {
const ownedDefs = new Map(getGatesForTurn("execute-task").map((g) => [g.id, g] as const));
for (const row of pendingGates) {
const def = ownedDefs.get(row.gate_id);
if (!def) continue;
const field = taskGateFieldForId(def.id, params);
const hasContent = typeof field === "string" && field.trim().length > 0;
saveGateResult({
milestoneId: params.milestoneId,
sliceId: params.sliceId,
taskId: params.taskId,
gateId: def.id,
verdict: hasContent ? "pass" : "omitted",
rationale: hasContent
? `${def.promptSection} section populated in task summary`
: `${def.promptSection} section left empty — recorded as omitted`,
findings: hasContent ? (field as string).trim() : "",
});
}
}
} catch (gateErr) {
logWarning(
"tool",
`complete-task gate close warning for ${params.milestoneId}/${params.sliceId}/${params.taskId}: ${(gateErr as Error).message}`,
);
}
// Invalidate all caches
invalidateStateCache();
clearPathCache();

View file

@ -8,6 +8,7 @@ import {
_getAdapter,
saveGateResult,
} from "../gsd-db.js";
import { GATE_REGISTRY } from "../gate-registry.js";
import { saveArtifactToDb } from "../db-writer.js";
import type { CompleteMilestoneParams } from "./complete-milestone.js";
import { handleCompleteMilestone } from "./complete-milestone.js";
@ -427,7 +428,9 @@ export async function executeSaveGateResult(
};
}
const validGates = ["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"];
// Source of truth: gate-registry.ts. Every declared GateId is accepted,
// so adding a new gate in one place automatically flows through here.
const validGates = Object.keys(GATE_REGISTRY);
if (!validGates.includes(params.gateId)) {
return {
content: [{ type: "text", text: `Error: Invalid gateId "${params.gateId}". Must be one of: ${validGates.join(", ")}` }],

View file

@ -536,6 +536,24 @@ export interface CompleteTaskParams {
verdict: string;
durationMs: number;
}>;
/**
* Q5 failure-modes section content (what breaks when dependencies fail).
* When populated, Q5 is recorded as `pass`; when omitted or empty, as `omitted`.
* @optional
*/
failureModes?: string;
/**
* Q6 load-profile section content (10x breakpoint + protection).
* When populated, Q6 is recorded as `pass`; when omitted or empty, as `omitted`.
* @optional
*/
loadProfile?: string;
/**
* Q7 negative-tests section content (malformed inputs, error paths,
* boundaries). When populated, Q7 is recorded as `pass`; when omitted or empty, as `omitted`.
* @optional
*/
negativeTests?: string;
/** Optional caller-provided identity for audit trail */
actorName?: string;
/** Optional caller-provided reason this action was triggered */
@ -584,6 +602,14 @@ export interface CompleteSliceParams {
affects?: string[];
/** @optional — defaults to [] when omitted */
drillDownPaths?: string[];
/**
* Q8 operational readiness section content (health signal, failure signal,
* recovery, monitoring gaps). When populated, the complete-slice handler
* records Q8 as `pass`; when omitted or empty, Q8 is recorded as `omitted`.
* See gate-registry.ts.
* @optional
*/
operationalReadiness?: string;
/** Optional caller-provided identity for audit trail */
actorName?: string;
/** Optional caller-provided reason this action was triggered */