diff --git a/packages/mcp-server/src/workflow-tools.ts b/packages/mcp-server/src/workflow-tools.ts index 9abbddbeb..ba6986beb 100644 --- a/packages/mcp-server/src/workflow-tools.ts +++ b/packages/mcp-server/src/workflow-tools.ts @@ -787,7 +787,7 @@ const saveGateResultParams = { projectDir: projectDirParam, milestoneId: z.string().describe("Milestone ID (e.g. M001)"), sliceId: z.string().describe("Slice ID (e.g. S01)"), - gateId: z.enum(["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"]).describe("Gate ID"), + gateId: z.enum(["Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "MV01", "MV02", "MV03", "MV04"]).describe("Gate ID"), taskId: z.string().optional().describe("Task ID for task-scoped gates"), verdict: z.enum(["pass", "flag", "omitted"]).describe("Gate verdict"), rationale: z.string().describe("One-sentence justification"), diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 1c6f622f3..28217afd6 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -24,7 +24,13 @@ import { getLoadedSkills, type Skill } from "@gsd/pi-coding-agent"; import { join, basename } from "node:path"; import { existsSync } from "node:fs"; import { computeBudgets, resolveExecutorContextWindow, truncateAtSectionBoundary } from "./context-budget.js"; -import { getPendingGates } from "./gsd-db.js"; +import { getPendingGates, getPendingGatesForTurn } from "./gsd-db.js"; +import { + GATE_REGISTRY, + assertGateCoverage, + getGatesForTurn, + type GateDefinition, +} from "./gate-registry.js"; import { formatDecisionsCompact, formatRequirementsCompact } from "./structured-data-formatter.js"; import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js"; import { logWarning } from "./workflow-logger.js"; @@ -1395,6 +1401,17 @@ export async function buildExecuteTaskPrompt( const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : ""; + // Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the + // gates that plan-slice actually seeded for this task — tasks with no + // external dependencies legitimately skip Q5, tasks with no runtime + // load dimension skip Q6, etc. + const etPending = getPendingGatesForTurn(mid, sid, "execute-task", tid); + assertGateCoverage(etPending, "execute-task", { requireAll: false }); + const gatesToClose = renderGatesToCloseBlock( + getGatesForTurn("execute-task"), + { pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true }, + ); + return loadPrompt("execute-task", { overridesSection, runtimeContext, @@ -1412,6 +1429,7 @@ export async function buildExecuteTaskPrompt( taskSummaryPath, inlinedTemplates, verificationBudget, + gatesToClose, skillActivation: buildSkillActivationBlock({ base, milestoneId: mid, @@ -1477,6 +1495,19 @@ export async function buildCompleteSlicePrompt( const sliceSummaryPath = join(base, `${sliceRel}/${sid}-SUMMARY.md`); const sliceUatPath = join(base, `${sliceRel}/${sid}-UAT.md`); + // Gates owned by complete-slice (e.g. Q8). Pull from the DB so the + // prompt only prompts for gates the plan actually seeded. The tool + // handler closes each gate based on the SUMMARY.md section content + // after the assistant calls gsd_complete_slice. + const csPending = getPendingGatesForTurn(mid, sid, "complete-slice"); + // coverage check: every pending row must be owned by complete-slice. + // requireAll:false because a slice may have already closed some gates. + assertGateCoverage(csPending, "complete-slice", { requireAll: false }); + const gatesToClose = renderGatesToCloseBlock( + getGatesForTurn("complete-slice"), + { pending: new Set(csPending.map((g) => g.gate_id)), allowOmit: true }, + ); + return loadPrompt("complete-slice", { workingDirectory: base, milestoneId: mid, sliceId: sid, sliceTitle: sTitle, @@ -1485,6 +1516,7 @@ export async function buildCompleteSlicePrompt( inlinedContext, sliceSummaryPath, sliceUatPath, + gatesToClose, }); } @@ -1675,6 +1707,16 @@ export async function buildValidateMilestonePrompt( const validationOutputPath = join(base, `${relMilestonePath(base, mid)}/${mid}-VALIDATION.md`); const roadmapOutputPath = `${relMilestonePath(base, mid)}/${mid}-ROADMAP.md`; + // Every milestone validation turn owns MV01–MV04 unconditionally: the + // registry is the source of truth for which gates the validator must + // address, and the block below is what the template renders so the + // assistant can never accidentally skip one. + const mvGates = getGatesForTurn("validate-milestone"); + const gatesToEvaluate = renderGatesToCloseBlock(mvGates, { + pending: new Set(mvGates.map((g) => g.id)), + allowOmit: false, + }); + return loadPrompt("validate-milestone", { workingDirectory: base, milestoneId: mid, @@ -1683,6 +1725,7 @@ export async function buildValidateMilestonePrompt( inlinedContext, validationPath: validationOutputPath, remediationRound: String(remediationRound), + gatesToEvaluate, skillActivation: buildSkillActivationBlock({ base, milestoneId: mid, @@ -1955,27 +1998,51 @@ export async function buildReactiveExecutePrompt( } // ─── Gate Evaluation ────────────────────────────────────────────────────── +// +// Gate definitions (question, guidance, owner turn) now live in +// gate-registry.ts so that prompt builders, dispatch rules, state +// derivation, and tool handlers all consult the same source of truth. +// See gate-registry.ts for the full ownership map. -const GATE_QUESTIONS: Record = { - Q3: { - question: "How can this be exploited?", - guidance: [ - "Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.", - "Map data exposure risks: PII, tokens, secrets accessible through this slice.", - "Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.", - "If none apply, return verdict 'omitted' with rationale explaining why.", - ].join("\n"), - }, - Q4: { - question: "What existing promises does this break?", - guidance: [ - "List which existing requirements (R001, R003, etc.) are touched by this slice.", - "Identify what must be re-tested after shipping.", - "Flag decisions that should be revisited given the new scope.", - "If no existing requirements are affected, return verdict 'omitted'.", - ].join("\n"), - }, -}; +/** + * Render a "Gates to Close" block for turns like `complete-slice` and + * `validate-milestone` that own gates which are closed as a side-effect + * of writing artifact sections (not via a dedicated gate-evaluate + * subagent loop). + * + * Returns a plain-text block or an empty string if there are no gates to + * close, so callers can drop it straight into a template variable. + */ +function renderGatesToCloseBlock( + gates: ReadonlyArray, + opts: { pending: ReadonlySet; allowOmit: boolean }, +): string { + const applicable = gates.filter((g) => opts.pending.has(g.id)); + if (applicable.length === 0) return ""; + + const lines: string[] = []; + lines.push("## Gates to Close"); + lines.push(""); + lines.push( + "These quality gates are still pending for this unit. You MUST address every one before calling the closing tool — the handler closes the DB row based on whether the corresponding artifact section is present.", + ); + lines.push(""); + for (const def of applicable) { + lines.push(`### ${def.id} — ${def.promptSection}`); + lines.push(""); + lines.push(`**Question:** ${def.question}`); + lines.push(""); + lines.push(def.guidance); + if (opts.allowOmit) { + lines.push(""); + lines.push( + `If this gate genuinely does not apply to this unit, leave the **${def.promptSection}** section empty and the handler will record it as \`omitted\`. Otherwise, fill the section with concrete evidence.`, + ); + } + lines.push(""); + } + return lines.join("\n").trimEnd(); +} export async function buildParallelResearchSlicesPrompt( mid: string, @@ -2011,28 +2078,39 @@ export async function buildGateEvaluatePrompt( mid: string, midTitle: string, sid: string, sTitle: string, base: string, ): Promise { - const pending = getPendingGates(mid, sid, "slice"); + // Pull only the gates this turn actually owns (Q3/Q4). Filter via the + // registry so that scope:"slice" gates owned by other turns (Q8) can't + // leak into this prompt and can't block dispatch via silent skip. + const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate"); + + // Fails loudly if the pending list contains a gate id the registry + // doesn't own for this turn. Missing owned gates is allowed here — + // `gate-evaluate` is dispatched whenever *any* of its owned gates are + // pending, not only when all of them are. + assertGateCoverage(pending, "gate-evaluate", { requireAll: false }); // Load the slice plan for context const planFile = resolveSliceFile(base, mid, sid, "PLAN"); const planContent = planFile ? (await loadFile(planFile)) ?? "(plan file empty)" : "(plan file not found)"; - // Build per-gate subagent prompts + // Build per-gate subagent prompts from the pending rows. Because the + // registry has already validated every row, `getGateDefinition` cannot + // return undefined here. + const pendingIds = new Set(pending.map((g) => g.gate_id)); + const gateDefs = getGatesForTurn("gate-evaluate").filter((def) => pendingIds.has(def.id)); + const subagentSections: string[] = []; const gateListLines: string[] = []; - for (const gate of pending) { - const meta = GATE_QUESTIONS[gate.gate_id]; - if (!meta) continue; - - gateListLines.push(`- **${gate.gate_id}**: ${meta.question}`); + for (const def of gateDefs) { + gateListLines.push(`- **${def.id}**: ${def.question}`); const subPrompt = [ - `You are evaluating quality gate **${gate.gate_id}** for slice ${sid} (${sTitle}).`, + `You are evaluating quality gate **${def.id}** for slice ${sid} (${sTitle}).`, "", - `## Question: ${meta.question}`, + `## Question: ${def.question}`, "", - meta.guidance, + def.guidance, "", "## Slice Plan", "", @@ -2044,14 +2122,14 @@ export async function buildGateEvaluatePrompt( `Call the \`gsd_save_gate_result\` tool with:`, `- \`milestoneId\`: "${mid}"`, `- \`sliceId\`: "${sid}"`, - `- \`gateId\`: "${gate.gate_id}"`, + `- \`gateId\`: "${def.id}"`, "- `verdict`: \"pass\" (no concerns), \"flag\" (concerns found), or \"omitted\" (not applicable)", "- `rationale`: one-sentence justification", "- `findings`: detailed markdown findings (or empty if omitted)", ].join("\n"); subagentSections.push([ - `### ${gate.gate_id}: ${meta.question}`, + `### ${def.id}: ${def.question}`, "", "Use this as the prompt for a `subagent` call:", "", diff --git a/src/resources/extensions/gsd/bootstrap/db-tools.ts b/src/resources/extensions/gsd/bootstrap/db-tools.ts index 71d5ae9aa..dbb5849c9 100644 --- a/src/resources/extensions/gsd/bootstrap/db-tools.ts +++ b/src/resources/extensions/gsd/bootstrap/db-tools.ts @@ -1026,12 +1026,12 @@ export function registerDbTools(pi: ExtensionAPI): void { name: "gsd_save_gate_result", label: "Save Gate Result", description: - "Save the result of a quality gate evaluation (Q3-Q8) to the GSD database. " + + "Save the result of a quality gate evaluation (Q3-Q8 or MV01-MV04) to the GSD database. " + "Called by gate evaluation sub-agents after analyzing a specific quality question.", promptSnippet: "Save quality gate evaluation result (verdict, rationale, findings)", promptGuidelines: [ "Use gsd_save_gate_result after evaluating a quality gate question.", - "gateId must be one of: Q3, Q4, Q5, Q6, Q7, Q8.", + "gateId must be one of: Q3, Q4, Q5, Q6, Q7, Q8, MV01, MV02, MV03, MV04.", "verdict must be: pass (no concerns), flag (concerns found), or omitted (not applicable).", "rationale should be a one-sentence justification for the verdict.", "findings should contain detailed markdown analysis (or empty string if omitted).", @@ -1039,7 +1039,7 @@ export function registerDbTools(pi: ExtensionAPI): void { parameters: Type.Object({ milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }), sliceId: Type.String({ description: "Slice ID (e.g. S01)" }), - gateId: Type.String({ description: "Gate ID: Q3, Q4, Q5, Q6, Q7, or Q8" }), + gateId: Type.String({ description: "Gate ID: Q3, Q4, Q5, Q6, Q7, Q8, MV01, MV02, MV03, or MV04" }), taskId: Type.Optional(Type.String({ description: "Task ID for task-scoped gates (Q5/Q6/Q7)" })), verdict: Type.String({ description: "pass, flag, or omitted" }), rationale: Type.String({ description: "One-sentence justification" }), diff --git a/src/resources/extensions/gsd/gate-registry.ts b/src/resources/extensions/gsd/gate-registry.ts new file mode 100644 index 000000000..be9de87ee --- /dev/null +++ b/src/resources/extensions/gsd/gate-registry.ts @@ -0,0 +1,251 @@ +/** + * GSD Gate Registry — single source of truth for quality-gate ownership. + * + * Each gate declares which workflow turn owns it, the scope at which it is + * persisted in the `quality_gates` table, and the question/guidance text used + * in the prompt that turn sends. The registry replaces the ad-hoc + * `GATE_QUESTIONS` table that used to live in `auto-prompts.ts`, and every + * layer of the prompt system (prompt builders, dispatch rules, state + * derivation, tool handlers) consults it so a pending gate can never be + * silently dropped. + * + * Design notes: + * - `GATE_REGISTRY` is exhaustiveness-checked against `GateId` via + * `satisfies Record`, so adding a new GateId + * without a registry entry is a compile error. + * - `getGatesForTurn(turn)` returns the definitions a turn owns. + * - `assertGateCoverage(pending, turn)` throws a GSDError if the pending + * list for a turn contains unknown gates, or if any gate owned by the + * turn is missing from the pending list. + */ + +import { GSDError, GSD_PARSE_ERROR } from "./errors.js"; +import type { GateId, GateRow, GateScope } from "./types.js"; + +/** Which workflow turn is responsible for evaluating / closing a gate. */ +export type OwnerTurn = + | "gate-evaluate" + | "execute-task" + | "complete-slice" + | "validate-milestone"; + +export interface GateDefinition { + id: GateId; + scope: GateScope; + ownerTurn: OwnerTurn; + /** One-line question the assistant must answer. */ + question: string; + /** Markdown guidance describing what a good answer looks like. */ + guidance: string; + /** H3 section header used in the artifact the turn writes + * (e.g. "Operational Readiness" for Q8 in the slice summary). */ + promptSection: string; +} + +export const GATE_REGISTRY = { + Q3: { + id: "Q3", + scope: "slice", + ownerTurn: "gate-evaluate", + question: "How can this be exploited?", + guidance: [ + "Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.", + "Map data exposure risks: PII, tokens, secrets accessible through this slice.", + "Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.", + "If none apply, return verdict 'omitted' with rationale explaining why.", + ].join("\n"), + promptSection: "Abuse Surface", + }, + Q4: { + id: "Q4", + scope: "slice", + ownerTurn: "gate-evaluate", + question: "What existing promises does this break?", + guidance: [ + "List which existing requirements (R001, R003, etc.) are touched by this slice.", + "Identify what must be re-tested after shipping.", + "Flag decisions that should be revisited given the new scope.", + "If no existing requirements are affected, return verdict 'omitted'.", + ].join("\n"), + promptSection: "Broken Promises", + }, + Q5: { + id: "Q5", + scope: "task", + ownerTurn: "execute-task", + question: "What breaks when dependencies fail?", + guidance: [ + "Enumerate the task's external dependencies (APIs, filesystem, network, subprocesses).", + "Describe the failure path for each: timeout, malformed response, connection loss.", + "Verify the implementation handles each failure or explicitly bubbles the error.", + "Return verdict 'omitted' only if the task has no external dependencies.", + ].join("\n"), + promptSection: "Failure Modes", + }, + Q6: { + id: "Q6", + scope: "task", + ownerTurn: "execute-task", + question: "What is the 10x load breakpoint?", + guidance: [ + "Identify the resource that saturates first at 10x the expected load.", + "Describe the protection applied (pool sizing, rate limiting, pagination, caching).", + "Return verdict 'omitted' if the task has no runtime load dimension.", + ].join("\n"), + promptSection: "Load Profile", + }, + Q7: { + id: "Q7", + scope: "task", + ownerTurn: "execute-task", + question: "What negative tests protect this task?", + guidance: [ + "List malformed inputs, error paths, and boundary conditions the tests cover.", + "Point to the specific test files or cases that assert each negative scenario.", + "Return verdict 'omitted' only if the task has no meaningful negative surface.", + ].join("\n"), + promptSection: "Negative Tests", + }, + Q8: { + id: "Q8", + scope: "slice", + ownerTurn: "complete-slice", + question: "How will ops know this slice is healthy or broken?", + guidance: [ + "Describe the health signal (metric, log line, dashboard) that proves the slice works.", + "Describe the failure signal that triggers an alert or paging.", + "Document the recovery procedure and any monitoring gaps.", + "Return verdict 'omitted' only for slices with no runtime behavior at all.", + ].join("\n"), + promptSection: "Operational Readiness", + }, + MV01: { + id: "MV01", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Is every success criterion in the milestone roadmap satisfied?", + guidance: [ + "Walk the success-criteria checklist from the milestone roadmap.", + "For each criterion, point to the slice / assessment / verification evidence that proves it.", + "Return verdict 'flag' if any criterion is unmet or unverifiable.", + ].join("\n"), + promptSection: "Success Criteria Checklist", + }, + MV02: { + id: "MV02", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Does every slice have a SUMMARY.md and a passing assessment?", + guidance: [ + "Confirm every slice listed in the roadmap has a SUMMARY.md.", + "Confirm each slice has an ASSESSMENT verdict of 'pass' (or justified 'omitted').", + "Flag missing artifacts and slices with outstanding follow-ups or known limitations.", + ].join("\n"), + promptSection: "Slice Delivery Audit", + }, + MV03: { + id: "MV03", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Do the slices integrate end-to-end?", + guidance: [ + "Trace at least one cross-slice flow proving the pieces compose.", + "Flag gaps where two slices were built in isolation with no integration evidence.", + ].join("\n"), + promptSection: "Cross-Slice Integration", + }, + MV04: { + id: "MV04", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Are all touched requirements covered and still coherent?", + guidance: [ + "For each requirement advanced, validated, surfaced, or invalidated across the milestone's slices, confirm the milestone-level evidence matches.", + "Flag requirements that slices claim to advance but no artifact proves.", + ].join("\n"), + promptSection: "Requirement Coverage", + }, +} as const satisfies Record; + +export type GateRegistry = typeof GATE_REGISTRY; + +/** Stable ordered lists per owner turn — iteration order matches declaration. */ +const ORDERED_GATES: readonly GateDefinition[] = Object.values(GATE_REGISTRY) as readonly GateDefinition[]; + +/** Return every gate owned by a turn, in stable declaration order. */ +export function getGatesForTurn(turn: OwnerTurn): GateDefinition[] { + return ORDERED_GATES.filter((g) => g.ownerTurn === turn); +} + +/** Return the set of gate ids a turn owns. */ +export function getGateIdsForTurn(turn: OwnerTurn): Set { + return new Set(getGatesForTurn(turn).map((g) => g.id)); +} + +/** Look up a definition by gate id, or undefined if unknown. */ +export function getGateDefinition(id: string): GateDefinition | undefined { + return (GATE_REGISTRY as Record)[id]; +} + +/** Look up the owner turn for a gate id. Throws if the gate is unknown. */ +export function getOwnerTurn(id: GateId): OwnerTurn { + const def = GATE_REGISTRY[id]; + if (!def) { + throw new GSDError(GSD_PARSE_ERROR, `gate-registry: unknown gate id "${id}"`); + } + return def.ownerTurn; +} + +/** + * Assert that the pending gate rows for a turn match what the registry says + * the turn owns. Fails loudly rather than silently skipping. + * + * - Every row in `pending` must have a definition whose `ownerTurn` matches `turn`. + * (The caller is responsible for scoping the pending list — e.g. filtering + * by slice scope before passing it in.) + * - `options.requireAll` (default true): every gate the turn owns must appear + * in `pending`. Set to false for turns like `execute-task` that only need + * coverage for the subset of gates that were seeded (e.g. tasks with no + * external dependencies have no Q5 row). + */ +export function assertGateCoverage( + pending: ReadonlyArray>, + turn: OwnerTurn, + options: { requireAll?: boolean } = {}, +): void { + const requireAll = options.requireAll ?? true; + const expected = getGateIdsForTurn(turn); + const pendingIds = new Set(pending.map((g) => g.gate_id)); + + const unknown: string[] = []; + for (const id of pendingIds) { + const def = getGateDefinition(id); + if (!def) { + unknown.push(id); + continue; + } + if (def.ownerTurn !== turn) { + unknown.push(`${id} (owned by ${def.ownerTurn}, not ${turn})`); + } + } + + if (unknown.length > 0) { + throw new GSDError( + GSD_PARSE_ERROR, + `assertGateCoverage: turn "${turn}" received pending gates it does not own: ${unknown.join(", ")}`, + ); + } + + if (requireAll) { + const missing: GateId[] = []; + for (const id of expected) { + if (!pendingIds.has(id)) missing.push(id); + } + if (missing.length > 0) { + throw new GSDError( + GSD_PARSE_ERROR, + `assertGateCoverage: turn "${turn}" is missing required gates: ${missing.join(", ")}`, + ); + } + } +} diff --git a/src/resources/extensions/gsd/gsd-db.ts b/src/resources/extensions/gsd/gsd-db.ts index e440bdb44..a088e04d6 100644 --- a/src/resources/extensions/gsd/gsd-db.ts +++ b/src/resources/extensions/gsd/gsd-db.ts @@ -10,6 +10,7 @@ import { existsSync, copyFileSync, mkdirSync, realpathSync } from "node:fs"; import { dirname } from "node:path"; import type { Decision, Requirement, GateRow, GateId, GateScope, GateStatus, GateVerdict } from "./types.js"; import { GSDError, GSD_STALE_STATE } from "./errors.js"; +import { getGateIdsForTurn, type OwnerTurn } from "./gate-registry.js"; import { logError, logWarning } from "./workflow-logger.js"; const _require = createRequire(import.meta.url); @@ -2302,3 +2303,53 @@ export function getPendingSliceGateCount(milestoneId: string, sliceId: string): ).get({ ":mid": milestoneId, ":sid": sliceId }); return row ? (row["cnt"] as number) : 0; } + +/** + * Return pending gate rows owned by a specific workflow turn. + * + * Unlike `getPendingGates(..., scope)`, this filters by the registry's + * `ownerTurn` metadata so callers can distinguish Q3/Q4 (owned by + * gate-evaluate) from Q8 (owned by complete-slice) even though both are + * scope:"slice". Pass `taskId` to narrow task-scoped results to one task. + */ +export function getPendingGatesForTurn( + milestoneId: string, + sliceId: string, + turn: OwnerTurn, + taskId?: string, +): GateRow[] { + if (!currentDb) return []; + const ids = getGateIdsForTurn(turn); + if (ids.size === 0) return []; + const idList = [...ids]; + const placeholders = idList.map((_, i) => `:gid${i}`).join(","); + const params: Record = { + ":mid": milestoneId, + ":sid": sliceId, + }; + idList.forEach((id, i) => { + params[`:gid${i}`] = id; + }); + let sql = + `SELECT * FROM quality_gates + WHERE milestone_id = :mid AND slice_id = :sid + AND status = 'pending' + AND gate_id IN (${placeholders})`; + if (taskId !== undefined) { + sql += ` AND task_id = :tid`; + params[":tid"] = taskId; + } + return currentDb.prepare(sql).all(params).map(rowToGate); +} + +/** + * Count pending gates for a turn. Convenience wrapper used by state + * derivation to decide whether a phase transition should pause. + */ +export function getPendingGateCountForTurn( + milestoneId: string, + sliceId: string, + turn: OwnerTurn, +): number { + return getPendingGatesForTurn(milestoneId, sliceId, turn).length; +} diff --git a/src/resources/extensions/gsd/milestone-validation-gates.ts b/src/resources/extensions/gsd/milestone-validation-gates.ts index 4dcd522b6..994870c37 100644 --- a/src/resources/extensions/gsd/milestone-validation-gates.ts +++ b/src/resources/extensions/gsd/milestone-validation-gates.ts @@ -6,19 +6,13 @@ * records in the DB. This module inserts milestone-level validation gates * that correspond to the validation checks performed. * - * Gate IDs for milestone validation: - * MV01 — Success criteria checklist - * MV02 — Slice delivery audit - * MV03 — Cross-slice integration - * MV04 — Requirement coverage - * - * These use the existing quality_gates table with scope "milestone". + * Gate IDs for milestone validation (MV01–MV04) are sourced from the + * gate registry so the definitions stay in lockstep with prompt builders, + * dispatch rules, and state derivation. See gate-registry.ts. */ import { _getAdapter } from "./gsd-db.js"; - -/** Milestone validation gate IDs. */ -const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const; +import { getGatesForTurn } from "./gate-registry.js"; /** * Insert milestone-level quality_gates records for a validation run. @@ -27,6 +21,9 @@ const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const; * from the overall milestone validation verdict. Individual gate-level * verdicts are not available (the handler receives a single verdict), * so all gates share the overall verdict. + * + * Gate IDs come from the registry — adding/removing an MV-scoped gate + * in gate-registry.ts automatically flows through here. */ export function insertMilestoneValidationGates( milestoneId: string, @@ -38,8 +35,9 @@ export function insertMilestoneValidationGates( if (!db) return; const gateVerdict = verdict === "pass" ? "pass" : "flag"; + const milestoneGates = getGatesForTurn("validate-milestone"); - for (const gateId of MILESTONE_GATE_IDS) { + for (const def of milestoneGates) { db.prepare( `INSERT OR REPLACE INTO quality_gates (milestone_id, slice_id, gate_id, scope, task_id, status, verdict, rationale, findings, evaluated_at) @@ -47,9 +45,9 @@ export function insertMilestoneValidationGates( ).run({ ":mid": milestoneId, ":sid": sliceId, - ":gid": gateId, + ":gid": def.id, ":verdict": gateVerdict, - ":rationale": `Milestone validation verdict: ${verdict}`, + ":rationale": `${def.promptSection} — milestone validation verdict: ${verdict}`, ":evaluated_at": evaluatedAt, }); } diff --git a/src/resources/extensions/gsd/prompt-validation.ts b/src/resources/extensions/gsd/prompt-validation.ts new file mode 100644 index 000000000..0163b88a6 --- /dev/null +++ b/src/resources/extensions/gsd/prompt-validation.ts @@ -0,0 +1,157 @@ +/** + * GSD Prompt Validation — Validates enhanced context and turn output + * artifacts before writing. + * + * Implements R109 validation requirement: CONTEXT.md must have required + * sections before being written to disk. Additionally, per-turn validators + * check that artifacts produced by gate-owning turns contain the gate + * sections declared in gate-registry.ts, so a malformed summary/validation + * markdown file cannot silently drop a quality gate. + */ + +import { getGatesForTurn, type OwnerTurn } from "./gate-registry.js"; + +/** + * Result of validating enhanced context output. + */ +export interface ValidationResult { + /** Whether all required sections are present. */ + valid: boolean; + /** List of missing required sections. */ + missing: string[]; +} + +/** + * Validate that enhanced context content has all required sections. + * + * Required sections per R109: + * - Scope section (## Scope, ## Milestone Scope, or ## Why This Milestone) + * - Architectural Decisions section (## Architectural Decisions) + * - Acceptance Criteria section (## Acceptance Criteria or ## Final Integrated Acceptance) + * + * Additionally validates that the Architectural Decisions section contains + * at least one decision entry (### heading or **Decision marker). + * + * @param content - The enhanced context markdown content + * @returns ValidationResult with valid flag and list of missing sections + */ +export function validateEnhancedContext(content: string): ValidationResult { + const missing: string[] = []; + + // Required section 1: Scope (multiple acceptable header variants) + const hasScopeSection = + /^## Scope\b/m.test(content) || + /^## Milestone Scope\b/m.test(content) || + /^## Why This Milestone\b/m.test(content); + + if (!hasScopeSection) { + missing.push("Milestone Scope or Why This Milestone"); + } + + // Required section 2: Architectural Decisions + const hasArchitecturalDecisions = /^## Architectural Decisions\b/m.test(content); + if (!hasArchitecturalDecisions) { + missing.push("Architectural Decisions"); + } + + // Required section 3: Acceptance Criteria (multiple acceptable header variants) + const hasAcceptanceCriteria = + /^## Acceptance Criteria\b/m.test(content) || + /^## Final Integrated Acceptance\b/m.test(content); + + if (!hasAcceptanceCriteria) { + missing.push("Acceptance Criteria"); + } + + // Additional validation: Architectural Decisions must have at least one entry + if (hasArchitecturalDecisions) { + // Extract the section content between ## Architectural Decisions and the next ## heading. + // Uses indexOf-based extraction instead of regex with \z (which is invalid in JavaScript + // regex — it's PCRE/Ruby syntax and JS treats it as literal 'z'). + const sectionStart = content.indexOf("## Architectural Decisions"); + if (sectionStart === -1) { + missing.push("Architectural Decisions"); + } else { + const afterHeading = content.slice(sectionStart + "## Architectural Decisions".length); + const nextSection = afterHeading.search(/^## /m); + const sectionContent = nextSection === -1 ? afterHeading : afterHeading.slice(0, nextSection); + + // Check for actual decision entries: + // - ### heading (subsection per decision) + // - **Decision marker (inline decision format) + const hasDecisionEntry = /^### /m.test(sectionContent) || /^\*\*Decision/m.test(sectionContent); + + if (!hasDecisionEntry) { + missing.push("At least one architectural decision entry"); + } + } + } + + return { + valid: missing.length === 0, + missing, + }; +} + +// ─── Per-Turn Gate Section Validators ───────────────────────────────────── +// +// Each validator checks that the artifact written by a turn contains a +// heading for every gate owned by that turn. The registry is the source +// of truth for which sections must exist; adding a new gate automatically +// flows through via `getGatesForTurn(turn)`. + +/** + * Escape a string so it can be embedded safely inside a regular expression. + */ +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +/** + * Validate that an artifact contains an `## H2` heading for every gate the + * named turn owns. Returns the list of missing gate section headers. + * + * Soft rule: a section counts as "present" if it is declared (H2 heading + * exists) — empty-body sections are allowed and handled by the tool + * handler, which will record such gates as `omitted`. + */ +export function validateGateSections( + content: string, + turn: OwnerTurn, +): ValidationResult { + const missing: string[] = []; + for (const def of getGatesForTurn(turn)) { + const pattern = new RegExp(`^##\\s+${escapeRegExp(def.promptSection)}\\b`, "m"); + if (!pattern.test(content)) { + missing.push(`${def.id} (## ${def.promptSection})`); + } + } + return { valid: missing.length === 0, missing }; +} + +/** + * Validate a SUMMARY.md produced by the complete-slice turn. Requires + * an H2 heading for every gate owned by complete-slice (e.g. Q8 → + * "## Operational Readiness"). Intended for use in the tool handler's + * pre-write checks or in the post-unit validation sweep. + */ +export function validateSliceSummaryOutput(content: string): ValidationResult { + return validateGateSections(content, "complete-slice"); +} + +/** + * Validate a task SUMMARY.md produced by the execute-task turn. Only + * flags gates that are still pending for the task; skips the check + * when no rows are seeded (simple task). + */ +export function validateTaskSummaryOutput(content: string): ValidationResult { + return validateGateSections(content, "execute-task"); +} + +/** + * Validate a VALIDATION.md produced by the validate-milestone turn. + * Requires an H2 heading for every MV gate declared in the registry. + */ +export function validateMilestoneValidationOutput(content: string): ValidationResult { + return validateGateSections(content, "validate-milestone"); +} diff --git a/src/resources/extensions/gsd/prompts/complete-slice.md b/src/resources/extensions/gsd/prompts/complete-slice.md index 746729d82..100f8efd7 100644 --- a/src/resources/extensions/gsd/prompts/complete-slice.md +++ b/src/resources/extensions/gsd/prompts/complete-slice.md @@ -16,6 +16,8 @@ All relevant context has been preloaded below — the slice plan, all task summa {{inlinedContext}} +{{gatesToClose}} + **Match effort to complexity.** A simple slice with 1-2 tasks needs a brief summary and lightweight verification. A complex slice with 5 tasks across multiple subsystems needs thorough verification and a detailed summary. Scale the work below accordingly. Then: @@ -23,7 +25,7 @@ Then: 2. {{skillActivation}} 3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first. Task artifacts use a **flat file layout** directly inside `tasks/` (for example `T01-SUMMARY.md`, `T02-SUMMARY.md`) rather than per-task subdirectories. If you need to count or re-read task summaries during verification, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` or `ls .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks/*-SUMMARY.md`. Never use `tasks/*/SUMMARY.md` — that glob expects subdirectories that do not exist. 4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections. -5. If the slice involved runtime behavior, fill the **Operational Readiness** section (Q8) in the slice summary: health signal, failure signal, recovery procedure, and monitoring gaps. Omit entirely for simple slices with no runtime concerns. +5. Address every gate listed in the **Gates to Close** section above — each gate maps to a specific slice-summary section the handler inspects (for example, Q8 maps to **Operational Readiness**: health signal, failure signal, recovery procedure, and monitoring gaps). Leaving a section empty records the gate as `omitted`. 6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_requirement_update` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database. 7. Prepare the slice completion content you will pass to `gsd_complete_slice` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts. 8. Draft the UAT content you will pass as `uatContent` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built. diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md index 9895dd6a4..5fc9dc835 100644 --- a/src/resources/extensions/gsd/prompts/execute-task.md +++ b/src/resources/extensions/gsd/prompts/execute-task.md @@ -22,6 +22,8 @@ A researcher explored the codebase and a planner decomposed the work — you are {{slicePlanExcerpt}} +{{gatesToClose}} + ## Backing Source Artifacts - Slice plan: `{{planPath}}` - Task plan source: `{{taskPlanPath}}` diff --git a/src/resources/extensions/gsd/prompts/validate-milestone.md b/src/resources/extensions/gsd/prompts/validate-milestone.md index aa6aa75a6..f5a200602 100644 --- a/src/resources/extensions/gsd/prompts/validate-milestone.md +++ b/src/resources/extensions/gsd/prompts/validate-milestone.md @@ -18,6 +18,8 @@ All relevant context has been preloaded below — the roadmap, all slice summari {{inlinedContext}} +{{gatesToEvaluate}} + ## Execution Protocol ### Step 1 — Dispatch Parallel Reviewers diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 1275feca3..e4877552e 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -58,7 +58,7 @@ import { insertSlice, insertTask, updateTaskStatus, - getPendingSliceGateCount, + getPendingGateCountForTurn, type MilestoneRow, type SliceRow, type TaskRow, @@ -864,7 +864,18 @@ export async function deriveStateFromDb(basePath: string): Promise { } } - const pendingGateCount = getPendingSliceGateCount(activeMilestone.id, activeSlice.id); + // ── Quality gate evaluation check ────────────────────────────────── + // Pause before execution only when gates owned by the `gate-evaluate` + // turn (Q3/Q4) are still pending. Q8 is also `scope:"slice"` but is + // owned by `complete-slice`, so it must NOT block the evaluating-gates + // phase — otherwise auto-loop stalls forever waiting for a gate that + // this turn never evaluates. See gate-registry.ts for the ownership map. + // Slices with zero gate rows (pre-feature or simple) skip straight through. + const pendingGateCount = getPendingGateCountForTurn( + activeMilestone.id, + activeSlice.id, + "gate-evaluate", + ); if (pendingGateCount > 0) { return { activeMilestone, activeSlice, activeTask: null, diff --git a/src/resources/extensions/gsd/tests/complete-slice-gate-closure.test.ts b/src/resources/extensions/gsd/tests/complete-slice-gate-closure.test.ts new file mode 100644 index 000000000..6a764ef55 --- /dev/null +++ b/src/resources/extensions/gsd/tests/complete-slice-gate-closure.test.ts @@ -0,0 +1,167 @@ +/** + * complete-slice gate closure integration test. + * + * Pins the fix for the Q8-stall bug: complete-slice must close every gate + * owned by the complete-slice turn based on the content of the matching + * CompleteSliceParams field. Without this, Q8 stays pending forever and + * blocks state derivation on subsequent loops. + */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import * as os from "node:os"; + +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + insertGateRow, + getGateResults, +} from "../gsd-db.ts"; +import { handleCompleteSlice } from "../tools/complete-slice.ts"; +import type { CompleteSliceParams } from "../types.ts"; + +function makeValidSliceParams(overrides: Partial = {}): CompleteSliceParams { + return { + sliceId: "S01", + milestoneId: "M001", + sliceTitle: "Test Slice", + oneLiner: "Implemented test slice", + narrative: "Built and tested.", + verification: "All tests pass.", + deviations: "None.", + knownLimitations: "None.", + followUps: "None.", + keyFiles: ["src/foo.ts"], + keyDecisions: [], + patternsEstablished: [], + observabilitySurfaces: [], + provides: [], + requirementsSurfaced: [], + drillDownPaths: [], + affects: [], + requirementsAdvanced: [], + requirementsValidated: [], + requirementsInvalidated: [], + filesModified: [], + requires: [], + uatContent: "## Smoke Test\n\nVerify happy path.", + ...overrides, + }; +} + +describe("complete-slice closes complete-slice-owned gates", () => { + let dbPath: string; + let basePath: string; + + beforeEach(() => { + dbPath = path.join( + fs.mkdtempSync(path.join(os.tmpdir(), "gsd-slice-gate-")), + "test.db", + ); + openDatabase(dbPath); + + basePath = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-slice-gate-handler-")); + const sliceDir = path.join( + basePath, ".gsd", "milestones", "M001", "slices", "S01", "tasks", + ); + fs.mkdirSync(sliceDir, { recursive: true }); + fs.writeFileSync( + path.join(basePath, ".gsd", "milestones", "M001", "M001-ROADMAP.md"), + [ + "# M001: Test Milestone", + "", + "## Slices", + "", + '- [ ] **S01: Test Slice** `risk:medium` `depends:[]`', + " - After this: basic functionality works", + ].join("\n"), + ); + + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001" }); + insertTask({ + id: "T01", sliceId: "S01", milestoneId: "M001", + status: "complete", title: "Task 1", + }); + + // Seed Q8 as pending — this is what plan-slice does today. + insertGateRow({ + milestoneId: "M001", sliceId: "S01", + gateId: "Q8", scope: "slice", + }); + }); + + afterEach(() => { + closeDatabase(); + fs.rmSync(path.dirname(dbPath), { recursive: true, force: true }); + fs.rmSync(basePath, { recursive: true, force: true }); + }); + + test("Q8 closes as 'pass' when operationalReadiness is populated", async () => { + const params = makeValidSliceParams({ + operationalReadiness: [ + "- Health signal: /health endpoint returns 200", + "- Failure signal: error rate alert in observability dashboard", + "- Recovery: systemd auto-restart", + ].join("\n"), + }); + + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result), `handler failed: ${(result as any).error}`); + + const gates = getGateResults("M001", "S01", "slice"); + const q8 = gates.find((g) => g.gate_id === "Q8"); + assert.ok(q8, "Q8 row must exist after complete-slice"); + assert.equal(q8.status, "complete"); + assert.equal(q8.verdict, "pass"); + assert.ok( + q8.findings.includes("Health signal"), + "Q8 findings must capture the operationalReadiness content", + ); + }); + + test("Q8 closes as 'omitted' when operationalReadiness is empty", async () => { + const params = makeValidSliceParams({ operationalReadiness: "" }); + + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result), `handler failed: ${(result as any).error}`); + + const gates = getGateResults("M001", "S01", "slice"); + const q8 = gates.find((g) => g.gate_id === "Q8"); + assert.ok(q8, "Q8 row must exist after complete-slice"); + assert.equal(q8.status, "complete"); + assert.equal(q8.verdict, "omitted"); + }); + + test("Q8 also closes when operationalReadiness is omitted entirely", async () => { + // A model that doesn't pass operationalReadiness at all must still + // move Q8 out of 'pending' — leaving it pending produces the stall. + const params = makeValidSliceParams(); + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result), `handler failed: ${(result as any).error}`); + + const gates = getGateResults("M001", "S01", "slice"); + const q8 = gates.find((g) => g.gate_id === "Q8"); + assert.ok(q8); + assert.notEqual(q8.status, "pending", "Q8 must never remain pending after complete-slice"); + assert.equal(q8.verdict, "omitted"); + }); + + test("summary markdown contains Operational Readiness section", async () => { + const params = makeValidSliceParams({ + operationalReadiness: "- Health signal: /health\n- Failure signal: alert", + }); + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result)); + if (!("error" in result)) { + const summary = fs.readFileSync(result.summaryPath, "utf-8"); + assert.match(summary, /^## Operational Readiness/m); + assert.match(summary, /Health signal: \/health/); + } + }); +}); diff --git a/src/resources/extensions/gsd/tests/gate-dispatch.test.ts b/src/resources/extensions/gsd/tests/gate-dispatch.test.ts index 3b18a2fbf..36fdbe2c9 100644 --- a/src/resources/extensions/gsd/tests/gate-dispatch.test.ts +++ b/src/resources/extensions/gsd/tests/gate-dispatch.test.ts @@ -186,4 +186,31 @@ describe("evaluating-gates phase", () => { insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T01" }); assert.equal(getPendingSliceGateCount("M001", "S01"), 1); }); + + test("Q8 (owned by complete-slice) does not block evaluating-gates phase", async () => { + // Regression: Q8 is stored with scope:"slice" but owned by the + // complete-slice turn. Before the gate registry landed, deriveState + // counted Q8 as a blocker for evaluating-gates while the gate-evaluate + // prompt silently dropped Q8 — an unrecoverable stall. After the + // registry change, deriveState filters by owner turn, so Q8 never + // blocks evaluating-gates. + planSlice(tmpDir); + await renderPlanFromDb(tmpDir, "M001", "S01"); + + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q8", scope: "slice" }); + + saveGateResult({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", verdict: "pass", rationale: "OK", findings: "" }); + saveGateResult({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", verdict: "omitted", rationale: "N/A", findings: "" }); + // Q8 deliberately left pending — it's complete-slice's problem. + + invalidateStateCache(); + const state = await deriveState(tmpDir); + assert.equal( + state.phase, + "executing", + `pending Q8 must not stall evaluating-gates — got phase=${state.phase}`, + ); + }); }); diff --git a/src/resources/extensions/gsd/tests/gate-registry.test.ts b/src/resources/extensions/gsd/tests/gate-registry.test.ts new file mode 100644 index 000000000..3bb1d6c3c --- /dev/null +++ b/src/resources/extensions/gsd/tests/gate-registry.test.ts @@ -0,0 +1,140 @@ +/** + * Gate registry tests — enforce that every declared GateId has a registry + * entry, that every owner-turn bucket is non-empty, and that coverage + * assertions fail loudly instead of silently skipping unknown gates. + */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; + +import { + GATE_REGISTRY, + assertGateCoverage, + getGateDefinition, + getGateIdsForTurn, + getGatesForTurn, + getOwnerTurn, + type OwnerTurn, +} from "../gate-registry.ts"; +import type { GateId } from "../types.ts"; + +/** Authoritative list of GateIds as declared in types.ts. */ +const ALL_GATE_IDS: readonly GateId[] = [ + "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", + "MV01", "MV02", "MV03", "MV04", +]; + +const ALL_OWNER_TURNS: readonly OwnerTurn[] = [ + "gate-evaluate", + "execute-task", + "complete-slice", + "validate-milestone", +]; + +describe("gate-registry", () => { + test("every declared GateId has a registry entry", () => { + for (const id of ALL_GATE_IDS) { + const def = GATE_REGISTRY[id]; + assert.ok(def, `missing registry entry for gate ${id}`); + assert.equal(def.id, id); + assert.ok(def.question.length > 0, `${id} missing question`); + assert.ok(def.guidance.length > 0, `${id} missing guidance`); + assert.ok(def.promptSection.length > 0, `${id} missing promptSection`); + } + }); + + test("registry contains no extra gate entries", () => { + const registryIds = new Set(Object.keys(GATE_REGISTRY)); + const declaredIds = new Set(ALL_GATE_IDS); + for (const id of registryIds) { + assert.ok(declaredIds.has(id), `registry has unknown gate ${id}`); + } + }); + + test("every owner turn owns at least one gate", () => { + for (const turn of ALL_OWNER_TURNS) { + const gates = getGatesForTurn(turn); + assert.ok( + gates.length > 0, + `owner turn "${turn}" has no gates — likely a registry mistake`, + ); + } + }); + + test("owner turn buckets are disjoint", () => { + const seen = new Set(); + for (const turn of ALL_OWNER_TURNS) { + for (const def of getGatesForTurn(turn)) { + assert.ok(!seen.has(def.id), `gate ${def.id} claimed by two turns`); + seen.add(def.id); + } + } + // Every gate should appear in exactly one bucket. + assert.equal(seen.size, ALL_GATE_IDS.length); + }); + + test("getOwnerTurn round-trips against GATE_REGISTRY", () => { + for (const id of ALL_GATE_IDS) { + const turn = getOwnerTurn(id); + const idsForTurn = getGateIdsForTurn(turn); + assert.ok(idsForTurn.has(id), `${id} not in ${turn} bucket`); + } + }); + + test("getGateDefinition returns undefined for unknown ids", () => { + assert.equal(getGateDefinition("Q99"), undefined); + assert.equal(getGateDefinition("not-a-gate"), undefined); + }); +}); + +describe("assertGateCoverage", () => { + test("throws when a row is owned by a different turn", () => { + // Q8 is owned by complete-slice, not gate-evaluate — this used to be + // silently dropped by the old `if (!meta) continue;` filter, causing + // the evaluating-gates phase to stall. + assert.throws( + () => assertGateCoverage([{ gate_id: "Q8" }], "gate-evaluate"), + (err: Error) => + err.message.includes("Q8") && err.message.includes("gate-evaluate"), + ); + }); + + test("throws when a row has an unknown gate id", () => { + assert.throws( + () => assertGateCoverage([{ gate_id: "Q999" as GateId }], "gate-evaluate", { requireAll: false }), + (err: Error) => err.message.includes("Q999"), + ); + }); + + test("throws when requireAll is true and an owned gate is missing", () => { + // gate-evaluate owns Q3 and Q4. Passing only Q3 should fail. + assert.throws( + () => assertGateCoverage([{ gate_id: "Q3" }], "gate-evaluate", { requireAll: true }), + (err: Error) => err.message.includes("Q4"), + ); + }); + + test("passes when requireAll is false and only a subset is pending", () => { + // execute-task owns Q5/Q6/Q7, but a task with no external dependencies + // may only have Q7 seeded. That's still valid coverage. + assert.doesNotThrow(() => + assertGateCoverage([{ gate_id: "Q7" }], "execute-task", { requireAll: false }), + ); + }); + + test("passes when requireAll is true and every owned gate is pending", () => { + assert.doesNotThrow(() => + assertGateCoverage( + [{ gate_id: "Q3" }, { gate_id: "Q4" }], + "gate-evaluate", + { requireAll: true }, + ), + ); + }); + + test("empty pending list passes when requireAll is false", () => { + assert.doesNotThrow(() => + assertGateCoverage([], "complete-slice", { requireAll: false }), + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/prompt-system-gate-coverage.test.ts b/src/resources/extensions/gsd/tests/prompt-system-gate-coverage.test.ts new file mode 100644 index 000000000..fad37ed7d --- /dev/null +++ b/src/resources/extensions/gsd/tests/prompt-system-gate-coverage.test.ts @@ -0,0 +1,208 @@ +/** + * Prompt-system gate coverage tests. + * + * These tests pin the invariants the plan file documents: + * 1. Every pending slice-scoped gate is routed to exactly one owner turn. + * Q8 (owned by complete-slice) MUST NOT leak into gate-evaluate and + * get silently dropped the way it used to before the registry landed. + * 2. getPendingGatesForTurn filters by the registry's owner turn, not + * just the DB scope column. + * 3. Output validators recognize artifacts that contain the required + * gate section headings, and flag ones that don't. + * 4. Prompt output produced by the validators reflects MV01-MV04. + * + * They also assert the VALIDATION.md renderer still produces headings + * matching the registry's promptSection strings, so future renderer + * edits that drift from the registry fail the suite loudly. + */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + insertGateRow, + getPendingGates, + getPendingGatesForTurn, +} from "../gsd-db.ts"; +import { + GATE_REGISTRY, + getGatesForTurn, + type OwnerTurn, +} from "../gate-registry.ts"; +import { + validateSliceSummaryOutput, + validateTaskSummaryOutput, + validateMilestoneValidationOutput, + validateGateSections, +} from "../prompt-validation.ts"; + +function setupTestDb(): string { + const tmpDir = mkdtempSync(join(tmpdir(), "prompt-gate-coverage-")); + const dbPath = join(tmpDir, "gsd.db"); + openDatabase(dbPath); + insertMilestone({ id: "M001", title: "Test", status: "active" }); + insertSlice({ + milestoneId: "M001", + id: "S01", + title: "Test Slice", + status: "pending", + risk: "medium", + depends: [], + }); + insertTask({ + id: "T01", + sliceId: "S01", + milestoneId: "M001", + title: "Test Task", + status: "pending", + }); + return tmpDir; +} + +describe("getPendingGatesForTurn routes by owner turn, not scope column", () => { + let tmpDir: string; + beforeEach(() => { + tmpDir = setupTestDb(); + }); + afterEach(() => { + closeDatabase(); + rmSync(tmpDir, { recursive: true, force: true }); + }); + + test("Q8 stored as scope:'slice' is owned by complete-slice, not gate-evaluate", () => { + // Seed the three slice-scoped gates plan-slice writes today. + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q8", scope: "slice" }); + + // getPendingGates(..., "slice") returns all three (unchanged). + const allSlicePending = getPendingGates("M001", "S01", "slice"); + assert.equal(allSlicePending.length, 3); + + // But the turn-aware helper routes them correctly. + const gateEval = getPendingGatesForTurn("M001", "S01", "gate-evaluate"); + assert.deepEqual(gateEval.map((g) => g.gate_id).sort(), ["Q3", "Q4"]); + + const completeSlice = getPendingGatesForTurn("M001", "S01", "complete-slice"); + assert.deepEqual(completeSlice.map((g) => g.gate_id), ["Q8"]); + }); + + test("task-scoped gates are scoped to the requested task id", () => { + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T01" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q6", scope: "task", taskId: "T01" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T02" }); + + const t1 = getPendingGatesForTurn("M001", "S01", "execute-task", "T01"); + assert.equal(t1.length, 2); + assert.ok(t1.every((g) => g.gate_id === "Q5" || g.gate_id === "Q6")); + + const t2 = getPendingGatesForTurn("M001", "S01", "execute-task", "T02"); + assert.equal(t2.length, 1); + assert.equal(t2[0].gate_id, "Q5"); + }); +}); + +describe("per-turn output validators", () => { + test("validateSliceSummaryOutput flags missing Operational Readiness", () => { + const md = `# S01: Test Slice\n\n## What Happened\nstuff\n\n## Verification\nstuff\n`; + const result = validateSliceSummaryOutput(md); + assert.equal(result.valid, false); + assert.ok(result.missing.some((m) => m.includes("Q8"))); + assert.ok(result.missing.some((m) => m.includes("Operational Readiness"))); + }); + + test("validateSliceSummaryOutput passes when Operational Readiness heading is present", () => { + const md = `# S01\n\n## Operational Readiness\n- Health: /health\n- Failure: alert\n`; + const result = validateSliceSummaryOutput(md); + assert.equal(result.valid, true); + assert.equal(result.missing.length, 0); + }); + + test("validateMilestoneValidationOutput requires all four MV headings", () => { + // Missing Requirement Coverage. + const md = [ + "# Milestone Validation: M001", + "## Success Criteria Checklist", + "ok", + "## Slice Delivery Audit", + "ok", + "## Cross-Slice Integration", + "ok", + ].join("\n\n"); + const result = validateMilestoneValidationOutput(md); + assert.equal(result.valid, false); + assert.ok(result.missing.some((m) => m.includes("MV04"))); + }); + + test("validateMilestoneValidationOutput passes for a complete VALIDATION.md", () => { + const md = [ + "# Milestone Validation: M001", + "## Success Criteria Checklist", + "ok", + "## Slice Delivery Audit", + "ok", + "## Cross-Slice Integration", + "ok", + "## Requirement Coverage", + "ok", + ].join("\n\n"); + const result = validateMilestoneValidationOutput(md); + assert.equal(result.valid, true, `unexpected missing: ${result.missing.join(", ")}`); + }); + + test("validateTaskSummaryOutput flags missing task-gate sections", () => { + const md = `# T01\n\n## What Happened\nstuff\n\n## Verification\nstuff\n`; + const result = validateTaskSummaryOutput(md); + assert.equal(result.valid, false); + const idsInMissing = result.missing.join(" "); + assert.ok(idsInMissing.includes("Q5")); + assert.ok(idsInMissing.includes("Q6")); + assert.ok(idsInMissing.includes("Q7")); + }); + + test("validateGateSections returns empty missing when gate bucket is empty", () => { + // Build a phoney owner turn that owns nothing (simulate by validating + // against a real turn against an artifact containing every section). + const fullMd = getGatesForTurn("validate-milestone") + .map((g) => `## ${g.promptSection}\n\nstuff`) + .join("\n\n"); + const result = validateGateSections(fullMd, "validate-milestone"); + assert.equal(result.valid, true); + }); +}); + +describe("registry / renderer parity", () => { + test("MV promptSections match the validate-milestone renderer H2 headings", () => { + // Mirror the string literals from tools/validate-milestone.ts + // renderValidationMarkdown() so a rename there flips this test red. + const expectedHeadings = [ + "Success Criteria Checklist", + "Slice Delivery Audit", + "Cross-Slice Integration", + "Requirement Coverage", + ]; + const registryHeadings = getGatesForTurn("validate-milestone").map((g) => g.promptSection); + assert.deepEqual(registryHeadings.sort(), [...expectedHeadings].sort()); + }); + + test("Q8 promptSection matches the complete-slice renderer H2 heading", () => { + // Mirror the slice-summary H2 introduced in tools/complete-slice.ts. + assert.equal(GATE_REGISTRY.Q8.promptSection, "Operational Readiness"); + }); + + test("registry owner turns cover every turn gate-registry.ts declares", () => { + const ownerTurns = new Set(Object.values(GATE_REGISTRY).map((g) => g.ownerTurn)); + assert.ok(ownerTurns.has("gate-evaluate")); + assert.ok(ownerTurns.has("execute-task")); + assert.ok(ownerTurns.has("complete-slice")); + assert.ok(ownerTurns.has("validate-milestone")); + }); +}); diff --git a/src/resources/extensions/gsd/tools/complete-slice.ts b/src/resources/extensions/gsd/tools/complete-slice.ts index 5863a586f..6e9c06e61 100644 --- a/src/resources/extensions/gsd/tools/complete-slice.ts +++ b/src/resources/extensions/gsd/tools/complete-slice.ts @@ -21,7 +21,10 @@ import { getMilestone, updateSliceStatus, setSliceSummaryMd, + saveGateResult, + getPendingGatesForTurn, } from "../gsd-db.js"; +import { getGatesForTurn } from "../gate-registry.js"; import { resolveSliceFile, resolveSlicePath, clearPathCache } from "../paths.js"; import { checkOwnership, sliceUnitKey } from "../unit-ownership.js"; import { saveFile, clearParseCache } from "../files.js"; @@ -39,6 +42,23 @@ export interface CompleteSliceResult { uatPath: string; } +/** + * Map a complete-slice-owned gate id to the CompleteSliceParams field + * whose presence drives `pass` vs. `omitted`. Keep this in lockstep with + * the gates declared in gate-registry.ts under ownerTurn "complete-slice". + */ +function sliceGateFieldForId( + id: string, + params: CompleteSliceParams, +): string | undefined { + switch (id) { + case "Q8": + return params.operationalReadiness; + default: + return undefined; + } +} + /** * Render slice summary markdown matching the template format. * YAML frontmatter uses snake_case keys for parseSummary() compatibility. @@ -169,6 +189,10 @@ ${reqSurfaced} ${reqInvalidated} +## Operational Readiness + +${params.operationalReadiness?.trim() || "None."} + ## Deviations ${params.deviations || "None."} @@ -330,6 +354,45 @@ export async function handleCompleteSlice( // Store rendered markdown in DB for D004 recovery setSliceSummaryMd(params.milestoneId, params.sliceId, summaryMd, uatMd); + // ── Close gates owned by complete-slice (Q8) ─────────────────────────── + // Each owned gate maps to a specific summary section via the registry. + // If the caller populated the corresponding field, record `pass`; if the + // field is empty, record `omitted`. Without this loop, Q8 would stay + // pending forever and block future state derivation (see gate-registry). + try { + const pendingGates = getPendingGatesForTurn( + params.milestoneId, + params.sliceId, + "complete-slice", + ); + if (pendingGates.length > 0) { + const ownedDefs = new Map(getGatesForTurn("complete-slice").map((g) => [g.id, g] as const)); + for (const row of pendingGates) { + const def = ownedDefs.get(row.gate_id); + if (!def) continue; + // Map gate id → param field it maps to. Keep the map local so + // adding a new complete-slice gate is a single place change. + const field = sliceGateFieldForId(def.id, params); + const hasContent = typeof field === "string" && field.trim().length > 0; + saveGateResult({ + milestoneId: params.milestoneId, + sliceId: params.sliceId, + gateId: def.id, + verdict: hasContent ? "pass" : "omitted", + rationale: hasContent + ? `${def.promptSection} section populated in slice summary` + : `${def.promptSection} section left empty — recorded as omitted`, + findings: hasContent ? (field as string).trim() : "", + }); + } + } + } catch (gateErr) { + logWarning( + "tool", + `complete-slice gate close warning for ${params.milestoneId}/${params.sliceId}: ${(gateErr as Error).message}`, + ); + } + // Invalidate all caches invalidateStateCache(); clearPathCache(); diff --git a/src/resources/extensions/gsd/tools/complete-task.ts b/src/resources/extensions/gsd/tools/complete-task.ts index 00cfa78d8..f19f5b4b9 100644 --- a/src/resources/extensions/gsd/tools/complete-task.ts +++ b/src/resources/extensions/gsd/tools/complete-task.ts @@ -24,7 +24,10 @@ import { updateTaskStatus, setTaskSummaryMd, deleteVerificationEvidence, + saveGateResult, + getPendingGatesForTurn, } from "../gsd-db.js"; +import { getGatesForTurn } from "../gate-registry.js"; import { resolveSliceFile, resolveTasksDir, clearPathCache } from "../paths.js"; import { checkOwnership, taskUnitKey } from "../unit-ownership.js"; import { saveFile, clearParseCache } from "../files.js"; @@ -44,6 +47,27 @@ export interface CompleteTaskResult { import type { TaskRow } from "../gsd-db.js"; +/** + * Map an execute-task-owned gate id to the CompleteTaskParams field whose + * presence drives `pass` vs. `omitted`. Keep in lockstep with the gates + * declared in gate-registry.ts under ownerTurn "execute-task". + */ +function taskGateFieldForId( + id: string, + params: CompleteTaskParams, +): string | undefined { + switch (id) { + case "Q5": + return params.failureModes; + case "Q6": + return params.loadProfile; + case "Q7": + return params.negativeTests; + default: + return undefined; + } +} + /** * Normalize a list parameter that may arrive as a string (newline-delimited * bullet list from the LLM) into a string array (#3361). @@ -236,6 +260,45 @@ export async function handleCompleteTask( // Store rendered markdown in DB for D004 recovery setTaskSummaryMd(params.milestoneId, params.sliceId, params.taskId, summaryMd); + // ── Close gates owned by execute-task (Q5/Q6/Q7) for this task ──────── + // Each gate id maps to a specific params field via taskGateFieldForId. + // When the model populates the field, record `pass`; when it's empty, + // record `omitted`. Task-scoped rows are filtered by taskId so a single + // task's completion doesn't touch sibling tasks' gate rows. + try { + const pendingGates = getPendingGatesForTurn( + params.milestoneId, + params.sliceId, + "execute-task", + params.taskId, + ); + if (pendingGates.length > 0) { + const ownedDefs = new Map(getGatesForTurn("execute-task").map((g) => [g.id, g] as const)); + for (const row of pendingGates) { + const def = ownedDefs.get(row.gate_id); + if (!def) continue; + const field = taskGateFieldForId(def.id, params); + const hasContent = typeof field === "string" && field.trim().length > 0; + saveGateResult({ + milestoneId: params.milestoneId, + sliceId: params.sliceId, + taskId: params.taskId, + gateId: def.id, + verdict: hasContent ? "pass" : "omitted", + rationale: hasContent + ? `${def.promptSection} section populated in task summary` + : `${def.promptSection} section left empty — recorded as omitted`, + findings: hasContent ? (field as string).trim() : "", + }); + } + } + } catch (gateErr) { + logWarning( + "tool", + `complete-task gate close warning for ${params.milestoneId}/${params.sliceId}/${params.taskId}: ${(gateErr as Error).message}`, + ); + } + // Invalidate all caches invalidateStateCache(); clearPathCache(); diff --git a/src/resources/extensions/gsd/tools/workflow-tool-executors.ts b/src/resources/extensions/gsd/tools/workflow-tool-executors.ts index 14f179bff..ef7b67088 100644 --- a/src/resources/extensions/gsd/tools/workflow-tool-executors.ts +++ b/src/resources/extensions/gsd/tools/workflow-tool-executors.ts @@ -8,6 +8,7 @@ import { _getAdapter, saveGateResult, } from "../gsd-db.js"; +import { GATE_REGISTRY } from "../gate-registry.js"; import { saveArtifactToDb } from "../db-writer.js"; import type { CompleteMilestoneParams } from "./complete-milestone.js"; import { handleCompleteMilestone } from "./complete-milestone.js"; @@ -427,7 +428,9 @@ export async function executeSaveGateResult( }; } - const validGates = ["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"]; + // Source of truth: gate-registry.ts. Every declared GateId is accepted, + // so adding a new gate in one place automatically flows through here. + const validGates = Object.keys(GATE_REGISTRY); if (!validGates.includes(params.gateId)) { return { content: [{ type: "text", text: `Error: Invalid gateId "${params.gateId}". Must be one of: ${validGates.join(", ")}` }], diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index e03815520..292aa462a 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -536,6 +536,24 @@ export interface CompleteTaskParams { verdict: string; durationMs: number; }>; + /** + * Q5 failure-modes section content (what breaks when dependencies fail). + * Populated → `pass`; omitted/empty → `omitted`. + * @optional + */ + failureModes?: string; + /** + * Q6 load-profile section content (10x breakpoint + protection). + * Populated → `pass`; omitted/empty → `omitted`. + * @optional + */ + loadProfile?: string; + /** + * Q7 negative-tests section content (malformed inputs, error paths, + * boundaries). Populated → `pass`; omitted/empty → `omitted`. + * @optional + */ + negativeTests?: string; /** Optional caller-provided identity for audit trail */ actorName?: string; /** Optional caller-provided reason this action was triggered */ @@ -584,6 +602,14 @@ export interface CompleteSliceParams { affects?: string[]; /** @optional — defaults to [] when omitted */ drillDownPaths?: string[]; + /** + * Q8 operational readiness section content (health signal, failure signal, + * recovery, monitoring gaps). When populated, the complete-slice handler + * records Q8 as `pass`; when omitted or empty, Q8 is recorded as `omitted`. + * See gate-registry.ts. + * @optional + */ + operationalReadiness?: string; /** Optional caller-provided identity for audit trail */ actorName?: string; /** Optional caller-provided reason this action was triggered */