fix(gates): add mechanical enforcement for discussion question gates

When ask_user_questions fails, errors, or is cancelled during a discussion flow, the model is now mechanically blocked from all non-read-only tool calls until it re-asks and gets a valid response. Previously, gate enforcement was prompt-only — the model could rationalize past failed ask_user_questions calls ("auth issue, I'll continue") and generate an entire plan without any user interaction. The pending gate mechanism: - tool_call hook: any ask_user_questions during discussion sets pending - tool_result hook: valid user response clears pending; failure keeps it - tool_call hook: blocks write/edit/gsd/mutating-bash while pending - read-only tools and ask_user_questions itself always allowed
2026-04-08 14:00:16 -05:00 · 2026-04-08 14:00:16 -05:00 · d84778336c
commit d84778336c
parent 2c1e4b695e
4 changed files with 347 additions and 2 deletions
--- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts
+++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts
@ -6,7 +6,7 @@ import { isToolCallEventType } from "@gsd/pi-coding-agent";
 import { buildMilestoneFileName, resolveMilestonePath, resolveSliceFile, resolveSlicePath } from "../paths.js";
 import { buildBeforeAgentStartResult } from "./system-context.js";
 import { handleAgentEnd } from "./agent-end-recovery.js";
-import { clearDiscussionFlowState, isDepthVerified, isDepthConfirmationAnswer, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite, shouldBlockQueueExecution } from "./write-gate.js";
+import { clearDiscussionFlowState, isDepthVerified, isDepthConfirmationAnswer, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite, shouldBlockQueueExecution, isGateQuestionId, setPendingGate, clearPendingGate, getPendingGate, shouldBlockPendingGate, shouldBlockPendingGateBash } from "./write-gate.js";
 import { isBlockedStateFile, isBashWriteToStateFile, BLOCKED_WRITE_ERROR } from "../write-intercept.js";
 import { cleanupQuickBranch } from "../quick.js";
 import { getDiscussionMilestoneId } from "../guided-flow.js";
@ -168,6 +168,43 @@ export function registerHooks(pi: ExtensionAPI): void {
      return { block: true, reason: loopCheck.reason };
    }

+    // ── Discussion gate enforcement: track pending questions ─────────
+    // During a discussion flow, EVERY ask_user_questions call matters.
+    // When ask_user_questions is called, mark it as pending. It stays
+    // pending until the user responds. This prevents the model from
+    // continuing if ask_user_questions fails, errors, or is cancelled.
+    if (event.toolName === "ask_user_questions") {
+      const milestoneId = getDiscussionMilestoneId();
+      const inDiscussion = milestoneId !== null || isQueuePhaseActive();
+      if (inDiscussion) {
+        const questions: any[] = (event.input as any)?.questions ?? [];
+        const questionId = questions[0]?.id ?? "ask_user_questions";
+        setPendingGate(typeof questionId === "string" ? questionId : "ask_user_questions");
+      }
+    }
+
+    // ── Discussion gate enforcement: block tool calls while gate is pending ──
+    // If ask_user_questions was called with a gate ID but hasn't been confirmed,
+    // block all non-read-only tool calls to prevent the model from skipping gates.
+    if (getPendingGate()) {
+      const milestoneId = getDiscussionMilestoneId();
+      if (isToolCallEventType("bash", event)) {
+        const bashGuard = shouldBlockPendingGateBash(
+          event.input.command,
+          milestoneId,
+          isQueuePhaseActive(),
+        );
+        if (bashGuard.block) return bashGuard;
+      } else {
+        const gateGuard = shouldBlockPendingGate(
+          event.toolName,
+          milestoneId,
+          isQueuePhaseActive(),
+        );
+        if (gateGuard.block) return gateGuard;
+      }
+    }
+
    // ── Queue-mode execution guard (#2545): block source-code mutations ──
    // When /gsd queue is active, the agent should only create milestones,
    // not execute work. Block write/edit to non-.gsd/ paths and bash commands
@ -244,9 +281,27 @@ export function registerHooks(pi: ExtensionAPI): void {
    if (!milestoneId && !queueActive) return;

    const details = event.details as any;
+
+    // ── Discussion gate enforcement: handle gate question responses ──
+    // If the result is cancelled or has no response, the pending gate stays active
+    // so the model is blocked from non-read-only tools until it re-asks.
+    // If the user responded at all (even "needs adjustment"), clear the pending gate
+    // because the user engaged — the prompt handles the re-ask-after-adjustment flow.
+    const questions: any[] = (event.input as any)?.questions ?? [];
+    const currentPendingGate = getPendingGate();
+    if (currentPendingGate) {
+      if (details?.cancelled || !details?.response) {
+        // Gate stays pending — model will be blocked from non-read-only tools
+        // until it re-asks and gets a valid response
+      } else {
+        // User responded (confirmed or requested adjustment) — clear the pending gate.
+        // The prompt-level instructions handle the "needs adjustment" re-ask flow.
+        clearPendingGate();
+      }
+    }
+
    if (details?.cancelled || !details?.response) return;

-    const questions: any[] = (event.input as any)?.questions ?? [];
    for (const question of questions) {
      if (typeof question.id === "string" && question.id.includes("depth_verification")) {
        // Only unlock the gate if the user selected the first option (confirmation).
--- a/src/resources/extensions/gsd/bootstrap/write-gate.ts
+++ b/src/resources/extensions/gsd/bootstrap/write-gate.ts
@ -29,6 +29,40 @@ const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which
 let depthVerificationDone = false;
 let activeQueuePhase = false;

+/**
+ * Discussion gate enforcement state.
+ *
+ * When ask_user_questions is called with a recognized gate question ID,
+ * we track the pending gate. Until the gate is confirmed (user selects the
+ * first/recommended option), all non-read-only tool calls are blocked.
+ * This mechanically prevents the model from rationalizing past failed or
+ * cancelled gate questions.
+ */
+let pendingGateId: string | null = null;
+
+/**
+ * Recognized gate question ID patterns.
+ * These appear in both discuss-prepared.md (4-layer) and discuss.md (depth/requirements/roadmap).
+ */
+const GATE_QUESTION_PATTERNS = [
+  "layer1_scope_gate",
+  "layer2_architecture_gate",
+  "layer3_error_gate",
+  "layer4_quality_gate",
+  "depth_verification",
+] as const;
+
+/**
+ * Tools that are safe to call while a gate is pending.
+ * Includes read-only tools and ask_user_questions itself (so the model can re-ask).
+ */
+const GATE_SAFE_TOOLS = new Set([
+  "ask_user_questions",
+  "read", "grep", "find", "ls", "glob",
+  "search-the-web", "resolve_library", "get_library_docs", "fetch_page",
+  "search_and_read",
+]);
+
 export function isDepthVerified(): boolean {
  return depthVerificationDone;
 }
@ -43,17 +77,111 @@ export function setQueuePhaseActive(active: boolean): void {

 export function resetWriteGateState(): void {
  depthVerificationDone = false;
+  pendingGateId = null;
 }

 export function clearDiscussionFlowState(): void {
  depthVerificationDone = false;
  activeQueuePhase = false;
+  pendingGateId = null;
 }

 export function markDepthVerified(): void {
  depthVerificationDone = true;
 }

+/**
+ * Check whether a question ID matches a recognized gate pattern.
+ */
+export function isGateQuestionId(questionId: string): boolean {
+  return GATE_QUESTION_PATTERNS.some(pattern => questionId.includes(pattern));
+}
+
+/**
+ * Mark a gate as pending (called when ask_user_questions is invoked with a gate ID).
+ */
+export function setPendingGate(gateId: string): void {
+  pendingGateId = gateId;
+}
+
+/**
+ * Clear the pending gate (called when the user confirms).
+ */
+export function clearPendingGate(): void {
+  pendingGateId = null;
+}
+
+/**
+ * Get the currently pending gate, if any.
+ */
+export function getPendingGate(): string | null {
+  return pendingGateId;
+}
+
+/**
+ * Check whether a tool call should be blocked because a discussion gate
+ * is pending (ask_user_questions was called but not confirmed).
+ *
+ * Returns { block: true, reason } if the tool should be blocked.
+ * Read-only tools and ask_user_questions itself are always allowed.
+ */
+export function shouldBlockPendingGate(
+  toolName: string,
+  milestoneId: string | null,
+  queuePhaseActive?: boolean,
+): { block: boolean; reason?: string } {
+  if (!pendingGateId) return { block: false };
+
+  const inDiscussion = milestoneId !== null;
+  const inQueue = queuePhaseActive ?? false;
+  if (!inDiscussion && !inQueue) return { block: false };
+
+  if (GATE_SAFE_TOOLS.has(toolName)) return { block: false };
+
+  // Bash read-only commands are also safe
+  if (toolName === "bash") return { block: false }; // bash is checked separately below
+
+  return {
+    block: true,
+    reason: [
+      `HARD BLOCK: Discussion gate "${pendingGateId}" has not been confirmed by the user.`,
+      `You MUST re-call ask_user_questions with the gate question before making any other tool calls.`,
+      `If the previous ask_user_questions call failed, errored, was cancelled, or the user's response`,
+      `did not match a provided option, you MUST re-ask — never rationalize past the block.`,
+      `Do NOT proceed, do NOT use alternative approaches, do NOT skip the gate.`,
+    ].join(" "),
+  };
+}
+
+/**
+ * Check whether a bash command should be blocked because a discussion gate is pending.
+ * Read-only bash commands are allowed; mutating commands are blocked.
+ */
+export function shouldBlockPendingGateBash(
+  command: string,
+  milestoneId: string | null,
+  queuePhaseActive?: boolean,
+): { block: boolean; reason?: string } {
+  if (!pendingGateId) return { block: false };
+
+  const inDiscussion = milestoneId !== null;
+  const inQueue = queuePhaseActive ?? false;
+  if (!inDiscussion && !inQueue) return { block: false };
+
+  // Allow read-only bash commands
+  if (BASH_READ_ONLY_RE.test(command)) return { block: false };
+
+  return {
+    block: true,
+    reason: [
+      `HARD BLOCK: Discussion gate "${pendingGateId}" has not been confirmed by the user.`,
+      `You MUST re-call ask_user_questions with the gate question before running mutating commands.`,
+      `If the previous ask_user_questions call failed, errored, was cancelled, or the user's response`,
+      `did not match a provided option, you MUST re-ask — never rationalize past the block.`,
+    ].join(" "),
+  };
+}
+
 /**
 * Check whether a depth_verification answer confirms the discussion is complete.
 * Uses structural validation: the selected answer must exactly match the first
--- a/src/resources/extensions/gsd/index.ts
+++ b/src/resources/extensions/gsd/index.ts
@ -3,10 +3,16 @@ import type { ExtensionAPI } from "@gsd/pi-coding-agent";
 export {
  isDepthConfirmationAnswer,
  isDepthVerified,
+  isGateQuestionId,
  isQueuePhaseActive,
  setQueuePhaseActive,
  shouldBlockContextWrite,
+  shouldBlockPendingGate,
+  shouldBlockPendingGateBash,
  shouldBlockQueueExecution,
+  setPendingGate,
+  clearPendingGate,
+  getPendingGate,
 } from "./bootstrap/write-gate.js";

 export default async function registerExtension(pi: ExtensionAPI) {
--- a/src/resources/extensions/gsd/tests/write-gate.test.ts
+++ b/src/resources/extensions/gsd/tests/write-gate.test.ts
@ -195,6 +195,162 @@ test('write-gate: markDepthVerified unblocks queue-mode writes when milestoneId
  clearDiscussionFlowState();
 });

+// ═══════════════════════════════════════════════════════════════════════
+// Discussion gate enforcement tests (pending gate mechanism)
+// ═══════════════════════════════════════════════════════════════════════
+
+import {
+  isGateQuestionId,
+  shouldBlockPendingGate,
+  shouldBlockPendingGateBash,
+  setPendingGate,
+  clearPendingGate,
+  getPendingGate,
+} from '../bootstrap/write-gate.ts';
+
+// ─── Scenario 19: isGateQuestionId recognizes all gate patterns ──
+
+test('write-gate: isGateQuestionId recognizes all gate patterns', () => {
+  assert.strictEqual(isGateQuestionId('layer1_scope_gate'), true);
+  assert.strictEqual(isGateQuestionId('layer2_architecture_gate'), true);
+  assert.strictEqual(isGateQuestionId('layer3_error_gate'), true);
+  assert.strictEqual(isGateQuestionId('layer4_quality_gate'), true);
+  assert.strictEqual(isGateQuestionId('depth_verification'), true);
+  assert.strictEqual(isGateQuestionId('depth_verification_M002'), true);
+  assert.strictEqual(isGateQuestionId('my_layer1_scope_gate_question'), true);
+  // Non-gate question IDs
+  assert.strictEqual(isGateQuestionId('project_intent'), false);
+  assert.strictEqual(isGateQuestionId('feature_priority'), false);
+  assert.strictEqual(isGateQuestionId(''), false);
+});
+
+// ─── Scenario 20: setPendingGate / getPendingGate / clearPendingGate lifecycle ──
+
+test('write-gate: pending gate lifecycle (set, get, clear)', () => {
+  clearDiscussionFlowState();
+  assert.strictEqual(getPendingGate(), null, 'starts null');
+
+  setPendingGate('layer1_scope_gate');
+  assert.strictEqual(getPendingGate(), 'layer1_scope_gate', 'set correctly');
+
+  clearPendingGate();
+  assert.strictEqual(getPendingGate(), null, 'cleared correctly');
+
+  // clearDiscussionFlowState also clears pending gate
+  setPendingGate('layer2_architecture_gate');
+  clearDiscussionFlowState();
+  assert.strictEqual(getPendingGate(), null, 'clearDiscussionFlowState clears pending gate');
+});
+
+// ─── Scenario 21: shouldBlockPendingGate blocks non-safe tools when gate is pending ──
+
+test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate', () => {
+  clearDiscussionFlowState();
+  setPendingGate('layer1_scope_gate');
+
+  // write should be blocked during discussion
+  const writeResult = shouldBlockPendingGate('write', 'M001', false);
+  assert.strictEqual(writeResult.block, true, 'write should be blocked');
+  assert.ok(writeResult.reason!.includes('layer1_scope_gate'), 'reason mentions the gate');
+
+  // edit should be blocked
+  const editResult = shouldBlockPendingGate('edit', 'M001', false);
+  assert.strictEqual(editResult.block, true, 'edit should be blocked');
+
+  // gsd tools should be blocked
+  const gsdResult = shouldBlockPendingGate('gsd_plan_milestone', 'M001', false);
+  assert.strictEqual(gsdResult.block, true, 'gsd tools should be blocked');
+
+  clearDiscussionFlowState();
+});
+
+// ─── Scenario 22: shouldBlockPendingGate allows safe tools when gate is pending ──
+
+test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions during pending gate', () => {
+  clearDiscussionFlowState();
+  setPendingGate('layer1_scope_gate');
+
+  // ask_user_questions is always safe (model needs to re-ask)
+  assert.strictEqual(shouldBlockPendingGate('ask_user_questions', 'M001').block, false);
+  // read-only tools are safe
+  assert.strictEqual(shouldBlockPendingGate('read', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGate('grep', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGate('glob', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGate('ls', 'M001').block, false);
+
+  clearDiscussionFlowState();
+});
+
+// ─── Scenario 23: shouldBlockPendingGate does not block outside discussion ──
+
+test('write-gate: shouldBlockPendingGate does not block outside discussion', () => {
+  clearDiscussionFlowState();
+  setPendingGate('layer1_scope_gate');
+
+  // No milestoneId and no queue phase — not in discussion
+  const result = shouldBlockPendingGate('write', null, false);
+  assert.strictEqual(result.block, false, 'should not block outside discussion');
+
+  clearDiscussionFlowState();
+});
+
+// ─── Scenario 24: shouldBlockPendingGate blocks in queue mode ──
+
+test('write-gate: shouldBlockPendingGate blocks in queue mode when gate is pending', () => {
+  clearDiscussionFlowState();
+  setQueuePhaseActive(true);
+  setPendingGate('depth_verification');
+
+  const result = shouldBlockPendingGate('write', null, true);
+  assert.strictEqual(result.block, true, 'should block in queue mode');
+
+  clearDiscussionFlowState();
+});
+
+// ─── Scenario 25: shouldBlockPendingGateBash allows read-only commands ──
+
+test('write-gate: shouldBlockPendingGateBash allows read-only commands during pending gate', () => {
+  clearDiscussionFlowState();
+  setPendingGate('layer2_architecture_gate');
+
+  assert.strictEqual(shouldBlockPendingGateBash('cat file.txt', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGateBash('git log --oneline', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGateBash('grep -r pattern .', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGateBash('ls -la', 'M001').block, false);
+
+  clearDiscussionFlowState();
+});
+
+// ─── Scenario 26: shouldBlockPendingGateBash blocks mutating commands ──
+
+test('write-gate: shouldBlockPendingGateBash blocks mutating commands during pending gate', () => {
+  clearDiscussionFlowState();
+  setPendingGate('layer2_architecture_gate');
+
+  const result = shouldBlockPendingGateBash('npm run build', 'M001');
+  assert.strictEqual(result.block, true, 'mutating bash should be blocked');
+  assert.ok(result.reason!.includes('layer2_architecture_gate'));
+
+  clearDiscussionFlowState();
+});
+
+// ─── Scenario 27: no pending gate means no blocking ──
+
+test('write-gate: no pending gate means no blocking', () => {
+  clearDiscussionFlowState();
+
+  assert.strictEqual(shouldBlockPendingGate('write', 'M001').block, false);
+  assert.strictEqual(shouldBlockPendingGateBash('npm run build', 'M001').block, false);
+});
+
+// ─── Scenario 28: resetWriteGateState clears pending gate ──
+
+test('write-gate: resetWriteGateState clears pending gate', () => {
+  setPendingGate('layer3_error_gate');
+  resetWriteGateState();
+  assert.strictEqual(getPendingGate(), null);
+});
+
 // ─── Standard options fixture used across depth confirmation tests ──

 const STANDARD_OPTIONS = [