/**
 * Looks up which milestone is currently being discussed.
 *
 * @returns The `milestoneId` stored on the pending auto-start record, or
 *   `null` when there is no pending record (no discussion is active) or the
 *   record carries no milestone id.
 */
export function getDiscussionMilestoneId(): string | null {
  // No pending auto-start record means no discussion is in flight.
  if (!pendingAutoStart) return null;
  return pendingAutoStart.milestoneId ?? null;
}
// ── Depth verification state ──────────────────────────────────────────────
/**
 * Module-level flag: `true` once the depth-verification checkpoint has been
 * recorded for the current discussion. Starts out `false`; it is mutated by
 * the extension's event handlers in this module.
 */
let depthVerificationDone = false;

/**
 * Reports whether the depth-verification checkpoint has been recorded.
 *
 * @returns The current value of the module-level depth-verification flag.
 */
export function isDepthVerified(): boolean {
  return depthVerificationDone;
}

// ── Write-gate: block CONTEXT.md writes during discussion without depth verification ──
/**
 * Matches a path whose final segment is exactly `M<digits>-CONTEXT.md`.
 *
 * The pattern is anchored on the left to either the start of the string or a
 * path separator (`/` or `\`). Without that anchor, any file name that merely
 * ends in `M<digits>-CONTEXT.md` (for example `TEAM1-CONTEXT.md`) would be
 * treated as a milestone context file and blocked by mistake.
 */
const MILESTONE_CONTEXT_RE = /(?:^|[\\/])M\d+-CONTEXT\.md$/;

/**
 * Decides whether a tool call must be blocked because it would write a
 * milestone CONTEXT.md file before discussion depth has been verified.
 *
 * Guard chain (first match wins):
 *  1. tool is not `"write"`                      → allow
 *  2. no milestone is under discussion           → allow
 *  3. path is not a milestone `M<n>-CONTEXT.md`  → allow
 *  4. depth verification already done            → allow
 *  5. otherwise                                  → block, with an actionable reason
 *
 * @param toolName - Name of the tool being invoked.
 * @param inputPath - Target path of the write (absolute or relative; `/` or `\` separators).
 * @param milestoneId - Milestone currently being discussed, or `null` when no discussion is active.
 * @param depthVerified - Whether the depth-verification checkpoint has been recorded.
 * @returns `{ block: false }` when the write may proceed, otherwise
 *   `{ block: true, reason }` where `reason` tells the caller how to unblock.
 */
export function shouldBlockContextWrite(
  toolName: string,
  inputPath: string,
  milestoneId: string | null,
  depthVerified: boolean,
): { block: boolean; reason?: string } {
  if (toolName !== "write") return { block: false };
  if (!milestoneId) return { block: false };
  if (!MILESTONE_CONTEXT_RE.test(inputPath)) return { block: false };
  if (depthVerified) return { block: false };
  return {
    block: true,
    reason: `Blocked: Cannot write to milestone CONTEXT.md during discussion phase without depth verification. Call ask_user_questions with question id "depth_verification" first to confirm discussion depth before writing context.`,
  };
}
[]; + for (const q of questions) { + if (typeof q.id === "string" && q.id.includes("depth_verification")) { + depthVerificationDone = true; + break; + } + } + + // ── Persist exchange to DISCUSSION.md ────────────────────────────── + const basePath = process.cwd(); + const milestoneDir = resolveMilestonePath(basePath, milestoneId); + if (!milestoneDir) return; + + const fileName = buildMilestoneFileName(milestoneId, "DISCUSSION"); + const discussionPath = join(milestoneDir, fileName); + const timestamp = new Date().toISOString(); + + // Format exchange as markdown + const lines: string[] = [`## Exchange — ${timestamp}`, ""]; + + for (const q of questions) { + lines.push(`### ${q.header ?? "Question"}`); + lines.push(""); + lines.push(q.question ?? ""); + if (Array.isArray(q.options)) { + lines.push(""); + for (const opt of q.options) { + lines.push(`- **${opt.label}** — ${opt.description ?? ""}`); + } + } + + // Append user response for this question + const answer = details.response?.answers?.[q.id]; + if (answer) { + lines.push(""); + const selected = Array.isArray(answer.selected) ? answer.selected.join(", ") : answer.selected; + lines.push(`**Selected:** ${selected}`); + if (answer.notes) { + lines.push(`**Notes:** ${answer.notes}`); + } + } + lines.push(""); + } + + lines.push("---", ""); + + const newBlock = lines.join("\n"); + const existing = await loadFile(discussionPath) ?? `# ${milestoneId} Discussion Log\n\n`; + await saveFile(discussionPath, existing + newBlock); + }); } async function buildGuidedExecuteContextInjection(prompt: string, basePath: string): Promise { diff --git a/src/resources/extensions/gsd/prompts/discuss.md b/src/resources/extensions/gsd/prompts/discuss.md index 90aca80b5..fbac73173 100644 --- a/src/resources/extensions/gsd/prompts/discuss.md +++ b/src/resources/extensions/gsd/prompts/discuss.md @@ -52,6 +52,16 @@ You are a thinking partner, not an interviewer. 
**Freeform rule:** When the user selects "Other" or clearly wants to explain something freely, stop using `ask_user_questions` and switch to plain text follow-ups. Let them talk. Resume structured questions when appropriate. +**Depth-signal awareness.** When a user writes extensively about something — long notes, detailed explanations, specific examples — that's a signal. Probe that area deeper. Don't spread attention evenly across all topics when the user is clearly investing energy in one. + +**Enrichment fusion.** Weave the user's specific language, terminology, and framing into your subsequent questions. If they said "craft feel," your next question references "craft feel" — don't paraphrase it into "user experience quality." Their precision is signal, not noise. + +**Position-first framing.** Have opinions. State your read of a tradeoff with rationale before asking what they think. "I'd lean toward X because Y — does that match your thinking, or am I missing context?" is better than "what do you think about X vs Y?" You're a thinking partner, not a neutral interviewer. + +**Negative constraints.** Ask what would disappoint them. What they explicitly don't want. What the product should never feel like. Negative constraints are sharper than positive wishes — "never feel sluggish" defines the performance bar more precisely than "should be fast." + +**Observation ≠ Conclusion.** Technical facts you discover in the codebase during investigation are context, not decisions. Present them as context and let the user decide what they mean for direction. "The current auth uses JWT with 24h expiry" is an observation. Whether to keep that pattern is the user's call. + **Anti-patterns — never do these:** - **Checklist walking** — going through a predetermined list of topics regardless of what the user said - **Canned questions** — asking generic questions that could apply to any project @@ -73,10 +83,22 @@ Do NOT offer to proceed until ALL of the following are satisfied. 
Track these in - [ ] **The biggest technical unknowns / risks** — what could fail, what hasn't been proven - [ ] **What external systems/services this touches** — APIs, databases, third-party services, hardware +Before offering to proceed, demonstrate absorption: reference specific things the user emphasized, specific terminology they used, specific nuance they sharpened — and show how those shaped your understanding. Synthesize, don't recite. "Your emphasis on X led me to prioritize Y over Z" is good. "You said X, you said Y, you said Z" is not. The user should feel heard in the specifics, not just acknowledged in the abstract. + **Questioning depth should match scope.** Simple, well-defined work needs fewer rounds — maybe 1-2. Large, ambiguous visions need more — maybe 4+. Don't pad rounds to hit a number. Stop when the depth checklist is satisfied and you genuinely understand the work. Do not count the reflection step as a question round. Rounds start after reflection is confirmed. +## Depth Verification + +Before moving to the wrap-up gate, present a structured depth summary to the user via `ask_user_questions`. This is a checkpoint — show what you captured across the depth checklist dimensions, using the user's own terminology and framing. + +The question should summarize: what you understood them to be building, what shaped your understanding most (their emphasis, constraints, concerns), and any areas where you're least confident in your understanding. Frame it as: "Before we move to planning, here's what I captured — did I get the depth right?" + +**Convention:** The question ID must contain `depth_verification` (e.g., `depth_verification_summary`). This naming convention enables downstream mechanical detection of this step. + +Offer two options: "Yes, you got it (Recommended)" and "Not quite — let me clarify." If they clarify, absorb the correction and re-verify. 
+ ## Wrap-up Gate Only after the depth checklist is fully satisfied and you genuinely understand the work, offer to proceed. @@ -166,6 +188,9 @@ Once the user is satisfied, in a single pass: 1. `mkdir -p .gsd/milestones/{{milestoneId}}/slices` 2. Write or update `.gsd/PROJECT.md` — read the template at `~/.gsd/agent/extensions/gsd/templates/project.md` first. Describe what the project is, its current state, and list the milestone sequence. 3. Write or update `.gsd/REQUIREMENTS.md` — read the template at `~/.gsd/agent/extensions/gsd/templates/requirements.md` first. Confirm requirement states, ownership, and traceability before roadmap creation. +**Depth-Preservation Guidance for context.md:** +When writing context.md, preserve the user's exact terminology, emphasis, and specific framing from the discussion. Do not paraphrase user nuance into generic summaries. If the user said "craft feel," write "craft feel" — not "high-quality user experience." If they emphasized a specific constraint or negative requirement, carry that emphasis through verbatim. The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision. + 4. Write `{{contextAbsPath}}` — read the template at `~/.gsd/agent/extensions/gsd/templates/context.md` first. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during discussion. 5. Write `{{roadmapAbsPath}}` — read the template at `~/.gsd/agent/extensions/gsd/templates/roadmap.md` first. Decompose into demoable vertical slices with checkboxes, risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. 6. 
/**
 * Unit tests for the CONTEXT.md write-gate (D031 guard chain).
 *
 * Target: shouldBlockContextWrite(), a pure function whose guard chain is:
 *   (a) toolName !== "write"                        → pass
 *   (b) milestoneId null                            → pass (not in discussion)
 *   (c) path doesn't match /M\d+-CONTEXT\.md$/      → pass
 *   (d) depthVerified                               → pass
 *   (e) else                                        → block with actionable reason
 */

import test from 'node:test';
import assert from 'node:assert/strict';
import { shouldBlockContextWrite } from '../index.ts';

/** Every scenario below goes through the `write` tool; only the remaining inputs vary. */
const gate = (path: string, milestoneId: string | null, depthVerified: boolean) =>
  shouldBlockContextWrite('write', path, milestoneId, depthVerified);

const ABSOLUTE_M001_CONTEXT = '/Users/dev/project/.gsd/milestones/M001/M001-CONTEXT.md';

// ─── Scenario 1: unverified discussion, absolute CONTEXT.md path → blocked ──

test('write-gate: blocks CONTEXT.md write during discussion without depth verification (absolute path)', () => {
  const outcome = gate(ABSOLUTE_M001_CONTEXT, 'M001', false);
  assert.strictEqual(outcome.block, true, 'should block the write');
  assert.ok(outcome.reason, 'should provide a reason');
});

// ─── Scenario 2: unverified discussion, relative CONTEXT.md path → blocked ──

test('write-gate: blocks CONTEXT.md write during discussion without depth verification (relative path)', () => {
  const outcome = gate('.gsd/milestones/M005/M005-CONTEXT.md', 'M005', false);
  assert.strictEqual(outcome.block, true, 'should block the write');
  assert.ok(outcome.reason, 'should provide a reason');
});

// ─── Scenario 3: depth verified → allowed ──

test('write-gate: allows CONTEXT.md write after depth verification', () => {
  const outcome = gate(ABSOLUTE_M001_CONTEXT, 'M001', true);
  assert.strictEqual(outcome.block, false, 'should not block after depth verification');
  assert.strictEqual(outcome.reason, undefined, 'should have no reason');
});

// ─── Scenario 4: no active discussion (milestoneId null) → allowed ──

test('write-gate: allows CONTEXT.md write outside discussion phase', () => {
  const outcome = gate('.gsd/milestones/M001/M001-CONTEXT.md', null, false);
  assert.strictEqual(outcome.block, false, 'should not block outside discussion phase');
});

// ─── Scenario 5: other files written during discussion → allowed ──

test('write-gate: allows non-CONTEXT.md writes during discussion', () => {
  const passThroughCases: Array<[path: string, message: string]> = [
    ['.gsd/milestones/M001/M001-DISCUSSION.md', 'DISCUSSION.md should pass'],
    ['.gsd/milestones/M001/slices/S01/S01-PLAN.md', 'slice plan should pass'],
    ['src/index.ts', 'regular code file should pass'],
  ];
  for (const [path, message] of passThroughCases) {
    assert.strictEqual(gate(path, 'M001', false).block, false, message);
  }
});

// ─── Scenario 6: regex specificity — slice-level S01-CONTEXT.md is not gated ──

test('write-gate: regex does not match slice context files (S01-CONTEXT.md)', () => {
  const outcome = gate('.gsd/milestones/M001/slices/S01/S01-CONTEXT.md', 'M001', false);
  assert.strictEqual(outcome.block, false, 'S01-CONTEXT.md should not be blocked');
});

// ─── Scenario 7: the block reason tells the caller how to unblock ──

test('write-gate: blocked reason contains depth_verification keyword', () => {
  const outcome = gate('.gsd/milestones/M999/M999-CONTEXT.md', 'M999', false);
  assert.strictEqual(outcome.block, true);
  assert.ok(outcome.reason!.includes('depth_verification'), 'reason should mention depth_verification question id');
  assert.ok(outcome.reason!.includes('ask_user_questions'), 'reason should mention ask_user_questions tool');
});