Merge pull request #3935 from jeremymcs/fix/remove-broken-discuss-prepared

fix(gsd): remove broken discuss-prepared template, inject briefs into discuss.md
Jeremy McSpadden 2026-04-10 12:07:31 -05:00 committed by GitHub
commit 750c5b7aeb
13 changed files with 72 additions and 3438 deletions

View file

@@ -47,13 +47,9 @@ let pendingGateId: string | null = null;
/**
* Recognized gate question ID patterns.
* These appear in both discuss-prepared.md (4-layer) and discuss.md (depth/requirements/roadmap).
* These appear in discuss.md (depth/requirements/roadmap).
*/
const GATE_QUESTION_PATTERNS = [
"layer1_scope_gate",
"layer2_architecture_gate",
"layer3_error_gate",
"layer4_quality_gate",
"depth_verification",
] as const;
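
The matching logic is outside this hunk, so the following is a hedged sketch of how a gate check typically consumes these patterns. `isGateQuestion` and the sample ID are hypothetical; the removed template's wording ("question ID containing `layer1_scope_gate`") suggests a substring check.

```typescript
// Hypothetical helper (not in this diff) showing how the pattern list is
// typically consumed: a substring check against the incoming question ID.
function isGateQuestion(questionId: string): boolean {
  return GATE_QUESTION_PATTERNS.some((pattern) => questionId.includes(pattern));
}

// After this change, IDs like "m001_depth_verification" still match, while the
// removed layer1..layer4 gate IDs no longer do.
```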

View file

@@ -53,25 +53,8 @@ import {
runPreparation,
formatCodebaseBrief,
formatPriorContextBrief,
formatEcosystemBrief,
type PreparationResult,
} from "./preparation.js";
// ─── Preparation result storage ─────────────────────────────────────────────
// Stores the most recent preparation result for injection into discuss prompts.
// S02 will consume this when building the prepared discussion prompt.
let lastPreparationResult: PreparationResult | null = null;
/** Get the most recent preparation result (for S02 prompt building). */
export function getLastPreparationResult(): PreparationResult | null {
return lastPreparationResult;
}
/** Clear the preparation result (called after discussion completes). */
export function clearPreparationResult(): void {
lastPreparationResult = null;
}
// ─── Re-exports (preserve public API for existing importers) ────────────────
export {
MILESTONE_ID_RE, generateMilestoneSuffix, nextMilestoneId,
@@ -427,7 +410,7 @@ function resolveAvailableModel<T extends { id: string; provider: string }>(
* Build the discuss-and-plan prompt for a new milestone.
* Used by all three "new milestone" paths (first ever, no active, all complete).
*/
function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string): string {
function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string, preparationContext?: string): string {
const milestoneRel = `.gsd/milestones/${nextId}`;
const inlinedTemplates = [
inlineTemplate("project", "Project"),
@@ -439,6 +422,7 @@ function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string)
return loadPrompt("discuss", {
milestoneId: nextId,
preamble,
preparationContext: preparationContext ?? "",
contextPath: `${milestoneRel}/${nextId}-CONTEXT.md`,
roadmapPath: `${milestoneRel}/${nextId}-ROADMAP.md`,
inlinedTemplates,
@@ -471,59 +455,12 @@ function buildHeadlessDiscussPrompt(nextId: string, seedContext: string, _basePa
});
}
/**
* Build the prepared discuss prompt with brief injection.
* Uses the discuss-prepared template which encodes the 4-layer discussion protocol.
*
* @param nextId - The milestone ID being discussed
* @param preamble - Preamble text for the discuss prompt
* @param _basePath - Root directory of the project (unused, kept for signature consistency)
* @param prepResult - Preparation result containing briefs to inject
* @returns The prepared discuss prompt string
*/
function buildPreparedPrompt(
nextId: string,
preamble: string,
_basePath: string,
prepResult: PreparationResult,
): string {
const milestoneRel = `.gsd/milestones/${nextId}`;
// Use context-enhanced instead of context for prepared discussions
const inlinedTemplates = [
inlineTemplate("project", "Project"),
inlineTemplate("requirements", "Requirements"),
inlineTemplate("context-enhanced", "Context Enhanced"),
inlineTemplate("roadmap", "Roadmap"),
inlineTemplate("decisions", "Decisions"),
].join("\n\n---\n\n");
// Format the briefs from the preparation result
const codebaseBrief = prepResult.codebaseBrief || formatCodebaseBrief(prepResult.codebase);
const priorContextBrief = prepResult.priorContextBrief || formatPriorContextBrief(prepResult.priorContext);
const ecosystemBrief = prepResult.ecosystemBrief || formatEcosystemBrief(prepResult.ecosystem);
return loadPrompt("discuss-prepared", {
milestoneId: nextId,
preamble,
codebaseBrief,
priorContextBrief,
ecosystemBrief,
contextPath: `${milestoneRel}/${nextId}-CONTEXT.md`,
roadmapPath: `${milestoneRel}/${nextId}-ROADMAP.md`,
inlinedTemplates,
commitInstruction: buildDocsCommitInstruction(`docs(${nextId}): context, requirements, and roadmap`),
multiMilestoneCommitInstruction: buildDocsCommitInstruction("docs: project plan — N milestones"),
});
}
/**
* Run preparation phase if enabled, then build the discuss prompt.
* This is the main entry point for new milestone discussions with preparation.
* Stores the preparation result for S02 to inject into the discuss prompt.
*
* When preparation succeeds, uses the discuss-prepared template with brief injection.
* Falls back to the standard discuss template when preparation is disabled or fails.
* Preparation analyzes the codebase and prior context, injecting the results
* as supplementary context into the standard discuss template. The discuss
* template drives the conversation (asks "What's the vision?" first), while
* the preparation briefs give the agent grounding in the existing codebase.
*
* @param ctx - Extension command context with UI for progress notifications
* @param nextId - The milestone ID being discussed
@@ -537,14 +474,13 @@ async function prepareAndBuildDiscussPrompt(
preamble: string,
basePath: string,
): Promise<string> {
// Clear stale preparation result immediately to prevent cross-session/project
// state leaks. This ensures data from a prior milestone/project never leaks
// into subsequent discussions (adversarial review fix #3602).
lastPreparationResult = null;
const prefs = loadEffectiveGSDPreferences()?.preferences ?? {};
// Run preparation if enabled (default: true)
// Run preparation if enabled (default: true) — results are injected as
// supplementary context into the standard discuss prompt, NOT as a
// replacement template. The discuss prompt always leads with "What's the
// vision?" so the user defines the scope, not the codebase analysis.
let preparationContext = "";
if (prefs.discuss_preparation !== false) {
try {
const prepResult = await runPreparation(basePath, ctx.ui, {
@@ -552,21 +488,23 @@
discuss_web_research: prefs.discuss_web_research,
discuss_depth: prefs.discuss_depth,
});
lastPreparationResult = prepResult;
// Use prepared prompt if preparation was enabled and produced results
if (prepResult.enabled) {
return buildPreparedPrompt(nextId, preamble, basePath, prepResult);
const codebaseBrief = prepResult.codebaseBrief || formatCodebaseBrief(prepResult.codebase);
const priorContextBrief = prepResult.priorContextBrief || formatPriorContextBrief(prepResult.priorContext);
const parts: string[] = [];
if (codebaseBrief) parts.push(`### Codebase Brief\n\n${codebaseBrief}`);
if (priorContextBrief) parts.push(`### Prior Context Brief\n\n${priorContextBrief}`);
if (parts.length > 0) {
preparationContext = `\n\n## Preparation Context\n\nThe system analyzed the codebase before this discussion. Use these findings as background context — they describe what already exists, NOT what the user wants to build. Always ask the user what they want to build first.\n\n${parts.join("\n\n")}`;
}
}
} catch {
// If preparation throws, ensure stale data doesn't persist
lastPreparationResult = null;
} catch (err) {
logWarning("guided", `preparation failed, proceeding without context: ${(err as Error).message}`);
}
}
// Fall back to standard discuss prompt for backward compatibility
// lastPreparationResult is already null (cleared at entry or on error)
return buildDiscussPrompt(nextId, preamble, basePath);
return buildDiscussPrompt(nextId, preamble, basePath, preparationContext);
}
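
To make the new call shape concrete, here is a hedged sketch of how the optional parameter flows end to end. The milestone ID and surrounding variables are placeholders for illustration, not values from this PR.

```typescript
// Illustrative sketch only (not part of this diff). "M001", preamble, and
// basePath are placeholder values; the real call happens inside
// prepareAndBuildDiscussPrompt above.
const prompt = buildDiscussPrompt("M001", preamble, basePath, preparationContext);

// buildDiscussPrompt forwards `preparationContext ?? ""` to
// loadPrompt("discuss", ...), which substitutes it into the
// {{preparationContext}} placeholder added to discuss.md below. When
// preparation is disabled or fails, the string is empty and the discuss
// prompt renders exactly as it did before this change.
```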
/**

View file

@@ -1,88 +0,0 @@
/**
* GSD Prompt Validation.
*
* Validates enhanced context output before writing.
*
* Implements R109 validation requirement: CONTEXT.md must have required sections
* before being written to disk.
*/
/**
* Result of validating enhanced context output.
*/
export interface ValidationResult {
/** Whether all required sections are present. */
valid: boolean;
/** List of missing required sections. */
missing: string[];
}
/**
* Validate that enhanced context content has all required sections.
*
* Required sections per R109:
* - Scope section (## Scope, ## Milestone Scope, or ## Why This Milestone)
* - Architectural Decisions section (## Architectural Decisions)
* - Acceptance Criteria section (## Acceptance Criteria or ## Final Integrated Acceptance)
*
* Additionally validates that the Architectural Decisions section contains
* at least one decision entry (### heading or **Decision marker).
*
* @param content - The enhanced context markdown content
* @returns ValidationResult with valid flag and list of missing sections
*/
export function validateEnhancedContext(content: string): ValidationResult {
const missing: string[] = [];
// Required section 1: Scope (multiple acceptable header variants)
const hasScopeSection =
/^## Scope\b/m.test(content) ||
/^## Milestone Scope\b/m.test(content) ||
/^## Why This Milestone\b/m.test(content);
if (!hasScopeSection) {
missing.push("Milestone Scope or Why This Milestone");
}
// Required section 2: Architectural Decisions
const hasArchitecturalDecisions = /^## Architectural Decisions\b/m.test(content);
if (!hasArchitecturalDecisions) {
missing.push("Architectural Decisions");
}
// Required section 3: Acceptance Criteria (multiple acceptable header variants)
const hasAcceptanceCriteria =
/^## Acceptance Criteria\b/m.test(content) ||
/^## Final Integrated Acceptance\b/m.test(content);
if (!hasAcceptanceCriteria) {
missing.push("Acceptance Criteria");
}
// Additional validation: Architectural Decisions must have at least one entry
if (hasArchitecturalDecisions) {
// Extract the section content between ## Architectural Decisions and the next ## heading.
// Uses indexOf-based extraction instead of regex with \z (which is invalid in JavaScript
// regex — it's PCRE/Ruby syntax and JS treats it as literal 'z').
const sectionStart = content.indexOf("## Architectural Decisions");
if (sectionStart === -1) {
missing.push("Architectural Decisions");
} else {
const afterHeading = content.slice(sectionStart + "## Architectural Decisions".length);
const nextSection = afterHeading.search(/^## /m);
const sectionContent = nextSection === -1 ? afterHeading : afterHeading.slice(0, nextSection);
// Check for actual decision entries:
// - ### heading (subsection per decision)
// - **Decision marker (inline decision format)
const hasDecisionEntry = /^### /m.test(sectionContent) || /^\*\*Decision/m.test(sectionContent);
if (!hasDecisionEntry) {
missing.push("At least one architectural decision entry");
}
}
}
return {
valid: missing.length === 0,
missing,
};
}
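
For reference, a hedged sketch of how a caller would have gated on this validator's result before the file's removal. The caller and the variable `contextMarkdown` are hypothetical, since no call sites appear in this diff.

```typescript
// Hypothetical caller, shown only to illustrate the removed API's shape.
const { valid, missing } = validateEnhancedContext(contextMarkdown);
if (!valid) {
  // R109: do not write CONTEXT.md until all required sections are present.
  throw new Error(`CONTEXT.md missing required sections: ${missing.join(", ")}`);
}
```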

View file

@@ -1,424 +0,0 @@
{{preamble}}
You are conducting a **prepared discussion** — the system has already analyzed the codebase, gathered prior context, and researched the ecosystem. Your job is to present these findings, make recommendations, and gather the user's input through a structured 4-layer protocol.
## Preparation Briefs
The following briefs were generated during the preparation phase. Use them to ground your recommendations.
### Codebase Brief
{{codebaseBrief}}
### Prior Context Brief
{{priorContextBrief}}
### Ecosystem Brief
{{ecosystemBrief}}
---
## 4-Layer Discussion Protocol
This discussion proceeds through four mandatory layers. At each layer:
1. **Present findings** — share what the preparation revealed
2. **Make a recommendation** — take a position based on the evidence
3. **Ask clarifying questions** — fill gaps the preparation couldn't answer
4. **Gate** — use `ask_user_questions` to get explicit sign-off before advancing
**Do NOT skip layers.** Each layer builds on the previous. The user must explicitly approve each layer before you proceed.
---
## Depth Adaptation
The depth of questioning at each layer should match THIS milestone's work type. Do not apply a fixed checklist — reason from first principles about what matters for this specific work.
**Work-type reasoning:**
- **API/service work** — Focus Layer 2 questions on contracts, versioning, backwards compatibility, authentication boundaries. Layer 3 must cover rate limiting, timeout cascades, and partial failure states.
- **CLI/developer tools** — Focus Layer 1 on user mental model and command grammar. Layer 4 needs shell compatibility, error message clarity, and exit code semantics.
- **ML/data pipelines** — Focus Layer 2 on data flow, reproducibility, and intermediate state. Layer 3 must cover data corruption, training divergence, and checkpoint recovery.
- **UI/frontend work** — Focus Layer 2 on component boundaries and state management. Layer 3 needs loading states, optimistic updates, and offline behavior. Layer 4 must include visual regression criteria.
- **Infrastructure/platform** — Focus Layer 2 on deployment topology and failure domains. Layer 3 must cover cascading failures, resource exhaustion, and rollback paths.
- **Refactoring/migration** — Focus Layer 1 on what changes vs what must stay identical. Layer 4 needs behavioral equivalence tests, not just code coverage.
**Adaptation principle:** Ask "What would cause this milestone to fail silently or succeed incorrectly?" The answer shapes which questions deserve deep exploration vs quick confirmation.
---
## Layer 1 — Scope (What are we building?)
### Identify Work Type
**Before presenting findings, identify the primary work type and state it explicitly:**
"Based on [user's request and codebase analysis], this milestone is primarily **[work type]** work (e.g., API/backend, UI/frontend, CLI tool, data pipeline, simulation, infrastructure)."
This classification determines the depth and focus of questioning at each layer. If the work type spans multiple categories, state the dominant type and note the secondary types. The user can correct this classification.
### Present Findings
Start by presenting what you learned from the preparation:
1. **From the Codebase Brief:** Summarize the technology stack, key modules, and established patterns. Call out anything that constrains or enables the proposed work.
2. **From the Prior Context Brief:** Surface existing decisions, requirements, and knowledge that are relevant. Note any prior commitments or constraints.
3. **Scope implications:** Based on the above, explain what scope makes sense and what would conflict with the existing codebase.
### Make a Recommendation
Take a clear position: "Based on [specific findings], I recommend the milestone scope as [concrete description]."
Include:
- What the milestone will deliver (user-visible outcome)
- What it explicitly excludes (to prevent scope creep)
- Rough size estimate (number of slices, complexity)
### Resolve Scope — Mandatory Rounds
After presenting your recommendation, you MUST complete these rounds in order. Each round uses `ask_user_questions` or direct questions. Do NOT skip rounds. Do NOT combine rounds. Do NOT jump to the Layer 1 Gate until all rounds are complete.
**Complexity calibration:** If the milestone is simple (1-2 slices, well-understood patterns, no ambiguity), you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. For complex milestones (3+ slices, novel architecture, significant ambiguity), give each round full treatment.
**Round 1 — Feature boundaries:**
For each feature in your recommendation, state what it includes and excludes. Ask the user to confirm or adjust each boundary. Example: "Signup — I'm including email/password registration. I'm excluding OAuth, email verification, and phone number signup. Correct?"
**Round 2 — Ambiguity resolution:**
Identify every term or concept in the scope that could be interpreted multiple ways. For each one, state the two most likely interpretations and ask which the user intends. Example: "'User authentication' — do you mean just login/signup, or also session management, token refresh, and logout?"
**Round 3 — Dependencies and constraints:**
Ask about external dependencies (APIs, services, databases), existing code that will be affected, and constraints the user hasn't mentioned. Reference specific findings from the codebase brief. Example: "Your db.ts already has a getUser() function — should signup create users compatible with this existing model?"
**Round 4 — Priority and ordering:**
If the scope has multiple features, ask the user to rank them by priority. Ask what's the minimum viable version if the milestone needs to be cut short. Example: "If we had to ship with only 2 of the 3 slices, which two matter most?"
After completing all 4 rounds, proceed to the Layer 1 Gate.
### Layer 1 Gate
Before advancing, use `ask_user_questions` with question ID containing `layer1_scope_gate`:
```
Header: "Scope Gate"
Question: "Does this scope capture what you want to build?"
Options:
- "Yes, scope is correct (Recommended)" — proceed to Layer 2
- "Needs adjustment" — user will clarify, then re-present scope
```
**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 2 until the user explicitly approves the scope. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. "Tool not responding, I'll proceed," "auth issues," or "I'll use my recommended scope" are all **forbidden**. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around.
---
## Ecosystem Research (between layers)
Before presenting Layer 2 findings, use your available web search tools to research the technologies identified in the Codebase Brief. For each major technology (framework, ORM, key library):
1. Search for "[technology] [version] best practices [current year]"
2. Search for "[technology] [version] known issues"
Summarize findings concisely. If search tools fail or are unavailable, note this and proceed using your training knowledge — but do NOT use a search failure as justification to skip any gate.
Present ecosystem findings at the start of Layer 2 alongside your architecture recommendation.
---
## Layer 2 — Architecture (How will it work?)
### Present Findings
Now present architectural recommendations grounded in evidence:
1. **From the Ecosystem Brief:** Share relevant best practices, known issues, library recommendations, and integration patterns discovered during research.
2. **From the Codebase Brief:** Identify existing architectural patterns that should be followed or deliberately broken from.
3. **Synthesis:** Explain how the ecosystem research applies to this specific codebase context.
### Make a Recommendation
Take a clear position: "I'd suggest [approach] because [evidence-based rationale]."
Cover:
- Overall architectural approach (new module? extend existing? separate service?)
- Key technical decisions (which libraries, patterns, data flow)
- Integration points with existing code
- What you'd avoid and why
### Resolve Architecture — Mandatory Rounds
After presenting your recommendation, you MUST complete these rounds in order. Do NOT skip rounds. Do NOT jump to the Layer 2 Gate until all rounds are complete.
**Complexity calibration:** If the milestone is simple (1-2 slices, well-understood patterns, no ambiguity), you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. For complex milestones (3+ slices, novel architecture, significant ambiguity), give each round full treatment.
**Round 1 — Per-slice technical decisions:**
For each slice in your decomposition, state the specific technical approach. Ask the user to confirm or adjust. Don't just say "build the signup endpoint" — state which library handles password hashing, where the route file lives, what the request/response schema looks like.
**Round 2 — Inter-slice contracts:**
For each dependency between slices, state explicitly what the upstream slice produces and what the downstream slice expects. Ask the user to confirm the interface. Example: "S01 produces a User model with {id, email, hashedPassword}. S02's login endpoint will query by email and compare password. Does this contract work?"
**Round 3 — Library and pattern decisions:**
For each library or pattern choice, present at least one alternative with tradeoffs. Ask the user to confirm. Example: "bcrypt vs argon2 for password hashing — bcrypt is more common in Node, argon2 is newer and more resistant to GPU attacks. I recommend bcrypt for simplicity. Agree?"
**Round 4 — Integration with existing code:**
Walk through how the new code connects to existing files and patterns. Ask about anything that might conflict. Reference specific files from the codebase brief. Example: "The new auth routes will mount at /api/auth alongside your existing /api router in routes.ts. Should they share the same router file or get their own auth-routes.ts?"
After completing all 4 rounds, proceed to the Layer 2 Gate.
### Layer 2 Gate
Before advancing, use `ask_user_questions` with question ID containing `layer2_architecture_gate`:
```
Header: "Architecture Gate"
Question: "Ready to move to error handling, or want to adjust the architecture?"
Options:
- "Architecture looks good (Recommended)" — proceed to Layer 3
- "Want to adjust" — user will clarify, then re-present architecture
```
**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 3 until the user explicitly approves the architecture. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around.
---
## Layer 3 — Error States (What can go wrong?)
### Present Findings
Identify failure modes based on the scope and architecture:
1. **From the Ecosystem Brief:** Known issues, common pitfalls, edge cases that trip up similar implementations.
2. **From the Architecture:** Failure points at integration boundaries, async operations, external dependencies, user input handling.
3. **From the Codebase Brief:** How existing code handles errors — patterns to follow, gaps to fill.
### Make a Recommendation
Take a clear position: "The critical error paths are [X, Y, Z]. I recommend handling them by [approach]."
Cover:
- **Must-handle errors:** Failures that would break the user experience or corrupt data
- **Should-handle errors:** Degraded experiences that are acceptable with good messaging
- **Edge cases:** Boundary conditions, malformed input, timing issues
- **Recovery strategy:** Retry logic, fallback behavior, user notification
### Resolve Error Handling — Mandatory Rounds
After presenting your recommendation, ask the user:
**"Do you want to go deep on error handling, or accept the defaults I recommended?"**
Use `ask_user_questions` with options: "Go deep" / "Accept defaults"
If they accept defaults, record your recommendations as decisions and proceed to the Layer 3 Gate.
If they want to go deep, complete these rounds:
**Complexity calibration:** If the milestone is simple, you may compress rounds — but you must still explicitly address each round's topic. You may NOT skip rounds entirely.
**Round 1 — Input validation:**
For each endpoint or entry point, state what input validation happens and what error the user sees for invalid input. Ask the user to confirm. Example: "Signup with missing email returns 400 with {error: 'Email is required'}. Signup with invalid email format returns 400 with {error: 'Invalid email format'}. Right?"
**Round 2 — Authentication/authorization failures:**
For each protected operation, state what happens when auth fails. Ask the user to confirm. Example: "Expired JWT returns 401. Missing JWT returns 401. Malformed JWT returns 401. All three use the same generic message to avoid information leakage. Correct?"
**Round 3 — System failures:**
For each external dependency (database, API, file system), state what happens when it's unavailable. Ask the user to confirm. Example: "If Prisma can't connect to the database, all endpoints return 500 with a generic message. We log the real error server-side but never expose it to the client."
After completing all rounds (or accepting defaults), proceed to the Layer 3 Gate.
### Layer 3 Gate
Before advancing, use `ask_user_questions` with question ID containing `layer3_error_gate`:
```
Header: "Error Handling Gate"
Question: "Error handling strategy captured. Ready to define the quality bar?"
Options:
- "Yes, move to quality bar (Recommended)" — proceed to Layer 4
- "Want to adjust error handling" — user will clarify, then re-present errors
```
**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 4 until the user explicitly approves error handling. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around.
---
## Layer 4 — Quality Bar (What does done mean?)
### Present Findings
Define what "done" looks like based on everything discussed:
1. **Testing requirements:** What must be tested? Unit tests, integration tests, E2E tests? Based on the architecture's complexity and risk profile.
2. **Acceptance criteria:** Concrete, observable outcomes that prove the milestone is complete. Derived from the scope discussion.
3. **Performance/quality constraints:** Based on ecosystem research and codebase patterns — response times, error rates, accessibility requirements.
### Make a Recommendation
Take a clear position: "For this scope, I'd suggest these acceptance criteria: [list]."
Include:
- **Definition of done:** What conditions must be true for the milestone to be complete?
- **Test coverage expectations:** What must be tested vs nice-to-have?
- **Quality gates:** What would block shipping?
### Resolve Quality — Mandatory Rounds
After presenting your recommendation, you MUST complete these rounds in order. Do NOT skip rounds.
**Complexity calibration:** If the milestone is simple, you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely.
**Round 1 — Per-slice acceptance criteria:**
For each slice, state 3-5 specific, testable acceptance criteria. Ask the user to confirm each slice's criteria. These must be concrete enough that the planner can use them directly. "Tests pass" is NOT an acceptance criterion. "POST /api/auth/signup with {email, password} returns 201 with {id, email}" IS an acceptance criterion.
**Round 2 — Test strategy:**
For each slice, state what type of tests are needed (unit, integration, e2e) and what specifically gets tested. Ask the user to confirm. Example: "S01 needs: unit test for password hashing, integration test for signup endpoint with valid and invalid inputs. No e2e needed for this slice."
**Round 3 — Definition of done:**
State the end-to-end scenario that proves the milestone works. Ask the user to confirm. Example: "Done means: a new user can sign up, log in, receive a JWT, and use that JWT to access a protected endpoint — all verified by running the sequence manually or via integration test."
After completing all 3 rounds, proceed to the Layer 4 Gate.
### Layer 4 Gate
Before advancing, use `ask_user_questions` with question ID containing `layer4_quality_gate`:
```
Header: "Quality Gate"
Question: "Quality bar defined. Ready to write context and roadmap?"
Options:
- "Yes, write the artifacts (Recommended)" — proceed to Output Phase
- "Want to adjust the quality bar" — user will clarify, then re-present quality
```
**CRITICAL — Non-bypassable gate:** Do NOT proceed to Output Phase until the user explicitly approves the quality bar. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around.
---
## Output Phase
Once all four layers are complete, you have gathered:
- Confirmed scope (Layer 1)
- Approved architecture (Layer 2)
- Error handling strategy (Layer 3)
- Quality bar and acceptance criteria (Layer 4)
### Capability Contract
Before writing a roadmap, produce or update `.gsd/REQUIREMENTS.md`.
Use it as the project's explicit capability contract. Requirements discovered during the 4-layer discussion should be captured here with source `user` or `inferred` as appropriate.
**Print the requirements in chat before writing the roadmap.** Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope). After the table, ask: "Confirm, adjust, or add?" **Non-bypassable:** If the user does not respond or gives an ambiguous answer, you MUST re-ask — never proceed to roadmap creation without explicit requirement confirmation.
### Roadmap Preview
Before writing any files, **print the planned roadmap in chat** so the user can see and approve it. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list.
If the user raises a substantive objection, adjust the roadmap. Otherwise, present the roadmap and ask: "Ready to write, or want to adjust?" — one gate, not two. **Non-bypassable:** If the user does not respond or gives an ambiguous answer, you MUST re-ask — never write files without explicit approval. A missing response is not a "yes."
### Naming Convention
Directories use bare IDs. Files use ID-SUFFIX format. Titles live inside file content, not in names.
- Milestone dir: `.gsd/milestones/{{milestoneId}}/`
- Milestone files: `{{milestoneId}}-CONTEXT.md`, `{{milestoneId}}-ROADMAP.md`
- Slice dirs: `S01/`, `S02/`, etc.
### Single Milestone
Once the user is satisfied, in a single pass:
1. `mkdir -p .gsd/milestones/{{milestoneId}}/slices`
2. Write or update `.gsd/PROJECT.md` — use the **Project** output template below. Describe what the project is, its current state, and list the milestone sequence.
3. Write or update `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Confirm requirement states, ownership, and traceability before roadmap creation.
**Depth-Preservation Guidance for context.md:**
When writing context.md, preserve the user's exact terminology, emphasis, and specific framing from the discussion. Do not paraphrase user nuance into generic summaries. If the user said "craft feel," write "craft feel" — not "high-quality user experience." If they emphasized a specific constraint or negative requirement, carry that emphasis through verbatim. The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision.
**Enhanced Context Requirement:** Because this is a prepared discussion, use the `context-enhanced` template which includes sections for Codebase Brief, Architectural Decisions, Interface Contracts, Error Handling Strategy, Testing Requirements, Acceptance Criteria, and Ecosystem Notes. Populate these from the 4-layer discussion:
- Codebase Brief: from Layer 1 presentation
- Architectural Decisions: from Layer 2 — each decision with rationale, evidence, alternatives
- Error Handling Strategy: from Layer 3
- Testing Requirements and Acceptance Criteria: from Layer 4
- Ecosystem Notes: key findings from the ecosystem brief
4. Write `{{contextPath}}` — use the **Context Enhanced** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during discussion.
5. Call `gsd_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
6. For each architectural or pattern decision made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically.
7. {{commitInstruction}}
After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically.
### Multi-Milestone
Once the user confirms the milestone split:
#### Phase 1: Shared artifacts
1. For each milestone, call `gsd_milestone_generate_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .gsd/milestones/<ID>/slices`.
2. Write `.gsd/PROJECT.md` — use the **Project** output template below.
3. Write `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet.
4. For any architectural or pattern decisions made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically.
#### Phase 2: Primary milestone
5. Write a full enhanced `CONTEXT.md` for the primary milestone (the one discussed in depth). Use the `context-enhanced` template.
6. Call `gsd_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
#### MANDATORY: depends_on Frontmatter in CONTEXT.md
Every CONTEXT.md for a milestone that depends on other milestones MUST have YAML frontmatter with `depends_on`. The auto-mode state machine reads this field to determine execution order — without it, milestones may execute out of order or in parallel when they shouldn't.
```yaml
---
depends_on: [M001, M002]
---
# M003: Title
```
If a milestone has no dependencies, omit the frontmatter. The dependency chain from the milestone confirmation gate MUST be reflected in each CONTEXT.md frontmatter. Do NOT rely on QUEUE.md or PROJECT.md for dependency tracking — the state machine only reads CONTEXT.md frontmatter.
#### Phase 3: Sequential readiness gate for remaining milestones
For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. Present three options:
- **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (Layer 1-4 protocol). When the discussion concludes, write a full enhanced `CONTEXT.md`. Then move to the gate for the next milestone.
- **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted.
- **"Just queue it"** — This milestone is identified but intentionally left without context. No context file is written — the directory already exists from Phase 1. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user to run /gsd. The wizard starts a full discussion from scratch.
**When "Discuss now" is chosen:** Run the full 4-layer protocol for that milestone using fresh preparation briefs scoped to that milestone.
#### Milestone Gate Tracking (MANDATORY for multi-milestone)
After EVERY Phase 3 gate decision, immediately write or update `.gsd/DISCUSSION-MANIFEST.json` with the cumulative state. This file is mechanically validated by the system before auto-mode starts — if gates are incomplete, auto-mode will NOT start.
```json
{
"primary": "M001",
"milestones": {
"M001": { "gate": "discussed", "context": "full" },
"M002": { "gate": "discussed", "context": "full" },
"M003": { "gate": "queued", "context": "none" }
},
"total": 3,
"gates_completed": 3
}
```
Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`.
For single-milestone projects, do NOT write this file — it is only for multi-milestone discussions.
#### Phase 4: Finalize
7. {{multiMilestoneCommitInstruction}}
After writing the files, say exactly: "Milestone M001 ready." — nothing else. Auto-mode will start automatically.
{{inlinedTemplates}}

View file

@@ -28,6 +28,8 @@ After reflection is confirmed, decide the approach based on the actual scope —
**Anti-reduction rule:** If the user describes a big vision, plan the big vision. Do not ask "what's the minimum viable version?" or try to reduce scope unless the user explicitly asks for an MVP or minimal version. When something is complex or risky, phase it into a later milestone — do not cut it. The user's ambition is the target, and your job is to sequence it intelligently, not shrink it.
{{preparationContext}}
## Mandatory Investigation Before First Question Round
Before asking your first question, do a mandatory investigation pass. This is not optional.

View file

@@ -1,138 +0,0 @@
# {{milestoneId}}: {{milestoneTitle}}
**Gathered:** {{date}}
**Status:** Ready for planning
## Project Description
{{description}}
## Why This Milestone
{{whatProblemThisSolves_AND_whyNow}}
## Codebase Brief
### Technology Stack
{{techStack}}
### Key Modules
{{keyModules}}
### Patterns in Use
{{patternsInUse}}
## User-Visible Outcome
### When this milestone is complete, the user can:
- {{literalUserActionInRealEnvironment}}
- {{literalUserActionInRealEnvironment}}
### Entry point / environment
- Entry point: {{CLI command / URL / bot / extension / service / workflow}}
- Environment: {{local dev / browser / mobile / launchd / CI / production-like}}
- Live dependencies involved: {{telegram / database / webhook / rpc subprocess / none}}
## Completion Class
- Contract complete means: {{what can be proven by tests / fixtures / artifacts}}
- Integration complete means: {{what must work across real subsystems}}
- Operational complete means: {{what must work under real lifecycle conditions, or none}}
## Architectural Decisions
### {{decisionTitle}}
**Decision:** {{decisionStatement}}
**Rationale:** {{rationale}}
**Evidence:** {{evidence}}
**Alternatives Considered:**
- {{alternative1}} — {{whyNotChosen1}}
- {{alternative2}} — {{whyNotChosen2}}
---
> Add additional decisions as separate `### Decision Title` blocks following the same structure above.
## Interface Contracts
{{interfaceContracts}}
> Document API boundaries, function signatures, data shapes, or protocol agreements that must be honored. Leave blank or remove if not applicable to this milestone.
## Error Handling Strategy
{{errorHandlingStrategy}}
> Describe the approach for handling failures, edge cases, and error propagation. Include retry policies, fallback behaviors, and user-facing error messages where relevant.
## Final Integrated Acceptance
To call this milestone complete, we must prove:
- {{one real end-to-end scenario}}
- {{one real end-to-end scenario}}
- {{what cannot be simulated if this milestone is to be considered truly done}}
## Testing Requirements
{{testingRequirements}}
> Specify test types (unit, integration, e2e), coverage expectations, and any specific test scenarios that must pass.
## Acceptance Criteria
{{acceptanceCriteria}}
> Per-slice acceptance criteria gathered during discussion. Each slice should have clear, testable criteria.
## Risks and Unknowns
- {{riskOrUnknown}} — {{whyItMatters}}
## Existing Codebase / Prior Art
- `{{fileOrModule}}` — {{howItRelates}}
- `{{fileOrModule}}` — {{howItRelates}}
> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution.
## Relevant Requirements
- {{requirementId}} — {{howThisMilestoneAdvancesIt}}
## Scope
### In Scope
- {{inScopeItem}}
### Out of Scope / Non-Goals
- {{outOfScopeItem}}
## Technical Constraints
- {{constraint}}
## Integration Points
- {{systemOrService}} — {{howThisMilestoneInteractsWithIt}}
## Ecosystem Notes
{{ecosystemNotes}}
> Research findings, best practices, known issues, and relevant external documentation discovered during preparation.
## Open Questions
- {{question}} — {{currentThinking}}

View file

@@ -38,6 +38,28 @@ To call this milestone complete, we must prove:
- {{one real end-to-end scenario}}
- {{what cannot be simulated if this milestone is to be considered truly done}}
## Architectural Decisions
### {{decisionTitle}}
**Decision:** {{decisionStatement}}
**Rationale:** {{rationale}}
**Alternatives Considered:**
- {{alternative}} — {{whyNotChosen}}
---
> Add additional decisions as separate `### Decision Title` blocks following the same structure above.
> See `.gsd/DECISIONS.md` for the full append-only register of all project decisions.
## Error Handling Strategy
{{errorHandlingStrategy}}
> Describe the approach for handling failures, edge cases, and error propagation. Include retry policies, fallback behaviors, and user-facing error messages where relevant.
## Risks and Unknowns
- {{riskOrUnknown}} — {{whyItMatters}}
@@ -47,8 +69,6 @@ To call this milestone complete, we must prove:
- `{{fileOrModule}}` — {{howItRelates}}
- `{{fileOrModule}}` — {{howItRelates}}
> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution.
## Relevant Requirements
- {{requirementId}} — {{howThisMilestoneAdvancesIt}}
@@ -71,6 +91,18 @@ To call this milestone complete, we must prove:
- {{systemOrService}} — {{howThisMilestoneInteractsWithIt}}
## Testing Requirements
{{testingRequirements}}
> Specify test types (unit, integration, e2e), coverage expectations, and specific test scenarios that must pass.
## Acceptance Criteria
{{acceptanceCriteria}}
> Per-slice acceptance criteria gathered during discussion. Each slice should have clear, testable criteria.
## Open Questions
- {{question}} — {{currentThinking}}

View file

@@ -1,223 +0,0 @@
/**
* Tests for adversarial review fixes from PR #3602.
*
* These tests verify the fixes for:
* 1. Cross-session state leak in lastPreparationResult (HIGH)
* 2. Invalid regex anchor \z in prompt-validation.ts (HIGH)
* 3. Consecutive error counter in agent-loop.ts (MEDIUM) - UPSTREAM CODE, NOT MODIFIED
*/
import { describe, test, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { getLastPreparationResult, clearPreparationResult } from "../guided-flow.ts";
import { validateEnhancedContext } from "../prompt-validation.ts";
// ─── Test Helpers ───────────────────────────────────────────────────────────────
function makeTempDir(prefix: string): string {
const dir = join(
tmpdir(),
`gsd-adversarial-test-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
);
mkdirSync(dir, { recursive: true });
return dir;
}
function cleanup(dir: string): void {
try {
rmSync(dir, { recursive: true, force: true });
} catch {
// best-effort
}
}
// ─── Fix 1: Cross-session state leak in lastPreparationResult ────────────────────
describe("Fix #1 — Cross-session state leak (lastPreparationResult)", () => {
beforeEach(() => {
clearPreparationResult();
});
afterEach(() => {
clearPreparationResult();
});
test("clearPreparationResult sets lastPreparationResult to null", () => {
// First, verify the getter returns null after clear
clearPreparationResult();
const result = getLastPreparationResult();
assert.equal(result, null, "lastPreparationResult should be null after clear");
});
test("getLastPreparationResult returns null initially", () => {
clearPreparationResult();
const result = getLastPreparationResult();
assert.equal(result, null, "should return null when no preparation has run");
});
// Note: The actual test that prepareAndBuildDiscussPrompt clears the result
// on entry requires mocking ExtensionCommandContext which is complex.
// The fix is verified by code inspection and integration tests.
// The key behavior is:
// 1. lastPreparationResult = null at the start of prepareAndBuildDiscussPrompt
// 2. If preparation throws, lastPreparationResult stays null
// 3. If discuss_preparation is false, lastPreparationResult stays null
});
// ─── Fix 2: Invalid regex anchor \z in prompt-validation.ts ──────────────────────
describe("Fix #2 — Invalid regex anchor (prompt-validation.ts)", () => {
test("validates content with Architectural Decisions at end of file", () => {
// This was the bug: \z is PCRE/Ruby, not JS. JS treated it as literal 'z'.
// The section extraction would fail when Architectural Decisions was the
// last section (no subsequent ## heading).
const contentWithDecisionsAtEnd = `
# M001: Test Milestone
## Why This Milestone
This is why.
## Acceptance Criteria
- Criterion 1
## Architectural Decisions
### Decision 1
**Decision:** Use TypeScript
**Rationale:** Type safety
`;
const result = validateEnhancedContext(contentWithDecisionsAtEnd);
assert.equal(result.valid, true, "should validate content with decisions at end");
assert.equal(result.missing.length, 0, "should have no missing sections");
});
test("validates content with Architectural Decisions followed by another section", () => {
const contentWithDecisionsInMiddle = `
# M001: Test Milestone
## Why This Milestone
This is why.
## Architectural Decisions
### Decision 1
**Decision:** Use TypeScript
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentWithDecisionsInMiddle);
assert.equal(result.valid, true, "should validate content with decisions in middle");
});
test("detects missing decision entry when section is empty", () => {
const contentEmptyDecisions = `
# M001: Test Milestone
## Why This Milestone
This is why.
## Architectural Decisions
(No decisions yet)
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentEmptyDecisions);
assert.equal(result.valid, false, "should fail when decisions section has no entries");
assert.ok(
result.missing.some((m) => m.includes("decision entry")),
"should report missing decision entry",
);
});
test("accepts inline **Decision format", () => {
const contentInlineDecision = `
## Why This Milestone
Test
## Architectural Decisions
**Decision:** Use React
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentInlineDecision);
assert.equal(result.valid, true, "should accept **Decision format");
});
test("accepts ### subsection format", () => {
const contentSubsectionDecision = `
## Why This Milestone
Test
## Architectural Decisions
### Database Choice
We chose SQLite.
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentSubsectionDecision);
assert.equal(result.valid, true, "should accept ### subsection format");
});
test("handles edge case: Architectural Decisions heading without space before content", () => {
const contentNoSpace = `## Why This Milestone
Test
## Architectural Decisions
### Decision 1
Content here
## Acceptance Criteria
- Done`;
const result = validateEnhancedContext(contentNoSpace);
assert.equal(result.valid, true, "should handle content without extra spacing");
});
});
// ─── Fix 3: Consecutive error counter (agent-loop.ts) ────────────────────────────
describe("Fix #3 — Consecutive error counter (UPSTREAM)", () => {
test("NOTE: agent-loop.ts is upstream code that was not modified", () => {
// This finding from the adversarial review relates to upstream behavior
// in packages/pi-agent-core/src/agent-loop.ts.
//
// The consecutiveAllToolErrorTurns counter logic was added in PR #3301
// and refined in PR #3618 by upstream contributors. These PRs fix
// issues with:
// - Schema overload detection counting bash exit codes as failures
// - The counter not resetting properly on successful turns
//
// Since this is upstream code (part of pi-agent-core, not gsd extension),
// we do not modify it here. The fix should be coordinated with upstream.
//
// See: packages/pi-agent-core/src/agent-loop.ts lines 191, 298-325
assert.ok(true, "Documented as upstream behavior — no changes made");
});
});

View file

@@ -1,525 +0,0 @@
/**
* Integration tests for the prepared discussion system.
*
* Exercises the full preparation pipeline against the real GSD-2 codebase:
* - runPreparation() produces valid briefs
* - TypeScript is detected as primary language
* - Module structure includes top-level directories
* - Completes within R112 timing requirement (<60s)
* - prepareAndBuildDiscussPrompt() uses discuss-prepared template when enabled
* - Fallback to standard prompt when preparation is disabled
*/
import test from "node:test";
import assert from "node:assert/strict";
import { join } from "node:path";
import { existsSync } from "node:fs";
import {
runPreparation,
formatCodebaseBrief,
formatPriorContextBrief,
formatEcosystemBrief,
type PreparationUIContext,
type PreparationPreferences,
type PreparationResult,
} from "../preparation.ts";
import { validateEnhancedContext } from "../prompt-validation.ts";
import { getLastPreparationResult, clearPreparationResult } from "../guided-flow.ts";
// ─── Test Helpers ───────────────────────────────────────────────────────────────
/**
* Mock UI context that captures notifications for testing.
* Follows the pattern from preparation.test.ts.
*/
function createMockUI(): PreparationUIContext & { notifications: Array<{ message: string; type?: string }> } {
const notifications: Array<{ message: string; type?: string }> = [];
return {
notifications,
notify(message: string, type?: "info" | "warning" | "error" | "success") {
notifications.push({ message, type });
},
};
}
/**
* Get the GSD extension source directory for integration testing.
* This is the real codebase we'll analyze.
*/
function getGsdExtensionDir(): string {
// Navigate from tests/ up to gsd/ directory
return join(import.meta.dirname, "..");
}
/**
* Get the GSD-2 project root for full codebase analysis.
*/
function getProjectRoot(): string {
// Navigate from tests/ up to the project root
// tests/ -> gsd/ -> extensions/ -> resources/ -> src/ -> gsd-2/
return join(import.meta.dirname, "..", "..", "..", "..", "..");
}
// ─── R111 Validation: runPreparation against real codebase ──────────────────────
test("R111: runPreparation() produces valid codebase brief for GSD extension", async (t) => {
const dir = getGsdExtensionDir();
const ui = createMockUI();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false, // Skip web research to avoid API key requirement
discuss_depth: "standard",
};
const result = await runPreparation(dir, ui, prefs);
// Verify preparation completed successfully
assert.equal(result.enabled, true, "preparation should be enabled");
assert.ok(result.codebase, "should have codebase brief");
assert.ok(result.codebaseBrief, "should have formatted codebase brief");
// Verify TypeScript is detected as primary language
assert.equal(
result.codebase.techStack.primaryLanguage,
"javascript/typescript",
"should detect TypeScript as primary language",
);
// Verify module structure includes top-level directories
const topLevelDirs = result.codebase.moduleStructure.topLevelDirs;
assert.ok(topLevelDirs.length > 0, "should detect top-level directories");
// Common directories in the GSD extension
const expectedDirs = ["tests", "prompts", "templates", "migrate"];
const foundExpected = expectedDirs.filter(d => topLevelDirs.includes(d));
assert.ok(
foundExpected.length >= 2,
`should detect common directories, found: ${topLevelDirs.join(", ")}`,
);
// Verify sampled files exist
assert.ok(result.codebase.sampledFiles.length > 0, "should sample source files");
});
test("R111: runPreparation() produces valid prior context brief", async (t) => {
const dir = getGsdExtensionDir();
const ui = createMockUI();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
const result = await runPreparation(dir, ui, prefs);
// Verify prior context brief structure
assert.ok(result.priorContext, "should have prior context");
assert.ok(result.priorContextBrief, "should have formatted prior context brief");
// Prior context aggregates decisions, requirements, knowledge, summaries
assert.ok("decisions" in result.priorContext, "should have decisions");
assert.ok("requirements" in result.priorContext, "should have requirements");
assert.ok("knowledge" in result.priorContext, "should have knowledge");
assert.ok("summaries" in result.priorContext, "should have summaries");
});
test("R111: runPreparation() produces valid ecosystem brief (skipped without API key)", async (t) => {
const dir = getGsdExtensionDir();
const ui = createMockUI();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false, // Explicitly disable
};
const result = await runPreparation(dir, ui, prefs);
// Verify ecosystem brief structure
assert.ok(result.ecosystem, "should have ecosystem brief");
assert.ok(result.ecosystemBrief, "should have formatted ecosystem brief");
assert.equal(result.ecosystem.available, false, "ecosystem should be unavailable when web research disabled");
assert.ok(result.ecosystem.skippedReason, "should have skip reason");
});
test("R112: runPreparation() completes within 60s requirement", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
discuss_depth: "standard",
};
const startTime = performance.now();
const result = await runPreparation(dir, null, prefs);
const elapsed = performance.now() - startTime;
// R112 requirement: preparation must complete within 60 seconds
assert.ok(result.durationMs < 60000, `should complete within 60s, took ${result.durationMs}ms`);
assert.ok(elapsed < 60000, `wall-clock time should be under 60s, was ${elapsed}ms`);
// Should be much faster for a local directory analysis
assert.ok(result.durationMs < 10000, `should typically complete within 10s, took ${result.durationMs}ms`);
});
// ─── Codebase Pattern Detection ─────────────────────────────────────────────────
test("runPreparation() detects code patterns from GSD extension", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
const result = await runPreparation(dir, null, prefs);
// The GSD extension uses async/await extensively
assert.ok(
result.codebase.patterns.asyncStyle === "async/await" || result.codebase.patterns.asyncStyle === "mixed",
`should detect async/await or mixed, got ${result.codebase.patterns.asyncStyle}`,
);
// The GSD extension uses try/catch for error handling
assert.ok(
result.codebase.patterns.errorHandling === "try/catch" || result.codebase.patterns.errorHandling === "mixed",
`should detect try/catch or mixed, got ${result.codebase.patterns.errorHandling}`,
);
// TypeScript uses camelCase or mixed naming
assert.ok(
result.codebase.patterns.namingConvention === "camelCase" || result.codebase.patterns.namingConvention === "mixed",
`should detect camelCase or mixed, got ${result.codebase.patterns.namingConvention}`,
);
// Evidence should be populated
assert.ok(result.codebase.patterns.evidence.asyncStyle.length > 0, "should have async style evidence");
});
test("runPreparation() samples TypeScript files from src/ or project root", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
const result = await runPreparation(dir, null, prefs);
// Should sample TypeScript files
const tsFiles = result.codebase.sampledFiles.filter(
f => f.endsWith(".ts") || f.endsWith(".tsx"),
);
assert.ok(tsFiles.length > 0, "should sample TypeScript files");
// Should exclude test files
const testFiles = result.codebase.sampledFiles.filter(
f => f.includes(".test.") || f.includes(".spec."),
);
assert.equal(testFiles.length, 0, "should not sample test files");
});
// ─── Brief Formatting ───────────────────────────────────────────────────────────
test("formatCodebaseBrief() produces LLM-readable markdown", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
const result = await runPreparation(dir, null, prefs);
const formatted = formatCodebaseBrief(result.codebase);
// Should contain expected sections
assert.ok(formatted.includes("## Tech Stack"), "should have Tech Stack section");
assert.ok(formatted.includes("## Module Structure"), "should have Module Structure section");
assert.ok(formatted.includes("## Code Patterns"), "should have Code Patterns section");
// Should contain detected tech
assert.ok(formatted.includes("javascript/typescript"), "should include detected language");
// Should be within character limit
assert.ok(formatted.length <= 3000, `should cap at 3000 chars, got ${formatted.length}`);
});
test("formatPriorContextBrief() produces structured prior context output", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
const result = await runPreparation(dir, null, prefs);
const formatted = formatPriorContextBrief(result.priorContext);
// Should contain expected sections
assert.ok(formatted.includes("## Prior Decisions"), "should have Prior Decisions section");
assert.ok(formatted.includes("## Prior Requirements"), "should have Prior Requirements section");
assert.ok(formatted.includes("## Prior Knowledge"), "should have Prior Knowledge section");
assert.ok(formatted.includes("## Prior Milestone Summaries"), "should have Prior Milestone Summaries section");
// Should be within character limit
assert.ok(formatted.length <= 6000, `should cap at 6000 chars, got ${formatted.length}`);
});
test("formatEcosystemBrief() returns simplified message (research happens during discussion)", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
const result = await runPreparation(dir, null, prefs);
const formatted = formatEcosystemBrief(result.ecosystem);
// Should contain section header
assert.ok(formatted.includes("## Ecosystem Research"), "should have Ecosystem Research section");
// Should indicate research happens during discussion
assert.ok(formatted.includes("during the discussion"), "should mention research happens during discussion");
assert.ok(formatted.includes("web search tools"), "should mention web search tools");
// Should be within character limit
assert.ok(formatted.length <= 4000, `should cap at 4000 chars, got ${formatted.length}`);
});
// ─── Preparation Result Storage ─────────────────────────────────────────────────
test("getLastPreparationResult() returns null initially", async (t) => {
// Clear any existing state
clearPreparationResult();
const result = getLastPreparationResult();
assert.equal(result, null, "should return null when no preparation has run");
});
test("clearPreparationResult() clears stored result", async (t) => {
// This test verifies the clear function works
// We can't easily test the set behavior without running the full guided-flow
clearPreparationResult();
const result = getLastPreparationResult();
assert.equal(result, null, "should be null after clear");
});
// ─── TUI Progress Notifications ─────────────────────────────────────────────────
test("runPreparation() emits TUI progress notifications", async (t) => {
const dir = getGsdExtensionDir();
const ui = createMockUI();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
await runPreparation(dir, ui, prefs);
// Should have notifications for each phase
assert.ok(ui.notifications.length > 0, "should have notifications");
// Verify codebase analysis notifications
assert.ok(
ui.notifications.some(n => n.message.includes("Analyzing codebase")),
"should show codebase analysis start",
);
assert.ok(
ui.notifications.some(n => n.message.includes("✓ Analyzed codebase")),
"should show codebase analysis complete",
);
// Verify prior context notifications
assert.ok(
ui.notifications.some(n => n.message.includes("Reviewing prior context")),
"should show prior context start",
);
assert.ok(
ui.notifications.some(n => n.message.includes("✓ Reviewed prior context")),
"should show prior context complete",
);
});
test("runPreparation() works in silent mode (no UI)", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
// Pass null for UI
const result = await runPreparation(dir, null, prefs);
// Should complete without error
assert.equal(result.enabled, true, "should work without UI");
assert.ok(result.codebase, "should have codebase");
assert.ok(result.priorContext, "should have priorContext");
assert.ok(result.durationMs > 0, "should have duration");
});
// ─── Preference-Controlled Behavior ─────────────────────────────────────────────
test("runPreparation() returns early when discuss_preparation is false", async (t) => {
const dir = getGsdExtensionDir();
const ui = createMockUI();
const prefs: PreparationPreferences = {
discuss_preparation: false,
};
const result = await runPreparation(dir, ui, prefs);
assert.equal(result.enabled, false, "should indicate preparation disabled");
assert.equal(result.codebaseBrief, "", "should have empty codebase brief");
assert.equal(result.priorContextBrief, "", "should have empty prior context brief");
assert.equal(result.ecosystemBrief, "", "should have empty ecosystem brief");
assert.equal(ui.notifications.length, 0, "should not show any notifications");
});
test("runPreparation() ecosystem research always returns unavailable (happens during discussion)", async (t) => {
const dir = getGsdExtensionDir();
const ui = createMockUI();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: true, // Even with this enabled, ecosystem research returns unavailable
};
const result = await runPreparation(dir, ui, prefs);
assert.equal(result.enabled, true);
assert.equal(result.ecosystemResearchPerformed, false, "should not perform ecosystem research from preparation");
assert.equal(result.ecosystem.available, false);
assert.ok(
result.ecosystem.skippedReason?.includes("during the discussion"),
"should indicate research happens during discussion",
);
// Should NOT have ecosystem research notifications (no longer part of preparation)
assert.ok(
!ui.notifications.some(n => n.message.includes("Researching ecosystem")),
"should not show ecosystem research notification",
);
});
// ─── validateEnhancedContext Integration ────────────────────────────────────────
test("validateEnhancedContext() validates required sections", async (t) => {
// Test with valid enhanced context
const validContext = `# M001 — Test Milestone
## Scope
This milestone covers X, Y, Z.
## Architectural Decisions
### Decision 1: Use TypeScript
We will use TypeScript for type safety.
## Acceptance Criteria
- [ ] Feature A works
- [ ] Feature B works
`;
const validResult = validateEnhancedContext(validContext);
assert.equal(validResult.valid, true, "should validate complete context");
assert.deepEqual(validResult.missing, [], "should have no missing sections");
// Test with missing sections
const invalidContext = `# M001 — Test Milestone
## Scope
This milestone covers X, Y, Z.
`;
const invalidResult = validateEnhancedContext(invalidContext);
assert.equal(invalidResult.valid, false, "should reject incomplete context");
assert.ok(invalidResult.missing.length > 0, "should list missing sections");
assert.ok(
invalidResult.missing.some(m => m.includes("Architectural Decisions")),
"should report missing Architectural Decisions",
);
assert.ok(
invalidResult.missing.some(m => m.includes("Acceptance Criteria")),
"should report missing Acceptance Criteria",
);
});
test("validateEnhancedContext() requires decision entries in Architectural Decisions", async (t) => {
// Empty architectural decisions section
const emptyDecisions = `# M001 — Test Milestone
## Scope
This milestone covers X, Y, Z.
## Architectural Decisions
(No decisions yet)
## Acceptance Criteria
- [ ] Feature A works
`;
const result = validateEnhancedContext(emptyDecisions);
assert.equal(result.valid, false, "should reject empty decisions section");
assert.ok(
result.missing.some(m => m.includes("decision entry")),
"should report missing decision entry",
);
});
// ─── Full Pipeline Integration ──────────────────────────────────────────────────
test("Full pipeline: preparation produces consistent results across runs", async (t) => {
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
// Run preparation twice
const result1 = await runPreparation(dir, null, prefs);
const result2 = await runPreparation(dir, null, prefs);
// Results should be consistent (same codebase, same analysis)
assert.equal(
result1.codebase.techStack.primaryLanguage,
result2.codebase.techStack.primaryLanguage,
"primary language should be consistent",
);
assert.deepEqual(
result1.codebase.moduleStructure.topLevelDirs.sort(),
result2.codebase.moduleStructure.topLevelDirs.sort(),
"top-level directories should be consistent",
);
assert.equal(
result1.codebase.patterns.asyncStyle,
result2.codebase.patterns.asyncStyle,
"async style should be consistent",
);
});
test("Full pipeline: preparation handles empty .gsd directory gracefully", async (t) => {
// The GSD extension directory may or may not have a .gsd subdirectory
// Either way, preparation should not crash
const dir = getGsdExtensionDir();
const prefs: PreparationPreferences = {
discuss_preparation: true,
discuss_web_research: false,
};
let result: PreparationResult | undefined;
let error: unknown;
try {
result = await runPreparation(dir, null, prefs);
} catch (e) {
error = e;
}
assert.equal(error, undefined, "should not throw");
assert.ok(result, "should return result");
assert.equal(result!.enabled, true, "should be enabled");
// Prior context should gracefully handle missing files
assert.ok(result!.priorContext, "should have prior context even if files missing");
});

View file

@ -1,53 +0,0 @@
/**
* Test isolation utilities for integration tests.
*
* Integration tests often call `mergeMilestoneToMain` and other functions that
* load preferences. If the user's global ~/.gsd/preferences.md has
* `git.main_branch: master`, tests fail because test repos use `main`.
*
* These utilities isolate tests from the user's global environment.
*/
import { mkdtempSync, rmSync, realpathSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { _resetServiceCache } from "../../worktree.ts";
import { _clearGsdRootCache } from "../../paths.ts";
let originalHome: string | undefined;
let fakeHome: string | null = null;
/**
* Isolate the test environment from user's global preferences.
* Creates a fake HOME directory so loadEffectiveGSDPreferences() returns
* empty global preferences instead of the user's ~/.gsd/preferences.md.
*
* Call this in a test.before() hook.
*/
export function isolateFromGlobalPreferences(): void {
originalHome = process.env.HOME;
fakeHome = realpathSync(mkdtempSync(join(tmpdir(), "gsd-test-home-")));
process.env.HOME = fakeHome;
_clearGsdRootCache();
_resetServiceCache();
}
/**
* Restore the original HOME and clean up the fake home directory.
*
* Call this in a test.after() hook.
*/
export function restoreGlobalPreferences(): void {
if (originalHome !== undefined) {
process.env.HOME = originalHome;
} else {
delete process.env.HOME;
}
_clearGsdRootCache();
_resetServiceCache();
if (fakeHome) {
rmSync(fakeHome, { recursive: true, force: true });
fakeHome = null;
}
}
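
For reference, a minimal sketch of the before/after wiring these helpers describe (the import path and test body are illustrative, not taken from this commit):

```ts
// Hypothetical integration-test wiring for the isolation helpers above.
import test, { before, after } from "node:test";
import assert from "node:assert/strict";
import { isolateFromGlobalPreferences, restoreGlobalPreferences } from "./test-isolation.ts";

before(() => {
  // Point HOME at a throwaway directory so ~/.gsd/preferences.md is not read.
  isolateFromGlobalPreferences();
});

after(() => {
  // Restore the original HOME and delete the throwaway directory.
  restoreGlobalPreferences();
});

test("runs against an isolated HOME", () => {
  assert.ok(process.env.HOME?.includes("gsd-test-home-"), "tests see the fake HOME");
});
```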

File diff suppressed because it is too large

View file

@ -1,669 +0,0 @@
/**
 * Prompt Builder Tests: comprehensive tests for S02 components.
*
* Tests cover:
* 1. Template validation (context-enhanced.md, discuss-prepared.md)
* 2. Prompt loading and variable substitution
* 3. Enhanced context validation (R109)
* 4. Integration tests for format functions and prompt injection
*/
import test, { describe } from "node:test";
import assert from "node:assert/strict";
import { readFileSync, existsSync } from "node:fs";
import { join } from "node:path";
// ─── Template Paths ─────────────────────────────────────────────────────────────
const templatesDir = join(process.cwd(), "src/resources/extensions/gsd/templates");
const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts");
const contextEnhancedPath = join(templatesDir, "context-enhanced.md");
const contextPath = join(templatesDir, "context.md");
const discussPreparedPath = join(promptsDir, "discuss-prepared.md");
// ─── Template Tests ─────────────────────────────────────────────────────────────
describe("Template: context-enhanced.md", () => {
test("file exists", () => {
assert.ok(existsSync(contextEnhancedPath), "context-enhanced.md should exist");
});
test("contains all original context.md sections", () => {
const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8");
const originalContext = readFileSync(contextPath, "utf-8");
// Extract section headers from original context.md
const originalSections = originalContext.match(/^## .+$/gm) ?? [];
// Each original section should be present in context-enhanced.md
for (const section of originalSections) {
assert.ok(
contextEnhanced.includes(section),
`context-enhanced.md should contain original section: ${section}`,
);
}
});
test("contains new structured sections for prepared discussions", () => {
const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8");
// New sections required by R108
const newSections = [
"## Codebase Brief",
"## Architectural Decisions",
"## Interface Contracts",
"## Error Handling Strategy",
"## Testing Requirements",
"## Acceptance Criteria",
"## Ecosystem Notes",
];
for (const section of newSections) {
assert.ok(
contextEnhanced.includes(section),
`context-enhanced.md should contain new section: ${section}`,
);
}
});
test("Codebase Brief has sub-sections", () => {
const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8");
assert.ok(
contextEnhanced.includes("### Technology Stack"),
"Codebase Brief should have Technology Stack sub-section",
);
assert.ok(
contextEnhanced.includes("### Key Modules"),
"Codebase Brief should have Key Modules sub-section",
);
assert.ok(
contextEnhanced.includes("### Patterns in Use"),
"Codebase Brief should have Patterns in Use sub-section",
);
});
test("Architectural Decisions has structured format guidance", () => {
const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8");
// Check for decision structure markers
assert.ok(
contextEnhanced.includes("**Decision:**"),
"Architectural Decisions should have Decision marker",
);
assert.ok(
contextEnhanced.includes("**Rationale:**"),
"Architectural Decisions should have Rationale marker",
);
assert.ok(
contextEnhanced.includes("**Evidence:**"),
"Architectural Decisions should have Evidence marker",
);
assert.ok(
contextEnhanced.includes("**Alternatives Considered:**"),
"Architectural Decisions should have Alternatives Considered marker",
);
});
});
describe("Template: discuss-prepared.md", () => {
test("file exists", () => {
assert.ok(existsSync(discussPreparedPath), "discuss-prepared.md should exist");
});
test("contains all three brief placeholders", () => {
const discussPrepared = readFileSync(discussPreparedPath, "utf-8");
assert.ok(
discussPrepared.includes("{{codebaseBrief}}"),
"discuss-prepared.md should contain {{codebaseBrief}} placeholder",
);
assert.ok(
discussPrepared.includes("{{priorContextBrief}}"),
"discuss-prepared.md should contain {{priorContextBrief}} placeholder",
);
assert.ok(
discussPrepared.includes("{{ecosystemBrief}}"),
"discuss-prepared.md should contain {{ecosystemBrief}} placeholder",
);
});
test("contains 4-layer protocol markers", () => {
const discussPrepared = readFileSync(discussPreparedPath, "utf-8");
// Check for all four layer headings
assert.ok(
discussPrepared.includes("## Layer 1 — Scope"),
"discuss-prepared.md should contain Layer 1 (Scope)",
);
assert.ok(
discussPrepared.includes("## Layer 2 — Architecture"),
"discuss-prepared.md should contain Layer 2 (Architecture)",
);
assert.ok(
discussPrepared.includes("## Layer 3 — Error States"),
"discuss-prepared.md should contain Layer 3 (Error States)",
);
assert.ok(
discussPrepared.includes("## Layer 4 — Quality Bar"),
"discuss-prepared.md should contain Layer 4 (Quality Bar)",
);
});
test("contains gate question IDs for all layers", () => {
const discussPrepared = readFileSync(discussPreparedPath, "utf-8");
assert.ok(
discussPrepared.includes("layer1_scope_gate"),
"discuss-prepared.md should contain layer1_scope_gate question ID",
);
assert.ok(
discussPrepared.includes("layer2_architecture_gate"),
"discuss-prepared.md should contain layer2_architecture_gate question ID",
);
assert.ok(
discussPrepared.includes("layer3_error_gate"),
"discuss-prepared.md should contain layer3_error_gate question ID",
);
assert.ok(
discussPrepared.includes("layer4_quality_gate"),
"discuss-prepared.md should contain layer4_quality_gate question ID",
);
});
test("contains context-enhanced template guidance", () => {
const discussPrepared = readFileSync(discussPreparedPath, "utf-8");
assert.ok(
discussPrepared.includes("context-enhanced"),
"discuss-prepared.md should reference context-enhanced template",
);
});
});
// ─── Prompt Loading Tests ───────────────────────────────────────────────────────
describe("Prompt Loading", () => {
// Dynamic import to work with the module's warm cache
test("loadPrompt substitutes all variables correctly", async () => {
const { loadPrompt } = await import("../prompt-loader.ts");
const result = loadPrompt("discuss-prepared", {
preamble: "Test preamble",
codebaseBrief: "Test codebase brief content",
priorContextBrief: "Test prior context brief content",
ecosystemBrief: "Test ecosystem brief content",
milestoneId: "M001",
contextPath: ".gsd/milestones/M001/M001-CONTEXT.md",
roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md",
inlinedTemplates: "Test templates",
commitInstruction: "Test commit instruction",
multiMilestoneCommitInstruction: "Test multi-milestone commit",
});
assert.ok(result.includes("Test codebase brief content"), "codebaseBrief should be substituted");
assert.ok(result.includes("Test prior context brief content"), "priorContextBrief should be substituted");
assert.ok(result.includes("Test ecosystem brief content"), "ecosystemBrief should be substituted");
assert.ok(!result.includes("{{codebaseBrief}}"), "placeholder should not remain");
});
test("loadPrompt throws GSDError for missing variables", async () => {
const { loadPrompt } = await import("../prompt-loader.ts");
const { GSDError, GSD_PARSE_ERROR } = await import("../errors.ts");
assert.throws(
() => loadPrompt("discuss-prepared", {}), // Missing required variables
(err: unknown) => {
assert.ok(err instanceof GSDError, "should throw GSDError");
assert.equal((err as InstanceType<typeof GSDError>).code, GSD_PARSE_ERROR, "should have GSD_PARSE_ERROR code");
return true;
},
);
});
test("brief content with {{...}} patterns does not cause false variable errors", async () => {
const { loadPrompt } = await import("../prompt-loader.ts");
// Content that contains template-like patterns but should not be treated as variables
const briefWithPatterns = `
## Tech Stack
- Framework: Uses \`{{slot}}\` placeholder syntax in templates
- Pattern: The codebase has \`{{variableName}}\` markers
`;
// This should NOT throw, because {{slot}} and {{variableName}} are inside
// the brief value, not undeclared placeholders in the template itself.
const result = loadPrompt("discuss-prepared", {
preamble: "Test",
codebaseBrief: briefWithPatterns,
priorContextBrief: "Test brief",
ecosystemBrief: "Test brief",
milestoneId: "M001",
contextPath: ".gsd/milestones/M001/M001-CONTEXT.md",
roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md",
inlinedTemplates: "Test templates",
commitInstruction: "Test commit instruction",
multiMilestoneCommitInstruction: "Test multi-milestone commit",
});
assert.ok(result.includes("{{slot}}"), "template-like patterns in content should be preserved");
assert.ok(result.includes("{{variableName}}"), "template-like patterns in content should be preserved");
});
});
// ─── Validation Tests ───────────────────────────────────────────────────────────
describe("Enhanced Context Validation", () => {
test("valid enhanced context passes validation", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const validContent = `
# M001: Test Milestone
## Why This Milestone
This is why we need this milestone.
## Architectural Decisions
### Decision 1
**Decision:** Use TypeScript
**Rationale:** Type safety
## Acceptance Criteria
- Criterion 1
- Criterion 2
`;
const result = validateEnhancedContext(validContent);
assert.equal(result.valid, true, "valid content should pass validation");
assert.equal(result.missing.length, 0, "no missing sections");
});
test("missing scope section fails", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const contentMissingScope = `
# M001: Test Milestone
## Architectural Decisions
### Decision 1
**Decision:** Use TypeScript
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentMissingScope);
assert.equal(result.valid, false, "should fail validation");
assert.ok(
result.missing.some((m) => m.includes("Scope") || m.includes("Why This Milestone")),
"should report missing scope section",
);
});
test("missing architectural decisions section fails", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const contentMissingDecisions = `
# M001: Test Milestone
## Why This Milestone
This is why we need this milestone.
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentMissingDecisions);
assert.equal(result.valid, false, "should fail validation");
assert.ok(
result.missing.includes("Architectural Decisions"),
"should report missing architectural decisions section",
);
});
test("missing acceptance criteria section fails", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const contentMissingCriteria = `
# M001: Test Milestone
## Why This Milestone
This is why we need this milestone.
## Architectural Decisions
### Decision 1
**Decision:** Use TypeScript
`;
const result = validateEnhancedContext(contentMissingCriteria);
assert.equal(result.valid, false, "should fail validation");
assert.ok(
result.missing.includes("Acceptance Criteria"),
"should report missing acceptance criteria section",
);
});
test("empty architectural decisions section (no entries) fails", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const contentEmptyDecisions = `
# M001: Test Milestone
## Why This Milestone
This is why we need this milestone.
## Architectural Decisions
No decisions yet.
## Acceptance Criteria
- Criterion 1
`;
const result = validateEnhancedContext(contentEmptyDecisions);
assert.equal(result.valid, false, "should fail validation");
assert.ok(
result.missing.some((m) => m.includes("decision entry")),
"should report missing decision entry",
);
});
test("alternative scope headers are accepted", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
// Test with ## Scope
const withScope = `
## Scope
### In Scope
- Item 1
## Architectural Decisions
### Decision 1
**Decision:** Test
## Acceptance Criteria
- Criterion 1
`;
assert.equal(validateEnhancedContext(withScope).valid, true, "## Scope should be accepted");
// Test with ## Milestone Scope
const withMilestoneScope = `
## Milestone Scope
This is the scope.
## Architectural Decisions
### Decision 1
**Decision:** Test
## Acceptance Criteria
- Criterion 1
`;
assert.equal(
validateEnhancedContext(withMilestoneScope).valid,
true,
"## Milestone Scope should be accepted",
);
});
test("alternative acceptance criteria headers are accepted", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const withFinalIntegrated = `
## Why This Milestone
Test
## Architectural Decisions
### Decision 1
**Decision:** Test
## Final Integrated Acceptance
- Criterion 1
`;
assert.equal(
validateEnhancedContext(withFinalIntegrated).valid,
true,
"## Final Integrated Acceptance should be accepted",
);
});
test("inline decision format is accepted", async () => {
const { validateEnhancedContext } = await import("../prompt-validation.ts");
const withInlineDecision = `
## Why This Milestone
Test
## Architectural Decisions
**Decision:** Use React for the frontend
## Acceptance Criteria
- Criterion 1
`;
assert.equal(
validateEnhancedContext(withInlineDecision).valid,
true,
"**Decision marker format should be accepted",
);
});
});
// ─── Integration Tests ──────────────────────────────────────────────────────────
describe("Integration: Format Functions", () => {
test("formatCodebaseBrief produces non-empty output", async () => {
const { formatCodebaseBrief } = await import("../preparation.ts");
const brief = {
techStack: {
primaryLanguage: "TypeScript",
detectedFiles: ["package.json", "tsconfig.json"],
packageManager: "npm",
isMonorepo: false,
hasTests: true,
hasCI: true,
},
moduleStructure: {
topLevelDirs: ["src", "tests"],
srcSubdirs: ["components", "utils"],
totalFilesSampled: 5,
},
patterns: {
asyncStyle: "async/await" as const,
errorHandling: "try/catch" as const,
namingConvention: "camelCase" as const,
evidence: {
asyncStyle: ["src/foo.ts: async/await (5 occurrences)"],
errorHandling: ["src/bar.ts: try/catch (3 occurrences)"],
namingConvention: ["camelCase: 50 occurrences"],
},
fileCounts: {
asyncAwait: 3,
promises: 0,
callbacks: 0,
tryCatch: 2,
errorCallbacks: 0,
resultTypes: 0,
},
},
sampledFiles: ["src/index.ts", "src/utils.ts"],
};
const formatted = formatCodebaseBrief(brief);
assert.ok(formatted.length > 0, "formatted brief should not be empty");
assert.ok(formatted.includes("TypeScript"), "should include primary language");
assert.ok(formatted.includes("async/await"), "should include async style");
});
test("formatPriorContextBrief produces non-empty output", async () => {
const { formatPriorContextBrief } = await import("../preparation.ts");
const brief = {
decisions: {
byScope: new Map([
["architecture", [{ id: "D001", scope: "architecture", decision: "Use SQLite", choice: "SQLite", rationale: "Simplicity" }]],
]),
totalCount: 1,
},
requirements: {
active: [{ id: "R001", description: "Test requirement", status: "active" as const }],
validated: [],
deferred: [],
totalCount: 1,
},
knowledge: "Some knowledge entry",
summaries: "M001 completed X and Y",
};
const formatted = formatPriorContextBrief(brief);
assert.ok(formatted.length > 0, "formatted brief should not be empty");
assert.ok(formatted.includes("Prior Decisions"), "should include decisions section");
assert.ok(formatted.includes("D001"), "should include decision ID");
});
test("formatEcosystemBrief returns simplified message (research happens during discussion)", async () => {
const { formatEcosystemBrief } = await import("../preparation.ts");
// formatEcosystemBrief now returns a fixed message regardless of brief content
// because ecosystem research happens during the discussion, not preparation
const briefWithFindings = {
available: true,
queries: ["Next.js best practices 2024"],
findings: [
{
query: "Next.js best practices 2024",
title: "Server Components Guide",
url: "https://example.com/guide",
snippet: "Use Server Components for data fetching",
},
],
provider: "tavily",
};
const formatted = formatEcosystemBrief(briefWithFindings);
assert.ok(formatted.length > 0, "formatted brief should not be empty");
assert.ok(formatted.includes("Ecosystem Research"), "should include research heading");
assert.ok(formatted.includes("during the discussion"), "should mention research happens during discussion");
});
test("formatEcosystemBrief returns same output for any brief state", async () => {
const { formatEcosystemBrief } = await import("../preparation.ts");
const briefUnavailable = {
available: false,
queries: [],
findings: [],
skippedReason: "No API key configured",
};
const briefAvailable = {
available: true,
queries: ["test"],
findings: [],
provider: "tavily",
};
const formatted1 = formatEcosystemBrief(briefUnavailable);
const formatted2 = formatEcosystemBrief(briefAvailable);
// Both should return the same simplified message
assert.equal(formatted1, formatted2, "should return consistent output regardless of brief state");
assert.ok(formatted1.includes("web search tools"), "should mention web search tools");
});
test("formatted briefs can be injected into prompt without errors", async () => {
const { loadPrompt } = await import("../prompt-loader.ts");
const { formatCodebaseBrief, formatPriorContextBrief, formatEcosystemBrief } = await import("../preparation.ts");
// Create realistic briefs
const codebaseBrief = formatCodebaseBrief({
techStack: {
primaryLanguage: "TypeScript",
detectedFiles: ["package.json"],
packageManager: "npm",
isMonorepo: false,
hasTests: true,
hasCI: false,
},
moduleStructure: {
topLevelDirs: ["src"],
srcSubdirs: [],
totalFilesSampled: 1,
},
patterns: {
asyncStyle: "async/await" as const,
errorHandling: "try/catch" as const,
namingConvention: "camelCase" as const,
evidence: { asyncStyle: [], errorHandling: [], namingConvention: [] },
fileCounts: {
asyncAwait: 0,
promises: 0,
callbacks: 0,
tryCatch: 0,
errorCallbacks: 0,
resultTypes: 0,
},
},
sampledFiles: [],
});
const priorContextBrief = formatPriorContextBrief({
decisions: { byScope: new Map(), totalCount: 0 },
requirements: { active: [], validated: [], deferred: [], totalCount: 0 },
knowledge: "No prior knowledge recorded.",
summaries: "No prior milestone summaries.",
});
const ecosystemBrief = formatEcosystemBrief({
available: false,
queries: [],
findings: [],
skippedReason: "Preparation disabled",
});
// Should not throw when injecting formatted briefs
const result = loadPrompt("discuss-prepared", {
preamble: "Test preamble",
codebaseBrief,
priorContextBrief,
ecosystemBrief,
milestoneId: "M001",
contextPath: ".gsd/milestones/M001/M001-CONTEXT.md",
roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md",
inlinedTemplates: "Test templates",
commitInstruction: "Do not commit",
multiMilestoneCommitInstruction: "Do not commit",
});
assert.ok(result.includes("TypeScript"), "codebase brief should be present");
assert.ok(result.includes("Prior Decisions"), "prior context brief should be present");
// formatEcosystemBrief now returns a fixed message about research during discussion
assert.ok(result.includes("during the discussion"), "ecosystem brief should be present");
});
});

View file

@ -230,16 +230,13 @@ import {
// ─── Scenario 19: isGateQuestionId recognizes all gate patterns ──
test('write-gate: isGateQuestionId recognizes all gate patterns', () => {
assert.strictEqual(isGateQuestionId('layer1_scope_gate'), true);
assert.strictEqual(isGateQuestionId('layer2_architecture_gate'), true);
assert.strictEqual(isGateQuestionId('layer3_error_gate'), true);
assert.strictEqual(isGateQuestionId('layer4_quality_gate'), true);
assert.strictEqual(isGateQuestionId('depth_verification'), true);
assert.strictEqual(isGateQuestionId('depth_verification_M002'), true);
assert.strictEqual(isGateQuestionId('my_layer1_scope_gate_question'), true);
assert.strictEqual(isGateQuestionId('depth_verification_confirm'), true);
// Non-gate question IDs
assert.strictEqual(isGateQuestionId('project_intent'), false);
assert.strictEqual(isGateQuestionId('feature_priority'), false);
assert.strictEqual(isGateQuestionId('layer1_scope_gate'), false);
assert.strictEqual(isGateQuestionId(''), false);
});
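
The assertions above are consistent with a simple substring check of the question ID against the recognized gate patterns. A minimal sketch under that assumption (the actual implementation is not shown in this diff, and the names here are illustrative):

```ts
// Hypothetical shape of isGateQuestionId, assuming gate IDs are recognized by a
// substring match against a small pattern list; consistent with the assertions
// above ("depth_verification_M002" → true, "layer1_scope_gate" and "" → false).
const gatePatterns = ["depth_verification"] as const;

function isGateQuestionId(questionId: string): boolean {
  return gatePatterns.some((pattern) => questionId.includes(pattern));
}
```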
@ -249,14 +246,14 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => {
clearDiscussionFlowState();
assert.strictEqual(getPendingGate(), null, 'starts null');
setPendingGate('layer1_scope_gate');
assert.strictEqual(getPendingGate(), 'layer1_scope_gate', 'set correctly');
setPendingGate('depth_verification');
assert.strictEqual(getPendingGate(), 'depth_verification', 'set correctly');
clearPendingGate();
assert.strictEqual(getPendingGate(), null, 'cleared correctly');
// clearDiscussionFlowState also clears pending gate
setPendingGate('layer2_architecture_gate');
setPendingGate('depth_verification_M002');
clearDiscussionFlowState();
assert.strictEqual(getPendingGate(), null, 'clearDiscussionFlowState clears pending gate');
});
@ -265,12 +262,12 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => {
test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate', () => {
clearDiscussionFlowState();
setPendingGate('layer1_scope_gate');
setPendingGate('depth_verification');
// write should be blocked during discussion
const writeResult = shouldBlockPendingGate('write', 'M001', false);
assert.strictEqual(writeResult.block, true, 'write should be blocked');
assert.ok(writeResult.reason!.includes('layer1_scope_gate'), 'reason mentions the gate');
assert.ok(writeResult.reason!.includes('depth_verification'), 'reason mentions the gate');
// edit should be blocked
const editResult = shouldBlockPendingGate('edit', 'M001', false);
@ -287,7 +284,7 @@ test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate',
test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions during pending gate', () => {
clearDiscussionFlowState();
setPendingGate('layer1_scope_gate');
setPendingGate('depth_verification');
// ask_user_questions is always safe (model needs to re-ask)
assert.strictEqual(shouldBlockPendingGate('ask_user_questions', 'M001').block, false);
@ -304,7 +301,7 @@ test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions
test('write-gate: shouldBlockPendingGate blocks outside discussion when a gate is pending', () => {
clearDiscussionFlowState();
setPendingGate('layer1_scope_gate');
setPendingGate('depth_verification');
// No milestoneId and no queue phase — still block because the gate is pending
const result = shouldBlockPendingGate('write', null, false);
@ -330,7 +327,7 @@ test('write-gate: shouldBlockPendingGate blocks in queue mode when gate is pendi
test('write-gate: shouldBlockPendingGateBash allows read-only commands during pending gate', () => {
clearDiscussionFlowState();
setPendingGate('layer2_architecture_gate');
setPendingGate('depth_verification');
assert.strictEqual(shouldBlockPendingGateBash('cat file.txt', 'M001').block, false);
assert.strictEqual(shouldBlockPendingGateBash('git log --oneline', 'M001').block, false);
@ -344,11 +341,11 @@ test('write-gate: shouldBlockPendingGateBash allows read-only commands during pe
test('write-gate: shouldBlockPendingGateBash blocks mutating commands during pending gate', () => {
clearDiscussionFlowState();
setPendingGate('layer2_architecture_gate');
setPendingGate('depth_verification');
const result = shouldBlockPendingGateBash('npm run build', 'M001');
assert.strictEqual(result.block, true, 'mutating bash should be blocked');
assert.ok(result.reason!.includes('layer2_architecture_gate'));
assert.ok(result.reason!.includes('depth_verification'));
clearDiscussionFlowState();
});
@ -365,7 +362,7 @@ test('write-gate: no pending gate means no blocking', () => {
// ─── Scenario 28: resetWriteGateState clears pending gate ──
test('write-gate: resetWriteGateState clears pending gate', () => {
setPendingGate('layer3_error_gate');
setPendingGate('depth_verification');
resetWriteGateState();
assert.strictEqual(getPendingGate(), null);
});