From b09e2a549ce352fc82702291b80e81bab5755d4d Mon Sep 17 00:00:00 2001 From: frizynn Date: Mon, 16 Mar 2026 16:18:25 -0300 Subject: [PATCH 1/7] feat: add `gsd headless` CLI subcommand for non-interactive auto-mode Adds a first-class `gsd headless` command that runs auto-mode without a TUI by spawning a child process in RPC mode via RpcClient. Useful for CI/CD pipelines, scripts, and unattended execution. CLI interface: gsd headless - Run auto-mode until complete gsd headless --step - Run one unit only (sends /gsd next) gsd headless --timeout 300000 - Custom timeout in ms (default 300000 = 5 min) gsd headless --json - Forward RPC events as JSONL to stdout gsd headless --verbose - Show full agent text and tool results gsd headless --model ID - Override model Exit codes: 0 = complete, 1 = error/timeout, 2 = blocked Features: - Extension UI auto-responder (handles select, confirm, input, editor, notify, setStatus, setWidget, setTitle, set_editor_text) - Completion detection via terminal notification keywords + idle timeout - Human-readable progress output to stderr - SIGINT/SIGTERM forwarding for clean shutdown - Child process crash detection - Completion summary with diagnostics on failure --- src/cli.ts | 7 + src/headless.ts | 427 +++++++++++++++++++++++++++++++++++++++++++++++ src/help-text.ts | 30 ++++ 3 files changed, 464 insertions(+) create mode 100644 src/headless.ts diff --git a/src/cli.ts b/src/cli.ts index eb7adb610..4d0b27e64 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -174,6 +174,13 @@ if (cliFlags.messages[0] === 'sessions') { cliFlags._selectedSessionPath = selected.path } +// `gsd headless` — run auto-mode without TUI +if (cliFlags.messages[0] === 'headless') { + const { runHeadless, parseHeadlessArgs } = await import('./headless.js') + await runHeadless(parseHeadlessArgs(process.argv)) + process.exit(0) +} + // Pi's tool bootstrap can mis-detect already-installed fd/rg on some systems // because spawnSync(..., ["--version"]) returns EPERM despite a zero exit code. 
// Provision local managed binaries first so Pi sees them without probing PATH. diff --git a/src/headless.ts b/src/headless.ts new file mode 100644 index 000000000..78f87e4ba --- /dev/null +++ b/src/headless.ts @@ -0,0 +1,427 @@ +/** + * Headless Orchestrator — `gsd headless` + * + * Runs GSD's auto-mode (or a single unit via --step) without a TUI by + * spawning a child process in RPC mode, auto-responding to extension UI + * requests, and streaming progress to stderr. + * + * Exit codes: + * 0 — complete (auto-mode finished successfully) + * 1 — error or timeout + * 2 — blocked (auto-mode reported a blocker) + */ + +import { existsSync } from 'node:fs' +import { join } from 'node:path' +import { ChildProcess } from 'node:child_process' + +// RpcClient is not in @gsd/pi-coding-agent's public exports — import from dist directly. +// This relative path resolves correctly from both src/ (via tsx) and dist/ (compiled). +import { RpcClient } from '../packages/pi-coding-agent/dist/modes/rpc/rpc-client.js' + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface HeadlessOptions { + timeout: number + step: boolean + json: boolean + verbose: boolean + model?: string +} + +interface ExtensionUIRequest { + type: 'extension_ui_request' + id: string + method: string + title?: string + options?: string[] + message?: string + prefill?: string + timeout?: number + [key: string]: unknown +} + +interface TrackedEvent { + type: string + timestamp: number + detail?: string +} + +// --------------------------------------------------------------------------- +// CLI Argument Parser +// --------------------------------------------------------------------------- + +export function parseHeadlessArgs(argv: string[]): HeadlessOptions { + const options: HeadlessOptions = { + timeout: 300_000, + step: false, + json: false, + verbose: false, + } + + const args 
= argv.slice(2) + for (let i = 0; i < args.length; i++) { + const arg = args[i] + if (arg === 'headless') continue + if (arg === '--timeout' && i + 1 < args.length) { + options.timeout = parseInt(args[++i], 10) + if (Number.isNaN(options.timeout) || options.timeout <= 0) { + process.stderr.write('[headless] Error: --timeout must be a positive integer (milliseconds)\n') + process.exit(1) + } + } else if (arg === '--step') { + options.step = true + } else if (arg === '--json') { + options.json = true + } else if (arg === '--verbose') { + options.verbose = true + } else if (arg === '--model' && i + 1 < args.length) { + options.model = args[++i] + } + } + + return options +} + +// --------------------------------------------------------------------------- +// JSONL Helper +// --------------------------------------------------------------------------- + +function serializeJsonLine(obj: Record): string { + return JSON.stringify(obj) + '\n' +} + +// --------------------------------------------------------------------------- +// Extension UI Auto-Responder +// --------------------------------------------------------------------------- + +function handleExtensionUIRequest( + event: ExtensionUIRequest, + writeToStdin: (data: string) => void, +): void { + const { id, method } = event + let response: Record + + switch (method) { + case 'select': + response = { type: 'extension_ui_response', id, value: event.options?.[0] ?? '' } + break + case 'confirm': + response = { type: 'extension_ui_response', id, confirmed: true } + break + case 'input': + response = { type: 'extension_ui_response', id, value: '' } + break + case 'editor': + response = { type: 'extension_ui_response', id, value: event.prefill ?? 
'' } + break + case 'notify': + case 'setStatus': + case 'setWidget': + case 'setTitle': + case 'set_editor_text': + response = { type: 'extension_ui_response', id, value: '' } + break + default: + process.stderr.write(`[headless] Warning: unknown extension_ui_request method "${method}", cancelling\n`) + response = { type: 'extension_ui_response', id, cancelled: true } + break + } + + writeToStdin(serializeJsonLine(response)) +} + +// --------------------------------------------------------------------------- +// Progress Formatter +// --------------------------------------------------------------------------- + +function formatProgress( + event: Record, + verbose: boolean, +): string | null { + const type = String(event.type ?? '') + + switch (type) { + case 'tool_execution_start': + return `[tool] ${event.toolName ?? 'unknown'}` + + case 'tool_execution_end': + if (verbose) { + const result = String(event.result ?? '').slice(0, 200) + return `[tool:result] ${event.toolName ?? 'unknown'}: ${result}` + } + return null + + case 'agent_start': + return '[agent] Session started' + + case 'agent_end': + return '[agent] Session ended' + + case 'extension_ui_request': + if (event.method === 'notify') { + return `[gsd] ${event.message ?? ''}` + } + return null + + case 'message_update': + if (verbose) { + const msgEvent = event.assistantMessageEvent as Record | undefined + const text = String(msgEvent?.text ?? 
'').slice(0, 200) + if (text) return `[assistant] ${text}` + } + return null + + default: + return null + } +} + +// --------------------------------------------------------------------------- +// Completion Detection +// --------------------------------------------------------------------------- + +const TERMINAL_KEYWORDS = ['complete', 'stopped', 'blocked'] +const IDLE_TIMEOUT_MS = 15_000 + +function isTerminalNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false + const message = String(event.message ?? '').toLowerCase() + return TERMINAL_KEYWORDS.some((kw) => message.includes(kw)) +} + +function isBlockedNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false + return String(event.message ?? '').toLowerCase().includes('blocked') +} + +// --------------------------------------------------------------------------- +// Main Orchestrator +// --------------------------------------------------------------------------- + +export async function runHeadless(options: HeadlessOptions): Promise { + const startTime = Date.now() + + // Validate .gsd/ directory + const gsdDir = join(process.cwd(), '.gsd') + if (!existsSync(gsdDir)) { + process.stderr.write('[headless] Error: No .gsd/ directory found in current directory.\n') + process.stderr.write("[headless] Run 'gsd' interactively first to initialize a project.\n") + process.exit(1) + } + + // Resolve CLI path for the child process + const cliPath = process.env.GSD_BIN_PATH || process.argv[1] + if (!cliPath) { + process.stderr.write('[headless] Error: Cannot determine CLI path. 
Set GSD_BIN_PATH or run via gsd.\n') + process.exit(1) + } + + // Create RPC client + const clientOptions: Record = { + cliPath, + cwd: process.cwd(), + } + if (options.model) { + clientOptions.model = options.model + } + + const client = new RpcClient(clientOptions) + + // Event tracking + let totalEvents = 0 + let toolCallCount = 0 + let sawToolExecution = false + let blocked = false + let completed = false + let exitCode = 0 + const recentEvents: TrackedEvent[] = [] + + function trackEvent(event: Record): void { + totalEvents++ + const type = String(event.type ?? 'unknown') + + if (type === 'tool_execution_start') { + toolCallCount++ + sawToolExecution = true + } + + // Keep last 20 events for diagnostics + const detail = + type === 'tool_execution_start' + ? String(event.toolName ?? '') + : type === 'extension_ui_request' + ? `${event.method}: ${event.title ?? event.message ?? ''}` + : undefined + + recentEvents.push({ type, timestamp: Date.now(), detail }) + if (recentEvents.length > 20) recentEvents.shift() + } + + // Stdin writer for sending extension_ui_response to child + let stdinWriter: ((data: string) => void) | null = null + + // Completion promise + let resolveCompletion: () => void + let rejectCompletion: (err: Error) => void + const completionPromise = new Promise((resolve, reject) => { + resolveCompletion = resolve + rejectCompletion = reject + }) + + // Idle timeout — fallback completion detection + let idleTimer: ReturnType | null = null + + function resetIdleTimer(): void { + if (idleTimer) clearTimeout(idleTimer) + if (sawToolExecution) { + idleTimer = setTimeout(() => { + completed = true + resolveCompletion() + }, IDLE_TIMEOUT_MS) + } + } + + // Overall timeout + const timeoutTimer = setTimeout(() => { + process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`) + exitCode = 1 + resolveCompletion() + }, options.timeout) + + // Event handler + client.onEvent((event) => { + const eventObj = event as unknown as Record + 
trackEvent(eventObj) + resetIdleTimer() + + // --json mode: forward all events as JSONL to stdout + if (options.json) { + process.stdout.write(JSON.stringify(eventObj) + '\n') + } else { + // Progress output to stderr + const line = formatProgress(eventObj, options.verbose) + if (line) process.stderr.write(line + '\n') + } + + // Handle extension_ui_request + if (eventObj.type === 'extension_ui_request' && stdinWriter) { + // Check for terminal notification before auto-responding + if (isBlockedNotification(eventObj)) { + blocked = true + } + if (isTerminalNotification(eventObj)) { + completed = true + } + + handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter) + + // If we detected a terminal notification, resolve after responding + if (completed) { + exitCode = blocked ? 2 : 0 + resolveCompletion() + return + } + } + + // agent_end after tool execution — possible completion + if (eventObj.type === 'agent_end' && sawToolExecution && !completed) { + // Don't immediately resolve — wait for potential terminal notify or idle timeout. + // The idle timer handles this case. + } + }) + + // Signal handling + const signalHandler = () => { + process.stderr.write('\n[headless] Interrupted, stopping child process...\n') + exitCode = 1 + client.stop().finally(() => { + clearTimeout(timeoutTimer) + if (idleTimer) clearTimeout(idleTimer) + process.exit(exitCode) + }) + } + process.on('SIGINT', signalHandler) + process.on('SIGTERM', signalHandler) + + // Start the RPC session + try { + await client.start() + } catch (err) { + process.stderr.write(`[headless] Error: Failed to start RPC session: ${err instanceof Error ? 
err.message : String(err)}\n`) + clearTimeout(timeoutTimer) + process.exit(1) + } + + // Access stdin writer from the internal process + const internalProcess = (client as any).process as ChildProcess + if (!internalProcess?.stdin) { + process.stderr.write('[headless] Error: Cannot access child process stdin\n') + await client.stop() + clearTimeout(timeoutTimer) + process.exit(1) + } + + stdinWriter = (data: string) => { + internalProcess.stdin!.write(data) + } + + // Detect child process crash + internalProcess.on('exit', (code) => { + if (!completed) { + const msg = `[headless] Child process exited unexpectedly with code ${code ?? 'null'}\n` + process.stderr.write(msg) + exitCode = 1 + resolveCompletion() + } + }) + + if (!options.json) { + process.stderr.write('[headless] Starting auto-mode...\n') + } + + // Send the command + const command = options.step ? '/gsd next' : '/gsd auto' + try { + await client.prompt(command) + } catch (err) { + process.stderr.write(`[headless] Error: Failed to send prompt: ${err instanceof Error ? err.message : String(err)}\n`) + exitCode = 1 + } + + // Wait for completion + if (exitCode === 0 || exitCode === 2) { + await completionPromise + } + + // Cleanup + clearTimeout(timeoutTimer) + if (idleTimer) clearTimeout(idleTimer) + process.removeListener('SIGINT', signalHandler) + process.removeListener('SIGTERM', signalHandler) + + await client.stop() + + // Summary + const duration = ((Date.now() - startTime) / 1000).toFixed(1) + const status = blocked ? 'blocked' : exitCode === 1 ? (totalEvents === 0 ? 
'error' : 'timeout') : 'complete' + + process.stderr.write(`[headless] Status: ${status}\n`) + process.stderr.write(`[headless] Duration: ${duration}s\n`) + process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`) + + // On failure, print last 5 events for diagnostics + if (exitCode !== 0) { + const lastFive = recentEvents.slice(-5) + if (lastFive.length > 0) { + process.stderr.write('[headless] Last events:\n') + for (const e of lastFive) { + process.stderr.write(` ${e.type}${e.detail ? `: ${e.detail}` : ''}\n`) + } + } + } + + process.exit(exitCode) +} diff --git a/src/help-text.ts b/src/help-text.ts index ed4e5fdbc..a38471f76 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -31,6 +31,35 @@ const SUBCOMMAND_HELP: Record = { '', 'Compare with --continue (-c) which always resumes the most recent session.', ].join('\n'), + + headless: [ + 'Usage: gsd headless [flags] [command] [args...]', + '', + 'Run /gsd commands without the TUI. Default command: auto', + '', + 'Flags (before command):', + ' --timeout N Overall timeout in ms (default: 300000)', + ' --json JSONL event stream to stdout', + ' --verbose Detailed progress output', + ' --model ID Override model', + '', + 'Commands:', + ' auto /gsd auto (default)', + ' next /gsd next — one unit', + ' status /gsd status', + ' queue /gsd queue', + ' discuss /gsd discuss', + ' doctor [mode] /gsd doctor [fix|heal|audit]', + ' steer "desc" /gsd steer', + ' dispatch Direct unit-type dispatch', + ' ... 
Any /gsd subcommand', + '', + 'Dispatch phases:', + ' research, plan, execute, complete, reassess, uat, replan', + ' Also: research-milestone, plan-slice, execute-task, etc.', + '', + 'Exit codes: 0 = complete, 1 = error/timeout, 2 = blocked', + ].join('\n'), } export function printHelp(version: string): void { @@ -51,6 +80,7 @@ export function printHelp(version: string): void { process.stdout.write(' config Re-run the setup wizard\n') process.stdout.write(' update Update GSD to the latest version\n') process.stdout.write(' sessions List and resume a past session\n') + process.stdout.write(' headless [cmd] [args] Run /gsd commands without TUI (default: auto)\n') process.stdout.write('\nRun gsd --help for subcommand-specific help.\n') } From 93ee6646f1ac33c8dbfb942bc79dcf8f18510e24 Mon Sep 17 00:00:00 2001 From: frizynn Date: Mon, 16 Mar 2026 16:18:30 -0300 Subject: [PATCH 2/7] test: add integration test for gsd headless command End-to-end test that validates the headless CLI subcommand by: - Creating a temp dir with a complete .gsd/ project fixture - Spawning `node dist/loader.js headless --step --json` - Validating exit code, JSONL stdout, stderr progress, and artifact Supports --dry-run for fixture validation without running the agent. --- .../gsd/tests/integration/headless-command.ts | 534 ++++++++++++++++++ 1 file changed, 534 insertions(+) create mode 100644 src/resources/extensions/gsd/tests/integration/headless-command.ts diff --git a/src/resources/extensions/gsd/tests/integration/headless-command.ts b/src/resources/extensions/gsd/tests/integration/headless-command.ts new file mode 100644 index 000000000..fc5f3582d --- /dev/null +++ b/src/resources/extensions/gsd/tests/integration/headless-command.ts @@ -0,0 +1,534 @@ +/** + * Integration test for `gsd headless` CLI subcommand + * + * Validates that the headless CLI entry point works end-to-end: + * 1. Creates a temp dir with a complete .gsd/ project fixture + * 2. 
Initializes a git repo in the temp dir + * 3. Spawns `node dist/loader.js headless --step --json` as a child process + * 4. Waits for the process to exit (with a 5-minute timeout) + * 5. Validates exit code, JSONL stdout, stderr progress, and task artifact + * + * Auth: Uses OAuth credentials from ~/.gsd/agent/auth.json (Claude Code Max). + * Falls back to ANTHROPIC_API_KEY env var if OAuth is not configured (D013). + * + * Usage: + * npx tsx src/resources/extensions/gsd/tests/integration/headless-command.ts + * Add --dry-run to validate fixture without running the agent. + */ + +import { mkdtempSync, mkdirSync, writeFileSync, existsSync, readFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir, homedir } from "node:os"; +import { fileURLToPath } from "node:url"; +import { dirname } from "node:path"; +import { spawn, execSync } from "node:child_process"; + +// ── Configuration ──────────────────────────────────────────────────────────── + +const TIMEOUT_MS = parseInt(process.env.HEADLESS_TIMEOUT_MS ?? "300000", 10); // 5 minutes +const DRY_RUN = process.argv.includes("--dry-run"); + +// ── Fixture Data ───────────────────────────────────────────────────────────── +// A complete .gsd/ project state that deriveState() can parse. +// The trivial task asks the agent to create a single file — zero questions needed. + +const FIXTURE_PROJECT_MD = `# Project + +## What This Is + +Headless proof test project. A minimal fixture used to validate GSD auto-mode via RPC. + +## Core Value + +Proves headless auto-mode works end-to-end. + +## Current State + +Empty project with GSD milestone planned. + +## Architecture / Key Patterns + +- Single milestone, single slice, single task + +## Capability Contract + +None. 
+ +## Milestone Sequence + +- [ ] M001: Headless Proof — Create a test file to prove the agent loop works +`; + +const FIXTURE_STATE_MD = `# GSD State + +**Active Milestone:** M001 — Headless Proof +**Active Slice:** S01 — Create Test File +**Phase:** executing +**Requirements Status:** 0 active · 0 validated · 0 deferred · 0 out of scope + +## Milestone Registry +- 🔄 **M001:** Headless Proof + +## Recent Decisions +- None recorded + +## Blockers +- None + +## Next Action +Execute T01: Create hello.txt in slice S01. +`; + +const FIXTURE_CONTEXT_MD = `# M001: Headless Proof — Context + +**Gathered:** 2025-01-01 +**Status:** Ready for planning + +## Project Description + +A minimal test project for validating GSD auto-mode in headless/RPC mode. + +## Why This Milestone + +Proves that the agent loop can complete a task without a TUI attached. + +## User-Visible Outcome + +### When this milestone is complete, the user can: + +- Run GSD in headless mode and have it complete a trivial task + +### Entry point / environment + +- Entry point: RPC mode via headless-proof.ts +- Environment: local dev +- Live dependencies involved: none + +## Completion Class + +- Contract complete means: agent creates the requested file +- Integration complete means: not applicable +- Operational complete means: not applicable + +## Final Integrated Acceptance + +To call this milestone complete, we must prove: + +- Agent creates hello.txt with the correct content + +## Risks and Unknowns + +- None — this is a trivial proof task + +## Existing Codebase / Prior Art + +- None + +## Relevant Requirements + +- None + +## Scope + +### In Scope + +- Creating a single file + +### Out of Scope / Non-Goals + +- Everything else + +## Technical Constraints + +- None + +## Integration Points + +- None + +## Open Questions + +- None +`; + +const FIXTURE_ROADMAP_MD = `# M001: Headless Proof + +**Vision:** Prove GSD auto-mode works headlessly. 
+ +## Success Criteria + +- Agent creates hello.txt with content "Hello from headless GSD" + +## Key Risks / Unknowns + +- None + +## Slices + +- [ ] **S01: Create Test File** \`risk:low\` \`depends:[]\` + > After this: hello.txt exists in the project root + +## Boundary Map + +### S01 + +Produces: +- hello.txt file in project root + +Consumes: +- nothing (first slice) +`; + +const FIXTURE_PLAN_MD = `# S01: Create Test File + +**Goal:** Create a single file to prove the agent loop works headlessly. +**Demo:** hello.txt exists with the correct content after the agent runs. + +## Must-Haves + +- hello.txt created with content "Hello from headless GSD" + +## Verification + +- File hello.txt exists in project root with content "Hello from headless GSD" + +## Tasks + +- [ ] **T01: Create hello.txt** \`est:5m\` + - Why: Proves the agent can execute a tool call and produce an artifact + - Files: \`hello.txt\` + - Do: Create a file called hello.txt in the project root with the content "Hello from headless GSD" + - Verify: File exists with correct content + - Done when: hello.txt exists with content "Hello from headless GSD" + +## Files Likely Touched + +- \`hello.txt\` +`; + +const FIXTURE_TASK_PLAN_MD = `--- +estimated_steps: 1 +estimated_files: 1 +--- + +# T01: Create hello.txt + +**Slice:** S01 — Create Test File +**Milestone:** M001 + +## Description + +Create a file called hello.txt in the project root with the content "Hello from headless GSD". + +## Steps + +1. 
Create the file hello.txt with the content "Hello from headless GSD" + +## Must-Haves + +- [ ] hello.txt created with content "Hello from headless GSD" + +## Verification + +- File hello.txt exists in project root with content "Hello from headless GSD" + +## Expected Output + +- \`hello.txt\` — file containing "Hello from headless GSD" +`; + +// ── Fixture Creation ───────────────────────────────────────────────────────── + +function createFixture(): string { + const tmpDir = mkdtempSync(join(tmpdir(), "gsd-headless-cmd-")); + + // Initialize git repo (GSD requires it for branch-per-slice) + execSync("git init -b main", { cwd: tmpDir, stdio: "pipe" }); + execSync('git config user.email "test@test.com"', { cwd: tmpDir, stdio: "pipe" }); + execSync('git config user.name "Test"', { cwd: tmpDir, stdio: "pipe" }); + + // Create .gsd/ structure + const gsdDir = join(tmpDir, ".gsd"); + const milestonesDir = join(gsdDir, "milestones"); + const m001Dir = join(milestonesDir, "M001"); + const slicesDir = join(m001Dir, "slices"); + const s01Dir = join(slicesDir, "S01"); + const tasksDir = join(s01Dir, "tasks"); + + mkdirSync(tasksDir, { recursive: true }); + + // Write fixture files + writeFileSync(join(gsdDir, "PROJECT.md"), FIXTURE_PROJECT_MD); + writeFileSync(join(gsdDir, "STATE.md"), FIXTURE_STATE_MD); + writeFileSync(join(m001Dir, "M001-CONTEXT.md"), FIXTURE_CONTEXT_MD); + writeFileSync(join(m001Dir, "M001-ROADMAP.md"), FIXTURE_ROADMAP_MD); + writeFileSync(join(s01Dir, "S01-PLAN.md"), FIXTURE_PLAN_MD); + writeFileSync(join(tasksDir, "T01-PLAN.md"), FIXTURE_TASK_PLAN_MD); + + // Add .gitignore for runtime files + writeFileSync(join(tmpDir, ".gitignore"), [ + ".gsd/auto.lock", + ".gsd/completed-units.json", + ".gsd/metrics.json", + ".gsd/activity/", + ".gsd/runtime/", + ].join("\n") + "\n"); + + // Initial commit so GSD has a clean git state + execSync("git add -A && git commit -m 'init: headless command test fixture'", { + cwd: tmpDir, + stdio: "pipe", + }); + + return 
tmpDir; +} + +function cleanup(dir: string): void { + try { + rmSync(dir, { recursive: true, force: true }); + } catch { + // Best effort + console.warn(` [warn] Failed to clean up temp dir: ${dir}`); + } +} + +// ── JSONL Parsing ──────────────────────────────────────────────────────────── + +interface JsonlEvent { + type?: string; + [key: string]: unknown; +} + +function parseJsonlLines(output: string): JsonlEvent[] { + const events: JsonlEvent[] = []; + for (const line of output.split("\n")) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + events.push(JSON.parse(trimmed) as JsonlEvent); + } catch { + // Not valid JSON — skip (could be non-JSONL output) + } + } + return events; +} + +// ── Main ───────────────────────────────────────────────────────────────────── + +async function main(): Promise { + const __filename = fileURLToPath(import.meta.url); + const __dirname = dirname(__filename); + // Resolve gsd-2 repo root (6 levels up from tests/integration/) + const repoRoot = join(__dirname, "..", "..", "..", "..", "..", ".."); + + console.log("=== GSD Headless Command Integration Test ===\n"); + + // ── Step 1: Create fixture ────────────────────────────────────────────── + console.log("[1/6] Creating fixture..."); + const fixtureDir = createFixture(); + console.log(` Fixture created at: ${fixtureDir}`); + + // Validate fixture structure + const requiredFiles = [ + ".gsd/PROJECT.md", + ".gsd/STATE.md", + ".gsd/milestones/M001/M001-CONTEXT.md", + ".gsd/milestones/M001/M001-ROADMAP.md", + ".gsd/milestones/M001/slices/S01/S01-PLAN.md", + ".gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md", + ]; + + for (const file of requiredFiles) { + const fullPath = join(fixtureDir, file); + if (!existsSync(fullPath)) { + console.error(` FAIL: Missing fixture file: ${file}`); + cleanup(fixtureDir); + process.exit(1); + } + console.log(` OK ${file}`); + } + + // ── Step 2: Validate environment ──────────────────────────────────────── + console.log("\n[2/6] 
Validating environment..."); + + // Auth: prefer OAuth credentials from ~/.gsd/agent/auth.json (D013). + // Fall back to ANTHROPIC_API_KEY env var if present. + const authJsonPath = join(homedir(), ".gsd", "agent", "auth.json"); + let hasOAuth = false; + if (existsSync(authJsonPath)) { + try { + const authData = JSON.parse(readFileSync(authJsonPath, "utf-8")); + hasOAuth = authData?.anthropic?.type === "oauth"; + } catch { + // Non-fatal + } + } + + if (hasOAuth) { + console.log(" OK OAuth credentials found in ~/.gsd/agent/auth.json (Claude Code Max)"); + } else if (process.env.ANTHROPIC_API_KEY) { + console.log(" OK ANTHROPIC_API_KEY present (env var fallback)"); + } else { + console.error(" FAIL: No auth available. Need either:"); + console.error(" - OAuth credentials in ~/.gsd/agent/auth.json (Claude Code Max)"); + console.error(" - ANTHROPIC_API_KEY environment variable"); + cleanup(fixtureDir); + process.exit(1); + } + + const loaderPath = join(repoRoot, "dist", "loader.js"); + if (!existsSync(loaderPath)) { + console.error(` FAIL: CLI not found at ${loaderPath}. Run 'npm run build' first.`); + cleanup(fixtureDir); + process.exit(1); + } + console.log(` OK CLI found at ${loaderPath}`); + + // ── Step 3: Dry-run exit ──────────────────────────────────────────────── + if (DRY_RUN) { + console.log("\n[dry-run] Fixture validated. 
Skipping headless execution."); + console.log("[dry-run] All checks passed.\n"); + cleanup(fixtureDir); + process.exit(0); + } + + // ── Step 4: Spawn headless command ────────────────────────────────────── + console.log("\n[3/6] Spawning headless command..."); + console.log(` Command: node ${loaderPath} headless --step --json`); + console.log(` CWD: ${fixtureDir}`); + console.log(` Timeout: ${TIMEOUT_MS / 1000}s`); + + const { exitCode, stdout, stderr } = await new Promise<{ + exitCode: number | null; + stdout: string; + stderr: string; + }>((resolve) => { + let stdoutBuf = ""; + let stderrBuf = ""; + let settled = false; + + const child = spawn("node", [loaderPath, "headless", "--step", "--json"], { + cwd: fixtureDir, + env: { ...process.env }, + stdio: ["ignore", "pipe", "pipe"], + }); + + child.stdout.on("data", (chunk: Buffer) => { + stdoutBuf += chunk.toString(); + }); + + child.stderr.on("data", (chunk: Buffer) => { + const text = chunk.toString(); + stderrBuf += text; + // Stream stderr for live progress visibility + process.stderr.write(` [headless] ${text}`); + }); + + const timer = setTimeout(() => { + if (!settled) { + settled = true; + console.error(`\n TIMEOUT: Process did not exit within ${TIMEOUT_MS / 1000}s. 
Killing...`); + child.kill("SIGTERM"); + // Give it a moment to exit gracefully, then force kill + setTimeout(() => { + if (!child.killed) child.kill("SIGKILL"); + }, 5000); + resolve({ exitCode: null, stdout: stdoutBuf, stderr: stderrBuf }); + } + }, TIMEOUT_MS); + + child.on("close", (code) => { + if (!settled) { + settled = true; + clearTimeout(timer); + resolve({ exitCode: code, stdout: stdoutBuf, stderr: stderrBuf }); + } + }); + + child.on("error", (err) => { + if (!settled) { + settled = true; + clearTimeout(timer); + stderrBuf += `\nSpawn error: ${err.message}`; + resolve({ exitCode: 1, stdout: stdoutBuf, stderr: stderrBuf }); + } + }); + }); + + // ── Step 5: Validate results ──────────────────────────────────────────── + console.log("\n[4/6] Validating process output..."); + + let allPassed = true; + + // Check 1: Exit code + const exitOk = exitCode === 0; + console.log(` ${exitOk ? "PASS" : "FAIL"} Exit code: ${exitCode ?? "null (timeout)"}`); + if (!exitOk) allPassed = false; + + // Check 2: stdout contains JSONL events + const events = parseJsonlLines(stdout); + const hasJsonlEvents = events.length > 0; + console.log(` ${hasJsonlEvents ? "PASS" : "FAIL"} JSONL events in stdout: ${events.length}`); + if (!hasJsonlEvents) allPassed = false; + + if (hasJsonlEvents) { + // Summarize event types + const typeCounts: Record = {}; + for (const event of events) { + const type = String(event.type ?? "unknown"); + typeCounts[type] = (typeCounts[type] ?? 0) + 1; + } + console.log(` Event types: ${JSON.stringify(typeCounts)}`); + } + + // Check 3: stderr contains progress output + const hasStderrOutput = stderr.trim().length > 0; + console.log(` ${hasStderrOutput ? 
"PASS" : "FAIL"} stderr contains progress output: ${hasStderrOutput} (${stderr.length} bytes)`); + if (!hasStderrOutput) allPassed = false; + + // ── Step 6: Verify artifact ───────────────────────────────────────────── + console.log("\n[5/6] Verifying task artifact..."); + + const helloPath = join(fixtureDir, "hello.txt"); + const artifactExists = existsSync(helloPath); + console.log(` ${artifactExists ? "PASS" : "FAIL"} hello.txt exists: ${artifactExists}`); + if (!artifactExists) allPassed = false; + + if (artifactExists) { + const content = readFileSync(helloPath, "utf-8").trim(); + const contentMatch = content === "Hello from headless GSD"; + console.log(` ${contentMatch ? "PASS" : "WARN"} hello.txt content: "${content.slice(0, 80)}"`); + } + + // ── Summary ───────────────────────────────────────────────────────────── + console.log("\n[6/6] Summary"); + console.log(` Exit code: ${exitCode ?? "null (timeout)"}`); + console.log(` JSONL events: ${events.length}`); + console.log(` stderr length: ${stderr.length} bytes`); + console.log(` hello.txt exists: ${artifactExists}`); + + // Cleanup + cleanup(fixtureDir); + + if (allPassed) { + console.log("\n=== PASSED ===\n"); + process.exit(0); + } else { + // Print diagnostic info on failure + if (stdout.length > 0) { + console.log(`\n--- stdout (last 2000 chars) ---`); + console.log(stdout.slice(-2000)); + } + if (stderr.length > 0) { + console.log(`\n--- stderr (last 2000 chars) ---`); + console.log(stderr.slice(-2000)); + } + console.log("\n=== FAILED ===\n"); + process.exit(1); + } +} + +main().catch((err) => { + console.error("Unhandled error:", err); + process.exit(1); +}); From 8ddea154e5565e9e300e4215a351918e3e37e699 Mon Sep 17 00:00:00 2001 From: frizynn Date: Mon, 16 Mar 2026 18:23:07 -0300 Subject: [PATCH 3/7] feat: redesign `gsd headless` for full workflow orchestration Replace --step flag with positional command routing so any /gsd subcommand can run headlessly. 
Add /gsd dispatch for direct unit-type dispatch (research, plan, execute, complete, reassess, uat, replan) with state-aware resolution. Quick commands (status, queue, doctor, etc.) resolve on first agent_end. Long-running commands (auto, next, dispatch) use idle timer + terminal notification detection. --- src/headless.ts | 81 +++++--- src/resources/extensions/gsd/auto.ts | 189 ++++++++++++++++++ src/resources/extensions/gsd/commands.ts | 23 ++- .../gsd/tests/integration/headless-command.ts | 6 +- 4 files changed, 267 insertions(+), 32 deletions(-) diff --git a/src/headless.ts b/src/headless.ts index 78f87e4ba..bb8d6b646 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -1,14 +1,14 @@ /** * Headless Orchestrator — `gsd headless` * - * Runs GSD's auto-mode (or a single unit via --step) without a TUI by - * spawning a child process in RPC mode, auto-responding to extension UI - * requests, and streaming progress to stderr. + * Runs any /gsd subcommand without a TUI by spawning a child process in + * RPC mode, auto-responding to extension UI requests, and streaming + * progress to stderr. 
* * Exit codes: - * 0 — complete (auto-mode finished successfully) + * 0 — complete (command finished successfully) * 1 — error or timeout - * 2 — blocked (auto-mode reported a blocker) + * 2 — blocked (command reported a blocker) */ import { existsSync } from 'node:fs' @@ -25,10 +25,11 @@ import { RpcClient } from '../packages/pi-coding-agent/dist/modes/rpc/rpc-client export interface HeadlessOptions { timeout: number - step: boolean json: boolean verbose: boolean model?: string + command: string + commandArgs: string[] } interface ExtensionUIRequest { @@ -56,29 +57,38 @@ interface TrackedEvent { export function parseHeadlessArgs(argv: string[]): HeadlessOptions { const options: HeadlessOptions = { timeout: 300_000, - step: false, json: false, verbose: false, + command: 'auto', + commandArgs: [], } const args = argv.slice(2) + let positionalStarted = false + for (let i = 0; i < args.length; i++) { const arg = args[i] if (arg === 'headless') continue - if (arg === '--timeout' && i + 1 < args.length) { - options.timeout = parseInt(args[++i], 10) - if (Number.isNaN(options.timeout) || options.timeout <= 0) { - process.stderr.write('[headless] Error: --timeout must be a positive integer (milliseconds)\n') - process.exit(1) + + if (!positionalStarted && arg.startsWith('--')) { + if (arg === '--timeout' && i + 1 < args.length) { + options.timeout = parseInt(args[++i], 10) + if (Number.isNaN(options.timeout) || options.timeout <= 0) { + process.stderr.write('[headless] Error: --timeout must be a positive integer (milliseconds)\n') + process.exit(1) + } + } else if (arg === '--json') { + options.json = true + } else if (arg === '--verbose') { + options.verbose = true + } else if (arg === '--model' && i + 1 < args.length) { + options.model = args[++i] } - } else if (arg === '--step') { - options.step = true - } else if (arg === '--json') { - options.json = true - } else if (arg === '--verbose') { - options.verbose = true - } else if (arg === '--model' && i + 1 < 
args.length) { - options.model = args[++i] + } else if (!positionalStarted) { + positionalStarted = true + options.command = arg + } else { + options.commandArgs.push(arg) } } @@ -197,6 +207,21 @@ function isBlockedNotification(event: Record): boolean { return String(event.message ?? '').toLowerCase().includes('blocked') } +// --------------------------------------------------------------------------- +// Quick Command Detection +// --------------------------------------------------------------------------- + +const QUICK_COMMANDS = new Set([ + 'status', 'queue', 'history', 'hooks', 'export', 'stop', 'pause', + 'capture', 'skip', 'undo', 'knowledge', 'config', 'prefs', + 'cleanup', 'migrate', 'doctor', 'remote', 'help', 'steer', + 'triage', 'visualize', +]) + +function isQuickCommand(command: string): boolean { + return QUICK_COMMANDS.has(command) +} + // --------------------------------------------------------------------------- // Main Orchestrator // --------------------------------------------------------------------------- @@ -326,11 +351,15 @@ export async function runHeadless(options: HeadlessOptions): Promise { } } - // agent_end after tool execution — possible completion - if (eventObj.type === 'agent_end' && sawToolExecution && !completed) { - // Don't immediately resolve — wait for potential terminal notify or idle timeout. - // The idle timer handles this case. + // Quick commands: resolve on first agent_end + if (eventObj.type === 'agent_end' && isQuickCommand(options.command) && !completed) { + completed = true + resolveCompletion() + return } + + // Long-running commands: agent_end after tool execution — possible completion + // The idle timer + terminal notification handle this case. 
}) // Signal handling @@ -379,11 +408,11 @@ export async function runHeadless(options: HeadlessOptions): Promise { }) if (!options.json) { - process.stderr.write('[headless] Starting auto-mode...\n') + process.stderr.write(`[headless] Running /gsd ${options.command}${options.commandArgs.length > 0 ? ' ' + options.commandArgs.join(' ') : ''}...\n`) } // Send the command - const command = options.step ? '/gsd next' : '/gsd auto' + const command = `/gsd ${options.command}${options.commandArgs.length > 0 ? ' ' + options.commandArgs.join(' ') : ''}` try { await client.prompt(command) } catch (err) { diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 87ef155f4..80799d96a 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -3380,3 +3380,192 @@ export async function dispatchHookUnit( return true; } + + +// ─── Direct Phase Dispatch ──────────────────────────────────────────────────── + +export async function dispatchDirectPhase( + ctx: ExtensionCommandContext, + pi: ExtensionAPI, + phase: string, + base: string, +): Promise { + const state = await deriveState(base); + const mid = state.activeMilestone?.id; + const midTitle = state.activeMilestone?.title ?? ""; + + if (!mid) { + ctx.ui.notify("Cannot dispatch: no active milestone.", "warning"); + return; + } + + const normalized = phase.toLowerCase(); + let unitType: string; + let unitId: string; + let prompt: string; + + switch (normalized) { + case "research": + case "research-milestone": + case "research-slice": { + const isSlice = normalized === "research-slice" || (normalized === "research" && state.phase !== "pre-planning"); + if (isSlice) { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? 
""; + if (!sid) { + ctx.ui.notify("Cannot dispatch research-slice: no active slice.", "warning"); + return; + } + unitType = "research-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildResearchSlicePrompt(mid, midTitle, sid, sTitle, base); + } else { + unitType = "research-milestone"; + unitId = mid; + prompt = await buildResearchMilestonePrompt(mid, midTitle, base); + } + break; + } + + case "plan": + case "plan-milestone": + case "plan-slice": { + const isSlice = normalized === "plan-slice" || (normalized === "plan" && state.phase !== "pre-planning"); + if (isSlice) { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch plan-slice: no active slice.", "warning"); + return; + } + unitType = "plan-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, base); + } else { + unitType = "plan-milestone"; + unitId = mid; + prompt = await buildPlanMilestonePrompt(mid, midTitle, base); + } + break; + } + + case "execute": + case "execute-task": { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? ""; + const tid = state.activeTask?.id; + const tTitle = state.activeTask?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch execute-task: no active slice.", "warning"); + return; + } + if (!tid) { + ctx.ui.notify("Cannot dispatch execute-task: no active task.", "warning"); + return; + } + unitType = "execute-task"; + unitId = `${mid}/${sid}/${tid}`; + prompt = await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, base); + break; + } + + case "complete": + case "complete-slice": + case "complete-milestone": { + const isSlice = normalized === "complete-slice" || (normalized === "complete" && state.phase === "summarizing"); + if (isSlice) { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? 
""; + if (!sid) { + ctx.ui.notify("Cannot dispatch complete-slice: no active slice.", "warning"); + return; + } + unitType = "complete-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildCompleteSlicePrompt(mid, midTitle, sid, sTitle, base); + } else { + unitType = "complete-milestone"; + unitId = mid; + prompt = await buildCompleteMilestonePrompt(mid, midTitle, base); + } + break; + } + + case "reassess": + case "reassess-roadmap": { + const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP"); + const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null; + if (!roadmapContent) { + ctx.ui.notify("Cannot dispatch reassess-roadmap: no roadmap found.", "warning"); + return; + } + const roadmap = parseRoadmap(roadmapContent); + const completedSlices = roadmap.slices.filter(s => s.done); + if (completedSlices.length === 0) { + ctx.ui.notify("Cannot dispatch reassess-roadmap: no completed slices.", "warning"); + return; + } + const completedSliceId = completedSlices[completedSlices.length - 1].id; + unitType = "reassess-roadmap"; + unitId = `${mid}/${completedSliceId}`; + prompt = await buildReassessRoadmapPrompt(mid, midTitle, completedSliceId, base); + break; + } + + case "uat": + case "run-uat": { + const sid = state.activeSlice?.id; + if (!sid) { + ctx.ui.notify("Cannot dispatch run-uat: no active slice.", "warning"); + return; + } + const uatFile = resolveSliceFile(base, mid, sid, "UAT"); + if (!uatFile) { + ctx.ui.notify("Cannot dispatch run-uat: no UAT file found.", "warning"); + return; + } + const uatContent = await loadFile(uatFile); + if (!uatContent) { + ctx.ui.notify("Cannot dispatch run-uat: UAT file is empty.", "warning"); + return; + } + const uatPath = relSliceFile(base, mid, sid, "UAT"); + unitType = "run-uat"; + unitId = `${mid}/${sid}`; + prompt = await buildRunUatPrompt(mid, sid, uatPath, uatContent, base); + break; + } + + case "replan": + case "replan-slice": { + const sid = state.activeSlice?.id; + const sTitle = 
state.activeSlice?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch replan-slice: no active slice.", "warning"); + return; + } + unitType = "replan-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildReplanSlicePrompt(mid, midTitle, sid, sTitle, base); + break; + } + + default: + ctx.ui.notify( + `Unknown phase "${phase}". Valid phases: research, plan, execute, complete, reassess, uat, replan.`, + "warning", + ); + return; + } + + ctx.ui.notify(`Dispatching ${unitType} for ${unitId}...`, "info"); + const result = await ctx.newSession(); + if (result.cancelled) { + ctx.ui.notify("Session creation cancelled.", "warning"); + return; + } + pi.sendMessage( + { customType: "gsd-dispatch", content: prompt, display: false }, + { triggerTurn: true }, + ); +} diff --git a/src/resources/extensions/gsd/commands.ts b/src/resources/extensions/gsd/commands.ts index 01d0ee490..76229dfcf 100644 --- a/src/resources/extensions/gsd/commands.ts +++ b/src/resources/extensions/gsd/commands.ts @@ -14,7 +14,7 @@ import { deriveState } from "./state.js"; import { GSDDashboardOverlay } from "./dashboard-overlay.js"; import { GSDVisualizerOverlay } from "./visualizer-overlay.js"; import { showQueue, showDiscuss } from "./guided-flow.js"; -import { startAuto, stopAuto, pauseAuto, isAutoActive, isAutoPaused, isStepMode, stopAutoRemote } from "./auto.js"; +import { startAuto, stopAuto, pauseAuto, isAutoActive, isAutoPaused, isStepMode, stopAutoRemote, dispatchDirectPhase } from "./auto.js"; import { resolveProjectRoot } from "./worktree.js"; import { appendCapture, hasPendingCaptures, loadPendingCaptures } from "./captures.js"; import { @@ -69,11 +69,11 @@ function projectRoot(): string { export function registerGSDCommand(pi: ExtensionAPI): void { pi.registerCommand("gsd", { - description: "GSD — Get Shit Done: /gsd 
help|next|auto|stop|pause|status|visualize|queue|quick|capture|triage|history|undo|skip|export|cleanup|mode|prefs|config|hooks|run-hook|skill-health|doctor|forensics|migrate|remote|steer|knowledge", + description: "GSD — Get Shit Done: /gsd help|next|auto|stop|pause|status|visualize|queue|quick|capture|triage|dispatch|history|undo|skip|export|cleanup|mode|prefs|config|hooks|run-hook|skill-health|doctor|forensics|migrate|remote|steer|knowledge", getArgumentCompletions: (prefix: string) => { const subcommands = [ "help", "next", "auto", "stop", "pause", "status", "visualize", "queue", "quick", "discuss", - "capture", "triage", + "capture", "triage", "dispatch", "history", "undo", "skip", "export", "cleanup", "mode", "prefs", "config", "hooks", "run-hook", "skill-health", "doctor", "forensics", "migrate", "remote", "steer", "inspect", "knowledge", ]; @@ -165,6 +165,13 @@ export function registerGSDCommand(pi: ExtensionAPI): void { return []; } + if (parts[0] === "dispatch" && parts.length <= 2) { + const phasePrefix = parts[1] ?? 
""; + return ["research", "plan", "execute", "complete", "reassess", "uat", "replan"] + .filter((cmd) => cmd.startsWith(phasePrefix)) + .map((cmd) => ({ value: `dispatch ${cmd}`, label: cmd })); + } + return []; }, @@ -388,6 +395,16 @@ Examples: return; } + if (trimmed === "dispatch" || trimmed.startsWith("dispatch ")) { + const phase = trimmed.replace(/^dispatch\s*/, "").trim(); + if (!phase) { + ctx.ui.notify("Usage: /gsd dispatch (research|plan|execute|complete|reassess|uat|replan)", "warning"); + return; + } + await dispatchDirectPhase(ctx, pi, phase, projectRoot()); + return; + } + if (trimmed === "inspect") { await handleInspect(ctx); return; diff --git a/src/resources/extensions/gsd/tests/integration/headless-command.ts b/src/resources/extensions/gsd/tests/integration/headless-command.ts index fc5f3582d..870c5c058 100644 --- a/src/resources/extensions/gsd/tests/integration/headless-command.ts +++ b/src/resources/extensions/gsd/tests/integration/headless-command.ts @@ -4,7 +4,7 @@ * Validates that the headless CLI entry point works end-to-end: * 1. Creates a temp dir with a complete .gsd/ project fixture * 2. Initializes a git repo in the temp dir - * 3. Spawns `node dist/loader.js headless --step --json` as a child process + * 3. Spawns `node dist/loader.js headless --json next` as a child process * 4. Waits for the process to exit (with a 5-minute timeout) * 5. 
Validates exit code, JSONL stdout, stderr progress, and task artifact * @@ -394,7 +394,7 @@ async function main(): Promise { // ── Step 4: Spawn headless command ────────────────────────────────────── console.log("\n[3/6] Spawning headless command..."); - console.log(` Command: node ${loaderPath} headless --step --json`); + console.log(` Command: node ${loaderPath} headless --json next`); console.log(` CWD: ${fixtureDir}`); console.log(` Timeout: ${TIMEOUT_MS / 1000}s`); @@ -407,7 +407,7 @@ async function main(): Promise { let stderrBuf = ""; let settled = false; - const child = spawn("node", [loaderPath, "headless", "--step", "--json"], { + const child = spawn("node", [loaderPath, "headless", "--json", "next"], { cwd: fixtureDir, env: { ...process.env }, stdio: ["ignore", "pipe", "pipe"], From f56b8c69f05284e22c7dfd327c65d9962bba4e85 Mon Sep 17 00:00:00 2001 From: frizynn Date: Mon, 16 Mar 2026 19:46:56 -0300 Subject: [PATCH 4/7] fix: simplify headless flags, add missing imports, document headless mode - Remove --verbose flag from headless (use --json for detailed output) - Remove redundant sawToolExecution state variable - Remove unused rejectCompletion - Add missing build*Prompt imports in auto.ts (fixes CI typecheck:extensions) - Document headless mode in README.md and docs/commands.md - Simplify help text with examples instead of exhaustive command catalog --- README.md | 23 ++++++++++++++++++ docs/commands.md | 36 ++++++++++++++++++++++++++++ src/headless.ts | 35 ++++----------------------- src/help-text.ts | 22 +++++------------ src/resources/extensions/gsd/auto.ts | 12 ++++++++++ 5 files changed, 82 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index af55473d6..759eba06a 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,26 @@ gsd Both terminals read and write the same `.gsd/` files on disk. Your decisions in terminal 2 are picked up automatically at the next phase boundary — no need to stop auto mode. 
+### Headless mode — CI and scripts + +`gsd headless` runs any `/gsd` command without a TUI. Designed for CI pipelines, cron jobs, and scripted automation. + +```bash +# Run auto mode in CI +gsd headless --timeout 600000 + +# One unit at a time (cron-friendly) +gsd headless next + +# Machine-readable status +gsd headless --json status + +# Force a specific pipeline phase +gsd headless dispatch plan +``` + +Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Pair with [remote questions](./docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed. + ### First launch On first run, GSD launches a branded setup wizard that walks you through LLM provider selection (OAuth or API key), then optional tool API keys (Brave Search, Context7, Jina, Slack, Discord). Every step is skippable — press Enter to skip any. If you have an existing Pi installation, your provider credentials (LLM and tool keys) are imported automatically. Run `gsd config` anytime to re-run the wizard. @@ -254,6 +274,8 @@ On first run, GSD launches a branded setup wizard that walks you through LLM pro | `Ctrl+Alt+V` | Toggle voice transcription | | `Ctrl+Alt+B` | Show background shell processes | | `gsd config` | Re-run the setup wizard (LLM provider + tool keys) | +| `gsd update` | Update GSD to the latest version | +| `gsd headless [cmd]` | Run `/gsd` commands without TUI (CI, cron, scripts) | | `gsd --continue` (`-c`) | Resume the most recent session for the current directory | --- @@ -482,6 +504,7 @@ GSD is a TypeScript application that embeds the Pi coding agent SDK. 
gsd (CLI binary) └─ loader.ts Sets PI_PACKAGE_DIR, GSD env vars, dynamic-imports cli.ts └─ cli.ts Wires SDK managers, loads extensions, starts InteractiveMode + ├─ headless.ts Headless orchestrator (spawns RPC child, auto-responds, detects completion) ├─ onboarding.ts First-run setup wizard (LLM provider + tool keys) ├─ wizard.ts Env hydration from stored auth.json credentials ├─ app-paths.ts ~/.gsd/agent/, ~/.gsd/sessions/, auth.json diff --git a/docs/commands.md b/docs/commands.md index c38c65f5f..2cdda8e0c 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -69,5 +69,41 @@ |------|-------------| | `gsd` | Start a new interactive session | | `gsd --continue` (`-c`) | Resume the most recent session for the current directory | +| `gsd --model ` | Override the default model for this session | +| `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) | +| `gsd --mode ` | Output mode for non-interactive use | +| `gsd --list-models [search]` | List available models and exit | | `gsd --debug` | Enable structured JSONL diagnostic logging for troubleshooting dispatch and state issues | | `gsd config` | Re-run the setup wizard (LLM provider + tool keys) | +| `gsd update` | Update GSD to the latest version | + +## Headless Mode + +`gsd headless` runs `/gsd` commands without a TUI — designed for CI, cron jobs, and scripted automation. It spawns a child process in RPC mode, auto-responds to interactive prompts, detects completion, and exits with meaningful exit codes. 
+ +```bash +# Run auto mode (default) +gsd headless + +# Run a single unit +gsd headless next + +# Machine-readable output +gsd headless --json status + +# With timeout for CI +gsd headless --timeout 600000 auto + +# Force a specific phase +gsd headless dispatch plan +``` + +| Flag | Description | +|------|-------------| +| `--timeout N` | Overall timeout in milliseconds (default: 300000 / 5 min) | +| `--json` | Stream all events as JSONL to stdout | +| `--model ID` | Override the model for the headless session | + +**Exit codes:** `0` = complete, `1` = error or timeout, `2` = blocked. + +Any `/gsd` subcommand works as a positional argument — `gsd headless status`, `gsd headless doctor`, `gsd headless dispatch execute`, etc. diff --git a/src/headless.ts b/src/headless.ts index bb8d6b646..dacdb40d7 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -26,7 +26,6 @@ import { RpcClient } from '../packages/pi-coding-agent/dist/modes/rpc/rpc-client export interface HeadlessOptions { timeout: number json: boolean - verbose: boolean model?: string command: string commandArgs: string[] @@ -58,7 +57,6 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { const options: HeadlessOptions = { timeout: 300_000, json: false, - verbose: false, command: 'auto', commandArgs: [], } @@ -79,9 +77,8 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { } } else if (arg === '--json') { options.json = true - } else if (arg === '--verbose') { - options.verbose = true } else if (arg === '--model' && i + 1 < args.length) { + // --model can also be passed from the main CLI; headless-specific takes precedence options.model = args[++i] } } else if (!positionalStarted) { @@ -147,23 +144,13 @@ function handleExtensionUIRequest( // Progress Formatter // --------------------------------------------------------------------------- -function formatProgress( - event: Record, - verbose: boolean, -): string | null { +function formatProgress(event: Record): string | 
null { const type = String(event.type ?? '') switch (type) { case 'tool_execution_start': return `[tool] ${event.toolName ?? 'unknown'}` - case 'tool_execution_end': - if (verbose) { - const result = String(event.result ?? '').slice(0, 200) - return `[tool:result] ${event.toolName ?? 'unknown'}: ${result}` - } - return null - case 'agent_start': return '[agent] Session started' @@ -176,14 +163,6 @@ function formatProgress( } return null - case 'message_update': - if (verbose) { - const msgEvent = event.assistantMessageEvent as Record | undefined - const text = String(msgEvent?.text ?? '').slice(0, 200) - if (text) return `[assistant] ${text}` - } - return null - default: return null } @@ -258,7 +237,6 @@ export async function runHeadless(options: HeadlessOptions): Promise { // Event tracking let totalEvents = 0 let toolCallCount = 0 - let sawToolExecution = false let blocked = false let completed = false let exitCode = 0 @@ -270,7 +248,6 @@ export async function runHeadless(options: HeadlessOptions): Promise { if (type === 'tool_execution_start') { toolCallCount++ - sawToolExecution = true } // Keep last 20 events for diagnostics @@ -290,10 +267,8 @@ export async function runHeadless(options: HeadlessOptions): Promise { // Completion promise let resolveCompletion: () => void - let rejectCompletion: (err: Error) => void - const completionPromise = new Promise((resolve, reject) => { + const completionPromise = new Promise((resolve) => { resolveCompletion = resolve - rejectCompletion = reject }) // Idle timeout — fallback completion detection @@ -301,7 +276,7 @@ export async function runHeadless(options: HeadlessOptions): Promise { function resetIdleTimer(): void { if (idleTimer) clearTimeout(idleTimer) - if (sawToolExecution) { + if (toolCallCount > 0) { idleTimer = setTimeout(() => { completed = true resolveCompletion() @@ -327,7 +302,7 @@ export async function runHeadless(options: HeadlessOptions): Promise { process.stdout.write(JSON.stringify(eventObj) + '\n') } 
else { // Progress output to stderr - const line = formatProgress(eventObj, options.verbose) + const line = formatProgress(eventObj) if (line) process.stderr.write(line + '\n') } diff --git a/src/help-text.ts b/src/help-text.ts index a38471f76..dc63b1198 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -37,26 +37,16 @@ const SUBCOMMAND_HELP: Record = { '', 'Run /gsd commands without the TUI. Default command: auto', '', - 'Flags (before command):', + 'Flags:', ' --timeout N Overall timeout in ms (default: 300000)', ' --json JSONL event stream to stdout', - ' --verbose Detailed progress output', ' --model ID Override model', '', - 'Commands:', - ' auto /gsd auto (default)', - ' next /gsd next — one unit', - ' status /gsd status', - ' queue /gsd queue', - ' discuss /gsd discuss', - ' doctor [mode] /gsd doctor [fix|heal|audit]', - ' steer "desc" /gsd steer', - ' dispatch Direct unit-type dispatch', - ' ... Any /gsd subcommand', - '', - 'Dispatch phases:', - ' research, plan, execute, complete, reassess, uat, replan', - ' Also: research-milestone, plan-slice, execute-task, etc.', + 'Examples:', + ' gsd headless Run /gsd auto', + ' gsd headless next Run one unit', + ' gsd headless --json status Machine-readable status', + ' gsd headless --timeout 60000 With 1-minute timeout', '', 'Exit codes: 0 = complete, 1 = error/timeout, 2 = blocked', ].join('\n'), diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 80799d96a..61356cf69 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -125,6 +125,18 @@ import { reconcileMergeState, } from "./auto-recovery.js"; import { resolveDispatch, resetRewriteCircuitBreaker } from "./auto-dispatch.js"; +import { + buildResearchSlicePrompt, + buildResearchMilestonePrompt, + buildPlanSlicePrompt, + buildPlanMilestonePrompt, + buildExecuteTaskPrompt, + buildCompleteSlicePrompt, + buildCompleteMilestonePrompt, + buildReassessRoadmapPrompt, + 
buildRunUatPrompt, + buildReplanSlicePrompt, +} from "./auto-prompts.js"; import { type AutoDashboardData, updateProgressWidget as _updateProgressWidget, From 09d62e01d148e777707453a06787baab8e692823 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Mon, 16 Mar 2026 16:59:48 -0600 Subject: [PATCH 5/7] feat(gsd): implement validate-milestone phase and dispatch Add a `validating-milestone` phase that runs BEFORE `completing-milestone` to reconcile planned work against delivered work. The validator checks success criteria, slice deliverables, cross-slice integration, and requirement coverage before allowing milestone completion. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/auto-dispatch.ts | 39 ++- src/resources/extensions/gsd/auto-prompts.ts | 73 ++++ src/resources/extensions/gsd/auto-recovery.ts | 15 + src/resources/extensions/gsd/complexity.ts | 1 + src/resources/extensions/gsd/doctor.ts | 14 + src/resources/extensions/gsd/preferences.ts | 4 +- .../gsd/prompts/validate-milestone.md | 101 +++--- src/resources/extensions/gsd/state.ts | 53 ++- .../gsd/tests/auto-preflight.test.ts | 1 + .../gsd/tests/complete-milestone.test.ts | 9 +- .../gsd/tests/derive-state-db.test.ts | 1 + .../gsd/tests/derive-state-deps.test.ts | 9 + .../gsd/tests/derive-state-draft.test.ts | 8 + .../extensions/gsd/tests/derive-state.test.ts | 14 + .../integration-mixed-milestones.test.ts | 8 + .../tests/migrate-writer-integration.test.ts | 10 +- .../gsd/tests/queue-reorder-e2e.test.ts | 1 + .../gsd/tests/validate-milestone.test.ts | 316 ++++++++++++++++++ src/resources/extensions/gsd/types.ts | 3 +- 19 files changed, 605 insertions(+), 75 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/validate-milestone.test.ts diff --git a/src/resources/extensions/gsd/auto-dispatch.ts b/src/resources/extensions/gsd/auto-dispatch.ts index a280a37c8..4f7258b09 100644 --- a/src/resources/extensions/gsd/auto-dispatch.ts +++ 
b/src/resources/extensions/gsd/auto-dispatch.ts @@ -14,9 +14,11 @@ import type { GSDPreferences } from "./preferences.js"; import type { UatType } from "./files.js"; import { loadFile, extractUatType, loadActiveOverrides } from "./files.js"; import { - resolveMilestoneFile, resolveSliceFile, - relSliceFile, + resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, + relSliceFile, buildMilestoneFileName, } from "./paths.js"; +import { existsSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; import { buildResearchMilestonePrompt, buildPlanMilestonePrompt, @@ -25,6 +27,7 @@ import { buildExecuteTaskPrompt, buildCompleteSlicePrompt, buildCompleteMilestonePrompt, + buildValidateMilestonePrompt, buildReplanSlicePrompt, buildRunUatPrompt, buildReassessRoadmapPrompt, @@ -254,6 +257,38 @@ const DISPATCH_RULES: DispatchRule[] = [ }; }, }, + { + name: "validating-milestone → validate-milestone", + match: async ({ state, mid, midTitle, basePath, prefs }) => { + if (state.phase !== "validating-milestone") return null; + // Skip preference: write a minimal pass-through VALIDATION file + if (prefs?.phases?.skip_milestone_validation) { + const mDir = resolveMilestonePath(basePath, mid); + if (mDir) { + if (!existsSync(mDir)) mkdirSync(mDir, { recursive: true }); + const validationPath = join(mDir, buildMilestoneFileName(mid, "VALIDATION")); + const content = [ + "---", + "verdict: pass", + "remediation_round: 0", + "---", + "", + "# Milestone Validation (skipped by preference)", + "", + "Milestone validation was skipped via `skip_milestone_validation` preference.", + ].join("\n"); + writeFileSync(validationPath, content, "utf-8"); + } + return { action: "skip" }; + } + return { + action: "dispatch", + unitType: "validate-milestone", + unitId: mid, + prompt: await buildValidateMilestonePrompt(mid, midTitle, basePath), + }; + }, + }, { name: "completing-milestone → complete-milestone", match: async ({ state, mid, midTitle, basePath }) => { diff 
--git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index ad389c2f7..9d7b93824 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -855,6 +855,79 @@ export async function buildCompleteMilestonePrompt( }); } +export async function buildValidateMilestonePrompt( + mid: string, midTitle: string, base: string, level?: InlineLevel, +): Promise { + const inlineLevel = level ?? resolveInlineLevel(); + const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); + const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); + + const inlined: string[] = []; + inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap")); + + // Inline all slice summaries and UAT results + const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null; + if (roadmapContent) { + const roadmap = parseRoadmap(roadmapContent); + const seenSlices = new Set(); + for (const slice of roadmap.slices) { + if (seenSlices.has(slice.id)) continue; + seenSlices.add(slice.id); + const summaryPath = resolveSliceFile(base, mid, slice.id, "SUMMARY"); + const summaryRel = relSliceFile(base, mid, slice.id, "SUMMARY"); + inlined.push(await inlineFile(summaryPath, summaryRel, `${slice.id} Summary`)); + + const uatPath = resolveSliceFile(base, mid, slice.id, "UAT-RESULT"); + const uatRel = relSliceFile(base, mid, slice.id, "UAT-RESULT"); + const uatInline = await inlineFileOptional(uatPath, uatRel, `${slice.id} UAT Result`); + if (uatInline) inlined.push(uatInline); + } + } + + // Inline existing VALIDATION file if this is a re-validation round + const validationPath = resolveMilestoneFile(base, mid, "VALIDATION"); + const validationRel = relMilestoneFile(base, mid, "VALIDATION"); + const validationContent = validationPath ? 
await loadFile(validationPath) : null; + let remediationRound = 0; + if (validationContent) { + const roundMatch = validationContent.match(/remediation_round:\s*(\d+)/); + remediationRound = roundMatch ? parseInt(roundMatch[1], 10) + 1 : 1; + inlined.push(`### Previous Validation (re-validation round ${remediationRound})\nSource: \`${validationRel}\`\n\n${validationContent.trim()}`); + } + + // Inline root GSD files + if (inlineLevel !== "minimal") { + const requirementsInline = await inlineRequirementsFromDb(base); + if (requirementsInline) inlined.push(requirementsInline); + const decisionsInline = await inlineDecisionsFromDb(base, mid); + if (decisionsInline) inlined.push(decisionsInline); + const projectInline = await inlineProjectFromDb(base); + if (projectInline) inlined.push(projectInline); + } + const knowledgeInline = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); + if (knowledgeInline) inlined.push(knowledgeInline); + // Inline milestone context file + const contextPath = resolveMilestoneFile(base, mid, "CONTEXT"); + const contextRel = relMilestoneFile(base, mid, "CONTEXT"); + const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context"); + if (contextInline) inlined.push(contextInline); + + const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`; + + const validationOutputPath = join(base, `${relMilestonePath(base, mid)}/${mid}-VALIDATION.md`); + const roadmapOutputPath = `${relMilestonePath(base, mid)}/${mid}-ROADMAP.md`; + + return loadPrompt("validate-milestone", { + workingDirectory: base, + milestoneId: mid, + milestoneTitle: midTitle, + roadmapPath: roadmapOutputPath, + inlinedContext, + validationPath: validationOutputPath, + remediationRound: String(remediationRound), + }); +} + export async function buildReplanSlicePrompt( mid: string, midTitle: string, sid: string, sTitle: string, base: string, ): Promise { diff --git 
a/src/resources/extensions/gsd/auto-recovery.ts b/src/resources/extensions/gsd/auto-recovery.ts index 6792f83e7..c4f5400e1 100644 --- a/src/resources/extensions/gsd/auto-recovery.ts +++ b/src/resources/extensions/gsd/auto-recovery.ts @@ -83,6 +83,10 @@ export function resolveExpectedArtifactPath(unitType: string, unitId: string, ba const dir = resolveSlicePath(base, mid, sid!); return dir ? join(dir, buildSliceFileName(sid!, "SUMMARY")) : null; } + case "validate-milestone": { + const dir = resolveMilestonePath(base, mid); + return dir ? join(dir, buildMilestoneFileName(mid, "VALIDATION")) : null; + } case "complete-milestone": { const dir = resolveMilestonePath(base, mid); return dir ? join(dir, buildMilestoneFileName(mid, "SUMMARY")) : null; @@ -244,6 +248,8 @@ export function diagnoseExpectedArtifact(unitType: string, unitId: string, base: return `${relSliceFile(base, mid!, sid!, "ASSESSMENT")} (roadmap reassessment)`; case "run-uat": return `${relSliceFile(base, mid!, sid!, "UAT-RESULT")} (UAT result)`; + case "validate-milestone": + return `${relMilestoneFile(base, mid!, "VALIDATION")} (milestone validation report)`; case "complete-milestone": return `${relMilestoneFile(base, mid!, "SUMMARY")} (milestone summary)`; default: @@ -537,6 +543,15 @@ export function buildLoopRemediationSteps(unitType: string, unitId: string, base ` 4. Resume auto-mode`, ].join("\n"); } + case "validate-milestone": { + if (!mid) break; + const artifactRel = relMilestoneFile(base, mid, "VALIDATION"); + return [ + ` 1. Write ${artifactRel} with verdict: pass`, + ` 2. Run \`gsd doctor\``, + ` 3. 
Resume auto-mode`, + ].join("\n"); + } default: break; } diff --git a/src/resources/extensions/gsd/complexity.ts b/src/resources/extensions/gsd/complexity.ts index 7fac93a73..c27c388be 100644 --- a/src/resources/extensions/gsd/complexity.ts +++ b/src/resources/extensions/gsd/complexity.ts @@ -87,6 +87,7 @@ const UNIT_TYPE_TIERS: Record = { "execute-task": "standard", "replan-slice": "heavy", "reassess-roadmap": "heavy", + "validate-milestone": "heavy", "complete-milestone": "standard", }; diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index ca28fbce0..cf26589ad 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -24,6 +24,7 @@ export type DoctorIssueCode = | "all_tasks_done_roadmap_not_checked" | "slice_checked_missing_summary" | "slice_checked_missing_uat" + | "all_slices_done_missing_milestone_validation" | "all_slices_done_missing_milestone_summary" | "task_done_must_haves_not_verified" | "active_requirement_missing_owner" @@ -1255,6 +1256,19 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; } } + // Milestone-level check: all slices done but no validation file + if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "VALIDATION") && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { + issues.push({ + severity: "info", + code: "all_slices_done_missing_milestone_validation", + scope: "milestone", + unitId: milestoneId, + message: `All slices are done but ${milestoneId}-VALIDATION.md is missing — milestone is in validating-milestone phase`, + file: relMilestoneFile(basePath, milestoneId, "VALIDATION"), + fixable: false, + }); + } + // Milestone-level check: all slices done but no milestone summary if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { issues.push({ diff --git a/src/resources/extensions/gsd/preferences.ts b/src/resources/extensions/gsd/preferences.ts 
index 686a5f72d..65e77d13d 100644 --- a/src/resources/extensions/gsd/preferences.ts +++ b/src/resources/extensions/gsd/preferences.ts @@ -688,6 +688,7 @@ export function resolveProfileDefaults(profile: TokenProfile): Partial 0, prior validation found issues and remediation slices were added and executed — verify those remediation slices resolved the issues. All relevant context has been preloaded below — the roadmap, all slice summaries, UAT results, requirements, decisions, and project context are inlined. Start working immediately without re-reading these files. {{inlinedContext}} -If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during validation, without relaxing required verification or artifact rules. +## Validation Steps -Then: +1. For each **success criterion** in `{{roadmapPath}}`, check whether slice summaries and UAT results provide evidence that it was met. Record pass/fail per criterion. +2. For each **slice** in the roadmap, verify its demo/deliverable claim against its summary. Flag any slice whose summary does not substantiate its claimed output. +3. Check **cross-slice integration points** — do boundary map entries (produces/consumes) align with what was actually built? +4. Check **requirement coverage** — are all active requirements addressed by at least one slice? +5. Determine a verdict: + - `pass` — all criteria met, all slices delivered, no gaps + - `needs-attention` — minor gaps that do not block completion (document them) + - `needs-remediation` — material gaps found; add remediation slices to the roadmap -### Step 1: Audit Success Criteria +## Output -Enumerate each success criterion from the roadmap's `## Success Criteria` section. For each criterion, map it to concrete evidence from slice summaries, UAT results, or observable behavior. 
+Write `{{validationPath}}` with this structure: -Format each criterion as: +```markdown +--- +verdict: <pass|needs-attention|needs-remediation> +remediation_round: {{remediationRound}} +--- -- `Criterion text` — **MET** — evidence: {{specific slice summary, UAT result, test output, or observable behavior}} -- `Criterion text` — **NOT MET** — gap: {{what's missing and why}} +# Milestone Validation: {{milestoneId}} -Every criterion must have a definitive verdict. Do not mark a criterion as MET without specific evidence. +## Success Criteria Checklist +- [x] Criterion 1 — evidence: ... +- [ ] Criterion 2 — gap: ... -### Step 2: Inventory Deferred Work +## Slice Delivery Audit +| Slice | Claimed | Delivered | Status | +|-------|---------|-----------|--------| +| S01 | ... | ... | pass | -Scan ALL slice summaries for: -- `Known Limitations` sections -- `Follow-ups` sections -- `Deviations` sections +## Cross-Slice Integration +(any boundary mismatches) -Scan ALL UAT results for: -- `Not Proven By This UAT` sections -- Any PARTIAL or FAIL verdicts +## Requirement Coverage +(any unaddressed requirements) -Check: -- `.gsd/REQUIREMENTS.md` for Active requirements not yet Validated -- `.gsd/CAPTURES.md` for unresolved deferred captures +## Verdict Rationale +(why this verdict was chosen) -Collect every item into a single inventory. Do not skip items because they seem minor — the classification step handles prioritization.
+## Remediation Plan +(only if verdict is needs-remediation — list new slices to add to the roadmap) +``` -### Step 3: Classify Each Gap - -For every unmet criterion and every deferred work item, classify it as one of: - -- **auto-remediable** — can be fixed by adding a new slice (missing feature, unfixed bug, untested path, incomplete integration) -- **human-required** — needs Lex's input (design decision, external service dependency, manual verification, judgment call, ambiguous requirement) -- **acceptable** — known limitation that's OK to ship (documented trade-off, explicitly scoped for a future milestone, minor rough edge with no user impact) - -Be conservative with **auto-remediable**. Only classify a gap as auto-remediable if you're confident a slice can resolve it without human judgment. When in doubt, classify as **human-required**. - -### Step 4: Act on Gaps - -**If this is remediation round 0 AND auto-remediable gaps exist:** - -1. Define remediation slices to address auto-remediable gaps. Follow the exact roadmap slice format: - `- [ ] **S0X: Title** \`risk:medium\` \`depends:[]\`` - Include a brief description of what each slice must accomplish. -2. Append these slices to `{{roadmapPath}}` after existing slices (do not modify completed slices). -3. Update the boundary map in the roadmap if the new slices introduce new integration points. -4. Set verdict to `needs-remediation`. - -**If this is remediation round 1 or higher:** - -Do NOT add more slices. At this point either: -- All remaining gaps are acceptable — set verdict to `pass` -- Remaining gaps need Lex's input — set verdict to `needs-attention` - -Never add remediation slices after round 0. If round 0 remediation didn't close the gaps, escalate. 
- -**If no auto-remediable gaps exist (any round):** - -- If all criteria are MET and deferred items are acceptable or human-required only — set verdict to `pass` (with human-required items noted) -- If human-required items are blocking — set verdict to `needs-attention` - -### Step 5: Write Validation Report - -Write `{{validationPath}}` using the milestone-validation template. Fill all frontmatter fields and every section. The report must be a complete record of the validation — a future agent reading only this file should understand what was checked, what passed, and what remains. +If verdict is `needs-remediation`: +- Add new slices to `{{roadmapPath}}` with unchecked `[ ]` status +- These slices will be planned and executed before validation re-runs **You MUST write `{{validationPath}}` before finishing.** -When done, say: "Milestone {{milestoneId}} validated." +When done, say: "Milestone {{milestoneId}} validation complete — verdict: <verdict>." diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 725f92e2f..33a16277c 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -53,6 +53,19 @@ export function isMilestoneComplete(roadmap: Roadmap): boolean { return roadmap.slices.length > 0 && roadmap.slices.every(s => s.done); } +/** + * Check whether a VALIDATION file's verdict is terminal (pass or needs-attention). + * A non-terminal verdict (needs-remediation) means validation must re-run + * after remediation slices are executed.
+ */ +export function isValidationTerminal(validationContent: string): boolean { + const match = validationContent.match(/^---\r?\n([\s\S]*?)\r?\n---/); // accept CRLF as well as LF frontmatter delimiters + if (!match) return false; + const verdict = match[1].match(/^[ \t]*verdict:\s*(\S+)/m); // anchor to a line start so keys merely ending in "verdict" cannot match + if (!verdict) return false; + return verdict[1] === 'pass' || verdict[1] === 'needs-attention'; +} + // ─── State Derivation ────────────────────────────────────────────────────── // ── deriveState memoization ───────────────────────────────────────────────── @@ -279,10 +292,20 @@ async function _deriveStateImpl(basePath: string): Promise { const complete = isMilestoneComplete(roadmap); if (complete) { - // All slices done — check if milestone summary exists + // All slices done — check validation and summary state + const validationFile = resolveMilestoneFile(basePath, mid, "VALIDATION"); + const validationContent = validationFile ? await cachedLoadFile(validationFile) : null; + const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; const summaryFile = resolveMilestoneFile(basePath, mid, "SUMMARY"); - if (!summaryFile && !activeMilestoneFound) { - // All slices complete but no summary written yet → completing-milestone + + if (!validationTerminal && !activeMilestoneFound) { + // No terminal validation yet → validating-milestone + activeMilestone = { id: mid, title }; + activeRoadmap = roadmap; + activeMilestoneFound = true; + registry.push({ id: mid, title, status: 'active' }); + } else if (!summaryFile && !activeMilestoneFound) { + // Validated but no summary written yet → completing-milestone activeMilestone = { id: mid, title }; activeRoadmap = roadmap; activeMilestoneFound = true; @@ -385,12 +408,34 @@ async function _deriveStateImpl(basePath: string): Promise { }; } - // Check if active milestone needs completion (all slices done, no summary) + // Check if active milestone needs validation or completion (all slices done) if (isMilestoneComplete(activeRoadmap)) { + const validationFile =
resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION"); + const validationContent = validationFile ? await cachedLoadFile(validationFile) : null; + const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; const sliceProgress = { done: activeRoadmap.slices.length, total: activeRoadmap.slices.length, }; + + if (!validationTerminal) { + return { + activeMilestone, + activeSlice: null, + activeTask: null, + phase: 'validating-milestone', + recentDecisions: [], + blockers: [], + nextAction: `Validate milestone ${activeMilestone.id} before completion.`, + registry, + requirements, + progress: { + milestones: milestoneProgress, + slices: sliceProgress, + }, + }; + } + return { activeMilestone, activeSlice: null, diff --git a/src/resources/extensions/gsd/tests/auto-preflight.test.ts b/src/resources/extensions/gsd/tests/auto-preflight.test.ts index b89b675ef..eb421646c 100644 --- a/src/resources/extensions/gsd/tests/auto-preflight.test.ts +++ b/src/resources/extensions/gsd/tests/auto-preflight.test.ts @@ -17,6 +17,7 @@ writeFileSync(join(gsd, "milestones", "M001", "slices", "S01", "S01-PLAN.md"), ` writeFileSync(join(gsd, "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md"), `---\nid: T01\nparent: S01\nmilestone: M001\nprovides: []\nrequires: []\naffects: []\nkey_files: []\nkey_decisions: []\npatterns_established: []\nobservability_surfaces: []\ndrill_down_paths: []\nduration: 5m\nverification_result: passed\ncompleted_at: 2026-03-09T00:00:00Z\n---\n\n# T01: Old Task\n\n**Done**\n\n## What Happened\nDone.\n\n## Diagnostics\n- log\n`); writeFileSync(join(gsd, "milestones", "M001", "slices", "S01", "S01-SUMMARY.md"), `---\nid: S01\nparent: M001\nmilestone: M001\nprovides: []\nrequires: []\naffects: []\nkey_files: []\nkey_decisions: []\npatterns_established: []\nobservability_surfaces: []\ndrill_down_paths: []\nduration: 5m\nverification_result: passed\ncompleted_at: 2026-03-09T00:00:00Z\n---\n\n# S01: Old 
Slice\n\n**Done**\n\n## What Happened\nDone.\n\n## Verification\nDone.\n\n## Deviations\nNone\n\n## Known Limitations\nNone\n\n## Follow-ups\nNone\n\n## Files Created/Modified\n- \`x\` — x\n\n## Forward Intelligence\n\n### What the next slice should know\n- x\n\n### What's fragile\n- x\n\n### Authoritative diagnostics\n- x\n\n### What assumptions changed\n- x\n`); +writeFileSync(join(gsd, "milestones", "M001", "M001-VALIDATION.md"), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.\n`); writeFileSync(join(gsd, "milestones", "M001", "M001-SUMMARY.md"), `---\nid: M001\nstatus: complete\ncompleted_at: 2026-03-09T00:00:00Z\n---\n\n# M001: Historical\n\nComplete.\n`); writeFileSync(join(gsd, "milestones", "M009", "M009-ROADMAP.md"), `# M009: Active\n\n## Slices\n- [ ] **S01: Active Slice** \`risk:low\` \`depends:[]\`\n > After this: active works\n`); diff --git a/src/resources/extensions/gsd/tests/complete-milestone.test.ts b/src/resources/extensions/gsd/tests/complete-milestone.test.ts index cb1a7124a..31c77e054 100644 --- a/src/resources/extensions/gsd/tests/complete-milestone.test.ts +++ b/src/resources/extensions/gsd/tests/complete-milestone.test.ts @@ -45,6 +45,12 @@ function writeMilestoneSummary(base: string, mid: string, content: string): void writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); } +function writeMilestoneValidation(base: string, mid: string, verdict: string = "pass"): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: ${verdict}\nremediation_round: 0\n---\n\n# Validation\nValidated.`); +} + function cleanup(base: string): void { rmSync(base, { recursive: true, force: true }); } @@ -176,7 +182,8 @@ async function main(): Promise { const roadmap = parseRoadmap(roadmapContent!); assertTrue(isMilestoneComplete(roadmap), "isMilestoneComplete returns true when all slices are [x]"); - // Verify deriveState returns 
completing-milestone phase + // Verify deriveState returns completing-milestone phase (with validation already done) + writeMilestoneValidation(base, "M001"); const state = await deriveState(base); assertEq(state.phase, "completing-milestone", "deriveState returns completing-milestone when all slices done, no summary"); assertEq(state.activeMilestone?.id, "M001", "active milestone is M001"); diff --git a/src/resources/extensions/gsd/tests/derive-state-db.test.ts b/src/resources/extensions/gsd/tests/derive-state-db.test.ts index 58391f028..684302731 100644 --- a/src/resources/extensions/gsd/tests/derive-state-db.test.ts +++ b/src/resources/extensions/gsd/tests/derive-state-db.test.ts @@ -310,6 +310,7 @@ async function main(): Promise { mkdirSync(join(base, '.gsd', 'milestones', 'M001'), { recursive: true }); mkdirSync(join(base, '.gsd', 'milestones', 'M002'), { recursive: true }); writeFile(base, 'milestones/M001/M001-ROADMAP.md', completedRoadmap); + writeFile(base, 'milestones/M001/M001-VALIDATION.md', `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`); writeFile(base, 'milestones/M001/M001-SUMMARY.md', summaryContent); writeFile(base, 'milestones/M002/M002-ROADMAP.md', activeRoadmap); diff --git a/src/resources/extensions/gsd/tests/derive-state-deps.test.ts b/src/resources/extensions/gsd/tests/derive-state-deps.test.ts index 42b07619c..12b75c232 100644 --- a/src/resources/extensions/gsd/tests/derive-state-deps.test.ts +++ b/src/resources/extensions/gsd/tests/derive-state-deps.test.ts @@ -26,6 +26,12 @@ function writeMilestoneSummary(base: string, mid: string, content: string): void writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); } +function writeMilestoneValidation(base: string, mid: string): void { + const dir = join(base, '.gsd', 'milestones', mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`); +} + /** * Creates 
M00x-CONTEXT.md with a valid YAML frontmatter block. * frontmatter is the raw YAML lines between the --- delimiters. @@ -120,6 +126,7 @@ async function main(): Promise { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', '# M001 Summary\n\nFirst milestone is complete.'); // M002: depends on M001, now unblocked @@ -252,6 +259,7 @@ async function main(): Promise { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeMilestoneValidation(base, 'M002'); writeMilestoneSummary(base, 'M002', '# M002 Summary\n\nSecond milestone is complete.'); const state = await deriveState(base); @@ -321,6 +329,7 @@ async function main(): Promise { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeMilestoneValidation(base, 'M004-0zjrg0'); writeMilestoneSummary(base, 'M004-0zjrg0', '# M004-0zjrg0 Summary\n\nComplete.'); // M005-b0m2hl: depends on M004-0zjrg0 (lowercase hex suffix) diff --git a/src/resources/extensions/gsd/tests/derive-state-draft.test.ts b/src/resources/extensions/gsd/tests/derive-state-draft.test.ts index 72b980a93..19ddc8247 100644 --- a/src/resources/extensions/gsd/tests/derive-state-draft.test.ts +++ b/src/resources/extensions/gsd/tests/derive-state-draft.test.ts @@ -54,6 +54,12 @@ function writeMilestoneSummary(base: string, mid: string, content: string): void writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); } +function writeMilestoneValidation(base: string, mid: string): void { + const dir = join(base, '.gsd', 'milestones', mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`); +} + function cleanup(base: string): void { rmSync(base, { recursive: true, force: true }); } @@ -143,6 +149,7 @@ async function main(): Promise { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. 
`); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', '# M001 Summary\n\nFirst milestone complete.'); // M002: only CONTEXT-DRAFT.md @@ -178,6 +185,7 @@ async function main(): Promise { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', '# M001 Summary\n\nComplete.'); // M002: draft only — should become active with needs-discussion diff --git a/src/resources/extensions/gsd/tests/derive-state.test.ts b/src/resources/extensions/gsd/tests/derive-state.test.ts index 6c97d31c0..20f21153d 100644 --- a/src/resources/extensions/gsd/tests/derive-state.test.ts +++ b/src/resources/extensions/gsd/tests/derive-state.test.ts @@ -38,6 +38,12 @@ function writeMilestoneSummary(base: string, mid: string, content: string): void writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); } +function writeMilestoneValidation(base: string, mid: string, verdict: string = 'pass'): void { + const dir = join(base, '.gsd', 'milestones', mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: ${verdict}\nremediation_round: 0\n---\n\n# Validation\nValidated.`); +} + function writeRequirements(base: string, content: string): void { writeFileSync(join(base, '.gsd', 'REQUIREMENTS.md'), content); } @@ -285,6 +291,7 @@ Continue from step 2. > After this: Done. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001 Summary\n\nMilestone complete.`); const state = await deriveState(base); @@ -381,6 +388,7 @@ Continue from step 2. > After this: Done. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001 Summary\n\nFirst milestone complete.`); // M002: active (has incomplete slices) @@ -486,6 +494,8 @@ Continue from step 2. > After this: S02 complete. 
`); + writeMilestoneValidation(base, 'M001'); + const state = await deriveState(base); assertEq(state.phase, 'completing-milestone', 'completing-ms: phase is completing-milestone'); @@ -521,6 +531,7 @@ Continue from step 2. > After this: Done. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001 Summary\n\nMilestone is complete.`); const state = await deriveState(base); @@ -550,6 +561,7 @@ Continue from step 2. - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001 Summary\n\nFirst milestone complete.`); // M002: all slices done, no summary → completing-milestone @@ -566,6 +578,8 @@ Continue from step 2. > After this: Done. `); + writeMilestoneValidation(base, 'M002'); + // M003: has incomplete slices → pending (M002 is active) writeRoadmap(base, 'M003', `# M003: Third Milestone diff --git a/src/resources/extensions/gsd/tests/integration-mixed-milestones.test.ts b/src/resources/extensions/gsd/tests/integration-mixed-milestones.test.ts index b01fed2bb..4cec135ce 100644 --- a/src/resources/extensions/gsd/tests/integration-mixed-milestones.test.ts +++ b/src/resources/extensions/gsd/tests/integration-mixed-milestones.test.ts @@ -51,6 +51,12 @@ function writeMilestoneSummary(base: string, mid: string, content: string): void writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); } +function writeMilestoneValidation(base: string, mid: string): void { + const dir = join(base, '.gsd', 'milestones', mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`); +} + function cleanup(base: string): void { rmSync(base, { recursive: true, force: true }); } @@ -166,6 +172,7 @@ async function main(): Promise { Did it. 
`); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001: Legacy Feature Summary **One-liner summary** @@ -265,6 +272,7 @@ Everything worked. Did it. `); + writeMilestoneValidation(base, 'M001'); writeMilestoneSummary(base, 'M001', `# M001: Legacy Feature Summary **One-liner summary** diff --git a/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts b/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts index bb6233b74..f86dae777 100644 --- a/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts +++ b/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts @@ -263,12 +263,12 @@ async function main(): Promise { // No REQUIREMENTS.md since empty requirements assertTrue(!existsSync(join(base, '.gsd', 'REQUIREMENTS.md')), 'complete: REQUIREMENTS.md NOT written (empty)'); - // deriveState: all slices done, all tasks done — needs milestone summary for 'complete' - // Without milestone summary, it should be 'completing-milestone' or 'summarizing' + // deriveState: all slices done, all tasks done — needs validation then milestone summary + // Without VALIDATION file, it should be 'validating-milestone' const state = await deriveState(base); - // All slices are done in roadmap. Milestone summary doesn't exist. - // deriveState should return 'completing-milestone' since all slices done but no milestone summary. - assertEq(state.phase, 'completing-milestone', 'complete: deriveState phase is completing-milestone'); + // All slices are done in roadmap. No VALIDATION or SUMMARY exists. + // deriveState should return 'validating-milestone' since validation gate precedes completion. 
+ assertEq(state.phase, 'validating-milestone', 'complete: deriveState phase is validating-milestone'); assertTrue(state.activeMilestone !== null, 'complete: deriveState has activeMilestone'); assertEq(state.activeMilestone!.id, 'M001', 'complete: deriveState activeMilestone is M001'); diff --git a/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts b/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts index 1077e70b1..b9140c561 100644 --- a/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts +++ b/src/resources/extensions/gsd/tests/queue-reorder-e2e.test.ts @@ -58,6 +58,7 @@ function writeCompleteMilestone(base: string, mid: string): void { - [x] **S01: Done** \`risk:low\` \`depends:[]\` > After this: Done. `); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), `---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed.`); writeFileSync(join(dir, `${mid}-SUMMARY.md`), `# ${mid} Summary\n\nComplete.`); } diff --git a/src/resources/extensions/gsd/tests/validate-milestone.test.ts b/src/resources/extensions/gsd/tests/validate-milestone.test.ts new file mode 100644 index 000000000..d0e0f4c2d --- /dev/null +++ b/src/resources/extensions/gsd/tests/validate-milestone.test.ts @@ -0,0 +1,316 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, writeFileSync, existsSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { randomUUID } from "node:crypto"; + +import { deriveState, isValidationTerminal } from "../state.ts"; +import { resolveExpectedArtifactPath, verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.ts"; +import { resolveDispatch, type DispatchContext } from "../auto-dispatch.ts"; +import type { GSDState } from "../types.ts"; +import { clearPathCache } from "../paths.ts"; +import { clearParseCache } from "../files.ts"; + +// ─── Helpers 
────────────────────────────────────────────────────────────── + +function makeTmpBase(): string { + const base = join(tmpdir(), `gsd-val-test-${randomUUID()}`); + mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + clearPathCache(); + clearParseCache(); + try { rmSync(base, { recursive: true, force: true }); } catch { /* */ } +} + +function writeRoadmap(base: string, mid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-ROADMAP.md`), content); +} + +function writeMilestoneSummary(base: string, mid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-SUMMARY.md`), content); +} + +function writeValidation(base: string, mid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${mid}-VALIDATION.md`), content); +} + +function writeSlicePlan(base: string, mid: string, sid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid, "slices", sid); + mkdirSync(join(dir, "tasks"), { recursive: true }); + writeFileSync(join(dir, `${sid}-PLAN.md`), content); +} + +function writeSliceSummary(base: string, mid: string, sid: string, content: string): void { + const dir = join(base, ".gsd", "milestones", mid, "slices", sid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${sid}-SUMMARY.md`), content); +} + +const ALL_DONE_ROADMAP = `# M001: Test Milestone + +## Vision +Test + +## Success Criteria +- It works + +## Slices + +- [x] **S01: First slice** \`risk:low\` \`depends:[]\` + > After this: it works + +## Boundary Map + +| From | To | Produces | Consumes | +|------|-----|----------|----------| +| S01 | terminal | output | nothing | +`; + +const 
CONTEXT_FILE = `--- +id: M001 +title: Test Milestone +--- + +# Context +Test context. +`; + +// ─── isValidationTerminal ───────────────────────────────────────────────── + +test("isValidationTerminal returns true for verdict: pass", () => { + const content = "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), true); +}); + +test("isValidationTerminal returns true for verdict: needs-attention", () => { + const content = "---\nverdict: needs-attention\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), true); +}); + +test("isValidationTerminal returns false for verdict: needs-remediation", () => { + const content = "---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), false); +}); + +test("isValidationTerminal returns false for missing frontmatter", () => { + const content = "# Validation\nNo frontmatter here."; + assert.equal(isValidationTerminal(content), false); +}); + +test("isValidationTerminal returns false for missing verdict field", () => { + const content = "---\nremediation_round: 0\n---\n\n# Validation"; + assert.equal(isValidationTerminal(content), false); +}); + +// ─── deriveState: validating-milestone ──────────────────────────────────── + +test("deriveState returns validating-milestone when all slices done and no VALIDATION file", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + // Write CONTEXT so milestone has a title + const dir = join(base, ".gsd", "milestones", "M001"); + writeFileSync(join(dir, "M001-CONTEXT.md"), CONTEXT_FILE); + + const state = await deriveState(base); + assert.equal(state.phase, "validating-milestone"); + assert.equal(state.activeMilestone?.id, "M001"); + assert.equal(state.activeSlice, null); + } finally { + cleanup(base); + } +}); + +test("deriveState returns completing-milestone when VALIDATION exists 
with terminal verdict", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + writeValidation(base, "M001", "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nAll good."); + + const state = await deriveState(base); + assert.equal(state.phase, "completing-milestone"); + assert.equal(state.activeMilestone?.id, "M001"); + } finally { + cleanup(base); + } +}); + +test("deriveState returns validating-milestone when VALIDATION exists with needs-remediation verdict", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + writeValidation(base, "M001", "---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation\nNeeds fixes."); + + const state = await deriveState(base); + assert.equal(state.phase, "validating-milestone"); + assert.equal(state.activeMilestone?.id, "M001"); + } finally { + cleanup(base); + } +}); + +test("deriveState returns complete when both VALIDATION and SUMMARY exist", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + writeValidation(base, "M001", "---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nPassed."); + writeMilestoneSummary(base, "M001", "# Summary\nDone."); + + const state = await deriveState(base); + assert.equal(state.phase, "complete"); + } finally { + cleanup(base); + } +}); + +// ─── Dispatch rule ──────────────────────────────────────────────────────── + +test("dispatch rule matches validating-milestone phase", async () => { + const state: GSDState = { + activeMilestone: { id: "M001", title: "Test" }, + activeSlice: null, + activeTask: null, + phase: "validating-milestone", + recentDecisions: [], + blockers: [], + nextAction: "Validate milestone M001.", + registry: [{ id: "M001", title: "Test", status: "active" }], + progress: { milestones: { done: 0, total: 1 } }, + }; + + const base = makeTmpBase(); + try { + // Set up minimal milestone structure for 
the prompt builder + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + + const ctx: DispatchContext = { + basePath: base, + mid: "M001", + midTitle: "Test", + state, + prefs: undefined, + }; + const result = await resolveDispatch(ctx); + assert.equal(result.action, "dispatch"); + if (result.action === "dispatch") { + assert.equal(result.unitType, "validate-milestone"); + assert.equal(result.unitId, "M001"); + } + } finally { + cleanup(base); + } +}); + +test("dispatch rule skips when skip_milestone_validation preference is set", async () => { + const state: GSDState = { + activeMilestone: { id: "M001", title: "Test" }, + activeSlice: null, + activeTask: null, + phase: "validating-milestone", + recentDecisions: [], + blockers: [], + nextAction: "Validate milestone M001.", + registry: [{ id: "M001", title: "Test", status: "active" }], + progress: { milestones: { done: 0, total: 1 } }, + }; + + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", ALL_DONE_ROADMAP); + + const ctx: DispatchContext = { + basePath: base, + mid: "M001", + midTitle: "Test", + state, + prefs: { phases: { skip_milestone_validation: true } }, + }; + const result = await resolveDispatch(ctx); + assert.equal(result.action, "skip"); + + // Verify the VALIDATION file was written + const validationPath = join(base, ".gsd", "milestones", "M001", "M001-VALIDATION.md"); + assert.ok(existsSync(validationPath), "VALIDATION file should be written on skip"); + } finally { + cleanup(base); + } +}); + +// ─── Artifact resolution & verification ─────────────────────────────────── + +test("resolveExpectedArtifactPath returns VALIDATION path for validate-milestone", () => { + const base = makeTmpBase(); + try { + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + const result = resolveExpectedArtifactPath("validate-milestone", "M001", base); + assert.ok(result); + assert.ok(result!.includes("VALIDATION")); + } finally { + cleanup(base); + } +}); + 
+test("verifyExpectedArtifact passes when VALIDATION.md exists", () => { + const base = makeTmpBase(); + try { + writeValidation(base, "M001", "---\nverdict: pass\n---\n# Val"); + clearPathCache(); + clearParseCache(); + const result = verifyExpectedArtifact("validate-milestone", "M001", base); + assert.equal(result, true); + } finally { + cleanup(base); + } +}); + +test("verifyExpectedArtifact fails when VALIDATION.md is missing", () => { + const base = makeTmpBase(); + try { + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + clearPathCache(); + clearParseCache(); + const result = verifyExpectedArtifact("validate-milestone", "M001", base); + assert.equal(result, false); + } finally { + cleanup(base); + } +}); + +// ─── diagnoseExpectedArtifact ───────────────────────────────────────────── + +test("diagnoseExpectedArtifact returns validation path for validate-milestone", () => { + const base = makeTmpBase(); + try { + const result = diagnoseExpectedArtifact("validate-milestone", "M001", base); + assert.ok(result); + assert.ok(result!.includes("VALIDATION")); + assert.ok(result!.includes("milestone validation report")); + } finally { + cleanup(base); + } +}); + +// ─── buildLoopRemediationSteps ──────────────────────────────────────────── + +test("buildLoopRemediationSteps returns steps for validate-milestone", () => { + const base = makeTmpBase(); + try { + const result = buildLoopRemediationSteps("validate-milestone", "M001", base); + assert.ok(result); + assert.ok(result!.includes("VALIDATION")); + assert.ok(result!.includes("verdict: pass")); + assert.ok(result!.includes("gsd doctor")); + } finally { + cleanup(base); + } +}); diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index 49da86004..add4f09d7 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -5,7 +5,7 @@ // ─── Enums & Literal Unions ──────────────────────────────────────────────── export 
type RiskLevel = 'low' | 'medium' | 'high'; -export type Phase = 'pre-planning' | 'needs-discussion' | 'discussing' | 'researching' | 'planning' | 'executing' | 'verifying' | 'summarizing' | 'advancing' | 'completing-milestone' | 'replanning-slice' | 'complete' | 'paused' | 'blocked'; +export type Phase = 'pre-planning' | 'needs-discussion' | 'discussing' | 'researching' | 'planning' | 'executing' | 'verifying' | 'summarizing' | 'advancing' | 'validating-milestone' | 'completing-milestone' | 'replanning-slice' | 'complete' | 'paused' | 'blocked'; export type ContinueStatus = 'in_progress' | 'interrupted' | 'compacted'; // ─── Roadmap (Milestone-level) ───────────────────────────────────────────── @@ -264,6 +264,7 @@ export interface PhaseSkipPreferences { skip_research?: boolean; skip_reassess?: boolean; skip_slice_research?: boolean; + skip_milestone_validation?: boolean; } export interface NotificationPreferences { From 50d6a52a2a5daf7142918422b97fb17b0882dfed Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Mon, 16 Mar 2026 18:57:02 -0600 Subject: [PATCH 6/7] docs: update changelog for v2.23.0 --- CHANGELOG.md | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cc05fb7d..a50e09286 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,16 +6,34 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [2.23.0] - 2026-03-16 + ### Added -- **`gsd sessions`** — interactive session picker: lists all saved sessions for the current directory with date, message count, and first-message preview; lets you pick one to resume. Compare with `--continue` which always resumes the most recent session. 
(#721) -- **10 new browser tools** — shipped from the #698 feature additions: `browser_save_pdf`, `browser_save_state`, `browser_restore_state`, `browser_mock_route`, `browser_block_urls`, `browser_clear_routes`, `browser_emulate_device`, `browser_extract`, `browser_visual_diff`, `browser_zoom_region`, `browser_generate_test`, `browser_check_injection`, `browser_action_cache` (#698) +- **VS Code extension** — full extension with chat participant, RPC integration, marketplace publishing under FluxLabs publisher +- **`gsd headless`** — redesigned headless mode for full workflow orchestration: auto-responds to prompts, detects completion, supports `--json` output and `--timeout` flags +- **`gsd sessions`** — interactive session picker for browsing and resuming saved sessions (#721) +- **13 new browser tools** — `browser_save_pdf`, `browser_save_state`, `browser_restore_state`, `browser_mock_route`, `browser_block_urls`, `browser_clear_routes`, `browser_emulate_device`, `browser_extract`, `browser_visual_diff`, `browser_zoom_region`, `browser_generate_test`, `browser_check_injection`, `browser_action_cache` (#698) +- **Structured discussion rounds** — `ask_user_questions` in guided-discuss-milestone for better requirement gathering (#688) +- **`validate-milestone` prompt** — milestone validation prompt and template +- **`models.json` resolution** — custom model definitions with fallback to `~/.pi/agent/models.json` + +### Changed +- **Background shell performance** — optimized hot path with parallel git queries and lazy workspace validation ### Fixed -- Shift-Tab now navigates to previous tab in the workflow visualizer (#717) -- Capture resolutions are now executed after triage instead of only being classified (#714) -- Screenshot constraining uses independent width/height caps to prevent squishing (#725) -- `auto.lock` is written at process startup; remote sessions are now detected in the dashboard (#723) -- Cross-platform test compatibility: use `process.ppid` 
instead of PID 1 +- Forensics uses `GSD_VERSION` env var instead of fragile package.json path traversal; now worktree-aware to prevent stale root misdiagnosis +- Background commands rewritten to prevent pipe-open hang; stalled-tool detection added with prompt guidance +- Auto mode breaks infinite skip loop on repeatedly-skipped completed units +- Roadmap parser expands range syntax in depends (e.g. `S01-S04` → `S01,S02,S03,S04`) +- Empty scaffold plan files rejected during plan-slice artifact verification (#699) +- Anti-pattern rule prevents `bash &` usage that causes agent hangs (#733) +- Shift-Tab navigates to previous tab in workflow visualizer (#717) +- Capture resolutions executed after triage instead of only classified (#714) +- Screenshot constraining uses independent width/height caps (#725) +- `auto.lock` written at startup; remote sessions detected in dashboard (#723) +- Cross-platform test compatibility with `process.ppid` +- CSP nonce, dead branch cleanup, restart cooldown fixes +- CI fix: `pi.getActiveTools()` replaces `ctx.getActiveTools()` ## [2.22.0] - 2026-03-16 @@ -872,7 +890,8 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Changed - License updated to MIT -[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.22.0...HEAD +[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.23.0...HEAD +[2.23.0]: https://github.com/gsd-build/gsd-2/compare/v2.22.0...v2.23.0 [2.21.0]: https://github.com/gsd-build/gsd-2/compare/v2.20.0...v2.21.0 [2.19.0]: https://github.com/gsd-build/gsd-2/compare/v2.18.0...v2.19.0 [2.18.0]: https://github.com/gsd-build/gsd-2/compare/v2.17.0...v2.18.0 From d91690bb44a08d48f727891664ebca095dd44d72 Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Mon, 16 Mar 2026 18:57:13 -0600 Subject: [PATCH 7/7] 2.23.0 --- native/npm/darwin-arm64/package.json | 2 +- native/npm/darwin-x64/package.json | 2 +- native/npm/linux-arm64-gnu/package.json | 2 +- native/npm/linux-x64-gnu/package.json | 2 +- native/npm/win32-x64-msvc/package.json | 2 +- package.json | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/native/npm/darwin-arm64/package.json b/native/npm/darwin-arm64/package.json index 13b51ea07..96ed100cf 100644 --- a/native/npm/darwin-arm64/package.json +++ b/native/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-arm64", - "version": "2.22.0", + "version": "2.23.0", "description": "GSD native engine binary for macOS ARM64", "os": [ "darwin" diff --git a/native/npm/darwin-x64/package.json b/native/npm/darwin-x64/package.json index cb50df8d2..3fcfc4a74 100644 --- a/native/npm/darwin-x64/package.json +++ b/native/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-x64", - "version": "2.22.0", + "version": "2.23.0", "description": "GSD native engine binary for macOS Intel", "os": [ "darwin" diff --git a/native/npm/linux-arm64-gnu/package.json b/native/npm/linux-arm64-gnu/package.json index 4184b4641..122a867de 100644 --- a/native/npm/linux-arm64-gnu/package.json +++ b/native/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-arm64-gnu", - "version": 
"2.22.0", + "version": "2.23.0", "description": "GSD native engine binary for Linux ARM64 (glibc)", "os": [ "linux" diff --git a/native/npm/linux-x64-gnu/package.json b/native/npm/linux-x64-gnu/package.json index 27b013b60..23fd4e544 100644 --- a/native/npm/linux-x64-gnu/package.json +++ b/native/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-x64-gnu", - "version": "2.22.0", + "version": "2.23.0", "description": "GSD native engine binary for Linux x64 (glibc)", "os": [ "linux" diff --git a/native/npm/win32-x64-msvc/package.json b/native/npm/win32-x64-msvc/package.json index 0da467200..e2af9b842 100644 --- a/native/npm/win32-x64-msvc/package.json +++ b/native/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-win32-x64-msvc", - "version": "2.22.0", + "version": "2.23.0", "description": "GSD native engine binary for Windows x64 (MSVC)", "os": [ "win32" diff --git a/package.json b/package.json index 896dcaa4b..dd88859f5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gsd-pi", - "version": "2.22.0", + "version": "2.23.0", "description": "GSD — Get Shit Done coding agent", "license": "MIT", "repository": {