diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17351ebb2..e14add275 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -155,7 +155,7 @@ jobs: run: npm run test:coverage windows-portability: - timeout-minutes: 15 + timeout-minutes: 25 needs: detect-changes if: >- needs.detect-changes.outputs.docs-only != 'true' @@ -180,12 +180,17 @@ jobs: - name: Typecheck extensions run: npm run typecheck:extensions - - name: Run unit tests - run: npm run test:unit - - name: Run package tests run: npm run test:packages + - name: Run Windows portability tests + run: >- + node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs + --experimental-strip-types --test + src/tests/windows-portability.test.ts + src/resources/extensions/gsd/tests/validate-directory.test.ts + src/tests/integration/web-mode-windows-hide.test.ts + rtk-portability: timeout-minutes: 20 needs: detect-changes diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e79c71de..a89f4f0b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,82 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [Unreleased] +## [2.71.0] - 2026-04-11 + +### Added +- **mcp-server**: add secure_env_collect tool via MCP form elicitation + +### Fixed +- **tui**: clear pinned output on message_end to prevent duplicate display +- **tui**: clear pinned latest output on turn completion +- **tui**: restore pinned output above editor during tool execution +- TOCTOU file locking race conditions in event log and custom workflow graph +- **tui**: mask secure extension input values in interactive mode +- **claude-code**: harden MCP elicitation schema handling +- **claude-code**: accept secure_env_collect MCP elicitation forms +- **interactive**: keep MCP tool output ordered and restore secure prompt fallback +- **interactive**: preserve MCP tool output stream ordering +- **gsd**: resolve workflow MCP test typing regressions +- **mcp**: return isError flag on workflow tool execution failures +- **discuss**: add structuredQuestionsAvailable conditional to all gates +- **discuss**: add multi-round questioning to new-project discuss phase +- **gsd**: harden claude-code workflow MCP bootstrap +- **web**: drop provisional pre-tool question text + +### Changed +- extract deriveStateFromDb logic into composable helpers +- **pr**: drop web-layer changes from MCP stream-order fix + +## [2.70.1] - 2026-04-11 + +### Fixed +- **routing**: address codex review — complete interactive bypass and accurate banner +- **routing**: skip dynamic routing for interactive dispatches, always show model changes (#3962) +- **ci**: trim windows portability integration load +- **ci**: narrow windows portability coverage +- **ci**: skip validate-pack in windows portability job +- **ci**: unblock windows portability follow-up +- **windows**: harden portability across runtime and tooling +- **auto**: use pathToFileURL for cross-platform import and reconcile regression test +- **auto**: resolve resource-loader.js from GSD_PKG_ROOT on resume (#3949) +- **mcp-server**: importLocalModule resolves src/ paths from dist/ 
context +- **gsd**: surface scoped doctor health warnings +- **gsd**: skip skipped slices in milestone prompts +- **gsd**: handle doubled-backtick pre-exec paths +- **update**: fetch latest version from registry + +## [2.70.0] - 2026-04-10 + +### Added +- **mcp-server**: expose ask_user_questions via elicitation + +### Fixed +- **pi-ai**: remove Anthropic OAuth flow for TOS compliance +- **mcp-server**: hydrate model credentials into env +- **mcp-server**: hydrate stored tool credentials on startup +- **gsd**: auto-enable cmux when detected instead of prompting +- **mcp-server**: URL scheme regex no longer matches Windows drive letters + +## [2.69.0] - 2026-04-10 + +### Added +- **gsd**: implement ADR-005 multi-model provider and tool strategy +- **gsd**: complete ADR-004 capability-aware model routing implementation + +### Fixed +- **gsd**: add missing directories to codebase generator exclude list +- **gsd**: wire ADR-005 infrastructure into live paths +- **gsd**: replace empty catch with logWarning for CI compliance +- **gsd**: merge enhanced context sections into standard template, clean up stale gate patterns +- **gsd**: remove broken discuss-prepared template, inject briefs into discuss.md + +## [2.68.1] - 2026-04-10 + +### Fixed +- **ci**: update FILE-SYSTEM-MAP.md path after docs reorganization +- **test**: update discord invite test path after docs reorganization +- **gsd**: resolve resource-loader import for deployed extensions + ## [2.68.0] - 2026-04-10 ### Added @@ -2664,7 +2740,12 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Changed - License updated to MIT -[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...HEAD +[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.71.0...HEAD +[2.71.0]: https://github.com/gsd-build/gsd-2/compare/v2.70.1...v2.71.0 +[2.70.1]: https://github.com/gsd-build/gsd-2/compare/v2.70.0...v2.70.1 +[2.70.0]: https://github.com/gsd-build/gsd-2/compare/v2.69.0...v2.70.0 +[2.69.0]: https://github.com/gsd-build/gsd-2/compare/v2.68.1...v2.69.0 +[2.68.1]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...v2.68.1 [2.68.0]: https://github.com/gsd-build/gsd-2/compare/v2.67.0...v2.68.0 [2.67.0]: https://github.com/gsd-build/gsd-2/compare/v2.66.1...v2.67.0 [2.66.1]: https://github.com/gsd-build/gsd-2/compare/v2.66.0...v2.66.1 diff --git a/README.md b/README.md index a906da402..1af83f33a 100644 --- a/README.md +++ b/README.md @@ -21,42 +21,49 @@ One command. Walk away. Come back to a built project with clean git history. > GSD now provisions a managed [RTK](https://github.com/rtk-ai/rtk) binary on supported macOS, Linux, and Windows installs to compress shell-command output in `bash`, `async_bash`, `bg_shell`, and verification flows. GSD forces `RTK_TELEMETRY_DISABLED=1` for all managed invocations. Set `GSD_RTK_DISABLED=1` to disable the integration. -> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues. +> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/user-docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues. 
--- -## What's New in v2.68 +## What's New in v2.71 -### MCP Workflow Tools +### MCP Secure Env Collect -- **Full workflow over MCP** — slice replanning, milestone management, slice completion, task completion, and core planning tools are now exposed over MCP for external integrations. -- **Transport-gated MCP** — workflow tool availability adapts to provider transport capabilities automatically. -- **Write gate enforcement** — workflow MCP respects write gates, preventing unauthorized state mutations from external clients. +- **Secure credential collection over MCP** — the new `secure_env_collect` tool uses MCP form elicitation to collect secrets (API keys, tokens) from external clients without exposing values in tool output. Masks input in interactive mode. +- **Hardened elicitation schema** — MCP elicitation schema handling is stricter, with proper validation and fallback for providers that don't support forms. -### Reliability & Recovery +### MCP Reliability -- **False degraded-mode fix** — eliminates spurious degraded-mode warnings when the DB hasn't been initialized yet. -- **Stale session resume suppression** — prevents stale interrupted-session resume prompts from hijacking fresh sessions. -- **Merge conflict recovery** — `autoCommitDirtyState` guarded with cwd restore on `MergeConflictError`. -- **Auto-resume hardening** — `autoStartTime` restored on resume, managed resources resynced on auto resume. +- **Stream ordering preserved** — MCP tool output now renders in the correct order, fixing interleaved output in Claude Code and other MCP clients. +- **isError flag propagation** — workflow tool execution failures now correctly return `isError: true`, so MCP clients can distinguish success from failure. +- **Multi-round discuss questions** — new-project discuss phase supports multi-round questioning with structured question gates. 
-### TUI & Developer Experience +### TUI Fixes -- **Contextual tips system** — TUI and web terminal now surface contextual tips based on workflow state. -- **Claude Code MCP streaming** — real-time streaming and tool output rendering for Claude Code MCP connections. +- **Pinned output restored** — pinned output bar displays above the editor during tool execution again. +- **Turn completion cleanup** — pinned latest output is cleared on turn completion, preventing stale output from persisting. +- **Secure input masking** — extension input values are masked in interactive mode when collecting secrets. -### Infrastructure +### Reliability & Internals -- **Weekly model registry refresh** — CI workflow auto-regenerates the model registry on a weekly schedule. -- **Codebase cache auto-refresh** — stale codebase cache is refreshed automatically without manual intervention. +- **TOCTOU file locking** — race conditions in event log and custom workflow graph file locking are fixed with proper atomic lock acquisition. +- **State derive refactor** — `deriveStateFromDb` god function extracted into composable, testable helpers. +- **Windows portability** — hardened cross-platform portability across runtime, tooling, and CI. +- **Model routing transparency** — dynamic routing is skipped for interactive dispatches; model changes are always shown in the banner. +- **Capability-aware routing (ADR-004)** — full implementation of capability scoring, `before_model_select` hook, and task metadata extraction. +- **Multi-model provider strategy (ADR-005)** — infrastructure for multi-provider model selection wired into live paths. See the full [Changelog](./CHANGELOG.md) for details on every release.
-Previous highlights (v2.67 and earlier) +Previous highlights (v2.70 and earlier) +- **Full workflow over MCP (v2.68)** — slice replanning, milestone management, slice completion, task completion, and core planning tools exposed over MCP +- **Transport-gated MCP (v2.68)** — workflow tool availability adapts to provider transport capabilities automatically +- **Contextual tips system (v2.68)** — TUI and web terminal surface contextual tips based on workflow state +- **Ask user questions over MCP (v2.70)** — interactive questions exposed via elicitation for external integrations - **Tiered Context Injection (M005)** — relevance-scoped context with 65%+ token reduction - **Resilient transient error recovery** — defers to Core RetryHandler and fixes cmdCtx race conditions - **Anthropic subscription routing** — auto-routed through Claude Code CLI provider with proper display names @@ -86,30 +93,35 @@ See the full [Changelog](./CHANGELOG.md) for details on every release. ## Documentation -Full documentation is available at **[gsd.build](https://gsd.build)** (powered by Mintlify) and in the [`docs/`](./docs/) directory: +Full documentation is in the [`docs/`](./docs/) directory: -- **[Getting Started](./docs/getting-started.md)** — install, first run, basic usage -- **[Auto Mode](./docs/auto-mode.md)** — autonomous execution deep-dive -- **[Configuration](./docs/configuration.md)** — all preferences, models, git, and hooks -- **[Custom Models](./docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies) -- **[Token Optimization](./docs/token-optimization.md)** — profiles, context compression, complexity routing -- **[Cost Management](./docs/cost-management.md)** — budgets, tracking, projections -- **[Git Strategy](./docs/git-strategy.md)** — worktree isolation, branching, merge behavior -- **[Parallel Orchestration](./docs/parallel-orchestration.md)** — run multiple milestones simultaneously -- **[Working in Teams](./docs/working-in-teams.md)** — 
unique IDs, shared artifacts -- **[Skills](./docs/skills.md)** — bundled skills, discovery, custom authoring -- **[Commands Reference](./docs/commands.md)** — all commands and keyboard shortcuts -- **[Architecture](./docs/architecture.md)** — system design and dispatch pipeline -- **[Troubleshooting](./docs/troubleshooting.md)** — common issues, doctor, forensics, recovery -- **[CI/CD Pipeline](./docs/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod) -- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration -- **[Visualizer](./docs/visualizer.md)** — workflow visualizer with stats and discussion status -- **[Remote Questions](./docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed -- **[Dynamic Model Routing](./docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure -- **[Web Interface](./docs/web-interface.md)** — browser-based project management and real-time progress -- **[Pipeline Simplification (ADR-003)](./docs/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion +### User Guides + +- **[Getting Started](./docs/user-docs/getting-started.md)** — install, first run, basic usage +- **[Auto Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive +- **[Configuration](./docs/user-docs/configuration.md)** — all preferences, models, git, and hooks +- **[Custom Models](./docs/user-docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies) +- **[Token Optimization](./docs/user-docs/token-optimization.md)** — profiles, context compression, complexity routing +- **[Cost Management](./docs/user-docs/cost-management.md)** — budgets, tracking, projections +- **[Git Strategy](./docs/user-docs/git-strategy.md)** — worktree isolation, branching, merge behavior +- **[Parallel Orchestration](./docs/user-docs/parallel-orchestration.md)** — run multiple 
milestones simultaneously +- **[Working in Teams](./docs/user-docs/working-in-teams.md)** — unique IDs, shared artifacts +- **[Skills](./docs/user-docs/skills.md)** — bundled skills, discovery, custom authoring +- **[Commands Reference](./docs/user-docs/commands.md)** — all commands and keyboard shortcuts +- **[Troubleshooting](./docs/user-docs/troubleshooting.md)** — common issues, doctor, forensics, recovery +- **[Visualizer](./docs/user-docs/visualizer.md)** — workflow visualizer with stats and discussion status +- **[Remote Questions](./docs/user-docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed +- **[Dynamic Model Routing](./docs/user-docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure +- **[Web Interface](./docs/user-docs/web-interface.md)** — browser-based project management and real-time progress +- **[Migration from v1](./docs/user-docs/migration.md)** — `.planning` → `.gsd` migration - **[Docker Sandbox](./docker/README.md)** — run GSD auto mode in an isolated Docker container -- **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration + +### Developer Docs + +- **[Architecture](./docs/dev/architecture.md)** — system design and dispatch pipeline +- **[CI/CD Pipeline](./docs/dev/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod) +- **[Pipeline Simplification (ADR-003)](./docs/dev/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion +- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration --- @@ -325,7 +337,7 @@ gsd headless query gsd headless dispatch plan ``` -Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. 
Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed. +Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/user-docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed. **Multi-session orchestration** — headless mode supports file-based IPC in `.gsd/parallel/` for coordinating multiple GSD workers across milestones. Build orchestrators that spawn, monitor, and budget-cap a fleet of GSD workers. 
@@ -498,9 +510,8 @@ auto_report: true | `verification_commands`| Array of shell commands to run after task execution (e.g., `["npm run lint", "npm run test"]`) | | `verification_auto_fix`| Auto-retry on verification failures (default: true) | | `verification_max_retries` | Max retries for verification failures (default: 2) | -| `require_slice_discussion` | Pause auto-mode before each slice for human discussion review | +| `phases.require_slice_discussion` | Pause auto-mode before each slice for human discussion review | | `auto_report` | Auto-generate HTML reports after milestone completion (default: true) | -| `searchExcludeDirs` | Directories to exclude from `@` file autocomplete (e.g., `["node_modules", ".git", "dist"]`) | ### Agent Instructions @@ -530,7 +541,7 @@ token_profile: budget # or balanced (default), quality **Budget pressure** graduates model downgrading as you approach your budget ceiling — 50%, 75%, and 90% thresholds progressively shift work to cheaper tiers. -See the full [Token Optimization Guide](./docs/token-optimization.md) for details. +See the full [Token Optimization Guide](./docs/user-docs/token-optimization.md) for details. 
### Bundled Tools @@ -565,13 +576,15 @@ GSD ships with 24 extensions, all loaded automatically: ### Bundled Agents -Three specialized subagents for delegated work: +Five specialized subagents for delegated work: -| Agent | Role | -| -------------- | ------------------------------------------------------------ | -| **Scout** | Fast codebase recon — returns compressed context for handoff | -| **Researcher** | Web research — finds and synthesizes current information | -| **Worker** | General-purpose execution in an isolated context window | +| Agent | Role | +| ------------------- | ------------------------------------------------------------ | +| **Scout** | Fast codebase recon — returns compressed context for handoff | +| **Researcher** | Web research — finds and synthesizes current information | +| **Worker** | General-purpose execution in an isolated context window | +| **JavaScript Pro** | JavaScript-specialized execution and debugging | +| **TypeScript Pro** | TypeScript-specialized execution and debugging | --- @@ -646,9 +659,8 @@ gsd (CLI binary) ├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/ └─ src/resources/ ├─ extensions/gsd/ Core GSD extension (auto, state, commands, ...) - ├─ extensions/... 23 supporting extensions - ├─ agents/ scout, researcher, worker - ├─ AGENTS.md Agent routing instructions + ├─ extensions/... 
21 supporting extensions + ├─ agents/ scout, researcher, worker, javascript-pro, typescript-pro └─ GSD-WORKFLOW.md Manual bootstrap protocol ``` diff --git a/docs/dev/ADR-005-multi-model-provider-tool-strategy.md b/docs/dev/ADR-005-multi-model-provider-tool-strategy.md new file mode 100644 index 000000000..bdf00706a --- /dev/null +++ b/docs/dev/ADR-005-multi-model-provider-tool-strategy.md @@ -0,0 +1,67 @@ +# ADR-005: Multi-Model, Multi-Provider, and Tool Strategy + +**Status:** Accepted +**Date:** 2026-03-27 +**Deciders:** Jeremy McSpadden +**Related:** ADR-004 (capability-aware model routing), ADR-003 (pipeline simplification), [Issue #2790](https://github.com/gsd-build/gsd-2/issues/2790) + +## Context + +PR #2755 lands capability-aware model routing (ADR-004), extending the router from a one-dimensional complexity-tier system to a two-dimensional system that scores models across 7 capability dimensions. GSD can now intelligently pick the best model for a task from a heterogeneous pool. + +But model selection is only one piece of the multi-model puzzle. The system faces structural gaps as users configure diverse provider pools: + +1. **Tool compatibility is assumed, not verified** — Every registered tool is sent to every model regardless of provider capabilities. +2. **No tool-aware model routing** — ADR-004 scores 7 capability dimensions but none encode whether a model can actually use the tools a task requires. +3. **Provider failover loses context fidelity** — Cross-provider switches silently degrade conversation quality (thinking blocks dropped, tool IDs remapped). +4. **Tool availability is static across a session** — The same tools are presented regardless of the selected model's capabilities. +5. **No provider capability registry** — Provider quirks are scattered across `*-shared.ts` files. + +## Decision + +Introduce a provider capability registry and tool compatibility layer that integrates with ADR-004's capability-aware model router. 
+ +### Design Principles + +1. **Layered on ADR-004, not replacing it.** Capability scoring remains primary. This adds tool compatibility as a hard constraint. +2. **Hard constraints filter; soft scores rank.** Tool support is binary — it filters the eligible set before scoring. +3. **Provider knowledge is declarative, not scattered.** Provider capabilities move to an explicit registry. +4. **Tool sets adapt to model capabilities.** Active tool set adjusts when the router selects a different model. +5. **Graceful degradation preserved.** Unknown providers get full tool access — same as today. + +### Implementation Phases + +1. **Phase 1:** Provider Capabilities Registry (`packages/pi-ai/src/providers/provider-capabilities.ts`) +2. **Phase 2:** Tool Compatibility Metadata (extend `ToolDefinition` with `compatibility` field) +3. **Phase 3:** Tool-compatibility filter in routing pipeline + `ProviderSwitchReport` in `transform-messages.ts` +4. **Phase 4:** `adjustToolSet` extension hook + +## Consequences + +### Positive +- Eliminates silent tool failures when routing to incompatible providers +- Makes cross-provider routing safe by default +- Provider knowledge becomes queryable (registry vs scattered code) +- Cross-provider context loss becomes visible via `ProviderSwitchReport` + +### Negative +- More metadata to maintain (provider capabilities, tool compatibility) +- Tool filtering adds a pipeline step (sub-millisecond, O(models × tools)) +- Risk of over-filtering (mitigated: opt-in per tool, permissive defaults) + +### Neutral +- Existing behavior unchanged without metadata +- ADR-004 scoring is unmodified +- Provider implementations simplify over time as registry replaces scattered workarounds + +## Appendix: Architecture Reference + +| File | Role | +|------|------| +| `packages/pi-ai/src/providers/register-builtins.ts` | Provider registration | +| `packages/pi-ai/src/providers/*-shared.ts` | Provider-specific handling | +| 
`packages/pi-ai/src/providers/transform-messages.ts` | Cross-provider normalization | +| `packages/pi-ai/src/types.ts` | Core types | +| `packages/pi-coding-agent/src/core/extensions/types.ts` | ToolDefinition, ExtensionAPI | +| `src/resources/extensions/gsd/model-router.ts` | Capability scoring (ADR-004) | +| `src/resources/extensions/gsd/auto-model-selection.ts` | Model selection orchestration | diff --git a/docs/user-docs/claude-code-auth-compliance.md b/docs/user-docs/claude-code-auth-compliance.md index f930afd46..0c6b77466 100644 --- a/docs/user-docs/claude-code-auth-compliance.md +++ b/docs/user-docs/claude-code-auth-compliance.md @@ -86,18 +86,15 @@ Implication for GSD2: These are directionally correct because GSD is using the user's own local Claude Code installation as the authenticated Anthropic surface. -### Medium/high-risk pieces +### Medium/high-risk pieces — RESOLVED -- `packages/pi-ai/src/utils/oauth/anthropic.ts` - Still implements a first-party-looking Anthropic OAuth flow for GSD itself using `claude.ai/oauth/authorize` and `platform.claude.com/v1/oauth/token`. -- `packages/pi-ai/src/utils/oauth/index.ts` - Still registers `anthropicOAuthProvider` as a built-in OAuth provider. -- `src/web/onboarding-service.ts` - Still advertises Anthropic as `supportsOAuth: true`, which keeps the web onboarding surface inconsistent with the TUI stance. -- `packages/daemon/src/orchestrator.ts` - Reads Anthropic OAuth credentials from `~/.gsd/agent/auth.json`, refreshes them, and then uses the access token for Anthropic API calls. +All Anthropic OAuth code paths have been removed: -The key risk is not just stale UI. The repo still contains code paths where GSD can behave as a third-party Anthropic OAuth client and then convert that credential into direct API access. +- `packages/pi-ai/src/utils/oauth/anthropic.ts` — **Deleted.** No longer implements Anthropic OAuth flow. 
+- `packages/pi-ai/src/utils/oauth/index.ts` — **Updated.** `anthropicOAuthProvider` removed from built-in registry. +- `src/web/onboarding-service.ts` — **Updated.** Anthropic set to `supportsOAuth: false`. +- `packages/daemon/src/orchestrator.ts` — **Updated.** OAuth token refresh removed; requires `ANTHROPIC_API_KEY` env var. +- `packages/pi-ai/src/providers/anthropic.ts` — **Updated.** OAuth client branch removed; `isOAuthToken` always returns false. ## Recommended Policy For GSD2 @@ -149,14 +146,14 @@ This is the best long-term UX because it separates: - API-billed usage - cloud-routed usage -## Concrete Repo Follow-ups +## Concrete Repo Follow-ups — 5 OF 6 COMPLETED -1. Delete or disable `packages/pi-ai/src/utils/oauth/anthropic.ts`. -2. Remove `anthropicOAuthProvider` from `packages/pi-ai/src/utils/oauth/index.ts`. -3. Change `src/web/onboarding-service.ts` so Anthropic does not claim OAuth support. -4. Audit `packages/daemon/src/orchestrator.ts` and any other callers that treat Anthropic OAuth access tokens as API credentials. -5. Update docs/UI labels to prefer `anthropic-api` for direct API usage and `claude-code` for subscription usage. -6. Add tests that fail if Anthropic subscription OAuth is reintroduced through the onboarding/provider registry. +1. ~~Delete or disable `packages/pi-ai/src/utils/oauth/anthropic.ts`.~~ **Done** — file deleted. +2. ~~Remove `anthropicOAuthProvider` from `packages/pi-ai/src/utils/oauth/index.ts`.~~ **Done.** +3. ~~Change `src/web/onboarding-service.ts` so Anthropic does not claim OAuth support.~~ **Done.** +4. ~~Audit `packages/daemon/src/orchestrator.ts` and any other callers that treat Anthropic OAuth access tokens as API credentials.~~ **Done** — daemon now requires `ANTHROPIC_API_KEY`. +5. ~~Update docs/UI labels to prefer `anthropic-api` for direct API usage and `claude-code` for subscription usage.~~ **Done** — providers.md and getting-started.md updated. +6. 
Add tests that fail if Anthropic subscription OAuth is reintroduced through the onboarding/provider registry. — **TODO.** ## Decision Rule diff --git a/docs/user-docs/configuration.md b/docs/user-docs/configuration.md index 00512fa22..b3e873e72 100644 --- a/docs/user-docs/configuration.md +++ b/docs/user-docs/configuration.md @@ -148,6 +148,7 @@ Recommended verification order: - Use absolute paths for local executables and scripts when possible. - For `stdio` servers, prefer setting required environment variables directly in the MCP config instead of relying on an interactive shell profile. +- GSD and `gsd-mcp-server` both hydrate supported model and tool keys saved in `~/.gsd/agent/auth.json`, so MCP configs can safely reference them through `${ENV_VAR}` placeholders without committing raw credentials. - If a server is team-shared and safe to commit, `.mcp.json` is usually the better home. - If a server depends on machine-local paths, personal services, or local-only secrets, prefer `.gsd/mcp.json`. diff --git a/docs/user-docs/getting-started.md b/docs/user-docs/getting-started.md index 6fbcf2422..d095ef8f9 100644 --- a/docs/user-docs/getting-started.md +++ b/docs/user-docs/getting-started.md @@ -1,74 +1,311 @@ -# Getting Started +# Getting Started with GSD -## Install +GSD is an AI coding agent that handles planning, execution, verification, and shipping so you can focus on what to build. This guide walks you through installation on macOS, Windows, and Linux, then gets you running your first session. + +--- + +## Prerequisites + +| Requirement | Minimum | Recommended | +|-------------|---------|-------------| +| **[Node.js](https://nodejs.org/)** | 22.0.0 | 24 LTS | +| **[Git](https://git-scm.com/)** | 2.20+ | Latest | +| **LLM API key** | Any supported provider | Anthropic (Claude) | + +Don't have Node.js or Git yet? Follow the OS-specific instructions below. 
+ +--- + +## Install by Operating System + +### macOS + +> **Downloads:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/mac) | [Homebrew](https://brew.sh/) + +**Step 1 — Install Homebrew** (skip if you already have it): + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +``` + +**Step 2 — Install Node.js and Git:** + +```bash +brew install node git +``` + +**Step 3 — Verify dependencies are installed:** + +```bash +node --version # should print v22.x or higher +git --version # should print 2.20+ +``` + +**Step 4 — Install GSD:** ```bash npm install -g gsd-pi ``` -Requires Node.js ≥ 22.0.0 (24 LTS recommended) and Git. - -> **`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [Troubleshooting](./troubleshooting.md#command-not-found-gsd-after-install) for details. - -GSD checks for updates once every 24 hours. When a new version is available, you'll see an interactive prompt at startup with the option to update immediately or skip. You can also update from within a session with `/gsd update`. - -### Set up API keys - -If you use a non-Anthropic model, you'll need a search API key for web search. Run `/gsd config` to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects: +**Step 5 — Set up your LLM provider:** ```bash -# Inside any GSD session: -/gsd config -``` +# Option A: Set an environment variable (Anthropic recommended) +export ANTHROPIC_API_KEY="sk-ant-..." -See [Global API Keys](./configuration.md#global-api-keys-gsd-config) for details on supported keys. - -### Set up custom MCP servers - -If you want GSD to call local or external MCP servers, add project-local config in `.mcp.json` or `.gsd/mcp.json`. - -See [Configuration → MCP Servers](./configuration.md#mcp-servers) for examples and verification steps. 
- -### VS Code Extension - -GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions. The extension provides: - -- **`@gsd` chat participant** — talk to the agent in VS Code Chat -- **Sidebar dashboard** — connection status, model info, token usage, quick actions -- **Full command palette** — start/stop agent, switch models, export sessions - -The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. - -### Web Interface - -GSD also has a browser-based interface. Run `gsd --web` to start a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](./web-interface.md) for details. - -## First Launch - -Run `gsd` in any directory: - -```bash -gsd -``` - -GSD displays a welcome screen showing your version, active model, and available tool keys. Then on first launch, it runs a setup wizard: - -1. **LLM Provider** — select from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key. -2. **Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any. - -If you have an existing Pi installation, provider credentials are imported automatically. - -For detailed setup instructions for specific providers (OpenRouter, Ollama, LM Studio, vLLM, and more), see the [Provider Setup Guide](./providers.md). - -Re-run the wizard anytime with: - -```bash +# Option B: Use the built-in config wizard gsd config ``` -## Choose a Model +To persist the key, add the export line to `~/.zshrc`: -GSD auto-selects a default model after login. Switch later with: +```bash +echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.zshrc +source ~/.zshrc +``` + +See [Provider Setup Guide](./providers.md) for all 20+ supported providers. 
+ +**Step 6 — Launch GSD:** + +```bash +cd ~/my-project # navigate to any project +gsd # start a session +``` + +**Step 7 — Verify everything works:** + +```bash +gsd --version # prints the installed version +``` + +Inside the session, type `/model` to confirm your LLM is connected. + +> **Apple Silicon PATH fix:** If `gsd` isn't found after install, npm's global bin may not be in your PATH: +> ```bash +> echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc +> source ~/.zshrc +> ``` + +> **oh-my-zsh conflict:** The oh-my-zsh git plugin defines `alias gsd='git svn dcommit'`. Fix with `unalias gsd 2>/dev/null` in `~/.zshrc`, or use `gsd-cli` instead. + +--- + +### Windows + +> **Downloads:** [Node.js](https://nodejs.org/) | [Git for Windows](https://git-scm.com/download/win) | [Windows Terminal](https://aka.ms/terminal) + +#### Option A: winget (recommended for Windows 10/11) + +**Step 1 — Install Node.js and Git:** + +```powershell +winget install OpenJS.NodeJS.LTS +winget install Git.Git +``` + +**Step 2 — Restart your terminal** (close and reopen PowerShell or Windows Terminal). + +**Step 3 — Verify dependencies are installed:** + +```powershell +node --version # should print v22.x or higher +git --version # should print 2.20+ +``` + +**Step 4 — Install GSD:** + +```powershell +npm install -g gsd-pi +``` + +**Step 5 — Set up your LLM provider:** + +```powershell +# Option A: Set an environment variable (current session) +$env:ANTHROPIC_API_KEY = "sk-ant-..." + +# Option B: Use the built-in config wizard +gsd config +``` + +To persist the key permanently, add it via System Settings > Environment Variables, or run: + +```powershell +[System.Environment]::SetEnvironmentVariable("ANTHROPIC_API_KEY", "sk-ant-...", "User") +``` + +See [Provider Setup Guide](./providers.md) for all 20+ supported providers. 
+ +**Step 6 — Launch GSD:** + +```powershell +cd C:\Users\you\my-project # navigate to any project +gsd # start a session +``` + +**Step 7 — Verify everything works:** + +```powershell +gsd --version # prints the installed version +``` + +Inside the session, type `/model` to confirm your LLM is connected. + +#### Option B: Manual install + +1. Download and install [Node.js LTS](https://nodejs.org/) — check **"Add to PATH"** during setup +2. Download and install [Git for Windows](https://git-scm.com/download/win) — use default options +3. Open a **new** terminal, then follow Steps 3-7 above + +> **Windows tips:** +> - Use **Windows Terminal** or **PowerShell** for the best experience. Command Prompt works but has limited color support. +> - If `gsd` isn't recognized, restart your terminal. Windows needs a fresh terminal to pick up new PATH entries. +> - **WSL2** also works — install WSL, then follow the Linux instructions inside your distro. + +--- + +### Linux + +> **Downloads:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/linux) | [nvm](https://github.com/nvm-sh/nvm) + +Pick your distro, then follow the steps. 
+
+#### Ubuntu / Debian
+
+**Step 1 — Install Node.js and Git:**
+
+```bash
+curl -fsSL https://deb.nodesource.com/setup_24.x | sudo -E bash -
+sudo apt-get install -y nodejs git
+```
+
+#### Fedora / RHEL / CentOS
+
+**Step 1 — Install Node.js and Git:**
+
+```bash
+curl -fsSL https://rpm.nodesource.com/setup_24.x | sudo bash -
+sudo dnf install -y nodejs git
+```
+
+#### Arch Linux
+
+**Step 1 — Install Node.js and Git:**
+
+```bash
+sudo pacman -S nodejs npm git
+```
+
+#### Using nvm (any distro)
+
+**Step 1 — Install nvm, then Node.js:**
+
+```bash
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.0/install.sh | bash
+source ~/.bashrc # or ~/.zshrc
+nvm install 24
+nvm use 24
+```
+
+#### All distros: Steps 2-6
+
+**Step 2 — Verify dependencies are installed:**
+
+```bash
+node --version # should print v22.x or higher
+git --version # should print 2.20+
+```
+
+**Step 3 — Install GSD:**
+
+```bash
+npm install -g gsd-pi
+```
+
+**Step 4 — Set up your LLM provider:**
+
+```bash
+# Option A: Set an environment variable (Anthropic recommended)
+export ANTHROPIC_API_KEY="sk-ant-..."
+
+# Option B: Use the built-in config wizard
+gsd config
+```
+
+To persist the key, add the export line to `~/.bashrc` (or `~/.zshrc`):
+
+```bash
+echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.bashrc
+source ~/.bashrc
+```
+
+See [Provider Setup Guide](./providers.md) for all 20+ supported providers.
+
+**Step 5 — Launch GSD:**
+
+```bash
+cd ~/my-project # navigate to any project
+gsd # start a session
+```
+
+**Step 6 — Verify everything works:**
+
+```bash
+gsd --version # prints the installed version
+```
+
+Inside the session, type `/model` to confirm your LLM is connected.
+
+> **Permission errors on `npm install -g`?** Don't use `sudo npm`. 
Fix npm's global directory instead: +> ```bash +> mkdir -p ~/.npm-global +> npm config set prefix '~/.npm-global' +> echo 'export PATH="$HOME/.npm-global/bin:$PATH"' >> ~/.bashrc +> source ~/.bashrc +> npm install -g gsd-pi +> ``` + +--- + +### Docker (any OS) + +> **Downloads:** [Docker Desktop](https://www.docker.com/products/docker-desktop/) + +Run GSD in an isolated sandbox without installing Node.js on your host. + +**Step 1 — Install Docker Desktop** (4.58+ required). + +**Step 2 — Clone the GSD repo:** + +```bash +git clone https://github.com/gsd-build/gsd-2.git +cd gsd-2/docker +``` + +**Step 3 — Create and enter a sandbox:** + +```bash +docker sandbox create --template . --name gsd-sandbox +docker sandbox exec -it gsd-sandbox bash +``` + +**Step 4 — Set your API key and run GSD:** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." +gsd auto "implement the feature described in issue #42" +``` + +See [Docker Sandbox docs](../../docker/README.md) for full configuration, resource limits, and compose files. + +--- + +## After Installation + +### Choose a Model + +GSD auto-selects a default model after provider setup. Switch anytime inside a session: ``` /model @@ -76,18 +313,20 @@ GSD auto-selects a default model after login. Switch later with: Or configure per-phase models in preferences — see [Configuration](./configuration.md). +--- + ## Two Ways to Work ### Step Mode — `/gsd` Type `/gsd` inside a session. GSD executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next. 
-- **No `.gsd/` directory** → starts a discussion flow to capture your project vision -- **Milestone exists, no roadmap** → discuss or research the milestone -- **Roadmap exists, slices pending** → plan the next slice or execute a task -- **Mid-task** → resume where you left off +- **No `.gsd/` directory** — starts a discussion flow to capture your project vision +- **Milestone exists, no roadmap** — discuss or research the milestone +- **Roadmap exists, slices pending** — plan the next slice or execute a task +- **Mid-task** — resume where you left off -Step mode is the on-ramp. You stay in the loop, reviewing output between each step. +Step mode keeps you in the loop, reviewing output between each step. ### Auto Mode — `/gsd auto` @@ -99,9 +338,11 @@ Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, ve See [Auto Mode](./auto-mode.md) for full details. -## Two Terminals, One Project +--- -The recommended workflow: auto mode in one terminal, steering from another. +## Recommended Workflow: Two Terminals + +Run auto mode in one terminal, steer from another. **Terminal 1 — let it build:** @@ -121,9 +362,9 @@ gsd Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically. 
-## Project Structure +--- -GSD organizes work into a hierarchy: +## How GSD Organizes Work ``` Milestone → a shippable version (4-10 slices) @@ -138,25 +379,45 @@ All state lives on disk in `.gsd/`: ``` .gsd/ PROJECT.md — what the project is right now - REQUIREMENTS.md — requirement contract (active/validated/deferred) + REQUIREMENTS.md — requirement contract DECISIONS.md — append-only architectural decisions - KNOWLEDGE.md — cross-session rules, patterns, and lessons - RUNTIME.md — runtime context: API endpoints, env vars, services (v2.39) + KNOWLEDGE.md — cross-session rules and patterns STATE.md — quick-glance status milestones/ M001/ - M001-ROADMAP.md — slice plan with risk levels and dependencies - M001-CONTEXT.md — scope and goals from discussion + M001-ROADMAP.md — slice plan with dependencies slices/ S01/ S01-PLAN.md — task decomposition S01-SUMMARY.md — what happened - S01-UAT.md — human test script - tasks/ - T01-PLAN.md - T01-SUMMARY.md ``` +--- + +## VS Code Extension + +GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions: + +- **`@gsd` chat participant** — talk to the agent in VS Code Chat +- **Sidebar dashboard** — connection status, model info, token usage +- **Full command palette** — start/stop agent, switch models, export sessions + +The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. + +--- + +## Web Interface + +GSD has a browser-based interface for visual project management: + +```bash +gsd --web +``` + +See [Web Interface](./web-interface.md) for details. + +--- + ## Resume a Session ```bash @@ -165,36 +426,48 @@ gsd --continue # or gsd -c Resumes the most recent session for the current directory. -To browse and pick from all saved sessions: +Browse all saved sessions: ```bash gsd sessions ``` -Shows each session's date, message count, and first-message preview so you can choose which one to resume. 
+--- + +## Updating GSD + +GSD checks for updates every 24 hours and prompts at startup. You can also update manually: + +```bash +npm update -g gsd-pi +``` + +Or from within a session: + +``` +/gsd update +``` + +--- + +## Quick Troubleshooting + +| Problem | Fix | +|---------|-----| +| `command not found: gsd` | Add npm global bin to PATH (see OS-specific notes above) | +| `gsd` runs `git svn dcommit` | oh-my-zsh conflict — `unalias gsd` or use `gsd-cli` | +| Permission errors on `npm install -g` | Fix npm prefix (see Linux notes) or use nvm | +| Can't connect to LLM | Check API key with `gsd config`, verify network access | +| `gsd` hangs on start | Check Node.js version: `node --version` (need 22+) | + +For more, see [Troubleshooting](./troubleshooting.md). + +--- ## Next Steps - [Auto Mode](./auto-mode.md) — deep dive into autonomous execution - [Configuration](./configuration.md) — model selection, timeouts, budgets - [Commands Reference](./commands.md) — all commands and shortcuts - -## Troubleshooting - -### `gsd` command runs `git svn dcommit` instead of GSD - -The [oh-my-zsh git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) defines `alias gsd='git svn dcommit'`, which shadows the GSD binary. - -**Option 1** — Remove the alias in your `~/.zshrc` (add after the `source $ZSH/oh-my-zsh.sh` line): - -```bash -unalias gsd 2>/dev/null -``` - -**Option 2** — Use the alternative binary name: - -```bash -gsd-cli -``` - -Both `gsd` and `gsd-cli` point to the same binary. +- [Provider Setup](./providers.md) — detailed setup for every provider +- [Working in Teams](./working-in-teams.md) — multi-developer workflows diff --git a/docs/user-docs/providers.md b/docs/user-docs/providers.md index 984ee369c..cfa3df939 100644 --- a/docs/user-docs/providers.md +++ b/docs/user-docs/providers.md @@ -30,7 +30,7 @@ Step-by-step setup instructions for every LLM provider GSD supports. 
If you ran | Provider | Auth Method | Env Variable | Config File | |----------|-------------|-------------|-------------| -| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | — | +| Anthropic | API key | `ANTHROPIC_API_KEY` | — | | OpenAI | API key | `OPENAI_API_KEY` | — | | Google Gemini | API key | `GEMINI_API_KEY` | — | | OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` | @@ -55,25 +55,91 @@ Built-in providers have models pre-registered in GSD. You only need to supply cr **Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching. -**Option A — Browser sign-in (recommended):** - -```bash -gsd config -# Choose "Sign in with your browser" → "Anthropic (Claude)" -``` - -Or inside a session: `/login` - -**Option B — API key:** +**Option A — API key (recommended):** ```bash export ANTHROPIC_API_KEY="sk-ant-..." ``` -Or paste it during `gsd config` when prompted. +Or run `gsd config` and paste your key when prompted. **Get a key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) +**Option B — Claude Code CLI:** + +If you have a Claude Pro or Max subscription, you can authenticate through Anthropic's official Claude Code CLI. Install it, sign in with `claude`, then GSD will detect and route through it automatically: + +```bash +# Install Claude Code CLI (see https://docs.anthropic.com/en/docs/claude-code) +claude +# Sign in when prompted, then start GSD +gsd +``` + +GSD detects your local Claude Code installation and uses it as the authenticated Anthropic surface. This is the TOS-compliant path for subscription users — GSD never handles your subscription credentials directly. + +> **Note:** GSD does not support browser-based OAuth sign-in for Anthropic. Use an API key or the Claude Code CLI instead. 
+ +**Option C — Use your Claude Pro/Max plan with GSD inside Claude Code:** + +If you already have a Claude Pro or Max subscription and want to use GSD's planning, execution, and milestone orchestration directly from Claude Code — without switching to a separate terminal — you can connect GSD as an MCP server. This gives Claude Code access to GSD's full workflow toolset via the [Model Context Protocol](https://modelcontextprotocol.io), so you get GSD's structured project management powered by your existing Claude plan. + +**Automatic setup (recommended):** + +When GSD detects a Claude Code model during startup, it automatically writes a `.mcp.json` file in your project root with the GSD workflow MCP server configured. No manual steps needed — just start GSD once with Claude Code as the provider and the config is created for you. + +You can also trigger this manually from inside a GSD session: + +```bash +/gsd mcp init +``` + +This writes (or updates) the `gsd-workflow` entry in your project's `.mcp.json`. Claude Code discovers this file automatically on its next session start. + +**Manual setup:** + +If you prefer to configure it yourself, add GSD to your project's `.mcp.json`: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +Or if `gsd-mcp-server` is installed globally: + +```json +{ + "mcpServers": { + "gsd": { + "command": "gsd-mcp-server" + } + } +} +``` + +You can also add this to `~/.claude/settings.json` under `mcpServers` to make GSD available across all projects. + +**What's exposed:** + +The MCP server provides GSD's full workflow tool surface — milestone planning, task completion, slice management, roadmap reassessment, journal queries, and more. Session management tools (`gsd_execute`, `gsd_status`, `gsd_result`, `gsd_cancel`) let Claude Code start and monitor GSD auto-mode sessions. 
See [Commands → MCP Server Mode](./commands.md#mcp-server-mode) for the full tool list. + +**Verify the connection:** + +From inside a GSD session, check that the MCP server is reachable: + +```bash +/gsd mcp status +``` + ### OpenAI ```bash diff --git a/gitbook/README.md b/gitbook/README.md new file mode 100644 index 000000000..cb84bae87 --- /dev/null +++ b/gitbook/README.md @@ -0,0 +1,65 @@ +# What is GSD? + +GSD is an AI-powered development agent that turns project ideas into working software. Describe what you want to build, and GSD researches, plans, codes, tests, and commits — with clean git history and full cost tracking. + +## How It Works + +GSD breaks your project into manageable pieces and works through them systematically: + +``` +You describe your project + ↓ +GSD creates a milestone with slices (features) + ↓ +Each slice is decomposed into tasks + ↓ +Tasks are executed one at a time in fresh AI sessions + ↓ +Code is committed, verified, and the next task begins +``` + +You can stay hands-on with **step mode** (reviewing each step) or let GSD run autonomously with **auto mode** while you grab coffee. 
+ +## Key Features + +- **Autonomous execution** — `/gsd auto` runs research, planning, coding, testing, and committing without intervention +- **20+ LLM providers** — Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, local models, and more +- **Git isolation** — Each milestone works in its own worktree branch, merged cleanly when done +- **Cost tracking** — Real-time token usage, budget ceilings, and automatic model downgrading +- **Crash recovery** — Sessions resume automatically after interruptions +- **Skills system** — Domain-specific instruction sets for frameworks, languages, and tools +- **Parallel milestones** — Run multiple milestones simultaneously in isolated worktrees +- **Remote questions** — Get Discord, Slack, or Telegram notifications when GSD needs input +- **Web interface** — Browser-based dashboard with real-time progress +- **VS Code extension** — Chat participant, sidebar dashboard, and full command palette +- **Headless mode** — Run in CI pipelines, cron jobs, and scripted automation + +## Quick Start + +```bash +# Install +npm install -g gsd-pi + +# Launch +gsd + +# Start autonomous mode +/gsd auto +``` + +See [Installation](getting-started/installation.md) for detailed setup instructions. + +## Two Ways to Work + +| Mode | Command | Best For | +|------|---------|----------| +| **Step** | `/gsd` | Staying in the loop, reviewing each step | +| **Auto** | `/gsd auto` | Walking away, overnight builds, batch work | + +The recommended workflow: run auto mode in one terminal, steer from another. See [Step Mode](core-concepts/step-mode.md) and [Auto Mode](core-concepts/auto-mode.md). 
+
+## Requirements
+
+- **Node.js** 22.0.0 or later (24 LTS recommended)
+- **Git** installed and configured
+- An API key for at least one LLM provider (GitHub Copilot supports browser sign-in; Anthropic uses an API key or the Claude Code CLI)
diff --git a/gitbook/SUMMARY.md b/gitbook/SUMMARY.md
new file mode 100644
index 000000000..962364bbe
--- /dev/null
+++ b/gitbook/SUMMARY.md
@@ -0,0 +1,49 @@
+# Table of contents
+
+* [What is GSD?](README.md)
+
+## Getting Started
+
+* [Installation](getting-started/installation.md)
+* [Your First Project](getting-started/first-project.md)
+* [Choosing a Model](getting-started/choosing-a-model.md)
+
+## Core Concepts
+
+* [How GSD Organizes Work](core-concepts/project-structure.md)
+* [Step Mode](core-concepts/step-mode.md)
+* [Auto Mode](core-concepts/auto-mode.md)
+
+## Configuration
+
+* [Preferences](configuration/preferences.md)
+* [Provider Setup](configuration/providers.md)
+* [Custom Models](configuration/custom-models.md)
+* [Git & Worktrees](configuration/git-settings.md)
+* [Notifications](configuration/notifications.md)
+* [MCP Servers](configuration/mcp-servers.md)
+
+## Features
+
+* [Cost Management](features/cost-management.md)
+* [Token Optimization](features/token-optimization.md)
+* [Dynamic Model Routing](features/dynamic-model-routing.md)
+* [Skills](features/skills.md)
+* [Captures & Triage](features/captures.md)
+* [Workflow Visualizer](features/visualizer.md)
+* [Workflow Templates](features/workflow-templates.md)
+* [Web Interface](features/web-interface.md)
+* [Remote Questions](features/remote-questions.md)
+* [Working in Teams](features/teams.md)
+* [Parallel Orchestration](features/parallel.md)
+* [Headless & CI Mode](features/headless.md)
+* [GitHub Sync](features/github-sync.md)
+
+## Reference
+
+* [Commands](reference/commands.md)
+* [Keyboard Shortcuts](reference/keyboard-shortcuts.md)
+* [CLI Flags](reference/cli-flags.md)
+* [Environment Variables](reference/environment-variables.md)
+* 
[Troubleshooting](reference/troubleshooting.md) +* [Migration from v1](reference/migration.md) diff --git a/gitbook/configuration/custom-models.md b/gitbook/configuration/custom-models.md new file mode 100644 index 000000000..8f02512ff --- /dev/null +++ b/gitbook/configuration/custom-models.md @@ -0,0 +1,131 @@ +# Custom Models + +Define custom models and providers in `~/.gsd/agent/models.json`. This lets you add models not in the default registry — self-hosted endpoints, fine-tuned models, proxies, or new provider releases. + +## File Location + +GSD looks for models.json at: +1. `~/.gsd/agent/models.json` (primary) +2. `~/.pi/agent/models.json` (fallback) + +The file reloads each time you open `/model` — no restart needed. + +## Basic Structure + +```json +{ + "providers": { + "my-provider": { + "baseUrl": "https://my-endpoint.example.com/v1", + "apiKey": "MY_PROVIDER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "model-id-here", + "name": "Friendly Model Name", + "reasoning": false, + "input": ["text"], + "contextWindow": 128000, + "maxTokens": 16384, + "cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 } + } + ] + } + } +} +``` + +## API Key Resolution + +The `apiKey` field can be: + +- **An environment variable name**: `"OPENROUTER_API_KEY"` — GSD resolves it automatically +- **A literal value**: `"sk-abc123..."` — used directly +- **A dummy value**: `"not-needed"` — for local servers that don't require auth + +## Compatibility Flags + +Local and non-standard servers often need compatibility adjustments: + +```json +{ + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false, + "supportsUsageInStreaming": false, + "thinkingFormat": "qwen" + } +} +``` + +| Flag | Default | Purpose | +|------|---------|---------| +| `supportsDeveloperRole` | `true` | Set `false` if the server doesn't support the `developer` message role | +| `supportsReasoningEffort` | `true` | Set `false` if the server 
doesn't support reasoning effort parameters | +| `supportsUsageInStreaming` | `true` | Set `false` if streaming responses don't include token usage | +| `thinkingFormat` | — | Set `"qwen"` for Qwen thinking mode, `"qwen-chat-template"` for chat template variant | + +## Custom Headers + +For proxies that need extra headers: + +```json +{ + "providers": { + "litellm-proxy": { + "baseUrl": "https://litellm.example.com/v1", + "apiKey": "MY_API_KEY", + "api": "openai-completions", + "headers": { + "x-custom-header": "value" + }, + "models": [...] + } + } +} +``` + +## Model Overrides + +Override specific model settings without redefining the entire model: + +```json +{ + "providers": { + "openrouter": { + "modelOverrides": { + "anthropic/claude-sonnet-4": { + "compat": { + "openRouterRouting": { + "only": ["amazon-bedrock"] + } + } + } + } + } + } +} +``` + +## Cost Tracking + +For accurate cost tracking with custom models, add the `cost` field (per million tokens): + +```json +"cost": { + "input": 0.15, + "output": 0.60, + "cacheRead": 0.015, + "cacheWrite": 0.19 +} +``` + +Without this, cost shows $0.00 — which is the expected default for custom models. + +## Community Extensions + +For providers not built into GSD, community extensions add full provider support: + +| Extension | Provider | Install | +|-----------|----------|---------| +| `pi-dashscope` | Alibaba DashScope (Qwen3, GLM-5, etc.) | `gsd install npm:pi-dashscope` | diff --git a/gitbook/configuration/git-settings.md b/gitbook/configuration/git-settings.md new file mode 100644 index 000000000..cf4c0d524 --- /dev/null +++ b/gitbook/configuration/git-settings.md @@ -0,0 +1,148 @@ +# Git & Worktrees + +GSD uses git for milestone isolation and sequential commits. The strategy is fully automated — you don't need to manage branches manually. 
+
+## Isolation Modes
+
+GSD supports three isolation modes, configured via `git.isolation` in preferences:
+
+| Mode | Working Directory | Branch | Best For |
+|------|-------------------|--------|----------|
+| `worktree` (default) | `.gsd/worktrees/<id>/` | `milestone/<id>` | Most projects — full isolation |
+| `branch` | Project root | `milestone/<id>` | Submodule-heavy repos |
+| `none` | Project root | Current branch | Hot-reload workflows |
+
+### Worktree Mode (Default)
+
+Each milestone gets its own git worktree and branch. All execution happens inside the worktree. On completion, everything is squash-merged to main as one clean commit. The worktree and branch are then cleaned up.
+
+Changes in a milestone can't interfere with your main working copy.
+
+### Branch Mode
+
+Work happens in the project root on a `milestone/<id>` branch. No worktree directory is created. Useful when worktrees cause problems with submodules or hardcoded paths.
+
+### None Mode
+
+Work happens directly on your current branch. No worktree, no milestone branch. GSD still commits with conventional commit messages. Use this when file isolation breaks dev tooling (file watchers, hot-reload, etc.).
+
+## Branching Model
+
+```
+main ────────────────────────────────────────────
+     │                                         ↑
+     └── milestone/M001 (worktree) ────────────┘
+         commit: feat: core types
+         commit: feat: markdown parser
+         commit: feat: file writer
+         → squash-merged to main
+```
+
+## Workflow Modes
+
+Set `mode` for sensible defaults instead of configuring each setting individually:
+
+```yaml
+mode: solo # personal projects
+mode: team # shared repos
+```
+
+| Setting | `solo` | `team` |
+|---------|--------|--------|
+| `git.auto_push` | `true` | `false` |
+| `git.push_branches` | `false` | `true` |
+| `git.pre_merge_check` | `false` | `true` |
+| `unique_milestone_ids` | `false` | `true` |
+
+Mode defaults are the lowest priority — any explicit preference overrides them. 
+ +## Git Preferences + +```yaml +git: + auto_push: false # push after commits + push_branches: false # push milestone branch to remote + remote: origin # git remote name + snapshots: true # WIP snapshot commits during long tasks + pre_merge_check: auto # validation before merge + commit_type: feat # override conventional commit prefix + main_branch: main # primary branch name + merge_strategy: squash # "squash" or "merge" + isolation: worktree # "worktree", "branch", or "none" + commit_docs: true # commit .gsd/ artifacts to git + manage_gitignore: true # let GSD manage .gitignore + auto_pr: false # create PR on milestone completion + pr_target_branch: develop # PR target branch +``` + +## Automatic Pull Requests + +For teams using Gitflow or branch-based workflows: + +```yaml +git: + auto_push: true + auto_pr: true + pr_target_branch: develop +``` + +When a milestone completes, GSD pushes the branch and creates a PR targeting your specified branch. Requires `gh` CLI installed and authenticated. + +## Post-Worktree Hook + +Run a script after worktree creation (copy `.env` files, symlink assets, etc.): + +```yaml +git: + worktree_post_create: .gsd/hooks/post-worktree-create +``` + +Example hook: + +```bash +#!/bin/bash +cp "$SOURCE_DIR/.env" "$WORKTREE_DIR/.env" +ln -sf "$SOURCE_DIR/assets" "$WORKTREE_DIR/assets" +``` + +## Keeping `.gsd/` Local + +For teams where only some members use GSD: + +```yaml +git: + commit_docs: false +``` + +This adds `.gsd/` to `.gitignore` entirely. You get structured planning without affecting teammates who don't use GSD. 
+
+## Commit Format
+
+Commits use conventional commit format with GSD metadata:
+
+```
+feat: core type definitions
+
+GSD-Task: M001/S01/T01
+```
+
+## Manual Worktree Management
+
+Use `/worktree` (or `/wt`) for manual worktree operations:
+
+```
+/worktree create <name>
+/worktree switch <name>
+/worktree merge <name>
+/worktree remove <name>
+```
+
+## Self-Healing
+
+GSD automatically recovers from common git issues:
+
+- **Detached HEAD** — reattaches to the correct branch
+- **Stale lock files** — removes `index.lock` from crashed processes
+- **Orphaned worktrees** — detects and cleans up abandoned worktrees
+
+Run `/gsd doctor` to check git health manually.
diff --git a/gitbook/configuration/mcp-servers.md b/gitbook/configuration/mcp-servers.md
new file mode 100644
index 000000000..6079fa3a7
--- /dev/null
+++ b/gitbook/configuration/mcp-servers.md
@@ -0,0 +1,65 @@
+# MCP Servers
+
+GSD can connect to external MCP (Model Context Protocol) servers for local tools, internal APIs, self-hosted services, or integrations not built in as native extensions.
+
+## Configuration Files
+
+GSD reads MCP config from these project-local paths:
+
+- `.mcp.json` — repo-shared config (safe to commit)
+- `.gsd/mcp.json` — local-only config (not shared)
+
+If both exist, server names are merged and the first definition found wins. 
+
+## Supported Transports
+
+| Transport | Config Shape | Use When |
+|-----------|-------------|----------|
+| `stdio` | `command` + optional `args`, `env`, `cwd` | Launching a local MCP server |
+| `http` | `url` | Connecting to an already-running server |
+
+## Examples
+
+### stdio Server
+
+```json
+{
+  "mcpServers": {
+    "my-server": {
+      "type": "stdio",
+      "command": "/absolute/path/to/python3",
+      "args": ["/absolute/path/to/server.py"],
+      "env": {
+        "API_URL": "http://localhost:8000"
+      }
+    }
+  }
+}
+```
+
+### HTTP Server
+
+```json
+{
+  "mcpServers": {
+    "my-http-server": {
+      "url": "http://localhost:8080/mcp"
+    }
+  }
+}
+```
+
+## Verifying a Server
+
+After adding config, verify from a GSD session:
+
+1. `mcp_servers` — confirms GSD sees the config
+2. `mcp_discover(server="my-server")` — confirms the server starts and responds
+3. `mcp_call(server="my-server", tool="<tool-name>", args={...})` — confirms a real tool call works
+
+## Tips
+
+- Use **absolute paths** for executables and scripts
+- Set required **environment variables** directly in the MCP config's `env` block
+- Use `.mcp.json` for team-shared servers; `.gsd/mcp.json` for machine-local ones
+- If a server depends on local paths or personal secrets, keep it in `.gsd/mcp.json`
diff --git a/gitbook/configuration/notifications.md b/gitbook/configuration/notifications.md
new file mode 100644
index 000000000..54acd0d67
--- /dev/null
+++ b/gitbook/configuration/notifications.md
@@ -0,0 +1,38 @@
+# Notifications
+
+GSD sends desktop notifications during auto mode to keep you informed without watching the terminal. 
+ +## Configuration + +```yaml +notifications: + enabled: true + on_complete: true # notify on unit completion + on_error: true # notify on errors + on_budget: true # notify on budget thresholds + on_milestone: true # notify when milestone finishes + on_attention: true # notify when manual attention needed +``` + +## macOS Setup + +GSD uses `terminal-notifier` when available, falling back to `osascript`. + +**Recommended:** Install `terminal-notifier` for reliable delivery: + +```bash +brew install terminal-notifier +``` + +**Why?** The `osascript` fallback attributes notifications to your terminal app (Ghostty, iTerm2, etc.), which may not have notification permissions. `terminal-notifier` registers as its own app and prompts for permission on first use. + +### Notifications Not Appearing? + +1. Check **System Settings → Notifications** for your terminal app +2. Install `terminal-notifier` (recommended) +3. Test with: + ```bash + terminal-notifier -title "GSD" -message "working!" -sound Glass + ``` + +If your terminal app doesn't appear in Notification settings, it may need to send at least one notification first to register. See [Troubleshooting](../reference/troubleshooting.md) for more details. diff --git a/gitbook/configuration/preferences.md b/gitbook/configuration/preferences.md new file mode 100644 index 000000000..3a997150a --- /dev/null +++ b/gitbook/configuration/preferences.md @@ -0,0 +1,238 @@ +# Preferences + +GSD preferences live in YAML frontmatter markdown files. You can configure them globally or per-project. 
+ +## Managing Preferences + +``` +/gsd prefs # open the global preferences wizard +/gsd prefs project # open the project preferences wizard +/gsd prefs status # show current values and where they come from +``` + +## Preference Files + +| Scope | Path | Applies To | +|-------|------|-----------| +| Global | `~/.gsd/PREFERENCES.md` | All projects | +| Project | `.gsd/PREFERENCES.md` | Current project only | + +**How they merge:** +- **Scalar fields** (`budget_ceiling`, `token_profile`): project wins if defined +- **Array fields** (`always_use_skills`, etc.): concatenated (global first, then project) +- **Object fields** (`models`, `git`, `auto_supervisor`): shallow-merged, project overrides per-key + +## Quick Example + +```yaml +--- +version: 1 + +# Model selection +models: + research: claude-sonnet-4-6 + planning: claude-opus-4-6 + execution: claude-sonnet-4-6 + completion: claude-sonnet-4-6 + +# Token optimization +token_profile: balanced + +# Budget +budget_ceiling: 25.00 +budget_enforcement: pause + +# Supervision +auto_supervisor: + soft_timeout_minutes: 15 + hard_timeout_minutes: 25 + +# Git +git: + auto_push: true + merge_strategy: squash + isolation: worktree + +# Verification +verification_commands: + - npm run lint + - npm run test + +# Notifications +notifications: + on_milestone: true + on_attention: true +--- +``` + +## All Settings + +### `models` + +Per-phase model selection. See [Choosing a Model](../getting-started/choosing-a-model.md). + +```yaml +models: + research: claude-sonnet-4-6 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5 + completion: claude-sonnet-4-6 + subagent: claude-sonnet-4-6 +``` + +### `token_profile` + +Coordinates model selection, phase skipping, and context compression. Values: `budget`, `balanced` (default), `quality`. See [Token Optimization](../features/token-optimization.md). 
+ +### `budget_ceiling` + +Maximum USD to spend during auto mode: + +```yaml +budget_ceiling: 50.00 +``` + +### `budget_enforcement` + +What happens when the ceiling is reached: + +| Value | Behavior | +|-------|----------| +| `warn` | Log a warning, continue | +| `pause` | Pause auto mode (default) | +| `halt` | Stop auto mode entirely | + +### `auto_supervisor` + +Timeout thresholds for auto mode: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 # warn AI to wrap up + idle_timeout_minutes: 10 # detect stalls + hard_timeout_minutes: 30 # pause auto mode +``` + +### `verification_commands` + +Shell commands that run after every task execution: + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true # auto-retry on failure (default) +verification_max_retries: 2 # max attempts (default: 2) +``` + +### `phases` + +Fine-grained control over which phases run: + +```yaml +phases: + skip_research: false + skip_reassess: false + skip_slice_research: true + reassess_after_slice: true + require_slice_discussion: false +``` + +### `skill_discovery` + +| Value | Behavior | +|-------|----------| +| `auto` | Skills found and applied automatically | +| `suggest` | Skills identified but not auto-applied (default) | +| `off` | Skill discovery disabled | + +### `dynamic_routing` + +Automatic model selection by task complexity. See [Dynamic Model Routing](../features/dynamic-model-routing.md). + +```yaml +dynamic_routing: + enabled: true + escalate_on_failure: true + budget_pressure: true +``` + +### `git` + +Git behavior. See [Git & Worktrees](git-settings.md). + +```yaml +git: + auto_push: false + merge_strategy: squash + isolation: worktree + commit_docs: true + auto_pr: false +``` + +### `notifications` + +See [Notifications](notifications.md). 
+ +```yaml +notifications: + enabled: true + on_complete: true + on_error: true + on_milestone: true + on_attention: true +``` + +### `remote_questions` + +Route questions to Slack, Discord, or Telegram. See [Remote Questions](../features/remote-questions.md). + +```yaml +remote_questions: + channel: discord + channel_id: "1234567890123456789" + timeout_minutes: 5 +``` + +### `parallel` + +Run multiple milestones simultaneously. See [Parallel Orchestration](../features/parallel.md). + +```yaml +parallel: + enabled: false + max_workers: 2 + budget_ceiling: 50.00 +``` + +### `custom_instructions` + +Durable instructions appended to every session: + +```yaml +custom_instructions: + - "Always use TypeScript strict mode" + - "Prefer functional patterns over classes" +``` + +For project-specific patterns, use `.gsd/KNOWLEDGE.md` instead — it's injected into every agent prompt automatically. + +### `context_pause_threshold` + +Context window usage percentage at which auto mode pauses: + +```yaml +context_pause_threshold: 80 # pause at 80% +``` + +### `show_token_cost` + +Show per-prompt and cumulative session token cost in the footer: + +```yaml +show_token_cost: true +``` diff --git a/gitbook/configuration/providers.md b/gitbook/configuration/providers.md new file mode 100644 index 000000000..4cb709142 --- /dev/null +++ b/gitbook/configuration/providers.md @@ -0,0 +1,277 @@ +# Provider Setup + +Step-by-step setup instructions for every LLM provider GSD supports. If you ran the onboarding wizard (`gsd config`) and picked a provider, you may already be configured — check with `/model` inside a session. 
+ +## Quick Reference + +| Provider | Auth Method | Environment Variable | +|----------|-------------|---------------------| +| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | +| OpenAI | API key | `OPENAI_API_KEY` | +| Google Gemini | API key | `GEMINI_API_KEY` | +| OpenRouter | API key | `OPENROUTER_API_KEY` | +| Groq | API key | `GROQ_API_KEY` | +| xAI (Grok) | API key | `XAI_API_KEY` | +| Mistral | API key | `MISTRAL_API_KEY` | +| GitHub Copilot | OAuth | `GH_TOKEN` | +| Amazon Bedrock | IAM credentials | `AWS_PROFILE` or `AWS_ACCESS_KEY_ID` | +| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` | +| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` | +| Ollama | None (local) | — | +| LM Studio | None (local) | — | +| vLLM / SGLang | None (local) | — | + +## Built-in Providers + +### Anthropic (Claude) + +**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching. + +**Option A — Browser sign-in (recommended):** + +```bash +gsd config +# Choose "Sign in with your browser" → "Anthropic (Claude)" +``` + +Or inside a session: `/login` + +**Option B — API key:** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." +``` + +### OpenAI + +```bash +export OPENAI_API_KEY="sk-..." +``` + +Or run `gsd config` and choose "Paste an API key" then "OpenAI". + +### Google Gemini + +```bash +export GEMINI_API_KEY="..." +``` + +### OpenRouter + +OpenRouter aggregates 200+ models from multiple providers behind a single API key. + +1. Get a key at [openrouter.ai/keys](https://openrouter.ai/keys) +2. Set it: + ```bash + export OPENROUTER_API_KEY="sk-or-..." + ``` +3. In GSD, type `/model` to select an OpenRouter model (prefixed with `openrouter/`) + +To add models not in the built-in list, add them to `~/.gsd/agent/models.json`. See [Custom Models](custom-models.md). + +### Groq + +```bash +export GROQ_API_KEY="gsk_..." +``` + +### xAI (Grok) + +```bash +export XAI_API_KEY="xai-..." 
+``` + +### Mistral + +```bash +export MISTRAL_API_KEY="..." +``` + +### GitHub Copilot + +Uses OAuth — sign in through the browser: + +```bash +gsd config +# Choose "Sign in with your browser" → "GitHub Copilot" +``` + +Requires an active GitHub Copilot subscription. + +### Amazon Bedrock + +Bedrock uses AWS IAM credentials: + +```bash +# Named profile +export AWS_PROFILE="my-profile" + +# Or IAM keys +export AWS_ACCESS_KEY_ID="AKIA..." +export AWS_SECRET_ACCESS_KEY="..." +export AWS_REGION="us-east-1" + +# Or bearer token +export AWS_BEARER_TOKEN_BEDROCK="..." +``` + +ECS task roles and IRSA (Kubernetes) are also detected automatically. + +### Anthropic on Vertex AI + +```bash +gcloud auth application-default login +export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id" +``` + +### Azure OpenAI + +```bash +export AZURE_OPENAI_API_KEY="..." +``` + +## Local Providers + +Local providers run on your machine. They require a `models.json` configuration file at `~/.gsd/agent/models.json` because GSD needs to know the endpoint URL and available models. + +The file reloads each time you open `/model` — no restart needed. + +### Ollama + +1. Install and start Ollama: + ```bash + brew install ollama + ollama serve + ``` + +2. Pull a model: + ```bash + ollama pull llama3.1:8b + ``` + +3. Create `~/.gsd/agent/models.json`: + ```json + { + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { "id": "llama3.1:8b" } + ] + } + } + } + ``` + +4. In GSD, type `/model` and select your Ollama model. + +### LM Studio + +1. Install [LM Studio](https://lmstudio.ai) +2. Go to "Local Server" tab, load a model, click "Start Server" (default port 1234) +3. 
Create `~/.gsd/agent/models.json`: + ```json + { + "providers": { + "lm-studio": { + "baseUrl": "http://localhost:1234/v1", + "api": "openai-completions", + "apiKey": "lm-studio", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { "id": "your-model-name" } + ] + } + } + } + ``` + +### vLLM + +```json +{ + "providers": { + "vllm": { + "baseUrl": "http://localhost:8000/v1", + "api": "openai-completions", + "apiKey": "vllm", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false, + "supportsUsageInStreaming": false + }, + "models": [ + { "id": "meta-llama/Llama-3.1-8B-Instruct" } + ] + } + } +} +``` + +### SGLang + +```json +{ + "providers": { + "sglang": { + "baseUrl": "http://localhost:30000/v1", + "api": "openai-completions", + "apiKey": "sglang", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { "id": "meta-llama/Llama-3.1-8B-Instruct" } + ] + } + } +} +``` + +## Custom OpenAI-Compatible Endpoints + +Any server that implements the OpenAI Chat Completions API can work with GSD — proxies (LiteLLM, Portkey, Helicone), self-hosted inference, new providers. + +**Quickest path:** + +```bash +gsd config +# Choose "Paste an API key" → "Custom (OpenAI-compatible)" +# Enter: base URL, API key, model ID +``` + +This writes `~/.gsd/agent/models.json` for you. See [Custom Models](custom-models.md) for manual setup. + +## Verifying Your Setup + +1. Launch GSD: `gsd` +2. Check available models: `/model` +3. Select your model from the picker +4. 
Send a test message to confirm it responds
+
+If the model doesn't appear, check:
+- The environment variable is set in the current shell
+- `models.json` is valid JSON
+- The server is running (for local providers)
+
+## Common Issues
+
+| Problem | Cause | Fix |
+|---------|-------|-----|
+| "Authentication failed" with valid key | Key not visible to GSD | Export in the same terminal, or save via `gsd config` |
+| OpenRouter models not in `/model` | No API key set | Set `OPENROUTER_API_KEY` and restart |
+| Ollama returns empty responses | Server not running or model not pulled | Run `ollama serve` and `ollama pull <model>` |
+| LM Studio model ID mismatch | ID doesn't match server | Check LM Studio's server tab for the exact identifier |
+| `developer` role error | Local server doesn't support it | Set `compat.supportsDeveloperRole: false` |
+| `stream_options` error | Server doesn't support streaming usage | Set `compat.supportsUsageInStreaming: false` |
+| Cost shows $0.00 | Default for custom models | Add `cost` field to model definition |
diff --git a/gitbook/core-concepts/auto-mode.md b/gitbook/core-concepts/auto-mode.md
new file mode 100644
index 000000000..b611f85ff
--- /dev/null
+++ b/gitbook/core-concepts/auto-mode.md
@@ -0,0 +1,183 @@
+# Auto Mode
+
+Auto mode is GSD's autonomous execution engine. Run `/gsd auto`, walk away, come back to built software with clean git history.
+
+## Starting Auto Mode
+
+```
+/gsd auto
+```
+
+GSD reads `.gsd/STATE.md`, determines the next unit of work, creates a fresh AI session with all relevant context, and lets the AI execute. When it finishes, GSD reads disk state again and dispatches the next unit. This continues until the milestone is complete.
+ +## The Execution Loop + +Each slice flows through phases automatically: + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all done) + Validate Milestone +``` + +- **Plan** — scouts the codebase, researches docs, decomposes the slice into tasks +- **Execute** — runs each task in a fresh context window +- **Complete** — writes summary, UAT script, marks roadmap, commits +- **Reassess** — checks if the roadmap still makes sense after what was learned +- **Validate** — after all slices, verifies success criteria were actually met + +## Controlling Auto Mode + +### Pause + +Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume. + +### Resume + +``` +/gsd auto +``` + +Auto mode reads disk state and picks up where it left off. + +### Stop + +``` +/gsd stop +``` + +Stops auto mode gracefully. Can be run from a different terminal. + +### Steer + +``` +/gsd steer +``` + +Modify plan documents during execution without stopping. Changes are picked up at the next phase boundary. + +### Capture Thoughts + +``` +/gsd capture "add rate limiting to API endpoints" +``` + +Fire-and-forget thought capture. Captures are triaged automatically between tasks without pausing execution. See [Captures & Triage](../features/captures.md). + +## Fresh Session Per Unit + +Every task gets a clean AI context window. No accumulated garbage, no quality degradation from context bloat. The dispatch prompt includes everything needed — task plans, prior summaries, decisions, dependency context — so the AI starts oriented. 
+ +## Git Isolation + +GSD isolates milestone work using one of three modes: + +| Mode | How It Works | Best For | +|------|-------------|----------| +| `worktree` (default) | Each milestone gets its own directory and branch | Most projects | +| `branch` | Work happens in the project root on a milestone branch | Submodule-heavy repos | +| `none` | Work happens directly on your current branch | Hot-reload workflows | + +In worktree mode, all commits are squash-merged to main as one clean commit when the milestone completes. See [Git & Worktrees](../configuration/git-settings.md). + +## Crash Recovery + +If a session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. + +In headless mode (`gsd headless auto`), crashes trigger automatic restart with exponential backoff (5s → 10s → 30s, up to 3 attempts). Combined with crash recovery, this enables true overnight "fire and forget" execution. + +## Provider Error Recovery + +GSD handles provider errors automatically: + +| Error Type | Examples | What Happens | +|-----------|----------|-------------| +| Rate limit | 429, "too many requests" | Auto-resumes after cooldown (60s or retry-after header) | +| Server error | 500, 502, 503, "overloaded" | Auto-resumes after 30s | +| Permanent | "unauthorized", "invalid key" | Pauses — requires manual resume | + +No manual intervention needed for transient errors. 
+ +## Timeout Supervision + +Three timeout tiers prevent runaway sessions: + +| Timeout | Default | What Happens | +|---------|---------|-------------| +| Soft | 20 min | Warns the AI to wrap up | +| Idle | 10 min | Detects stalls, intervenes | +| Hard | 30 min | Pauses auto mode | + +Configure in preferences: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +## Verification Gates + +Configure shell commands that run automatically after every task: + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true # auto-retry on failure +verification_max_retries: 2 # max retry attempts +``` + +If verification fails, the AI sees the output and attempts to fix the issues before advancing. This ensures quality gates are enforced mechanically. + +## Slice Discussion Gate + +For projects requiring human review before each slice: + +```yaml +require_slice_discussion: true +``` + +Auto mode pauses before each slice, showing the plan for your approval before building. + +## Stuck Detection + +GSD uses sliding-window analysis to detect stuck loops — not just "same unit dispatched twice" but also cycles like A→B→A→B. On detection, GSD retries once with a diagnostic prompt. If it fails again, auto mode stops with details so you can intervene. + +## Cost Tracking + +Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause auto mode before overspending. See [Cost Management](../features/cost-management.md). 
+ +## Dashboard + +`Ctrl+Alt+G` or `/gsd status` shows real-time progress: + +- Current milestone, slice, and task +- Auto mode elapsed time and phase +- Per-unit cost and token breakdown +- Cost projections +- Completed and in-progress units +- Pending capture count +- Parallel worker status (when running parallel milestones) + +## HTML Reports + +After a milestone completes, GSD generates a self-contained HTML report in `.gsd/reports/` with project summary, progress tree, dependency graph, cost metrics, timeline, and changelog. Generate manually with: + +``` +/gsd export --html +/gsd export --html --all # all milestones +``` + +## Diagnostic Tools + +If auto mode has issues, GSD provides two diagnostic tools: + +- **`/gsd doctor`** — validates `.gsd/` integrity, checks referential consistency, fixes structural issues +- **`/gsd forensics`** — full post-mortem debugger with anomaly detection, unit traces, metrics analysis, and AI-guided investigation + +``` +/gsd doctor +/gsd forensics [optional problem description] +``` diff --git a/gitbook/core-concepts/project-structure.md b/gitbook/core-concepts/project-structure.md new file mode 100644 index 000000000..6aa6e9078 --- /dev/null +++ b/gitbook/core-concepts/project-structure.md @@ -0,0 +1,104 @@ +# How GSD Organizes Work + +GSD uses a three-level hierarchy to break projects into manageable pieces that an AI can execute reliably. + +## The Hierarchy + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable vertical feature (1-7 tasks) + Task → one context-window-sized unit of work +``` + +### Milestones + +A milestone is a shippable version of your project — an MVP, a major release, or a feature set that delivers standalone value. Milestones typically contain 4-10 slices. + +Examples: +- "MVP with user auth, dashboard, and settings" +- "v2.0 with real-time collaboration and API v2" +- "Security hardening milestone" + +### Slices + +A slice is one demoable, vertical capability within a milestone. 
It cuts across layers (database, backend, frontend) to deliver something you could show to a user. Slices contain 1-7 tasks. + +Examples: +- "User authentication with JWT" +- "Dashboard layout with charts" +- "API rate limiting" + +### Tasks + +A task is the smallest unit of work — something that fits in one AI context window. If a task can't be completed in a single AI session, it's broken into smaller tasks. + +Examples: +- "Create the User model and migration" +- "Implement JWT middleware" +- "Build the login form component" + +## The `.gsd/` Directory + +All project state lives on disk in a `.gsd/` directory at your project root: + +``` +.gsd/ + PROJECT.md — living description of what the project is + REQUIREMENTS.md — requirement contract (active/validated/deferred) + DECISIONS.md — append-only architectural decisions log + KNOWLEDGE.md — cross-session rules, patterns, and lessons + RUNTIME.md — runtime context: API endpoints, env vars, services + STATE.md — quick-glance status of current work + PREFERENCES.md — project-level preferences (optional) + milestones/ + M001/ + M001-ROADMAP.md — slice plan with risk levels and dependencies + M001-CONTEXT.md — scope and goals from discussion phase + slices/ + S01/ + S01-PLAN.md — task decomposition for this slice + S01-SUMMARY.md — what was built and what changed + S01-UAT.md — human test script + tasks/ + T01-PLAN.md — detailed plan for this task + T01-SUMMARY.md — what the task accomplished +``` + +### Key Files + +| File | Purpose | +|------|---------| +| `PROJECT.md` | High-level project description, updated as the project evolves | +| `REQUIREMENTS.md` | Formal requirement contract — tracks what's active, validated, and deferred | +| `DECISIONS.md` | Append-only log of architectural decisions with rationale | +| `KNOWLEDGE.md` | Rules, patterns, and lessons learned across sessions — GSD reads this at the start of every task | +| `RUNTIME.md` | Runtime context like API URLs, ports, and environment variables | +| 
`STATE.md` | Current status at a glance — auto-generated, don't edit manually | + +## How Work Flows + +Each slice flows through phases: + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice +``` + +1. **Plan** — GSD scouts the codebase, researches relevant docs, and decomposes the slice into tasks with clear requirements +2. **Execute** — Each task runs in a fresh AI session with focused context +3. **Complete** — GSD writes summaries, generates a UAT script, and commits +4. **Reassess** — The roadmap is checked against reality — slices may be reordered, added, or removed +5. **Next Slice** — The loop continues until all slices are done + +After all slices complete, a **milestone validation** gate checks that success criteria were actually met before sealing the milestone. + +## Adding Knowledge + +GSD maintains a knowledge base that persists across sessions. Add rules, patterns, or lessons: + +``` +/gsd knowledge rule "Always use parameterized queries for database access" +/gsd knowledge pattern "Service classes go in src/services/" +/gsd knowledge lesson "The OAuth flow requires the redirect URL to match exactly" +``` + +This knowledge is injected into every task prompt automatically. diff --git a/gitbook/core-concepts/step-mode.md b/gitbook/core-concepts/step-mode.md new file mode 100644 index 000000000..750c56728 --- /dev/null +++ b/gitbook/core-concepts/step-mode.md @@ -0,0 +1,54 @@ +# Step Mode + +Step mode is GSD's interactive, one-step-at-a-time workflow. You stay in the loop, reviewing output between each step. + +## Starting Step Mode + +``` +/gsd +``` + +GSD reads the state of your `.gsd/` directory and presents a wizard showing what's completed and what's next. It then executes one unit of work and pauses. 
+ +## How It Works + +Step mode adapts to your project's current state: + +| State | What Happens | +|-------|-------------| +| No `.gsd/` directory | Starts a discussion flow to capture your project vision | +| Milestone exists, no roadmap | Opens a discussion or research phase for the milestone | +| Roadmap exists, slices pending | Plans the next slice or executes the next task | +| Mid-task | Resumes where you left off | + +After each unit completes, you see results and decide what to do next. This is ideal for: + +- New projects where you want to shape the architecture +- Critical work where you want to review each step +- Learning how GSD works before trusting auto mode + +## Steering During Step Mode + +Between steps, you can: + +- **Discuss** — `/gsd discuss` to talk through architecture decisions +- **Skip** — `/gsd skip` to prevent a unit from being dispatched +- **Undo** — `/gsd undo` to revert the last completed unit +- **Switch to auto** — `/gsd auto` to let GSD continue autonomously + +## When to Use Step Mode + +- **First milestone** — Review GSD's work before trusting it to run solo +- **Architectural decisions** — When you want to guide the approach +- **Unfamiliar codebases** — When you want to ensure GSD understands the project +- **High-stakes changes** — When mistakes would be costly + +## Transitioning to Auto Mode + +Once you're comfortable with GSD's approach, switch to auto mode: + +``` +/gsd auto +``` + +You can always press **Escape** to pause auto mode and return to step-by-step control. diff --git a/gitbook/features/captures.md b/gitbook/features/captures.md new file mode 100644 index 000000000..54a2a27e1 --- /dev/null +++ b/gitbook/features/captures.md @@ -0,0 +1,54 @@ +# Captures & Triage + +Captures let you fire-and-forget thoughts during auto-mode execution. Instead of pausing auto mode to steer, capture ideas, bugs, or scope changes and let GSD triage them at natural seams between tasks. 
+ +## Quick Start + +While auto mode is running (or any time): + +``` +/gsd capture "add rate limiting to the API endpoints" +/gsd capture "the auth flow should support OAuth, not just JWT" +``` + +Captures are appended to `.gsd/CAPTURES.md` and triaged automatically between tasks. + +## How It Works + +``` +Capture → Triage → Confirm → Resolve → Resume +``` + +1. **Capture** — your thought is saved with a timestamp +2. **Triage** — between tasks, GSD classifies each capture +3. **Confirm** — you see the proposed resolution and approve or adjust +4. **Resolve** — the resolution is applied +5. **Resume** — auto mode continues + +## Classification Types + +Each capture is classified into one of five types: + +| Type | Meaning | What Happens | +|------|---------|-------------| +| `quick-task` | Small, self-contained fix | Executed immediately | +| `inject` | New task needed in current slice | Task added to active slice | +| `defer` | Important but not urgent | Deferred to roadmap reassessment | +| `replan` | Changes the current approach | Triggers slice replan | +| `note` | Informational, no action needed | Acknowledged, no changes | + +Plan-modifying resolutions (inject, replan) require your confirmation. + +## Manual Triage + +Trigger triage manually at any time: + +``` +/gsd triage +``` + +Useful when you've accumulated several captures and want to process them before the next natural seam. + +## Dashboard Integration + +The progress widget shows a pending capture count badge when captures are waiting for triage. diff --git a/gitbook/features/cost-management.md b/gitbook/features/cost-management.md new file mode 100644 index 000000000..62204c586 --- /dev/null +++ b/gitbook/features/cost-management.md @@ -0,0 +1,74 @@ +# Cost Management + +GSD tracks token usage and cost for every unit of work during auto mode. This data powers the dashboard, budget enforcement, and cost projections. 
+ +## Viewing Costs + +**Dashboard:** Press `Ctrl+Alt+G` or type `/gsd status` for real-time cost breakdown. + +**Visualizer:** `/gsd visualize` → Metrics tab for detailed charts. + +**Aggregations:** +- By phase (research, planning, execution, completion, reassessment) +- By slice +- By model +- Project totals + +## Budget Ceiling + +Set a maximum spend: + +```yaml +budget_ceiling: 50.00 +``` + +### Enforcement Modes + +```yaml +budget_enforcement: pause # default when ceiling is set +``` + +| Mode | What Happens | +|------|-------------| +| `warn` | Log a warning, keep going | +| `pause` | Pause auto mode, wait for you | +| `halt` | Stop auto mode entirely | + +## Cost Projections + +Once at least two slices have completed, GSD projects the remaining cost: + +``` +Projected remaining: $12.40 ($6.20/slice avg × 2 remaining) +``` + +## Budget Pressure + +When approaching the budget ceiling, GSD automatically uses cheaper models: + +| Budget Used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard tasks downgrade to lighter models | +| 75-90% | More aggressive downgrading | +| > 90% | Nearly everything downgrades; only complex tasks stay at standard | + +This spreads your budget across remaining work instead of exhausting it early. 
+ +## Token Profiles & Cost + +| Profile | Typical Savings | How | +|---------|----------------|-----| +| `budget` | 40-60% | Cheaper models, phase skipping, minimal context | +| `balanced` | 10-20% | Default models, standard context | +| `quality` | 0% (baseline) | All phases, full context | + +## Tips + +- Start with `balanced` profile and a generous `budget_ceiling` to establish baseline costs +- Check `/gsd status` after a few slices to see per-slice cost averages +- Switch to `budget` for well-understood, repetitive work +- Use `quality` only when architectural decisions are being made +- Use per-phase model selection to save: Opus for planning, Sonnet for execution +- Enable `dynamic_routing` for automatic model downgrading on simple tasks +- Use `/gsd visualize` → Metrics tab to see where your budget is going diff --git a/gitbook/features/dynamic-model-routing.md b/gitbook/features/dynamic-model-routing.md new file mode 100644 index 000000000..a9903f715 --- /dev/null +++ b/gitbook/features/dynamic-model-routing.md @@ -0,0 +1,88 @@ +# Dynamic Model Routing + +Dynamic model routing automatically selects cheaper models for simple work and reserves expensive models for complex tasks. This reduces cost by 20-50% without sacrificing quality where it matters. + +## Enabling + +```yaml +dynamic_routing: + enabled: true +``` + +## How It Works + +Each unit passes through two stages: + +1. **Complexity classification** — classifies work as light, standard, or heavy +2. **Capability scoring** — within the tier, ranks models by how well they match the task + +**Key rule:** Your configured model is always the ceiling — routing never upgrades beyond what you've set. 
+ +| Tier | Typical Work | Model Level | +|------|-------------|-------------| +| Light | Slice completion, UAT, hooks | Haiku-class | +| Standard | Research, planning, execution | Sonnet-class | +| Heavy | Replanning, roadmap reassessment | Opus-class | + +## Configuration + +```yaml +dynamic_routing: + enabled: true + tier_models: # optional: explicit model per tier + light: claude-haiku-4-5 + standard: claude-sonnet-4-6 + heavy: claude-opus-4-6 + escalate_on_failure: true # bump tier on failure (default) + budget_pressure: true # auto-downgrade near budget ceiling (default) + cross_provider: true # consider models from other providers (default) + capability_routing: true # score models by task fit (default) +``` + +### Escalate on Failure + +When a task fails at a given tier, the router escalates to the next tier on retry: Light → Standard → Heavy. This prevents cheap models from burning retries on work that needs more reasoning. + +### Budget Pressure + +When approaching the budget ceiling, the router progressively downgrades: + +| Budget Used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard → Light | +| 75-90% | More aggressive | +| > 90% | Nearly everything → Light | + +### Cross-Provider + +When enabled, the router may select models from providers other than your primary, using the built-in cost table to find the cheapest model at each tier. + +### Capability Routing + +Models are scored across 7 dimensions: coding, debugging, research, reasoning, speed, long context handling, and instruction following. Different task types weight these dimensions differently — a research task prioritizes research and reasoning, while an execution task prioritizes coding and instruction following. + +Set `capability_routing: false` to revert to simple cheapest-in-tier selection. 
+ +## Interaction with Token Profiles + +Dynamic routing and token profiles work together: + +- **Token profiles** control phase skipping and context compression +- **Dynamic routing** controls per-unit model selection + +The `budget` profile + dynamic routing provides maximum cost savings. + +## Adaptive Learning + +GSD tracks routing outcomes in `.gsd/routing-history.json`. If a tier's failure rate exceeds 20% for a given task type, future classifications are bumped up. + +Use `/gsd rate` to submit feedback: + +``` +/gsd rate over # too powerful — use cheaper next time +/gsd rate ok # just right +/gsd rate under # too weak — use stronger next time +``` + +Feedback is weighted 2x compared to automatic outcomes. diff --git a/gitbook/features/github-sync.md b/gitbook/features/github-sync.md new file mode 100644 index 000000000..aa89c8602 --- /dev/null +++ b/gitbook/features/github-sync.md @@ -0,0 +1,44 @@ +# GitHub Sync + +GSD can auto-sync milestones, slices, and tasks to GitHub Issues, PRs, and Milestones. + +## Setup + +1. Install and authenticate the `gh` CLI: + ```bash + gh auth login + ``` + +2. Enable in preferences: + ```yaml + github: + enabled: true + repo: "owner/repo" # auto-detected from git remote if omitted + labels: [gsd, auto-generated] # labels for created items + ``` + +## Commands + +| Command | Description | +|---------|-------------| +| `/github-sync bootstrap` | Initial setup — creates GitHub Milestones, Issues, and draft PRs from current `.gsd/` state | +| `/github-sync status` | Show sync mapping counts (milestones, slices, tasks) | + +## How It Works + +- Milestones → GitHub Milestones +- Slices → GitHub Issues (linked to milestone) +- Tasks → GitHub Issue checklists +- Completed slices → Draft PRs + +Sync mapping is persisted in `.gsd/.github-sync.json`. The sync is rate-limit aware — it skips when the GitHub API rate limit is low. 
+
+## Configuration
+
+```yaml
+github:
+  enabled: true
+  repo: "owner/repo"
+  labels: [gsd, auto-generated]
+  project: "Project ID" # optional: GitHub Project board
+```
diff --git a/gitbook/features/headless.md b/gitbook/features/headless.md
new file mode 100644
index 000000000..5cc1e9351
--- /dev/null
+++ b/gitbook/features/headless.md
@@ -0,0 +1,86 @@
+# Headless & CI Mode
+
+`gsd headless` runs GSD commands without a terminal UI — designed for CI pipelines, cron jobs, and scripted automation.
+
+## Basic Usage
+
+```bash
+# Run auto mode
+gsd headless
+
+# Run a single unit
+gsd headless next
+
+# With timeout for CI
+gsd headless --timeout 600000 auto
+
+# Force a specific phase
+gsd headless dispatch plan
+
+# Stream all events as JSONL
+gsd headless --json auto
+```
+
+## Creating Milestones Headlessly
+
+```bash
+# From a context file
+gsd headless new-milestone --context brief.md --auto
+
+# From inline text
+gsd headless new-milestone --context-text "Build a REST API with auth"
+
+# Pipe from stdin
+echo "Build a CLI tool" | gsd headless new-milestone --context -
+```
+
+## CLI Flags
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--timeout N` | 300000 (5 min) | Overall timeout in milliseconds |
+| `--max-restarts N` | 3 | Auto-restart on crash (0 to disable) |
+| `--json` | — | Stream events as JSONL to stdout |
+| `--model ID` | — | Override model for this session |
+| `--context <file>` | — | Context file for `new-milestone` (use `-` for stdin) |
+| `--context-text <text>` | — | Inline context for `new-milestone` |
+| `--auto` | — | Chain into auto mode after milestone creation |
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| `0` | Complete |
+| `1` | Error or timeout |
+| `2` | Blocked |
+
+## Instant State Query
+
+`gsd headless query` returns a JSON snapshot of project state — no AI session, instant response (~50ms):
+
+```bash
+gsd headless query | jq '.state.phase'
+# "executing"
+
+gsd headless query | jq '.next'
+# 
{"action":"dispatch","unitType":"execute-task","unitId":"M001/S01/T03"} + +gsd headless query | jq '.cost.total' +# 4.25 +``` + +Any `/gsd` subcommand works as a positional argument: `gsd headless status`, `gsd headless doctor`, etc. + +## MCP Server Mode + +`gsd --mode mcp` runs GSD as a Model Context Protocol server over stdin/stdout, exposing all GSD tools to external AI clients: + +```bash +gsd --mode mcp +``` + +Compatible with Claude Desktop, VS Code Copilot, and any MCP host. + +## Auto-Restart + +In headless mode, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). SIGINT/SIGTERM bypasses restart. Combined with crash recovery, this enables true overnight unattended execution. diff --git a/gitbook/features/parallel.md b/gitbook/features/parallel.md new file mode 100644 index 000000000..a94615308 --- /dev/null +++ b/gitbook/features/parallel.md @@ -0,0 +1,97 @@ +# Parallel Orchestration + +Run multiple milestones simultaneously in isolated git worktrees. Each milestone gets its own worker process, branch, and context window. + +{% hint style="info" %} +Parallel mode is off by default. Enable it in preferences to use `/gsd parallel` commands. +{% endhint %} + +## Quick Start + +1. Enable parallel mode: + ```yaml + parallel: + enabled: true + max_workers: 2 + ``` + +2. Start parallel execution: + ``` + /gsd parallel start + ``` + GSD scans milestones, checks dependencies and file overlap, shows an eligibility report, and spawns workers. + +3. Monitor: + ``` + /gsd parallel status + ``` + +4. 
Stop: + ``` + /gsd parallel stop + ``` + +## How It Works + +Each worker is a separate GSD process with complete isolation: + +| Resource | Isolation | +|----------|----------| +| Filesystem | Own git worktree | +| Git branch | `milestone/` | +| Context window | Separate process | +| Metrics | Own `metrics.json` | +| Crash recovery | Own `auto.lock` | + +Workers communicate with the coordinator through file-based IPC — heartbeat files and signal files in `.gsd/parallel/`. + +## Eligibility + +Before starting, GSD checks which milestones can run concurrently: + +1. **Not complete** — finished milestones are skipped +2. **Dependencies satisfied** — all `dependsOn` entries must be complete +3. **File overlap** — milestones touching the same files get a warning (but are still eligible since they run in separate worktrees) + +## Configuration + +```yaml +parallel: + enabled: false # master toggle (default: false) + max_workers: 2 # concurrent workers (1-4) + budget_ceiling: 50.00 # aggregate cost limit + merge_strategy: "per-milestone" # when to merge back + auto_merge: "confirm" # "auto", "confirm", or "manual" +``` + +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd parallel start` | Analyze and start workers | +| `/gsd parallel status` | Show all workers with progress and cost | +| `/gsd parallel stop [MID]` | Stop all or a specific worker | +| `/gsd parallel pause [MID]` | Pause all or a specific worker | +| `/gsd parallel resume [MID]` | Resume paused workers | +| `/gsd parallel merge [MID]` | Merge completed milestones to main | + +## Merge Reconciliation + +When milestones complete, their changes merge back to main: + +- `.gsd/` state files are auto-resolved +- Code conflicts halt the merge — resolve manually and retry with `/gsd parallel merge ` + +## Budget Management + +When `budget_ceiling` is set, aggregate cost across all workers is tracked. When the ceiling is reached, workers are signaled to stop. 
+
+## Troubleshooting
+
+| Problem | Fix |
+|---------|-----|
+| "Parallel mode is not enabled" | Set `parallel.enabled: true` |
+| "No eligible milestones" | All milestones are complete or blocked; check `/gsd queue` |
+| Worker crashed | Run `/gsd doctor --fix`, then `/gsd parallel start` |
+| Merge conflicts | Resolve in `.gsd/worktrees/<MID>/`, then `/gsd parallel merge <MID>` |
+| Workers seem stuck | Check if budget ceiling was reached via `/gsd parallel status` |
diff --git a/gitbook/features/remote-questions.md b/gitbook/features/remote-questions.md
new file mode 100644
index 000000000..2c16ef8db
--- /dev/null
+++ b/gitbook/features/remote-questions.md
@@ -0,0 +1,90 @@
+# Remote Questions
+
+Remote questions let GSD ask for your input via Slack, Discord, or Telegram when running in headless auto mode. When GSD needs a decision, it posts the question to your configured channel and polls for a response.
+
+## Setup
+
+### Discord
+
+```
+/gsd remote discord
+```
+
+The wizard prompts for your bot token, validates it, lets you pick a server and channel, sends a test message, and saves the config.
+ +**Bot requirements:** +- A bot application with a token from the [Discord Developer Portal](https://discord.com/developers/applications) +- Bot invited to the server with: Send Messages, Read Message History, Add Reactions, View Channel +- `DISCORD_BOT_TOKEN` environment variable set + +### Slack + +``` +/gsd remote slack +``` + +**Bot requirements:** +- A Slack app with a bot token (`xoxb-...`) from [Slack API](https://api.slack.com/apps) +- Bot invited to the target channel +- Scopes: `chat:write`, `reactions:read`, `reactions:write`, `channels:read`, `groups:read`, `channels:history`, `groups:history` + +### Telegram + +``` +/gsd remote telegram +``` + +**Bot requirements:** +- A bot token from [@BotFather](https://t.me/BotFather) +- Bot added to the target group chat +- `TELEGRAM_BOT_TOKEN` environment variable set + +## Configuration + +```yaml +remote_questions: + channel: discord # or slack or telegram + channel_id: "1234567890123456789" + timeout_minutes: 5 # 1-30, default 5 + poll_interval_seconds: 5 # 2-30, default 5 +``` + +## How It Works + +1. GSD encounters a decision point during auto mode +2. The question is posted to your channel as a rich message +3. GSD polls for a response at the configured interval +4. You respond by: + - **Reacting** with a number emoji (1️⃣, 2️⃣, etc.) for single-question prompts + - **Replying** with a number, comma-separated numbers, or free text +5. GSD picks up the response and continues +6. A ✅ reaction confirms receipt + +### Response Formats + +**Single question:** React with a number emoji, reply with a number, or reply with free text. + +**Multiple questions:** Reply with semicolons (`1;2;custom text`) or newlines (one answer per line). + +### Timeouts + +If no response arrives within `timeout_minutes`, GSD continues with a timeout result — typically making a conservative default choice. 
+ +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd remote` | Show menu and current status | +| `/gsd remote slack` | Set up Slack | +| `/gsd remote discord` | Set up Discord | +| `/gsd remote telegram` | Set up Telegram | +| `/gsd remote status` | Show current config | +| `/gsd remote disconnect` | Remove configuration | + +## Troubleshooting + +| Problem | Fix | +|---------|-----| +| "Remote auth failed" | Verify bot token is correct and not expired | +| "Could not send to channel" | Check bot has Send Messages permission; invite bot to channel | +| No response detected | Make sure you're replying to the prompt message, not posting a new one | diff --git a/gitbook/features/skills.md b/gitbook/features/skills.md new file mode 100644 index 000000000..4a9fd46b7 --- /dev/null +++ b/gitbook/features/skills.md @@ -0,0 +1,120 @@ +# Skills + +Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance — coding patterns, framework idioms, testing strategies, and tool usage. + +Skills follow the open [Agent Skills standard](https://agentskills.io/) and work across multiple AI agents, not just GSD. + +## Skill Directories + +| Location | Scope | Description | +|----------|-------|------------| +| `~/.agents/skills/` | Global | Shared across all projects | +| `.agents/skills/` (project root) | Project | Project-specific, committable to git | + +Global skills take precedence when names collide. 
+ +## Installing Skills + +Skills are installed via the [skills.sh CLI](https://skills.sh): + +```bash +# Interactive — choose skills and target agents +npx skills add dpearson2699/swift-ios-skills + +# Install specific skills +npx skills add dpearson2699/swift-ios-skills --skill swift-concurrency --skill swiftui-patterns -y + +# Install all from a repo +npx skills add dpearson2699/swift-ios-skills --all + +# Check for updates +npx skills check + +# Update installed skills +npx skills update +``` + +## Onboarding Catalog + +During `gsd init`, GSD detects your project's tech stack and recommends relevant skill packs: + +- **Swift** — SwiftUI, Swift Core, concurrency, Charts, Testing +- **iOS** — App Intents, Widgets, StoreKit, MapKit, Core ML, Vision, accessibility +- **Web** — React, React Native, frontend design, accessibility +- **Languages** — Rust, Python, Go patterns and best practices +- **General** — Document handling (PDF, DOCX, XLSX) + +## Skill Discovery + +The `skill_discovery` preference controls how GSD finds skills during auto mode: + +| Mode | Behavior | +|------|----------| +| `auto` | Skills found and applied automatically | +| `suggest` | Skills identified but require confirmation (default) | +| `off` | No skill discovery | + +## Skill Preferences + +Control which skills are used: + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +avoid_skills: + - security-docker +skill_rules: + - when: task involves authentication + use: [clerk] + - when: frontend styling work + prefer: [frontend-design] +``` + +## Creating Custom Skills + +Create your own skill by adding a directory with a `SKILL.md` file: + +``` +~/.agents/skills/my-skill/ + SKILL.md — instructions for the AI + references/ — optional reference files +``` + +The `SKILL.md` contains instructions the AI follows when the skill is active. 
+ +### Project-Local Skills + +Place skills in your project root for project-specific guidance: + +``` +.agents/skills/my-project-skill/ + SKILL.md +``` + +Project-local skills can be committed to git so team members share the same skill set. + +## Skill Health Dashboard + +Track skill performance: + +``` +/gsd skill-health # overview table +/gsd skill-health rust-core # detailed view for one skill +/gsd skill-health --stale 30 # skills unused for 30+ days +/gsd skill-health --declining # skills with falling success rates +``` + +The dashboard flags: +- Success rate below 70% over the last 10 uses +- Token usage rising 20%+ compared to previous window +- Skills unused beyond the configured threshold + +### Staleness Detection + +```yaml +skill_staleness_days: 60 # flag skills unused for 60+ days (0 to disable) +``` + +Stale skills are excluded from automatic matching but remain available for explicit use. diff --git a/gitbook/features/teams.md b/gitbook/features/teams.md new file mode 100644 index 000000000..44dac0c57 --- /dev/null +++ b/gitbook/features/teams.md @@ -0,0 +1,91 @@ +# Working in Teams + +GSD supports multi-user workflows where several developers work on the same repository concurrently. + +## Quick Setup + +The simplest way: set team mode in your project preferences. + +```yaml +# .gsd/PREFERENCES.md (committed to git) +--- +version: 1 +mode: team +--- +``` + +This enables unique milestone IDs, push branches, pre-merge checks, and other team-appropriate defaults in one setting. + +## What Team Mode Does + +| Setting | Effect | +|---------|--------| +| `unique_milestone_ids` | IDs like `M001-eh88as` instead of `M001` — no collisions | +| `git.push_branches` | Milestone branches are pushed to remote | +| `git.pre_merge_check` | Validation runs before merging | + +You can override individual settings on top of `mode: team`. 
+ +## Configure `.gitignore` + +Share planning artifacts while keeping runtime files local: + +```bash +# Runtime files (per-developer, gitignore these) +.gsd/auto.lock +.gsd/completed-units.json +.gsd/STATE.md +.gsd/metrics.json +.gsd/activity/ +.gsd/runtime/ +.gsd/worktrees/ +.gsd/milestones/**/continue.md +.gsd/milestones/**/*-CONTINUE.md +``` + +**What gets shared** (committed to git): +- `.gsd/PREFERENCES.md` — project preferences +- `.gsd/PROJECT.md` — living project description +- `.gsd/REQUIREMENTS.md` — requirement contract +- `.gsd/DECISIONS.md` — architectural decisions +- `.gsd/milestones/` — roadmaps, plans, summaries, research + +**What stays local** (gitignored): +- Lock files, metrics, state, activity logs, worktrees + +## Commit the Config + +```bash +git add .gsd/PREFERENCES.md +git commit -m "chore: enable GSD team workflow" +``` + +## Keeping `.gsd/` Local + +For teams where only some members use GSD: + +```yaml +git: + commit_docs: false +``` + +This gitignores `.gsd/` entirely. You get structured planning without affecting teammates. + +## Parallel Development + +Multiple developers can run auto mode simultaneously on different milestones. Each developer: + +- Gets their own worktree (`.gsd/worktrees//`) +- Works on a unique `milestone/` branch +- Squash-merges to main independently + +Milestone dependencies can be declared: + +```yaml +# In M00X-CONTEXT.md frontmatter +--- +depends_on: [M001-eh88as] +--- +``` + +GSD enforces that dependent milestones complete before starting downstream work. diff --git a/gitbook/features/token-optimization.md b/gitbook/features/token-optimization.md new file mode 100644 index 000000000..c89493618 --- /dev/null +++ b/gitbook/features/token-optimization.md @@ -0,0 +1,108 @@ +# Token Optimization + +GSD's token optimization system can reduce token usage by 40-60% without sacrificing output quality. It has three pillars: **token profiles**, **context compression**, and **complexity-based task routing**. 
+ +## Token Profiles + +A token profile coordinates model selection, phase skipping, and context compression with a single setting: + +```yaml +token_profile: balanced +``` + +### `budget` — Maximum Savings (40-60%) + +| Setting | Value | +|---------|-------| +| Planning model | Sonnet | +| Execution model | Sonnet | +| Simple task model | Haiku | +| Milestone research | Skipped | +| Slice research | Skipped | +| Roadmap reassessment | Skipped | +| Context level | Minimal | + +Best for: prototyping, small projects, well-understood codebases. + +### `balanced` — Smart Defaults (default) + +| Setting | Value | +|---------|-------| +| All models | User's default | +| Milestone research | Runs | +| Slice research | Skipped | +| Roadmap reassessment | Runs | +| Context level | Standard | + +Best for: most projects, day-to-day development. + +### `quality` — Full Context + +| Setting | Value | +|---------|-------| +| All models | User's configured defaults | +| All phases | Run | +| Context level | Full | + +Best for: complex architectures, greenfield projects, critical work. 
+ +## Context Compression + +Each profile controls how much context is pre-loaded into AI prompts: + +| Profile | What's Included | +|---------|----------------| +| `budget` | Task plan and essential prior summaries only | +| `balanced` | Task plan, summaries, slice plan, roadmap excerpt | +| `quality` | Everything — all plans, summaries, decisions, requirements | + +## Complexity-Based Task Routing + +GSD classifies each task by complexity and routes it to an appropriate model: + +| Complexity | Indicators | Model Level | +|-----------|------------|-------------| +| Simple | ≤3 steps, ≤3 files, short description | Haiku-class | +| Standard | 4-7 steps, 4-7 files | Sonnet-class | +| Complex | ≥8 steps, ≥8 files, complexity keywords | Opus-class | + +**Complexity keywords** that prevent simple classification: `refactor`, `migrate`, `integrate`, `architect`, `security`, `performance`, `concurrent`, `distributed`, and others. + +{% hint style="info" %} +Dynamic routing requires `models` configured in your preferences and `dynamic_routing.enabled: true`. See [Dynamic Model Routing](dynamic-model-routing.md). +{% endhint %} + +## Overriding Profile Defaults + +The `token_profile` sets defaults, but explicit preferences always win: + +```yaml +token_profile: budget +phases: + skip_research: false # override: keep research +models: + planning: claude-opus-4-6 # override: use Opus for planning +``` + +## Adaptive Learning + +GSD tracks success and failure of tier assignments over time. If a model tier's failure rate exceeds 20% for a given task type, future tasks of that type are bumped to a higher tier. + +Submit manual feedback with: + +``` +/gsd rate over # model was overpowered — use cheaper next time +/gsd rate ok # model was appropriate +/gsd rate under # model was too weak — use stronger next time +``` + +## Observation Masking + +During auto mode, old tool results are replaced with lightweight placeholders before each AI call. 
This reduces token usage between compactions with zero overhead. + +```yaml +context_management: + observation_masking: true # default: true + observation_mask_turns: 8 # keep results from last 8 turns + tool_result_max_chars: 800 # truncate large tool outputs +``` diff --git a/gitbook/features/visualizer.md b/gitbook/features/visualizer.md new file mode 100644 index 000000000..4155ec144 --- /dev/null +++ b/gitbook/features/visualizer.md @@ -0,0 +1,82 @@ +# Workflow Visualizer + +The workflow visualizer is a full-screen terminal overlay showing project progress, dependencies, cost metrics, and execution timeline. + +## Opening + +``` +/gsd visualize +``` + +Or configure automatic display after milestone completion: + +```yaml +auto_visualize: true +``` + +## Tabs + +Switch tabs with `Tab`, `1`-`4`, or arrow keys. + +### 1. Progress + +A tree view of milestones, slices, and tasks with completion status: + +``` +M001: User Management 3/6 tasks + ✅ S01: Auth module 3/3 tasks + ✅ T01: Core types + ✅ T02: JWT middleware + ✅ T03: Login flow + ⏳ S02: User dashboard 1/2 tasks + ✅ T01: Layout component + ⬜ T02: Profile page +``` + +### 2. Dependencies + +An ASCII dependency graph showing slice relationships: + +``` +S01 ──→ S02 ──→ S04 + └───→ S03 ──↗ +``` + +### 3. Metrics + +Bar charts showing cost and token usage: + +- By phase (research, planning, execution, completion) +- By slice (with running totals) +- By model (which models consumed the most budget) + +### 4. Timeline + +Chronological execution history: unit type, timestamps, duration, model, and token counts. + +## Controls + +| Key | Action | +|-----|--------| +| `Tab` | Next tab | +| `Shift+Tab` | Previous tab | +| `1`-`4` | Jump to tab | +| `↑`/`↓` | Scroll | +| `Escape` / `q` | Close | + +The visualizer auto-refreshes every 2 seconds, staying current alongside running auto mode. 
+ +## HTML Reports + +For shareable reports outside the terminal: + +``` +/gsd export --html # current milestone +/gsd export --html --all # all milestones +``` + +Generates self-contained HTML files in `.gsd/reports/` with progress tree, dependency graph, cost charts, timeline, and changelog. All CSS and JS are inlined — no external dependencies. Printable to PDF from any browser. + +```yaml +auto_report: true # auto-generate after milestone completion (default) +``` diff --git a/gitbook/features/web-interface.md b/gitbook/features/web-interface.md new file mode 100644 index 000000000..6870ffc41 --- /dev/null +++ b/gitbook/features/web-interface.md @@ -0,0 +1,37 @@ +# Web Interface + +GSD includes a browser-based interface for project management and real-time progress monitoring. + +## Quick Start + +```bash +gsd --web +``` + +This starts a local web server and opens the dashboard in your default browser. + +## CLI Flags + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address | +| `--port` | `3000` | Port | +| `--allowed-origins` | (none) | Comma-separated CORS origins | + +## Features + +- **Project management** — view milestones, slices, and tasks in a visual dashboard +- **Real-time progress** — live updates as auto mode executes +- **Multi-project support** — manage multiple projects from one browser tab via `?project=` URL parameter +- **Change project root** — switch directories from the web UI without restarting +- **Onboarding flow** — API key setup and provider configuration in the browser +- **Model selection** — switch models and providers from the web UI + +## Platform Notes + +- **macOS/Linux** — Full support +- **Windows** — Web build is skipped due to Next.js compatibility issues; CLI remains fully functional diff --git a/gitbook/features/workflow-templates.md b/gitbook/features/workflow-templates.md 
new file mode 100644 index 000000000..45246a33b --- /dev/null +++ b/gitbook/features/workflow-templates.md @@ -0,0 +1,45 @@ +# Workflow Templates + +Workflow templates are pre-built patterns for common development tasks. Instead of setting up a full milestone for a quick bugfix or spike, use a template to get started immediately. + +## Using Templates + +``` +/gsd start # pick from available templates +/gsd start resume # resume an in-progress workflow +``` + +## Available Templates + +| Template | Purpose | +|----------|---------| +| `bugfix` | Fix a specific bug with diagnosis and verification | +| `spike` | Time-boxed investigation or prototype | +| `feature` | Standard feature development | +| `hotfix` | Urgent production fix | +| `refactor` | Code restructuring and cleanup | +| `security-audit` | Security review and remediation | +| `dep-upgrade` | Dependency update and migration | +| `full-project` | Complete project from scratch | + +## Listing and Inspecting + +``` +/gsd templates # list all available templates +/gsd templates info # show details for a template +``` + +## Custom Workflows + +Create your own workflow definitions: + +``` +/gsd workflow new # create a new workflow YAML +/gsd workflow run # start a workflow run +/gsd workflow list # list active runs +/gsd workflow validate # validate definition +/gsd workflow pause # pause running workflow +/gsd workflow resume # resume paused workflow +``` + +Custom workflows are defined in YAML and can specify phases, dependencies, and configuration for each step. diff --git a/gitbook/getting-started/choosing-a-model.md b/gitbook/getting-started/choosing-a-model.md new file mode 100644 index 000000000..64b2e3aad --- /dev/null +++ b/gitbook/getting-started/choosing-a-model.md @@ -0,0 +1,94 @@ +# Choosing a Model + +GSD auto-selects a default model after you log in to a provider. You can switch models at any time. 
+ +## Switch Models + +Inside a GSD session, type: + +``` +/model +``` + +This opens an interactive picker showing all available models from your configured providers. + +## Per-Phase Models + +Different phases of work have different requirements. You can assign specific models to each phase in your preferences: + +```yaml +models: + research: claude-sonnet-4-6 # scouting and research + planning: claude-opus-4-6 # architectural decisions + execution: claude-sonnet-4-6 # writing code + execution_simple: claude-haiku-4-5 # simple tasks (docs, config) + completion: claude-sonnet-4-6 # summaries and wrap-up + subagent: claude-sonnet-4-6 # delegated sub-tasks +``` + +Omit a key to use whatever model is currently active for that phase. + +## Model Fallbacks + +If a model is unavailable (provider down, rate limited, credits exhausted), GSD can automatically fall back to another: + +```yaml +models: + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + - openrouter/moonshotai/kimi-k2.5 +``` + +Fallbacks are tried in order until one works. + +## Token Profiles + +Token profiles coordinate model selection, phase skipping, and context compression with a single setting: + +| Profile | Cost Savings | Best For | +|---------|-------------|----------| +| `budget` | 40-60% | Prototyping, small projects, well-understood codebases | +| `balanced` | 10-20% | Most projects, day-to-day development (default) | +| `quality` | 0% (baseline) | Complex architectures, greenfield projects, critical work | + +```yaml +token_profile: balanced +``` + +See [Token Optimization](../features/token-optimization.md) for details. + +## Dynamic Model Routing + +When enabled, GSD automatically picks cheaper models for simple tasks and reserves expensive ones for complex work: + +```yaml +dynamic_routing: + enabled: true +``` + +A documentation fix gets Haiku. An architectural refactor gets Opus. 
Your configured model is always the ceiling — routing never upgrades beyond what you've set. + +See [Dynamic Model Routing](../features/dynamic-model-routing.md) for the full guide. + +## Supported Providers + +GSD supports 20+ providers out of the box. See [Provider Setup](../configuration/providers.md) for setup instructions: + +| Provider | Auth Method | +|----------|-------------| +| Anthropic (Claude) | OAuth or API key | +| OpenAI | API key | +| Google Gemini | API key | +| OpenRouter | API key | +| Groq | API key | +| xAI (Grok) | API key | +| Mistral | API key | +| GitHub Copilot | OAuth | +| Amazon Bedrock | IAM credentials | +| Vertex AI | ADC | +| Azure OpenAI | API key | +| Ollama | Local (no auth) | +| LM Studio | Local (no auth) | +| vLLM / SGLang | Local (no auth) | diff --git a/gitbook/getting-started/first-project.md b/gitbook/getting-started/first-project.md new file mode 100644 index 000000000..dd0551035 --- /dev/null +++ b/gitbook/getting-started/first-project.md @@ -0,0 +1,128 @@ +# Your First Project + +## Launch GSD + +Open a terminal in any project directory (or an empty one) and run: + +```bash +gsd +``` + +GSD shows a welcome screen with your version, active model, and available tool keys. + +## Start a Discussion + +Type `/gsd` to enter step mode. GSD reads the state of your project directory and determines the next logical action: + +- **No `.gsd/` directory** — starts a discussion flow to capture your project vision +- **Milestone exists, no roadmap** — discuss or research the milestone +- **Roadmap exists, slices pending** — plan the next slice or execute a task +- **Mid-task** — resume where you left off + +For a new project, GSD will ask you to describe what you want to build. Talk through your vision — GSD captures requirements, architectural decisions, and scope. 
+ +## The Project Hierarchy + +After discussion, GSD organizes your work into: + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable feature (1-7 tasks) + Task → one context-window-sized unit of work +``` + +The key rule: **a task must fit in one AI context window.** If it can't, it becomes two tasks. + +## Run Auto Mode + +Once you have a milestone and roadmap, let GSD take the wheel: + +``` +/gsd auto +``` + +GSD autonomously: +1. **Plans** each slice — scouts the codebase, researches docs, decomposes into tasks +2. **Executes** each task — writes code in a fresh AI session +3. **Completes** the slice — writes summaries, commits with meaningful messages +4. **Reassesses** the roadmap — checks if the plan still makes sense +5. **Repeats** until the milestone is done + +## The Two-Terminal Workflow + +The recommended approach: auto mode in one terminal, steering from another. + +**Terminal 1 — let it build:** + +```bash +gsd +/gsd auto +``` + +**Terminal 2 — steer while it works:** + +```bash +gsd +/gsd discuss # talk through architecture decisions +/gsd status # check progress +/gsd queue # queue the next milestone +/gsd capture "add rate limiting to the API" # fire-and-forget thought +``` + +Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically. + +## Check Progress + +Press `Ctrl+Alt+G` or type `/gsd status` to see the dashboard: + +- Current milestone, slice, and task +- Elapsed time and phase +- Per-unit cost and token breakdown +- Completed and in-progress work + +## Resume a Session + +```bash +gsd --continue # or gsd -c +``` + +Resumes the most recent session for the current directory. + +To browse and pick from all saved sessions: + +```bash +gsd sessions +``` + +Shows each session's date, message count, and preview so you can choose which to resume. 
+ +## What's on Disk + +All state lives in `.gsd/` inside your project: + +``` +.gsd/ + PROJECT.md — what the project is + REQUIREMENTS.md — requirement contract + DECISIONS.md — architectural decisions + KNOWLEDGE.md — cross-session rules and patterns + STATE.md — quick-glance status + milestones/ + M001/ + M001-ROADMAP.md — slice plan with dependencies + M001-CONTEXT.md — scope and goals + slices/ + S01/ + S01-PLAN.md — task decomposition + S01-SUMMARY.md — what happened + S01-UAT.md — test script + tasks/ + T01-PLAN.md + T01-SUMMARY.md +``` + +## Next Steps + +- [Auto Mode](../core-concepts/auto-mode.md) — deep dive into autonomous execution +- [Preferences](../configuration/preferences.md) — model selection, timeouts, budgets +- [Commands](../reference/commands.md) — all commands and shortcuts diff --git a/gitbook/getting-started/installation.md b/gitbook/getting-started/installation.md new file mode 100644 index 000000000..e1e72fe80 --- /dev/null +++ b/gitbook/getting-started/installation.md @@ -0,0 +1,84 @@ +# Installation + +## Install GSD + +```bash +npm install -g gsd-pi +``` + +Requires **Node.js 22.0.0 or later** (24 LTS recommended) and **Git**. + +{% hint style="info" %} +**`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [Troubleshooting](../reference/troubleshooting.md) for details. +{% endhint %} + +GSD checks for updates once every 24 hours. When a new version is available, you'll see a prompt at startup with the option to update immediately or skip. You can also update from within a session with `/gsd update`. + +## Set Up Your LLM Provider + +Launch GSD for the first time: + +```bash +gsd +``` + +The setup wizard walks you through: + +1. **LLM Provider** — choose from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). 
OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key. +2. **Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any. + +Re-run the wizard anytime with: + +```bash +gsd config +``` + +For detailed provider setup, see [Provider Setup](../configuration/providers.md). + +## Set Up API Keys for Tools + +If you use a non-Anthropic model, you may need a search API key for web search. Run `/gsd config` inside any GSD session to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects. + +| Tool | Purpose | Get a Key | +|------|---------|-----------| +| Tavily Search | Web search for non-Anthropic models | [tavily.com](https://tavily.com/app/api-keys) | +| Brave Search | Web search for non-Anthropic models | [brave.com](https://brave.com/search/api) | +| Context7 Docs | Library documentation lookup | [context7.com](https://context7.com/dashboard) | + +Anthropic models have built-in web search and don't need these keys. + +## VS Code Extension + +GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions. + +The extension provides: + +- **`@gsd` chat participant** — talk to the agent in VS Code Chat +- **Sidebar dashboard** — connection status, model info, token usage, quick actions +- **Full command palette** — start/stop agent, switch models, export sessions + +The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. + +## Web Interface + +GSD also has a browser-based interface: + +```bash +gsd --web +``` + +This starts a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](../features/web-interface.md) for details. 
+
+## Alternative Binary Name
+
+If the `gsd` command conflicts with another tool (e.g., the oh-my-zsh git plugin aliases `gsd` to `git svn dcommit`), use the alternative:
+
+```bash
+gsd-cli
+```
+
+Both `gsd` and `gsd-cli` point to the same binary. To remove the conflict permanently, add this to your `~/.zshrc`:
+
+```bash
+unalias gsd 2>/dev/null
+```
diff --git a/gitbook/reference/cli-flags.md b/gitbook/reference/cli-flags.md
new file mode 100644
index 000000000..a1de87f37
--- /dev/null
+++ b/gitbook/reference/cli-flags.md
@@ -0,0 +1,61 @@
+# CLI Flags
+
+## Starting GSD
+
+| Flag | Description |
+|------|-------------|
+| `gsd` | Start a new interactive session |
+| `gsd --continue` (`-c`) | Resume the most recent session |
+| `gsd --model <model>` | Override the default model for this session |
+| `gsd --web [path]` | Start browser-based web interface |
+| `gsd --worktree` (`-w`) [name] | Start in a git worktree |
+| `gsd --no-session` | Disable session persistence |
+| `gsd --extension <name>` | Load an additional extension (repeatable) |
+| `gsd --append-system-prompt <text>` | Append text to the system prompt |
+| `gsd --tools <list>` | Comma-separated tools to enable |
+| `gsd --version` (`-v`) | Print version and exit |
+| `gsd --help` (`-h`) | Print help and exit |
+| `gsd --debug` | Enable diagnostic logging |
+
+## Non-Interactive Modes
+
+| Flag | Description |
+|------|-------------|
+| `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
+| `gsd --mode <mode>` | Output mode for non-interactive use |
+
+## Session Management
+
+| Command | Description |
+|---------|-------------|
+| `gsd sessions` | Interactive session picker — list and resume saved sessions |
+| `gsd --list-models [search]` | List available models and exit |
+
+## Configuration
+
+| Command | Description |
+|---------|-------------|
+| `gsd config` | Set up global API keys |
+| `gsd update` | Update to the latest version |
+
+## Headless Mode
+
+| Flag | Description |
+|------|-------------|
+| `gsd 
headless` | Run without TUI | +| `gsd headless --timeout N` | Timeout in ms (default: 300000) | +| `gsd headless --max-restarts N` | Auto-restart on crash (default: 3) | +| `gsd headless --json` | Stream events as JSONL | +| `gsd headless --model ID` | Override model | +| `gsd headless --context <file>` | Context file for `new-milestone` | +| `gsd headless --context-text <text>` | Inline context for `new-milestone` | +| `gsd headless --auto` | Chain into auto mode after milestone creation | +| `gsd headless query` | Instant JSON state snapshot (~50ms) | + +## Web Interface + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address | +| `--port` | `3000` | Port | +| `--allowed-origins` | (none) | CORS origins | diff --git a/gitbook/reference/commands.md b/gitbook/reference/commands.md new file mode 100644 index 000000000..e042723da --- /dev/null +++ b/gitbook/reference/commands.md @@ -0,0 +1,128 @@ +# Commands + +## Session Commands + +| Command | Description | +|---------|-------------| +| `/gsd` | Step mode — execute one unit at a time | +| `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat | +| `/gsd quick` | Quick task with GSD guarantees but no full planning | +| `/gsd stop` | Stop auto mode gracefully | +| `/gsd pause` | Pause auto mode (preserves state) | +| `/gsd steer` | Modify plan documents during execution | +| `/gsd discuss` | Discuss architecture and decisions | +| `/gsd status` | Progress dashboard | +| `/gsd widget` | Cycle dashboard widget: full / small / min / off | +| `/gsd queue` | Queue and reorder future milestones | +| `/gsd capture` | Fire-and-forget thought capture | +| `/gsd triage` | Manually trigger capture triage | +| `/gsd dispatch` | Dispatch a specific phase directly | +| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` filters) | +| `/gsd forensics` | Full debugger for auto-mode failures | +| `/gsd cleanup` | Clean up state files and stale 
worktrees | +| `/gsd visualize` | Open workflow visualizer | +| `/gsd export --html` | Generate HTML report for current milestone | +| `/gsd export --html --all` | Generate reports for all milestones | +| `/gsd update` | Update GSD to the latest version | +| `/gsd knowledge` | Add persistent project knowledge | +| `/gsd fast` | Toggle service tier for supported models | +| `/gsd rate` | Rate last unit's model tier (over/ok/under) | +| `/gsd changelog` | Show release notes | +| `/gsd logs` | Browse activity and debug logs | +| `/gsd remote` | Control remote auto-mode | +| `/gsd help` | Show all available commands | + +## Configuration & Diagnostics + +| Command | Description | +|---------|-------------| +| `/gsd prefs` | Preferences wizard | +| `/gsd mode` | Switch workflow mode (solo/team) | +| `/gsd config` | Re-run provider setup wizard | +| `/gsd keys` | API key manager | +| `/gsd doctor` | Runtime health checks with auto-fix | +| `/gsd inspect` | Show database diagnostics | +| `/gsd init` | Project init wizard | +| `/gsd setup` | Global setup status | +| `/gsd skill-health` | Skill lifecycle dashboard | +| `/gsd hooks` | Show configured hooks | +| `/gsd migrate` | Migrate v1 `.planning` to `.gsd` format | + +## Milestone Management + +| Command | Description | +|---------|-------------| +| `/gsd new-milestone` | Create a new milestone | +| `/gsd skip` | Prevent a unit from auto-mode dispatch | +| `/gsd undo` | Revert last completed unit | +| `/gsd undo-task` | Reset a specific task's completion state | +| `/gsd reset-slice` | Reset a slice and all its tasks | +| `/gsd park` | Park a milestone (skip without deleting) | +| `/gsd unpark` | Reactivate a parked milestone | + +## Parallel Orchestration + +| Command | Description | +|---------|-------------| +| `/gsd parallel start` | Analyze and start parallel workers | +| `/gsd parallel status` | Show worker state and progress | +| `/gsd parallel stop [MID]` | Stop workers | +| `/gsd parallel pause [MID]` | Pause 
workers | +| `/gsd parallel resume [MID]` | Resume workers | +| `/gsd parallel merge [MID]` | Merge completed milestones | + +## Workflow Templates + +| Command | Description | +|---------|-------------| +| `/gsd start` | Start a workflow template | +| `/gsd start resume` | Resume an in-progress workflow | +| `/gsd templates` | List available templates | +| `/gsd templates info <name>` | Show template details | + +## Custom Workflows + +| Command | Description | +|---------|-------------| +| `/gsd workflow new` | Create a workflow definition | +| `/gsd workflow run <file>` | Start a workflow run | +| `/gsd workflow list` | List workflow runs | +| `/gsd workflow validate <file>` | Validate a workflow YAML | +| `/gsd workflow pause` | Pause workflow auto-mode | +| `/gsd workflow resume` | Resume paused workflow | + +## Extensions + +| Command | Description | +|---------|-------------| +| `/gsd extensions list` | List all extensions | +| `/gsd extensions enable <name>` | Enable an extension | +| `/gsd extensions disable <name>` | Disable an extension | +| `/gsd extensions info <name>` | Show extension details | + +## GitHub Sync + +| Command | Description | +|---------|-------------| +| `/github-sync bootstrap` | Initial GitHub sync setup | +| `/github-sync status` | Show sync mapping counts | + +## Session Management + +| Command | Description | +|---------|-------------| +| `/clear` | Start a new session | +| `/exit` | Graceful shutdown | +| `/model` | Switch the active model | +| `/login` | Log in to an LLM provider | +| `/thinking` | Toggle thinking level | +| `/voice` | Toggle speech-to-text | +| `/worktree` (`/wt`) | Git worktree management | + +## In-Session Update + +``` +/gsd update +``` + +Checks npm for a newer version and installs it without leaving the session. 
diff --git a/gitbook/reference/environment-variables.md b/gitbook/reference/environment-variables.md new file mode 100644 index 000000000..c23af72df --- /dev/null +++ b/gitbook/reference/environment-variables.md @@ -0,0 +1,56 @@ +# Environment Variables + +## GSD Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `GSD_HOME` | `~/.gsd` | Global GSD directory. All paths derive from this unless individually overridden. | +| `GSD_PROJECT_ID` | (auto-hash) | Override automatic project identity hash. Useful for CI/CD or sharing state across repo clones. | +| `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root. Controls where `projects//` directories are created. | +| `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory for extensions, auth, and managed resources. | +| `GSD_FETCH_ALLOWED_URLS` | (none) | Comma-separated hostnames exempt from internal URL blocking. | +| `GSD_ALLOWED_COMMAND_PREFIXES` | (built-in) | Comma-separated command prefixes allowed for value resolution. | +| `GSD_WEB_PROJECT_CWD` | — | Default project path for `gsd --web` when `?project=` is not specified. 
| + +## LLM Provider Keys + +| Variable | Provider | +|----------|----------| +| `ANTHROPIC_API_KEY` | Anthropic (Claude) | +| `OPENAI_API_KEY` | OpenAI | +| `GEMINI_API_KEY` | Google Gemini | +| `OPENROUTER_API_KEY` | OpenRouter | +| `GROQ_API_KEY` | Groq | +| `XAI_API_KEY` | xAI (Grok) | +| `MISTRAL_API_KEY` | Mistral | +| `GH_TOKEN` | GitHub Copilot | +| `AWS_PROFILE` | Amazon Bedrock (named profile) | +| `AWS_ACCESS_KEY_ID` | Amazon Bedrock (IAM keys) | +| `AWS_SECRET_ACCESS_KEY` | Amazon Bedrock (IAM keys) | +| `AWS_REGION` | Amazon Bedrock (region) | +| `AWS_BEARER_TOKEN_BEDROCK` | Amazon Bedrock (bearer token) | +| `ANTHROPIC_VERTEX_PROJECT_ID` | Vertex AI | +| `GOOGLE_APPLICATION_CREDENTIALS` | Vertex AI (ADC) | +| `AZURE_OPENAI_API_KEY` | Azure OpenAI | + +## Tool API Keys + +| Variable | Purpose | +|----------|---------| +| `TAVILY_API_KEY` | Tavily web search | +| `BRAVE_API_KEY` | Brave web search | +| `CONTEXT7_API_KEY` | Context7 documentation lookup | +| `DISCORD_BOT_TOKEN` | Discord remote questions | +| `TELEGRAM_BOT_TOKEN` | Telegram remote questions | + +## URL Blocking + +The `fetch_page` tool blocks requests to private/internal networks by default (SSRF protection). To allow specific internal hosts: + +```bash +export GSD_FETCH_ALLOWED_URLS="internal-docs.company.com,192.168.1.50" +``` + +Or set `fetchAllowedUrls` in `~/.gsd/agent/settings.json`. + +Blocked by default: private IP ranges, cloud metadata endpoints, localhost, non-HTTP protocols, IPv6 private ranges. 
diff --git a/gitbook/reference/keyboard-shortcuts.md b/gitbook/reference/keyboard-shortcuts.md new file mode 100644 index 000000000..8b2013729 --- /dev/null +++ b/gitbook/reference/keyboard-shortcuts.md @@ -0,0 +1,33 @@ +# Keyboard Shortcuts + +| Shortcut | Action | +|----------|--------| +| `Ctrl+Alt+G` | Toggle dashboard overlay | +| `Ctrl+Alt+V` | Toggle voice transcription | +| `Ctrl+Alt+B` | Show background shell processes | +| `Ctrl+V` / `Alt+V` | Paste image from clipboard (screenshot → vision input) | +| `Escape` | Pause auto mode (preserves conversation) | + +## Terminal Compatibility + +In terminals without Kitty keyboard protocol support (macOS Terminal.app, JetBrains IDEs), slash-command fallbacks are shown instead of `Ctrl+Alt` shortcuts. + +{% hint style="tip" %} +If `Ctrl+V` is intercepted by your terminal (e.g. Warp), use `Alt+V` instead for clipboard image paste. +{% endhint %} + +## iTerm2 Note + +If `Ctrl+Alt` shortcuts trigger the wrong action (e.g., `Ctrl+Alt+G` opens external editor instead of the dashboard), go to **Profiles → Keys → General** and set **Left Option Key** to **Esc+**. This makes Alt/Option work correctly with Ctrl combinations. + +## cmux Integration + +If you use cmux (terminal multiplexer), GSD can integrate with it: + +| Command | Description | +|---------|-------------| +| `/gsd cmux status` | Show cmux detection and capabilities | +| `/gsd cmux on` / `off` | Enable/disable integration | +| `/gsd cmux notifications on/off` | Toggle desktop notifications | +| `/gsd cmux sidebar on/off` | Toggle sidebar metadata | +| `/gsd cmux splits on/off` | Toggle visual subagent splits | diff --git a/gitbook/reference/migration.md b/gitbook/reference/migration.md new file mode 100644 index 000000000..54d548dc8 --- /dev/null +++ b/gitbook/reference/migration.md @@ -0,0 +1,48 @@ +# Migration from v1 + +If you have projects with `.planning` directories from the original Get Shit Done (v1), you can migrate them to GSD-2's `.gsd` format. 
+ +## Running the Migration + +```bash +# From within the project directory +/gsd migrate + +# Or specify a path +/gsd migrate ~/projects/my-old-project +``` + +## What Gets Migrated + +The migration tool: + +- Parses your old `PROJECT.md`, `ROADMAP.md`, `REQUIREMENTS.md`, phase directories, plans, summaries, and research +- Maps phases → slices, plans → tasks, milestones → milestones +- Preserves completion state (`[x]` phases stay done, summaries carry over) +- Consolidates research files into the new structure +- Shows a preview before writing anything +- Optionally runs an AI-driven review for quality assurance + +## Supported Formats + +The migration handles various v1 format variations: + +- Milestone-sectioned roadmaps with `
` blocks +- Bold phase entries +- Bullet-format requirements +- Decimal phase numbering +- Duplicate phase numbers across milestones + +## Requirements + +Migration works best with a `ROADMAP.md` file for milestone structure. Without one, milestones are inferred from the `phases/` directory. + +## Post-Migration + +After migrating, verify the output: + +``` +/gsd doctor +``` + +This checks `.gsd/` integrity and flags any structural issues. diff --git a/gitbook/reference/troubleshooting.md b/gitbook/reference/troubleshooting.md new file mode 100644 index 000000000..8102ede58 --- /dev/null +++ b/gitbook/reference/troubleshooting.md @@ -0,0 +1,151 @@ +# Troubleshooting + +## `/gsd doctor` + +The built-in diagnostic tool validates `.gsd/` integrity: + +``` +/gsd doctor +``` + +It checks file structure, roadmap ↔ slice ↔ task consistency, completion state, git health, stale locks, and orphaned records. + +## Common Issues + +### Auto mode loops on the same unit + +The same unit dispatches repeatedly. + +**Fix:** Run `/gsd doctor` to repair state, then `/gsd auto`. If it persists, check that the expected artifact file exists on disk. + +### Auto mode stops with "Loop detected" + +A unit failed to produce its expected artifact twice. + +**Fix:** Check the task plan for clarity. Refine it manually, then `/gsd auto`. + +### `command not found: gsd` after install + +npm's global bin directory isn't in `$PATH`. + +**Fix:** +```bash +npm prefix -g +# Add the bin dir to PATH: +echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc +source ~/.zshrc +``` + +**Common causes:** +- **Homebrew Node** — `/opt/homebrew/bin` missing from PATH +- **Version manager (nvm, fnm, mise)** — global bin is version-specific +- **oh-my-zsh** — `gitfast` plugin aliases `gsd` to `git svn dcommit`; check with `alias gsd` + +### Provider errors during auto mode + +| Error Type | Auto-Resume? 
| Delay | +|-----------|-------------|-------| +| Rate limit (429) | Yes | 60s or retry-after header | +| Server error (500, 502, 503) | Yes | 30s | +| Auth/billing ("unauthorized") | No | Manual resume required | + +For permanent errors, configure fallback models: + +```yaml +models: + execution: + model: claude-sonnet-4-6 + fallbacks: + - openrouter/minimax/minimax-m2.5 +``` + +### Budget ceiling reached + +Auto mode pauses with "Budget ceiling reached." + +**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile, then `/gsd auto`. + +### Stale lock file + +Auto mode won't start, says another session is running. + +**Fix:** GSD auto-detects stale locks (dead PID = auto cleanup). If automatic recovery fails: + +```bash +rm -f .gsd/auto.lock +rm -rf "$(dirname .gsd)/.gsd.lock" +``` + +### Git merge conflicts + +Worktree merge fails on `.gsd/` files. + +**Fix:** `.gsd/` conflicts are auto-resolved. Code conflicts get an AI fix attempt; if that fails, resolve manually. + +### Notifications not appearing on macOS + +**Fix:** Install `terminal-notifier`: + +```bash +brew install terminal-notifier +``` + +See [Notifications](../configuration/notifications.md) for details. + +## MCP Issues + +### No servers configured + +**Fix:** Add server to `.mcp.json` or `.gsd/mcp.json`, verify JSON is valid, run `mcp_servers(refresh=true)`. + +### Server discovery times out + +**Fix:** Run the configured command outside GSD to confirm it starts. Check that backend services are reachable. + +### Server connection closed immediately + +**Fix:** Verify `command` and `args` paths are correct and absolute. Run the command manually to catch errors. + +## Recovery Procedures + +### Reset auto mode state + +```bash +rm .gsd/auto.lock +rm .gsd/completed-units.json +``` + +Then `/gsd auto` to restart from current state. 
+ +### Reset routing history + +```bash +rm .gsd/routing-history.json +``` + +### Full state rebuild + +``` +/gsd doctor +``` + +Rebuilds `STATE.md` from plan and roadmap files and fixes inconsistencies. + +## Getting Help + +- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/GSD-2/issues) +- **Dashboard:** `Ctrl+Alt+G` or `/gsd status` +- **Forensics:** `/gsd forensics` for post-mortem analysis +- **Session logs:** `.gsd/activity/` contains JSONL session dumps + +## Platform-Specific Issues + +### iTerm2 + +`Ctrl+Alt` shortcuts trigger wrong actions → Set **Profiles → Keys → General → Left Option Key** to **Esc+**. + +### Windows + +- LSP ENOENT on MSYS2/Git Bash → Fixed in v2.29+, upgrade +- EBUSY errors during builds → Close browser extension, or change output directory +- Transient EBUSY/EPERM on `.gsd/` files → Retry; close file-locking tools if persistent diff --git a/native/npm/darwin-arm64/package.json b/native/npm/darwin-arm64/package.json index f619507b8..096c57f16 100644 --- a/native/npm/darwin-arm64/package.json +++ b/native/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-arm64", - "version": "2.68.0", + "version": "2.71.0", "description": "GSD native engine binary for macOS ARM64", "os": [ "darwin" diff --git a/native/npm/darwin-x64/package.json b/native/npm/darwin-x64/package.json index de80d45c2..09464a640 100644 --- a/native/npm/darwin-x64/package.json +++ b/native/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-x64", - "version": "2.68.0", + "version": "2.71.0", "description": "GSD native engine binary for macOS Intel", "os": [ "darwin" diff --git a/native/npm/linux-arm64-gnu/package.json b/native/npm/linux-arm64-gnu/package.json index 5c73452dc..67c22f543 100644 --- a/native/npm/linux-arm64-gnu/package.json +++ b/native/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-arm64-gnu", - "version": "2.68.0", + 
"version": "2.71.0", "description": "GSD native engine binary for Linux ARM64 (glibc)", "os": [ "linux" diff --git a/native/npm/linux-x64-gnu/package.json b/native/npm/linux-x64-gnu/package.json index 6ff3a766f..be462588b 100644 --- a/native/npm/linux-x64-gnu/package.json +++ b/native/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-x64-gnu", - "version": "2.68.0", + "version": "2.71.0", "description": "GSD native engine binary for Linux x64 (glibc)", "os": [ "linux" diff --git a/native/npm/win32-x64-msvc/package.json b/native/npm/win32-x64-msvc/package.json index 19e54234b..373a6b2c9 100644 --- a/native/npm/win32-x64-msvc/package.json +++ b/native/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-win32-x64-msvc", - "version": "2.68.0", + "version": "2.71.0", "description": "GSD native engine binary for Windows x64 (MSVC)", "os": [ "win32" diff --git a/package.json b/package.json index bfb1a5ebc..71c908b81 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gsd-pi", - "version": "2.68.0", + "version": "2.71.0", "description": "GSD — Get Shit Done coding agent", "license": "MIT", "repository": { @@ -56,22 +56,22 @@ "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", "test:compile": "node scripts/compile-tests.mjs", - "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 
'dist-test/src/resources/extensions/voice/tests/*.test.js' 'dist-test/src/resources/extensions/mcp-client/tests/*.test.js'", - "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js", - "test:marketplace": "GSD_TEST_CLONE_MARKETPLACES=1 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", - "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", - "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test 'src/tests/integration/*.test.ts' 'src/resources/extensions/gsd/tests/integration/*.test.ts' 'src/resources/extensions/async-jobs/*.test.ts' 'src/resources/extensions/browser-tools/tests/*.test.mjs'", + "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test \"dist-test/src/tests/*.test.js\" \"dist-test/src/resources/extensions/gsd/tests/*.test.js\" \"dist-test/src/resources/extensions/gsd/tests/*.test.mjs\" \"dist-test/src/resources/extensions/shared/tests/*.test.js\" \"dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js\" \"dist-test/src/resources/extensions/github-sync/tests/*.test.js\" 
\"dist-test/src/resources/extensions/universal-config/tests/*.test.js\" \"dist-test/src/resources/extensions/voice/tests/*.test.js\" \"dist-test/src/resources/extensions/mcp-client/tests/*.test.js\"", + "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js packages/pi-coding-agent/dist/core/tools/spawn-shell-windows.test.js", + "test:marketplace": "node scripts/with-env.mjs GSD_TEST_CLONE_MARKETPLACES=1 -- node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", + "test:coverage": "c8 --reporter=text --reporter=lcov --exclude=\"src/resources/extensions/gsd/tests/**\" --exclude=\"src/tests/**\" --exclude=\"scripts/**\" --exclude=\"native/**\" --exclude=\"node_modules/**\" --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", + "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test \"src/tests/integration/*.test.ts\" \"src/resources/extensions/gsd/tests/integration/*.test.ts\" \"src/resources/extensions/async-jobs/*.test.ts\" \"src/resources/extensions/browser-tools/tests/*.test.mjs\"", "pretest": "npm run typecheck:extensions", "test": "npm run test:unit && npm run test:integration", "test:smoke": "node --experimental-strip-types tests/smoke/run.ts", "test:fixtures": "node --experimental-strip-types tests/fixtures/run.ts", - "test:fixtures:record": "GSD_FIXTURE_MODE=record node --experimental-strip-types tests/fixtures/record.ts", - "test:live": "GSD_LIVE_TESTS=1 
node --experimental-strip-types tests/live/run.ts", + "test:fixtures:record": "node scripts/with-env.mjs GSD_FIXTURE_MODE=record -- node --experimental-strip-types tests/fixtures/record.ts", + "test:live": "node scripts/with-env.mjs GSD_LIVE_TESTS=1 -- node --experimental-strip-types tests/live/run.ts", "test:browser-tools": "node --test src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs", "test:native": "node --test packages/native/src/__tests__/grep.test.mjs", "test:secret-scan": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/tests/secret-scan.test.ts", - "secret-scan": "bash scripts/secret-scan.sh", - "secret-scan:install-hook": "bash scripts/install-hooks.sh", + "secret-scan": "node scripts/secret-scan.mjs", + "secret-scan:install-hook": "node scripts/install-hooks.mjs", "build:native": "node native/scripts/build.js", "build:native:dev": "node native/scripts/build.js --dev", "dev": "node scripts/dev.js", @@ -92,7 +92,7 @@ "release:update-changelog": "node scripts/update-changelog.mjs", "docker:build-runtime": "docker build --target runtime -t ghcr.io/gsd-build/gsd-pi .", "docker:build-builder": "docker build --target builder -t ghcr.io/gsd-build/gsd-ci-builder .", - "prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && ([ \"$CI\" = 'true' ] || git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1)) && npm run build && npm run typecheck:extensions && npm run validate-pack", + "prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && node scripts/prepublish-check.mjs && npm run build && npm run typecheck:extensions && npm run validate-pack", "test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts" }, "dependencies": { diff --git a/packages/daemon/src/orchestrator.ts 
b/packages/daemon/src/orchestrator.ts index 678874cec..fe2998d8f 100644 --- a/packages/daemon/src/orchestrator.ts +++ b/packages/daemon/src/orchestrator.ts @@ -12,9 +12,6 @@ */ import { z } from 'zod'; -import { readFileSync, writeFileSync, chmodSync } from 'node:fs'; -import { join } from 'node:path'; -import { homedir } from 'node:os'; import type Anthropic from '@anthropic-ai/sdk'; import type { MessageParam, @@ -30,90 +27,18 @@ import type { ProjectInfo, ManagedSession } from './types.js'; import type { Logger } from './logger.js'; // --------------------------------------------------------------------------- -// OAuth token resolution — reads GSD's auth.json, refreshes if expired +// API key resolution — requires ANTHROPIC_API_KEY env var +// Anthropic OAuth removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md) // --------------------------------------------------------------------------- -interface OAuthCredentials { - type: 'oauth'; - refresh: string; - access: string; - expires: number; -} - -const TOKEN_URL = 'https://platform.claude.com/v1/oauth/token'; -const CLIENT_ID = atob('OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl'); - -/** - * Read the Anthropic OAuth access token from GSD's auth.json. - * If expired, refresh it and write the new credentials back. - * Falls back to ANTHROPIC_API_KEY env var if no OAuth credential exists. - */ -async function resolveAnthropicApiKey(logger?: Logger): Promise { - // Try env var first (explicit override) - if (process.env.ANTHROPIC_API_KEY) { - return process.env.ANTHROPIC_API_KEY; - } - - const authPath = join(homedir(), '.gsd', 'agent', 'auth.json'); - let authData: Record; - try { - authData = JSON.parse(readFileSync(authPath, 'utf-8')); - } catch { +function resolveAnthropicApiKey(): string { + const apiKey = process.env.ANTHROPIC_API_KEY; + if (!apiKey) { throw new Error( - 'No Anthropic auth found. 
Run `gsd login` to authenticate, or set ANTHROPIC_API_KEY.', + 'ANTHROPIC_API_KEY is required. Set it in your environment or run `gsd config`.', ); } - - const cred = authData.anthropic as OAuthCredentials | undefined; - if (!cred || cred.type !== 'oauth' || !cred.access) { - throw new Error( - 'No Anthropic OAuth credential in auth.json. Run `gsd login` to authenticate.', - ); - } - - // If token is still valid, use it - if (Date.now() < cred.expires) { - return cred.access; - } - - // Token expired — refresh it - logger?.info('orchestrator: refreshing Anthropic OAuth token'); - const response = await fetch(TOKEN_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - grant_type: 'refresh_token', - client_id: CLIENT_ID, - refresh_token: cred.refresh, - }), - signal: AbortSignal.timeout(30_000), - }); - - if (!response.ok) { - const error = await response.text(); - throw new Error(`Anthropic token refresh failed: ${error}`); - } - - const data = (await response.json()) as { - access_token: string; - refresh_token: string; - expires_in: number; - }; - - const newCred: OAuthCredentials = { - type: 'oauth', - refresh: data.refresh_token, - access: data.access_token, - expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000, - }; - - // Write back to auth.json - authData.anthropic = newCred; - writeFileSync(authPath, JSON.stringify(authData, null, 2), 'utf-8'); - chmodSync(authPath, 0o600); - logger?.info('orchestrator: Anthropic OAuth token refreshed'); - - return newCred.access; + return apiKey; } // --------------------------------------------------------------------------- @@ -254,11 +179,11 @@ export class Orchestrator { /** * Lazily initialise the Anthropic client. Dynamic import handles K007 module resolution. - * Resolves auth from GSD's OAuth credentials (auth.json), refreshing if needed. + * Requires ANTHROPIC_API_KEY environment variable. 
*/ private async getClient(): Promise { if (this.client) return this.client; - const apiKey = await resolveAnthropicApiKey(this.deps.logger); + const apiKey = resolveAnthropicApiKey(); const { default: AnthropicSDK } = await import('@anthropic-ai/sdk'); this.client = new AnthropicSDK({ apiKey }); return this.client; diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md index dcc32aa94..642657dd7 100644 --- a/packages/mcp-server/README.md +++ b/packages/mcp-server/README.md @@ -7,7 +7,8 @@ Start GSD auto-mode sessions, poll progress, resolve blockers, and retrieve resu This package now exposes two tool surfaces: - session/read tools for starting and inspecting GSD sessions -- workflow mutation tools for planning, completion, validation, reassessment, and gate persistence +- MCP-native interactive tools for structured user input +- headless-safe workflow tools for planning, completion, validation, reassessment, metadata persistence, and journal reads ## Installation @@ -74,18 +75,29 @@ Add to `.cursor/mcp.json`: ## Tools -### Workflow mutation tools +### Workflow tools The workflow MCP surface includes: +- `gsd_decision_save` +- `gsd_save_decision` +- `gsd_requirement_update` +- `gsd_update_requirement` +- `gsd_requirement_save` +- `gsd_save_requirement` +- `gsd_milestone_generate_id` +- `gsd_generate_milestone_id` - `gsd_plan_milestone` - `gsd_plan_slice` +- `gsd_plan_task` +- `gsd_task_plan` - `gsd_replan_slice` - `gsd_slice_replan` - `gsd_task_complete` - `gsd_complete_task` - `gsd_slice_complete` - `gsd_complete_slice` +- `gsd_skip_slice` - `gsd_validate_milestone` - `gsd_milestone_validate` - `gsd_complete_milestone` @@ -95,13 +107,21 @@ The workflow MCP surface includes: - `gsd_save_gate_result` - `gsd_summary_save` - `gsd_milestone_status` +- `gsd_journal_query` -These mutation tools use the same GSD workflow handlers as the native in-process tool path. 
+These tools use the same GSD workflow handlers as the native in-process tool path wherever a shared handler exists. + +### Interactive tools + +The packaged server now exposes `ask_user_questions` through MCP form elicitation. This keeps the existing GSD answer payload shape while allowing Claude Code CLI and other elicitation-capable clients to surface structured user choices. + +`secure_env_collect` is still not exposed by this package. That path needs MCP URL elicitation or an equivalent secure bridge because secrets should not flow through form elicitation. Current support boundary: - when running inside the GSD monorepo checkout, the MCP server auto-discovers the shared workflow executor module - outside the monorepo, set `GSD_WORKFLOW_EXECUTORS_MODULE` to an importable `workflow-tool-executors` module path if you want the mutation tools enabled +- `ask_user_questions` requires an MCP client that supports form elicitation - session/read tools do not depend on this bridge If the executor bridge cannot be loaded, workflow mutation calls will fail with a precise configuration error instead of silently degrading. @@ -214,6 +234,8 @@ Resolve a pending blocker in a session by sending a response to the blocked UI r | `GSD_CLI_PATH` | Absolute path to the GSD CLI binary. If not set, the server resolves `gsd` via `which`. | | `GSD_WORKFLOW_EXECUTORS_MODULE` | Optional absolute path or `file:` URL for the shared GSD workflow executor module used by workflow mutation tools. | +The server also hydrates supported model-provider and tool credentials from `~/.gsd/agent/auth.json` on startup. Keys saved through `/gsd config` or `/gsd keys` become available to the MCP server process automatically, and any explicitly-set environment variable still wins. 
+ ## Architecture ``` diff --git a/packages/mcp-server/src/cli.ts b/packages/mcp-server/src/cli.ts index 744749d03..e9b64d794 100644 --- a/packages/mcp-server/src/cli.ts +++ b/packages/mcp-server/src/cli.ts @@ -1,5 +1,3 @@ -#!/usr/bin/env node - /** * @gsd-build/mcp-server CLI — stdio transport entry point. * @@ -9,13 +7,17 @@ import { SessionManager } from './session-manager.js'; import { createMcpServer } from './server.js'; +import { loadStoredCredentialEnvKeys } from './tool-credentials.js'; const MCP_PKG = '@modelcontextprotocol/sdk'; async function main(): Promise<void> { + loadStoredCredentialEnvKeys(); + const sessionManager = new SessionManager(); - // Create the configured MCP server with all 12 tools (6 session + 6 read-only) + // Create the configured MCP server with session, interactive, read-only, + // and workflow tools. const { server } = await createMcpServer(sessionManager); // Dynamic import for StdioServerTransport (same TS subpath workaround) diff --git a/packages/mcp-server/src/env-writer.test.ts b/packages/mcp-server/src/env-writer.test.ts new file mode 100644 index 000000000..5932d1cfb --- /dev/null +++ b/packages/mcp-server/src/env-writer.test.ts @@ -0,0 +1,280 @@ +// @gsd-build/mcp-server — Tests for env-writer utilities +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { + checkExistingEnvKeys, + detectDestination, + writeEnvKey, + applySecrets, + isSafeEnvVarKey, + isSupportedDeploymentEnvironment, + shellEscapeSingle, +} from './env-writer.js'; + +function makeTempDir(prefix: string): string { + return mkdtempSync(join(tmpdir(), `${prefix}-`)); +} + +// --------------------------------------------------------------------------- +// checkExistingEnvKeys +// 
--------------------------------------------------------------------------- + +describe('checkExistingEnvKeys', () => { + it('finds key in .env file', async () => { + const tmp = makeTempDir('env-check'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'API_KEY=secret123\nOTHER=val\n'); + const result = await checkExistingEnvKeys(['API_KEY'], envPath); + assert.deepStrictEqual(result, ['API_KEY']); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('finds key in process.env', async () => { + const tmp = makeTempDir('env-check'); + const saved = process.env.GSD_MCP_TEST_KEY_1; + try { + process.env.GSD_MCP_TEST_KEY_1 = 'some-value'; + const envPath = join(tmp, '.env'); + const result = await checkExistingEnvKeys(['GSD_MCP_TEST_KEY_1'], envPath); + assert.deepStrictEqual(result, ['GSD_MCP_TEST_KEY_1']); + } finally { + delete process.env.GSD_MCP_TEST_KEY_1; + if (saved !== undefined) process.env.GSD_MCP_TEST_KEY_1 = saved; + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns empty for missing keys', async () => { + const tmp = makeTempDir('env-check'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'OTHER=val\n'); + delete process.env.DEFINITELY_NOT_SET_MCP_XYZ; + const result = await checkExistingEnvKeys(['DEFINITELY_NOT_SET_MCP_XYZ'], envPath); + assert.deepStrictEqual(result, []); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('handles missing .env file gracefully', async () => { + const tmp = makeTempDir('env-check'); + try { + const envPath = join(tmp, 'nonexistent.env'); + delete process.env.DEFINITELY_NOT_SET_MCP_XYZ; + const result = await checkExistingEnvKeys(['DEFINITELY_NOT_SET_MCP_XYZ'], envPath); + assert.deepStrictEqual(result, []); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// detectDestination +// 
--------------------------------------------------------------------------- + +describe('detectDestination', () => { + it('returns vercel when vercel.json exists', () => { + const tmp = makeTempDir('dest'); + try { + writeFileSync(join(tmp, 'vercel.json'), '{}'); + assert.equal(detectDestination(tmp), 'vercel'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns convex when convex/ dir exists', () => { + const tmp = makeTempDir('dest'); + try { + mkdirSync(join(tmp, 'convex')); + assert.equal(detectDestination(tmp), 'convex'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns dotenv when neither exists', () => { + const tmp = makeTempDir('dest'); + try { + assert.equal(detectDestination(tmp), 'dotenv'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('vercel takes priority over convex', () => { + const tmp = makeTempDir('dest'); + try { + writeFileSync(join(tmp, 'vercel.json'), '{}'); + mkdirSync(join(tmp, 'convex')); + assert.equal(detectDestination(tmp), 'vercel'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// writeEnvKey +// --------------------------------------------------------------------------- + +describe('writeEnvKey', () => { + it('creates .env file with new key', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + await writeEnvKey(envPath, 'NEW_KEY', 'new-value'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('NEW_KEY=new-value')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('updates existing key in-place', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'EXISTING=old\nOTHER=keep\n'); + await writeEnvKey(envPath, 'EXISTING', 'new'); + const content 
= readFileSync(envPath, 'utf8'); + assert.ok(content.includes('EXISTING=new')); + assert.ok(content.includes('OTHER=keep')); + assert.ok(!content.includes('old')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('escapes newlines in values', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + await writeEnvKey(envPath, 'MULTI', 'line1\nline2'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('MULTI=line1\\nline2')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('rejects non-string values', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + await assert.rejects( + () => writeEnvKey(envPath, 'KEY', undefined as unknown as string), + /expects a string value/, + ); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// applySecrets (dotenv) +// --------------------------------------------------------------------------- + +describe('applySecrets', () => { + const savedKeys: Record = {}; + + afterEach(() => { + for (const [k, v] of Object.entries(savedKeys)) { + if (v === undefined) delete process.env[k]; + else process.env[k] = v; + } + }); + + it('writes keys to .env and hydrates process.env', async () => { + const tmp = makeTempDir('apply'); + const envPath = join(tmp, '.env'); + savedKeys.GSD_APPLY_TEST_A = process.env.GSD_APPLY_TEST_A; + try { + const { applied, errors } = await applySecrets( + [{ key: 'GSD_APPLY_TEST_A', value: 'val-a' }], + 'dotenv', + { envFilePath: envPath }, + ); + assert.deepStrictEqual(applied, ['GSD_APPLY_TEST_A']); + assert.deepStrictEqual(errors, []); + assert.equal(process.env.GSD_APPLY_TEST_A, 'val-a'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('GSD_APPLY_TEST_A=val-a')); + } finally { + rmSync(tmp, { recursive: 
true, force: true }); + } + }); + + it('returns errors for invalid vercel environment', async () => { + const tmp = makeTempDir('apply'); + try { + const { applied, errors } = await applySecrets( + [{ key: 'KEY', value: 'val' }], + 'vercel', + { + envFilePath: join(tmp, '.env'), + environment: 'staging' as 'development', + execFn: async () => ({ code: 0, stderr: '' }), + }, + ); + assert.deepStrictEqual(applied, []); + assert.ok(errors[0]?.includes('unsupported')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// Validation helpers +// --------------------------------------------------------------------------- + +describe('isSafeEnvVarKey', () => { + it('accepts valid keys', () => { + assert.ok(isSafeEnvVarKey('API_KEY')); + assert.ok(isSafeEnvVarKey('_PRIVATE')); + assert.ok(isSafeEnvVarKey('key123')); + }); + + it('rejects invalid keys', () => { + assert.ok(!isSafeEnvVarKey('123BAD')); + assert.ok(!isSafeEnvVarKey('has-dash')); + assert.ok(!isSafeEnvVarKey('has space')); + assert.ok(!isSafeEnvVarKey('')); + }); +}); + +describe('isSupportedDeploymentEnvironment', () => { + it('accepts valid environments', () => { + assert.ok(isSupportedDeploymentEnvironment('development')); + assert.ok(isSupportedDeploymentEnvironment('preview')); + assert.ok(isSupportedDeploymentEnvironment('production')); + }); + + it('rejects invalid environments', () => { + assert.ok(!isSupportedDeploymentEnvironment('staging')); + assert.ok(!isSupportedDeploymentEnvironment('test')); + }); +}); + +describe('shellEscapeSingle', () => { + it('wraps in single quotes', () => { + assert.equal(shellEscapeSingle('hello'), "'hello'"); + }); + + it('escapes embedded single quotes', () => { + assert.equal(shellEscapeSingle("it's"), "'it'\\''s'"); + }); +}); diff --git a/packages/mcp-server/src/env-writer.ts b/packages/mcp-server/src/env-writer.ts new file mode 100644 index 
000000000..219496539 --- /dev/null +++ b/packages/mcp-server/src/env-writer.ts @@ -0,0 +1,183 @@ +// @gsd-build/mcp-server — Environment variable write utilities +// Copyright (c) 2026 Jeremy McSpadden +// +// Shared helpers for writing env vars to .env files, detecting project +// destinations, and checking existing keys. Used by secure_env_collect +// MCP tool. No TUI dependencies — pure filesystem + process.env operations. + +import { readFile, writeFile } from "node:fs/promises"; +import { existsSync, statSync } from "node:fs"; +import { resolve } from "node:path"; + +// --------------------------------------------------------------------------- +// checkExistingEnvKeys +// --------------------------------------------------------------------------- + +/** + * Check which keys already exist in a .env file or process.env. + * Returns the subset of `keys` that are already set. + */ +export async function checkExistingEnvKeys(keys: string[], envFilePath: string): Promise { + let fileContent = ""; + try { + fileContent = await readFile(envFilePath, "utf8"); + } catch { + // ENOENT or other read error — proceed with empty content + } + + const existing: string[] = []; + for (const key of keys) { + const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const regex = new RegExp(`^${escaped}\\s*=`, "m"); + if (regex.test(fileContent) || key in process.env) { + existing.push(key); + } + } + return existing; +} + +// --------------------------------------------------------------------------- +// detectDestination +// --------------------------------------------------------------------------- + +/** + * Detect the write destination based on project files in basePath. + * Priority: vercel.json → convex/ dir → fallback "dotenv". 
+ */ +export function detectDestination(basePath: string): "dotenv" | "vercel" | "convex" { + if (existsSync(resolve(basePath, "vercel.json"))) { + return "vercel"; + } + const convexPath = resolve(basePath, "convex"); + try { + if (existsSync(convexPath) && statSync(convexPath).isDirectory()) { + return "convex"; + } + } catch { + // stat error — treat as not found + } + return "dotenv"; +} + +// --------------------------------------------------------------------------- +// writeEnvKey +// --------------------------------------------------------------------------- + +/** + * Write a single key=value pair to a .env file. + * Updates existing keys in-place, appends new ones at the end. + */ +export async function writeEnvKey(filePath: string, key: string, value: string): Promise { + if (typeof value !== "string") { + throw new TypeError(`writeEnvKey expects a string value for key "${key}", got ${typeof value}`); + } + let content = ""; + try { + content = await readFile(filePath, "utf8"); + } catch { + content = ""; + } + const escaped = value.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace(/\r/g, ""); + const line = `${key}=${escaped}`; + const regex = new RegExp(`^${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\s*=.*$`, "m"); + if (regex.test(content)) { + content = content.replace(regex, line); + } else { + if (content.length > 0 && !content.endsWith("\n")) content += "\n"; + content += `${line}\n`; + } + await writeFile(filePath, content, "utf8"); +} + +// --------------------------------------------------------------------------- +// Validation helpers +// --------------------------------------------------------------------------- + +export function isSafeEnvVarKey(key: string): boolean { + return /^[A-Za-z_][A-Za-z0-9_]*$/.test(key); +} + +export function isSupportedDeploymentEnvironment(env: string): boolean { + return env === "development" || env === "preview" || env === "production"; +} + +// 
--------------------------------------------------------------------------- +// Shell helpers (for vercel/convex CLI) +// --------------------------------------------------------------------------- + +export function shellEscapeSingle(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +// --------------------------------------------------------------------------- +// applySecrets +// --------------------------------------------------------------------------- + +interface ApplyResult { + applied: string[]; + errors: string[]; +} + +/** + * Apply collected secrets to the target destination. + * Dotenv writes are handled directly; vercel/convex shell out via execFn. + */ +export async function applySecrets( + provided: Array<{ key: string; value: string }>, + destination: "dotenv" | "vercel" | "convex", + opts: { + envFilePath: string; + environment?: string; + execFn?: (cmd: string, args: string[]) => Promise<{ code: number; stderr: string }>; + }, +): Promise { + const applied: string[] = []; + const errors: string[] = []; + + if (destination === "dotenv") { + for (const { key, value } of provided) { + try { + await writeEnvKey(opts.envFilePath, key, value); + applied.push(key); + // Hydrate process.env so the current session sees the new value + process.env[key] = value; + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + errors.push(`${key}: ${msg}`); + } + } + } + + if ((destination === "vercel" || destination === "convex") && opts.execFn) { + const env = opts.environment ?? "development"; + if (!isSupportedDeploymentEnvironment(env)) { + errors.push(`environment: unsupported target environment "${env}"`); + return { applied, errors }; + } + for (const { key, value } of provided) { + if (!isSafeEnvVarKey(key)) { + errors.push(`${key}: invalid environment variable name`); + continue; + } + const cmd = destination === "vercel" + ? 
`printf %s ${shellEscapeSingle(value)} | vercel env add ${key} ${env}` + : ""; + try { + const result = destination === "vercel" + ? await opts.execFn("sh", ["-c", cmd]) + : await opts.execFn("npx", ["convex", "env", "set", key, value]); + if (result.code !== 0) { + errors.push(`${key}: ${result.stderr.slice(0, 200)}`); + } else { + applied.push(key); + process.env[key] = value; + } + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + errors.push(`${key}: ${msg}`); + } + } + } + + return { applied, errors }; +} diff --git a/packages/mcp-server/src/import-candidates.test.ts b/packages/mcp-server/src/import-candidates.test.ts new file mode 100644 index 000000000..5b0171f3f --- /dev/null +++ b/packages/mcp-server/src/import-candidates.test.ts @@ -0,0 +1,48 @@ +// GSD-2 — Regression tests for importLocalModule candidate resolution (#3954) +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { _buildImportCandidates } from "./workflow-tools.js"; + +describe("_buildImportCandidates", () => { + it("includes dist/ fallback for src/ paths", () => { + const candidates = _buildImportCandidates("../../../src/resources/extensions/gsd/db-writer.js"); + assert.ok( + candidates.some((c) => c.includes("/dist/resources/extensions/gsd/db-writer.js")), + "should include dist/ swapped candidate", + ); + }); + + it("includes src/ fallback for dist/ paths", () => { + const candidates = _buildImportCandidates("../../../dist/resources/extensions/gsd/db-writer.js"); + assert.ok( + candidates.some((c) => c.includes("/src/resources/extensions/gsd/db-writer.js")), + "should include src/ swapped candidate", + ); + }); + + it("includes .ts variants for .js paths", () => { + const candidates = _buildImportCandidates("../../../src/resources/extensions/gsd/db-writer.js"); + assert.ok( + candidates.some((c) => c.endsWith("db-writer.ts") && c.includes("/src/")), + "should include .ts variant for original src/ path", + 
); + assert.ok( + candidates.some((c) => c.endsWith("db-writer.ts") && c.includes("/dist/")), + "should include .ts variant for swapped dist/ path", + ); + }); + + it("returns original path first", () => { + const input = "../../../src/resources/extensions/gsd/db-writer.js"; + const candidates = _buildImportCandidates(input); + assert.equal(candidates[0], input, "first candidate should be the original path"); + }); + + it("handles paths without src/ or dist/ gracefully", () => { + const candidates = _buildImportCandidates("./local-module.js"); + assert.equal(candidates.length, 2, "should have original + .ts variant only"); + assert.equal(candidates[0], "./local-module.js"); + assert.equal(candidates[1], "./local-module.ts"); + }); +}); diff --git a/packages/mcp-server/src/mcp-server.test.ts b/packages/mcp-server/src/mcp-server.test.ts index 6d7ce156e..c3ba68065 100644 --- a/packages/mcp-server/src/mcp-server.test.ts +++ b/packages/mcp-server/src/mcp-server.test.ts @@ -16,7 +16,11 @@ import { resolve } from 'node:path'; import { EventEmitter } from 'node:events'; import { SessionManager } from './session-manager.js'; -import { createMcpServer } from './server.js'; +import { + buildAskUserQuestionsElicitRequest, + createMcpServer, + formatAskUserQuestionsElicitResult, +} from './server.js'; import { MAX_EVENTS } from './types.js'; import type { ManagedSession, CostAccumulator, PendingBlocker } from './types.js'; @@ -574,6 +578,8 @@ describe('createMcpServer tool registration', () => { it('creates server successfully with all required methods', async () => { const { server } = await createMcpServer(sm); assert.ok(server); + assert.ok(server.server); + assert.equal(typeof server.server.elicitInput, 'function'); assert.ok(typeof server.connect === 'function'); assert.ok(typeof server.close === 'function'); }); @@ -625,4 +631,82 @@ describe('createMcpServer tool registration', () => { const session = sm.getSession(sessionId)!; assert.equal(session.status, 'cancelled'); 
}); + + it('buildAskUserQuestionsElicitRequest adds None of the above note field for single-select questions', () => { + const request = buildAskUserQuestionsElicitRequest([ + { + id: 'depth_verification_M001', + header: 'Depth Check', + question: 'Did I capture the depth right?', + options: [ + { label: 'Yes, you got it (Recommended)', description: 'Continue with the current summary.' }, + { label: 'Not quite', description: 'I need to clarify the depth further.' }, + ], + }, + { + id: 'focus_areas', + header: 'Focus', + question: 'Which areas matter most?', + allowMultiple: true, + options: [ + { label: 'Frontend', description: 'Prioritize the UI.' }, + { label: 'Backend', description: 'Prioritize server logic.' }, + ], + }, + ]); + + assert.equal(request.mode, 'form'); + assert.deepEqual(request.requestedSchema.required, ['depth_verification_M001', 'focus_areas']); + assert.ok(request.requestedSchema.properties['depth_verification_M001']); + assert.ok(request.requestedSchema.properties['depth_verification_M001__note']); + assert.ok(!request.requestedSchema.properties['focus_areas__note']); + }); + + it('formatAskUserQuestionsElicitResult preserves the existing answers JSON shape', () => { + const result = formatAskUserQuestionsElicitResult( + [ + { + id: 'depth_verification_M001', + header: 'Depth Check', + question: 'Did I capture the depth right?', + options: [ + { label: 'Yes, you got it (Recommended)', description: 'Continue with the current summary.' }, + { label: 'Not quite', description: 'I need to clarify the depth further.' }, + ], + }, + { + id: 'focus_areas', + header: 'Focus', + question: 'Which areas matter most?', + allowMultiple: true, + options: [ + { label: 'Frontend', description: 'Prioritize the UI.' }, + { label: 'Backend', description: 'Prioritize server logic.' 
}, + ], + }, + ], + { + action: 'accept', + content: { + depth_verification_M001: 'None of the above', + depth_verification_M001__note: 'Need more implementation detail.', + focus_areas: ['Frontend', 'Backend'], + }, + }, + ); + + assert.equal( + result, + JSON.stringify({ + answers: { + depth_verification_M001: { + answers: ['None of the above', 'user_note: Need more implementation detail.'], + }, + focus_areas: { + answers: ['Frontend', 'Backend'], + }, + }, + }), + ); + }); }); diff --git a/packages/mcp-server/src/secure-env-collect.test.ts b/packages/mcp-server/src/secure-env-collect.test.ts new file mode 100644 index 000000000..c33ad2949 --- /dev/null +++ b/packages/mcp-server/src/secure-env-collect.test.ts @@ -0,0 +1,265 @@ +// @gsd-build/mcp-server — Tests for secure_env_collect MCP tool +// Copyright (c) 2026 Jeremy McSpadden +// +// Tests the secure_env_collect tool registered in createMcpServer. +// Uses a mock MCP server to intercept tool registration and elicitInput calls. + +import { describe, it, beforeEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { createMcpServer } from './server.js'; +import { SessionManager } from './session-manager.js'; + +// --------------------------------------------------------------------------- +// Mock infrastructure +// --------------------------------------------------------------------------- + +/** + * We intercept McpServer construction by monkey-patching the dynamic import. + * Instead, we'll test the tool handler indirectly through the exported + * createMcpServer function — capturing the registered tool handlers. + * + * Since createMcpServer dynamically imports McpServer, we need to test at + * a level that exercises the tool handler logic. We do this by extracting + * the tool handler through the server.tool() calls. 
+ */ + +interface RegisteredTool { + name: string; + description: string; + params: Record; + handler: (args: Record) => Promise; +} + +interface ToolResult { + content?: Array<{ type: string; text: string }>; + isError?: boolean; +} + +/** + * Mock McpServer that captures tool registrations and provides + * a controllable elicitInput response. + */ +class MockMcpServer { + registeredTools: RegisteredTool[] = []; + elicitResponse: { action: string; content?: Record } = { action: 'accept', content: {} }; + + server = { + elicitInput: async (_params: unknown) => { + return this.elicitResponse; + }, + }; + + tool(name: string, description: string, params: Record, handler: (args: Record) => Promise) { + this.registeredTools.push({ name, description, params, handler }); + } + + async connect(_transport: unknown) { /* no-op */ } + async close() { /* no-op */ } + + getToolHandler(name: string): ((args: Record) => Promise) | undefined { + return this.registeredTools.find((t) => t.name === name)?.handler; + } +} + +// --------------------------------------------------------------------------- +// Helper to create a mock MCP server with secure_env_collect registered +// --------------------------------------------------------------------------- + +/** + * Since createMcpServer uses dynamic import for McpServer, we can't easily + * mock it. Instead, we test the env-writer utilities directly (in env-writer.test.ts) + * and test the tool integration by verifying: + * 1. The tool exists in the registered tools list + * 2. The handler produces correct results with mock data + * + * For handler-level testing, we create a standalone test that replicates + * the tool handler logic with a controllable mock. 
+ */ + +function makeTempDir(prefix: string): string { + return mkdtempSync(join(tmpdir(), `${prefix}-`)); +} + +// --------------------------------------------------------------------------- +// Integration test — verify tool is registered +// --------------------------------------------------------------------------- + +describe('secure_env_collect tool registration', () => { + it('createMcpServer registers secure_env_collect tool', async () => { + // This test verifies the tool exists — createMcpServer internally calls + // server.tool('secure_env_collect', ...) which we can't intercept without + // module mocking, but we can verify the server creates successfully + const sm = new SessionManager(); + try { + const { server } = await createMcpServer(sm); + assert.ok(server, 'server should be created'); + // The McpServer internally tracks registered tools — we verify no error + } finally { + await sm.cleanup(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Handler logic tests — using env-writer directly to test the flow +// --------------------------------------------------------------------------- + +describe('secure_env_collect handler logic', () => { + it('skips keys that already exist in .env', async () => { + const tmp = makeTempDir('sec-collect'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'ALREADY_SET=existing-value\n'); + + // Import the utility directly to test the pre-check logic + const { checkExistingEnvKeys } = await import('./env-writer.js'); + const existing = await checkExistingEnvKeys(['ALREADY_SET', 'NEW_KEY'], envPath); + assert.deepStrictEqual(existing, ['ALREADY_SET']); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('writes collected values to .env without returning secret values', async () => { + const tmp = makeTempDir('sec-collect'); + try { + const envPath = join(tmp, '.env'); + const savedKey = 
process.env.SEC_COLLECT_TEST_KEY; + + const { applySecrets } = await import('./env-writer.js'); + const { applied, errors } = await applySecrets( + [{ key: 'SEC_COLLECT_TEST_KEY', value: 'super-secret-value' }], + 'dotenv', + { envFilePath: envPath }, + ); + + assert.deepStrictEqual(applied, ['SEC_COLLECT_TEST_KEY']); + assert.deepStrictEqual(errors, []); + + // Verify the value was written + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('SEC_COLLECT_TEST_KEY=super-secret-value')); + + // Verify process.env was hydrated + assert.equal(process.env.SEC_COLLECT_TEST_KEY, 'super-secret-value'); + + // Cleanup + if (savedKey === undefined) delete process.env.SEC_COLLECT_TEST_KEY; + else process.env.SEC_COLLECT_TEST_KEY = savedKey; + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('auto-detects vercel destination from vercel.json', async () => { + const tmp = makeTempDir('sec-collect'); + try { + writeFileSync(join(tmp, 'vercel.json'), '{}'); + const { detectDestination } = await import('./env-writer.js'); + assert.equal(detectDestination(tmp), 'vercel'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('handles empty form values as skipped', async () => { + // Simulate what happens when user leaves a field empty in the form + const formContent: Record = { + 'API_KEY': 'provided-value', + 'OPTIONAL_KEY': '', // empty = skip + }; + + const provided: Array<{ key: string; value: string }> = []; + const skipped: string[] = []; + + for (const [key, raw] of Object.entries(formContent)) { + const value = typeof raw === 'string' ? 
raw.trim() : ''; + if (value.length > 0) { + provided.push({ key, value }); + } else { + skipped.push(key); + } + } + + assert.deepStrictEqual(provided, [{ key: 'API_KEY', value: 'provided-value' }]); + assert.deepStrictEqual(skipped, ['OPTIONAL_KEY']); + }); + + it('result text never contains secret values', async () => { + const tmp = makeTempDir('sec-collect'); + try { + const envPath = join(tmp, '.env'); + const savedKey = process.env.RESULT_TEXT_TEST; + + const { applySecrets } = await import('./env-writer.js'); + const { applied } = await applySecrets( + [{ key: 'RESULT_TEXT_TEST', value: 'sk-super-secret-abc123' }], + 'dotenv', + { envFilePath: envPath }, + ); + + // Simulate building result text (same logic as the tool handler) + const lines: string[] = [ + 'destination: dotenv (auto-detected)', + ...applied.map((k) => `✓ ${k}: applied`), + ]; + const resultText = lines.join('\n'); + + // The result MUST NOT contain the secret value + assert.ok(!resultText.includes('sk-super-secret-abc123'), 'result text must not contain secret value'); + assert.ok(resultText.includes('RESULT_TEXT_TEST'), 'result text should contain key name'); + + // Cleanup + if (savedKey === undefined) delete process.env.RESULT_TEXT_TEST; + else process.env.RESULT_TEXT_TEST = savedKey; + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('handles multiple keys with mixed existing/new/skipped', async () => { + const tmp = makeTempDir('sec-collect'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'EXISTING_A=already-here\n'); + const savedB = process.env.NEW_B; + const savedC = process.env.SKIP_C; + + const { checkExistingEnvKeys, applySecrets } = await import('./env-writer.js'); + + const allKeys = ['EXISTING_A', 'NEW_B', 'SKIP_C']; + const existing = await checkExistingEnvKeys(allKeys, envPath); + assert.deepStrictEqual(existing, ['EXISTING_A']); + + // Simulate form response: NEW_B has value, SKIP_C is empty + const formContent = { 
NEW_B: 'new-value', SKIP_C: '' }; + const provided: Array<{ key: string; value: string }> = []; + const skipped: string[] = []; + + for (const key of allKeys.filter((k) => !existing.includes(k))) { + const raw = formContent[key as keyof typeof formContent] ?? ''; + if (raw.trim().length > 0) provided.push({ key, value: raw.trim() }); + else skipped.push(key); + } + + const { applied, errors } = await applySecrets(provided, 'dotenv', { envFilePath: envPath }); + + assert.deepStrictEqual(applied, ['NEW_B']); + assert.deepStrictEqual(skipped, ['SKIP_C']); + assert.deepStrictEqual(errors, []); + assert.deepStrictEqual(existing, ['EXISTING_A']); + + // Cleanup + if (savedB === undefined) delete process.env.NEW_B; + else process.env.NEW_B = savedB; + if (savedC === undefined) delete process.env.SKIP_C; + else process.env.SKIP_C = savedC; + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/mcp-server/src/server.ts b/packages/mcp-server/src/server.ts index f4f5fe206..d619ff0f6 100644 --- a/packages/mcp-server/src/server.ts +++ b/packages/mcp-server/src/server.ts @@ -2,8 +2,9 @@ * MCP Server — registers GSD orchestration, project-state, and workflow tools. 
* * Session tools (6): gsd_execute, gsd_status, gsd_result, gsd_cancel, gsd_query, gsd_resolve_blocker + * Interactive tools (2): ask_user_questions, secure_env_collect via MCP form elicitation * Read-only tools (6): gsd_progress, gsd_roadmap, gsd_history, gsd_doctor, gsd_captures, gsd_knowledge - * Workflow tools (17): planning, replanning, completion, validation, reassessment, gate result, and milestone status tools + * Workflow tools (29): headless-safe planning, metadata persistence, replanning, completion, validation, reassessment, gate result, status, and journal tools * * Uses dynamic imports for @modelcontextprotocol/sdk because TS Node16 * cannot resolve the SDK's subpath exports statically (same pattern as @@ -21,6 +22,7 @@ import { readCaptures } from './readers/captures.js'; import { readKnowledge } from './readers/knowledge.js'; import { runDoctorLite } from './readers/doctor-lite.js'; import { registerWorkflowTools } from './workflow-tools.js'; +import { applySecrets, checkExistingEnvKeys, detectDestination } from './env-writer.js'; // --------------------------------------------------------------------------- // Constants @@ -44,6 +46,11 @@ function errorContent(message: string): { isError: true; content: Array<{ type: return { isError: true, content: [{ type: 'text' as const, text: message }] }; } +/** Return raw text content without JSON wrapping. 
*/ +function textContent(text: string): { content: Array<{ type: 'text'; text: string }> } { + return { content: [{ type: 'text' as const, text }] }; +} + // --------------------------------------------------------------------------- // gsd_query filesystem reader // --------------------------------------------------------------------------- @@ -106,12 +113,172 @@ async function fileExists(path: string): Promise { // MCP Server type — minimal interface for the dynamically-imported McpServer // --------------------------------------------------------------------------- +interface ElicitResult { + action: 'accept' | 'decline' | 'cancel'; + content?: Record; +} + +interface ElicitRequestFormParams { + mode?: 'form'; + message: string; + requestedSchema: { + type: 'object'; + properties: Record>; + required?: string[]; + }; +} + interface McpServerInstance { tool(name: string, description: string, params: Record, handler: (args: Record) => Promise): unknown; + server: { + elicitInput( + params: AskUserQuestionsElicitRequest | ElicitRequestFormParams, + options?: unknown, + ): Promise; + }; connect(transport: unknown): Promise; close(): Promise; } +interface AskUserQuestionOption { + label: string; + description: string; +} + +interface AskUserQuestion { + id: string; + header: string; + question: string; + options: AskUserQuestionOption[]; + allowMultiple?: boolean; +} + +interface AskUserQuestionsParams { + questions: AskUserQuestion[]; +} + +type AskUserQuestionsContentValue = string | number | boolean | string[]; + +interface AskUserQuestionsElicitResult { + action: 'accept' | 'decline' | 'cancel'; + content?: Record; +} + +interface AskUserQuestionsElicitRequest { + mode: 'form'; + message: string; + requestedSchema: { + type: 'object'; + properties: Record>; + required?: string[]; + }; +} + +const OTHER_OPTION_LABEL = 'None of the above'; + +function normalizeAskUserQuestionsNote(value: AskUserQuestionsContentValue | undefined): string { + return typeof value === 
'string' ? value.trim() : ''; +} + +function normalizeAskUserQuestionsAnswers( + value: AskUserQuestionsContentValue | undefined, + allowMultiple: boolean, +): string[] { + if (allowMultiple) { + return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : []; + } + + return typeof value === 'string' && value.length > 0 ? [value] : []; +} + +function validateAskUserQuestionsPayload(questions: AskUserQuestion[]): string | null { + if (questions.length === 0 || questions.length > 3) { + return 'Error: questions must contain 1-3 items'; + } + + for (const question of questions) { + if (!question.options || question.options.length === 0) { + return `Error: ask_user_questions requires non-empty options for every question (question "${question.id}" has none)`; + } + } + + return null; +} + +export function buildAskUserQuestionsElicitRequest(questions: AskUserQuestion[]): AskUserQuestionsElicitRequest { + const properties: Record> = {}; + const required = questions.map((question) => question.id); + + for (const question of questions) { + if (question.allowMultiple) { + properties[question.id] = { + type: 'array', + title: question.header, + description: question.question, + minItems: 1, + maxItems: question.options.length, + items: { + anyOf: question.options.map((option) => ({ + const: option.label, + title: option.label, + })), + }, + }; + continue; + } + + properties[question.id] = { + type: 'string', + title: question.header, + description: question.question, + oneOf: [...question.options, { label: OTHER_OPTION_LABEL, description: 'Choose this when the listed options do not fit.' }].map((option) => ({ + const: option.label, + title: option.label, + })), + }; + + properties[`${question.id}__note`] = { + type: 'string', + title: `${question.header} Note`, + description: `Optional note for "${OTHER_OPTION_LABEL}".`, + maxLength: 500, + }; + } + + return { + mode: 'form', + message: 'Please answer the following question(s). 
For single-select questions, choose "None of the above" and add a note if the provided options do not fit.', + requestedSchema: { + type: 'object', + properties, + required, + }, + }; +} + +export function formatAskUserQuestionsElicitResult( + questions: AskUserQuestion[], + result: AskUserQuestionsElicitResult, +): string { + const answers: Record = {}; + const content = result.content ?? {}; + + for (const question of questions) { + const answerList = normalizeAskUserQuestionsAnswers(content[question.id], !!question.allowMultiple); + + if (!question.allowMultiple && answerList[0] === OTHER_OPTION_LABEL) { + const note = normalizeAskUserQuestionsNote(content[`${question.id}__note`]); + if (note) { + answerList.push(`user_note: ${note}`); + } + } + + answers[question.id] = { answers: answerList }; + } + + return JSON.stringify({ answers }); +} + // --------------------------------------------------------------------------- // createMcpServer // --------------------------------------------------------------------------- @@ -131,7 +298,7 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{ const server: McpServerInstance = new McpServer( { name: SERVER_NAME, version: SERVER_VERSION }, - { capabilities: { tools: {} } }, + { capabilities: { tools: {}, elicitation: {} } }, ); // ----------------------------------------------------------------------- @@ -285,6 +452,160 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{ }, ); + // ----------------------------------------------------------------------- + // ask_user_questions — structured user input via MCP form elicitation + // ----------------------------------------------------------------------- + server.tool( + 'ask_user_questions', + 'Request user input for one to three short questions and wait for the response. Single-select questions include a free-form "None of the above" path. 
Multi-select questions allow multiple choices.', + { + questions: z.array(z.object({ + id: z.string().describe('Stable identifier for mapping answers (snake_case)'), + header: z.string().describe('Short header label shown in the UI (12 or fewer chars)'), + question: z.string().describe('Single-sentence prompt shown to the user'), + options: z.array(z.object({ + label: z.string().describe('User-facing label (1-5 words)'), + description: z.string().describe('One short sentence explaining impact/tradeoff if selected'), + })).describe('Provide 2-3 mutually exclusive choices. Put the recommended option first and suffix its label with "(Recommended)". Do not include an "Other" option for single-select questions.'), + allowMultiple: z.boolean().optional().describe('If true, the user can select multiple options. No "None of the above" option is added.'), + })).describe('Questions to show the user. Prefer 1 and do not exceed 3.'), + }, + async (args: Record) => { + const { questions } = args as unknown as AskUserQuestionsParams; + try { + const validationError = validateAskUserQuestionsPayload(questions); + if (validationError) return errorContent(validationError); + + const elicitation = await server.server.elicitInput(buildAskUserQuestionsElicitRequest(questions)); + if (elicitation.action !== 'accept' || !elicitation.content) { + return textContent('ask_user_questions was cancelled before receiving a response'); + } + + return textContent(formatAskUserQuestionsElicitResult(questions, elicitation)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // secure_env_collect — collect secrets via MCP form elicitation + // ----------------------------------------------------------------------- + server.tool( + 'secure_env_collect', + 'Collect environment variables securely via form input. 
Values are written directly to .env (or Vercel/Convex) and NEVER appear in tool output — only key names and applied/skipped status are returned. Use this instead of asking users to manually edit .env files or paste secrets into chat.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + keys: z.array(z.object({ + key: z.string().describe('Env var name, e.g. OPENAI_API_KEY'), + hint: z.string().optional().describe('Format hint shown to user, e.g. "starts with sk-"'), + guidance: z.array(z.string()).optional().describe('Step-by-step instructions for obtaining this key'), + })).min(1).describe('Environment variables to collect'), + destination: z.enum(['dotenv', 'vercel', 'convex']).optional().describe('Where to write secrets. Auto-detected from project files if omitted.'), + envFilePath: z.string().optional().describe('Path to .env file (dotenv only). Defaults to .env in projectDir.'), + environment: z.enum(['development', 'preview', 'production']).optional().describe('Target environment (vercel/convex only)'), + }, + async (args: Record) => { + const { projectDir, keys, destination, envFilePath, environment } = args as { + projectDir: string; + keys: Array<{ key: string; hint?: string; guidance?: string[] }>; + destination?: 'dotenv' | 'vercel' | 'convex'; + envFilePath?: string; + environment?: 'development' | 'preview' | 'production'; + }; + + try { + const resolvedProjectDir = resolve(projectDir); + const resolvedEnvPath = resolve(resolvedProjectDir, envFilePath ?? 
'.env'); + + // (1) Check which keys already exist + const allKeyNames = keys.map((k) => k.key); + const existingKeys = await checkExistingEnvKeys(allKeyNames, resolvedEnvPath); + const existingSet = new Set(existingKeys); + const pendingKeys = keys.filter((k) => !existingSet.has(k.key)); + + // If all keys already exist, return immediately + if (pendingKeys.length === 0) { + const lines = existingKeys.map((k) => `• ${k}: already set`); + return textContent(`All ${existingKeys.length} key(s) already set.\n${lines.join('\n')}`); + } + + // (2) Build elicitation form — one string field per pending key + const properties: Record> = {}; + const required: string[] = []; + + for (const item of pendingKeys) { + const descParts: string[] = []; + if (item.hint) descParts.push(`Format: ${item.hint}`); + if (item.guidance && item.guidance.length > 0) { + descParts.push('How to get this:'); + item.guidance.forEach((step, i) => descParts.push(`${i + 1}. ${step}`)); + } + descParts.push('Leave empty to skip.'); + + properties[item.key] = { + type: 'string', + title: item.key, + description: descParts.join('\n'), + }; + // Don't mark as required — empty string = skip + } + + // (3) Elicit input from the MCP client + const elicitation = await server.server.elicitInput({ + message: `Enter values for ${pendingKeys.length} environment variable(s). Values are written directly to the project and never shown to the AI.`, + requestedSchema: { + type: 'object', + properties, + required, + }, + }); + + if (elicitation.action !== 'accept' || !elicitation.content) { + return textContent('secure_env_collect was cancelled by user.'); + } + + // (4) Separate provided vs skipped from form response + const provided: Array<{ key: string; value: string }> = []; + const skipped: string[] = []; + + for (const item of pendingKeys) { + const raw = elicitation.content[item.key]; + const value = typeof raw === 'string' ? 
raw.trim() : ''; + if (value.length > 0) { + provided.push({ key: item.key, value }); + } else { + skipped.push(item.key); + } + } + + // (5) Auto-detect destination if not specified + const resolvedDestination = destination ?? detectDestination(resolvedProjectDir); + + // (6) Write secrets to destination + const { applied, errors } = await applySecrets(provided, resolvedDestination, { + envFilePath: resolvedEnvPath, + environment, + }); + + // (7) Build result — NEVER include secret values + const lines: string[] = [ + `destination: ${resolvedDestination}${!destination ? ' (auto-detected)' : ''}${environment ? ` (${environment})` : ''}`, + ]; + for (const k of applied) lines.push(`✓ ${k}: applied`); + for (const k of skipped) lines.push(`• ${k}: skipped`); + for (const k of existingKeys) lines.push(`• ${k}: already set`); + for (const e of errors) lines.push(`✗ ${e}`); + + return errors.length > 0 && applied.length === 0 + ? errorContent(lines.join('\n')) + : textContent(lines.join('\n')); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + // ======================================================================= // READ-ONLY TOOLS — no session required, pure filesystem reads // ======================================================================= diff --git a/packages/mcp-server/src/tool-credentials.test.ts b/packages/mcp-server/src/tool-credentials.test.ts new file mode 100644 index 000000000..b6838a29f --- /dev/null +++ b/packages/mcp-server/src/tool-credentials.test.ts @@ -0,0 +1,95 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { loadStoredCredentialEnvKeys, resolveAuthPath } from "./tool-credentials.js"; + +describe("tool credentials", () => { + it("hydrates supported model and tool keys from auth.json", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-")); + const authPath = join(tempRoot, "auth.json"); + const env: NodeJS.ProcessEnv = {}; + + try { + writeFileSync(authPath, JSON.stringify({ + anthropic: { type: "api_key", key: "sk-ant-secret" }, + openai: { type: "api_key", key: "sk-openai-secret" }, + tavily: { type: "api_key", key: "tvly-secret" }, + context7: [{ type: "api_key", key: "ctx7-secret" }], + })); + + const loaded = loadStoredCredentialEnvKeys({ authPath, env }); + assert.deepEqual(loaded.sort(), [ + "ANTHROPIC_API_KEY", + "CONTEXT7_API_KEY", + "OPENAI_API_KEY", + "TAVILY_API_KEY", + ]); + assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-secret"); + assert.equal(env.OPENAI_API_KEY, "sk-openai-secret"); + assert.equal(env.TAVILY_API_KEY, "tvly-secret"); + assert.equal(env.CONTEXT7_API_KEY, "ctx7-secret"); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("does not overwrite explicit environment variables", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-")); + const authPath 
= join(tempRoot, "auth.json"); + const env: NodeJS.ProcessEnv = { + BRAVE_API_KEY: "already-set", + }; + + try { + writeFileSync(authPath, JSON.stringify({ + brave: { type: "api_key", key: "from-auth-json" }, + anthropic: { type: "api_key", key: "sk-ant-from-auth-json" }, + })); + + const loaded = loadStoredCredentialEnvKeys({ authPath, env }); + assert.deepEqual(loaded, ["ANTHROPIC_API_KEY"]); + assert.equal(env.BRAVE_API_KEY, "already-set"); + assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-from-auth-json"); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("ignores oauth credentials because they are resolved through auth storage, not env hydration", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-")); + const authPath = join(tempRoot, "auth.json"); + const env: NodeJS.ProcessEnv = {}; + + try { + writeFileSync(authPath, JSON.stringify({ + openai: { type: "oauth", access: "oauth-access-token" }, + "google-gemini-cli": { type: "oauth", token: "ya29.oauth-token" }, + })); + + const loaded = loadStoredCredentialEnvKeys({ authPath, env }); + assert.deepEqual(loaded, []); + assert.equal(env.OPENAI_API_KEY, undefined); + assert.equal(env.GEMINI_API_KEY, undefined); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("resolves auth.json from GSD_CODING_AGENT_DIR", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-agent-dir-")); + const agentDir = join(tempRoot, "agent"); + mkdirSync(agentDir, { recursive: true }); + + try { + assert.equal( + resolveAuthPath({ GSD_CODING_AGENT_DIR: agentDir }), + join(agentDir, "auth.json"), + ); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/mcp-server/src/tool-credentials.ts b/packages/mcp-server/src/tool-credentials.ts new file mode 100644 index 000000000..d19487437 --- /dev/null +++ b/packages/mcp-server/src/tool-credentials.ts @@ -0,0 +1,97 @@ +import { existsSync, readFileSync } 
from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; + +type AuthCredential = + | { type?: unknown; key?: unknown } + | Array<{ type?: unknown; key?: unknown }>; + +type AuthStorageData = Record; + +const AUTH_ENV_KEYS = [ + ["anthropic", "ANTHROPIC_API_KEY"], + ["openai", "OPENAI_API_KEY"], + ["github-copilot", "GITHUB_TOKEN"], + ["google", "GEMINI_API_KEY"], + ["groq", "GROQ_API_KEY"], + ["xai", "XAI_API_KEY"], + ["openrouter", "OPENROUTER_API_KEY"], + ["mistral", "MISTRAL_API_KEY"], + ["ollama-cloud", "OLLAMA_API_KEY"], + ["custom-openai", "CUSTOM_OPENAI_API_KEY"], + ["cerebras", "CEREBRAS_API_KEY"], + ["azure-openai-responses", "AZURE_OPENAI_API_KEY"], + ["vercel-ai-gateway", "AI_GATEWAY_API_KEY"], + ["zai", "ZAI_API_KEY"], + ["minimax", "MINIMAX_API_KEY"], + ["minimax-cn", "MINIMAX_CN_API_KEY"], + ["huggingface", "HF_TOKEN"], + ["opencode", "OPENCODE_API_KEY"], + ["opencode-go", "OPENCODE_API_KEY"], + ["kimi-coding", "KIMI_API_KEY"], + ["alibaba-coding-plan", "ALIBABA_API_KEY"], + ["brave", "BRAVE_API_KEY"], + ["brave_answers", "BRAVE_ANSWERS_KEY"], + ["context7", "CONTEXT7_API_KEY"], + ["jina", "JINA_API_KEY"], + ["tavily", "TAVILY_API_KEY"], + ["slack_bot", "SLACK_BOT_TOKEN"], + ["discord_bot", "DISCORD_BOT_TOKEN"], + ["telegram_bot", "TELEGRAM_BOT_TOKEN"], +] as const; + +function expandHome(pathValue: string): string { + if (pathValue === "~") return homedir(); + if (pathValue.startsWith("~/")) return join(homedir(), pathValue.slice(2)); + return pathValue; +} + +function getStoredApiKey(data: AuthStorageData, providerId: string): string | undefined { + const raw = data[providerId]; + const credentials = Array.isArray(raw) ? raw : raw ? 
[raw] : []; + + for (const credential of credentials) { + if (credential?.type !== "api_key") continue; + if (typeof credential.key !== "string") continue; + if (credential.key.trim().length === 0) continue; + return credential.key; + } + + return undefined; +} + +export function resolveAuthPath(env: NodeJS.ProcessEnv = process.env): string { + const agentDir = env.GSD_CODING_AGENT_DIR?.trim(); + if (agentDir) return join(expandHome(agentDir), "auth.json"); + return join(homedir(), ".gsd", "agent", "auth.json"); +} + +export function loadStoredCredentialEnvKeys(options: { + env?: NodeJS.ProcessEnv; + authPath?: string; +} = {}): string[] { + const env = options.env ?? process.env; + const authPath = options.authPath ?? resolveAuthPath(env); + if (!existsSync(authPath)) return []; + + let parsed: AuthStorageData; + try { + const raw = readFileSync(authPath, "utf-8"); + const data = JSON.parse(raw) as unknown; + if (!data || typeof data !== "object" || Array.isArray(data)) return []; + parsed = data as AuthStorageData; + } catch { + return []; + } + + const loaded: string[] = []; + for (const [providerId, envVar] of AUTH_ENV_KEYS) { + if (env[envVar]) continue; + const key = getStoredApiKey(parsed, providerId); + if (!key) continue; + env[envVar] = key; + loaded.push(envVar); + } + + return loaded; +} diff --git a/packages/mcp-server/src/workflow-tools.test.ts b/packages/mcp-server/src/workflow-tools.test.ts index 35a883b3b..8435203c6 100644 --- a/packages/mcp-server/src/workflow-tools.test.ts +++ b/packages/mcp-server/src/workflow-tools.test.ts @@ -6,7 +6,7 @@ import { tmpdir } from "node:os"; import { randomUUID } from "node:crypto"; import { _getAdapter, closeDatabase } from "../../../src/resources/extensions/gsd/gsd-db.ts"; -import { registerWorkflowTools } from "./workflow-tools.ts"; +import { registerWorkflowTools, WORKFLOW_TOOL_NAMES } from "./workflow-tools.ts"; function makeTmpBase(): string { const base = join(tmpdir(), `gsd-mcp-workflow-${randomUUID()}`); 
@@ -68,33 +68,12 @@ function makeMockServer() { } describe("workflow MCP tools", () => { - it("registers the seventeen workflow tools", () => { + it("registers the full headless-safe workflow tool surface", () => { const server = makeMockServer(); registerWorkflowTools(server as any); - assert.equal(server.tools.length, 17); - assert.deepEqual( - server.tools.map((t) => t.name), - [ - "gsd_plan_milestone", - "gsd_plan_slice", - "gsd_replan_slice", - "gsd_slice_replan", - "gsd_slice_complete", - "gsd_complete_slice", - "gsd_complete_milestone", - "gsd_milestone_complete", - "gsd_validate_milestone", - "gsd_milestone_validate", - "gsd_reassess_roadmap", - "gsd_roadmap_reassess", - "gsd_save_gate_result", - "gsd_summary_save", - "gsd_task_complete", - "gsd_complete_task", - "gsd_milestone_status", - ], - ); + assert.equal(server.tools.length, WORKFLOW_TOOL_NAMES.length); + assert.deepEqual(server.tools.map((t) => t.name), [...WORKFLOW_TOOL_NAMES]); }); it("gsd_summary_save writes artifact through the shared executor", async () => { @@ -405,6 +384,116 @@ describe("workflow MCP tools", () => { } }); + it("gsd_requirement_save opens the DB before inline requirement writes", async () => { + const base = makeTmpBase(); + try { + const server = makeMockServer(); + registerWorkflowTools(server as any); + const requirementTool = server.tools.find((t) => t.name === "gsd_requirement_save"); + assert.ok(requirementTool, "requirement tool should be registered"); + + closeDatabase(); + + const result = await requirementTool!.handler({ + projectDir: base, + class: "operability", + description: "Inline MCP requirement save regression", + why: "Reproduce missing ensureDbOpen in workflow-tools", + source: "user", + status: "active", + primary_owner: "M010/S10", + validation: "n/a", + }); + + assert.match((result as any).content[0].text as string, /Saved requirement R\d+/); + assert.ok(existsSync(join(base, ".gsd", "REQUIREMENTS.md")), "REQUIREMENTS.md should be written to disk"); + 
const row = _getAdapter()! + .prepare("SELECT id, class, description FROM requirements WHERE description = ?") + .get("Inline MCP requirement save regression") as Record | undefined; + assert.ok(row, "requirement should be written to the database"); + assert.equal(row["class"], "operability"); + } finally { + cleanup(base); + } + }); + + it("gsd_plan_task reopens the DB before inline task planning writes", async () => { + const base = makeTmpBase(); + try { + const server = makeMockServer(); + registerWorkflowTools(server as any); + const milestoneTool = server.tools.find((t) => t.name === "gsd_plan_milestone"); + const sliceTool = server.tools.find((t) => t.name === "gsd_plan_slice"); + const taskTool = server.tools.find((t) => t.name === "gsd_plan_task"); + assert.ok(milestoneTool, "milestone planning tool should be registered"); + assert.ok(sliceTool, "slice planning tool should be registered"); + assert.ok(taskTool, "task planning tool should be registered"); + + await milestoneTool!.handler({ + projectDir: base, + milestoneId: "M010", + title: "Inline task planning DB reopen", + vision: "Seed a slice, close the DB, then plan another task inline.", + slices: [ + { + sliceId: "S10", + title: "Inline task planning", + risk: "medium", + depends: [], + demo: "Inline gsd_plan_task reopens the DB after it was closed.", + goal: "Preserve MCP task planning after the DB adapter is closed.", + successCriteria: "The second task plan persists after a closed DB is reopened.", + proofLevel: "integration", + integrationClosure: "The inline MCP handler reopens the DB before planning.", + observabilityImpact: "workflow-tools MCP tests cover the inline reopen path.", + }, + ], + }); + await sliceTool!.handler({ + projectDir: base, + milestoneId: "M010", + sliceId: "S10", + goal: "Create the initial slice plan before closing the DB.", + tasks: [ + { + taskId: "T10", + title: "Seed existing task", + description: "Create the initial task plan before closing the DB.", + estimate: 
"5m", + files: ["packages/mcp-server/src/workflow-tools.ts"], + verify: "node --test", + inputs: ["M010-ROADMAP.md"], + expectedOutput: ["T10-PLAN.md"], + }, + ], + }); + + closeDatabase(); + + const result = await taskTool!.handler({ + projectDir: base, + milestoneId: "M010", + sliceId: "S10", + taskId: "T11", + title: "Reopen and plan", + description: "Exercise the inline plan-task path after the DB was closed.", + estimate: "5m", + files: ["packages/mcp-server/src/workflow-tools.ts"], + verify: "node --test", + inputs: ["M010-ROADMAP.md", "S10-PLAN.md"], + expectedOutput: ["T11-PLAN.md"], + }); + + assert.match((result as any).content[0].text as string, /Planned task T11/); + assert.ok( + existsSync(join(base, ".gsd", "milestones", "M010", "slices", "S10", "tasks", "T11-PLAN.md")), + "T11 plan should be written after reopening the DB", + ); + } finally { + cleanup(base); + } + }); + it("gsd_replan_slice and gsd_slice_replan work end-to-end", async () => { const base = makeTmpBase(); try { @@ -974,3 +1063,31 @@ describe("workflow MCP tools", () => { } }); }); + +describe("URL scheme regex — Windows drive letter safety", () => { + // This is the regex used in getWriteGateModuleCandidates() and + // getWorkflowExecutorModuleCandidates() to reject non-file URL schemes. + // It must NOT match single-letter Windows drive prefixes (C:, D:, etc.). 
+ const urlSchemeRegex = /^[a-z]{2,}:/i; + + it("rejects multi-letter URL schemes", () => { + assert.ok(urlSchemeRegex.test("http://example.com"), "http: should match"); + assert.ok(urlSchemeRegex.test("https://example.com"), "https: should match"); + assert.ok(urlSchemeRegex.test("ftp://files.example.com"), "ftp: should match"); + assert.ok(urlSchemeRegex.test("file:///C:/Users"), "file: should match"); + assert.ok(urlSchemeRegex.test("node:fs"), "node: should match"); + }); + + it("allows single-letter Windows drive prefixes", () => { + assert.ok(!urlSchemeRegex.test("C:\\Users\\user\\project"), "C:\\ should not match"); + assert.ok(!urlSchemeRegex.test("D:\\other\\path"), "D:\\ should not match"); + assert.ok(!urlSchemeRegex.test("c:\\lowercase\\drive"), "c:\\ should not match"); + assert.ok(!urlSchemeRegex.test("E:/forward/slash/path"), "E:/ should not match"); + }); + + it("allows bare filesystem paths", () => { + assert.ok(!urlSchemeRegex.test("/usr/local/lib/module.js"), "unix absolute path should not match"); + assert.ok(!urlSchemeRegex.test("./relative/path.js"), "relative path should not match"); + assert.ok(!urlSchemeRegex.test("../parent/path.js"), "parent relative path should not match"); + }); +}); diff --git a/packages/mcp-server/src/workflow-tools.ts b/packages/mcp-server/src/workflow-tools.ts index 95ea20494..9abbddbeb 100644 --- a/packages/mcp-server/src/workflow-tools.ts +++ b/packages/mcp-server/src/workflow-tools.ts @@ -244,6 +244,10 @@ type WorkflowWriteGateModule = { ) => { block: boolean; reason?: string }; }; +type WorkflowDbBootstrapModule = { + ensureDbOpen: (basePath?: string) => Promise; +}; + let workflowToolExecutorsPromise: Promise | null = null; let workflowExecutionQueue: Promise = Promise.resolve(); let workflowWriteGatePromise: Promise | null = null; @@ -318,7 +322,7 @@ function getWriteGateModuleCandidates(): string[] { const candidates: string[] = []; const explicitModule = process.env.GSD_WORKFLOW_WRITE_GATE_MODULE?.trim(); if 
(explicitModule) { - if (/^[a-z]+:/i.test(explicitModule) && !explicitModule.startsWith("file:")) { + if (/^[a-z]{2,}:/i.test(explicitModule) && !explicitModule.startsWith("file:")) { throw new Error("GSD_WORKFLOW_WRITE_GATE_MODULE only supports file: URLs or filesystem paths."); } candidates.push(explicitModule.startsWith("file:") ? explicitModule : toFileUrl(explicitModule)); @@ -326,6 +330,7 @@ function getWriteGateModuleCandidates(): string[] { candidates.push( new URL("../../../src/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url).href, + new URL("../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url).href, new URL("../../../src/resources/extensions/gsd/bootstrap/write-gate.ts", import.meta.url).href, ); @@ -336,11 +341,46 @@ function toFileUrl(modulePath: string): string { return pathToFileURL(resolve(modulePath)).href; } +/** @internal — exported for testing only */ +export function _buildImportCandidates(relativePath: string): string[] { + // Build candidate paths: try the given path first, then swap src/<->dist/ + // and try .ts extension. This handles both dev (tsx from src/) and prod + // (compiled from dist/) execution contexts. + const candidates: string[] = [relativePath]; + const swapped = relativePath.includes("/src/") + ? relativePath.replace("/src/", "/dist/") + : relativePath.includes("/dist/") + ? 
relativePath.replace("/dist/", "/src/") + : null; + if (swapped) candidates.push(swapped); + // Also try .ts variants for dev-mode tsx execution + if (relativePath.endsWith(".js")) { + candidates.push(relativePath.replace(/\.js$/, ".ts")); + if (swapped) candidates.push(swapped.replace(/\.js$/, ".ts")); + } + return candidates; +} + +async function importLocalModule(relativePath: string): Promise { + const candidates = _buildImportCandidates(relativePath) + .map((p) => new URL(p, import.meta.url).href); + + let lastErr: unknown; + for (const candidate of candidates) { + try { + return await import(candidate) as T; + } catch (err) { + lastErr = err; + } + } + throw lastErr; +} + function getWorkflowExecutorModuleCandidates(env: NodeJS.ProcessEnv = process.env): string[] { const candidates: string[] = []; const explicitModule = env.GSD_WORKFLOW_EXECUTORS_MODULE?.trim(); if (explicitModule) { - if (/^[a-z]+:/i.test(explicitModule) && !explicitModule.startsWith("file:")) { + if (/^[a-z]{2,}:/i.test(explicitModule) && !explicitModule.startsWith("file:")) { throw new Error("GSD_WORKFLOW_EXECUTORS_MODULE only supports file: URLs or filesystem paths."); } candidates.push(explicitModule.startsWith("file:") ? 
explicitModule : toFileUrl(explicitModule)); @@ -348,6 +388,7 @@ function getWorkflowExecutorModuleCandidates(env: NodeJS.ProcessEnv = process.en candidates.push( new URL("../../../src/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url).href, + new URL("../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url).href, new URL("../../../src/resources/extensions/gsd/tools/workflow-tool-executors.ts", import.meta.url).href, ); @@ -420,6 +461,38 @@ interface McpToolServer { ): unknown; } +export const WORKFLOW_TOOL_NAMES = [ + "gsd_decision_save", + "gsd_save_decision", + "gsd_requirement_update", + "gsd_update_requirement", + "gsd_requirement_save", + "gsd_save_requirement", + "gsd_milestone_generate_id", + "gsd_generate_milestone_id", + "gsd_plan_milestone", + "gsd_plan_slice", + "gsd_plan_task", + "gsd_task_plan", + "gsd_replan_slice", + "gsd_slice_replan", + "gsd_slice_complete", + "gsd_complete_slice", + "gsd_skip_slice", + "gsd_complete_milestone", + "gsd_milestone_complete", + "gsd_validate_milestone", + "gsd_milestone_validate", + "gsd_reassess_roadmap", + "gsd_roadmap_reassess", + "gsd_save_gate_result", + "gsd_summary_save", + "gsd_task_complete", + "gsd_complete_task", + "gsd_milestone_status", + "gsd_journal_query", +] as const; + async function runSerializedWorkflowOperation(fn: () => Promise): Promise { // The shared DB adapter and workflow log base path are process-global, so // workflow MCP mutations must not overlap within a single server process. 
@@ -437,6 +510,22 @@ async function runSerializedWorkflowOperation(fn: () => Promise): Promise< } } +async function runSerializedWorkflowDbOperation( + projectDir: string, + fn: () => Promise, +): Promise { + return runSerializedWorkflowOperation(async () => { + const { ensureDbOpen } = await importLocalModule( + "../../../src/resources/extensions/gsd/bootstrap/dynamic-tools.js", + ); + const dbAvailable = await ensureDbOpen(projectDir); + if (!dbAvailable) { + throw new Error("GSD database is not available"); + } + return fn(); + }); +} + async function enforceWorkflowWriteGate( toolName: string, projectDir: string, @@ -566,6 +655,15 @@ async function handleSaveGateResult( return runSerializedWorkflowOperation(() => executeSaveGateResult(params, projectDir)); } +async function ensureMilestoneDbRow(milestoneId: string): Promise { + try { + const { insertMilestone } = await importLocalModule("../../../src/resources/extensions/gsd/gsd-db.js"); + insertMilestone({ id: milestoneId, status: "queued" }); + } catch { + // Ignore pre-existing rows or transient DB availability issues. + } +} + const projectDirParam = z.string().describe("Absolute path to the project directory within the configured workflow root"); const planMilestoneParams = { @@ -772,6 +870,73 @@ const summarySaveParams = { }; const summarySaveSchema = z.object(summarySaveParams); +const decisionSaveParams = { + projectDir: projectDirParam, + scope: z.string().describe("Scope of the decision (e.g. 
architecture, library, observability)"), + decision: z.string().describe("What is being decided"), + choice: z.string().describe("The choice made"), + rationale: z.string().describe("Why this choice was made"), + revisable: z.string().optional().describe("Whether this can be revisited"), + when_context: z.string().optional().describe("When/context for the decision"), + made_by: z.enum(["human", "agent", "collaborative"]).optional().describe("Who made the decision"), +}; +const decisionSaveSchema = z.object(decisionSaveParams); + +const requirementUpdateParams = { + projectDir: projectDirParam, + id: z.string().describe("Requirement ID (e.g. R001)"), + status: z.string().optional().describe("New status"), + validation: z.string().optional().describe("Validation criteria or proof"), + notes: z.string().optional().describe("Additional notes"), + description: z.string().optional().describe("Updated description"), + primary_owner: z.string().optional().describe("Primary owning slice"), + supporting_slices: z.string().optional().describe("Supporting slices"), +}; +const requirementUpdateSchema = z.object(requirementUpdateParams); + +const requirementSaveParams = { + projectDir: projectDirParam, + class: z.string().describe("Requirement class"), + description: z.string().describe("Short description of the requirement"), + why: z.string().describe("Why this requirement matters"), + source: z.string().describe("Origin of the requirement"), + status: z.string().optional().describe("Requirement status"), + primary_owner: z.string().optional().describe("Primary owning slice"), + supporting_slices: z.string().optional().describe("Supporting slices"), + validation: z.string().optional().describe("Validation criteria"), + notes: z.string().optional().describe("Additional notes"), +}; +const requirementSaveSchema = z.object(requirementSaveParams); + +const milestoneGenerateIdParams = { + projectDir: projectDirParam, +}; +const milestoneGenerateIdSchema = 
z.object(milestoneGenerateIdParams); + +const planTaskParams = { + projectDir: projectDirParam, + milestoneId: z.string().describe("Milestone ID (e.g. M001)"), + sliceId: z.string().describe("Slice ID (e.g. S01)"), + taskId: z.string().describe("Task ID (e.g. T01)"), + title: z.string().describe("Task title"), + description: z.string().describe("Task description / steps block"), + estimate: z.string().describe("Task estimate"), + files: z.array(z.string()).describe("Files likely touched"), + verify: z.string().describe("Verification command or block"), + inputs: z.array(z.string()).describe("Input files or references"), + expectedOutput: z.array(z.string()).describe("Expected output files or artifacts"), + observabilityImpact: z.string().optional().describe("Task observability impact"), +}; +const planTaskSchema = z.object(planTaskParams); + +const skipSliceParams = { + projectDir: projectDirParam, + sliceId: z.string().describe("Slice ID (e.g. S02)"), + milestoneId: z.string().describe("Milestone ID (e.g. M003)"), + reason: z.string().optional().describe("Reason for skipping this slice"), +}; +const skipSliceSchema = z.object(skipSliceParams); + const taskCompleteParams = { projectDir: projectDirParam, taskId: z.string().describe("Task ID (e.g. 
T01)"), @@ -803,7 +968,171 @@ const milestoneStatusParams = { }; const milestoneStatusSchema = z.object(milestoneStatusParams); +const journalQueryParams = { + projectDir: projectDirParam, + flowId: z.string().optional().describe("Filter by flow ID"), + unitId: z.string().optional().describe("Filter by unit ID"), + rule: z.string().optional().describe("Filter by rule name"), + eventType: z.string().optional().describe("Filter by event type"), + after: z.string().optional().describe("ISO-8601 lower bound (inclusive)"), + before: z.string().optional().describe("ISO-8601 upper bound (inclusive)"), + limit: z.number().optional().describe("Maximum entries to return"), +}; +const journalQuerySchema = z.object(journalQueryParams); + export function registerWorkflowTools(server: McpToolServer): void { + server.tool( + "gsd_decision_save", + "Record a project decision to the GSD database and regenerate DECISIONS.md.", + decisionSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(decisionSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_decision_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveDecisionToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveDecisionToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved decision ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_save_decision", + "Alias for gsd_decision_save. 
Record a project decision to the GSD database and regenerate DECISIONS.md.", + decisionSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(decisionSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_decision_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveDecisionToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveDecisionToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved decision ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_requirement_update", + "Update an existing requirement in the GSD database and regenerate REQUIREMENTS.md.", + requirementUpdateParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementUpdateSchema, args); + const { projectDir, id, ...updates } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_update", projectDir); + await runSerializedWorkflowDbOperation(projectDir, async () => { + const { updateRequirementInDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return updateRequirementInDb(id, updates, projectDir); + }); + return { content: [{ type: "text" as const, text: `Updated requirement ${id}` }] }; + }, + ); + + server.tool( + "gsd_update_requirement", + "Alias for gsd_requirement_update. 
Update an existing requirement in the GSD database and regenerate REQUIREMENTS.md.", + requirementUpdateParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementUpdateSchema, args); + const { projectDir, id, ...updates } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_update", projectDir); + await runSerializedWorkflowDbOperation(projectDir, async () => { + const { updateRequirementInDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return updateRequirementInDb(id, updates, projectDir); + }); + return { content: [{ type: "text" as const, text: `Updated requirement ${id}` }] }; + }, + ); + + server.tool( + "gsd_requirement_save", + "Record a new requirement to the GSD database and regenerate REQUIREMENTS.md.", + requirementSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveRequirementToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveRequirementToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_save_requirement", + "Alias for gsd_requirement_save. 
Record a new requirement to the GSD database and regenerate REQUIREMENTS.md.", + requirementSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveRequirementToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveRequirementToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_milestone_generate_id", + "Generate the next milestone ID for a new GSD milestone.", + milestoneGenerateIdParams, + async (args: Record) => { + const { projectDir } = parseWorkflowArgs(milestoneGenerateIdSchema, args); + await enforceWorkflowWriteGate("gsd_milestone_generate_id", projectDir); + const id = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { + claimReservedId, + findMilestoneIds, + getReservedMilestoneIds, + nextMilestoneId, + } = await importLocalModule("../../../src/resources/extensions/gsd/milestone-ids.js"); + const reserved = claimReservedId(); + if (reserved) { + await ensureMilestoneDbRow(reserved); + return reserved; + } + const allIds = [...new Set([...findMilestoneIds(projectDir), ...getReservedMilestoneIds()])]; + const nextId = nextMilestoneId(allIds); + await ensureMilestoneDbRow(nextId); + return nextId; + }); + return { content: [{ type: "text" as const, text: id }] }; + }, + ); + + server.tool( + "gsd_generate_milestone_id", + "Alias for gsd_milestone_generate_id. 
Generate the next milestone ID for a new GSD milestone.", + milestoneGenerateIdParams, + async (args: Record) => { + const { projectDir } = parseWorkflowArgs(milestoneGenerateIdSchema, args); + await enforceWorkflowWriteGate("gsd_milestone_generate_id", projectDir); + const id = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { + claimReservedId, + findMilestoneIds, + getReservedMilestoneIds, + nextMilestoneId, + } = await importLocalModule("../../../src/resources/extensions/gsd/milestone-ids.js"); + const reserved = claimReservedId(); + if (reserved) { + await ensureMilestoneDbRow(reserved); + return reserved; + } + const allIds = [...new Set([...findMilestoneIds(projectDir), ...getReservedMilestoneIds()])]; + const nextId = nextMilestoneId(allIds); + await ensureMilestoneDbRow(nextId); + return nextId; + }); + return { content: [{ type: "text" as const, text: id }] }; + }, + ); + server.tool( "gsd_plan_milestone", "Write milestone planning state to the GSD database and render ROADMAP.md from DB.", @@ -830,6 +1159,48 @@ export function registerWorkflowTools(server: McpToolServer): void { }, ); + server.tool( + "gsd_plan_task", + "Write task planning state to the GSD database and render tasks/T##-PLAN.md from DB.", + planTaskParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(planTaskSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_plan_task", projectDir, params.milestoneId); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { handlePlanTask } = await importLocalModule("../../../src/resources/extensions/gsd/tools/plan-task.js"); + return handlePlanTask(params, projectDir); + }); + if ("error" in result) { + throw new Error(result.error); + } + return { + content: [{ type: "text" as const, text: `Planned task ${result.taskId} (${result.sliceId}/${result.milestoneId})` }], + }; + }, + ); + + server.tool( + "gsd_task_plan", + "Alias for 
gsd_plan_task. Write task planning state to the GSD database and render tasks/T##-PLAN.md from DB.", + planTaskParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(planTaskSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_plan_task", projectDir, params.milestoneId); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { handlePlanTask } = await importLocalModule("../../../src/resources/extensions/gsd/tools/plan-task.js"); + return handlePlanTask(params, projectDir); + }); + if ("error" in result) { + throw new Error(result.error); + } + return { + content: [{ type: "text" as const, text: `Planned task ${result.taskId} (${result.sliceId}/${result.milestoneId})` }], + }; + }, + ); + server.tool( "gsd_replan_slice", "Replan a slice after a blocker is discovered, preserving completed tasks and re-rendering PLAN.md + REPLAN.md.", @@ -870,6 +1241,36 @@ export function registerWorkflowTools(server: McpToolServer): void { }, ); + server.tool( + "gsd_skip_slice", + "Mark a slice as skipped so auto-mode advances past it without executing.", + skipSliceParams, + async (args: Record) => { + const { projectDir, milestoneId, sliceId, reason } = parseWorkflowArgs(skipSliceSchema, args); + await enforceWorkflowWriteGate("gsd_skip_slice", projectDir, milestoneId); + await runSerializedWorkflowDbOperation(projectDir, async () => { + const { getSlice, updateSliceStatus } = await importLocalModule("../../../src/resources/extensions/gsd/gsd-db.js"); + const { invalidateStateCache } = await importLocalModule("../../../src/resources/extensions/gsd/state.js"); + const { rebuildState } = await importLocalModule("../../../src/resources/extensions/gsd/doctor.js"); + const slice = getSlice(milestoneId, sliceId); + if (!slice) { + throw new Error(`Slice ${sliceId} not found in milestone ${milestoneId}`); + } + if (slice.status === "complete" || slice.status === "done") { + throw new 
Error(`Slice ${sliceId} is already complete and cannot be skipped`); + } + if (slice.status !== "skipped") { + updateSliceStatus(milestoneId, sliceId, "skipped"); + invalidateStateCache(); + await rebuildState(projectDir); + } + }); + return { + content: [{ type: "text" as const, text: `Skipped slice ${sliceId} (${milestoneId}). Reason: ${reason ?? "User-directed skip"}.` }], + }; + }, + ); + server.tool( "gsd_complete_milestone", "Record a completed milestone to the GSD database and render its SUMMARY.md.", @@ -994,4 +1395,19 @@ export function registerWorkflowTools(server: McpToolServer): void { return runSerializedWorkflowOperation(() => executeMilestoneStatus({ milestoneId }, projectDir)); }, ); + + server.tool( + "gsd_journal_query", + "Query the structured event journal for auto-mode iterations.", + journalQueryParams, + async (args: Record) => { + const { projectDir, limit, ...filters } = parseWorkflowArgs(journalQuerySchema, args); + const { queryJournal } = await importLocalModule("../../../src/resources/extensions/gsd/journal.js"); + const entries = queryJournal(projectDir, filters).slice(0, limit ?? 100); + if (entries.length === 0) { + return { content: [{ type: "text" as const, text: "No matching journal entries found." 
}] }; + } + return { content: [{ type: "text" as const, text: JSON.stringify(entries, null, 2) }] }; + }, + ); } diff --git a/packages/pi-agent-core/src/agent.test.ts b/packages/pi-agent-core/src/agent.test.ts index e0b838cd4..4ecd23af2 100644 --- a/packages/pi-agent-core/src/agent.test.ts +++ b/packages/pi-agent-core/src/agent.test.ts @@ -8,6 +8,8 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; +import { Agent } from "./agent.ts"; +import { getModel, type AssistantMessageEventStream } from "@gsd/pi-ai"; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -50,4 +52,84 @@ describe("Agent — activeInferenceModel (#1844 Bug 2)", () => { assert.ok(setLine < abortLine, "activeInferenceModel must be set before streaming infrastructure is created"); }); + + it("getProviderOptions are forwarded into the provider stream call", async () => { + let capturedOptions: Record | undefined; + const agent = new Agent({ + initialState: { + model: getModel("anthropic", "claude-3-5-sonnet-20241022"), + systemPrompt: "test", + tools: [], + }, + getProviderOptions: async () => ({ customRuntimeOption: "present" }), + streamFn: (_model, _context, options): AssistantMessageEventStream => { + capturedOptions = options as Record | undefined; + return { + async *[Symbol.asyncIterator]() { + yield { + type: "start", + partial: { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-3-5-sonnet-20241022", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }, + }; + yield { + type: "done", + message: { + role: "assistant", + content: [{ type: "text", text: "ok" }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-3-5-sonnet-20241022", + 
usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }, + }; + }, + result: async () => ({ + role: "assistant", + content: [{ type: "text", text: "ok" }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-3-5-sonnet-20241022", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }), + [Symbol.asyncDispose]: async () => {}, + } as AssistantMessageEventStream; + }, + }); + + await agent.prompt("hello"); + assert.equal(capturedOptions?.customRuntimeOption, "present"); + }); }); diff --git a/packages/pi-agent-core/src/agent.ts b/packages/pi-agent-core/src/agent.ts index e65ae7a35..924dd8d39 100644 --- a/packages/pi-agent-core/src/agent.ts +++ b/packages/pi-agent-core/src/agent.ts @@ -108,6 +108,14 @@ export interface AgentOptions { * switches mid-session are handled correctly. */ externalToolExecution?: (model: Model) => boolean; + + /** + * Optional provider-specific options to merge into the next stream call. + * + * Use this for runtime-only callbacks or handles that should not live in + * shared agent state, such as UI bridges for external CLI providers. + */ + getProviderOptions?: (model: Model) => Record | undefined | Promise | undefined>; } /** @@ -152,6 +160,7 @@ export class Agent { private _beforeToolCall?: AgentLoopConfig["beforeToolCall"]; private _afterToolCall?: AgentLoopConfig["afterToolCall"]; private _externalToolExecution?: (model: Model) => boolean; + private _getProviderOptions?: AgentOptions["getProviderOptions"]; constructor(opts: AgentOptions = {}) { this._state = { ...this._state, ...opts.initialState }; @@ -167,6 +176,7 @@ export class Agent { this._transport = opts.transport ?? 
"sse"; this._maxRetryDelayMs = opts.maxRetryDelayMs; this._externalToolExecution = opts.externalToolExecution; + this._getProviderOptions = opts.getProviderOptions; } /** @@ -486,8 +496,10 @@ export class Agent { }; let skipInitialSteeringPoll = options?.skipInitialSteeringPoll === true; + const providerOptions = await this._getProviderOptions?.(model); const config: AgentLoopConfig = { + ...(providerOptions ?? {}), model, reasoning, sessionId: this._sessionId, diff --git a/packages/pi-ai/src/index.ts b/packages/pi-ai/src/index.ts index c8d9e1e8c..8b81cc22e 100644 --- a/packages/pi-ai/src/index.ts +++ b/packages/pi-ai/src/index.ts @@ -12,7 +12,10 @@ export * from "./providers/google-vertex.js"; export * from "./providers/mistral.js"; export * from "./providers/openai-completions.js"; export * from "./providers/openai-responses.js"; +export * from "./providers/provider-capabilities.js"; export * from "./providers/register-builtins.js"; +export type { ProviderSwitchReport } from "./providers/transform-messages.js"; +export { createEmptyReport, hasTransformations, transformMessagesWithReport } from "./providers/transform-messages.js"; export * from "./stream.js"; export * from "./types.js"; export * from "./utils/event-stream.js"; diff --git a/packages/pi-ai/src/providers/amazon-bedrock.ts b/packages/pi-ai/src/providers/amazon-bedrock.ts index 52b42b4d1..dee0c363e 100644 --- a/packages/pi-ai/src/providers/amazon-bedrock.ts +++ b/packages/pi-ai/src/providers/amazon-bedrock.ts @@ -43,7 +43,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; export interface BedrockOptions extends 
StreamOptions { region?: string; @@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt messages: convertMessages(context, model, cacheRetention), system: buildSystemPrompt(context.systemPrompt, model, cacheRetention), inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature }, - toolConfig: convertToolConfig(context.tools, options.toolChoice), + toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention), additionalModelRequestFields: buildAdditionalModelRequestFields(model, options), }; const nextCommandInput = await options?.onPayload?.(commandInput, model); @@ -487,7 +487,7 @@ function convertMessages( cacheRetention: CacheRetention, ): Message[] { const result: Message[] = []; - const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "bedrock-converse-stream"); for (let i = 0; i < transformedMessages.length; i++) { const m = transformedMessages[i]; @@ -633,6 +633,8 @@ function convertMessages( function convertToolConfig( tools: Tool[] | undefined, toolChoice: BedrockOptions["toolChoice"], + model: Model<"bedrock-converse-stream">, + cacheRetention: CacheRetention, ): ToolConfiguration | undefined { if (!tools?.length || toolChoice === "none") return undefined; @@ -644,6 +646,16 @@ function convertToolConfig( }, })); + // Add cachePoint after last tool for supported models + if (cacheRetention !== "none" && supportsPromptCaching(model)) { + bedrockTools.push({ + cachePoint: { + type: CachePointType.DEFAULT, + ...(cacheRetention === "long" ? 
{ ttl: CacheTTL.ONE_HOUR } : {}), + }, + } as any); + } + let bedrockToolChoice: ToolChoice | undefined; switch (toolChoice) { case "auto": diff --git a/packages/pi-ai/src/providers/anthropic-shared.test.ts b/packages/pi-ai/src/providers/anthropic-shared.test.ts index 9b6718570..6e08bc52e 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.test.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.test.ts @@ -1,6 +1,60 @@ import { describe, it } from "node:test"; import assert from "node:assert/strict"; -import { mapStopReason } from "./anthropic-shared.js"; +import { convertTools, mapStopReason } from "./anthropic-shared.js"; + +const makeTool = (name: string) => + ({ + name, + description: `desc for ${name}`, + parameters: { + type: "object" as const, + properties: { arg: { type: "string" } }, + required: ["arg"], + }, + }) as any; + +describe("convertTools cache_control", () => { + it("adds cache_control to the last tool when cacheControl is provided", () => { + const tools = [makeTool("Read"), makeTool("Write"), makeTool("Edit")]; + const cacheControl = { type: "ephemeral" as const }; + const result = convertTools(tools, false, cacheControl); + + assert.equal(result.length, 3); + assert.equal((result[0] as any).cache_control, undefined); + assert.equal((result[1] as any).cache_control, undefined); + assert.deepEqual((result[2] as any).cache_control, { type: "ephemeral" }); + }); + + it("does not add cache_control when cacheControl is undefined", () => { + const tools = [makeTool("Read"), makeTool("Write")]; + const result = convertTools(tools, false); + + for (const tool of result) { + assert.equal((tool as any).cache_control, undefined); + } + }); + + it("handles empty tools array without error", () => { + const result = convertTools([], false, { type: "ephemeral" }); + assert.equal(result.length, 0); + }); + + it("passes through ttl when provided", () => { + const tools = [makeTool("Read")]; + const cacheControl = { type: "ephemeral" as const, ttl: 
"1h" as const }; + const result = convertTools(tools, false, cacheControl); + + assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral", ttl: "1h" }); + }); + + it("single tool gets cache_control", () => { + const tools = [makeTool("Read")]; + const result = convertTools(tools, false, { type: "ephemeral" }); + + assert.equal(result.length, 1); + assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral" }); + }); +}); describe("mapStopReason", () => { it("maps end_turn to stop", () => { diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 098f50721..567609147 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -33,7 +33,7 @@ import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; export type AnthropicEffort = "low" | "medium" | "high" | "max"; @@ -235,7 +235,7 @@ export function convertMessages( ): MessageParam[] { const params: MessageParam[] = []; - const transformedMessages = transformMessages(messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(messages, model, normalizeToolCallId, "anthropic-messages"); for (let i = 0; i < transformedMessages.length; i++) { const msg = transformedMessages[i]; @@ -394,10 +394,14 @@ export function convertMessages( return params; } -export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] { +export function convertTools( + tools: Tool[], + isOAuthToken: boolean, + cacheControl?: { type: "ephemeral"; ttl?: "1h" }, +): 
Anthropic.Messages.Tool[] { if (!tools) return []; - return tools.map((tool) => { + const result = tools.map((tool) => { const jsonSchema = tool.parameters as any; return { @@ -410,6 +414,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me }, }; }); + + // Add cache breakpoint to last tool — covers entire tool block + if (cacheControl && result.length > 0) { + (result[result.length - 1] as any).cache_control = cacheControl; + } + + return result; } export function buildParams( @@ -457,7 +468,7 @@ export function buildParams( } if (context.tools) { - params.tools = convertTools(context.tools, isOAuthToken); + params.tools = convertTools(context.tools, isOAuthToken, cacheControl); } if (options?.thinkingEnabled && model.reasoning) { diff --git a/packages/pi-ai/src/providers/anthropic.ts b/packages/pi-ai/src/providers/anthropic.ts index 21c0da707..57ee1b5be 100644 --- a/packages/pi-ai/src/providers/anthropic.ts +++ b/packages/pi-ai/src/providers/anthropic.ts @@ -34,9 +34,6 @@ async function getAnthropicClass(): Promise { return _AnthropicClass; } -// Stealth mode: Mimic Claude Code's tool naming exactly -const claudeCodeVersion = "2.1.62"; - function mergeHeaders(...headerSources: (Record | undefined)[]): Record { const merged: Record = {}; for (const headers of headerSources) { @@ -47,10 +44,6 @@ function mergeHeaders(...headerSources: (Record | undefined)[]): return merged; } -function isOAuthToken(apiKey: string): boolean { - return apiKey.includes("sk-ant-oat"); -} - async function createClient( model: Model<"anthropic-messages">, apiKey: string, @@ -97,30 +90,7 @@ async function createClient( betaFeatures.push("interleaved-thinking-2025-05-14"); } - // OAuth: Bearer auth, Claude Code identity headers - if (isOAuthToken(apiKey)) { - const client = new AnthropicClass({ - apiKey: null, - authToken: apiKey, - baseURL: model.baseUrl, - dangerouslyAllowBrowser: true, - defaultHeaders: mergeHeaders( - { - accept: "application/json", - 
"anthropic-dangerous-direct-browser-access": "true", - ...(betaFeatures.length > 0 ? { "anthropic-beta": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(",")}` } : {}), - "user-agent": `claude-cli/${claudeCodeVersion}`, - "x-app": "cli", - }, - model.headers, - optionsHeaders, - ), - }); - - return { client, isOAuthToken: true }; - } - - // API key auth + // API key auth (Anthropic OAuth removed per TOS compliance — use API keys or Claude CLI) // Alibaba Coding Plan uses Bearer token auth instead of x-api-key const isAlibabaProvider = model.provider === "alibaba-coding-plan"; const client = new AnthropicClass({ diff --git a/packages/pi-ai/src/providers/google-shared.ts b/packages/pi-ai/src/providers/google-shared.ts index e6a31771f..7984bdd4b 100644 --- a/packages/pi-ai/src/providers/google-shared.ts +++ b/packages/pi-ai/src/providers/google-shared.ts @@ -5,7 +5,7 @@ import { type Content, FinishReason, FunctionCallingConfigMode, type Part } from "@google/genai"; import type { Context, ImageContent, Model, StopReason, TextContent, Tool } from "../types.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex"; @@ -80,7 +80,7 @@ export function convertMessages(model: Model, contex return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); }; - const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "google-generative-ai"); for (const msg of transformedMessages) { if (msg.role === "user") { diff --git a/packages/pi-ai/src/providers/mistral.ts b/packages/pi-ai/src/providers/mistral.ts index 7c9b54b91..0a6a28e5c 100644 --- a/packages/pi-ai/src/providers/mistral.ts +++ 
b/packages/pi-ai/src/providers/mistral.ts @@ -39,7 +39,7 @@ import { shortHash } from "../utils/hash.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { buildBaseOptions, clampReasoning } from "./simple-options.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; const MISTRAL_TOOL_CALL_ID_LENGTH = 9; const MAX_MISTRAL_ERROR_BODY_CHARS = 4000; @@ -79,7 +79,7 @@ export const streamMistral: StreamFunction<"mistral-conversations", MistralOptio }); const normalizeMistralToolCallId = createMistralToolCallIdNormalizer(); - const transformedMessages = transformMessages(context.messages, model, (id) => normalizeMistralToolCallId(id)); + const transformedMessages = transformMessagesWithReport(context.messages, model, (id) => normalizeMistralToolCallId(id), "mistral-conversations"); let payload = buildChatPayload(model, context, transformedMessages, options); const nextPayload = await options?.onPayload?.(payload, model); diff --git a/packages/pi-ai/src/providers/openai-completions.ts b/packages/pi-ai/src/providers/openai-completions.ts index 4d6e1a3cf..51213ad39 100644 --- a/packages/pi-ai/src/providers/openai-completions.ts +++ b/packages/pi-ai/src/providers/openai-completions.ts @@ -39,7 +39,7 @@ import { finalizeStream, handleStreamError, } from "./openai-shared.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; /** * Check if conversation messages contain tool calls or tool results. 
@@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio if (context.tools) { params.tools = convertTools(context.tools, compat); + maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools); } else if (hasToolHistory(context.messages)) { // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results params.tools = []; @@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio return params; } +function maybeAddOpenRouterAnthropicToolCacheControl( + model: Model<"openai-completions">, + tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined, +): void { + if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return; + if (!tools?.length) return; + + const lastTool = tools[tools.length - 1]; + if ("function" in lastTool) { + Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } }); + } +} + function mapReasoningEffort( effort: NonNullable, reasoningEffortMap: Partial, string>>, @@ -441,7 +455,7 @@ export function convertMessages( return id; }; - const transformedMessages = transformMessages(context.messages, model, (id) => normalizeToolCallId(id)); + const transformedMessages = transformMessagesWithReport(context.messages, model, (id) => normalizeToolCallId(id), "openai-completions"); if (context.systemPrompt) { const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole; diff --git a/packages/pi-ai/src/providers/openai-responses-shared.ts b/packages/pi-ai/src/providers/openai-responses-shared.ts index 10ac5ee1b..8227dcff5 100644 --- a/packages/pi-ai/src/providers/openai-responses-shared.ts +++ b/packages/pi-ai/src/providers/openai-responses-shared.ts @@ -30,7 +30,7 @@ import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { shortHash } from "../utils/hash.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } 
from "../utils/sanitize-unicode.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; // ============================================================================= // Utilities @@ -108,7 +108,7 @@ export function convertResponsesMessages( return `${normalizedCallId}|${normalizedItemId}`; }; - const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "openai-responses"); const includeSystemPrompt = options?.includeSystemPrompt ?? true; if (includeSystemPrompt && context.systemPrompt) { diff --git a/packages/pi-ai/src/providers/provider-capabilities.test.ts b/packages/pi-ai/src/providers/provider-capabilities.test.ts new file mode 100644 index 000000000..7b8728975 --- /dev/null +++ b/packages/pi-ai/src/providers/provider-capabilities.test.ts @@ -0,0 +1,174 @@ +// GSD-2 — Provider Capabilities Registry Tests (ADR-005 Phase 1) +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; + +import { + PROVIDER_CAPABILITIES, + getProviderCapabilities, + getUnsupportedFeatures, + mergeCapabilityOverrides, + getRegisteredApis, +} from "./provider-capabilities.js"; + +// ─── Registry Completeness ────────────────────────────────────────────────── + +describe("PROVIDER_CAPABILITIES registry", () => { + const EXPECTED_APIS = [ + "anthropic-messages", + "anthropic-vertex", + "openai-responses", + "azure-openai-responses", + "openai-codex-responses", + "openai-completions", + "google-generative-ai", + "google-gemini-cli", + "google-vertex", + "mistral-conversations", + "bedrock-converse-stream", + "ollama-chat", + ]; + + test("covers all expected API providers", () => { + for (const api of EXPECTED_APIS) { + assert.ok( + PROVIDER_CAPABILITIES[api], + `Missing capability entry for API: ${api}`, + ); + } + }); + + 
test("getRegisteredApis returns all entries", () => { + const registered = getRegisteredApis(); + for (const api of EXPECTED_APIS) { + assert.ok(registered.includes(api), `getRegisteredApis missing: ${api}`); + } + }); + + test("all entries have required fields", () => { + for (const [api, caps] of Object.entries(PROVIDER_CAPABILITIES)) { + assert.equal(typeof caps.toolCalling, "boolean", `${api}.toolCalling`); + assert.equal(typeof caps.maxTools, "number", `${api}.maxTools`); + assert.equal(typeof caps.imageToolResults, "boolean", `${api}.imageToolResults`); + assert.equal(typeof caps.structuredOutput, "boolean", `${api}.structuredOutput`); + assert.ok(caps.toolCallIdFormat, `${api}.toolCallIdFormat`); + assert.equal(typeof caps.toolCallIdFormat.maxLength, "number", `${api}.toolCallIdFormat.maxLength`); + assert.ok(caps.toolCallIdFormat.allowedChars instanceof RegExp, `${api}.toolCallIdFormat.allowedChars`); + assert.ok( + ["full", "text-only", "none"].includes(caps.thinkingPersistence), + `${api}.thinkingPersistence is "${caps.thinkingPersistence}"`, + ); + assert.ok(Array.isArray(caps.unsupportedSchemaFeatures), `${api}.unsupportedSchemaFeatures`); + } + }); +}); + +// ─── Provider-specific Values ─────────────────────────────────────────────── + +describe("provider-specific capabilities", () => { + test("Anthropic supports full thinking persistence", () => { + assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].thinkingPersistence, "full"); + }); + + test("Anthropic supports image tool results", () => { + assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].imageToolResults, true); + }); + + test("Anthropic tool call ID is 64 chars max", () => { + assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].toolCallIdFormat.maxLength, 64); + }); + + test("Mistral tool call ID is 9 chars max", () => { + assert.equal(PROVIDER_CAPABILITIES["mistral-conversations"].toolCallIdFormat.maxLength, 9); + }); + + test("Mistral has no thinking persistence", () => { + 
assert.equal(PROVIDER_CAPABILITIES["mistral-conversations"].thinkingPersistence, "none"); + }); + + test("Google does not support patternProperties", () => { + assert.ok( + PROVIDER_CAPABILITIES["google-generative-ai"].unsupportedSchemaFeatures.includes("patternProperties"), + ); + }); + + test("Google does not support const", () => { + assert.ok( + PROVIDER_CAPABILITIES["google-generative-ai"].unsupportedSchemaFeatures.includes("const"), + ); + }); + + test("OpenAI Responses does not support image tool results", () => { + assert.equal(PROVIDER_CAPABILITIES["openai-responses"].imageToolResults, false); + }); + + test("OpenAI Responses has text-only thinking persistence", () => { + assert.equal(PROVIDER_CAPABILITIES["openai-responses"].thinkingPersistence, "text-only"); + }); +}); + +// ─── getProviderCapabilities ──────────────────────────────────────────────── + +describe("getProviderCapabilities", () => { + test("returns known provider capabilities", () => { + const caps = getProviderCapabilities("anthropic-messages"); + assert.equal(caps.toolCalling, true); + assert.equal(caps.thinkingPersistence, "full"); + }); + + test("returns permissive defaults for unknown providers", () => { + const caps = getProviderCapabilities("unknown-provider-xyz"); + assert.equal(caps.toolCalling, true); + assert.equal(caps.imageToolResults, true); + assert.deepEqual(caps.unsupportedSchemaFeatures, []); + }); +}); + +// ─── getUnsupportedFeatures ───────────────────────────────────────────────── + +describe("getUnsupportedFeatures", () => { + test("returns unsupported features for Google", () => { + const unsupported = getUnsupportedFeatures("google-generative-ai", ["patternProperties", "const"]); + assert.deepEqual(unsupported, ["patternProperties", "const"]); + }); + + test("returns empty for Anthropic with any features", () => { + const unsupported = getUnsupportedFeatures("anthropic-messages", ["patternProperties", "const"]); + assert.deepEqual(unsupported, []); + }); + + 
test("returns empty for unknown provider", () => { + const unsupported = getUnsupportedFeatures("unknown-xyz", ["patternProperties"]); + assert.deepEqual(unsupported, []); + }); +}); + +// ─── mergeCapabilityOverrides ─────────────────────────────────────────────── + +describe("mergeCapabilityOverrides", () => { + test("overrides individual fields", () => { + const merged = mergeCapabilityOverrides("openai-responses", { + imageToolResults: true, + }); + assert.equal(merged.imageToolResults, true); + // Non-overridden fields preserved + assert.equal(merged.toolCalling, true); + assert.equal(merged.thinkingPersistence, "text-only"); + }); + + test("deep-merges toolCallIdFormat", () => { + const merged = mergeCapabilityOverrides("anthropic-messages", { + toolCallIdFormat: { maxLength: 128 }, + }); + assert.equal(merged.toolCallIdFormat.maxLength, 128); + // allowedChars preserved from base + assert.ok(merged.toolCallIdFormat.allowedChars instanceof RegExp); + }); + + test("uses permissive defaults for unknown provider", () => { + const merged = mergeCapabilityOverrides("unknown-xyz", { + imageToolResults: false, + }); + assert.equal(merged.imageToolResults, false); + assert.equal(merged.toolCalling, true); // from default + }); +}); diff --git a/packages/pi-ai/src/providers/provider-capabilities.ts b/packages/pi-ai/src/providers/provider-capabilities.ts new file mode 100644 index 000000000..b49a1f319 --- /dev/null +++ b/packages/pi-ai/src/providers/provider-capabilities.ts @@ -0,0 +1,215 @@ +// GSD-2 — Provider Capabilities Registry (ADR-005 Phase 1) +// Declarative registry of what each API provider supports, consolidating +// scattered knowledge from *-shared.ts files into a queryable data structure. + +import type { Api } from "../types.js"; + +// ─── Types ────────────────────────────────────────────────────────────────── + +/** + * Declarative capability profile for an API provider. 
+ * Used by the model router to filter incompatible models and by the tool + * system to adjust tool sets per provider. + */ +export interface ProviderCapabilities { + /** Whether models from this provider support tool/function calling */ + toolCalling: boolean; + /** Maximum number of tools the provider handles well (0 = unlimited) */ + maxTools: number; + /** Whether tool results can contain images */ + imageToolResults: boolean; + /** Whether the provider supports structured JSON output */ + structuredOutput: boolean; + /** Tool call ID format constraints */ + toolCallIdFormat: { + maxLength: number; + allowedChars: RegExp; + }; + /** Whether thinking/reasoning blocks are preserved cross-turn */ + thinkingPersistence: "full" | "text-only" | "none"; + /** Schema features NOT supported (tools using these get filtered) */ + unsupportedSchemaFeatures: string[]; +} + +// ─── Registry ─────────────────────────────────────────────────────────────── + +/** + * Built-in provider capability profiles. 
+ * + * Sources (consolidated from scattered *-shared.ts files): + * - anthropic-shared.ts: normalizeToolCallId (64-char, [a-zA-Z0-9_-]) + * - openai-responses-shared.ts: ID normalization (64-char, fc_ prefix), image-in-tool-result workaround + * - google-shared.ts: sanitizeSchemaForGoogle (patternProperties, const), requiresToolCallId + * - mistral.ts: MISTRAL_TOOL_CALL_ID_LENGTH = 9 + * - amazon-bedrock.ts: normalizeToolCallId (64-char, [a-zA-Z0-9_-]) + */ +export const PROVIDER_CAPABILITIES: Record = { + "anthropic-messages": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "full", + unsupportedSchemaFeatures: [], + }, + "anthropic-vertex": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "full", + unsupportedSchemaFeatures: [], + }, + "openai-responses": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, // images sent as separate user message, not in tool result + structuredOutput: true, + toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "azure-openai-responses": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "openai-codex-responses": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "openai-completions": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 
64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "google-generative-ai": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: ["patternProperties", "const"], + }, + "google-gemini-cli": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: ["patternProperties", "const"], + }, + "google-vertex": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: ["patternProperties", "const"], + }, + "mistral-conversations": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 9, allowedChars: /^[a-zA-Z0-9]+$/ }, + thinkingPersistence: "none", + unsupportedSchemaFeatures: [], + }, + "bedrock-converse-stream": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, // Bedrock supports image content blocks in tool results + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "ollama-chat": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: false, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "none", + unsupportedSchemaFeatures: [], + }, +}; + +// ─── Default (permissive) profile for unknown providers ───────────────────── + +const DEFAULT_CAPABILITIES: ProviderCapabilities = { + toolCalling: true, + maxTools: 0, + 
imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], +}; + +// ─── Public API ───────────────────────────────────────────────────────────── + +/** + * Get capabilities for a provider API. Returns a permissive default for + * unknown providers (preserving existing behavior per ADR-005 principle 5). + */ +export function getProviderCapabilities(api: string): ProviderCapabilities { + return PROVIDER_CAPABILITIES[api] ?? DEFAULT_CAPABILITIES; +} + +/** + * Check if a provider supports all required schema features. + * Returns the list of unsupported features (empty if all supported). + */ +export function getUnsupportedFeatures(api: string, requiredFeatures: string[]): string[] { + const caps = getProviderCapabilities(api); + return requiredFeatures.filter(f => caps.unsupportedSchemaFeatures.includes(f)); +} + +/** + * Deep-merge user-provided capability overrides with built-in defaults. + * Partial overrides merge with the built-in profile for the given API. + */ +export function mergeCapabilityOverrides( + api: string, + overrides: Partial> & { + toolCallIdFormat?: Partial; + }, +): ProviderCapabilities { + const base = getProviderCapabilities(api); + return { + ...base, + ...overrides, + toolCallIdFormat: overrides.toolCallIdFormat + ? { ...base.toolCallIdFormat, ...overrides.toolCallIdFormat } + : base.toolCallIdFormat, + }; +} + +/** + * Get all registered API names in the capability registry. + * Used by lint rules to verify all providers in register-builtins.ts + * have corresponding capability entries. 
+ */ +export function getRegisteredApis(): string[] { + return Object.keys(PROVIDER_CAPABILITIES); +} diff --git a/packages/pi-ai/src/providers/transform-messages-report.test.ts b/packages/pi-ai/src/providers/transform-messages-report.test.ts new file mode 100644 index 000000000..85ae585ba --- /dev/null +++ b/packages/pi-ai/src/providers/transform-messages-report.test.ts @@ -0,0 +1,189 @@ +// GSD-2 — ProviderSwitchReport Tests (ADR-005 Phase 3) +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; + +import { transformMessages, createEmptyReport, hasTransformations } from "./transform-messages.js"; +import type { ProviderSwitchReport } from "./transform-messages.js"; +import type { Message, Model, AssistantMessage, ToolCall } from "../types.js"; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +function makeModel(overrides: Partial> = {}): Model { + return { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + ...overrides, + } as Model; +} + +function makeAssistantMsg(overrides: Partial = {}): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-sonnet-4-6", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + ...overrides, + }; +} + +// ─── createEmptyReport / hasTransformations ───────────────────────────────── + +describe("createEmptyReport", () => { + test("creates report with zero counters", () => { + const report = createEmptyReport("anthropic-messages", "openai-responses"); + assert.equal(report.fromApi, "anthropic-messages"); + 
assert.equal(report.toApi, "openai-responses"); + assert.equal(report.thinkingBlocksDropped, 0); + assert.equal(report.thinkingBlocksDowngraded, 0); + assert.equal(report.toolCallIdsRemapped, 0); + assert.equal(report.syntheticToolResultsInserted, 0); + assert.equal(report.thoughtSignaturesDropped, 0); + }); +}); + +describe("hasTransformations", () => { + test("returns false for empty report", () => { + const report = createEmptyReport("a", "b"); + assert.equal(hasTransformations(report), false); + }); + + test("returns true when any counter is non-zero", () => { + const report = createEmptyReport("a", "b"); + report.thinkingBlocksDropped = 1; + assert.equal(hasTransformations(report), true); + }); +}); + +// ─── Report Tracking in transformMessages ─────────────────────────────────── + +describe("transformMessages with report tracking", () => { + test("tracks thinking blocks dropped for redacted cross-model", () => { + const model = makeModel({ id: "gpt-5", api: "openai-responses", provider: "openai" }); + const messages: Message[] = [ + makeAssistantMsg({ + content: [ + { type: "thinking", thinking: "", redacted: true }, + { type: "text", text: "Hello" }, + ], + }), + ]; + const report = createEmptyReport("anthropic-messages", "openai-responses"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thinkingBlocksDropped, 1); + }); + + test("tracks thinking blocks downgraded to plain text", () => { + const model = makeModel({ id: "gpt-5", api: "openai-responses", provider: "openai" }); + const messages: Message[] = [ + makeAssistantMsg({ + content: [ + { type: "thinking", thinking: "Let me think about this..." 
}, + { type: "text", text: "Here is my answer" }, + ], + }), + ]; + const report = createEmptyReport("anthropic-messages", "openai-responses"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thinkingBlocksDowngraded, 1); + }); + + test("tracks tool call IDs remapped", () => { + const model = makeModel({ id: "claude-sonnet-4-6", api: "anthropic-messages", provider: "anthropic" }); + const toolCall: ToolCall = { + type: "toolCall", + id: "original-long-id-that-needs-normalization|with-special-chars", + name: "bash", + arguments: { command: "ls" }, + }; + const messages: Message[] = [ + makeAssistantMsg({ + provider: "openai", + api: "openai-responses", + model: "gpt-5", + content: [toolCall], + }), + ]; + const normalizer = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); + const report = createEmptyReport("openai-responses", "anthropic-messages"); + transformMessages(messages, model, normalizer, report); + assert.equal(report.toolCallIdsRemapped, 1); + }); + + test("tracks thought signatures dropped", () => { + const model = makeModel({ id: "claude-sonnet-4-6", api: "anthropic-messages", provider: "anthropic" }); + const toolCall: ToolCall = { + type: "toolCall", + id: "tc_001", + name: "bash", + arguments: { command: "ls" }, + thoughtSignature: "some-opaque-signature", + }; + const messages: Message[] = [ + makeAssistantMsg({ + provider: "google", + api: "google-generative-ai", + model: "gemini-2.5-pro", + content: [toolCall], + }), + ]; + const report = createEmptyReport("google-generative-ai", "anthropic-messages"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thoughtSignaturesDropped, 1); + }); + + test("tracks synthetic tool results inserted", () => { + const model = makeModel(); + const toolCall: ToolCall = { + type: "toolCall", + id: "tc_orphan", + name: "bash", + arguments: { command: "ls" }, + }; + // Assistant message with tool call followed by another assistant (no tool 
result) + const messages: Message[] = [ + makeAssistantMsg({ content: [toolCall, { type: "text", text: "Using bash" }] }), + makeAssistantMsg({ content: [{ type: "text", text: "Next message" }] }), + ]; + const report = createEmptyReport("anthropic-messages", "anthropic-messages"); + transformMessages(messages, model, undefined, report); + assert.equal(report.syntheticToolResultsInserted, 1); + }); + + test("does not count transformations for same-model messages", () => { + const model = makeModel(); + const messages: Message[] = [ + makeAssistantMsg({ + content: [ + { type: "thinking", thinking: "Let me think..." }, + { type: "text", text: "Answer" }, + ], + }), + ]; + const report = createEmptyReport("anthropic-messages", "anthropic-messages"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thinkingBlocksDowngraded, 0); + assert.equal(report.thinkingBlocksDropped, 0); + }); + + test("works without report parameter (backward compatible)", () => { + const model = makeModel(); + const messages: Message[] = [ + makeAssistantMsg({ content: [{ type: "text", text: "Hello" }] }), + ]; + // Should not throw + const result = transformMessages(messages, model); + assert.ok(Array.isArray(result)); + }); +}); diff --git a/packages/pi-ai/src/providers/transform-messages.ts b/packages/pi-ai/src/providers/transform-messages.ts index f61f08037..bcfd5234a 100644 --- a/packages/pi-ai/src/providers/transform-messages.ts +++ b/packages/pi-ai/src/providers/transform-messages.ts @@ -1,5 +1,87 @@ import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "../types.js"; +/** + * Report of context transformations during a cross-provider switch (ADR-005 Phase 3). + * Tracks what was lost or downgraded when replaying conversation history to a different provider. 
+ */ +export interface ProviderSwitchReport { + /** API of the messages being transformed from */ + fromApi: string; + /** API of the target model */ + toApi: string; + /** Number of thinking blocks completely dropped (redacted/encrypted, cross-model) */ + thinkingBlocksDropped: number; + /** Number of thinking blocks downgraded from structured to plain text */ + thinkingBlocksDowngraded: number; + /** Number of tool call IDs that were remapped/normalized */ + toolCallIdsRemapped: number; + /** Number of synthetic tool results inserted for orphaned tool calls */ + syntheticToolResultsInserted: number; + /** Number of thought signatures dropped (Google-specific opaque context) */ + thoughtSignaturesDropped: number; +} + +/** + * Create an empty provider switch report. + */ +export function createEmptyReport(fromApi: string, toApi: string): ProviderSwitchReport { + return { + fromApi, + toApi, + thinkingBlocksDropped: 0, + thinkingBlocksDowngraded: 0, + toolCallIdsRemapped: 0, + syntheticToolResultsInserted: 0, + thoughtSignaturesDropped: 0, + }; +} + +/** + * Check if a provider switch report has any non-zero transformations. + */ +export function hasTransformations(report: ProviderSwitchReport): boolean { + return ( + report.thinkingBlocksDropped > 0 || + report.thinkingBlocksDowngraded > 0 || + report.toolCallIdsRemapped > 0 || + report.syntheticToolResultsInserted > 0 || + report.thoughtSignaturesDropped > 0 + ); +} + +/** + * Create a report, run transformMessages, and log if non-empty. + * Convenience wrapper for provider adapters (ADR-005). + */ +export function transformMessagesWithReport( + messages: Message[], + model: Model, + normalizeToolCallId?: (id: string, model: Model, source: AssistantMessage) => string, + sourceApi?: string, +): Message[] { + const report = createEmptyReport(sourceApi ?? 
"unknown", model.api); + const result = transformMessages(messages, model, normalizeToolCallId, report); + if (hasTransformations(report)) { + logProviderSwitchReport(report); + } + return result; +} + +/** Log a non-empty ProviderSwitchReport as a debug-level warning. */ +function logProviderSwitchReport(report: ProviderSwitchReport): void { + const parts: string[] = [`Provider switch ${report.fromApi} → ${report.toApi}:`]; + if (report.thinkingBlocksDropped > 0) parts.push(`${report.thinkingBlocksDropped} thinking blocks dropped`); + if (report.thinkingBlocksDowngraded > 0) parts.push(`${report.thinkingBlocksDowngraded} thinking blocks downgraded`); + if (report.toolCallIdsRemapped > 0) parts.push(`${report.toolCallIdsRemapped} tool call IDs remapped`); + if (report.syntheticToolResultsInserted > 0) parts.push(`${report.syntheticToolResultsInserted} synthetic tool results inserted`); + if (report.thoughtSignaturesDropped > 0) parts.push(`${report.thoughtSignaturesDropped} thought signatures dropped`); + // Use process.stderr for debug output — this is observable in verbose/debug modes + // without polluting stdout which may be used for structured output (RPC/MCP). + if (process.env.GSD_VERBOSE === "1" || process.env.PI_VERBOSE === "1") { + process.stderr.write(`[provider-switch] ${parts.join(", ")}\n`); + } +} + /** * Normalize tool call ID for cross-provider compatibility. * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`. @@ -9,6 +91,7 @@ export function transformMessages( messages: Message[], model: Model, normalizeToolCallId?: (id: string, model: Model, source: AssistantMessage) => string, + report?: ProviderSwitchReport, ): Message[] { // Build a map of original tool call IDs to normalized IDs const toolCallIdMap = new Map(); @@ -42,14 +125,20 @@ export function transformMessages( // Redacted thinking is opaque encrypted content, only valid for the same model. // Drop it for cross-model to avoid API errors. 
if (block.redacted) { + if (!isSameModel && report) report.thinkingBlocksDropped++; return isSameModel ? block : []; } // For same model: keep thinking blocks with signatures (needed for replay) // even if the thinking text is empty (OpenAI encrypted reasoning) if (isSameModel && block.thinkingSignature) return block; // Skip empty thinking blocks, convert others to plain text - if (!block.thinking || block.thinking.trim() === "") return []; + if (!block.thinking || block.thinking.trim() === "") { + if (!isSameModel && report) report.thinkingBlocksDropped++; + return []; + } if (isSameModel) return block; + // Downgrade: structured thinking → plain text + if (report) report.thinkingBlocksDowngraded++; return { type: "text" as const, text: block.thinking, @@ -71,6 +160,7 @@ export function transformMessages( if (!isSameModel && toolCall.thoughtSignature) { normalizedToolCall = { ...toolCall }; delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature; + if (report) report.thoughtSignaturesDropped++; } if (!isSameModel && normalizeToolCallId) { @@ -78,6 +168,7 @@ export function transformMessages( if (normalizedId !== toolCall.id) { toolCallIdMap.set(toolCall.id, normalizedId); normalizedToolCall = { ...normalizedToolCall, id: normalizedId }; + if (report) report.toolCallIdsRemapped++; } } @@ -117,6 +208,7 @@ export function transformMessages( isError: true, timestamp: Date.now(), } as ToolResultMessage); + if (report) report.syntheticToolResultsInserted++; } } pendingToolCalls = []; @@ -157,6 +249,7 @@ export function transformMessages( isError: true, timestamp: Date.now(), } as ToolResultMessage); + if (report) report.syntheticToolResultsInserted++; } } pendingToolCalls = []; diff --git a/packages/pi-ai/src/utils/oauth/anthropic.ts b/packages/pi-ai/src/utils/oauth/anthropic.ts deleted file mode 100644 index 861e26409..000000000 --- a/packages/pi-ai/src/utils/oauth/anthropic.ts +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Anthropic OAuth flow (Claude 
Pro/Max) - */ - -import { generatePKCE } from "./pkce.js"; -import type { OAuthCredentials, OAuthLoginCallbacks, OAuthProviderInterface } from "./types.js"; - -const decode = (s: string) => atob(s); -const CLIENT_ID = decode("OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl"); -const AUTHORIZE_URL = "https://claude.ai/oauth/authorize"; -const TOKEN_URL = "https://platform.claude.com/v1/oauth/token"; -const REDIRECT_URI = "https://platform.claude.com/oauth/code/callback"; -const SCOPES = "org:create_api_key user:profile user:inference"; - -/** - * Login with Anthropic OAuth (device code flow) - * - * @param onAuthUrl - Callback to handle the authorization URL (e.g., open browser) - * @param onPromptCode - Callback to prompt user for the authorization code - */ -export async function loginAnthropic( - onAuthUrl: (url: string) => void, - onPromptCode: () => Promise, -): Promise { - const { verifier, challenge } = await generatePKCE(); - - // Build authorization URL - const authParams = new URLSearchParams({ - code: "true", - client_id: CLIENT_ID, - response_type: "code", - redirect_uri: REDIRECT_URI, - scope: SCOPES, - code_challenge: challenge, - code_challenge_method: "S256", - state: verifier, - }); - - const authUrl = `${AUTHORIZE_URL}?${authParams.toString()}`; - - // Notify caller with URL to open - onAuthUrl(authUrl); - - // Wait for user to paste authorization code (format: code#state) - const authCode = await onPromptCode(); - const splits = authCode.split("#"); - const code = splits[0]; - const state = splits[1]; - - // Exchange code for tokens - const tokenResponse = await fetch(TOKEN_URL, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - grant_type: "authorization_code", - client_id: CLIENT_ID, - code: code, - state: state, - redirect_uri: REDIRECT_URI, - code_verifier: verifier, - }), - signal: AbortSignal.timeout(30_000), - }); - - if (!tokenResponse.ok) { - const error = await tokenResponse.text(); - 
throw new Error(`Token exchange failed: ${error}`); - } - - const tokenData = (await tokenResponse.json()) as { - access_token: string; - refresh_token: string; - expires_in: number; - }; - - // Calculate expiry time (current time + expires_in seconds - 5 min buffer) - const expiresAt = Date.now() + tokenData.expires_in * 1000 - 5 * 60 * 1000; - - // Save credentials - return { - refresh: tokenData.refresh_token, - access: tokenData.access_token, - expires: expiresAt, - }; -} - -/** - * Refresh Anthropic OAuth token - */ -export async function refreshAnthropicToken(refreshToken: string): Promise { - const response = await fetch(TOKEN_URL, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - grant_type: "refresh_token", - client_id: CLIENT_ID, - refresh_token: refreshToken, - }), - signal: AbortSignal.timeout(30_000), - }); - - if (!response.ok) { - const error = await response.text(); - throw new Error(`Anthropic token refresh failed: ${error}`); - } - - const data = (await response.json()) as { - access_token: string; - refresh_token: string; - expires_in: number; - }; - - return { - refresh: data.refresh_token, - access: data.access_token, - expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000, - }; -} - -export const anthropicOAuthProvider: OAuthProviderInterface = { - id: "anthropic", - name: "Anthropic (Claude Pro/Max)", - - async login(callbacks: OAuthLoginCallbacks): Promise { - return loginAnthropic( - (url) => callbacks.onAuth({ url }), - () => callbacks.onPrompt({ message: "Paste the authorization code:" }), - ); - }, - - async refreshToken(credentials: OAuthCredentials): Promise { - return refreshAnthropicToken(credentials.refresh); - }, - - getApiKey(credentials: OAuthCredentials): string { - return credentials.access; - }, -}; diff --git a/packages/pi-ai/src/utils/oauth/index.ts b/packages/pi-ai/src/utils/oauth/index.ts index a91decf4a..715b4910c 100644 --- a/packages/pi-ai/src/utils/oauth/index.ts 
+++ b/packages/pi-ai/src/utils/oauth/index.ts @@ -3,14 +3,14 @@ * * This module handles login, token refresh, and credential storage * for OAuth-based providers: - * - Anthropic (Claude Pro/Max) * - GitHub Copilot * - Google Cloud Code Assist (Gemini CLI) * - Antigravity (Gemini 3, Claude, GPT-OSS via Google Cloud) + * + * Note: Anthropic OAuth was removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md). + * Use API keys or the local Claude Code CLI for Anthropic access. */ -// Anthropic -export { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from "./anthropic.js"; // GitHub Copilot export { getGitHubCopilotBaseUrl, @@ -32,7 +32,6 @@ export * from "./types.js"; // Provider Registry // ============================================================================ -import { anthropicOAuthProvider } from "./anthropic.js"; import { githubCopilotOAuthProvider } from "./github-copilot.js"; import { antigravityOAuthProvider } from "./google-antigravity.js"; import { geminiCliOAuthProvider } from "./google-gemini-cli.js"; @@ -40,7 +39,6 @@ import { openaiCodexOAuthProvider } from "./openai-codex.js"; import type { OAuthCredentials, OAuthProviderId, OAuthProviderInterface } from "./types.js"; const BUILT_IN_OAUTH_PROVIDERS: OAuthProviderInterface[] = [ - anthropicOAuthProvider, githubCopilotOAuthProvider, geminiCliOAuthProvider, antigravityOAuthProvider, diff --git a/packages/pi-coding-agent/package.json b/packages/pi-coding-agent/package.json index 0fa7e909f..20ebb6757 100644 --- a/packages/pi-coding-agent/package.json +++ b/packages/pi-coding-agent/package.json @@ -1,6 +1,6 @@ { "name": "@gsd/pi-coding-agent", - "version": "2.68.0", + "version": "2.71.0", "description": "Coding agent CLI (vendored from pi-mono)", "type": "module", "piConfig": { diff --git a/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts b/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts new file mode 100644 index 
000000000..eb7795508 --- /dev/null +++ b/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts @@ -0,0 +1,468 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; + +import { handleAgentEvent } from "../modes/interactive/controllers/chat-controller.js"; + +function makeUsage() { + return { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }; +} + +function makeAssistant(content: any[]) { + return { + role: "assistant", + content, + api: "anthropic-messages", + provider: "claude-code", + model: "claude-sonnet-4", + usage: makeUsage(), + stopReason: "stop", + timestamp: Date.now(), + }; +} + +function createHost() { + const chatContainer = { + children: [] as any[], + addChild(component: any) { + this.children.push(component); + }, + removeChild(component: any) { + const idx = this.children.indexOf(component); + if (idx !== -1) this.children.splice(idx, 1); + }, + clear() { + this.children = []; + }, + }; + + const pinnedMessageContainer = { + children: [] as any[], + addChild(component: any) { + this.children.push(component); + }, + removeChild(component: any) { + const idx = this.children.indexOf(component); + if (idx !== -1) this.children.splice(idx, 1); + }, + clear() { + this.children = []; + }, + }; + + const host: any = { + isInitialized: true, + init: async () => {}, + defaultEditor: { onEscape: undefined }, + editor: {}, + session: { retryAttempt: 0, abortCompaction: () => {}, abortRetry: () => {} }, + ui: { requestRender: () => {}, terminal: { rows: 50 } }, + footer: { invalidate: () => {} }, + keybindings: {}, + statusContainer: { clear: () => {}, addChild: () => {} }, + chatContainer, + settingsManager: { getTimestampFormat: () => "date-time-iso", getShowImages: () => false }, + pendingTools: new Map(), + toolOutputExpanded: false, + hideThinkingBlock: false, + isBashMode: false, + defaultWorkingMessage: "Working...", 
+ compactionQueuedMessages: [], + editorContainer: {}, + pendingMessagesContainer: { clear: () => {} }, + pinnedMessageContainer, + addMessageToChat: () => {}, + getMarkdownThemeWithSettings: () => ({}), + formatWebSearchResult: () => "", + getRegisteredToolDefinition: () => undefined, + checkShutdownRequested: async () => {}, + rebuildChatFromMessages: () => {}, + flushCompactionQueue: async () => {}, + showStatus: () => {}, + showError: () => {}, + updatePendingMessagesDisplay: () => {}, + updateTerminalTitle: () => {}, + updateEditorBorderColor: () => {}, + }; + + return host; +} + +test("chat-controller keeps tool output ahead of delayed assistant text for external tool streams", async () => { + // ToolExecutionComponent uses the global theme singleton. + // Install a minimal no-op theme implementation for this unit test. + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolId = "mcp-tool-1"; + const toolCall = { + type: "toolCall", + id: toolId, + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + assert.equal(host.streamingComponent, undefined, "assistant component should be deferred at message_start"); + assert.equal(host.chatContainer.children.length, 0, "nothing should render before content arrives"); + + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([toolCall]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 0, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "tool output" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([toolCall]), + }, + } as any, + ); + + 
assert.equal(host.streamingComponent, undefined, "assistant text container should remain deferred for tool-only updates"); + assert.equal(host.chatContainer.children.length, 1, "tool execution block should render immediately"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + + // Re-assert required host method before the text-bearing update path. + host.getMarkdownThemeWithSettings = () => ({}); + + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([toolCall, { type: "text", text: "done" }]), + assistantMessageEvent: { + type: "text_delta", + contentIndex: 1, + delta: "done", + partial: makeAssistant([toolCall, { type: "text", text: "done" }]), + }, + } as any, + ); + + assert.equal(host.chatContainer.children.length, 2, "assistant content should render after existing tool output"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + assert.equal(host.chatContainer.children[1]?.constructor?.name, "AssistantMessageComponent"); +}); + +test("chat-controller keeps serverToolUse output ahead of assistant text when external results arrive", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolId = "mcp-secure-1"; + const serverToolUse = { + type: "serverToolUse", + id: toolId, + name: "mcp__gsd-workflow__secure_env_collect", + input: { projectDir: "/tmp/project", keys: [{ key: "SECURE_PASSWORD" }], destination: "dotenv" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([serverToolUse]), + assistantMessageEvent: { + type: "server_tool_use", + 
contentIndex: 0, + partial: makeAssistant([serverToolUse]), + }, + } as any, + ); + + assert.equal(host.streamingComponent, undefined, "assistant content should stay deferred while only tool content streams"); + assert.equal(host.chatContainer.children.length, 1, "server tool block should render immediately"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + + host.getMarkdownThemeWithSettings = () => ({}); + const resultMessage = makeAssistant([ + { + ...serverToolUse, + externalResult: { + content: [{ type: "text", text: "secure_env_collect was cancelled by user." }], + details: {}, + isError: true, + }, + }, + { type: "text", text: "The secure password collection was cancelled." }, + ]); + + await handleAgentEvent( + host, + { + type: "message_update", + message: resultMessage, + assistantMessageEvent: { + type: "server_tool_use", + contentIndex: 0, + partial: resultMessage, + }, + } as any, + ); + + assert.equal(host.chatContainer.children.length, 2, "assistant text should render after existing server tool output"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + assert.equal(host.chatContainer.children[1]?.constructor?.name, "AssistantMessageComponent"); +}); + +test("chat-controller pins latest assistant text above editor when tool calls are present", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolId = "tool-pin-1"; + const toolCall = { + type: "toolCall", + id: toolId, + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned 
zone should be empty at message_start"); + + // Send a message with text followed by a tool call + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([ + { type: "text", text: "Looking at the files now." }, + toolCall, + ]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "file contents" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([{ type: "text", text: "Looking at the files now." }, toolCall]), + }, + } as any, + ); + + // Pinned zone should now have a DynamicBorder and a Markdown component + assert.equal(host.pinnedMessageContainer.children.length, 2, "pinned zone should have border + markdown"); + assert.equal(host.pinnedMessageContainer.children[0]?.constructor?.name, "DynamicBorder"); + assert.equal(host.pinnedMessageContainer.children[1]?.constructor?.name, "Markdown"); +}); + +test("chat-controller clears pinned zone when a new assistant message starts", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolCall = { + type: "toolCall", + id: "tool-clear-1", + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + // Populate the pinned zone + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([{ type: "text", text: "Working on it." 
}, toolCall]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "ok" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]), + }, + } as any, + ); + + assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated"); + + // Start a new assistant message — pinned zone should clear + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on new assistant message"); +}); + +test("chat-controller clears pinned zone when the agent turn ends", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolCall = { + type: "toolCall", + id: "tool-clear-on-end-1", + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "ok" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([{ type: "text", text: "Working on it." 
}, toolCall]), + }, + } as any, + ); + + assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated before agent_end"); + + await handleAgentEvent(host, { type: "agent_end" } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on agent_end"); +}); + +test("chat-controller clears pinned zone when assistant message ends", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolCall = { + type: "toolCall", + id: "tool-msg-end-1", + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + host.getMarkdownThemeWithSettings = () => ({}); + const msgContent = [{ type: "text", text: "Summary after tools." }, toolCall]; + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant(msgContent), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "ok" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant(msgContent), + }, + } as any, + ); + + assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated during streaming"); + + // End the assistant message (e.g. 
before form elicitation) — pinned zone should clear + await handleAgentEvent(host, { type: "message_end", message: makeAssistant(msgContent) } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on message_end to prevent duplicate display"); +}); + +test("chat-controller does not pin when there are no tool calls", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([{ type: "text", text: "Just some text, no tools." }]), + assistantMessageEvent: { + type: "text_delta", + contentIndex: 0, + delta: "Just some text, no tools.", + partial: makeAssistant([{ type: "text", text: "Just some text, no tools." 
}]), + }, + } as any, + ); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should stay empty without tool calls"); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/index.ts b/packages/pi-coding-agent/src/core/extensions/index.ts index 70525095a..0438d364b 100644 --- a/packages/pi-coding-agent/src/core/extensions/index.ts +++ b/packages/pi-coding-agent/src/core/extensions/index.ts @@ -43,6 +43,9 @@ export type { BeforeProviderRequestEventResult, // Context CompactOptions, + // Events - Adjust Tool Set (ADR-005) + AdjustToolSetEvent, + AdjustToolSetResult, // Events - Agent ContextEvent, // Event Results @@ -135,6 +138,7 @@ export type { ToolCallEvent, ToolCallEventResult, // Tools + ToolCompatibility, ToolDefinition, // Events - Tool Execution ToolExecutionEndEvent, diff --git a/packages/pi-coding-agent/src/core/extensions/loader.ts b/packages/pi-coding-agent/src/core/extensions/loader.ts index 7e25c837d..016f05448 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.ts @@ -38,6 +38,7 @@ import type { ExecOptions } from "../exec.js"; import { execCommand } from "../exec.js"; import { getUntrustedExtensionPaths } from "./project-trust.js"; export { isProjectTrusted, trustProject, getUntrustedExtensionPaths } from "./project-trust.js"; +import { registerToolCompatibility } from "../tools/tool-compatibility-registry.js"; import type { Extension, ExtensionAPI, @@ -428,8 +429,9 @@ export function createExtensionRuntime(): ExtensionRuntime { unregisterProvider: (name) => { runtime.pendingProviderRegistrations = runtime.pendingProviderRegistrations.filter((r) => r.name !== name); }, - // Stub replaced by ExtensionRunner at construction time via bindEmitMethods(). + // Stubs replaced by ExtensionRunner at construction time via bindEmitMethods(). 
emitBeforeModelSelect: async () => undefined, + emitAdjustToolSet: async () => undefined, }; return runtime; @@ -459,6 +461,10 @@ function createExtensionAPI( definition: tool, extensionPath: extension.path, }); + // ADR-005: auto-register tool compatibility metadata + if (tool.compatibility) { + registerToolCompatibility(tool.name, tool.compatibility); + } runtime.refreshTools(); }, @@ -585,6 +591,10 @@ function createExtensionAPI( return runtime.emitBeforeModelSelect(event); }, + async emitAdjustToolSet(event: Omit): Promise { + return runtime.emitAdjustToolSet(event); + }, + events: eventBus, } as ExtensionAPI; diff --git a/packages/pi-coding-agent/src/core/extensions/runner.ts b/packages/pi-coding-agent/src/core/extensions/runner.ts index 048ad534c..0b0f6114b 100644 --- a/packages/pi-coding-agent/src/core/extensions/runner.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.ts @@ -11,6 +11,8 @@ import type { KeyAction, KeybindingsConfig } from "../keybindings.js"; import type { ModelRegistry } from "../model-registry.js"; import type { SessionManager } from "../session-manager.js"; import type { + AdjustToolSetEvent, + AdjustToolSetResult, BeforeAgentStartEvent, BeforeAgentStartEventResult, BeforeModelSelectEvent, @@ -234,6 +236,7 @@ export class ExtensionRunner { this.modelRegistry = modelRegistry; // Bind emit methods into the shared runtime so createExtensionAPI can delegate to them. 
this.runtime.emitBeforeModelSelect = (event) => this.emitBeforeModelSelect(event); + this.runtime.emitAdjustToolSet = (event) => this.emitAdjustToolSet(event); } bindCore(actions: ExtensionActions, contextActions: ExtensionContextActions): void { @@ -713,6 +716,21 @@ export class ExtensionRunner { return result; } + async emitAdjustToolSet(event: Omit): Promise { + let result: AdjustToolSetResult | undefined; + await this.invokeHandlers("adjust_tool_set", () => ({ + type: "adjust_tool_set" as const, + ...event, + } satisfies AdjustToolSetEvent), (handlerResult) => { + if (handlerResult) { + result = handlerResult as AdjustToolSetResult; + return { done: true }; // first override wins + } + return { done: false }; + }); + return result; + } + async emitBeforeAgentStart( prompt: string, images: ImageContent[] | undefined, diff --git a/packages/pi-coding-agent/src/core/extensions/types.ts b/packages/pi-coding-agent/src/core/extensions/types.ts index f4c153992..5fea6389a 100644 --- a/packages/pi-coding-agent/src/core/extensions/types.ts +++ b/packages/pi-coding-agent/src/core/extensions/types.ts @@ -88,6 +88,8 @@ export interface ExtensionUIDialogOptions { timeout?: number; /** When true, the user can select multiple options. The return type becomes `string[]`. */ allowMultiple?: boolean; + /** When true, text input dialogs should hide typed characters if supported by the client surface. */ + secure?: boolean; } /** Placement for extension widgets. */ @@ -331,6 +333,19 @@ export interface ToolRenderResultOptions { isPartial: boolean; } +/** + * Tool compatibility metadata for provider-aware tool filtering (ADR-005 Phase 2). + * Tools without compatibility metadata are assumed universally compatible. 
+ */ +export interface ToolCompatibility { + /** Tool produces image content in results (filtered for providers without imageToolResults) */ + producesImages?: boolean; + /** Tool requires schema features that some providers don't support (e.g., ["patternProperties"]) */ + schemaFeatures?: string[]; + /** Tool is effective only with models above a minimum capability threshold */ + minCapabilityTier?: "light" | "standard" | "heavy"; +} + /** * Tool definition for registerTool(). */ @@ -347,6 +362,8 @@ export interface ToolDefinition): void; on(event: "input", handler: ExtensionHandler): void; on(event: "before_model_select", handler: ExtensionHandler): void; + on(event: "adjust_tool_set", handler: ExtensionHandler): void; // ========================================================================= // Event Emission (for host extensions that orchestrate model selection) @@ -1077,6 +1119,9 @@ export interface ExtensionAPI { /** Emit before_model_select event. Returns override model ID or undefined. */ emitBeforeModelSelect(event: Omit): Promise; + /** Emit adjust_tool_set event (ADR-005). Returns override tool names or undefined. */ + emitAdjustToolSet(event: Omit): Promise; + // ========================================================================= // Tool Registration // ========================================================================= @@ -1395,6 +1440,8 @@ export interface ExtensionRuntimeState { unregisterProvider: (name: string) => void; /** Emit before_model_select event to all registered handlers. Bound by ExtensionRunner. */ emitBeforeModelSelect: (event: Omit) => Promise; + /** Emit adjust_tool_set event to all registered handlers. Bound by ExtensionRunner (ADR-005). 
*/ + emitAdjustToolSet: (event: Omit) => Promise; } /** diff --git a/packages/pi-coding-agent/src/core/lsp/config.ts b/packages/pi-coding-agent/src/core/lsp/config.ts index 758657856..29401a363 100644 --- a/packages/pi-coding-agent/src/core/lsp/config.ts +++ b/packages/pi-coding-agent/src/core/lsp/config.ts @@ -172,16 +172,49 @@ export function hasRootMarkers(cwd: string, markers: string[]): boolean { // Local Binary Resolution // ============================================================================= -const LOCAL_BIN_PATHS: Array<{ markers: string[]; binDir: string }> = [ - { markers: ["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"], binDir: "node_modules/.bin" }, - { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: ".venv/bin" }, - { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: "venv/bin" }, - { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: ".env/bin" }, - { markers: ["Gemfile", "Gemfile.lock"], binDir: "vendor/bundle/bin" }, - { markers: ["Gemfile", "Gemfile.lock"], binDir: "bin" }, - { markers: ["go.mod", "go.sum"], binDir: "bin" }, +const LOCAL_BIN_PATHS: Array<{ markers: string[]; binDirs: string[] }> = [ + { markers: ["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"], binDirs: ["node_modules/.bin"] }, + { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: [".venv/bin", ".venv/Scripts"] }, + { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: ["venv/bin", "venv/Scripts"] }, + { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: [".env/bin", ".env/Scripts"] }, + { markers: ["Gemfile", "Gemfile.lock"], binDirs: ["vendor/bundle/bin"] }, + { markers: ["Gemfile", "Gemfile.lock"], binDirs: ["bin"] }, + { markers: ["go.mod", "go.sum"], binDirs: ["bin"] }, ]; +function getWindowsBinaryCandidates(command: string): string[] { + const ext 
= path.extname(command).toLowerCase(); + if (ext) { + return [command]; + } + + return [ + command, + `${command}.cmd`, + `${command}.bat`, + `${command}.exe`, + ]; +} + +export function resolveLocalBinaryPath(command: string, cwd: string, isWindows: boolean): string | null { + for (const { markers, binDirs } of LOCAL_BIN_PATHS) { + if (!hasRootMarkers(cwd, markers)) continue; + + for (const binDir of binDirs) { + const basePath = path.join(cwd, binDir, command); + const candidates = isWindows ? getWindowsBinaryCandidates(basePath) : [basePath]; + + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return candidate; + } + } + } + } + + return null; +} + export function which(command: string): string | null { // On Windows, prefer `where.exe` over `which` — MSYS/Git Bash's `which` // returns POSIX paths (/c/Users/...) that Node's spawn() can't execute. @@ -196,15 +229,8 @@ export function which(command: string): string | null { } export function resolveCommand(command: string, cwd: string): string | null { - for (const { markers, binDir } of LOCAL_BIN_PATHS) { - if (hasRootMarkers(cwd, markers)) { - const localPath = path.join(cwd, binDir, command); - if (fs.existsSync(localPath)) { - return localPath; - } - } - } - + const localPath = resolveLocalBinaryPath(command, cwd, process.platform === "win32"); + if (localPath) return localPath; return which(command); } diff --git a/packages/pi-coding-agent/src/core/sdk.ts b/packages/pi-coding-agent/src/core/sdk.ts index a0c2d943b..07ed24c53 100644 --- a/packages/pi-coding-agent/src/core/sdk.ts +++ b/packages/pi-coding-agent/src/core/sdk.ts @@ -341,6 +341,14 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} thinkingBudgets: settingsManager.getThinkingBudgets(), maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs, externalToolExecution: (m) => modelRegistry.getProviderAuthMode(m.provider) === "externalCli", + getProviderOptions: async (currentModel) => { + 
if (currentModel.provider !== "claude-code") return undefined; + const runner = extensionRunnerRef.current; + if (!runner?.hasUI()) return undefined; + return { + extensionUIContext: runner.getUIContext(), + }; + }, getApiKey: async (provider) => { // Use the provider argument from the in-flight request; // agent.state.model may already be switched mid-turn. diff --git a/packages/pi-coding-agent/src/core/tools/index.ts b/packages/pi-coding-agent/src/core/tools/index.ts index d54ac2a9c..90a5a524c 100644 --- a/packages/pi-coding-agent/src/core/tools/index.ts +++ b/packages/pi-coding-agent/src/core/tools/index.ts @@ -112,6 +112,13 @@ export { lspTool, } from "../lsp/index.js"; export type { LspServerStatus } from "../lsp/client.js"; +export { + registerToolCompatibility, + getToolCompatibility, + getAllToolCompatibility, + registerMcpToolCompatibility, + resetToolCompatibilityRegistry, +} from "./tool-compatibility-registry.js"; import type { AgentTool } from "@gsd/pi-agent-core"; import { type BashToolOptions, bashTool, createBashTool } from "./bash.js"; diff --git a/packages/pi-coding-agent/src/core/tools/tool-compatibility-registry.ts b/packages/pi-coding-agent/src/core/tools/tool-compatibility-registry.ts new file mode 100644 index 000000000..9e5bea3b5 --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/tool-compatibility-registry.ts @@ -0,0 +1,83 @@ +// GSD-2 — Tool Compatibility Registry (ADR-005 Phase 2) +// Maps tool names to their provider compatibility metadata. +// Used by the model router to filter tools incompatible with the selected provider. 
+ +import type { ToolCompatibility } from "../extensions/types.js"; + +// ─── Registry State ───────────────────────────────────────────────────────── + +const registry = new Map(); + +// ─── Built-in Tool Compatibility (universally compatible) ─────────────────── +// Built-in tools (bash, read, write, edit, grep, find, ls) produce text-only +// results and use standard JSON Schema — compatible with all providers. + +const BUILTIN_TOOLS: Record = { + bash: {}, + read: {}, + write: {}, + edit: {}, + grep: {}, + find: {}, + ls: {}, + lsp: {}, + hashline_edit: {}, + hashline_read: {}, +}; + +// Pre-populate registry with built-in tools +for (const [name, compat] of Object.entries(BUILTIN_TOOLS)) { + registry.set(name, compat); +} + +// ─── MCP Tool Defaults ───────────────────────────────────────────────────── +// MCP tools may use complex schemas. Default to cautious compatibility. + +const MCP_TOOL_DEFAULTS: ToolCompatibility = { + schemaFeatures: ["patternProperties"], +}; + +// ─── Public API ───────────────────────────────────────────────────────────── + +/** + * Register compatibility metadata for a tool. + * Called automatically by registerTool() for extension tools that include + * compatibility metadata in their ToolDefinition. + */ +export function registerToolCompatibility(toolName: string, compatibility: ToolCompatibility): void { + registry.set(toolName, compatibility); +} + +/** + * Get compatibility metadata for a tool. + * Returns undefined for unknown tools (treated as universally compatible + * per ADR-005 principle: "fail open, don't restrict without data"). + */ +export function getToolCompatibility(toolName: string): ToolCompatibility | undefined { + return registry.get(toolName); +} + +/** + * Get all registered tool compatibility entries. + */ +export function getAllToolCompatibility(): ReadonlyMap { + return registry; +} + +/** + * Register an MCP tool with default cautious compatibility. 
+ * MCP tools may use complex schemas that some providers don't support. + */ +export function registerMcpToolCompatibility(toolName: string, overrides?: Partial): void { + registry.set(toolName, { ...MCP_TOOL_DEFAULTS, ...overrides }); +} + +/** + * Clear all non-builtin entries (for testing). + */ +export function resetToolCompatibilityRegistry(): void { + registry.clear(); + for (const [name, compat] of Object.entries(BUILTIN_TOOLS)) { + registry.set(name, compat); + } +} diff --git a/packages/pi-coding-agent/src/index.ts b/packages/pi-coding-agent/src/index.ts index 86686caf0..ab7de8bac 100644 --- a/packages/pi-coding-agent/src/index.ts +++ b/packages/pi-coding-agent/src/index.ts @@ -49,6 +49,8 @@ export { export { createEventBus, type EventBus, type EventBusController } from "./core/event-bus.js"; // Extension system export type { + AdjustToolSetEvent, + AdjustToolSetResult, AgentEndEvent, AgentStartEvent, AgentToolResult, @@ -118,6 +120,7 @@ export type { SlashCommandSource, TerminalInputHandler, ToolCallEvent, + ToolCompatibility, ToolDefinition, ToolInfo, SortResult, @@ -310,6 +313,12 @@ export { type HashlineReadToolDetails, type HashlineReadToolInput, type HashlineReadToolOptions, + // Tool compatibility registry (ADR-005) + registerToolCompatibility, + getToolCompatibility, + getAllToolCompatibility, + registerMcpToolCompatibility, + resetToolCompatibilityRegistry, } from "./core/tools/index.js"; // Main entry point export { main } from "./main.js"; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts index a54298065..5a023afd3 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts @@ -1,8 +1,10 @@ -import type { Component } from "@gsd/pi-tui"; +import type { Component, TUI } from "@gsd/pi-tui"; +import { visibleWidth } from 
"@gsd/pi-tui"; import { theme } from "../theme/theme.js"; /** * Dynamic border component that adjusts to viewport width. + * Supports an optional animated spinner in the label area. * * Note: When used from extensions loaded via jiti, the global `theme` may be undefined * because jiti creates a separate module cache. Always pass an explicit color @@ -10,11 +12,51 @@ import { theme } from "../theme/theme.js"; */ export class DynamicBorder implements Component { private color: (str: string) => string; + private label?: string; + private spinnerFrames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]; + private spinnerIndex = 0; + private spinnerInterval: NodeJS.Timeout | null = null; + private spinnerColorFn?: (str: string) => string; constructor(color: (str: string) => string = (str) => { try { return theme.fg("border", str); } catch { return str; } - }) { + }, label?: string) { this.color = color; + this.label = label; + } + + setLabel(label: string | undefined): void { + this.label = label; + } + + /** + * Start an animated spinner that prepends to the label. + * The spinner rotates every 80ms and triggers a re-render via the TUI. + */ + startSpinner(ui: TUI, colorFn: (str: string) => string): void { + this.stopSpinner(); + this.spinnerColorFn = colorFn; + this.spinnerIndex = 0; + this.spinnerInterval = setInterval(() => { + this.spinnerIndex = (this.spinnerIndex + 1) % this.spinnerFrames.length; + ui.requestRender(); + }, 80); + ui.requestRender(); + } + + /** + * Stop the spinner animation. The border reverts to a static label. 
+ */ + stopSpinner(): void { + if (this.spinnerInterval) { + clearInterval(this.spinnerInterval); + this.spinnerInterval = null; + } + this.spinnerColorFn = undefined; + } + + get isSpinning(): boolean { + return this.spinnerInterval !== null; } invalidate(): void { @@ -22,6 +64,20 @@ export class DynamicBorder implements Component { } render(width: number): string[] { + const spinnerPrefix = this.spinnerInterval && this.spinnerColorFn + ? this.spinnerColorFn(this.spinnerFrames[this.spinnerIndex]) + " " + : ""; + + if (this.label) { + const labelText = ` ${spinnerPrefix}${this.label} `; + const labelVisible = visibleWidth(labelText); + const leading = "── "; + const remaining = Math.max(0, width - labelVisible - leading.length); + const trailing = "─".repeat(Math.max(1, remaining)); + // Color leading and trailing separately so embedded ANSI in the + // spinner/label doesn't bleed into the trailing dashes. + return [this.color(leading) + labelText + this.color(trailing)]; + } return [this.color("─".repeat(Math.max(1, width)))]; } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts index 525bcfc06..7634d154f 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts @@ -11,6 +11,7 @@ import { keyHint } from "./keybinding-hints.js"; export interface ExtensionInputOptions { tui?: TUI; timeout?: number; + secure?: boolean; } export class ExtensionInputComponent extends Container implements Focusable { @@ -61,6 +62,7 @@ export class ExtensionInputComponent extends Container implements Focusable { } this.input = new Input(); + this.input.secure = opts?.secure === true; if (placeholder) { this.input.placeholder = placeholder; } diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts 
b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts index d301acd12..88d887ffd 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts @@ -1,14 +1,36 @@ -import { Loader, Spacer, Text } from "@gsd/pi-tui"; +import { Loader, Markdown, Spacer, Text } from "@gsd/pi-tui"; import type { InteractiveModeEvent, InteractiveModeStateHost } from "../interactive-mode-state.js"; import { theme } from "../theme/theme.js"; import { AssistantMessageComponent } from "../components/assistant-message.js"; import { ToolExecutionComponent } from "../components/tool-execution.js"; +import { DynamicBorder } from "../components/dynamic-border.js"; import { appKey } from "../components/keybinding-hints.js"; // Tracks the last processed content index to avoid re-scanning all blocks on every message_update let lastProcessedContentIndex = 0; +function hasVisibleAssistantContent(message: { content: Array }): boolean { + return message.content.some( + (c) => + (c.type === "text" && typeof c.text === "string" && c.text.trim().length > 0) + || (c.type === "thinking" && typeof c.thinking === "string" && c.thinking.trim().length > 0), + ); +} + +function hasAssistantToolBlocks(message: { content: Array }): boolean { + return message.content.some((c) => c.type === "toolCall" || c.type === "serverToolUse"); +} + +// Tracks the latest assistant text for the pinned message zone +let lastPinnedText = ""; +// Whether any tool execution has been added in this assistant turn (triggers pinned display) +let hasToolsInTurn = false; +// Reference to the pinned border so we can toggle its label between working/idle +let pinnedBorder: DynamicBorder | undefined; +// Reference to the pinned markdown component below the border +let pinnedTextComponent: Markdown | undefined; + export async function handleAgentEvent(host: InteractiveModeStateHost & { init: () => Promise; 
getMarkdownThemeWithSettings: () => any; @@ -31,9 +53,15 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.footer.invalidate(); - // Reset content index tracker when a new assistant message starts + // Reset content index tracker and pinned state when a new assistant message starts if (event.type === "message_start" && event.message.role === "assistant") { lastProcessedContentIndex = 0; + lastPinnedText = ""; + hasToolsInTurn = false; + if (pinnedBorder) pinnedBorder.stopSpinner(); + pinnedBorder = undefined; + pinnedTextComponent = undefined; + host.pinnedMessageContainer.clear(); } switch (event.type) { @@ -46,6 +74,12 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.streamingMessage = undefined; host.pendingTools.clear(); host.pendingMessagesContainer.clear(); + host.pinnedMessageContainer.clear(); + lastPinnedText = ""; + hasToolsInTurn = false; + if (pinnedBorder) pinnedBorder.stopSpinner(); + pinnedBorder = undefined; + pinnedTextComponent = undefined; host.compactionQueuedMessages = []; host.rebuildChatFromMessages(); host.updatePendingMessagesDisplay(); @@ -104,45 +138,54 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.updatePendingMessagesDisplay(); host.ui.requestRender(); } else if (event.message.role === "assistant") { - host.streamingComponent = new AssistantMessageComponent( - undefined, - host.hideThinkingBlock, - host.getMarkdownThemeWithSettings(), - host.settingsManager.getTimestampFormat(), - ); host.streamingMessage = event.message; - host.chatContainer.addChild(host.streamingComponent); - host.streamingComponent.updateContent(host.streamingMessage); + // External-tool providers can stream multiple assistant turns through + // one response. Delay component creation until visible assistant text + // arrives so tool outputs keep chronological ordering. 
host.ui.requestRender(); } break; case "message_update": - if (host.streamingComponent && event.message.role === "assistant") { + if (event.message.role === "assistant") { host.streamingMessage = event.message; - host.streamingComponent.updateContent(host.streamingMessage); - - // When the stream adapter signals a completed tool call with an - // external result (from Claude Code SDK), update the pending - // ToolExecutionComponent immediately so output is visible in - // real-time instead of waiting for the session to end. const innerEvent = event.assistantMessageEvent; + + let externalToolResult: + | { toolCallId: string; content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>; details: Record; isError: boolean } + | undefined; if (innerEvent.type === "toolcall_end" && innerEvent.toolCall) { const tc = innerEvent.toolCall as any; - const externalResult = tc.externalResult; - if (externalResult) { - const component = host.pendingTools.get(tc.id); - if (component) { - component.updateResult({ - content: externalResult.content ?? [{ type: "text", text: "" }], - details: externalResult.details ?? {}, - isError: externalResult.isError ?? false, - }); - } + const ext = tc.externalResult; + if (ext) { + externalToolResult = { + toolCallId: tc.id, + content: ext.content ?? [{ type: "text", text: "" }], + details: ext.details ?? {}, + isError: ext.isError ?? false, + }; + } + } else if (innerEvent.type === "server_tool_use") { + const idx = typeof innerEvent.contentIndex === "number" ? innerEvent.contentIndex : -1; + const block = idx >= 0 ? (host.streamingMessage.content[idx] as any) : undefined; + const ext = block?.externalResult; + if (block?.id && ext) { + externalToolResult = { + toolCallId: block.id, + content: ext.content ?? [{ type: "text", text: "" }], + details: ext.details ?? {}, + isError: ext.isError ?? 
false, + }; } } const contentBlocks = host.streamingMessage.content; + // Some adapters reuse a single assistant lifecycle while internally + // spanning multiple provider turns. When a new turn starts, content + // length can shrink back to 0/1; reset scan index to avoid skipping. + if (lastProcessedContentIndex >= contentBlocks.length) { + lastProcessedContentIndex = 0; + } for (let i = lastProcessedContentIndex; i < contentBlocks.length; i++) { const content = contentBlocks[i]; if (content.type === "toolCall") { @@ -192,19 +235,108 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { } } } + + // When the stream adapter signals a completed tool call with an + // external result (from Claude Code SDK), update the pending + // ToolExecutionComponent immediately so output is visible in + // real-time instead of waiting for the session to end. + if (externalToolResult) { + const component = host.pendingTools.get(externalToolResult.toolCallId); + if (component) { + component.updateResult({ + content: externalToolResult.content, + details: externalToolResult.details, + isError: externalToolResult.isError, + }); + } + } + + // Render assistant text/thinking after tool components so mixed + // streams keep chronological ordering in the chat container. 
+ const hasToolBlocks = hasAssistantToolBlocks(host.streamingMessage); + if (!host.streamingComponent && hasVisibleAssistantContent(host.streamingMessage)) { + host.streamingComponent = new AssistantMessageComponent( + undefined, + host.hideThinkingBlock, + host.getMarkdownThemeWithSettings(), + host.settingsManager.getTimestampFormat(), + ); + host.chatContainer.addChild(host.streamingComponent); + } + if (host.streamingComponent) { + if (hasToolBlocks) { + host.chatContainer.removeChild(host.streamingComponent); + host.chatContainer.addChild(host.streamingComponent); + } + host.streamingComponent.updateContent(host.streamingMessage); + } + // Update index: fully processed blocks won't need re-scanning. // Keep the last block's index (it may still be accumulating data), // so we re-check it next time but skip all earlier ones. if (contentBlocks.length > 0) { lastProcessedContentIndex = Math.max(0, contentBlocks.length - 1); } + + // Pinned message: mirror the latest assistant text above the editor + // when tool executions push it out of the viewport. 
+ const hasTools = contentBlocks.some( + (c: any) => c.type === "toolCall" || c.type === "serverToolUse", + ); + if (hasTools) hasToolsInTurn = true; + + if (hasToolsInTurn) { + // Collect the latest text block(s) from the assistant message + let latestText = ""; + for (let i = contentBlocks.length - 1; i >= 0; i--) { + const c = contentBlocks[i] as any; + if (c.type === "text" && c.text?.trim()) { + latestText = c.text.trim(); + break; + } + } + + if (latestText && latestText !== lastPinnedText) { + lastPinnedText = latestText; + + if (!pinnedBorder) { + // First time: create border + text component + host.pinnedMessageContainer.clear(); + pinnedBorder = new DynamicBorder( + (str: string) => theme.fg("dim", str), + "Working · Latest Output", + ); + pinnedBorder.startSpinner(host.ui, (str: string) => theme.fg("accent", str)); + host.pinnedMessageContainer.addChild(pinnedBorder); + pinnedTextComponent = new Markdown(latestText, 1, 0, host.getMarkdownThemeWithSettings()); + // Cap pinned content to ~40% of terminal height so tall output + // doesn't exceed the viewport and cause render flashing. 
+ pinnedTextComponent.maxLines = Math.max(3, Math.floor(host.ui.terminal.rows * 0.4)); + host.pinnedMessageContainer.addChild(pinnedTextComponent); + // Hide the separate status loader — the pinned zone replaces it + if (host.loadingAnimation) { + host.loadingAnimation.stop(); + host.loadingAnimation = undefined; + } + host.statusContainer.clear(); + } else { + // Update existing markdown component in-place + pinnedTextComponent?.setText(latestText); + // Refresh maxLines in case terminal was resized + if (pinnedTextComponent) { + pinnedTextComponent.maxLines = Math.max(3, Math.floor(host.ui.terminal.rows * 0.4)); + } + } + } + } + host.ui.requestRender(); } break; case "message_end": if (event.message.role === "user") break; - if (host.streamingComponent && event.message.role === "assistant") { + if (event.message.role === "assistant") { host.streamingMessage = event.message; let errorMessage: string | undefined; if (host.streamingMessage.stopReason === "aborted") { @@ -214,7 +346,25 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { : "Operation aborted"; host.streamingMessage.errorMessage = errorMessage; } - host.streamingComponent.updateContent(host.streamingMessage); + + const shouldRenderAssistant = hasVisibleAssistantContent(host.streamingMessage) + || ( + (host.streamingMessage.stopReason === "aborted" || host.streamingMessage.stopReason === "error") + && !hasAssistantToolBlocks(host.streamingMessage) + ); + if (!host.streamingComponent && shouldRenderAssistant) { + host.streamingComponent = new AssistantMessageComponent( + undefined, + host.hideThinkingBlock, + host.getMarkdownThemeWithSettings(), + host.settingsManager.getTimestampFormat(), + ); + host.chatContainer.addChild(host.streamingComponent); + } + if (host.streamingComponent) { + host.streamingComponent.updateContent(host.streamingMessage); + } + if (host.streamingMessage.stopReason === "aborted" || host.streamingMessage.stopReason === "error") { if (!errorMessage) { 
errorMessage = host.streamingMessage.errorMessage || "Error"; @@ -230,6 +380,15 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { } host.streamingComponent = undefined; host.streamingMessage = undefined; + // Clear pinned output once the message is finalized in the chat + // container — prevents duplicate display when the agent continues + // (e.g. form elicitation) after the assistant message ends. + if (pinnedBorder) pinnedBorder.stopSpinner(); + host.pinnedMessageContainer.clear(); + lastPinnedText = ""; + hasToolsInTurn = false; + pinnedBorder = undefined; + pinnedTextComponent = undefined; host.footer.invalidate(); } host.ui.requestRender(); @@ -282,6 +441,16 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.streamingMessage = undefined; } host.pendingTools.clear(); + // Pinned output is only useful while work is actively streaming. + // Keep chat history as the single source after completion. + if (pinnedBorder) { + pinnedBorder.stopSpinner(); + } + host.pinnedMessageContainer.clear(); + lastPinnedText = ""; + hasToolsInTurn = false; + pinnedBorder = undefined; + pinnedTextComponent = undefined; await host.checkShutdownRequested(); host.ui.requestRender(); break; diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts index cf91b00b1..bffa82d51 100644 --- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts +++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts @@ -9,6 +9,7 @@ export interface InteractiveModeStateHost { keybindings: any; statusContainer: any; chatContainer: any; + pinnedMessageContainer: any; settingsManager: any; pendingTools: Map; toolOutputExpanded: boolean; diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts index 85ba64d39..c42aca520 
100644 --- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts +++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts @@ -168,6 +168,7 @@ export class InteractiveMode { private chatContainer: Container; private pendingMessagesContainer: Container; private statusContainer: Container; + private pinnedMessageContainer: Container; private defaultEditor: CustomEditor; private editor: EditorComponent; private autocompleteProvider: CombinedAutocompleteProvider | undefined; @@ -285,6 +286,7 @@ export class InteractiveMode { this.chatContainer = new Container(); this.pendingMessagesContainer = new Container(); this.statusContainer = new Container(); + this.pinnedMessageContainer = new Container(); this.widgetContainerAbove = new Container(); this.widgetContainerBelow = new Container(); this.keybindings = KeybindingsManager.create(); @@ -490,6 +492,7 @@ export class InteractiveMode { this.ui.addChild(this.chatContainer); this.ui.addChild(this.pendingMessagesContainer); this.ui.addChild(this.statusContainer); + this.ui.addChild(this.pinnedMessageContainer); this.renderWidgets(); // Initialize with default spacer this.ui.addChild(this.widgetContainerAbove); this.ui.addChild(this.editorContainer); @@ -1396,7 +1399,19 @@ export class InteractiveMode { */ private renderWidgets(): void { if (!this.widgetContainerAbove || !this.widgetContainerBelow) return; - this.renderWidgetContainer(this.widgetContainerAbove, this.extensionWidgetsAbove, true, true); + + // widgetContainerAbove: spacer collapses when pinned content is visible + // so there's no extra blank line between pinned output and the editor border. + this.widgetContainerAbove.clear(); + const pinned = this.pinnedMessageContainer; + this.widgetContainerAbove.addChild({ + render: () => pinned.children.length > 0 ? 
[] : [""], + invalidate: () => {}, + }); + for (const component of this.extensionWidgetsAbove.values()) { + this.widgetContainerAbove.addChild(component); + } + this.renderWidgetContainer(this.widgetContainerBelow, this.extensionWidgetsBelow, false, false); this.ui.requestRender(); } @@ -1631,7 +1646,7 @@ export class InteractiveMode { this.hideExtensionInput(); resolve(undefined); }, - { tui: this.ui, timeout: opts?.timeout }, + { tui: this.ui, timeout: opts?.timeout, secure: opts?.secure }, ); this.editorContainer.clear(); @@ -2264,6 +2279,7 @@ export class InteractiveMode { updateFooter: true, populateHistory: true, }); + this.populatePinnedFromMessages(context.messages); // Show compaction info if session was compacted const allEntries = this.sessionManager.getEntries(); @@ -2287,6 +2303,54 @@ export class InteractiveMode { this.chatContainer.clear(); const context = this.sessionManager.buildSessionContext(); this.renderSessionContext(context); + this.populatePinnedFromMessages(context.messages); + } + + /** + * After rebuilding chat from messages, pin the last assistant text above the + * editor if tool results would otherwise push it out of the viewport. 
+ */ + private populatePinnedFromMessages(messages: AgentMessage[]): void { + this.pinnedMessageContainer.clear(); + + // Walk backwards to find the last assistant message + let lastAssistant: AssistantMessage | undefined; + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg && "role" in msg && msg.role === "assistant") { + lastAssistant = msg as AssistantMessage; + break; + } + } + if (!lastAssistant) return; + + // Check if any tool calls follow the last text block + const content = lastAssistant.content; + let lastTextIndex = -1; + let hasToolAfterText = false; + for (let i = 0; i < content.length; i++) { + if (content[i].type === "text") lastTextIndex = i; + } + if (lastTextIndex >= 0) { + for (let i = lastTextIndex + 1; i < content.length; i++) { + if (content[i].type === "toolCall" || content[i].type === "serverToolUse") { + hasToolAfterText = true; + break; + } + } + } + if (!hasToolAfterText || lastTextIndex < 0) return; + + const textBlock = content[lastTextIndex] as { type: "text"; text: string }; + const text = textBlock.text?.trim(); + if (!text) return; + + this.pinnedMessageContainer.addChild( + new DynamicBorder((str: string) => theme.fg("dim", str), "Latest Output"), + ); + this.pinnedMessageContainer.addChild( + new Markdown(text, 1, 0, this.getMarkdownThemeWithSettings()), + ); } // ========================================================================= diff --git a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts index c510e63b4..24fd8bb7a 100644 --- a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts +++ b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts @@ -305,11 +305,13 @@ async function handleShareCommand(ctx: SlashCommandContext): Promise { ctx.showStatus("Share cancelled"); }; - try { - const result = await new Promise<{ stdout: string; stderr: string; code: 
number | null }>((resolve) => { - proc = spawn("gh", ["gist", "create", "--public=false", tmpFile]); - let stdout = ""; - let stderr = ""; + try { + const result = await new Promise<{ stdout: string; stderr: string; code: number | null }>((resolve) => { + proc = spawn("gh", ["gist", "create", "--public=false", tmpFile], { + shell: process.platform === "win32", + }); + let stdout = ""; + let stderr = ""; proc.stdout?.on("data", (data) => { stdout += data.toString(); }); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index f2f8fbe4c..7d36e563a 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -224,7 +224,7 @@ export async function runRpcMode(session: AgentSession): Promise { ), input: (title, placeholder, opts) => - createDialogPromise(opts, undefined, { method: "input", title, placeholder, timeout: opts?.timeout }, (r) => + createDialogPromise(opts, undefined, { method: "input", title, placeholder, timeout: opts?.timeout, secure: opts?.secure }, (r) => "cancelled" in r && r.cancelled ? undefined : "value" in r ? 
r.value : undefined, ), diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts index 20d5c2c73..d6cd25bfc 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts @@ -291,6 +291,7 @@ export type RpcExtensionUIRequest = title: string; placeholder?: string; timeout?: number; + secure?: boolean; } | { type: "extension_ui_request"; id: string; method: "editor"; title: string; prefill?: string } | { diff --git a/packages/pi-tui/src/components/__tests__/input.test.ts b/packages/pi-tui/src/components/__tests__/input.test.ts index c47100492..581c2e14f 100644 --- a/packages/pi-tui/src/components/__tests__/input.test.ts +++ b/packages/pi-tui/src/components/__tests__/input.test.ts @@ -32,4 +32,15 @@ describe("Input", () => { input.focused = false; assert.equal(input.focused, false); }); + + it("secure mode obscures typed characters in render output", () => { + const input = new Input(); + input.secure = true; + input.focused = true; + input.handleInput("secret123"); + + const line = input.render(40)[0] ?? 
""; + assert.ok(!line.includes("secret123"), "rendered line must not expose raw secret text"); + assert.ok(line.includes("*********"), "rendered line should include masked characters"); + }); }); diff --git a/packages/pi-tui/src/components/__tests__/markdown-maxlines.test.ts b/packages/pi-tui/src/components/__tests__/markdown-maxlines.test.ts new file mode 100644 index 000000000..fb9fbf0bc --- /dev/null +++ b/packages/pi-tui/src/components/__tests__/markdown-maxlines.test.ts @@ -0,0 +1,75 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; + +import { Markdown, type MarkdownTheme } from "../markdown.js"; + +function noopTheme(): MarkdownTheme { + const identity = (text: string) => text; + return { + heading: identity, + link: identity, + linkUrl: identity, + code: identity, + codeBlock: identity, + codeBlockBorder: identity, + quote: identity, + quoteBorder: identity, + hr: identity, + listBullet: identity, + bold: identity, + italic: identity, + strikethrough: identity, + underline: identity, + }; +} + +test("Markdown renders all lines when maxLines is not set", () => { + const text = "Line 1\n\nLine 2\n\nLine 3\n\nLine 4\n\nLine 5"; + const md = new Markdown(text, 0, 0, noopTheme()); + const lines = md.render(80); + // Each paragraph produces a line + an inter-paragraph blank line + const contentLines = lines.filter((l) => l.trim().length > 0); + assert.ok(contentLines.length >= 5, `expected at least 5 content lines, got ${contentLines.length}`); +}); + +test("Markdown truncates from the top when maxLines is exceeded", () => { + const text = "Line 1\n\nLine 2\n\nLine 3\n\nLine 4\n\nLine 5"; + const md = new Markdown(text, 0, 0, noopTheme()); + md.maxLines = 3; + const lines = md.render(80); + assert.ok(lines.length <= 3, `expected at most 3 lines, got ${lines.length}`); + // First line should be the ellipsis indicator + assert.ok(lines[0].includes("…"), "first line should contain ellipsis indicator"); + 
assert.ok(lines[0].includes("above"), "first line should mention lines above"); +}); + +test("Markdown preserves most recent content when truncating", () => { + const text = "First paragraph\n\nSecond paragraph\n\nThird paragraph\n\nFourth paragraph\n\nFifth paragraph"; + const md = new Markdown(text, 0, 0, noopTheme()); + md.maxLines = 3; + const lines = md.render(80); + // The last rendered line should contain "Fifth paragraph" (the most recent content) + const lastContentLine = lines.filter((l) => !l.includes("…")).pop() ?? ""; + assert.ok( + lastContentLine.includes("Fifth paragraph"), + `expected last content line to contain "Fifth paragraph", got "${lastContentLine}"`, + ); +}); + +test("Markdown does not truncate when content fits within maxLines", () => { + const text = "Short text"; + const md = new Markdown(text, 0, 0, noopTheme()); + md.maxLines = 10; + const lines = md.render(80); + assert.ok(!lines.some((l) => l.includes("…")), "should not contain ellipsis when content fits"); + assert.ok(lines.some((l) => l.includes("Short text")), "should contain the original text"); +}); + +test("Markdown trims trailing empty lines", () => { + const text = "Some text\n\n"; + const md = new Markdown(text, 0, 0, noopTheme()); + const lines = md.render(80); + // Last line should not be empty (trailing empties are trimmed) + const lastLine = lines[lines.length - 1]; + assert.ok(lastLine.trim().length > 0 || lines.length === 1, "trailing empty lines should be trimmed"); +}); diff --git a/packages/pi-tui/src/components/input.ts b/packages/pi-tui/src/components/input.ts index 627f3557c..78535ab3f 100644 --- a/packages/pi-tui/src/components/input.ts +++ b/packages/pi-tui/src/components/input.ts @@ -21,6 +21,8 @@ export class Input implements Component, Focusable { public onSubmit?: (value: string) => void; public onEscape?: () => void; public placeholder: string = ""; + /** When true, render obscured characters instead of the actual value. 
*/ + public secure: boolean = false; /** Focusable interface - set by TUI when focus changes */ private _focused: boolean = false; @@ -446,6 +448,7 @@ export class Input implements Component, Focusable { // Calculate visible window const prompt = "> "; const availableWidth = width - prompt.length; + const renderValue = this.secure ? "*".repeat(this.value.length) : this.value; if (availableWidth <= 0) { return [prompt]; @@ -466,7 +469,7 @@ export class Input implements Component, Focusable { if (this.value.length < availableWidth) { // Everything fits (leave room for cursor at end) - visibleText = this.value; + visibleText = renderValue; } else { // Need horizontal scrolling // Reserve one character for cursor if it's at the end @@ -501,17 +504,17 @@ export class Input implements Component, Focusable { if (this.cursor < halfWidth) { // Cursor near start - visibleText = this.value.slice(0, findValidEnd(scrollWidth)); + visibleText = renderValue.slice(0, findValidEnd(scrollWidth)); cursorDisplay = this.cursor; } else if (this.cursor > this.value.length - halfWidth) { // Cursor near end const start = findValidStart(this.value.length - scrollWidth); - visibleText = this.value.slice(start); + visibleText = renderValue.slice(start); cursorDisplay = this.cursor - start; } else { // Cursor in middle const start = findValidStart(this.cursor - halfWidth); - visibleText = this.value.slice(start, findValidEnd(start + scrollWidth)); + visibleText = renderValue.slice(start, findValidEnd(start + scrollWidth)); cursorDisplay = halfWidth; } } diff --git a/packages/pi-tui/src/components/markdown.ts b/packages/pi-tui/src/components/markdown.ts index 0920e6b4f..e1d7d454f 100644 --- a/packages/pi-tui/src/components/markdown.ts +++ b/packages/pi-tui/src/components/markdown.ts @@ -58,10 +58,13 @@ export class Markdown implements Component { private defaultTextStyle?: DefaultTextStyle; private theme: MarkdownTheme; private defaultStylePrefix?: string; + /** Maximum rendered lines 
(excluding padding). When set, content is truncated from the top with an ellipsis indicator so the most recent output remains visible. */ + maxLines?: number; // Cache for rendered output private cachedText?: string; private cachedWidth?: number; + private cachedMaxLines?: number; private cachedLines?: string[]; constructor( @@ -86,12 +89,13 @@ export class Markdown implements Component { invalidate(): void { this.cachedText = undefined; this.cachedWidth = undefined; + this.cachedMaxLines = undefined; this.cachedLines = undefined; } render(width: number): string[] { // Check cache - if (this.cachedLines && this.cachedText === this.text && this.cachedWidth === width) { + if (this.cachedLines && this.cachedText === this.text && this.cachedWidth === width && this.cachedMaxLines === this.maxLines) { return this.cachedLines; } @@ -104,6 +108,7 @@ export class Markdown implements Component { // Update cache this.cachedText = this.text; this.cachedWidth = width; + this.cachedMaxLines = this.maxLines; this.cachedLines = result; return result; } @@ -124,6 +129,12 @@ export class Markdown implements Component { for (let j = 0; j < tokenLines.length; j++) renderedLines.push(tokenLines[j]); } + // Trim trailing empty lines — inter-block spacing at the end just adds + // unwanted whitespace before whatever follows (e.g. pinned output border). + while (renderedLines.length > 0 && renderedLines[renderedLines.length - 1] === "") { + renderedLines.pop(); + } + // Wrap lines (NO padding, NO background yet) const wrappedLines: string[] = []; for (const line of renderedLines) { @@ -143,6 +154,15 @@ export class Markdown implements Component { } } + // Truncate from the top when maxLines is set so the most recent content + // stays visible. This prevents the pinned output zone from exceeding the + // terminal height and causing render flashing. 
+ if (this.maxLines !== undefined && wrappedLines.length > this.maxLines) { + const keep = Math.max(1, this.maxLines - 1); // Reserve one line for the ellipsis indicator + const truncated = wrappedLines.length - keep; + wrappedLines.splice(0, truncated, `… ${truncated} line${truncated !== 1 ? "s" : ""} above`); + } + // Add margins and background to each wrapped line const leftMargin = " ".repeat(this.paddingX); const rightMargin = " ".repeat(this.paddingX); @@ -181,6 +201,7 @@ export class Markdown implements Component { // Update cache this.cachedText = this.text; this.cachedWidth = width; + this.cachedMaxLines = this.maxLines; this.cachedLines = result; return result.length > 0 ? result : [""]; diff --git a/pkg/package.json b/pkg/package.json index 154dec5ad..31a3b4639 100644 --- a/pkg/package.json +++ b/pkg/package.json @@ -1,6 +1,6 @@ { "name": "@glittercowboy/gsd", - "version": "2.68.0", + "version": "2.71.0", "piConfig": { "name": "gsd", "configDir": ".gsd" diff --git a/scripts/dev.js b/scripts/dev.js index faf9a75d2..0eea64072 100644 --- a/scripts/dev.js +++ b/scripts/dev.js @@ -11,15 +11,18 @@ import { spawn } from 'node:child_process' import { resolve, dirname } from 'node:path' import { fileURLToPath } from 'node:url' +import { createRequire } from 'node:module' const __dirname = dirname(fileURLToPath(import.meta.url)) const root = resolve(__dirname, '..') +const require = createRequire(import.meta.url) +const tscBin = require.resolve('typescript/bin/tsc') const procs = [ spawn('node', [resolve(__dirname, 'watch-resources.js')], { cwd: root, stdio: 'inherit' }), - spawn(resolve(root, 'node_modules', '.bin', 'tsc'), ['--watch'], { + spawn(process.execPath, [tscBin, '--watch'], { cwd: root, stdio: 'inherit' }) ] diff --git a/scripts/install-hooks.mjs b/scripts/install-hooks.mjs new file mode 100644 index 000000000..dea550585 --- /dev/null +++ b/scripts/install-hooks.mjs @@ -0,0 +1,52 @@ +#!/usr/bin/env node + +import { execFileSync } from 
'node:child_process'; +import { chmodSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; + +const MARKER = '# gsd-secret-scan'; + +function git(args) { + return execFileSync('git', args, { + encoding: 'utf8', + shell: process.platform === 'win32', + }).trim(); +} + +const gitDir = git(['rev-parse', '--git-dir']); +const repoRoot = git(['rev-parse', '--show-toplevel']); +const hookDir = join(gitDir, 'hooks'); +const hookFile = join(hookDir, 'pre-commit'); +const hookCommand = `node "${join(repoRoot, 'scripts', 'secret-scan.mjs')}"`; + +mkdirSync(hookDir, { recursive: true }); + +if (existsSync(hookFile)) { + const current = readFileSync(hookFile, 'utf8'); + if (current.includes(MARKER)) { + process.stdout.write('secret-scan pre-commit hook already installed.\n'); + process.exit(0); + } + + const next = `${current.replace(/\s*$/, '\n')}${MARKER}\n${hookCommand}\n`; + writeFileSync(hookFile, next, 'utf8'); + process.stdout.write('secret-scan appended to existing pre-commit hook.\n'); + process.exit(0); +} + +const hookBody = [ + '#!/usr/bin/env sh', + '# gsd-secret-scan', + '# Pre-commit hook: scan staged files for hardcoded secrets', + hookCommand, + '', +].join('\n'); + +writeFileSync(hookFile, hookBody, 'utf8'); +try { + chmodSync(hookFile, 0o755); +} catch { + // Best effort on Windows filesystems that do not honor chmod. +} + +process.stdout.write('secret-scan pre-commit hook installed.\n'); diff --git a/scripts/parallel-monitor.mjs b/scripts/parallel-monitor.mjs index b29109682..e3acd6545 100755 --- a/scripts/parallel-monitor.mjs +++ b/scripts/parallel-monitor.mjs @@ -42,7 +42,7 @@ import fs from 'node:fs'; import path from 'node:path'; -import { execSync } from 'node:child_process'; +import { execSync, spawn, spawnSync } from 'node:child_process'; // ─── Configuration ─────────────────────────────────────────────────────────── @@ -294,7 +294,10 @@ function findGsdLoader() { // 3. 
Try `which gsd` and resolve symlink try { - const bin = execSync('which gsd', { encoding: 'utf-8', timeout: 3000 }).trim(); + const pathLookup = process.platform === 'win32' ? 'where.exe' : 'which'; + const lookupArgs = ['gsd']; + const result = spawnSync(pathLookup, lookupArgs, { encoding: 'utf-8', timeout: 3000 }); + const bin = result.status === 0 ? result.stdout.trim().split(/\r?\n/)[0]?.trim() : ''; if (bin) { const realBin = fs.realpathSync(bin); const loader = path.resolve(path.dirname(realBin), '..', 'dist', 'loader.js'); @@ -309,7 +312,7 @@ const GSD_LOADER = findGsdLoader(); /** * Respawn a dead worker. Returns the new PID or null on failure. - * Uses nohup + output redirection so the child is fully detached. + * Uses a detached Node child with log file descriptors so the child is fully detached. */ function respawnWorker(mid) { const worktreeDir = path.resolve(PROJECT_ROOT, `.gsd/worktrees/${mid}`); @@ -319,41 +322,37 @@ function respawnWorker(mid) { const stdoutLog = path.resolve(PROJECT_ROOT, PARALLEL_DIR, `${mid}.stdout.log`); const stderrLog = path.resolve(PROJECT_ROOT, PARALLEL_DIR, `${mid}.stderr.log`); + let stdoutFd; + let stderrFd; try { - const env = [ - `GSD_MILESTONE_LOCK=${mid}`, - `GSD_PROJECT_ROOT=${PROJECT_ROOT}`, - `GSD_PARALLEL_WORKER=1`, - ].join(' '); - - // Use a shell script written to a temp file to avoid quoting hell - const script = [ - '#!/bin/bash', - `cd "${worktreeDir}"`, - `export GSD_MILESTONE_LOCK=${mid}`, - `export GSD_PROJECT_ROOT="${PROJECT_ROOT}"`, - `export GSD_PARALLEL_WORKER=1`, - `exec node "${GSD_LOADER}" headless --json auto > "${stdoutLog}" 2>> "${stderrLog}"`, - ].join('\n'); - - const scriptPath = path.resolve(PROJECT_ROOT, PARALLEL_DIR, `${mid}.respawn.sh`); - fs.writeFileSync(scriptPath, script, { mode: 0o755 }); - - // Launch detached via nohup - const result = execSync( - `nohup bash "${scriptPath}" > /dev/null 2>&1 & echo $!`, - { timeout: 5000, encoding: 'utf-8', cwd: worktreeDir } - ).trim(); - - // 
Clean up the temp script after a delay (process already forked) - setTimeout(() => { - try { fs.unlinkSync(scriptPath); } catch {} - }, 5000); - - const newPid = parseInt(result, 10); - return isNaN(newPid) ? null : newPid; + fs.mkdirSync(path.dirname(stdoutLog), { recursive: true }); + stdoutFd = fs.openSync(stdoutLog, 'a'); + stderrFd = fs.openSync(stderrLog, 'a'); + + const child = spawn(process.execPath, [GSD_LOADER, 'headless', '--json', 'auto'], { + cwd: worktreeDir, + detached: true, + env: { + ...process.env, + GSD_MILESTONE_LOCK: mid, + GSD_PROJECT_ROOT: PROJECT_ROOT, + GSD_PARALLEL_WORKER: '1', + }, + stdio: ['ignore', stdoutFd, stderrFd], + windowsHide: true, + }); + + child.unref(); + return child.pid ?? null; } catch (err) { return null; + } finally { + if (stdoutFd !== undefined) { + try { fs.closeSync(stdoutFd); } catch {} + } + if (stderrFd !== undefined) { + try { fs.closeSync(stderrFd); } catch {} + } } } diff --git a/scripts/pr-risk-check.mjs b/scripts/pr-risk-check.mjs index 18c88e02b..94b61f13b 100644 --- a/scripts/pr-risk-check.mjs +++ b/scripts/pr-risk-check.mjs @@ -20,7 +20,7 @@ import { createInterface } from 'readline'; const __dirname = dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = resolve(__dirname, '..'); -const MAP_PATH = resolve(REPO_ROOT, 'docs/FILE-SYSTEM-MAP.md'); +const MAP_PATH = resolve(REPO_ROOT, 'docs/dev/FILE-SYSTEM-MAP.md'); // --------------------------------------------------------------------------- // Risk tier definitions diff --git a/scripts/prepublish-check.mjs b/scripts/prepublish-check.mjs new file mode 100644 index 000000000..c47cafbbd --- /dev/null +++ b/scripts/prepublish-check.mjs @@ -0,0 +1,19 @@ +#!/usr/bin/env node + +import { spawnSync } from 'node:child_process'; + +if (process.env.CI === 'true' || process.env.CI === '1') { + process.exit(0); +} + +const result = spawnSync('git', ['diff', '--exit-code'], { + stdio: 'inherit', + shell: process.platform === 'win32', +}); + +if (result.status === 
0) { + process.exit(0); +} + +process.stderr.write('ERROR: version sync changed files — commit them before publishing\n'); +process.exit(result.status ?? 1); diff --git a/scripts/secret-scan.mjs b/scripts/secret-scan.mjs new file mode 100644 index 000000000..e8f1a5f79 --- /dev/null +++ b/scripts/secret-scan.mjs @@ -0,0 +1,184 @@ +#!/usr/bin/env node + +import { execFileSync } from 'node:child_process'; +import { existsSync, readFileSync } from 'node:fs'; + +const RED = '\x1b[0;31m'; +const YELLOW = '\x1b[1;33m'; +const NC = '\x1b[0m'; +const IGNORE_FILE = '.secretscanignore'; + +const PATTERNS = [ + { label: 'AWS Access Key', regex: /AKIA[0-9A-Z]{16}/g }, + { label: 'Generic API Key', regex: /(api[_-]?key|apikey|api[_-]?secret)[ \t]*[:=][ \t]*['"][0-9a-zA-Z_./-]{20,}['"]/gi }, + { label: 'Generic Secret', regex: /(secret|token|password|passwd|pwd|credential)[ \t]*[:=][ \t]*['"][^\s'"]{8,}['"]/gi }, + { label: 'Authorization Header', regex: /(authorization|bearer)[ \t]*[:=][ \t]*['"][^\s'"]{8,}['"]/gi }, + { label: 'Private Key', regex: /-----BEGIN\s+(RSA|DSA|EC|OPENSSH|PGP)\s+PRIVATE\s+KEY-----/g }, + { label: 'Database URL', regex: /(mysql|postgres|postgresql|mongodb|redis|amqp|mssql):\/\/[^\s'"]{8,}/gi }, + { label: 'GitHub Token', regex: /gh[pousr]_[0-9a-zA-Z]{36,}/g }, + { label: 'GitLab Token', regex: /glpat-[0-9a-zA-Z-]{20,}/g }, + { label: 'Slack Token', regex: /xox[baprs]-[0-9a-zA-Z-]{10,}/g }, + { label: 'Slack Webhook', regex: /hooks\.slack\.com\/services\/T[0-9A-Z]{8,}\/B[0-9A-Z]{8,}\/[0-9a-zA-Z]{20,}/g }, + { label: 'Google API Key', regex: /AIza[0-9A-Za-z_-]{35}/g }, + { label: 'Stripe Key', regex: /[sr]k_(live|test)_[0-9a-zA-Z]{20,}/g }, + { label: 'npm Token', regex: /npm_[0-9a-zA-Z]{36,}/g }, + { label: 'Hex Secret', regex: /(secret|key|token|password)[ \t]*[:=][ \t]*['"]?[0-9a-f]{32,}['"]?/gi }, + { label: 'Hardcoded Password', regex: /password[ \t]*[:=][ \t]*['"][^'"]{4,}['"]/gi }, +]; + +function runGit(args) { + try { + return 
execFileSync('git', args, { + encoding: 'utf8', + shell: process.platform === 'win32', + stdio: ['ignore', 'pipe', 'ignore'], + }); + } catch { + return ''; + } +} + +function parseArgs(argv) { + if (argv[0] === '--diff') { + return { mode: 'diff', ref: argv[1] || 'HEAD' }; + } + if (argv[0] === '--file') { + return { mode: 'file', file: argv[1] || '' }; + } + return { mode: 'staged' }; +} + +function getFiles(options) { + if (options.mode === 'diff') { + return runGit(['diff', '--name-only', '--diff-filter=ACMR', options.ref]); + } + if (options.mode === 'file') { + return options.file; + } + return runGit(['diff', '--cached', '--name-only', '--diff-filter=ACMR']); +} + +function shouldScan(file) { + const lower = file.toLowerCase(); + const skippedExtensions = [ + '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.woff', '.woff2', '.ttf', '.eot', + '.zip', '.tar', '.gz', '.tgz', '.bz2', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib', + '.o', '.a', '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.lock', '.map', '.node', '.wasm', + ]; + if (skippedExtensions.some((ext) => lower.endsWith(ext))) return false; + if ( + lower === '.secretscanignore' || + lower === '.gitignore' || + lower === '.gitattributes' || + lower.startsWith('license') || + lower.startsWith('changelog') || + lower.endsWith('.md') || + lower === 'package-lock.json' || + lower === 'pnpm-lock.yaml' || + lower === 'bun.lock' + ) { + return false; + } + if ( + lower.startsWith('node_modules/') || + lower.startsWith('dist/') || + lower.startsWith('coverage/') || + lower.startsWith('.gsd/') + ) { + return false; + } + if (lower.endsWith('.min.js') || lower.endsWith('.min.css')) return false; + return true; +} + +function getContent(file, mode) { + if (mode === 'staged') { + const staged = runGit(['show', `:${file}`]); + if (staged) return staged; + } + try { + return readFileSync(file, 'utf8'); + } catch { + return ''; + } +} + +function loadIgnorePatterns() { + if (!existsSync(IGNORE_FILE)) return []; + 
return readFileSync(IGNORE_FILE, 'utf8') + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith('#')); +} + +function isIgnored(file, lineContent, ignorePatterns) { + return ignorePatterns.some((pattern) => { + const splitIndex = pattern.indexOf(':'); + if (splitIndex > 0) { + const ignoreFile = pattern.slice(0, splitIndex); + const ignoreRegex = pattern.slice(splitIndex + 1); + if (file !== ignoreFile) return false; + try { + return new RegExp(ignoreRegex, 'i').test(lineContent); + } catch { + return false; + } + } + + try { + return new RegExp(pattern, 'i').test(lineContent); + } catch { + return false; + } + }); +} + +function resetRegex(regex) { + regex.lastIndex = 0; + return regex; +} + +const options = parseArgs(process.argv.slice(2)); +const files = getFiles(options) + .split(/\r?\n/) + .map((file) => file.trim()) + .filter(Boolean); + +if (files.length === 0) { + process.stdout.write('secret-scan: no files to scan\n'); + process.exit(0); +} + +const ignorePatterns = loadIgnorePatterns(); +let findings = 0; + +for (const file of files) { + if (!shouldScan(file)) continue; + const content = getContent(file, options.mode); + if (!content) continue; + + const lines = content.split(/\r?\n/); + for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) { + const line = lines[lineIndex]; + for (const pattern of PATTERNS) { + if (!resetRegex(pattern.regex).test(line)) continue; + if (isIgnored(file, line, ignorePatterns)) continue; + + process.stdout.write(`${RED}[SECRET DETECTED]${NC} ${YELLOW}${pattern.label}${NC}\n`); + process.stdout.write(` File: ${file}:${lineIndex + 1}\n`); + process.stdout.write(` Line: ${line.slice(0, 120)}...\n\n`); + findings++; + } + } +} + +if (findings > 0) { + process.stdout.write(`${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n`); + process.stdout.write(`${RED}Found ${findings} potential secret(s) in scanned files.${NC}\n`); + process.stdout.write(`${RED}Commit blocked. 
Remove the secrets or add exceptions${NC}\n`); + process.stdout.write(`${RED}to .secretscanignore if these are false positives.${NC}\n`); + process.stdout.write(`${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n`); + process.exit(1); +} + +process.stdout.write('secret-scan: no secrets detected ✓\n'); diff --git a/scripts/validate-pack.js b/scripts/validate-pack.js index e4bbe6277..b35bc1b5a 100644 --- a/scripts/validate-pack.js +++ b/scripts/validate-pack.js @@ -3,8 +3,8 @@ // Usage: npm run validate-pack (or node scripts/validate-pack.js) // Exit 0 = safe to publish, Exit 1 = broken package. -import { execSync } from 'node:child_process'; -import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { execFileSync } from 'node:child_process'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -15,8 +15,38 @@ const ROOT = resolve(__dirname, '..'); let tarball = null; let installDir = null; +let npmCacheDir = null; +const DEFAULT_MAX_BUFFER = 50 * 1024 * 1024; + +function getNpmCommand() { + return process.platform === 'win32' ? 'npm.cmd' : 'npm'; +} + +function runNpm(args, options = {}) { + return execFileSync(getNpmCommand(), args, { + cwd: ROOT, + encoding: 'utf8', + shell: process.platform === 'win32', + stdio: ['pipe', 'pipe', 'pipe'], + maxBuffer: DEFAULT_MAX_BUFFER, + env: { + ...process.env, + npm_config_cache: npmCacheDir ?? 
process.env.npm_config_cache, + }, + ...options, + }); +} + +function formatBytes(bytes) { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} try { + npmCacheDir = mkdtempSync(join(tmpdir(), 'validate-pack-npm-cache-')); + mkdirSync(npmCacheDir, { recursive: true }); + // --- Guard: workspace packages must not have @gsd/* cross-deps --- console.log('==> Checking workspace packages for @gsd/* cross-deps...'); const workspaces = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui']; @@ -42,12 +72,10 @@ try { // --- Pack tarball --- console.log('==> Packing tarball...'); - const packOutput = execSync('npm pack --ignore-scripts', { - cwd: ROOT, - encoding: 'utf8', - stdio: ['pipe', 'pipe', 'pipe'], - }); - const tarballName = packOutput.trim().split('\n').pop(); + const packOutput = runNpm(['pack', '--json', '--ignore-scripts']); + const packEntries = JSON.parse(packOutput); + const packEntry = Array.isArray(packEntries) ? packEntries[0] : null; + const tarballName = packEntry?.filename; tarball = join(ROOT, tarballName); if (!existsSync(tarball)) { @@ -55,12 +83,16 @@ try { process.exit(1); } - const stats = execSync(`du -h "${tarball}"`, { encoding: 'utf8' }).split('\t')[0].trim(); - console.log(`==> Tarball: ${tarballName} (${stats} compressed)`); + const stats = statSync(tarball); + console.log(`==> Tarball: ${tarballName} (${formatBytes(stats.size)} compressed)`); - // --- Check critical files using tar listing --- + // --- Check critical files using npm pack metadata --- console.log('==> Checking critical files...'); - const tarList = execSync(`tar tzf "${tarball}"`, { encoding: 'utf8', maxBuffer: 50 * 1024 * 1024 }); + const packedFiles = new Set( + Array.isArray(packEntry?.files) + ? 
packEntry.files.map((entry) => entry?.path).filter(Boolean) + : [], + ); const requiredFiles = [ 'dist/loader.js', @@ -73,7 +105,7 @@ try { let missing = false; for (const required of requiredFiles) { - if (!tarList.includes(`package/${required}`)) { + if (!packedFiles.has(required)) { console.log(` MISSING: ${required}`); missing = true; } @@ -91,10 +123,16 @@ try { writeFileSync(join(installDir, 'package.json'), JSON.stringify({ name: 'test-install', version: '1.0.0', private: true }, null, 2)); try { - const installOutput = execSync(`npm install "${tarball}"`, { + const installOutput = execFileSync(getNpmCommand(), ['install', tarball], { cwd: installDir, encoding: 'utf8', + shell: process.platform === 'win32', stdio: ['pipe', 'pipe', 'pipe'], + maxBuffer: DEFAULT_MAX_BUFFER, + env: { + ...process.env, + npm_config_cache: npmCacheDir, + }, }); console.log(installOutput); console.log('==> Install succeeded.'); @@ -145,11 +183,12 @@ try { process.exit(1); } try { - const versionOutput = execSync(`node "${loaderPath}" -v`, { + const versionOutput = execFileSync(process.execPath, [loaderPath, '-v'], { cwd: installDir, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 15000, + maxBuffer: DEFAULT_MAX_BUFFER, }).trim(); console.log(` gsd -v => ${versionOutput}`); if (!versionOutput.match(/^\d+\.\d+\.\d+/)) { @@ -173,4 +212,7 @@ try { if (tarball && existsSync(tarball)) { rmSync(tarball, { force: true }); } + if (npmCacheDir && existsSync(npmCacheDir)) { + rmSync(npmCacheDir, { recursive: true, force: true }); + } } diff --git a/scripts/with-env.mjs b/scripts/with-env.mjs new file mode 100644 index 000000000..a338ffb3f --- /dev/null +++ b/scripts/with-env.mjs @@ -0,0 +1,46 @@ +#!/usr/bin/env node + +import { spawn } from 'node:child_process'; + +const args = process.argv.slice(2); +const env = { ...process.env }; + +let separatorIndex = args.indexOf('--'); +let commandStart = separatorIndex >= 0 ? 
separatorIndex + 1 : 0; + +for (let i = 0; i < (separatorIndex >= 0 ? separatorIndex : args.length); i++) { + const arg = args[i]; + const eq = arg.indexOf('='); + if (eq <= 0) { + commandStart = i; + separatorIndex = -1; + break; + } + env[arg.slice(0, eq)] = arg.slice(eq + 1); +} + +const commandArgs = args.slice(commandStart); +if (commandArgs.length === 0) { + process.stderr.write('with-env: expected a command after environment assignments\n'); + process.exit(1); +} + +const [command, ...childArgs] = commandArgs; +const child = spawn(command, childArgs, { + stdio: 'inherit', + env, + shell: process.platform === 'win32', +}); + +child.on('exit', (code, signal) => { + if (signal) { + process.kill(process.pid, signal); + return; + } + process.exit(code ?? 0); +}); + +child.on('error', (error) => { + process.stderr.write(`with-env: failed to run ${command}: ${error.message}\n`); + process.exit(1); +}); diff --git a/src/cli.ts b/src/cli.ts index 5009f23b7..08e1e0452 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -18,6 +18,7 @@ import { ensureManagedTools } from './tool-bootstrap.js' import { loadStoredEnvKeys } from './wizard.js' import { migratePiCredentials } from './pi-migration.js' import { validateConfiguredModel } from './startup-model-validation.js' +import { shouldMigrateAnthropicToClaudeCode } from './provider-migrations.js' import { shouldRunOnboarding, runOnboarding } from './onboarding.js' import chalk from 'chalk' import { checkForUpdates } from './update-check.js' @@ -341,7 +342,7 @@ const modelsJsonPath = resolveModelsJsonPath() const modelRegistry = new ModelRegistry(authStorage, modelsJsonPath) markStartup('ModelRegistry') -const settingsManager = SettingsManager.create(agentDir) +const settingsManager = SettingsManager.create(process.cwd(), agentDir) applySecurityOverrides(settingsManager) markStartup('SettingsManager.create') @@ -470,7 +471,11 @@ if (isPrintMode) { // Migrate anthropic OAuth users to claude-code provider when CLI is available 
(#3772). // Anthropic blocks third-party apps from using subscription quotas — routing through // the local claude CLI binary is TOS-compliant. - if (modelRegistry.isProviderRequestReady('claude-code') && settingsManager.getDefaultProvider() === 'anthropic') { + if (shouldMigrateAnthropicToClaudeCode({ + authStorage, + isClaudeCodeReady: modelRegistry.isProviderRequestReady('claude-code'), + defaultProvider: settingsManager.getDefaultProvider(), + })) { const currentModelId = settingsManager.getDefaultModel() if (currentModelId) { const ccModel = modelRegistry.find('claude-code', currentModelId) @@ -662,7 +667,11 @@ markStartup('createAgentSession') // Migrate anthropic OAuth users to claude-code provider when CLI is available (#3772). // Anthropic blocks third-party apps from using subscription quotas — routing through // the local claude CLI binary is TOS-compliant. -if (modelRegistry.isProviderRequestReady('claude-code') && settingsManager.getDefaultProvider() === 'anthropic') { +if (shouldMigrateAnthropicToClaudeCode({ + authStorage, + isClaudeCodeReady: modelRegistry.isProviderRequestReady('claude-code'), + defaultProvider: settingsManager.getDefaultProvider(), +})) { const currentModelId = settingsManager.getDefaultModel() if (currentModelId) { const ccModel = modelRegistry.find('claude-code', currentModelId) diff --git a/src/loader.ts b/src/loader.ts index 1d3ce46a2..13e1605b4 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -110,6 +110,11 @@ if (!existsSync(appRoot)) { // GSD_CODING_AGENT_DIR — tells pi's getAgentDir() to return ~/.gsd/agent/ instead of ~/.gsd/agent/ process.env.GSD_CODING_AGENT_DIR = agentDir +// GSD_PKG_ROOT — absolute path to gsd-pi package root. Used by deployed extensions +// (e.g. auto.ts resume path) to import modules like resource-loader.js that live +// in the package tree, not in the deployed ~/.gsd/agent/ tree. 
+process.env.GSD_PKG_ROOT = gsdRoot + // RTK environment — make ~/.gsd/agent/bin visible to all child-process paths, // not just the bash tool, and force-disable RTK telemetry for GSD-managed use. applyRtkProcessEnv(process.env) diff --git a/src/mcp-server.ts b/src/mcp-server.ts index f7417235e..7486f60fa 100644 --- a/src/mcp-server.ts +++ b/src/mcp-server.ts @@ -19,10 +19,10 @@ export interface McpToolDef { // MCP SDK subpath imports use wildcard exports (./*) that NodeNext resolves // at runtime but TypeScript cannot statically type-check. We construct the // specifiers dynamically so tsc treats them as `any`. -// Use createRequire to resolve wildcard subpaths — CJS resolver auto-appends -// .js, which the ESM wildcard export map does not (#3603). -import { createRequire } from 'node:module' -const _require = createRequire(import.meta.url) +// +// Use explicit .js subpaths for modules that are loaded dynamically at runtime. +// Recent Node / SDK combinations do not reliably resolve the extensionless +// wildcard targets for `server/stdio` and `types` (#3914). 
const MCP_PKG = '@modelcontextprotocol/sdk' /** @@ -46,8 +46,8 @@ export async function startMcpServer(options: { const { tools, version = '0.0.0' } = options const serverMod = await import(`${MCP_PKG}/server`) - const stdioMod = await import(_require.resolve(`${MCP_PKG}/server/stdio`)) - const typesMod = await import(_require.resolve(`${MCP_PKG}/types`)) + const stdioMod = await import(`${MCP_PKG}/server/stdio.js`) + const typesMod = await import(`${MCP_PKG}/types.js`) const Server = serverMod.Server const StdioServerTransport = stdioMod.StdioServerTransport diff --git a/src/provider-migrations.ts b/src/provider-migrations.ts new file mode 100644 index 000000000..1e61c69df --- /dev/null +++ b/src/provider-migrations.ts @@ -0,0 +1,34 @@ +import type { AuthStorage } from "@gsd/pi-coding-agent" + +type AnthropicMigrationDeps = { + authStorage: Pick + isClaudeCodeReady: boolean + defaultProvider: string | undefined + env?: NodeJS.ProcessEnv +} + +export function hasDirectAnthropicApiKey( + authStorage: Pick, + env: NodeJS.ProcessEnv = process.env, +): boolean { + if ((env.ANTHROPIC_API_KEY ?? 
"").trim()) { + return true + } + + return authStorage.getCredentialsForProvider("anthropic").some((credential: any) => + credential?.type === "api_key" && typeof credential?.key === "string" && credential.key.trim().length > 0, + ) +} + +export function shouldMigrateAnthropicToClaudeCode({ + authStorage, + isClaudeCodeReady, + defaultProvider, + env = process.env, +}: AnthropicMigrationDeps): boolean { + if (!isClaudeCodeReady || defaultProvider !== "anthropic") { + return false + } + + return !hasDirectAnthropicApiKey(authStorage, env) +} diff --git a/src/resource-loader.ts b/src/resource-loader.ts index 901d8e1b1..1309578e2 100644 --- a/src/resource-loader.ts +++ b/src/resource-loader.ts @@ -2,7 +2,7 @@ import { DefaultResourceLoader, sortExtensionPaths } from '@gsd/pi-coding-agent' import { createHash } from 'node:crypto' import { homedir } from 'node:os' import { chmodSync, copyFileSync, cpSync, existsSync, lstatSync, mkdirSync, openSync, closeSync, readFileSync, readlinkSync, readdirSync, rmSync, statSync, symlinkSync, unlinkSync, writeFileSync } from 'node:fs' -import { dirname, join, relative, resolve } from 'node:path' +import { basename, dirname, join, relative, resolve } from 'node:path' import { fileURLToPath } from 'node:url' import { compareSemver } from './update-check.js' import { discoverExtensionEntryPaths } from './extension-discovery.js' @@ -287,33 +287,144 @@ function copyDirRecursive(src: string, dest: string): void { * ~/.gsd/agent/extensions/ have no ancestor node_modules, so imports of * @gsd/* packages fail. The symlink makes Node's standard resolution find * them without requiring every call site to use jiti. 
+ * + * Layout differences by install method: + * - Source/monorepo: packageRoot/node_modules has everything → simple symlink + * - npm/bun global: deps hoisted to dirname(packageRoot), including @gsd/* → simple symlink + * - pnpm global: external deps hoisted, but @gsd/* stays in packageRoot/node_modules + * → merged directory with symlinks from both roots (#3529, #3564) */ function ensureNodeModulesSymlink(agentDir: string): void { const agentNodeModules = join(agentDir, 'node_modules') - const gsdNodeModules = join(packageRoot, 'node_modules') + const internalNodeModules = join(packageRoot, 'node_modules') + const hoistedNodeModules = dirname(packageRoot) + const isGlobalInstall = basename(hoistedNodeModules) === 'node_modules' + if (!isGlobalInstall) { + // Source/monorepo: internal node_modules has everything + reconcileSymlink(agentNodeModules, internalNodeModules) + return + } + + // Global install: check if workspace scopes (@gsd/*) are hoisted. + // npm/bun hoist everything; pnpm keeps workspace packages internal. 
+ if (!hasMissingWorkspaceScopes(hoistedNodeModules, internalNodeModules)) { + // Everything is hoisted — simple symlink to parent node_modules + reconcileSymlink(agentNodeModules, hoistedNodeModules) + return + } + + // pnpm-style layout: create a real directory merging both roots + reconcileMergedNodeModules(agentNodeModules, hoistedNodeModules, internalNodeModules) +} + +/** Check if any @gsd* scopes exist in internal but not in hoisted node_modules */ +function hasMissingWorkspaceScopes(hoisted: string, internal: string): boolean { + if (!existsSync(internal)) return false try { - const stat = lstatSync(agentNodeModules) + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (entry.isDirectory() && entry.name.startsWith('@gsd') && + !existsSync(join(hoisted, entry.name))) { + return true + } + } + } catch { /* non-fatal */ } + return false +} +/** Ensure a symlink at `link` points to `target`, fixing stale/wrong entries */ +function reconcileSymlink(link: string, target: string): void { + try { + const stat = lstatSync(link) if (stat.isSymbolicLink()) { - const existing = readlinkSync(agentNodeModules) - // Symlink exists — verify it points to the correct, existing target - if (existing === gsdNodeModules && existsSync(agentNodeModules)) return // correct and target exists - // Stale or wrong target — remove and recreate - unlinkSync(agentNodeModules) + const existing = readlinkSync(link) + if (existing === target && existsSync(link)) return // correct and target exists + unlinkSync(link) } else { - // Real directory (not a symlink) is blocking — remove it - rmSync(agentNodeModules, { recursive: true, force: true }) + // Real directory (or merged dir from previous pnpm fix) — remove it + rmSync(link, { recursive: true, force: true }) } } catch { - // lstatSync throws if path doesn't exist — that's fine, we'll create below + // lstatSync throws if path doesn't exist — fine, we'll create below } try { - symlinkSync(gsdNodeModules, 
agentNodeModules, 'junction') + symlinkSync(target, link, 'junction') } catch (err) { - // This failure makes GSD non-functional — extensions can't resolve @gsd/* packages - console.error(`[gsd] WARN: Failed to symlink ${agentNodeModules} → ${gsdNodeModules}: ${err instanceof Error ? err.message : err}`) + console.error(`[gsd] WARN: Failed to symlink ${link} → ${target}: ${err instanceof Error ? err.message : err}`) + } +} + +/** + * Create a real node_modules directory containing symlinks from both the + * hoisted root (external deps) and internal root (@gsd/* workspace packages). + * Used for pnpm global installs where @gsd/* isn't hoisted. + */ +function reconcileMergedNodeModules( + agentNodeModules: string, + hoisted: string, + internal: string, +): void { + // Fast path: if already merged for this packageRoot + same directory contents, skip. + // The fingerprint includes entry names from both roots so `pnpm add/remove` triggers rebuild. + const marker = join(agentNodeModules, '.gsd-merged') + const fingerprint = mergedFingerprint(hoisted, internal) + try { + if (existsSync(marker) && readFileSync(marker, 'utf-8').trim() === fingerprint) return + } catch { /* rebuild */ } + + // Remove any existing symlink or stale merged directory + try { + const stat = lstatSync(agentNodeModules) + if (stat.isSymbolicLink()) { + unlinkSync(agentNodeModules) + } else { + rmSync(agentNodeModules, { recursive: true, force: true }) + } + } catch { /* doesn't exist */ } + + mkdirSync(agentNodeModules, { recursive: true }) + + let linkedCount = 0 + + // Symlink entries from the hoisted node_modules (external deps) + try { + for (const entry of readdirSync(hoisted, { withFileTypes: true })) { + // Skip the gsd-pi package itself and dotfiles + if (entry.name === basename(packageRoot)) continue + if (entry.name.startsWith('.')) continue + try { symlinkSync(join(hoisted, entry.name), join(agentNodeModules, entry.name)); linkedCount++ } catch { /* skip individual */ } + } + } catch 
(err) { + console.error(`[gsd] WARN: Failed to read hoisted node_modules at ${hoisted}: ${err instanceof Error ? err.message : err}`) + } + + // Overlay @gsd* workspace scopes from internal node_modules + try { + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (!entry.name.startsWith('@gsd')) continue + const link = join(agentNodeModules, entry.name) + try { lstatSync(link); unlinkSync(link) } catch { /* didn't exist */ } + try { symlinkSync(join(internal, entry.name), link); linkedCount++ } catch { /* skip individual */ } + } + } catch (err) { + console.error(`[gsd] WARN: Failed to read internal node_modules at ${internal}: ${err instanceof Error ? err.message : err}`) + } + + // Only stamp marker if we actually linked something — avoids caching a broken state + if (linkedCount > 0) { + try { writeFileSync(marker, fingerprint) } catch { /* non-fatal */ } + } +} + +/** Build a cache fingerprint from packageRoot + sorted entry names of both directories */ +function mergedFingerprint(hoisted: string, internal: string): string { + try { + const h = readdirSync(hoisted).sort().join(',') + const i = readdirSync(internal).sort().join(',') + return `${packageRoot}\n${h}\n${i}` + } catch { + return packageRoot // fallback: at least invalidate on version change } } diff --git a/src/resources/extensions/claude-code-cli/stream-adapter.ts b/src/resources/extensions/claude-code-cli/stream-adapter.ts index 465d48759..a6efa439a 100644 --- a/src/resources/extensions/claude-code-cli/stream-adapter.ts +++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts @@ -16,10 +16,12 @@ import type { SimpleStreamOptions, ToolCall, } from "@gsd/pi-ai"; +import type { ExtensionUIContext } from "@gsd/pi-coding-agent"; import { EventStream } from "@gsd/pi-ai"; import { execSync } from "node:child_process"; import { PartialMessageBuilder, ZERO_USAGE, mapUsage } from "./partial-builder.js"; import { buildWorkflowMcpServers } from "../gsd/workflow-mcp.js"; +import 
{ showInterviewRound, type Question, type RoundResult } from "../shared/tui.js"; import type { SDKAssistantMessage, SDKMessage, @@ -45,6 +47,58 @@ type ToolCallWithExternalResult = ToolCall & { externalResult?: ExternalToolResultPayload; }; +interface ClaudeCodeStreamOptions extends SimpleStreamOptions { + extensionUIContext?: ExtensionUIContext; +} + +interface SdkElicitationRequestOption { + const?: string; + title?: string; +} + +interface SdkElicitationFieldSchema { + type?: string; + title?: string; + description?: string; + format?: string; + writeOnly?: boolean; + oneOf?: SdkElicitationRequestOption[]; + items?: { + anyOf?: SdkElicitationRequestOption[]; + }; +} + +interface SdkElicitationRequest { + serverName: string; + message: string; + mode?: "form" | "url"; + requestedSchema?: { + type?: string; + properties?: Record; + required?: string[]; + }; +} + +interface SdkElicitationResult { + action: "accept" | "decline" | "cancel"; + content?: Record; +} + +interface ParsedElicitationQuestion extends Question { + noteFieldId?: string; +} + +interface ParsedTextInputField { + id: string; + title: string; + description: string; + required: boolean; + secure: boolean; +} + +const OTHER_OPTION_LABEL = "None of the above"; +const SENSITIVE_FIELD_PATTERN = /(password|passphrase|secret|token|api[_\s-]*key|private[_\s-]*key|credential)/i; + // --------------------------------------------------------------------------- // Stream factory // --------------------------------------------------------------------------- @@ -172,6 +226,286 @@ export function makeStreamExhaustedErrorMessage(model: string, lastTextContent: return message; } +function readElicitationChoices(options: SdkElicitationRequestOption[] | undefined): string[] { + if (!Array.isArray(options)) return []; + return options + .map((option) => (typeof option?.const === "string" ? option.const : typeof option?.title === "string" ? 
option.title : "")) + .filter((option): option is string => option.length > 0); +} + +export function parseAskUserQuestionsElicitation( + request: Pick, +): ParsedElicitationQuestion[] | null { + if (request.mode && request.mode !== "form") return null; + const properties = request.requestedSchema?.properties; + if (!properties || typeof properties !== "object") return null; + + const questions: ParsedElicitationQuestion[] = []; + + for (const [fieldId, rawField] of Object.entries(properties)) { + if (fieldId.endsWith("__note")) continue; + if (!rawField || typeof rawField !== "object") return null; + + const header = typeof rawField.title === "string" && rawField.title.length > 0 ? rawField.title : fieldId; + const question = typeof rawField.description === "string" ? rawField.description : ""; + + if (rawField.type === "array") { + const options = readElicitationChoices(rawField.items?.anyOf).map((label) => ({ label, description: "" })); + if (options.length === 0) return null; + questions.push({ + id: fieldId, + header, + question, + options, + allowMultiple: true, + }); + continue; + } + + if (rawField.type === "string") { + const noteFieldId = Object.prototype.hasOwnProperty.call(properties, `${fieldId}__note`) + ? `${fieldId}__note` + : undefined; + const options = readElicitationChoices(rawField.oneOf) + .filter((label) => label !== OTHER_OPTION_LABEL) + .map((label) => ({ label, description: "" })); + if (options.length === 0) return null; + questions.push({ + id: fieldId, + header, + question, + options, + noteFieldId, + }); + continue; + } + + return null; + } + + return questions.length > 0 ? 
questions : null; +} + +function isSecureElicitationField( + requestMessage: string, + fieldId: string, + field: SdkElicitationFieldSchema, +): boolean { + if (field.format === "password") return true; + if (field.writeOnly === true) return true; + + const rawField = field as Record; + if (rawField.sensitive === true || rawField["x-sensitive"] === true) return true; + + const haystack = [ + requestMessage, + fieldId.replace(/[_-]+/g, " "), + typeof field.title === "string" ? field.title : "", + typeof field.description === "string" ? field.description : "", + ] + .join(" ") + .toLowerCase(); + + return SENSITIVE_FIELD_PATTERN.test(haystack); +} + +export function parseTextInputElicitation( + request: Pick, +): ParsedTextInputField[] | null { + if (request.mode && request.mode !== "form") return null; + const schema = request.requestedSchema as + | ({ properties?: Record; keys?: Record } & Record) + | undefined; + const fieldsSource = schema?.properties && typeof schema.properties === "object" + ? schema.properties + : schema?.keys && typeof schema.keys === "object" + ? schema.keys + : undefined; + if (!fieldsSource) return null; + + const requiredSet = new Set( + Array.isArray(request.requestedSchema?.required) + ? request.requestedSchema.required.filter((value): value is string => typeof value === "string") + : [], + ); + + const fields: ParsedTextInputField[] = []; + for (const [fieldId, field] of Object.entries(fieldsSource)) { + if (!field || typeof field !== "object") continue; + if (field.type !== "string") continue; + if (Array.isArray(field.oneOf) && field.oneOf.length > 0) continue; + + fields.push({ + id: fieldId, + title: typeof field.title === "string" && field.title.length > 0 ? field.title : fieldId, + description: typeof field.description === "string" ? field.description : "", + required: requiredSet.has(fieldId), + secure: isSecureElicitationField(request.message, fieldId, field), + }); + } + + return fields.length > 0 ? 
fields : null; +} + +export function roundResultToElicitationContent( + questions: ParsedElicitationQuestion[], + result: RoundResult, +): Record { + const content: Record = {}; + + for (const question of questions) { + const answer = result.answers[question.id]; + if (!answer) continue; + + if (question.allowMultiple) { + const selected = Array.isArray(answer.selected) ? answer.selected : [answer.selected]; + content[question.id] = selected; + continue; + } + + const selected = Array.isArray(answer.selected) ? answer.selected[0] ?? "" : answer.selected; + content[question.id] = selected; + if (question.noteFieldId && selected === OTHER_OPTION_LABEL && answer.notes.trim().length > 0) { + content[question.noteFieldId] = answer.notes.trim(); + } + } + + return content; +} + +function buildElicitationPromptTitle(request: SdkElicitationRequest, question: ParsedElicitationQuestion): string { + const parts = [ + request.serverName ? `[${request.serverName}]` : "", + question.header, + question.question, + ].filter((part) => part && part.trim().length > 0); + return parts.join("\n\n"); +} + +async function promptElicitationWithDialogs( + request: SdkElicitationRequest, + questions: ParsedElicitationQuestion[], + ui: ExtensionUIContext, + signal: AbortSignal, +): Promise { + const content: Record = {}; + + for (const question of questions) { + const title = buildElicitationPromptTitle(request, question); + + if (question.allowMultiple) { + const selected = await ui.select(title, question.options.map((option) => option.label), { + allowMultiple: true, + signal, + }); + if (Array.isArray(selected)) { + if (selected.length === 0) return { action: "cancel" }; + content[question.id] = selected; + continue; + } + if (typeof selected === "string" && selected.length > 0) { + content[question.id] = [selected]; + continue; + } + return { action: "cancel" }; + } + + const selected = await ui.select(title, [...question.options.map((option) => option.label), OTHER_OPTION_LABEL], { 
signal }); + if (typeof selected !== "string" || selected.length === 0) { + return { action: "cancel" }; + } + + content[question.id] = selected; + if (question.noteFieldId && selected === OTHER_OPTION_LABEL) { + const note = await ui.input(`${question.header} note`, "Explain your answer", { signal }); + if (note === undefined) return { action: "cancel" }; + if (note.trim().length > 0) { + content[question.noteFieldId] = note.trim(); + } + } + } + + return { action: "accept", content }; +} + +function buildTextInputPromptTitle(request: SdkElicitationRequest, field: ParsedTextInputField): string { + const parts = [ + request.serverName ? `[${request.serverName}]` : "", + field.title, + field.description, + ].filter((part) => typeof part === "string" && part.trim().length > 0); + return parts.join("\n\n"); +} + +function buildTextInputPlaceholder(field: ParsedTextInputField): string | undefined { + const desc = field.description.trim(); + if (!desc) return field.required ? "Required" : "Leave empty to skip"; + + const formatLine = desc + .split(/\r?\n/) + .map((line) => line.trim()) + .find((line) => /^format:/i.test(line)); + + if (!formatLine) return field.required ? "Required" : "Leave empty to skip"; + const hint = formatLine.replace(/^format:\s*/i, "").trim(); + return hint.length > 0 ? hint : field.required ? "Required" : "Leave empty to skip"; +} + +async function promptTextInputElicitation( + request: SdkElicitationRequest, + fields: ParsedTextInputField[], + ui: ExtensionUIContext, + signal: AbortSignal, +): Promise { + const content: Record = {}; + + for (const field of fields) { + const value = await ui.input( + buildTextInputPromptTitle(request, field), + buildTextInputPlaceholder(field), + { signal, ...(field.secure ? 
{ secure: true } : {}) }, + ); + if (value === undefined) { + return { action: "cancel" }; + } + content[field.id] = value; + } + + return { action: "accept", content }; +} + +export function createClaudeCodeElicitationHandler( + ui: ExtensionUIContext | undefined, +): ((request: SdkElicitationRequest, options: { signal: AbortSignal }) => Promise) | undefined { + if (!ui) return undefined; + + return async (request, { signal }) => { + if (request.mode === "url") { + return { action: "decline" }; + } + + const questions = parseAskUserQuestionsElicitation(request); + if (questions) { + const interviewResult = await showInterviewRound(questions, { signal }, { ui } as any).catch(() => undefined); + if (interviewResult && Object.keys(interviewResult.answers).length > 0) { + return { + action: "accept", + content: roundResultToElicitationContent(questions, interviewResult), + }; + } + + return promptElicitationWithDialogs(request, questions, ui, signal); + } + + const textFields = parseTextInputElicitation(request); + if (textFields) { + return promptTextInputElicitation(request, textFields, ui, signal); + } + + return { action: "decline" }; + }; +} + // --------------------------------------------------------------------------- // SDK options builder // --------------------------------------------------------------------------- @@ -182,8 +516,13 @@ export function makeStreamExhaustedErrorMessage(model: string, lastTextContent: * Extracted for testability — callers can verify session persistence, * beta flags, and other configuration without mocking the full SDK. 
*/ -export function buildSdkOptions(modelId: string, prompt: string): Record { +export function buildSdkOptions( + modelId: string, + prompt: string, + extraOptions: Record = {}, +): Record { const mcpServers = buildWorkflowMcpServers(); + const disallowedTools = ["AskUserQuestion"]; return { pathToClaudeCodeExecutable: getClaudePath(), model: modelId, @@ -194,8 +533,10 @@ export function buildSdkOptions(modelId: string, prompt: string): Record, ): void { - for (const block of toolCalls) { - if (block.type !== "toolCall") continue; + for (const block of toolBlocks) { + if (block.type !== "toolCall" && block.type !== "serverToolUse") continue; const externalResult = toolResultsById.get(block.id); if (!externalResult) continue; - (block as ToolCallWithExternalResult).externalResult = externalResult; + (block as ToolCallWithExternalResult & { id: string }).externalResult = externalResult; } } @@ -337,8 +678,8 @@ async function pumpSdkMessages( /** Track the last text content seen across all assistant turns for the final message. */ let lastTextContent = ""; let lastThinkingContent = ""; - /** Collect tool calls from intermediate SDK turns for tool_execution events. */ - const intermediateToolCalls: AssistantMessage["content"] = []; + /** Collect tool blocks from intermediate SDK turns for tool execution rendering. */ + const intermediateToolBlocks: AssistantMessage["content"] = []; /** Preserve real external tool results from Claude Code's synthetic user messages. */ const toolResultsById = new Map(); @@ -359,7 +700,17 @@ async function pumpSdkMessages( } const prompt = buildPromptFromContext(context); - const sdkOpts = buildSdkOptions(modelId, prompt); + const sdkOpts = buildSdkOptions( + modelId, + prompt, + typeof (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext === "object" + ? 
{ + onElicitation: createClaudeCodeElicitationHandler( + (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext, + ), + } + : {}, + ); const queryResult = sdk.query({ prompt, @@ -439,9 +790,9 @@ async function pumpSdkMessages( lastTextContent = block.text; } else if (block.type === "thinking" && block.thinking) { lastThinkingContent = block.thinking; - } else if (block.type === "toolCall") { - // Collect tool calls for externalToolExecution rendering - intermediateToolCalls.push(block); + } else if (block.type === "toolCall" || block.type === "serverToolUse") { + // Collect tool blocks for externalToolExecution rendering + intermediateToolBlocks.push(block); } } } @@ -451,24 +802,33 @@ async function pumpSdkMessages( for (const { toolUseId, result } of extractToolResultsFromSdkUserMessage(msg as SDKUserMessage)) { toolResultsById.set(toolUseId, result); } - attachExternalResultsToToolCalls(intermediateToolCalls, toolResultsById); + attachExternalResultsToToolBlocks(intermediateToolBlocks, toolResultsById); // Push a synthetic toolcall_end for each tool call from this turn // so the TUI can render tool results in real-time during the SDK // session instead of waiting until the entire session completes. if (builder) { for (const block of builder.message.content) { - if (block.type !== "toolCall") continue; const extResult = (block as ToolCallWithExternalResult).externalResult; if (!extResult) continue; - // Push a toolcall_end with result attached so the chat-controller - // can call updateResult on the pending ToolExecutionComponent. - stream.push({ - type: "toolcall_end", - contentIndex: builder.message.content.indexOf(block), - toolCall: block, - partial: builder.message, - }); + const contentIndex = builder.message.content.indexOf(block); + if (contentIndex < 0) continue; + // Push synthetic completion events with result attached so the + // chat-controller can update pending ToolExecutionComponents. 
+ if (block.type === "toolCall") { + stream.push({ + type: "toolcall_end", + contentIndex, + toolCall: block, + partial: builder.message, + }); + } else if (block.type === "serverToolUse") { + stream.push({ + type: "server_tool_use", + contentIndex, + partial: builder.message, + }); + } } } @@ -486,8 +846,8 @@ async function pumpSdkMessages( const finalContent: AssistantMessage["content"] = []; // Add tool calls from intermediate turns first (renders above text) - attachExternalResultsToToolCalls(intermediateToolCalls, toolResultsById); - finalContent.push(...intermediateToolCalls); + attachExternalResultsToToolBlocks(intermediateToolBlocks, toolResultsById); + finalContent.push(...intermediateToolBlocks); // Add text/thinking from the last turn if (builder && builder.message.content.length > 0) { diff --git a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts index 28d4efdb8..082b40da2 100644 --- a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts +++ b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts @@ -7,9 +7,13 @@ import { makeStreamExhaustedErrorMessage, buildPromptFromContext, buildSdkOptions, + createClaudeCodeElicitationHandler, extractToolResultsFromSdkUserMessage, getClaudeLookupCommand, + parseAskUserQuestionsElicitation, + parseTextInputElicitation, parseClaudeLookupOutput, + roundResultToElicitationContent, } from "../stream-adapter.ts"; import type { Context, Message } from "@gsd/pi-ai"; import type { SDKUserMessage } from "../sdk-types.ts"; @@ -217,6 +221,35 @@ describe("stream-adapter — session persistence (#2859)", () => { assert.equal(srv.env.GSD_CLI_PATH, "/tmp/gsd"); assert.equal(srv.env.GSD_PERSIST_WRITE_GATE_STATE, "1"); assert.equal(srv.env.GSD_WORKFLOW_PROJECT_ROOT, "/tmp/project"); + assert.deepEqual(options.disallowedTools, ["AskUserQuestion"]); + } finally { + process.env.GSD_WORKFLOW_MCP_COMMAND = 
prev.GSD_WORKFLOW_MCP_COMMAND; + process.env.GSD_WORKFLOW_MCP_NAME = prev.GSD_WORKFLOW_MCP_NAME; + process.env.GSD_WORKFLOW_MCP_ARGS = prev.GSD_WORKFLOW_MCP_ARGS; + process.env.GSD_WORKFLOW_MCP_ENV = prev.GSD_WORKFLOW_MCP_ENV; + process.env.GSD_WORKFLOW_MCP_CWD = prev.GSD_WORKFLOW_MCP_CWD; + } + }); + + test("buildSdkOptions disables AskUserQuestion for custom workflow MCP server names", () => { + const prev = { + GSD_WORKFLOW_MCP_COMMAND: process.env.GSD_WORKFLOW_MCP_COMMAND, + GSD_WORKFLOW_MCP_NAME: process.env.GSD_WORKFLOW_MCP_NAME, + GSD_WORKFLOW_MCP_ARGS: process.env.GSD_WORKFLOW_MCP_ARGS, + GSD_WORKFLOW_MCP_ENV: process.env.GSD_WORKFLOW_MCP_ENV, + GSD_WORKFLOW_MCP_CWD: process.env.GSD_WORKFLOW_MCP_CWD, + }; + try { + process.env.GSD_WORKFLOW_MCP_COMMAND = "node"; + process.env.GSD_WORKFLOW_MCP_NAME = "custom-workflow"; + process.env.GSD_WORKFLOW_MCP_ARGS = JSON.stringify(["packages/mcp-server/dist/cli.js"]); + process.env.GSD_WORKFLOW_MCP_ENV = JSON.stringify({ GSD_CLI_PATH: "/tmp/gsd" }); + process.env.GSD_WORKFLOW_MCP_CWD = "/tmp/project"; + + const options = buildSdkOptions("claude-sonnet-4-20250514", "test"); + const mcpServers = options.mcpServers as Record; + assert.ok(mcpServers?.["custom-workflow"], "expected custom workflow server config"); + assert.deepEqual(options.disallowedTools, ["AskUserQuestion"]); } finally { process.env.GSD_WORKFLOW_MCP_COMMAND = prev.GSD_WORKFLOW_MCP_COMMAND; process.env.GSD_WORKFLOW_MCP_NAME = prev.GSD_WORKFLOW_MCP_NAME; @@ -252,6 +285,9 @@ describe("stream-adapter — session persistence (#2859)", () => { const mcpServers = (options as any).mcpServers; if (mcpServers) { assert.ok(mcpServers["gsd-workflow"], "if present, must be gsd-workflow"); + assert.deepEqual((options as any).disallowedTools, ["AskUserQuestion"]); + } else { + assert.deepEqual((options as any).disallowedTools, ["AskUserQuestion"]); } rmSync(emptyDir, { recursive: true, force: true }); } finally { @@ -298,6 +334,7 @@ describe("stream-adapter — session 
persistence (#2859)", () => { assert.equal(srv.env.GSD_CLI_PATH, "/tmp/gsd"); assert.equal(srv.env.GSD_PERSIST_WRITE_GATE_STATE, "1"); assert.equal(srv.env.GSD_WORKFLOW_PROJECT_ROOT, resolvedRepoDir); + assert.deepEqual(options.disallowedTools, ["AskUserQuestion"]); } finally { process.chdir(originalCwd); rmSync(repoDir, { recursive: true, force: true }); @@ -309,6 +346,286 @@ describe("stream-adapter — session persistence (#2859)", () => { process.env.GSD_CLI_PATH = prev.GSD_CLI_PATH; } }); + + test("buildSdkOptions preserves runtime callbacks such as onElicitation", () => { + const prev = { + GSD_WORKFLOW_MCP_COMMAND: process.env.GSD_WORKFLOW_MCP_COMMAND, + GSD_WORKFLOW_MCP_NAME: process.env.GSD_WORKFLOW_MCP_NAME, + GSD_WORKFLOW_MCP_ARGS: process.env.GSD_WORKFLOW_MCP_ARGS, + GSD_WORKFLOW_MCP_ENV: process.env.GSD_WORKFLOW_MCP_ENV, + GSD_WORKFLOW_MCP_CWD: process.env.GSD_WORKFLOW_MCP_CWD, + }; + const onElicitation = async () => ({ action: "decline" as const }); + try { + delete process.env.GSD_WORKFLOW_MCP_COMMAND; + delete process.env.GSD_WORKFLOW_MCP_NAME; + delete process.env.GSD_WORKFLOW_MCP_ARGS; + delete process.env.GSD_WORKFLOW_MCP_ENV; + delete process.env.GSD_WORKFLOW_MCP_CWD; + const options = buildSdkOptions("claude-sonnet-4-20250514", "test", { onElicitation }); + assert.equal(options.onElicitation, onElicitation); + } finally { + process.env.GSD_WORKFLOW_MCP_COMMAND = prev.GSD_WORKFLOW_MCP_COMMAND; + process.env.GSD_WORKFLOW_MCP_NAME = prev.GSD_WORKFLOW_MCP_NAME; + process.env.GSD_WORKFLOW_MCP_ARGS = prev.GSD_WORKFLOW_MCP_ARGS; + process.env.GSD_WORKFLOW_MCP_ENV = prev.GSD_WORKFLOW_MCP_ENV; + process.env.GSD_WORKFLOW_MCP_CWD = prev.GSD_WORKFLOW_MCP_CWD; + } + }); +}); + +describe("stream-adapter — MCP elicitation bridge", () => { + const askUserQuestionsRequest = { + serverName: "gsd-workflow", + message: "Please answer the following question(s).", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + properties: { + 
storage_scope: { + type: "string", + title: "Storage", + description: "Does this app need to sync across devices?", + oneOf: [ + { const: "Local-only (Recommended)", title: "Local-only (Recommended)" }, + { const: "Cloud-synced", title: "Cloud-synced" }, + { const: "None of the above", title: "None of the above" }, + ], + }, + storage_scope__note: { + type: "string", + title: "Storage Note", + description: "Optional note for None of the above.", + }, + platform: { + type: "array", + title: "Platform", + description: "Where should it run?", + items: { + anyOf: [ + { const: "Web", title: "Web" }, + { const: "Desktop", title: "Desktop" }, + { const: "Mobile", title: "Mobile" }, + ], + }, + }, + }, + }, + }; + + test("parseAskUserQuestionsElicitation rebuilds interview questions from the MCP schema", () => { + const questions = parseAskUserQuestionsElicitation(askUserQuestionsRequest); + assert.deepEqual(questions, [ + { + id: "storage_scope", + header: "Storage", + question: "Does this app need to sync across devices?", + options: [ + { label: "Local-only (Recommended)", description: "" }, + { label: "Cloud-synced", description: "" }, + ], + noteFieldId: "storage_scope__note", + }, + { + id: "platform", + header: "Platform", + question: "Where should it run?", + options: [ + { label: "Web", description: "" }, + { label: "Desktop", description: "" }, + { label: "Mobile", description: "" }, + ], + allowMultiple: true, + }, + ]); + }); + + test("roundResultToElicitationContent preserves notes for None of the above", () => { + const questions = parseAskUserQuestionsElicitation(askUserQuestionsRequest); + assert.ok(questions); + + const content = roundResultToElicitationContent(questions, { + endInterview: false, + answers: { + storage_scope: { + selected: "None of the above", + notes: "Needs selective sync later", + }, + platform: { + selected: ["Web", "Desktop"], + notes: "", + }, + }, + }); + + assert.deepEqual(content, { + storage_scope: "None of the above", + 
storage_scope__note: "Needs selective sync later", + platform: ["Web", "Desktop"], + }); + }); + + test("createClaudeCodeElicitationHandler accepts interview-style answers from custom UI", async () => { + const handler = createClaudeCodeElicitationHandler({ + custom: async (_factory: any) => ({ + endInterview: false, + answers: { + storage_scope: { + selected: "Cloud-synced", + notes: "", + }, + platform: { + selected: ["Web", "Mobile"], + notes: "", + }, + }, + }), + } as any); + + assert.ok(handler); + const result = await handler!(askUserQuestionsRequest, { signal: new AbortController().signal }); + assert.deepEqual(result, { + action: "accept", + content: { + storage_scope: "Cloud-synced", + platform: ["Web", "Mobile"], + }, + }); + }); + + test("createClaudeCodeElicitationHandler falls back to dialog prompts when custom UI is unavailable", async () => { + const ui = { + custom: async () => undefined, + select: async (_title: string, options: string[], opts?: { allowMultiple?: boolean }) => { + if (opts?.allowMultiple) return ["Desktop", "Mobile"]; + return options.includes("None of the above") ? 
"None of the above" : options[0]; + }, + input: async () => "CLI-only deployment target", + }; + const handler = createClaudeCodeElicitationHandler(ui as any); + assert.ok(handler); + + const result = await handler!(askUserQuestionsRequest, { signal: new AbortController().signal }); + assert.deepEqual(result, { + action: "accept", + content: { + storage_scope: "None of the above", + storage_scope__note: "CLI-only deployment target", + platform: ["Desktop", "Mobile"], + }, + }); + }); + + test("parseTextInputElicitation recognizes secure free-text MCP forms", () => { + const request = { + serverName: "gsd-workflow", + message: "Enter values for environment variables.", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + properties: { + TEST_PASSWORD: { + type: "string", + title: "TEST_PASSWORD", + description: "Format: min 8 characters\nLeave empty to skip.", + }, + PROJECT_NAME: { + type: "string", + title: "PROJECT_NAME", + description: "Human-readable project name.", + }, + }, + }, + }; + + const parsed = parseTextInputElicitation(request as any); + assert.deepEqual(parsed, [ + { + id: "TEST_PASSWORD", + title: "TEST_PASSWORD", + description: "Format: min 8 characters\nLeave empty to skip.", + required: false, + secure: true, + }, + { + id: "PROJECT_NAME", + title: "PROJECT_NAME", + description: "Human-readable project name.", + required: false, + secure: false, + }, + ]); + }); + + test("parseTextInputElicitation accepts legacy keys schema and skips unsupported fields", () => { + const request = { + serverName: "gsd-workflow", + message: "Enter secure values", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + keys: { + API_TOKEN: { + type: "string", + title: "API_TOKEN", + description: "Leave empty to skip.", + }, + META: { + type: "object", + title: "metadata", + }, + }, + }, + }; + + const parsed = parseTextInputElicitation(request as any); + assert.deepEqual(parsed, [ + { + id: "API_TOKEN", + title: "API_TOKEN", 
+ description: "Leave empty to skip.", + required: false, + secure: true, + }, + ]); + }); + + test("createClaudeCodeElicitationHandler collects secure_env_collect fields through input dialogs", async () => { + const secureRequest = { + serverName: "gsd-workflow", + message: "Enter values for environment variables.", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + properties: { + TEST_PASSWORD: { + type: "string", + title: "TEST_PASSWORD", + description: "Format: Your secure testing password\nLeave empty to skip.", + }, + }, + }, + }; + + const inputCalls: Array<{ opts?: { secure?: boolean } }> = []; + const handler = createClaudeCodeElicitationHandler({ + input: async (_title: string, _placeholder?: string, opts?: { secure?: boolean }) => { + inputCalls.push({ opts }); + return "super-secret"; + }, + } as any); + assert.ok(handler); + + const result = await handler!(secureRequest as any, { signal: new AbortController().signal }); + assert.deepEqual(result, { + action: "accept", + content: { + TEST_PASSWORD: "super-secret", + }, + }); + assert.equal(inputCalls.length, 1); + assert.equal(inputCalls[0]?.opts?.secure, true, "secure_env_collect fields should request secure input"); + }); }); describe("stream-adapter — Windows Claude path lookup (#3770)", () => { diff --git a/src/resources/extensions/get-secrets-from-user.ts b/src/resources/extensions/get-secrets-from-user.ts index a8f1cfe36..967752048 100644 --- a/src/resources/extensions/get-secrets-from-user.ts +++ b/src/resources/extensions/get-secrets-from-user.ts @@ -126,7 +126,7 @@ async function collectOneSecret( ): Promise { if (!ctx.hasUI) return null; - return ctx.ui.custom((tui: any, theme: any, _kb: any, done: (r: string | null) => void) => { + const customResult = await ctx.ui.custom((tui: any, theme: any, _kb: any, done: (r: string | null) => void) => { let value = ""; let cachedLines: string[] | undefined; @@ -223,6 +223,29 @@ async function collectOneSecret( handleInput, }; 
}); + + // RPC/web surfaces may not implement ctx.ui.custom(). Fall back to a + // standard input prompt so users can still provide the secret. + if (customResult !== undefined) { + return customResult; + } + + if (typeof ctx.ui?.input !== "function") { + return null; + } + + const inputTitle = `Secure value for ${keyName} (${pageIndex + 1}/${totalPages})`; + const inputPlaceholder = hint || "Enter secret value"; + const inputResult = await ctx.ui.input( + inputTitle, + inputPlaceholder, + { secure: true }, + ); + if (typeof inputResult !== "string") { + return null; + } + const trimmed = inputResult.trim(); + return trimmed.length > 0 ? trimmed : null; } /** diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts index 7dc1593f8..ce33bda61 100644 --- a/src/resources/extensions/gsd/auto-model-selection.ts +++ b/src/resources/extensions/gsd/auto-model-selection.ts @@ -5,12 +5,13 @@ */ import type { Api, Model } from "@gsd/pi-ai"; +import { getProviderCapabilities } from "@gsd/pi-ai"; import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent"; import type { GSDPreferences } from "./preferences.js"; import { resolveModelWithFallbacksForUnit, resolveDynamicRoutingConfig } from "./preferences.js"; import type { ComplexityTier } from "./complexity-classifier.js"; import { classifyUnitComplexity, tierLabel } from "./complexity-classifier.js"; -import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabilityOverrides } from "./model-router.js"; +import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabilityOverrides, adjustToolSet, filterToolsForProvider } from "./model-router.js"; import { getLedger, getProjectTotals } from "./metrics.js"; import { unitPhaseLabel } from "./auto-dashboard.js"; @@ -24,10 +25,17 @@ export interface ModelSelectionResult { export function resolvePreferredModelConfig( unitType: string, autoModeStartModel: { provider: string; 
id: string } | null, + /** When false, only return explicit per-phase model configs — do not + * synthesize a routing ceiling from dynamic_routing.tier_models (#3962). */ + isAutoMode = true, ) { const explicitConfig = resolveModelWithFallbacksForUnit(unitType); if (explicitConfig) return explicitConfig; + // In interactive mode, don't synthesize a routing-based model config. + // The user's session model (/model) should be used as-is (#3962). + if (!isAutoMode) return undefined; + const routingConfig = resolveDynamicRoutingConfig(); if (!routingConfig.enabled || !routingConfig.tier_models) return undefined; @@ -61,8 +69,11 @@ export async function selectAndApplyModel( verbose: boolean, autoModeStartModel: { provider: string; id: string } | null, retryContext?: { isRetry: boolean; previousTier?: string }, + /** When false (interactive/guided-flow), skip dynamic routing and use the session model. + * Dynamic routing only applies in auto-mode where cost optimization is expected. (#3962) */ + isAutoMode = true, ): Promise { - const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel); + const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode); let routing: { tier: string; modelDowngraded: boolean } | null = null; let appliedModel: Model | null = null; @@ -70,7 +81,13 @@ export async function selectAndApplyModel( const availableModels = ctx.modelRegistry.getAvailable(); // ─── Dynamic Model Routing ───────────────────────────────────────── + // Dynamic routing (complexity-based downgrading) only applies in auto-mode. + // Interactive/guided-flow dispatches use the user's session model directly, + // respecting their /model selection without silent downgrades (#3962). 
const routingConfig = resolveDynamicRoutingConfig(); + if (!isAutoMode) { + routingConfig.enabled = false; + } let effectiveModelConfig = modelConfig; let routingTierLabel = ""; @@ -122,19 +139,16 @@ export async function selectAndApplyModel( const escalated = escalateTier(retryContext.previousTier as ComplexityTier); if (escalated) { classification = { ...classification, tier: escalated, reason: "escalated after failure" }; - if (verbose) { - ctx.ui.notify( - `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`, - "info", - ); - } + // Always notify on tier escalation — model changes should be visible (#3962) + ctx.ui.notify( + `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`, + "info", + ); } } // Load user capability overrides from preferences (D-17: deep-merged with built-in profiles) - const capabilityOverrides = loadCapabilityOverrides( - (prefs as { modelOverrides?: Record }> } | undefined) ?? {}, - ); + const capabilityOverrides = loadCapabilityOverrides(prefs ?? 
{}); // Fire before_model_select hook (ADR-004, D-03) // Hook can override model selection entirely by returning { modelId } @@ -196,24 +210,23 @@ export async function selectAndApplyModel( primary: routingResult.modelId, fallbacks: routingResult.fallbacks, }; - if (verbose) { - if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) { - // Verbose scoring breakdown for capability-scored decisions (D-20) - const tierLbl = tierLabel(classification.tier); - const scores = Object.entries(routingResult.capabilityScores) - .sort(([, a], [, b]) => b - a) - .map(([id, score]) => `${id}: ${score.toFixed(1)}`) - .join(", "); - ctx.ui.notify( - `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`, - "info", - ); - } else { - ctx.ui.notify( - `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`, - "info", - ); - } + // Always notify on model downgrade — users should see when their + // model selection is overridden, not just in verbose mode (#3962). + if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) { + const tierLbl = tierLabel(classification.tier); + const scores = Object.entries(routingResult.capabilityScores) + .sort(([, a], [, b]) => b - a) + .map(([id, score]) => `${id}: ${score.toFixed(1)}`) + .join(", "); + ctx.ui.notify( + `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`, + "info", + ); + } else { + ctx.ui.notify( + `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`, + "info", + ); } } routingTierLabel = ` [${tierLabel(classification.tier)}]`; @@ -246,12 +259,45 @@ export async function selectAndApplyModel( const ok = await pi.setModel(model, { persist: false }); if (ok) { appliedModel = model; + + // ADR-005: Adjust active tool set for the selected model's provider capabilities. 
+ // Hard-filter incompatible tools, then let extensions override via adjust_tool_set hook. + const activeToolNames = pi.getActiveTools(); + const { toolNames: compatibleTools, removedTools } = adjustToolSet(activeToolNames, model.api); + let finalToolNames = compatibleTools; + + // Fire adjust_tool_set hook — extensions can override the filtered tool set + if (routingConfig.hooks !== false) { + const hookResult = await pi.emitAdjustToolSet({ + selectedModelApi: model.api, + selectedModelProvider: model.provider, + selectedModelId: model.id, + activeToolNames, + filteredTools: removedTools, + }); + if (hookResult?.toolNames) { + finalToolNames = hookResult.toolNames; + } + } + + // Apply the filtered tool set if any tools were removed + if (removedTools.length > 0 || finalToolNames.length !== activeToolNames.length) { + pi.setActiveTools(finalToolNames); + } + if (verbose) { const fallbackNote = modelId === effectiveModelConfig.primary ? "" : ` (fallback from ${effectiveModelConfig.primary})`; const phase = unitPhaseLabel(unitType); ctx.ui.notify(`Model [${phase}]${routingTierLabel}: ${model.provider}/${model.id}${fallbackNote}`, "info"); + // ADR-005: Report tools filtered due to provider incompatibility + if (removedTools.length > 0) { + ctx.ui.notify( + `Tool compatibility: ${removedTools.length} tools filtered for ${model.api} — ${removedTools.join(", ")}`, + "info", + ); + } } break; } else { diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 5e8bff3c4..1c6f622f3 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -997,7 +997,7 @@ export async function buildDiscussMilestonePrompt(mid: string, midTitle: string, milestoneId: mid, milestoneTitle: midTitle, inlinedTemplates: discussTemplates, - structuredQuestionsAvailable: "true", + structuredQuestionsAvailable: "false", commitInstruction: "Do not commit planning artifacts — .gsd/ is managed 
externally.", fastPathInstruction: "", }); @@ -1503,7 +1503,9 @@ export async function buildCompleteMilestonePrompt( try { const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); if (isDbAvailable()) { - sliceIds = getMilestoneSlices(mid).map(s => s.id); + sliceIds = getMilestoneSlices(mid) + .filter(s => s.status !== "skipped") + .map(s => s.id); } } catch (err) { logWarning("prompt", `buildCompleteMilestonePrompt DB lookup failed: ${err instanceof Error ? err.message : String(err)}`); @@ -1597,7 +1599,9 @@ export async function buildValidateMilestonePrompt( try { const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js"); if (isDbAvailable()) { - valSliceIds = getMilestoneSlices(mid).map(s => s.id); + valSliceIds = getMilestoneSlices(mid) + .filter(s => s.status !== "skipped") + .map(s => s.id); } } catch (err) { logWarning("prompt", `buildValidateMilestonePrompt slice IDs lookup failed: ${err instanceof Error ? err.message : String(err)}`); diff --git a/src/resources/extensions/gsd/auto-start.ts b/src/resources/extensions/gsd/auto-start.ts index 3f737c638..5856bd0b9 100644 --- a/src/resources/extensions/gsd/auto-start.ts +++ b/src/resources/extensions/gsd/auto-start.ts @@ -83,7 +83,7 @@ import { join } from "node:path"; import { sep as pathSep } from "node:path"; import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js"; -import { resolveDefaultSessionModel } from "./preferences-models.js"; +import { resolveDefaultSessionModel, resolveDynamicRoutingConfig } from "./preferences-models.js"; import type { WorktreeResolver } from "./worktree-resolver.js"; export interface BootstrapDeps { @@ -335,19 +335,9 @@ export async function bootstrapAutoSession( } } - if (ctx.model?.provider === "claude-code") { - try { - const { ensureProjectWorkflowMcpConfig } = await import("./mcp-project-config.js"); - const result = ensureProjectWorkflowMcpConfig(base); - if (result.status !== "unchanged") { - ctx.ui.notify(`Claude Code MCP 
prepared at ${result.configPath}`, "info"); - } - } catch (err) { - ctx.ui.notify( - `Claude Code MCP prep failed: ${err instanceof Error ? err.message : String(err)}`, - "warning", - ); - } + { + const { prepareWorkflowMcpForProject } = await import("./workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, base); } // Initialize GitServiceImpl @@ -688,7 +678,7 @@ export async function bootstrapAutoSession( } // ── DB lifecycle ── - const gsdDbPath = join(s.basePath, ".gsd", "gsd.db"); + const gsdDbPath = resolveProjectRootDbPath(s.basePath); const gsdDirPath = join(s.basePath, ".gsd"); if (existsSync(gsdDirPath) && !existsSync(gsdDbPath)) { const hasDecisions = existsSync(join(gsdDirPath, "DECISIONS.md")); @@ -778,6 +768,39 @@ export async function bootstrapAutoSession( : "Will loop until milestone complete."; ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info"); + // Show dynamic routing status so users know upfront if models will be + // downgraded for simple tasks (#3962). + // Use the same effective logic as selectAndApplyModel: check flat-rate + // provider suppression and resolve the actual ceiling model. + const routingConfig = resolveDynamicRoutingConfig(); + const startModelLabel = s.autoModeStartModel + ? `${s.autoModeStartModel.provider}/${s.autoModeStartModel.id}` + : ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "default"; + + // Flat-rate providers (e.g. GitHub Copilot, claude-code) suppress routing + // at dispatch time (#3453) — reflect that in the banner. + const { isFlatRateProvider } = await import("./auto-model-selection.js"); + const effectiveProvider = s.autoModeStartModel?.provider ?? ctx.model?.provider; + const effectivelyEnabled = routingConfig.enabled + && !(effectiveProvider && isFlatRateProvider(effectiveProvider)); + + // The actual ceiling may come from tier_models.heavy, not the start model. + const effectiveCeiling = (routingConfig.enabled && routingConfig.tier_models?.heavy) + ? 
routingConfig.tier_models.heavy + : startModelLabel; + + if (effectivelyEnabled) { + ctx.ui.notify( + `Dynamic routing: enabled — simple tasks may use cheaper models (ceiling: ${effectiveCeiling})`, + "info", + ); + } else { + ctx.ui.notify( + `Dynamic routing: disabled — all tasks will use ${startModelLabel}`, + "info", + ); + } + updateSessionLock( lockBase(), "starting", diff --git a/src/resources/extensions/gsd/auto-tool-tracking.ts b/src/resources/extensions/gsd/auto-tool-tracking.ts index 9e7ffc049..cab495813 100644 --- a/src/resources/extensions/gsd/auto-tool-tracking.ts +++ b/src/resources/extensions/gsd/auto-tool-tracking.ts @@ -92,7 +92,7 @@ export function clearInFlightTools(): void { * handler. When these errors occur, retrying the same unit will produce the same * failure, so the retry loop must be broken. */ -const TOOL_INVOCATION_ERROR_RE = /Validation failed for tool|Expected ',' or '\}' in JSON|Unexpected end of JSON|Unexpected token.*in JSON/i; +const TOOL_INVOCATION_ERROR_RE = /Validation failed for tool|Expected ',' or '\}'(?: after property value)?(?: in JSON)?|Unexpected end of JSON|Unexpected token.*in JSON/i; /** * Returns true if the error message indicates a tool invocation failure due to diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 79b7fdc37..1b8d4fd47 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -126,6 +126,7 @@ import { import { setLogBasePath, logWarning, logError } from "./workflow-logger.js"; import { homedir } from "node:os"; import { join } from "node:path"; +import { pathToFileURL } from "node:url"; import { readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs"; import { atomicWriteSync } from "./atomic-write.js"; import { @@ -1333,8 +1334,17 @@ export async function startAuto( restoreHookState(s.basePath); // Re-sync managed resources on resume so long-lived auto sessions pick up // bundled extension 
updates before resume-time verification/state logic runs. + // GSD_PKG_ROOT is set by loader.ts and points to the gsd-pi package root. + // The relative import ("../../../resource-loader.js") only works from the source + // tree; deployed extensions live at ~/.gsd/agent/extensions/gsd/ where the + // relative path resolves to ~/.gsd/agent/resource-loader.js which doesn't exist. + // Using GSD_PKG_ROOT constructs a correct absolute path in both contexts (#3949). const agentDir = process.env.GSD_CODING_AGENT_DIR || join(process.env.GSD_HOME || homedir(), ".gsd", "agent"); - const { initResources } = await import("../../../" + "resource-loader.js"); + const pkgRoot = process.env.GSD_PKG_ROOT; + const resourceLoaderPath = pkgRoot + ? pathToFileURL(join(pkgRoot, "dist", "resource-loader.js")).href + : new URL("../../../resource-loader.js", import.meta.url).href; + const { initResources } = await import(resourceLoaderPath); initResources(agentDir); // Open the project DB before rebuild/derive so resume uses DB-backed // state instead of falling back to stale markdown parsing (#2940). 
diff --git a/src/resources/extensions/gsd/bootstrap/register-hooks.ts b/src/resources/extensions/gsd/bootstrap/register-hooks.ts index 4bb105f71..438d4d9b0 100644 --- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts +++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts @@ -45,6 +45,8 @@ export function registerHooks(pi: ExtensionAPI): void { resetToolCallLoopGuard(); resetAskUserQuestionsCache(); await syncServiceTierStatus(ctx); + const { prepareWorkflowMcpForProject } = await import("../workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, process.cwd()); // Apply show_token_cost preference (#1515) try { @@ -85,6 +87,8 @@ export function registerHooks(pi: ExtensionAPI): void { resetAskUserQuestionsCache(); clearDiscussionFlowState(); await syncServiceTierStatus(ctx); + const { prepareWorkflowMcpForProject } = await import("../workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, process.cwd()); loadToolApiKeys(); }); @@ -117,6 +121,8 @@ export function registerHooks(pi: ExtensionAPI): void { return { cancel: true }; } const basePath = process.cwd(); + const { ensureDbOpen } = await import("./dynamic-tools.js"); + await ensureDbOpen(); const state = await deriveState(basePath); if (!state.activeMilestone || !state.activeSlice || !state.activeTask) return; if (state.phase !== "executing") return; @@ -446,4 +452,12 @@ export function registerHooks(pi: ExtensionAPI): void { // Default: no override — let capability scoring handle selection return undefined; }); + + // Tool set adaptation hook (ADR-005 Phase 4) + // Extensions can override tool set after model selection by returning { toolNames: [...] } + // Return undefined to let the built-in provider compatibility filtering proceed. 
+ pi.on("adjust_tool_set", async (_event) => { + // Default: no override — let provider capability filtering handle tool set + return undefined; + }); } diff --git a/src/resources/extensions/gsd/bootstrap/system-context.ts b/src/resources/extensions/gsd/bootstrap/system-context.ts index 8fe3890df..3a336f9ee 100644 --- a/src/resources/extensions/gsd/bootstrap/system-context.ts +++ b/src/resources/extensions/gsd/bootstrap/system-context.ts @@ -19,6 +19,7 @@ import { deriveState } from "../state.js"; import { formatOverridesSection, formatShortcut, loadActiveOverrides, loadFile, parseContinue, parseSummary } from "../files.js"; import { toPosixPath } from "../../shared/mod.js"; import { markCmuxPromptShown, shouldPromptToEnableCmux } from "../../cmux/index.js"; +import { autoEnableCmuxPreferences } from "../commands-cmux.js"; const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd"); @@ -76,13 +77,16 @@ export async function buildBeforeAgentStartResult( shortcutDashboard: formatShortcut("Ctrl+Alt+G"), shortcutShell: formatShortcut("Ctrl+Alt+B"), }); - const loadedPreferences = loadEffectiveGSDPreferences(); + let loadedPreferences = loadEffectiveGSDPreferences(); if (shouldPromptToEnableCmux(loadedPreferences?.preferences)) { markCmuxPromptShown(); - ctx.ui.notify( - "cmux detected. Run /gsd cmux on to enable sidebar metadata, notifications, and visual subagent splits for this project.", - "info", - ); + if (autoEnableCmuxPreferences()) { + loadedPreferences = loadEffectiveGSDPreferences(); + ctx.ui.notify( + "cmux detected — auto-enabled. 
Run /gsd cmux off to disable.", + "info", + ); + } } let preferenceBlock = ""; @@ -289,6 +293,11 @@ function buildWorktreeContextBlock(): string { const RESUME_INTENT_PATTERNS = /^(continue|resume|ok|go|go ahead|proceed|keep going|carry on|next|yes|yeah|yep|sure|do it|let's go|pick up where you left off)$/; async function buildGuidedExecuteContextInjection(prompt: string, basePath: string): Promise { + const ensureStateDbOpen = async () => { + const { ensureDbOpen } = await import("./dynamic-tools.js"); + await ensureDbOpen(); + }; + const executeMatch = prompt.match(/Execute the next task:\s+(T\d+)\s+\("([^"]+)"\)\s+in slice\s+(S\d+)\s+of milestone\s+(M\d+(?:-[a-z0-9]{6})?)/i); if (executeMatch) { const [, taskId, taskTitle, sliceId, milestoneId] = executeMatch; @@ -298,6 +307,7 @@ async function buildGuidedExecuteContextInjection(prompt: string, basePath: stri const resumeMatch = prompt.match(/Resume interrupted work\.[\s\S]*?slice\s+(S\d+)\s+of milestone\s+(M\d+(?:-[a-z0-9]{6})?)/i); if (resumeMatch) { const [, sliceId, milestoneId] = resumeMatch; + await ensureStateDbOpen(); const state = await deriveState(basePath); if (state.activeMilestone?.id === milestoneId && state.activeSlice?.id === sliceId && state.activeTask) { return buildTaskExecutionContextInjection(basePath, milestoneId, sliceId, state.activeTask.id, state.activeTask.title); @@ -313,6 +323,7 @@ async function buildGuidedExecuteContextInjection(prompt: string, basePath: stri // replanning, gate evaluation, or other non-execution phases. 
const trimmed = prompt.trim().toLowerCase().replace(/[.!?,]+$/g, ""); if (RESUME_INTENT_PATTERNS.test(trimmed)) { + await ensureStateDbOpen(); const state = await deriveState(basePath); if (state.phase === "executing" && state.activeTask && state.activeMilestone && state.activeSlice) { return buildTaskExecutionContextInjection( diff --git a/src/resources/extensions/gsd/bootstrap/write-gate.ts b/src/resources/extensions/gsd/bootstrap/write-gate.ts index 0215faae8..b8e6cf8e5 100644 --- a/src/resources/extensions/gsd/bootstrap/write-gate.ts +++ b/src/resources/extensions/gsd/bootstrap/write-gate.ts @@ -47,13 +47,9 @@ let pendingGateId: string | null = null; /** * Recognized gate question ID patterns. - * These appear in both discuss-prepared.md (4-layer) and discuss.md (depth/requirements/roadmap). + * These appear in discuss.md (depth/requirements/roadmap). */ const GATE_QUESTION_PATTERNS = [ - "layer1_scope_gate", - "layer2_architecture_gate", - "layer3_error_gate", - "layer4_quality_gate", "depth_verification", ] as const; diff --git a/src/resources/extensions/gsd/codebase-generator.ts b/src/resources/extensions/gsd/codebase-generator.ts index f56d84079..b291c3c1f 100644 --- a/src/resources/extensions/gsd/codebase-generator.ts +++ b/src/resources/extensions/gsd/codebase-generator.ts @@ -71,13 +71,23 @@ interface EnumeratedFiles { // ─── Defaults ──────────────────────────────────────────────────────────────── const DEFAULT_EXCLUDES = [ + // ── AI / tooling meta ── + ".agents/", ".gsd/", ".planning/", ".plans/", ".claude/", ".cursor/", + ".bg-shell/", + + // ── Editor / IDE ── ".vscode/", + ".idea/", + + // ── VCS ── ".git/", + + // ── Dependencies & build artifacts ── "node_modules/", "dist/", "build/", @@ -85,7 +95,13 @@ const DEFAULT_EXCLUDES = [ "coverage/", "__pycache__/", ".venv/", + "venv/", "vendor/", + "target/", + + // ── Misc ── + ".cache/", + "tmp/", ]; const DEFAULT_MAX_FILES = 500; diff --git a/src/resources/extensions/gsd/commands-cmux.ts 
b/src/resources/extensions/gsd/commands-cmux.ts index e00f2dea2..a1b8f5ee4 100644 --- a/src/resources/extensions/gsd/commands-cmux.ts +++ b/src/resources/extensions/gsd/commands-cmux.ts @@ -1,5 +1,5 @@ import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; -import { existsSync, readFileSync } from "node:fs"; +import { existsSync, readFileSync, writeFileSync } from "node:fs"; import { clearCmuxSidebar, CmuxClient, detectCmuxEnvironment, resolveCmuxConfig } from "../cmux/index.js"; import { saveFile } from "./files.js"; import { @@ -9,6 +9,37 @@ import { } from "./preferences.js"; import { ensurePreferencesFile, serializePreferencesToFrontmatter } from "./commands-prefs-wizard.js"; +/** + * Auto-enable cmux in project preferences when detected but never configured. + * Called at boot (before agent start) — no ExtensionCommandContext needed. + * Returns true if preferences were written, false if skipped. + */ +export function autoEnableCmuxPreferences(): boolean { + const path = getProjectGSDPreferencesPath(); + if (!existsSync(path)) return false; + + const existing = loadProjectGSDPreferences(); + const prefs: Record = existing?.preferences ? { ...existing.preferences } : { version: 1 }; + prefs.cmux = { + enabled: true, + notifications: true, + sidebar: true, + splits: false, + browser: false, + ...((prefs.cmux as Record | undefined) ?? {}), + }; + (prefs.cmux as Record).enabled = true; + prefs.version = prefs.version || 1; + + const frontmatter = serializePreferencesToFrontmatter(prefs); + let body = "\n# GSD Skill Preferences\n\nSee `~/.gsd/agent/extensions/gsd/docs/preferences-reference.md` for full field documentation and examples.\n"; + const preserved = extractBodyAfterFrontmatter(readFileSync(path, "utf-8")); + if (preserved) body = preserved; + + writeFileSync(path, `---\n${frontmatter}---${body}`, "utf-8"); + return true; +} + function extractBodyAfterFrontmatter(content: string): string | null { const start = content.startsWith("---\n") ? 
4 : content.startsWith("---\r\n") ? 5 : -1; if (start === -1) return null; diff --git a/src/resources/extensions/gsd/commands-handlers.ts b/src/resources/extensions/gsd/commands-handlers.ts index 16af7230b..1797b2cd9 100644 --- a/src/resources/extensions/gsd/commands-handlers.ts +++ b/src/resources/extensions/gsd/commands-handlers.ts @@ -25,6 +25,26 @@ import { getAutoWorktreePath } from "./auto-worktree.js"; import { projectRoot } from "./commands/context.js"; import { loadPrompt } from "./prompt-loader.js"; +const UPDATE_REGISTRY_URL = "https://registry.npmjs.org/gsd-pi/latest"; +const UPDATE_FETCH_TIMEOUT_MS = 5000; + +async function fetchLatestVersionForCommand(): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), UPDATE_FETCH_TIMEOUT_MS); + + try { + const res = await fetch(UPDATE_REGISTRY_URL, { signal: controller.signal }); + if (!res.ok) return null; + const data = (await res.json()) as { version?: string }; + const latest = typeof data.version === "string" ? data.version.trim().replace(/^v/, "") : ""; + return latest.length > 0 ? latest : null; + } catch { + return null; + } finally { + clearTimeout(timeout); + } +} + export function dispatchDoctorHeal(pi: ExtensionAPI, scope: string | undefined, reportText: string, structuredIssues: string): void { const workflowPath = process.env.GSD_WORKFLOW_PATH ?? join(process.env.HOME ?? "~", ".gsd", "agent", "GSD-WORKFLOW.md"); const workflow = readFileSync(workflowPath, "utf-8"); @@ -394,13 +414,8 @@ export async function handleUpdate(ctx: ExtensionCommandContext): Promise ctx.ui.notify(`Current version: v${current}\nChecking npm registry...`, "info"); - let latest: string; - try { - latest = execSync(`npm view ${NPM_PACKAGE} version`, { - encoding: "utf-8", - stdio: ["ignore", "pipe", "ignore"], - }).trim(); - } catch { + const latest = await fetchLatestVersionForCommand(); + if (!latest) { ctx.ui.notify("Failed to reach npm registry. 
Check your network connection.", "error"); return; } diff --git a/src/resources/extensions/gsd/commands/context.ts b/src/resources/extensions/gsd/commands/context.ts index f4a5aa423..8007ecd27 100644 --- a/src/resources/extensions/gsd/commands/context.ts +++ b/src/resources/extensions/gsd/commands/context.ts @@ -1,7 +1,7 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; import { checkRemoteAutoSession, isAutoActive, isAutoPaused, stopAutoRemote } from "../auto.js"; -import { assertSafeDirectory } from "../validate-directory.js"; +import { validateDirectory } from "../validate-directory.js"; import { resolveProjectRoot } from "../worktree.js"; import { showNextAction } from "../../shared/tui.js"; import { handleStatus } from "./handlers/core.js"; @@ -12,6 +12,17 @@ export interface GsdDispatchContext { trimmed: string; } +/** + * Typed error for when GSD is run outside a valid project directory. + * Command handlers catch this to show a friendly message instead of a raw exception. + */ +export class GSDNoProjectError extends Error { + constructor(reason: string) { + super(reason); + this.name = "GSDNoProjectError"; + } +} + export function projectRoot(): string { let cwd: string; try { @@ -21,10 +32,10 @@ export function projectRoot(): string { cwd = process.env.HOME ?? "/"; } const root = resolveProjectRoot(cwd); - if (root !== cwd) { - assertSafeDirectory(cwd); - } else { - assertSafeDirectory(root); + const pathToCheck = root !== cwd ? cwd : root; + const result = validateDirectory(pathToCheck); + if (result.severity === "blocked") { + throw new GSDNoProjectError(result.reason ?? 
"GSD must be run inside a project directory."); } return root; } diff --git a/src/resources/extensions/gsd/commands/dispatcher.ts b/src/resources/extensions/gsd/commands/dispatcher.ts index a3d11344b..9ec6bae09 100644 --- a/src/resources/extensions/gsd/commands/dispatcher.ts +++ b/src/resources/extensions/gsd/commands/dispatcher.ts @@ -1,5 +1,6 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { GSDNoProjectError } from "./context.js"; import { handleAutoCommand } from "./handlers/auto.js"; import { handleCoreCommand } from "./handlers/core.js"; import { handleOpsCommand } from "./handlers/ops.js"; @@ -21,10 +22,21 @@ export async function handleGSDCommand( () => handleOpsCommand(trimmed, ctx, pi), ]; - for (const handler of handlers) { - if (await handler()) { + try { + for (const handler of handlers) { + if (await handler()) { + return; + } + } + } catch (err) { + if (err instanceof GSDNoProjectError) { + ctx.ui.notify( + `${err.message} \`cd\` into a project directory first.`, + "warning", + ); return; } + throw err; } ctx.ui.notify(`Unknown: /gsd ${trimmed}. Run /gsd help for available commands.`, "warning"); diff --git a/src/resources/extensions/gsd/custom-workflow-engine.ts b/src/resources/extensions/gsd/custom-workflow-engine.ts index bcdbc8f4d..0b1266326 100644 --- a/src/resources/extensions/gsd/custom-workflow-engine.ts +++ b/src/resources/extensions/gsd/custom-workflow-engine.ts @@ -34,6 +34,7 @@ import { import { injectContext } from "./context-injector.js"; import type { WorkflowDefinition, StepDefinition } from "./definition-loader.js"; import { parseUnitId } from "./unit-id.js"; +import { withFileLock } from "./file-lock.js"; /** Read and parse the frozen DEFINITION.yaml from a run directory. 
*/ export function readFrozenDefinition(runDir: string): WorkflowDefinition { @@ -179,24 +180,28 @@ export class CustomWorkflowEngine implements WorkflowEngine { state: EngineState, completedStep: CompletedStep, ): Promise { - // Re-read the graph from disk so we do not overwrite concurrent - // workflow edits with a stale in-memory snapshot from deriveState(). - const graph = readGraph(this.runDir); + const graphPath = join(this.runDir, "GRAPH.yaml"); - // Extract stepId from "/" - const { milestone, slice, task } = parseUnitId(completedStep.unitId); - const stepId = task ?? slice ?? milestone; + return await withFileLock(graphPath, () => { + // Re-read the graph from disk so we do not overwrite concurrent + // workflow edits with a stale in-memory snapshot from deriveState(). + const graph = readGraph(this.runDir); - const updatedGraph = markStepComplete(graph, stepId); - writeGraph(this.runDir, updatedGraph); + // Extract stepId from "/" + const { milestone, slice, task } = parseUnitId(completedStep.unitId); + const stepId = task ?? slice ?? milestone; - const allDone = updatedGraph.steps.every( - (s) => s.status === "complete" || s.status === "expanded", - ); + const updatedGraph = markStepComplete(graph, stepId); + writeGraph(this.runDir, updatedGraph); - return { - outcome: allDone ? "milestone-complete" : "continue", - }; + const allDone = updatedGraph.steps.every( + (s) => s.status === "complete" || s.status === "expanded", + ); + + return { + outcome: allDone ? "milestone-complete" : "continue", + }; + }); } /** diff --git a/src/resources/extensions/gsd/dispatch-guard.ts b/src/resources/extensions/gsd/dispatch-guard.ts index c687f1b30..568f872d0 100644 --- a/src/resources/extensions/gsd/dispatch-guard.ts +++ b/src/resources/extensions/gsd/dispatch-guard.ts @@ -107,10 +107,27 @@ export function getPriorSliceCompletionBlocker( // it may be a cross-milestone reference handled elsewhere. 
} } else { + // Positional fallback is only a heuristic for legacy slices with no + // declared dependencies. Skip any earlier slice that depends on the + // target, directly or transitively, or we can deadlock a valid zero-dep + // slice behind its own downstream dependents (#3720). + const reverseDependents = new Set(); + let changed = true; + while (changed) { + changed = false; + for (const slice of slices) { + if (reverseDependents.has(slice.id)) continue; + if (slice.depends.some((depId) => depId === targetSid || reverseDependents.has(depId))) { + reverseDependents.add(slice.id); + changed = true; + } + } + } + const targetIndex = slices.findIndex((slice) => slice.id === targetSid); const incomplete = slices .slice(0, targetIndex) - .find((slice) => !slice.done); + .find((slice) => !slice.done && !reverseDependents.has(slice.id)); if (incomplete) { return `Cannot dispatch ${unitType} ${unitId}: earlier slice ${targetMid}/${incomplete.id} is not complete.`; } diff --git a/src/resources/extensions/gsd/doctor-engine-checks.ts b/src/resources/extensions/gsd/doctor-engine-checks.ts index 8b74dcac4..e7fc57540 100644 --- a/src/resources/extensions/gsd/doctor-engine-checks.ts +++ b/src/resources/extensions/gsd/doctor-engine-checks.ts @@ -13,6 +13,20 @@ export async function checkEngineHealth( issues: DoctorIssue[], fixesApplied: string[], ): Promise { + const dbPath = join(basePath, ".gsd", "gsd.db"); + + if (!isDbAvailable() && existsSync(dbPath)) { + issues.push({ + severity: "warning", + code: "db_unavailable", + scope: "project", + unitId: "project", + message: "Database unavailable — using filesystem state derivation (degraded mode). 
State queries may be slower and less reliable.", + file: ".gsd/gsd.db", + fixable: false, + }); + } + // ── DB constraint violation detection (full doctor only, not pre-dispatch per D-10) ── try { if (isDbAvailable()) { diff --git a/src/resources/extensions/gsd/doctor-format.ts b/src/resources/extensions/gsd/doctor-format.ts index 841f7ee13..a22d64e97 100644 --- a/src/resources/extensions/gsd/doctor-format.ts +++ b/src/resources/extensions/gsd/doctor-format.ts @@ -2,6 +2,7 @@ import type { DoctorIssue, DoctorIssueCode, DoctorReport, DoctorSummary } from " function matchesScope(unitId: string, scope?: string): boolean { if (!scope) return true; + if (unitId === "project" || unitId === "environment") return true; return unitId === scope || unitId.startsWith(`${scope}/`) || unitId.startsWith(`${scope}`); } diff --git a/src/resources/extensions/gsd/doctor-types.ts b/src/resources/extensions/gsd/doctor-types.ts index 8c804b3b8..309848048 100644 --- a/src/resources/extensions/gsd/doctor-types.ts +++ b/src/resources/extensions/gsd/doctor-types.ts @@ -78,6 +78,7 @@ export type DoctorIssueCode = | "db_orphaned_slice" | "db_done_task_no_summary" | "db_duplicate_id" + | "db_unavailable" | "projection_drift"; /** diff --git a/src/resources/extensions/gsd/error-classifier.ts b/src/resources/extensions/gsd/error-classifier.ts index 604167451..fc5a543f7 100644 --- a/src/resources/extensions/gsd/error-classifier.ts +++ b/src/resources/extensions/gsd/error-classifier.ts @@ -47,7 +47,7 @@ const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i; const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i; const SERVER_RE = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i; // ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first). 
-const CONNECTION_RE = /terminated|connection.?refused|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; +const CONNECTION_RE = /terminated|connection.?(?:refused|error)|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; // Catch-all for V8 JSON.parse errors: all modern variants end with "in JSON at position \d+". // This eliminates the need to enumerate every error message variant individually. const STREAM_RE = /in JSON at position \d+|Unexpected end of JSON|SyntaxError.*JSON/i; diff --git a/src/resources/extensions/gsd/file-lock.ts b/src/resources/extensions/gsd/file-lock.ts new file mode 100644 index 000000000..a40c77854 --- /dev/null +++ b/src/resources/extensions/gsd/file-lock.ts @@ -0,0 +1,59 @@ +import { existsSync } from "node:fs"; + +function _require(name: string) { + try { + return require(name); + } catch { + try { + const gsdPiRequire = require("module").createRequire( + require("path").join(process.cwd(), "node_modules", "gsd-pi", "index.js") + ); + return gsdPiRequire(name); + } catch { + return null; + } + } +} + +export function withFileLockSync(filePath: string, fn: () => T): T { + const lockfile = _require("proper-lockfile"); + if (!lockfile) return fn(); + + if (!existsSync(filePath)) return fn(); + + try { + const release = lockfile.lockSync(filePath, { retries: 5, stale: 10000 }); + try { + return fn(); + } finally { + release(); + } + } catch (err: any) { + if (err.code === "ELOCKED") { + // Could not get lock after retries, let's fallback to un-locked instead of crashing the whole state machine + return fn(); + } + throw err; + } +} + +export async function withFileLock(filePath: string, fn: () => Promise | T): Promise { + const lockfile = _require("proper-lockfile"); + if (!lockfile) return await fn(); + + if (!existsSync(filePath)) return await fn(); + + try { + const release = await lockfile.lock(filePath, { retries: 5, stale: 10000 }); + try { + 
return await fn(); + } finally { + await release(); + } + } catch (err: any) { + if (err.code === "ELOCKED") { + return await fn(); + } + throw err; + } +} diff --git a/src/resources/extensions/gsd/guided-flow.ts b/src/resources/extensions/gsd/guided-flow.ts index b73ad122d..53f76915f 100644 --- a/src/resources/extensions/gsd/guided-flow.ts +++ b/src/resources/extensions/gsd/guided-flow.ts @@ -48,30 +48,14 @@ import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js"; import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForGuidedUnit, + supportsStructuredQuestions, } from "./workflow-mcp.js"; import { runPreparation, formatCodebaseBrief, formatPriorContextBrief, - formatEcosystemBrief, - type PreparationResult, } from "./preparation.js"; -// ─── Preparation result storage ───────────────────────────────────────────── -// Stores the most recent preparation result for injection into discuss prompts. -// S02 will consume this when building the prepared discussion prompt. -let lastPreparationResult: PreparationResult | null = null; - -/** Get the most recent preparation result (for S02 prompt building). */ -export function getLastPreparationResult(): PreparationResult | null { - return lastPreparationResult; -} - -/** Clear the preparation result (called after discussion completes). 
*/ -export function clearPreparationResult(): void { - lastPreparationResult = null; -} - // ─── Re-exports (preserve public API for existing importers) ──────────────── export { MILESTONE_ID_RE, generateMilestoneSuffix, nextMilestoneId, @@ -311,6 +295,7 @@ async function dispatchWorkflow( const result = await selectAndApplyModel( ctx, pi, unitType, /* unitId */ "", /* basePath */ process.cwd(), prefs, /* verbose */ false, /* autoModeStartModel */ null, + /* retryContext */ undefined, /* isAutoMode */ false, ); if (result.appliedModel) { debugLog("guided-flow-model-applied", { @@ -384,6 +369,20 @@ async function dispatchWorkflow( } } +function getStructuredQuestionsAvailability( + pi: ExtensionAPI, + ctx: ExtensionContext | undefined, +): "true" | "false" { + if (!ctx) return "false"; + + const provider = ctx.model?.provider; + const authMode = provider ? ctx.modelRegistry.getProviderAuthMode(provider) : undefined; + return supportsStructuredQuestions(pi.getActiveTools(), { + authMode, + baseUrl: ctx.model?.baseUrl, + }) ? "true" : "false"; +} + /** * Resolve a model ID string to a model object from available models. * Handles "provider/model" and bare ID formats. @@ -427,8 +426,9 @@ function resolveAvailableModel( * Build the discuss-and-plan prompt for a new milestone. * Used by all three "new milestone" paths (first ever, no active, all complete). 
*/ -function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string): string { +function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string, pi: ExtensionAPI, ctx: ExtensionCommandContext, preparationContext?: string): string { const milestoneRel = `.gsd/milestones/${nextId}`; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const inlinedTemplates = [ inlineTemplate("project", "Project"), inlineTemplate("requirements", "Requirements"), @@ -439,6 +439,8 @@ function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string) return loadPrompt("discuss", { milestoneId: nextId, preamble, + preparationContext: preparationContext ?? "", + structuredQuestionsAvailable, contextPath: `${milestoneRel}/${nextId}-CONTEXT.md`, roadmapPath: `${milestoneRel}/${nextId}-ROADMAP.md`, inlinedTemplates, @@ -471,59 +473,12 @@ function buildHeadlessDiscussPrompt(nextId: string, seedContext: string, _basePa }); } -/** - * Build the prepared discuss prompt with brief injection. - * Uses the discuss-prepared template which encodes the 4-layer discussion protocol. 
- * - * @param nextId - The milestone ID being discussed - * @param preamble - Preamble text for the discuss prompt - * @param _basePath - Root directory of the project (unused, kept for signature consistency) - * @param prepResult - Preparation result containing briefs to inject - * @returns The prepared discuss prompt string - */ -function buildPreparedPrompt( - nextId: string, - preamble: string, - _basePath: string, - prepResult: PreparationResult, -): string { - const milestoneRel = `.gsd/milestones/${nextId}`; - - // Use context-enhanced instead of context for prepared discussions - const inlinedTemplates = [ - inlineTemplate("project", "Project"), - inlineTemplate("requirements", "Requirements"), - inlineTemplate("context-enhanced", "Context Enhanced"), - inlineTemplate("roadmap", "Roadmap"), - inlineTemplate("decisions", "Decisions"), - ].join("\n\n---\n\n"); - - // Format the briefs from the preparation result - const codebaseBrief = prepResult.codebaseBrief || formatCodebaseBrief(prepResult.codebase); - const priorContextBrief = prepResult.priorContextBrief || formatPriorContextBrief(prepResult.priorContext); - const ecosystemBrief = prepResult.ecosystemBrief || formatEcosystemBrief(prepResult.ecosystem); - - return loadPrompt("discuss-prepared", { - milestoneId: nextId, - preamble, - codebaseBrief, - priorContextBrief, - ecosystemBrief, - contextPath: `${milestoneRel}/${nextId}-CONTEXT.md`, - roadmapPath: `${milestoneRel}/${nextId}-ROADMAP.md`, - inlinedTemplates, - commitInstruction: buildDocsCommitInstruction(`docs(${nextId}): context, requirements, and roadmap`), - multiMilestoneCommitInstruction: buildDocsCommitInstruction("docs: project plan — N milestones"), - }); -} - /** * Run preparation phase if enabled, then build the discuss prompt. - * This is the main entry point for new milestone discussions with preparation. - * Stores the preparation result for S02 to inject into the discuss prompt. 
- * - * When preparation succeeds, uses the discuss-prepared template with brief injection. - * Falls back to the standard discuss template when preparation is disabled or fails. + * Preparation analyzes the codebase and prior context, injecting the results + * as supplementary context into the standard discuss template. The discuss + * template drives the conversation (asks "What's the vision?" first), while + * the preparation briefs give the agent grounding in the existing codebase. * * @param ctx - Extension command context with UI for progress notifications * @param nextId - The milestone ID being discussed @@ -533,18 +488,18 @@ function buildPreparedPrompt( */ async function prepareAndBuildDiscussPrompt( ctx: ExtensionCommandContext, + pi: ExtensionAPI, nextId: string, preamble: string, basePath: string, ): Promise { - // Clear stale preparation result immediately to prevent cross-session/project - // state leaks. This ensures data from a prior milestone/project never leaks - // into subsequent discussions (adversarial review fix #3602). - lastPreparationResult = null; - const prefs = loadEffectiveGSDPreferences()?.preferences ?? {}; - // Run preparation if enabled (default: true) + // Run preparation if enabled (default: true) — results are injected as + // supplementary context into the standard discuss prompt, NOT as a + // replacement template. The discuss prompt always leads with "What's the + // vision?" so the user defines the scope, not the codebase analysis. 
+ let preparationContext = ""; if (prefs.discuss_preparation !== false) { try { const prepResult = await runPreparation(basePath, ctx.ui, { @@ -552,21 +507,23 @@ async function prepareAndBuildDiscussPrompt( discuss_web_research: prefs.discuss_web_research, discuss_depth: prefs.discuss_depth, }); - lastPreparationResult = prepResult; - // Use prepared prompt if preparation was enabled and produced results if (prepResult.enabled) { - return buildPreparedPrompt(nextId, preamble, basePath, prepResult); + const codebaseBrief = prepResult.codebaseBrief || formatCodebaseBrief(prepResult.codebase); + const priorContextBrief = prepResult.priorContextBrief || formatPriorContextBrief(prepResult.priorContext); + const parts: string[] = []; + if (codebaseBrief) parts.push(`### Codebase Brief\n\n${codebaseBrief}`); + if (priorContextBrief) parts.push(`### Prior Context Brief\n\n${priorContextBrief}`); + if (parts.length > 0) { + preparationContext = `\n\n## Preparation Context\n\nThe system analyzed the codebase before this discussion. Use these findings as background context — they describe what already exists, NOT what the user wants to build. 
Always ask the user what they want to build first.\n\n${parts.join("\n\n")}`; + } } - } catch { - // If preparation throws, ensure stale data doesn't persist - lastPreparationResult = null; + } catch (err) { + logWarning("guided", `preparation failed, proceeding without context: ${(err as Error).message}`); } } - // Fall back to standard discuss prompt for backward compatibility - // lastPreparationResult is already null (cleared at entry or on error) - return buildDiscussPrompt(nextId, preamble, basePath); + return buildDiscussPrompt(nextId, preamble, basePath, pi, ctx, preparationContext); } /** @@ -801,7 +758,7 @@ export async function showDiscuss( if (choice === "discuss_draft") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${mid}): milestone context from discuss`), @@ -814,7 +771,7 @@ export async function showDiscuss( await dispatchWorkflow(pi, seed, "gsd-discuss", ctx, "discuss-milestone"); } else if (choice === "discuss_fresh") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: mid, step: false, createdAt: Date.now() }); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, @@ -826,7 +783,7 @@ export async function showDiscuss( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: false, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, `New milestone ${nextId}.`, basePath), "gsd-run", ctx, "discuss-milestone"); + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath), "gsd-run", ctx, "discuss-milestone"); } return; } @@ -972,7 +929,7 @@ export async function showDiscuss( if (confirm !== "rediscuss") continue; } - const sqAvail = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const sqAvail = getStructuredQuestionsAvailability(pi, ctx); const prompt = await buildDiscussSlicePrompt(mid, chosen.id, chosen.title, basePath, { rediscuss: isRediscuss, structuredQuestionsAvailable: sqAvail }); await dispatchWorkflow(pi, prompt, "gsd-discuss", ctx, "discuss-slice"); @@ -1082,7 +1039,7 @@ async function dispatchDiscussForMilestone( ].join("\n") : ""; const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, @@ -1231,7 +1188,7 @@ async function handleMilestoneActions( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1421,7 +1378,7 @@ export async function showSmartEntry( if (isFirst) { // First ever — skip wizard, just ask directly pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New project, milestone ${nextId}. 
Do NOT read or explore .gsd/ — it's empty scaffolding.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1442,7 +1399,7 @@ export async function showSmartEntry( if (choice === "new_milestone") { pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1481,7 +1438,7 @@ export async function showSmartEntry( const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1523,7 +1480,7 @@ export async function showSmartEntry( if (choice === "discuss_draft") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${milestoneId}): milestone context from discuss`), @@ -1536,7 +1493,7 @@ export async function showSmartEntry( await dispatchWorkflow(pi, seed, "gsd-discuss", ctx, "discuss-milestone"); } else if (choice === "discuss_fresh") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId, step: stepMode, createdAt: Date.now() }); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, @@ -1548,7 +1505,7 @@ export async function showSmartEntry( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1634,7 +1591,7 @@ export async function showSmartEntry( }), "gsd-run", ctx, "plan-milestone"); } else if (choice === "discuss") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${milestoneId}): milestone context from discuss`), @@ -1645,7 +1602,7 @@ export async function showSmartEntry( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1774,7 +1731,7 @@ export async function showSmartEntry( }), }), "gsd-run", ctx, "plan-slice"); } else if (choice === "discuss") { - const sqAvail = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const sqAvail = getStructuredQuestionsAvailability(pi, ctx); await dispatchWorkflow(pi, await buildDiscussSlicePrompt(milestoneId, sliceId, sliceTitle, basePath, { rediscuss: hasContext, structuredQuestionsAvailable: sqAvail }), "gsd-run", ctx, "discuss-slice"); } else if (choice === "research") { const researchTemplates = inlineTemplate("research", "Research"); diff --git a/src/resources/extensions/gsd/init-wizard.ts b/src/resources/extensions/gsd/init-wizard.ts index 40f3e5b64..b7251471e 100644 --- a/src/resources/extensions/gsd/init-wizard.ts +++ b/src/resources/extensions/gsd/init-wizard.ts @@ -274,19 +274,9 @@ export async function showProjectInit( // Non-fatal — STATE.md will be regenerated on next /gsd invocation } - if (ctx.model?.provider === "claude-code") { - try { - const { ensureProjectWorkflowMcpConfig } = await import("./mcp-project-config.js"); - const result = ensureProjectWorkflowMcpConfig(basePath); - if (result.status !== "unchanged") { - ctx.ui.notify(`Claude Code MCP prepared at ${result.configPath}`, "info"); - } - } catch (err) { - ctx.ui.notify( - `Claude Code MCP prep failed: ${err instanceof Error ? err.message : String(err)}`, - "warning", - ); - } + { + const { prepareWorkflowMcpForProject } = await import("./workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, basePath); } ctx.ui.notify("GSD initialized. 
Starting your first milestone...", "info"); diff --git a/src/resources/extensions/gsd/model-router.ts b/src/resources/extensions/gsd/model-router.ts index 0efbbf9b6..cc915877a 100644 --- a/src/resources/extensions/gsd/model-router.ts +++ b/src/resources/extensions/gsd/model-router.ts @@ -5,6 +5,9 @@ import type { ComplexityTier, ClassificationResult, TaskMetadata } from "./complexity-classifier.js"; import { tierOrdinal } from "./complexity-classifier.js"; import type { ResolvedModelConfig } from "./preferences.js"; +import { getProviderCapabilities, type ProviderCapabilities } from "@gsd/pi-ai"; +import { getToolCompatibility, getAllToolCompatibility } from "@gsd/pi-coding-agent"; +import type { ToolCompatibility } from "@gsd/pi-coding-agent"; // ─── Types ─────────────────────────────────────────────────────────────────── @@ -37,6 +40,8 @@ export interface RoutingDecision { selectionMethod: "tier-only" | "capability-scored"; /** Capability scores per eligible model (capability-scored path only) */ capabilityScores?: Record; + /** Tools filtered out due to provider incompatibility (ADR-005) */ + filteredTools?: string[]; /** Task requirement vector used for scoring */ taskRequirements?: Partial>; } @@ -58,7 +63,7 @@ export interface ModelCapabilities { // Maps known model IDs to their capability tier. Used when tier_models is not // explicitly configured to pick the best available model for each tier. -const MODEL_CAPABILITY_TIER: Record = { +export const MODEL_CAPABILITY_TIER: Record = { // Light-tier models (cheapest) "claude-haiku-4-5": "light", "claude-3-5-haiku-latest": "light", @@ -139,15 +144,49 @@ const MODEL_COST_PER_1K_INPUT: Record = { // model selection within an eligible tier set. 
export const MODEL_CAPABILITY_PROFILES: Record = { - "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 }, - "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 }, - "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 }, - "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 }, - "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 }, - "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 }, - "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, - "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 }, - "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 }, + // ── Anthropic ────────────────────────────────────────────────────────────── + "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 }, + "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 }, + "claude-sonnet-4-5-20250514": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 }, + "claude-3-5-sonnet-latest": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 70, instruction: 82 }, + "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 }, + "claude-3-5-haiku-latest": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, 
longContext: 50, instruction: 75 }, + "claude-3-haiku-20240307": { coding: 50, debugging: 40, research: 35, reasoning: 40, speed: 95, longContext: 40, instruction: 65 }, + "claude-3-opus-latest": { coding: 90, debugging: 85, research: 82, reasoning: 90, speed: 35, longContext: 75, instruction: 88 }, + + // ── OpenAI GPT ───────────────────────────────────────────────────────────── + "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 }, + "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 }, + "gpt-4-turbo": { coding: 78, debugging: 72, research: 68, reasoning: 72, speed: 50, longContext: 65, instruction: 78 }, + "gpt-4.1": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 72, instruction: 82 }, + "gpt-4.1-mini": { coding: 58, debugging: 48, research: 42, reasoning: 48, speed: 88, longContext: 48, instruction: 72 }, + "gpt-4.1-nano": { coding: 40, debugging: 30, research: 25, reasoning: 30, speed: 95, longContext: 30, instruction: 60 }, + "gpt-5": { coding: 92, debugging: 88, research: 85, reasoning: 92, speed: 40, longContext: 85, instruction: 90 }, + "gpt-5-mini": { coding: 62, debugging: 52, research: 48, reasoning: 52, speed: 88, longContext: 52, instruction: 74 }, + "gpt-5-nano": { coding: 42, debugging: 32, research: 28, reasoning: 32, speed: 95, longContext: 32, instruction: 62 }, + "gpt-5-pro": { coding: 94, debugging: 90, research: 88, reasoning: 94, speed: 35, longContext: 88, instruction: 92 }, + "gpt-5.1": { coding: 93, debugging: 89, research: 86, reasoning: 93, speed: 42, longContext: 86, instruction: 91 }, + "gpt-5.1-codex-max": { coding: 90, debugging: 85, research: 70, reasoning: 85, speed: 55, longContext: 75, instruction: 85 }, + "gpt-5.1-codex-mini": { coding: 65, debugging: 55, research: 40, reasoning: 50, speed: 88, longContext: 48, instruction: 72 }, + "gpt-5.2": { coding: 93, debugging: 
90, research: 87, reasoning: 93, speed: 42, longContext: 87, instruction: 91 }, + "gpt-5.2-codex": { coding: 93, debugging: 90, research: 72, reasoning: 88, speed: 50, longContext: 78, instruction: 88 }, + "gpt-5.3-codex": { coding: 94, debugging: 91, research: 74, reasoning: 89, speed: 50, longContext: 80, instruction: 89 }, + "gpt-5.3-codex-spark": { coding: 68, debugging: 58, research: 42, reasoning: 52, speed: 90, longContext: 50, instruction: 74 }, + "gpt-5.4": { coding: 95, debugging: 92, research: 88, reasoning: 94, speed: 42, longContext: 88, instruction: 92 }, + + // ── OpenAI o-series (reasoning-first) ────────────────────────────────────── + "o1": { coding: 78, debugging: 82, research: 78, reasoning: 90, speed: 20, longContext: 65, instruction: 82 }, + "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 }, + "o4-mini": { coding: 75, debugging: 80, research: 72, reasoning: 88, speed: 60, longContext: 65, instruction: 80 }, + "o4-mini-deep-research": { coding: 75, debugging: 80, research: 85, reasoning: 88, speed: 30, longContext: 80, instruction: 80 }, + + // ── Google ───────────────────────────────────────────────────────────────── + "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 }, + "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, + "gemini-flash-2.0": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, + + // ── DeepSeek ─────────────────────────────────────────────────────────────── + "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 }, }; // ─── Base Task Requirements Data Table ─────────────────────────────────────── @@ -502,3 +541,71 @@ function getModelCost(modelId: string): number { // Unknown cost — assume expensive to 
avoid routing to unknown cheap models return 999; } + +// ─── Tool Compatibility Filter (ADR-005 Phase 3) ─────────────────────────── + +/** + * Check if a tool is compatible with a provider's capabilities. + * Returns true if the tool can be used with the provider. + */ +export function isToolCompatibleWithProvider( + toolName: string, + providerCaps: ProviderCapabilities, +): boolean { + const compat = getToolCompatibility(toolName); + if (!compat) return true; // no metadata = always compatible + + // Hard filter: provider doesn't support image tool results + if (compat.producesImages && !providerCaps.imageToolResults) return false; + + // Hard filter: tool uses schema features provider doesn't support + if (compat.schemaFeatures?.some(f => providerCaps.unsupportedSchemaFeatures.includes(f))) { + return false; + } + + return true; +} + +/** + * Filter a list of tool names to only those compatible with a provider. + * Used by the routing pipeline to adjust tool sets when switching providers. + */ +export function filterToolsForProvider( + toolNames: string[], + providerApi: string, +): { compatible: string[]; filtered: string[] } { + const providerCaps = getProviderCapabilities(providerApi); + + // Provider doesn't support tool calling at all + if (!providerCaps.toolCalling) { + return { compatible: [], filtered: toolNames }; + } + + const compatible: string[] = []; + const filtered: string[] = []; + + for (const name of toolNames) { + if (isToolCompatibleWithProvider(name, providerCaps)) { + compatible.push(name); + } else { + filtered.push(name); + } + } + + return { compatible, filtered }; +} + +/** + * Adjust the active tool set for a selected model's provider capabilities. + * Returns tool names that should be active — removes incompatible tools. + * + * This is a hard filter only — it removes tools that would fail at the + * provider level. It does NOT remove tools based on soft heuristics. 
+ */ +export function adjustToolSet( + activeToolNames: string[], + selectedModelApi: string, +): { toolNames: string[]; removedTools: string[] } { + const { compatible, filtered } = filterToolsForProvider(activeToolNames, selectedModelApi); + return { toolNames: compatible, removedTools: filtered }; +} diff --git a/src/resources/extensions/gsd/notification-store.ts b/src/resources/extensions/gsd/notification-store.ts index d79d4a33c..50484597f 100644 --- a/src/resources/extensions/gsd/notification-store.ts +++ b/src/resources/extensions/gsd/notification-store.ts @@ -26,12 +26,15 @@ export interface NotificationEntry { const MAX_ENTRIES = 500; const FILENAME = "notifications.jsonl"; const LOCKFILE = "notifications.lock"; +const DEDUP_WINDOW_MS = 30_000; +const DEDUP_PRUNE_THRESHOLD = 200; // ─── Module State ─────────────────────────────────────────────────────── let _basePath: string | null = null; let _lineCount = 0; // Hint for rotation — not authoritative for public API let _suppressCount = 0; +let _recentMessageTimestamps = new Map(); // ─── Public API ───────────────────────────────────────────────────────── @@ -40,6 +43,9 @@ let _suppressCount = 0; * project root. Seeds in-memory counters from the existing file on disk. */ export function initNotificationStore(basePath: string): void { + if (_basePath !== basePath) { + _recentMessageTimestamps.clear(); + } _basePath = basePath; // Seed line count hint for rotation — public counters read from disk _lineCount = _readEntriesFromDisk(basePath).length; @@ -56,12 +62,23 @@ export function appendNotification( ): void { if (!_basePath) return; if (_suppressCount > 0) return; + const persistedMessage = message.length > 500 ? 
message.slice(0, 500) + "…" : message; + const dedupKey = `${_basePath}:${severity}:${source}:${persistedMessage}`; + const now = Date.now(); + const lastSeen = _recentMessageTimestamps.get(dedupKey); + if (lastSeen !== undefined && now - lastSeen < DEDUP_WINDOW_MS) return; + _recentMessageTimestamps.set(dedupKey, now); + if (_recentMessageTimestamps.size > DEDUP_PRUNE_THRESHOLD) { + for (const [key, ts] of _recentMessageTimestamps) { + if (now - ts > DEDUP_WINDOW_MS) _recentMessageTimestamps.delete(key); + } + } const entry: NotificationEntry = { id: randomUUID(), ts: new Date().toISOString(), severity, - message: message.length > 500 ? message.slice(0, 500) + "…" : message, + message: persistedMessage, source, read: false, }; @@ -181,6 +198,7 @@ export function _resetNotificationStore(): void { _basePath = null; _lineCount = 0; _suppressCount = 0; + _recentMessageTimestamps = new Map(); } // ─── Internal ─────────────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/notification-widget.ts b/src/resources/extensions/gsd/notification-widget.ts index 8a963be5e..648e2af65 100644 --- a/src/resources/extensions/gsd/notification-widget.ts +++ b/src/resources/extensions/gsd/notification-widget.ts @@ -25,7 +25,7 @@ export function buildNotificationWidgetLines(): string[] { ? 
latest.message.slice(0, msgMax - 1) + "…" : latest.message; - return [` ${icon} [${badge}] ${truncated} (${formatShortcut("Ctrl+Alt+N")} to view)`]; + return [` ${icon} [${badge}] ${truncated} (${formatShortcut("Ctrl+Alt+N")} or /gsd notifications)`]; } // ─── Widget init ──────────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/pre-execution-checks.ts b/src/resources/extensions/gsd/pre-execution-checks.ts index ed10ba50b..5e77bca85 100644 --- a/src/resources/extensions/gsd/pre-execution-checks.ts +++ b/src/resources/extensions/gsd/pre-execution-checks.ts @@ -20,6 +20,8 @@ import { resolve } from "node:path"; import type { TaskRow } from "./gsd-db.ts"; import type { PreExecutionCheckJSON } from "./verification-evidence.ts"; +const NPM_COMMAND = process.platform === "win32" ? "npm.cmd" : "npm"; + // ─── Result Types ──────────────────────────────────────────────────────────── export interface PreExecutionResult { @@ -126,9 +128,10 @@ async function checkPackageOnNpm( timeoutMs = 5000 ): Promise<{ exists: boolean; error?: string }> { return new Promise((resolve) => { - const child = spawn("npm", ["view", packageName, "name"], { + const child = spawn(NPM_COMMAND, ["view", packageName, "name"], { stdio: ["ignore", "pipe", "pipe"], timeout: timeoutMs, + shell: process.platform === "win32", }); let stdout = ""; @@ -263,9 +266,9 @@ function extractPathFromAnnotation(raw: string): string { const trimmed = raw.trim(); if (!trimmed) return trimmed; - const backtickMatch = trimmed.match(/^`([^`]+)`(?:\s+[—–-]\s+.*)?$/); + const backtickMatch = trimmed.match(/^(`+)([^`]+)\1(?:(?:\s+[—–-]\s+.+)|(?:\s+\([^()]+\)))?$/); if (backtickMatch) { - return backtickMatch[1].trim(); + return backtickMatch[2].trim(); } const annotatedMatch = trimmed.match(/^(.+?)\s+[—–-]\s+.+$/); @@ -277,6 +280,38 @@ function extractPathFromAnnotation(raw: string): string { return trimmed.replace(/`/g, ""); } +/** + * Planning units sometimes use task.inputs for 
prose like "Current enum shape" + * instead of concrete file paths. Those entries should not fail path checks. + * Keep validation for anything that still looks like a real file reference: + * explicit backticks, globs, separators, dot-paths, or single-token basenames + * like Dockerfile. + */ +function shouldValidateInputAsPath(raw: string): boolean { + const trimmed = raw.trim(); + if (!trimmed) return false; + + if (/^`+[^`]+`+/.test(trimmed)) { + return true; + } + + const candidate = extractPathFromAnnotation(trimmed); + if (!candidate) return false; + + if (!/\s/.test(candidate)) { + return true; + } + + return ( + candidate.startsWith("/") || + candidate.startsWith("./") || + candidate.startsWith("../") || + candidate.startsWith("~/") || + /[\\/]/.test(candidate) || + /[*?[\]{}]/.test(candidate) + ); +} + /** * Build a set of files that will be created by tasks up to (but not including) taskIndex. * All paths are normalized for consistent comparison. @@ -315,6 +350,7 @@ export function checkFilePathConsistency( for (const file of filesToCheck) { // Skip empty strings if (!file.trim()) continue; + if (!shouldValidateInputAsPath(file)) continue; // Normalize path for consistent comparison const normalizedFile = normalizeFilePath(file); @@ -351,7 +387,7 @@ export function checkFilePathConsistency( */ export function checkTaskOrdering( tasks: TaskRow[], - _basePath: string + basePath: string ): PreExecutionCheckJSON[] { const results: PreExecutionCheckJSON[] = []; @@ -375,9 +411,13 @@ export function checkTaskOrdering( const filesToCheck = [...task.inputs]; for (const file of filesToCheck) { + if (!shouldValidateInputAsPath(file)) continue; + const normalizedFile = normalizeFilePath(file); const creator = fileCreators.get(normalizedFile); - if (creator && creator.index > i) { + const absolutePath = resolve(basePath, normalizedFile); + const existsOnDisk = existsSync(absolutePath); + if (creator && creator.index > i && !existsOnDisk) { // Task reads file that is 
created later — impossible ordering results.push({ category: "file", diff --git a/src/resources/extensions/gsd/preferences-types.ts b/src/resources/extensions/gsd/preferences-types.ts index 58b847cc9..47ed0c12b 100644 --- a/src/resources/extensions/gsd/preferences-types.ts +++ b/src/resources/extensions/gsd/preferences-types.ts @@ -20,7 +20,7 @@ import type { ReactiveExecutionConfig, GateEvaluationConfig, } from "./types.js"; -import type { DynamicRoutingConfig } from "./model-router.js"; +import type { DynamicRoutingConfig, ModelCapabilities } from "./model-router.js"; export interface ContextManagementConfig { observation_masking?: boolean; // default: true @@ -255,6 +255,8 @@ export interface GSDPreferences { post_unit_hooks?: PostUnitHookConfig[]; pre_dispatch_hooks?: PreDispatchHookConfig[]; dynamic_routing?: DynamicRoutingConfig; + /** Per-model capability overrides. Deep-merged with built-in profiles for capability-aware routing (ADR-004). */ + modelOverrides?: Record<string, { capabilities?: Partial<ModelCapabilities> }>; context_management?: ContextManagementConfig; token_profile?: TokenProfile; phases?: PhaseSkipPreferences; diff --git a/src/resources/extensions/gsd/prompt-validation.ts b/src/resources/extensions/gsd/prompt-validation.ts deleted file mode 100644 index df2463a98..000000000 --- a/src/resources/extensions/gsd/prompt-validation.ts +++ /dev/null @@ -1,88 +0,0 @@ -/** - * GSD Prompt Validation — Validates enhanced context output before writing. - * - * Implements R109 validation requirement: CONTEXT.md must have required sections - * before being written to disk. - */ - -/** - * Result of validating enhanced context output. - */ -export interface ValidationResult { - /** Whether all required sections are present. */ - valid: boolean; - /** List of missing required sections. */ - missing: string[]; -} - -/** - * Validate that enhanced context content has all required sections. 
- * - * Required sections per R109: - * - Scope section (## Scope, ## Milestone Scope, or ## Why This Milestone) - * - Architectural Decisions section (## Architectural Decisions) - * - Acceptance Criteria section (## Acceptance Criteria or ## Final Integrated Acceptance) - * - * Additionally validates that the Architectural Decisions section contains - * at least one decision entry (### heading or **Decision marker). - * - * @param content - The enhanced context markdown content - * @returns ValidationResult with valid flag and list of missing sections - */ -export function validateEnhancedContext(content: string): ValidationResult { - const missing: string[] = []; - - // Required section 1: Scope (multiple acceptable header variants) - const hasScopeSection = - /^## Scope\b/m.test(content) || - /^## Milestone Scope\b/m.test(content) || - /^## Why This Milestone\b/m.test(content); - - if (!hasScopeSection) { - missing.push("Milestone Scope or Why This Milestone"); - } - - // Required section 2: Architectural Decisions - const hasArchitecturalDecisions = /^## Architectural Decisions\b/m.test(content); - if (!hasArchitecturalDecisions) { - missing.push("Architectural Decisions"); - } - - // Required section 3: Acceptance Criteria (multiple acceptable header variants) - const hasAcceptanceCriteria = - /^## Acceptance Criteria\b/m.test(content) || - /^## Final Integrated Acceptance\b/m.test(content); - - if (!hasAcceptanceCriteria) { - missing.push("Acceptance Criteria"); - } - - // Additional validation: Architectural Decisions must have at least one entry - if (hasArchitecturalDecisions) { - // Extract the section content between ## Architectural Decisions and the next ## heading. - // Uses indexOf-based extraction instead of regex with \z (which is invalid in JavaScript - // regex — it's PCRE/Ruby syntax and JS treats it as literal 'z'). 
- const sectionStart = content.indexOf("## Architectural Decisions"); - if (sectionStart === -1) { - missing.push("Architectural Decisions"); - } else { - const afterHeading = content.slice(sectionStart + "## Architectural Decisions".length); - const nextSection = afterHeading.search(/^## /m); - const sectionContent = nextSection === -1 ? afterHeading : afterHeading.slice(0, nextSection); - - // Check for actual decision entries: - // - ### heading (subsection per decision) - // - **Decision marker (inline decision format) - const hasDecisionEntry = /^### /m.test(sectionContent) || /^\*\*Decision/m.test(sectionContent); - - if (!hasDecisionEntry) { - missing.push("At least one architectural decision entry"); - } - } - } - - return { - valid: missing.length === 0, - missing, - }; -} diff --git a/src/resources/extensions/gsd/prompts/complete-slice.md b/src/resources/extensions/gsd/prompts/complete-slice.md index 86c271298..746729d82 100644 --- a/src/resources/extensions/gsd/prompts/complete-slice.md +++ b/src/resources/extensions/gsd/prompts/complete-slice.md @@ -21,7 +21,7 @@ All relevant context has been preloaded below — the slice plan, all task summa Then: 1. Use the **Slice Summary** and **UAT** output templates from the inlined context above 2. {{skillActivation}} -3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first. +3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first. Task artifacts use a **flat file layout** directly inside `tasks/` (for example `T01-SUMMARY.md`, `T02-SUMMARY.md`) rather than per-task subdirectories. If you need to count or re-read task summaries during verification, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` or `ls .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks/*-SUMMARY.md`. 
Never use `tasks/*/SUMMARY.md` — that glob expects subdirectories that do not exist. 4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections. 5. If the slice involved runtime behavior, fill the **Operational Readiness** section (Q8) in the slice summary: health signal, failure signal, recovery procedure, and monitoring gaps. Omit entirely for simple slices with no runtime concerns. 6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_requirement_update` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database. @@ -35,7 +35,7 @@ Then: **Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in auto-mode — there is no human available to answer questions. Make reasonable assumptions and document them in the slice summary. If a decision genuinely requires human input, note it in the summary and proceed with the best available option. -**File system safety:** Task summaries are preloaded in the inlined context above. If you need to re-read any of them, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` to list file paths first — never pass `{{slicePath}}` or any other directory path directly to the `read` tool. The `read` tool only accepts file paths, not directories. +**File system safety:** Task summaries are preloaded in the inlined context above. Task artifacts use a **flat file layout** — files such as `T01-SUMMARY.md` and `T02-SUMMARY.md` live directly inside the `tasks/` directory, not inside per-task subdirectories like `tasks/T01/SUMMARY.md`. If you need to re-read any of them, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` to list file paths first. 
Never use `tasks/*/SUMMARY.md`, and never pass `{{slicePath}}` or any other directory path directly to the `read` tool. The `read` tool only accepts file paths, not directories. **You MUST call `gsd_complete_slice` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.** diff --git a/src/resources/extensions/gsd/prompts/discuss-prepared.md b/src/resources/extensions/gsd/prompts/discuss-prepared.md deleted file mode 100644 index 0a3ce4287..000000000 --- a/src/resources/extensions/gsd/prompts/discuss-prepared.md +++ /dev/null @@ -1,425 +0,0 @@ -{{preamble}} - -You are conducting a **prepared discussion** — the system has already analyzed the codebase, gathered prior context, and researched the ecosystem. Your job is to present these findings, make recommendations, and gather the user's input through a structured 4-layer protocol. - -## Preparation Briefs - -The following briefs were generated during the preparation phase. Use them to ground your recommendations. - -### Codebase Brief - -{{codebaseBrief}} - -### Prior Context Brief - -{{priorContextBrief}} - -### Ecosystem Brief - -{{ecosystemBrief}} - ---- - -## 4-Layer Discussion Protocol - -This discussion proceeds through four mandatory layers. At each layer: -1. **Present findings** — share what the preparation revealed -2. **Make a recommendation** — take a position based on the evidence -3. **Ask clarifying questions** — fill gaps the preparation couldn't answer -4. **Gate** — use `ask_user_questions` to get explicit sign-off before advancing - -**Do NOT skip layers.** Each layer builds on the previous. The user must explicitly approve each layer before you proceed. -Never fabricate or simulate user input while moving through layers. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing. 
- ---- - -## Depth Adaptation - -The depth of questioning at each layer should match THIS milestone's work type. Do not apply a fixed checklist — reason from first principles about what matters for this specific work. - -**Work-type reasoning:** -- **API/service work** — Focus Layer 2 questions on contracts, versioning, backwards compatibility, authentication boundaries. Layer 3 must cover rate limiting, timeout cascades, and partial failure states. -- **CLI/developer tools** — Focus Layer 1 on user mental model and command grammar. Layer 4 needs shell compatibility, error message clarity, and exit code semantics. -- **ML/data pipelines** — Focus Layer 2 on data flow, reproducibility, and intermediate state. Layer 3 must cover data corruption, training divergence, and checkpoint recovery. -- **UI/frontend work** — Focus Layer 2 on component boundaries and state management. Layer 3 needs loading states, optimistic updates, and offline behavior. Layer 4 must include visual regression criteria. -- **Infrastructure/platform** — Focus Layer 2 on deployment topology and failure domains. Layer 3 must cover cascading failures, resource exhaustion, and rollback paths. -- **Refactoring/migration** — Focus Layer 1 on what changes vs what must stay identical. Layer 4 needs behavioral equivalence tests, not just code coverage. - -**Adaptation principle:** Ask "What would cause this milestone to fail silently or succeed incorrectly?" The answer shapes which questions deserve deep exploration vs quick confirmation. - ---- - -## Layer 1 — Scope (What are we building?) - -### Identify Work Type - -**Before presenting findings, identify the primary work type and state it explicitly:** - -"Based on [user's request and codebase analysis], this milestone is primarily **[work type]** work (e.g., API/backend, UI/frontend, CLI tool, data pipeline, simulation, infrastructure)." - -This classification determines the depth and focus of questioning at each layer. 
If the work type spans multiple categories, state the dominant type and note the secondary types. The user can correct this classification. - -### Present Findings - -Start by presenting what you learned from the preparation: - -1. **From the Codebase Brief:** Summarize the technology stack, key modules, and established patterns. Call out anything that constrains or enables the proposed work. - -2. **From the Prior Context Brief:** Surface existing decisions, requirements, and knowledge that are relevant. Note any prior commitments or constraints. - -3. **Scope implications:** Based on the above, explain what scope makes sense and what would conflict with the existing codebase. - -### Make a Recommendation - -Take a clear position: "Based on [specific findings], I recommend the milestone scope as [concrete description]." - -Include: -- What the milestone will deliver (user-visible outcome) -- What it explicitly excludes (to prevent scope creep) -- Rough size estimate (number of slices, complexity) - -### Resolve Scope — Mandatory Rounds - -After presenting your recommendation, you MUST complete these rounds in order. Each round uses `ask_user_questions` or direct questions. Do NOT skip rounds. Do NOT combine rounds. Do NOT jump to the Layer 1 Gate until all rounds are complete. **Each round is multi-turn: run one round, then wait for the user's response before starting the next round.** - -**Complexity calibration:** If the milestone is simple (1-2 slices, well-understood patterns, no ambiguity), you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. For complex milestones (3+ slices, novel architecture, significant ambiguity), give each round full treatment. - -**Round 1 — Feature boundaries:** -For each feature in your recommendation, state what it includes and excludes. Ask the user to confirm or adjust each boundary. Example: "Signup — I'm including email/password registration. 
I'm excluding OAuth, email verification, and phone number signup. Correct?" - -**Round 2 — Ambiguity resolution:** -Identify every term or concept in the scope that could be interpreted multiple ways. For each one, state the two most likely interpretations and ask which the user intends. Example: "'User authentication' — do you mean just login/signup, or also session management, token refresh, and logout?" - -**Round 3 — Dependencies and constraints:** -Ask about external dependencies (APIs, services, databases), existing code that will be affected, and constraints the user hasn't mentioned. Reference specific findings from the codebase brief. Example: "Your db.ts already has a getUser() function — should signup create users compatible with this existing model?" - -**Round 4 — Priority and ordering:** -If the scope has multiple features, ask the user to rank them by priority. Ask what's the minimum viable version if the milestone needs to be cut short. Example: "If we had to ship with only 2 of the 3 slices, which two matter most?" - -After completing all 4 rounds, proceed to the Layer 1 Gate. - -### Layer 1 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer1_scope_gate`: - -``` -Header: "Scope Gate" -Question: "Does this scope capture what you want to build?" -Options: - - "Yes, scope is correct (Recommended)" — proceed to Layer 2 - - "Needs adjustment" — user will clarify, then re-present scope -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 2 until the user explicitly approves the scope. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. "Tool not responding, I'll proceed," "auth issues," or "I'll use my recommended scope" are all **forbidden**. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. 
- ---- - -## Ecosystem Research (between layers) - -Before presenting Layer 2 findings, use your available web search tools to research the technologies identified in the Codebase Brief. For each major technology (framework, ORM, key library): - -1. Search for "[technology] [version] best practices [current year]" -2. Search for "[technology] [version] known issues" - -Summarize findings concisely. If search tools fail or are unavailable, note this and proceed using your training knowledge — but do NOT use a search failure as justification to skip any gate. - -Present ecosystem findings at the start of Layer 2 alongside your architecture recommendation. - ---- - -## Layer 2 — Architecture (How will it work?) - -### Present Findings - -Now present architectural recommendations grounded in evidence: - -1. **From the Ecosystem Brief:** Share relevant best practices, known issues, library recommendations, and integration patterns discovered during research. - -2. **From the Codebase Brief:** Identify existing architectural patterns that should be followed or deliberately broken from. - -3. **Synthesis:** Explain how the ecosystem research applies to this specific codebase context. - -### Make a Recommendation - -Take a clear position: "I'd suggest [approach] because [evidence-based rationale]." - -Cover: -- Overall architectural approach (new module? extend existing? separate service?) -- Key technical decisions (which libraries, patterns, data flow) -- Integration points with existing code -- What you'd avoid and why - -### Resolve Architecture — Mandatory Rounds - -After presenting your recommendation, you MUST complete these rounds in order. Do NOT skip rounds. Do NOT jump to the Layer 2 Gate until all rounds are complete. 
**Each round is multi-turn: run one round, then wait for the user's response before starting the next round.** - -**Complexity calibration:** If the milestone is simple (1-2 slices, well-understood patterns, no ambiguity), you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. For complex milestones (3+ slices, novel architecture, significant ambiguity), give each round full treatment. - -**Round 1 — Per-slice technical decisions:** -For each slice in your decomposition, state the specific technical approach. Ask the user to confirm or adjust. Don't just say "build the signup endpoint" — state which library handles password hashing, where the route file lives, what the request/response schema looks like. - -**Round 2 — Inter-slice contracts:** -For each dependency between slices, state explicitly what the upstream slice produces and what the downstream slice expects. Ask the user to confirm the interface. Example: "S01 produces a User model with {id, email, hashedPassword}. S02's login endpoint will query by email and compare password. Does this contract work?" - -**Round 3 — Library and pattern decisions:** -For each library or pattern choice, present at least one alternative with tradeoffs. Ask the user to confirm. Example: "bcrypt vs argon2 for password hashing — bcrypt is more common in Node, argon2 is newer and more resistant to GPU attacks. I recommend bcrypt for simplicity. Agree?" - -**Round 4 — Integration with existing code:** -Walk through how the new code connects to existing files and patterns. Ask about anything that might conflict. Reference specific files from the codebase brief. Example: "The new auth routes will mount at /api/auth alongside your existing /api router in routes.ts. Should they share the same router file or get their own auth-routes.ts?" - -After completing all 4 rounds, proceed to the Layer 2 Gate. 
- -### Layer 2 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer2_architecture_gate`: - -``` -Header: "Architecture Gate" -Question: "Ready to move to error handling, or want to adjust the architecture?" -Options: - - "Architecture looks good (Recommended)" — proceed to Layer 3 - - "Want to adjust" — user will clarify, then re-present architecture -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 3 until the user explicitly approves the architecture. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Layer 3 — Error States (What can go wrong?) - -### Present Findings - -Identify failure modes based on the scope and architecture: - -1. **From the Ecosystem Brief:** Known issues, common pitfalls, edge cases that trip up similar implementations. - -2. **From the Architecture:** Failure points at integration boundaries, async operations, external dependencies, user input handling. - -3. **From the Codebase Brief:** How existing code handles errors — patterns to follow, gaps to fill. - -### Make a Recommendation - -Take a clear position: "The critical error paths are [X, Y, Z]. I recommend handling them by [approach]." 
- -Cover: -- **Must-handle errors:** Failures that would break the user experience or corrupt data -- **Should-handle errors:** Degraded experiences that are acceptable with good messaging -- **Edge cases:** Boundary conditions, malformed input, timing issues -- **Recovery strategy:** Retry logic, fallback behavior, user notification - -### Resolve Error Handling — Mandatory Rounds - -After presenting your recommendation, ask the user: - -**"Do you want to go deep on error handling, or accept the defaults I recommended?"** - -Use `ask_user_questions` with options: "Go deep" / "Accept defaults" - -If they accept defaults, record your recommendations as decisions and proceed to the Layer 3 Gate. - -If they want to go deep, complete these rounds: - -**Complexity calibration:** If the milestone is simple, you may compress rounds — but you must still explicitly address each round's topic. You may NOT skip rounds entirely. - -**Round 1 — Input validation:** -For each endpoint or entry point, state what input validation happens and what error the user sees for invalid input. Ask the user to confirm. Example: "Signup with missing email returns 400 with {error: 'Email is required'}. Signup with invalid email format returns 400 with {error: 'Invalid email format'}. Right?" - -**Round 2 — Authentication/authorization failures:** -For each protected operation, state what happens when auth fails. Ask the user to confirm. Example: "Expired JWT returns 401. Missing JWT returns 401. Malformed JWT returns 401. All three use the same generic message to avoid information leakage. Correct?" - -**Round 3 — System failures:** -For each external dependency (database, API, file system), state what happens when it's unavailable. Ask the user to confirm. Example: "If Prisma can't connect to the database, all endpoints return 500 with a generic message. We log the real error server-side but never expose it to the client." 
- -After completing all rounds (or accepting defaults), proceed to the Layer 3 Gate. - -### Layer 3 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer3_error_gate`: - -``` -Header: "Error Handling Gate" -Question: "Error handling strategy captured. Ready to define the quality bar?" -Options: - - "Yes, move to quality bar (Recommended)" — proceed to Layer 4 - - "Want to adjust error handling" — user will clarify, then re-present errors -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 4 until the user explicitly approves error handling. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Layer 4 — Quality Bar (What does done mean?) - -### Present Findings - -Define what "done" looks like based on everything discussed: - -1. **Testing requirements:** What must be tested? Unit tests, integration tests, E2E tests? Based on the architecture's complexity and risk profile. - -2. **Acceptance criteria:** Concrete, observable outcomes that prove the milestone is complete. Derived from the scope discussion. - -3. **Performance/quality constraints:** Based on ecosystem research and codebase patterns — response times, error rates, accessibility requirements. - -### Make a Recommendation - -Take a clear position: "For this scope, I'd suggest these acceptance criteria: [list]." - -Include: -- **Definition of done:** What conditions must be true for the milestone to be complete? -- **Test coverage expectations:** What must be tested vs nice-to-have? -- **Quality gates:** What would block shipping? - -### Resolve Quality — Mandatory Rounds - -After presenting your recommendation, you MUST complete these rounds in order. Do NOT skip rounds. 
**Each round is multi-turn: run one round, then wait for the user's response before starting the next round.** - -**Complexity calibration:** If the milestone is simple, you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. - -**Round 1 — Per-slice acceptance criteria:** -For each slice, state 3-5 specific, testable acceptance criteria. Ask the user to confirm each slice's criteria. These must be concrete enough that the planner can use them directly. "Tests pass" is NOT an acceptance criterion. "POST /api/auth/signup with {email, password} returns 201 with {id, email}" IS an acceptance criterion. - -**Round 2 — Test strategy:** -For each slice, state what type of tests are needed (unit, integration, e2e) and what specifically gets tested. Ask the user to confirm. Example: "S01 needs: unit test for password hashing, integration test for signup endpoint with valid and invalid inputs. No e2e needed for this slice." - -**Round 3 — Definition of done:** -State the end-to-end scenario that proves the milestone works. Ask the user to confirm. Example: "Done means: a new user can sign up, log in, receive a JWT, and use that JWT to access a protected endpoint — all verified by running the sequence manually or via integration test." - -After completing all 3 rounds, proceed to the Layer 4 Gate. - -### Layer 4 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer4_quality_gate`: - -``` -Header: "Quality Gate" -Question: "Quality bar defined. Ready to write context and roadmap?" -Options: - - "Yes, write the artifacts (Recommended)" — proceed to Output Phase - - "Want to adjust the quality bar" — user will clarify, then re-present quality -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Output Phase until the user explicitly approves the quality bar. 
If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Output Phase - -Once all four layers are complete, you have gathered: -- Confirmed scope (Layer 1) -- Approved architecture (Layer 2) -- Error handling strategy (Layer 3) -- Quality bar and acceptance criteria (Layer 4) - -### Capability Contract - -Before writing a roadmap, produce or update `.gsd/REQUIREMENTS.md`. - -Use it as the project's explicit capability contract. Requirements discovered during the 4-layer discussion should be captured here with source `user` or `inferred` as appropriate. - -**Print the requirements in chat before writing the roadmap.** Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope). After the table, ask: "Confirm, adjust, or add?" **Non-bypassable:** If the user does not respond or gives an ambiguous answer, you MUST re-ask — never proceed to roadmap creation without explicit requirement confirmation. - -### Roadmap Preview - -Before writing any files, **print the planned roadmap in chat** so the user can see and approve it. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list. - -If the user raises a substantive objection, adjust the roadmap. Otherwise, present the roadmap and ask: "Ready to write, or want to adjust?" — one gate, not two. **Non-bypassable:** If the user does not respond or gives an ambiguous answer, you MUST re-ask — never write files without explicit approval. A missing response is not a "yes." - -### Naming Convention - -Directories use bare IDs. Files use ID-SUFFIX format. Titles live inside file content, not in names. 
-- Milestone dir: `.gsd/milestones/{{milestoneId}}/` -- Milestone files: `{{milestoneId}}-CONTEXT.md`, `{{milestoneId}}-ROADMAP.md` -- Slice dirs: `S01/`, `S02/`, etc. - -### Single Milestone - -Once the user is satisfied, in a single pass: -1. `mkdir -p .gsd/milestones/{{milestoneId}}/slices` -2. Write or update `.gsd/PROJECT.md` — use the **Project** output template below. Describe what the project is, its current state, and list the milestone sequence. -3. Write or update `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Confirm requirement states, ownership, and traceability before roadmap creation. - -**Depth-Preservation Guidance for context.md:** -When writing context.md, preserve the user's exact terminology, emphasis, and specific framing from the discussion. Do not paraphrase user nuance into generic summaries. If the user said "craft feel," write "craft feel" — not "high-quality user experience." If they emphasized a specific constraint or negative requirement, carry that emphasis through verbatim. The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision. - -**Enhanced Context Requirement:** Because this is a prepared discussion, use the `context-enhanced` template which includes sections for Codebase Brief, Architectural Decisions, Interface Contracts, Error Handling Strategy, Testing Requirements, Acceptance Criteria, and Ecosystem Notes. Populate these from the 4-layer discussion: -- Codebase Brief: from Layer 1 presentation -- Architectural Decisions: from Layer 2 — each decision with rationale, evidence, alternatives -- Error Handling Strategy: from Layer 3 -- Testing Requirements and Acceptance Criteria: from Layer 4 -- Ecosystem Notes: key findings from the ecosystem brief - -4. Write `{{contextPath}}` — use the **Context Enhanced** output template below. 
Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during discussion. -5. Call `gsd_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters. -6. For each architectural or pattern decision made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically. -7. {{commitInstruction}} - -After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically. - -### Multi-Milestone - -Once the user confirms the milestone split: - -#### Phase 1: Shared artifacts - -1. For each milestone, call `gsd_milestone_generate_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .gsd/milestones/<milestoneId>/slices` (substituting the generated ID). -2. Write `.gsd/PROJECT.md` — use the **Project** output template below. -3. Write `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet. -4. For any architectural or pattern decisions made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically. - -#### Phase 2: Primary milestone - -5. Write a full enhanced `CONTEXT.md` for the primary milestone (the one discussed in depth). Use the `context-enhanced` template. -6. 
Call `gsd_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done. - -#### MANDATORY: depends_on Frontmatter in CONTEXT.md - -Every CONTEXT.md for a milestone that depends on other milestones MUST have YAML frontmatter with `depends_on`. The auto-mode state machine reads this field to determine execution order — without it, milestones may execute out of order or in parallel when they shouldn't. - -```yaml ---- -depends_on: [M001, M002] ---- - -# M003: Title -``` - -If a milestone has no dependencies, omit the frontmatter. The dependency chain from the milestone confirmation gate MUST be reflected in each CONTEXT.md frontmatter. Do NOT rely on QUEUE.md or PROJECT.md for dependency tracking — the state machine only reads CONTEXT.md frontmatter. - -#### Phase 3: Sequential readiness gate for remaining milestones - -For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. Present three options: - -- **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (Layer 1-4 protocol). When the discussion concludes, write a full enhanced `CONTEXT.md`. Then move to the gate for the next milestone. -- **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. 
**What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted. -- **"Just queue it"** — This milestone is identified but intentionally left without context. No context file is written — the directory already exists from Phase 1. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user to run /gsd. The wizard starts a full discussion from scratch. - -**When "Discuss now" is chosen:** Run the full 4-layer protocol for that milestone using fresh preparation briefs scoped to that milestone. - -#### Milestone Gate Tracking (MANDATORY for multi-milestone) - -After EVERY Phase 3 gate decision, immediately write or update `.gsd/DISCUSSION-MANIFEST.json` with the cumulative state. This file is mechanically validated by the system before auto-mode starts — if gates are incomplete, auto-mode will NOT start. - -```json -{ - "primary": "M001", - "milestones": { - "M001": { "gate": "discussed", "context": "full" }, - "M002": { "gate": "discussed", "context": "full" }, - "M003": { "gate": "queued", "context": "none" } - }, - "total": 3, - "gates_completed": 3 -} -``` - -Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`. - -For single-milestone projects, do NOT write this file — it is only for multi-milestone discussions. - -#### Phase 4: Finalize - -7. {{multiMilestoneCommitInstruction}} - -After writing the files, say exactly: "Milestone M001 ready." — nothing else. Auto-mode will start automatically. 
- -{{inlinedTemplates}} diff --git a/src/resources/extensions/gsd/prompts/discuss.md b/src/resources/extensions/gsd/prompts/discuss.md index fe147f8f8..9cce068bf 100644 --- a/src/resources/extensions/gsd/prompts/discuss.md +++ b/src/resources/extensions/gsd/prompts/discuss.md @@ -28,6 +28,8 @@ After reflection is confirmed, decide the approach based on the actual scope — **Anti-reduction rule:** If the user describes a big vision, plan the big vision. Do not ask "what's the minimum viable version?" or try to reduce scope unless the user explicitly asks for an MVP or minimal version. When something is complex or risky, phase it into a later milestone — do not cut it. The user's ambition is the target, and your job is to sequence it intelligently, not shrink it. +{{preparationContext}} + ## Mandatory Investigation Before First Question Round Before asking your first question, do a mandatory investigation pass. This is not optional. @@ -47,6 +49,26 @@ This happens ONCE, before the first round. The goal: your first questions should For subsequent rounds, continue investigating between rounds — check docs, search, or scout as needed to make each round's questions smarter. But the first-round investigation is mandatory and explicit. Distribute searches across turns rather than clustering them in one turn. +## Question Rounds + +Ask **1–3 questions per round**. Keep each round tightly focused on one or two of the depth checklist dimensions — do not try to cover all six in one round. + +**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. 
Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.** + +**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round. + +After each answer set, investigate further if any answer opens a new unknown, then ask the next round. + +### Round cadence + +After each round of answers, decide whether you already have enough depth to write strong output. + +- **Incremental persistence:** After every 2 question rounds, silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` using `gsd_summary_save` with `artifact_type: "CONTEXT-DRAFT"` and `milestone_id: "{{milestoneId}}"`. This protects confirmed work against session crashes. Do NOT mention this save to the user. +- If not ready, continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round. +- **Depth-matching rule:** Simple, well-defined work needs fewer rounds — maybe 1–2. Large, ambiguous visions need more — maybe 4+. Do not pad rounds to hit a number. Stop when the Depth Enforcement checklist below is fully satisfied. +- Do not count the reflection step as a question round. Rounds start after reflection is confirmed. +- When you genuinely believe the depth checklist is satisfied, move to the Depth Verification step below. Do not ask a separate "ready to wrap up?" gate — the depth verification IS the gate. + ## Questioning Philosophy You are a thinking partner, not an interviewer. @@ -94,29 +116,27 @@ Do NOT offer to proceed until ALL of the following are satisfied. Track these in Before offering to proceed, demonstrate absorption: reference specific things the user emphasized, specific terminology they used, specific nuance they sharpened — and show how those shaped your understanding. Synthesize, don't recite. "Your emphasis on X led me to prioritize Y over Z" is good. 
"You said X, you said Y, you said Z" is not. The user should feel heard in the specifics, not just acknowledged in the abstract. -**Questioning depth should match scope.** Simple, well-defined work needs fewer rounds — maybe 1-2. Large, ambiguous visions need more — maybe 4+. Don't pad rounds to hit a number. Stop when the depth checklist is satisfied and you genuinely understand the work. - -Do not count the reflection step as a question round. Rounds start after reflection is confirmed. - ## Depth Verification Before moving to the wrap-up gate, present a structured depth summary as a checkpoint. **Print the summary as normal chat text first** — this is where the formatting renders properly. Structure the summary across the depth checklist dimensions using the user's own terminology and framing. Cover: what you understood them to be building, what shaped your understanding most (their emphasis, constraints, concerns), and any areas where you're least confident in your understanding. -**Then** use `ask_user_questions` with a short confirmation question — NOT the summary itself. The question field is designed for single sentences, not multi-paragraph summaries. +**Then confirm:** -**Convention:** The question ID must contain `depth_verification` (e.g., `depth_verification_confirm`). This naming convention enables downstream mechanical detection of this step. +**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with: +- header: "Depth Check" +- question: "Did I capture the depth right?" +- options: "Yes, you got it (Recommended)", "Not quite — let me clarify" +- **The question ID must contain `depth_verification`** (e.g., `depth_verification_confirm`) — this naming convention enables downstream mechanical detection and the write-gate. -Example flow: -1. Print in chat: the full depth summary with markdown formatting (headers, bold, bullets) -2. 
Call `ask_user_questions` with: header "Depth Check", question "Did I capture the depth right?", options "Yes, you got it (Recommended)" and "Not quite — let me clarify" +**If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text: "Did I capture that correctly? If not, tell me what I missed." Wait for explicit confirmation before proceeding. **The same non-bypassable gate applies to the plain-text path** — if the user does not respond, gives an ambiguous answer, or does not explicitly confirm, you MUST re-ask. Never rationalize past a missing confirmation. If they clarify, absorb the correction and re-verify. The depth verification is the required write-gate. Do **not** add another meta "ready to proceed?" checkpoint immediately after it unless there is still material ambiguity. -**CRITICAL — Non-bypassable gate:** The system mechanically blocks CONTEXT.md writes until the user selects the "(Recommended)" option. If the user declines, cancels, or the tool fails, you MUST re-ask — never rationalize past the block ("tool not responding, I'll proceed" is forbidden). The gate exists to protect the user's work; treat a block as an instruction, not an obstacle to work around. +**CRITICAL — Non-bypassable gate:** The system mechanically blocks CONTEXT.md writes until the user selects the "(Recommended)" option (structured path) or explicitly confirms (plain-text path). If the user declines, cancels, does not respond, or the tool fails, you MUST re-ask — never rationalize past the block ("tool not responding, I'll proceed" is forbidden). The gate exists to protect the user's work; treat a block as an instruction, not an obstacle to work around. ## Wrap-up Gate @@ -244,7 +264,7 @@ If a milestone has no dependencies, omit the frontmatter. 
The dependency chain f #### Phase 3: Sequential readiness gate for remaining milestones -For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. **Non-bypassable:** If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block or auto-select a readiness mode. Present three options: +For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then present the three options below to the user. **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions`. **If `{{structuredQuestionsAvailable}}` is `false`:** present the options as a plain-text numbered list and ask the user to type their choice. **Non-bypassable:** If the user does not respond, gives an ambiguous answer, or the tool fails, you MUST re-ask — never rationalize past the block or auto-select a readiness mode. Present three options: - **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (reflection → investigation → questioning → depth verification). When the discussion concludes, write a full `CONTEXT.md`. Then move to the gate for the next milestone. - **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. 
**What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted. @@ -256,9 +276,9 @@ Before writing each milestone's CONTEXT.md (whether primary or secondary), you M 1. **Read the actual code** for every file or module you reference. Confirm APIs exist, check what functions actually do, identify phantom capabilities (code that exists but isn't wired up). 2. **Check for stale assumptions** — the codebase changes. Verify referenced modules still work as described. -3. **Present findings** — use `ask_user_questions` with a question ID containing BOTH `depth_verification` AND the milestone ID (e.g., `depth_verification_M002`). Present: what you're about to write, key technical findings from investigation, risks the code review surfaced. +3. **Present findings** — **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with a question ID containing BOTH `depth_verification` AND the milestone ID (e.g., `depth_verification_M002`). Present: what you're about to write, key technical findings from investigation, risks the code review surfaced. **If `{{structuredQuestionsAvailable}}` is `false`:** present the same findings in plain text and ask for explicit confirmation before proceeding. -**The system mechanically blocks CONTEXT.md writes until the per-milestone depth verification passes.** Each milestone needs its own verification — one global verification does not unlock all milestones. +**The system mechanically blocks CONTEXT.md writes until the per-milestone depth verification passes** (structured path: user selects "(Recommended)" option; plain-text path: user explicitly confirms). 
Each milestone needs its own verification — one global verification does not unlock all milestones. **Why sequential, not batch:** After writing the primary milestone's context and roadmap, the agent still has context window capacity. Asking one milestone at a time lets the user decide per-milestone whether to invest that remaining capacity in a focused discussion now, or defer to a future session. A batch question ("Ready/Draft/Queue for M002, M003, M004?") forces the user to decide everything upfront without knowing how much session capacity remains. diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md index ddf3fa1d7..9895dd6a4 100644 --- a/src/resources/extensions/gsd/prompts/execute-task.md +++ b/src/resources/extensions/gsd/prompts/execute-task.md @@ -32,29 +32,30 @@ Then: 0. Narrate step transitions, key implementation decisions, and verification outcomes as you work. Keep it terse — one line between tool-call clusters, not between every call — but write complete sentences in user-facing prose, not shorthand notes or scratchpad fragments. 1. {{skillActivation}} Follow any activated skills before writing code. If no skills match this task, skip this step. 2. Execute the steps in the inlined task plan, adapting minor local mismatches when the surrounding code differs from the planner's snapshot -3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature. -4. Write or update tests as part of execution — tests are verification, not an afterthought. 
If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail). -5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply. +3. Before any `Write` that creates an artifact or output file, check whether that path already exists. If it does, read it first and decide whether the work is already done, should be extended, or truly needs replacement. "Create" in the plan does **not** mean the file is missing — a prior session may already have started it. +4. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature. +5. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail). +6. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply. **Background process rule:** Never use bare `command &` to run background processes. The shell's `&` operator leaves stdout/stderr attached to the parent, which causes the Bash tool to hang indefinitely waiting for those streams to close. 
Always redirect output before backgrounding: - Correct: `command > /dev/null 2>&1 &` or `nohup command > /dev/null 2>&1 &` - Example: `python -m http.server 8080 > /dev/null 2>&1 &` (NOT `python -m http.server 8080 &`) - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues -6. If the task plan includes a **Failure Modes** section (Q5), implement the error/timeout/malformed handling specified. Verify each dependency's failure path is handled. Skip if the section is absent. -7. If the task plan includes a **Load Profile** section (Q6), implement protections for the identified 10x breakpoint (connection pooling, rate limiting, pagination, etc.). Skip if absent. -8. If the task plan includes a **Negative Tests** section (Q7), write the specified negative test cases alongside the happy-path tests — malformed inputs, error paths, and boundary conditions. Skip if absent. -9. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors) -10. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary. -11. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section. -12. If the task touches UI, browser flows, DOM behavior, or user-visible web state: +7. If the task plan includes a **Failure Modes** section (Q5), implement the error/timeout/malformed handling specified. Verify each dependency's failure path is handled. Skip if the section is absent. +8. 
If the task plan includes a **Load Profile** section (Q6), implement protections for the identified 10x breakpoint (connection pooling, rate limiting, pagination, etc.). Skip if absent. +9. If the task plan includes a **Negative Tests** section (Q7), write the specified negative test cases alongside the happy-path tests — malformed inputs, error paths, and boundary conditions. Skip if absent. +10. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors) +11. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary. +12. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section. +13. If the task touches UI, browser flows, DOM behavior, or user-visible web state: - exercise the real flow in the browser - prefer `browser_batch` when the next few actions are obvious and sequential - prefer `browser_assert` for explicit pass/fail verification of the intended outcome - use `browser_diff` when an action's effect is ambiguous - use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI - record verification in terms of explicit checks passed/failed, not only prose interpretation -13. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section. -14. **If execution is running long or verification fails:** +14. If the task plan includes an Observability Impact section, verify those signals directly. 
Skip this step if the task plan omits the section. +15. **If execution is running long or verification fails:** **Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step. @@ -65,13 +66,13 @@ Then: - Distinguish "I know" from "I assume." Observable facts (the error says X) are strong evidence. Assumptions (this library should work this way) need verification. - Know when to stop. If you've tried 3+ fixes without progress, your mental model is probably wrong. Stop. List what you know for certain. List what you've ruled out. Form fresh hypotheses from there. - Don't fix symptoms. Understand *why* something fails before changing code. A test that passes after a change you don't understand is luck, not a fix. -15. **Blocker discovery:** If execution reveals that the remaining slice plan is fundamentally invalid — not just a bug or minor deviation, but a plan-invalidating finding like a wrong API, missing capability, or architectural mismatch — set `blocker_discovered: true` in the task summary frontmatter and describe the blocker clearly in the summary narrative. Do NOT set `blocker_discovered: true` for ordinary debugging, minor deviations, or issues that can be fixed within the current task or the remaining plan. This flag triggers an automatic replan of the slice. -16. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.gsd/DECISIONS.md` (read the template at `~/.gsd/agent/extensions/gsd/templates/decisions.md` if the file doesn't exist yet). 
Not every task produces decisions — only append when a meaningful choice was made. -17. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things. -18. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md` -19. Use that template to prepare the completion content you will pass to `gsd_complete_task` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you. -20. Call `gsd_complete_task` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically. -21. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message. +16. **Blocker discovery:** If execution reveals that the remaining slice plan is fundamentally invalid — not just a bug or minor deviation, but a plan-invalidating finding like a wrong API, missing capability, or architectural mismatch — set `blocker_discovered: true` in the task summary frontmatter and describe the blocker clearly in the summary narrative. Do NOT set `blocker_discovered: true` for ordinary debugging, minor deviations, or issues that can be fixed within the current task or the remaining plan. This flag triggers an automatic replan of the slice. +17. 
If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.gsd/DECISIONS.md` (read the template at `~/.gsd/agent/extensions/gsd/templates/decisions.md` if the file doesn't exist yet). Not every task produces decisions — only append when a meaningful choice was made. +18. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things. +19. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md` +20. Use that template to prepare the completion content you will pass to `gsd_complete_task` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you. +21. Call `gsd_complete_task` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically. +22. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message. All work stays in your working directory: `{{workingDirectory}}`. 
diff --git a/src/resources/extensions/gsd/prompts/guided-resume-task.md b/src/resources/extensions/gsd/prompts/guided-resume-task.md index 3b15c0cad..71cbea2e5 100644 --- a/src/resources/extensions/gsd/prompts/guided-resume-task.md +++ b/src/resources/extensions/gsd/prompts/guided-resume-task.md @@ -1 +1 @@ -Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. {{skillActivation}} +Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Before you create any expected artifact or output file, check whether it already exists and read it first — a prior session may already have started or completed that work. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. {{skillActivation}} diff --git a/src/resources/extensions/gsd/prompts/validate-milestone.md b/src/resources/extensions/gsd/prompts/validate-milestone.md index 0b7046b7f..aa6aa75a6 100644 --- a/src/resources/extensions/gsd/prompts/validate-milestone.md +++ b/src/resources/extensions/gsd/prompts/validate-milestone.md @@ -31,7 +31,7 @@ Prompt: "Review milestone {{milestoneId}} requirements coverage. Working directo Prompt: "Review milestone {{milestoneId}} cross-slice integration. 
Working directory: {{workingDirectory}}. Read `{{roadmapPath}}` and find the boundary map (produces/consumes contracts). For each boundary, check that the producing slice's SUMMARY confirms it produced the artifact, and the consuming slice's SUMMARY confirms it consumed it. Output a markdown table: Boundary | Producer Summary | Consumer Summary | Status. End with a one-line verdict: PASS if all boundaries honored, NEEDS-ATTENTION if any gaps." **Reviewer C — Assessment & Acceptance Criteria** -Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Output a checklist: [ ] Criterion | Evidence. End with a one-line verdict: PASS if all criteria met, NEEDS-ATTENTION if gaps exist." +Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Then review the inlined milestone verification classes from planning. For each non-empty planned class, output a markdown table: Class | Planned Check | Evidence | Verdict. Use the exact class names `Contract`, `Integration`, `Operational`, and `UAT` whenever those classes are present. If no verification classes were planned, say that explicitly. Output two sections: `Acceptance Criteria` with a checklist `[ ] Criterion | Evidence`, and `Verification Classes` with the table. End with a one-line verdict: PASS if all criteria and verification classes are covered, NEEDS-ATTENTION if gaps exist." 
### Step 2 — Synthesize Findings @@ -70,6 +70,7 @@ reviewers: 3 ``` Call `gsd_validate_milestone` with the camelCase fields `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verdictRationale`, and `remediationPlan` when needed. If you include verification-class analysis, pass it in `verificationClasses`. +Extract the `Verification Classes` subsection from Reviewer C and pass it verbatim in `verificationClasses` so the persisted validation output uses the canonical class names `Contract`, `Integration`, `Operational`, and `UAT`. **DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `gsd_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `gsd_*` tools. Direct DB access corrupts the WAL and bypasses tool-level validation. diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 9dddc53e6..1275feca3 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -322,17 +322,8 @@ const isStatusDone = isClosedStatus; * * Must produce field-identical GSDState to _deriveStateImpl() for the same project. */ -export async function deriveStateFromDb(basePath: string): Promise { - const requirements = parseRequirementCounts(await loadFile(resolveGsdRootFile(basePath, "REQUIREMENTS"))); - +function reconcileDiskToDb(basePath: string): MilestoneRow[] { let allMilestones = getAllMilestones(); - - // Incremental disk→DB sync: milestone directories created outside the DB - // write path (via /gsd queue, manual mkdir, or complete-milestone writing the - // next CONTEXT.md) are never inserted by the initial migration guard in - // auto-start.ts because that guard only runs when gsd.db doesn't exist yet. 
- // Reconcile here so deriveStateFromDb never silently misses queued milestones. - // insertMilestone uses INSERT OR IGNORE, so this is safe to call every time. const dbIdSet = new Set(allMilestones.map(m => m.id)); const diskIds = findMilestoneIds(basePath); let synced = false; @@ -344,11 +335,6 @@ export async function deriveStateFromDb(basePath: string): Promise { } if (synced) allMilestones = getAllMilestones(); - // Disk→DB slice reconciliation (#2533): slices defined in ROADMAP.md but - // missing from the DB cause permanent "No slice eligible" blocks because - // the dependency resolver only sees DB rows. Parse each milestone's roadmap - // and insert any missing slices, checking SUMMARY files to set correct status. - // insertSlice uses INSERT OR IGNORE, so existing rows are never overwritten. for (const mid of diskIds) { if (isGhostMilestone(basePath, mid)) continue; const roadmapPath = resolveMilestoneFile(basePath, mid, "ROADMAP"); @@ -373,93 +359,43 @@ export async function deriveStateFromDb(basePath: string): Promise { }); } } + return allMilestones; +} - // Reconcile: discover milestones that exist on disk but are missing from - // the DB. This happens when milestones were created before the DB migration - // or were manually added to the filesystem. Without this, disk-only - // milestones are invisible after migration (#2416). - const dbMilestoneIds = new Set(allMilestones.map(m => m.id)); - const diskMilestoneIds = findMilestoneIds(basePath); - for (const diskId of diskMilestoneIds) { - if (!dbMilestoneIds.has(diskId)) { - // Synthesize a minimal MilestoneRow for the disk-only milestone. - // Title and status will be resolved from disk files in the loop below. 
- allMilestones.push({ - id: diskId, - title: diskId, - status: 'active', - depends_on: [] as string[], - created_at: new Date().toISOString(), - } as MilestoneRow); - } - } - // Re-sort so milestones follow queue order (same as dispatch guard) (#2556) - const customOrder = loadQueueOrder(basePath); - const sortedIds = sortByQueueOrder(allMilestones.map(m => m.id), customOrder); - const byId = new Map(allMilestones.map(m => [m.id, m])); - allMilestones.length = 0; - for (const id of sortedIds) allMilestones.push(byId.get(id)!); - - // Parallel worker isolation: when locked, filter to just the locked milestone - const milestoneLock = process.env.GSD_MILESTONE_LOCK; - const milestones = milestoneLock - ? allMilestones.filter(m => m.id === milestoneLock) - : allMilestones; - - if (milestones.length === 0) { - return { - activeMilestone: null, - activeSlice: null, - activeTask: null, - phase: 'pre-planning', - recentDecisions: [], - blockers: [], - nextAction: 'No milestones found. Run /gsd to create one.', - registry: [], - requirements, - progress: { milestones: { done: 0, total: 0 } }, - }; - } - - // Phase 1: Build completeness set (which milestones count as "done" for dep resolution) +function buildCompletenessSet(basePath: string, milestones: MilestoneRow[]) { const completeMilestoneIds = new Set(); const parkedMilestoneIds = new Set(); for (const m of milestones) { - // Check disk for PARKED flag (not stored in DB status reliably — disk is truth for flag files) const parkedFile = resolveMilestoneFile(basePath, m.id, "PARKED"); if (parkedFile || m.status === 'parked') { parkedMilestoneIds.add(m.id); continue; } - if (isStatusDone(m.status)) { completeMilestoneIds.add(m.id); continue; } - - // Check if milestone has a summary on disk (terminal artifact per #864) const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); if (summaryFile) { completeMilestoneIds.add(m.id); continue; } - - // Milestones with all slices done but no SUMMARY file are in - // 
validating/completing state — intentionally NOT added to - // completeMilestoneIds. The SUMMARY file (checked above) is the - // terminal artifact that proves completion per #864. } + return { completeMilestoneIds, parkedMilestoneIds }; +} - // Phase 2: Build registry and find active milestone +async function buildRegistryAndFindActive( + basePath: string, + milestones: MilestoneRow[], + completeMilestoneIds: Set, + parkedMilestoneIds: Set +) { const registry: MilestoneRegistryEntry[] = []; let activeMilestone: ActiveRef | null = null; let activeMilestoneSlices: SliceRow[] = []; let activeMilestoneFound = false; let activeMilestoneHasDraft = false; - // Queued shells (DB row, no slices, no content files) are deferred during - // the main loop so they don't eclipse real active milestones (#3470). - // If no real active milestone is found, the first deferred shell is promoted. let firstDeferredQueuedShell: { id: string; title: string; deps: string[] } | null = null; for (const m of milestones) { @@ -468,19 +404,14 @@ export async function deriveStateFromDb(basePath: string): Promise { continue; } - // Ghost milestone check: no slices in DB AND no substantive files on disk. - // Skip queued milestones — they are handled by the deferred-shell logic below (#3470). 
const slices = getMilestoneSlices(m.id); if (slices.length === 0 && !isStatusDone(m.status) && m.status !== 'queued') { - // Check disk for ghost detection if (isGhostMilestone(basePath, m.id)) continue; } const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); - // Determine if this milestone is complete if (completeMilestoneIds.has(m.id) || (summaryFile !== null)) { - // Get title from DB or summary let title = stripMilestonePrefix(m.title) || m.id; if (summaryFile && !m.title) { const summaryContent = await loadFile(summaryFile); @@ -489,14 +420,12 @@ export async function deriveStateFromDb(basePath: string): Promise { } } registry.push({ id: m.id, title, status: 'complete' }); - completeMilestoneIds.add(m.id); // ensure it's in the set + completeMilestoneIds.add(m.id); continue; } - // Not complete — determine if it should be active const allSlicesDone = slices.length > 0 && slices.every(s => isStatusDone(s.status)); - // Get title — prefer DB, fall back to context file extraction let title = stripMilestonePrefix(m.title) || m.id; if (title === m.id) { const contextFile = resolveMilestoneFile(basePath, m.id, "CONTEXT"); @@ -507,7 +436,6 @@ export async function deriveStateFromDb(basePath: string): Promise { } if (!activeMilestoneFound) { - // Check milestone-level dependencies const deps = m.depends_on; const depsUnmet = deps.some(dep => !completeMilestoneIds.has(dep)); @@ -516,11 +444,6 @@ export async function deriveStateFromDb(basePath: string): Promise { continue; } - // Defer queued shell milestones with no substantive content (#3470). - // A queued milestone with no slices and no context/draft file is a - // placeholder that should not block later real active milestones. - // If no real active milestone is found after the loop, the first - // deferred shell is promoted to active (#2921). 
if (m.status === 'queued' && slices.length === 0) { const contextFile = resolveMilestoneFile(basePath, m.id, "CONTEXT"); const draftFile = resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT"); @@ -533,14 +456,12 @@ export async function deriveStateFromDb(basePath: string): Promise { } } - // Handle all-slices-done case (validating/completing) if (allSlicesDone) { const validationFile = resolveMilestoneFile(basePath, m.id, "VALIDATION"); const validationContent = validationFile ? await loadFile(validationFile) : null; const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; if (!validationTerminal || (validationTerminal && !summaryFile)) { - // Validating or completing — still active activeMilestone = { id: m.id, title }; activeMilestoneSlices = slices; activeMilestoneFound = true; @@ -549,7 +470,6 @@ export async function deriveStateFromDb(basePath: string): Promise { } } - // Check for context draft (needs-discussion phase) const contextFile = resolveMilestoneFile(basePath, m.id, "CONTEXT"); const draftFile = resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT"); if (!contextFile && draftFile) activeMilestoneHasDraft = true; @@ -559,13 +479,11 @@ export async function deriveStateFromDb(basePath: string): Promise { activeMilestoneFound = true; registry.push({ id: m.id, title, status: 'active', ...(deps.length > 0 ? { dependsOn: deps } : {}) }); } else { - // After active milestone found — rest are pending const deps = m.depends_on; registry.push({ id: m.id, title, status: 'pending', ...(deps.length > 0 ? { dependsOn: deps } : {}) }); } } - // Promote deferred queued shell if no real active milestone was found (#3470/#2921). 
if (!activeMilestoneFound && firstDeferredQueuedShell) { const shell = firstDeferredQueuedShell; activeMilestone = { id: shell.id, title: shell.title }; @@ -575,74 +493,264 @@ export async function deriveStateFromDb(basePath: string): Promise { if (entry) entry.status = 'active'; } - const milestoneProgress = { - done: registry.filter(e => e.status === 'complete').length, - total: registry.length, - }; + return { registry, activeMilestone, activeMilestoneSlices, activeMilestoneHasDraft }; +} - // ── No active milestone ────────────────────────────────────────────── - if (!activeMilestone) { - const pendingEntries = registry.filter(e => e.status === 'pending'); - const parkedEntries = registry.filter(e => e.status === 'parked'); +function handleNoActiveMilestone( + registry: MilestoneRegistryEntry[], + requirements: any, + milestoneProgress: { done: number, total: number } +): GSDState { + const pendingEntries = registry.filter(e => e.status === 'pending'); + const parkedEntries = registry.filter(e => e.status === 'parked'); - if (pendingEntries.length > 0) { - const blockerDetails = pendingEntries - .filter(e => e.dependsOn && e.dependsOn.length > 0) - .map(e => `${e.id} is waiting on unmet deps: ${e.dependsOn!.join(', ')}`); - return { - activeMilestone: null, activeSlice: null, activeTask: null, - phase: 'blocked', - recentDecisions: [], blockers: blockerDetails.length > 0 - ? blockerDetails - : ['All remaining milestones are dep-blocked but no deps listed — check CONTEXT.md files'], - nextAction: 'Resolve milestone dependencies before proceeding.', - registry, requirements, - progress: { milestones: milestoneProgress }, - }; - } - - if (parkedEntries.length > 0) { - const parkedIds = parkedEntries.map(e => e.id).join(', '); - return { - activeMilestone: null, activeSlice: null, activeTask: null, - phase: 'pre-planning', - recentDecisions: [], blockers: [], - nextAction: `All remaining milestones are parked (${parkedIds}). 
Run /gsd unpark or create a new milestone.`, - registry, requirements, - progress: { milestones: milestoneProgress }, - }; - } - - if (registry.length === 0) { - return { - activeMilestone: null, activeSlice: null, activeTask: null, - phase: 'pre-planning', - recentDecisions: [], blockers: [], - nextAction: 'No milestones found. Run /gsd to create one.', - registry: [], requirements, - progress: { milestones: { done: 0, total: 0 } }, - }; - } - - // All milestones complete - const lastEntry = registry[registry.length - 1]; - const activeReqs = requirements.active ?? 0; - const completionNote = activeReqs > 0 - ? `All milestones complete. ${activeReqs} active requirement${activeReqs === 1 ? '' : 's'} in REQUIREMENTS.md ${activeReqs === 1 ? 'has' : 'have'} not been mapped to a milestone.` - : 'All milestones complete.'; + if (pendingEntries.length > 0) { + const blockerDetails = pendingEntries + .filter(e => e.dependsOn && e.dependsOn.length > 0) + .map(e => `${e.id} is waiting on unmet deps: ${e.dependsOn!.join(', ')}`); return { - activeMilestone: null, - lastCompletedMilestone: lastEntry ? { id: lastEntry.id, title: lastEntry.title } : null, - activeSlice: null, activeTask: null, - phase: 'complete', - recentDecisions: [], blockers: [], - nextAction: completionNote, + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'blocked', + recentDecisions: [], blockers: blockerDetails.length > 0 + ? 
blockerDetails + : ['All remaining milestones are dep-blocked but no deps listed — check CONTEXT.md files'], + nextAction: 'Resolve milestone dependencies before proceeding.', registry, requirements, progress: { milestones: milestoneProgress }, }; } - // ── Active milestone has no slices or no roadmap ──────────────────── + if (parkedEntries.length > 0) { + const parkedIds = parkedEntries.map(e => e.id).join(', '); + return { + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'pre-planning', + recentDecisions: [], blockers: [], + nextAction: `All remaining milestones are parked (${parkedIds}). Run /gsd unpark or create a new milestone.`, + registry, requirements, + progress: { milestones: milestoneProgress }, + }; + } + + if (registry.length === 0) { + return { + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'pre-planning', + recentDecisions: [], blockers: [], + nextAction: 'No milestones found. Run /gsd to create one.', + registry: [], requirements, + progress: { milestones: { done: 0, total: 0 } }, + }; + } + + const lastEntry = registry[registry.length - 1]; + const activeReqs = requirements.active ?? 0; + const completionNote = activeReqs > 0 + ? `All milestones complete. ${activeReqs} active requirement${activeReqs === 1 ? '' : 's'} in REQUIREMENTS.md ${activeReqs === 1 ? 'has' : 'have'} not been mapped to a milestone.` + : 'All milestones complete.'; + return { + activeMilestone: null, + lastCompletedMilestone: lastEntry ? 
{ id: lastEntry.id, title: lastEntry.title } : null, + activeSlice: null, activeTask: null, + phase: 'complete', + recentDecisions: [], blockers: [], + nextAction: completionNote, + registry, requirements, + progress: { milestones: milestoneProgress }, + }; +} + +async function handleAllSlicesDone( + basePath: string, + activeMilestone: ActiveRef, + registry: MilestoneRegistryEntry[], + requirements: any, + milestoneProgress: { done: number, total: number }, + sliceProgress: { done: number, total: number } +): Promise { + const validationFile = resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION"); + const validationContent = validationFile ? await loadFile(validationFile) : null; + const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; + const verdict = validationContent ? extractVerdict(validationContent) : undefined; + + if (!validationTerminal || verdict === 'needs-remediation') { + return { + activeMilestone, activeSlice: null, activeTask: null, + phase: 'validating-milestone', + recentDecisions: [], blockers: [], + nextAction: `Validate milestone ${activeMilestone.id} before completion.`, + registry, requirements, + progress: { milestones: milestoneProgress, slices: sliceProgress }, + }; + } + + return { + activeMilestone, activeSlice: null, activeTask: null, + phase: 'completing-milestone', + recentDecisions: [], blockers: [], + nextAction: `All slices complete in ${activeMilestone.id}. 
Write milestone summary.`, + registry, requirements, + progress: { milestones: milestoneProgress, slices: sliceProgress }, + }; +} + +function resolveSliceDependencies(activeMilestoneSlices: SliceRow[]): { activeSlice: ActiveRef | null, activeSliceRow: SliceRow | null } { + const doneSliceIds = new Set( + activeMilestoneSlices.filter(s => isStatusDone(s.status)).map(s => s.id) + ); + + const sliceLock = process.env.GSD_SLICE_LOCK; + if (sliceLock) { + const lockedSlice = activeMilestoneSlices.find(s => s.id === sliceLock); + if (lockedSlice) { + return { activeSlice: { id: lockedSlice.id, title: lockedSlice.title }, activeSliceRow: lockedSlice }; + } else { + logWarning("state", `GSD_SLICE_LOCK=${sliceLock} not found in active slices — worker has no assigned work`); + return { activeSlice: null, activeSliceRow: null }; + } + } + + for (const s of activeMilestoneSlices) { + if (isStatusDone(s.status)) continue; + if (isDeferredStatus(s.status)) continue; + if (s.depends.every(dep => doneSliceIds.has(dep))) { + return { activeSlice: { id: s.id, title: s.title }, activeSliceRow: s }; + } + } + return { activeSlice: null, activeSliceRow: null }; +} + +async function reconcileSliceTasks( + basePath: string, + milestoneId: string, + sliceId: string, + planFile: string +): Promise { + let tasks = getSliceTasks(milestoneId, sliceId); + + if (tasks.length === 0 && planFile) { + try { + const planContent = await loadFile(planFile); + if (planContent) { + const diskPlan = parsePlan(planContent); + if (diskPlan.tasks.length > 0) { + for (let i = 0; i < diskPlan.tasks.length; i++) { + const t = diskPlan.tasks[i]; + try { + insertTask({ + id: t.id, + sliceId, + milestoneId, + title: t.title, + status: t.done ? 'complete' : 'pending', + sequence: i + 1, + }); + } catch (insertErr) { + logWarning("reconcile", `failed to insert task ${t.id} from plan file: ${insertErr instanceof Error ? 
insertErr.message : String(insertErr)}`); + } + } + tasks = getSliceTasks(milestoneId, sliceId); + logWarning("reconcile", `imported ${tasks.length} tasks from plan file for ${milestoneId}/${sliceId} — DB was empty (#3600)`, { mid: milestoneId, sid: sliceId }); + } + } + } catch (err) { + logError("reconcile", `plan-file task import failed for ${milestoneId}/${sliceId}: ${err instanceof Error ? err.message : String(err)}`); + } + } + + let reconciled = false; + for (const t of tasks) { + if (isStatusDone(t.status)) continue; + const summaryPath = resolveTaskFile(basePath, milestoneId, sliceId, t.id, "SUMMARY"); + if (summaryPath && existsSync(summaryPath)) { + try { + updateTaskStatus(milestoneId, sliceId, t.id, "complete"); + logWarning("reconcile", `task ${milestoneId}/${sliceId}/${t.id} status reconciled from "${t.status}" to "complete" (#2514)`, { mid: milestoneId, sid: sliceId, tid: t.id }); + reconciled = true; + } catch (e) { + logError("reconcile", `failed to update task ${t.id}`, { tid: t.id, error: (e as Error).message }); + } + } + } + if (reconciled) { + tasks = getSliceTasks(milestoneId, sliceId); + } + return tasks; +} + +async function detectBlockers(basePath: string, milestoneId: string, sliceId: string, tasks: TaskRow[]): Promise { + const completedTasks = tasks.filter(t => isStatusDone(t.status)); + for (const ct of completedTasks) { + if (ct.blocker_discovered) { + return ct.id; + } + const summaryFile = resolveTaskFile(basePath, milestoneId, sliceId, ct.id, "SUMMARY"); + if (!summaryFile) continue; + const summaryContent = await loadFile(summaryFile); + if (!summaryContent) continue; + const summary = parseSummary(summaryContent); + if (summary.frontmatter.blocker_discovered) { + return ct.id; + } + } + return null; +} + +function checkReplanTrigger(basePath: string, milestoneId: string, sliceId: string): boolean { + const sliceRow = getSlice(milestoneId, sliceId); + const dbTriggered = !!sliceRow?.replan_triggered_at; + const diskTriggered = 
!dbTriggered && + !!resolveSliceFile(basePath, milestoneId, sliceId, "REPLAN-TRIGGER"); + return dbTriggered || diskTriggered; +} + +async function checkInterruptedWork(basePath: string, milestoneId: string, sliceId: string): Promise { + const sDir = resolveSlicePath(basePath, milestoneId, sliceId); + const continueFile = sDir ? resolveSliceFile(basePath, milestoneId, sliceId, "CONTINUE") : null; + return !!(continueFile && await loadFile(continueFile)) || + !!(sDir && await loadFile(join(sDir, "continue.md"))); +} + +export async function deriveStateFromDb(basePath: string): Promise { + const requirements = parseRequirementCounts(await loadFile(resolveGsdRootFile(basePath, "REQUIREMENTS"))); + + let allMilestones = reconcileDiskToDb(basePath); + + const customOrder = loadQueueOrder(basePath); + const sortedIds = sortByQueueOrder(allMilestones.map(m => m.id), customOrder); + const byId = new Map(allMilestones.map(m => [m.id, m])); + allMilestones.length = 0; + for (const id of sortedIds) allMilestones.push(byId.get(id)!); + + const milestoneLock = process.env.GSD_MILESTONE_LOCK; + const milestones = milestoneLock + ? allMilestones.filter(m => m.id === milestoneLock) + : allMilestones; + + if (milestones.length === 0) { + return { + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'pre-planning', recentDecisions: [], blockers: [], + nextAction: 'No milestones found. 
Run /gsd to create one.', + registry: [], requirements, + progress: { milestones: { done: 0, total: 0 } }, + }; + } + + const { completeMilestoneIds, parkedMilestoneIds } = buildCompletenessSet(basePath, milestones); + + const registryContext = await buildRegistryAndFindActive(basePath, milestones, completeMilestoneIds, parkedMilestoneIds); + const { registry, activeMilestone, activeMilestoneSlices, activeMilestoneHasDraft } = registryContext; + + const milestoneProgress = { + done: registry.filter(e => e.status === 'complete').length, + total: registry.length, + }; + + if (!activeMilestone) { + return handleNoActiveMilestone(registry, requirements, milestoneProgress); + } + const hasRoadmap = resolveMilestoneFile(basePath, activeMilestone.id, "ROADMAP") !== null; if (activeMilestoneSlices.length === 0) { @@ -659,195 +767,60 @@ export async function deriveStateFromDb(basePath: string): Promise { }; } - // Has roadmap file but zero slices in DB — pre-planning (zero-slice roadmap guard) return { activeMilestone, activeSlice: null, activeTask: null, - phase: 'pre-planning', - recentDecisions: [], blockers: [], + phase: 'pre-planning', recentDecisions: [], blockers: [], nextAction: `Milestone ${activeMilestone.id} has a roadmap but no slices defined. Add slices to the roadmap.`, registry, requirements, - progress: { - milestones: milestoneProgress, - slices: { done: 0, total: 0 }, - }, + progress: { milestones: milestoneProgress, slices: { done: 0, total: 0 } }, }; } - // ── All slices done → validating/completing ───────────────────────── const allSlicesDone = activeMilestoneSlices.every(s => isStatusDone(s.status)); - if (allSlicesDone) { - const validationFile = resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION"); - const validationContent = validationFile ? await loadFile(validationFile) : null; - const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; - const verdict = validationContent ? 
extractVerdict(validationContent) : undefined; - const sliceProgress = { - done: activeMilestoneSlices.length, - total: activeMilestoneSlices.length, - }; - - // Force re-validation when verdict is needs-remediation — remediation slices - // may have completed since the stale validation was written (#3596). - if (!validationTerminal || verdict === 'needs-remediation') { - return { - activeMilestone, activeSlice: null, activeTask: null, - phase: 'validating-milestone', - recentDecisions: [], blockers: [], - nextAction: `Validate milestone ${activeMilestone.id} before completion.`, - registry, requirements, - progress: { milestones: milestoneProgress, slices: sliceProgress }, - }; - } - - return { - activeMilestone, activeSlice: null, activeTask: null, - phase: 'completing-milestone', - recentDecisions: [], blockers: [], - nextAction: `All slices complete in ${activeMilestone.id}. Write milestone summary.`, - registry, requirements, - progress: { milestones: milestoneProgress, slices: sliceProgress }, - }; - } - - // ── Find active slice (first incomplete with deps satisfied) ───────── const sliceProgress = { done: activeMilestoneSlices.filter(s => isStatusDone(s.status)).length, total: activeMilestoneSlices.length, }; - const doneSliceIds = new Set( - activeMilestoneSlices.filter(s => isStatusDone(s.status)).map(s => s.id) - ); + if (allSlicesDone) { + return handleAllSlicesDone(basePath, activeMilestone, registry, requirements, milestoneProgress, sliceProgress); + } - let activeSlice: ActiveRef | null = null; - let activeSliceRow: SliceRow | null = null; - - // ── Slice-level parallel worker isolation ───────────────────────────── - // When GSD_SLICE_LOCK is set, this process is a parallel worker scoped - // to a single slice. Override activeSlice to only the locked slice ID. 
- const sliceLock = process.env.GSD_SLICE_LOCK; - if (sliceLock) { - const lockedSlice = activeMilestoneSlices.find(s => s.id === sliceLock); - if (lockedSlice) { - activeSlice = { id: lockedSlice.id, title: lockedSlice.title }; - activeSliceRow = lockedSlice; - } else { - logWarning("state", `GSD_SLICE_LOCK=${sliceLock} not found in active slices — worker has no assigned work`); - // Don't silently continue — this is a dispatch error + const activeSliceContext = resolveSliceDependencies(activeMilestoneSlices); + if (!activeSliceContext.activeSlice) { + // If locked slice wasn't found, it returns null but logs warning, we need to return 'blocked' + if (process.env.GSD_SLICE_LOCK) { return { activeMilestone, activeSlice: null, activeTask: null, - phase: 'blocked', - recentDecisions: [], blockers: [`GSD_SLICE_LOCK=${sliceLock} not found in active milestone slices`], + phase: 'blocked', recentDecisions: [], blockers: [`GSD_SLICE_LOCK=${process.env.GSD_SLICE_LOCK} not found in active milestone slices`], nextAction: 'Slice lock references a non-existent slice — check orchestrator dispatch.', registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress }, }; } - } else { - for (const s of activeMilestoneSlices) { - if (isStatusDone(s.status)) continue; - // #2661: Skip deferred slices — a decision explicitly deferred this work. - // Without this guard the dispatcher would keep dispatching deferred slices - // because DECISIONS.md is only contextual, not authoritative for dispatch. 
- if (isDeferredStatus(s.status)) continue; - if (s.depends.every(dep => doneSliceIds.has(dep))) { - activeSlice = { id: s.id, title: s.title }; - activeSliceRow = s; - break; - } - } - } - - if (!activeSlice) { return { activeMilestone, activeSlice: null, activeTask: null, - phase: 'blocked', - recentDecisions: [], blockers: ['No slice eligible — check dependency ordering'], + phase: 'blocked', recentDecisions: [], blockers: ['No slice eligible — check dependency ordering'], nextAction: 'Resolve dependency blockers or plan next slice.', registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress }, }; } + const { activeSlice } = activeSliceContext; - // ── Check for slice plan file on disk ──────────────────────────────── const planFile = resolveSliceFile(basePath, activeMilestone.id, activeSlice.id, "PLAN"); if (!planFile) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'planning', - recentDecisions: [], blockers: [], + phase: 'planning', recentDecisions: [], blockers: [], nextAction: `Plan slice ${activeSlice.id} (${activeSlice.title}).`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress }, }; } - // ── Get tasks from DB ──────────────────────────────────────────────── - let tasks = getSliceTasks(activeMilestone.id, activeSlice.id); - - // ── Reconcile missing tasks: plan file has tasks but DB is empty (#3600) ── - // When the planning agent writes S##-PLAN.md with task entries but never - // calls the gsd_plan_slice persistence tool, the DB has zero task rows - // even though the plan file contains valid tasks. Without this reconciliation, - // deriveState returns phase='planning' forever — the dispatcher re-dispatches - // plan-slice in an infinite loop. 
- if (tasks.length === 0 && planFile) { - try { - const planContent = await loadFile(planFile); - if (planContent) { - const diskPlan = parsePlan(planContent); - if (diskPlan.tasks.length > 0) { - for (let i = 0; i < diskPlan.tasks.length; i++) { - const t = diskPlan.tasks[i]; - try { - insertTask({ - id: t.id, - sliceId: activeSlice.id, - milestoneId: activeMilestone.id, - title: t.title, - status: t.done ? 'complete' : 'pending', - sequence: i + 1, - }); - } catch (insertErr) { - // Task may already exist from a partial previous import — skip - logWarning("reconcile", `failed to insert task ${t.id} from plan file: ${insertErr instanceof Error ? insertErr.message : String(insertErr)}`); - } - } - tasks = getSliceTasks(activeMilestone.id, activeSlice.id); - logWarning("reconcile", `imported ${tasks.length} tasks from plan file for ${activeMilestone.id}/${activeSlice.id} — DB was empty (#3600)`, { mid: activeMilestone.id, sid: activeSlice.id }); - } - } - } catch (err) { - // Non-fatal — fall through to the existing "empty plan" logic - logError("reconcile", `plan-file task import failed for ${activeMilestone.id}/${activeSlice.id}: ${err instanceof Error ? err.message : String(err)}`); - } - } - - // ── Reconcile stale task status (#2514) ────────────────────────────── - // When a session disconnects after the agent writes SUMMARY + VERIFY - // artifacts but before postUnitPostVerification updates the DB, tasks - // remain "pending" in the DB despite being complete on disk. Without - // reconciliation, deriveState keeps returning the stale task as active, - // causing the dispatcher to re-dispatch the same completed task forever. 
- let reconciled = false; - for (const t of tasks) { - if (isStatusDone(t.status)) continue; - const summaryPath = resolveTaskFile(basePath, activeMilestone.id, activeSlice.id, t.id, "SUMMARY"); - if (summaryPath && existsSync(summaryPath)) { - try { - updateTaskStatus(activeMilestone.id, activeSlice.id, t.id, "complete"); - logWarning("reconcile", `task ${activeMilestone.id}/${activeSlice.id}/${t.id} status reconciled from "${t.status}" to "complete" (#2514)`, { mid: activeMilestone.id, sid: activeSlice.id, tid: t.id }); - reconciled = true; - } catch (e) { - // DB write failed — continue with stale status rather than crash - logError("reconcile", `failed to update task ${t.id}`, { tid: t.id, error: (e as Error).message }); - } - } - } - // Re-fetch tasks if any were reconciled so downstream logic sees fresh status - if (reconciled) { - tasks = getSliceTasks(activeMilestone.id, activeSlice.id); - } - + const tasks = await reconcileSliceTasks(basePath, activeMilestone.id, activeSlice.id, planFile); + const taskProgress = { done: tasks.filter(t => isStatusDone(t.status)).length, total: tasks.length, @@ -856,23 +829,19 @@ export async function deriveStateFromDb(basePath: string): Promise { const activeTaskRow = tasks.find(t => !isStatusDone(t.status)); if (!activeTaskRow && tasks.length > 0) { - // All tasks done but slice not marked complete → summarizing return { activeMilestone, activeSlice, activeTask: null, - phase: 'summarizing', - recentDecisions: [], blockers: [], + phase: 'summarizing', recentDecisions: [], blockers: [], nextAction: `All tasks done in ${activeSlice.id}. 
Write slice summary and complete slice.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, }; } - // Empty plan — no tasks defined yet if (!activeTaskRow) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'planning', - recentDecisions: [], blockers: [], + phase: 'planning', recentDecisions: [], blockers: [], nextAction: `Slice ${activeSlice.id} has a plan file but no tasks. Add tasks to the plan.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, @@ -881,15 +850,13 @@ export async function deriveStateFromDb(basePath: string): Promise { const activeTask: ActiveRef = { id: activeTaskRow.id, title: activeTaskRow.title }; - // ── Task plan file check (#909) ───────────────────────────────────── const tasksDir = resolveTasksDir(basePath, activeMilestone.id, activeSlice.id); if (tasksDir && existsSync(tasksDir) && tasks.length > 0) { const allFiles = readdirSync(tasksDir).filter(f => f.endsWith(".md")); if (allFiles.length === 0) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'planning', - recentDecisions: [], blockers: [], + phase: 'planning', recentDecisions: [], blockers: [], nextAction: `Task plan files missing for ${activeSlice.id}. Run plan-slice to generate task plans.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, @@ -897,51 +864,24 @@ export async function deriveStateFromDb(basePath: string): Promise { } } - // ── Quality gate evaluation check ────────────────────────────────── - // If slice-scoped gates (Q3/Q4) are still pending, pause before execution - // so the gate-evaluate dispatch rule can run parallel sub-agents. - // Slices with zero gate rows (pre-feature or simple) skip straight through. 
const pendingGateCount = getPendingSliceGateCount(activeMilestone.id, activeSlice.id); if (pendingGateCount > 0) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'evaluating-gates', - recentDecisions: [], blockers: [], + phase: 'evaluating-gates', recentDecisions: [], blockers: [], nextAction: `Evaluate ${pendingGateCount} quality gate(s) for ${activeSlice.id} before execution.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, }; } - // ── Blocker detection: check completed tasks for blocker_discovered ── - const completedTasks = tasks.filter(t => isStatusDone(t.status)); - let blockerTaskId: string | null = null; - for (const ct of completedTasks) { - if (ct.blocker_discovered) { - blockerTaskId = ct.id; - break; - } - // Also check disk summary in case DB doesn't have the flag - const summaryFile = resolveTaskFile(basePath, activeMilestone.id, activeSlice.id, ct.id, "SUMMARY"); - if (!summaryFile) continue; - const summaryContent = await loadFile(summaryFile); - if (!summaryContent) continue; - const summary = parseSummary(summaryContent); - if (summary.frontmatter.blocker_discovered) { - blockerTaskId = ct.id; - break; - } - } - + const blockerTaskId = await detectBlockers(basePath, activeMilestone.id, activeSlice.id, tasks); if (blockerTaskId) { - // Loop protection: if replan_history has entries for this slice, a replan - // was already performed — don't re-enter replanning phase. const replanHistory = getReplanHistory(activeMilestone.id, activeSlice.id); if (replanHistory.length === 0) { return { activeMilestone, activeSlice, activeTask, - phase: 'replanning-slice', - recentDecisions: [], + phase: 'replanning-slice', recentDecisions: [], blockers: [`Task ${blockerTaskId} discovered a blocker requiring slice replan`], nextAction: `Task ${blockerTaskId} reported blocker_discovered. 
Replan slice ${activeSlice.id} before continuing.`, activeWorkspace: undefined, @@ -951,22 +891,14 @@ export async function deriveStateFromDb(basePath: string): Promise { } } - // ── REPLAN-TRIGGER detection ───────────────────────────────────────── if (!blockerTaskId) { - const sliceRow = getSlice(activeMilestone.id, activeSlice.id); - // Check DB column first, fall back to disk trigger file when DB write - // was best-effort and failed (triage-resolution.ts dual-write gap). - const dbTriggered = !!sliceRow?.replan_triggered_at; - const diskTriggered = !dbTriggered && - !!resolveSliceFile(basePath, activeMilestone.id, activeSlice.id, "REPLAN-TRIGGER"); - if (dbTriggered || diskTriggered) { - // Loop protection: if replan_history has entries, replan was already done + const isTriggered = checkReplanTrigger(basePath, activeMilestone.id, activeSlice.id); + if (isTriggered) { const replanHistory = getReplanHistory(activeMilestone.id, activeSlice.id); if (replanHistory.length === 0) { return { activeMilestone, activeSlice, activeTask, - phase: 'replanning-slice', - recentDecisions: [], + phase: 'replanning-slice', recentDecisions: [], blockers: ['Triage replan trigger detected — slice replan required'], nextAction: `Triage replan triggered for slice ${activeSlice.id}. Replan before continuing.`, activeWorkspace: undefined, @@ -977,16 +909,11 @@ export async function deriveStateFromDb(basePath: string): Promise { } } - // ── Check for interrupted work ─────────────────────────────────────── - const sDir = resolveSlicePath(basePath, activeMilestone.id, activeSlice.id); - const continueFile = sDir ? 
resolveSliceFile(basePath, activeMilestone.id, activeSlice.id, "CONTINUE") : null; - const hasInterrupted = !!(continueFile && await loadFile(continueFile)) || - !!(sDir && await loadFile(join(sDir, "continue.md"))); + const hasInterrupted = await checkInterruptedWork(basePath, activeMilestone.id, activeSlice.id); return { activeMilestone, activeSlice, activeTask, - phase: 'executing', - recentDecisions: [], blockers: [], + phase: 'executing', recentDecisions: [], blockers: [], nextAction: hasInterrupted ? `Resume interrupted work on ${activeTask.id}: ${activeTask.title} in slice ${activeSlice.id}. Read continue.md first.` : `Execute ${activeTask.id}: ${activeTask.title} in slice ${activeSlice.id}.`, @@ -995,11 +922,14 @@ export async function deriveStateFromDb(basePath: string): Promise { }; } + // LEGACY: Filesystem-based state derivation for unmigrated projects. // DB-backed projects use deriveStateFromDb() above. Target: extract to // state-legacy.ts when all projects are DB-backed. 
export async function _deriveStateImpl(basePath: string): Promise { - const milestoneIds = findMilestoneIds(basePath); + const diskIds = findMilestoneIds(basePath); + const customOrder = loadQueueOrder(basePath); + const milestoneIds = sortByQueueOrder(diskIds, customOrder); // ── Parallel worker isolation ────────────────────────────────────────── // When GSD_MILESTONE_LOCK is set, this process is a parallel worker diff --git a/src/resources/extensions/gsd/templates/context-enhanced.md b/src/resources/extensions/gsd/templates/context-enhanced.md deleted file mode 100644 index 503ffaf17..000000000 --- a/src/resources/extensions/gsd/templates/context-enhanced.md +++ /dev/null @@ -1,138 +0,0 @@ -# {{milestoneId}}: {{milestoneTitle}} - -**Gathered:** {{date}} -**Status:** Ready for planning - -## Project Description - -{{description}} - -## Why This Milestone - -{{whatProblemThisSolves_AND_whyNow}} - -## Codebase Brief - -### Technology Stack - -{{techStack}} - -### Key Modules - -{{keyModules}} - -### Patterns in Use - -{{patternsInUse}} - -## User-Visible Outcome - -### When this milestone is complete, the user can: - -- {{literalUserActionInRealEnvironment}} -- {{literalUserActionInRealEnvironment}} - -### Entry point / environment - -- Entry point: {{CLI command / URL / bot / extension / service / workflow}} -- Environment: {{local dev / browser / mobile / launchd / CI / production-like}} -- Live dependencies involved: {{telegram / database / webhook / rpc subprocess / none}} - -## Completion Class - -- Contract complete means: {{what can be proven by tests / fixtures / artifacts}} -- Integration complete means: {{what must work across real subsystems}} -- Operational complete means: {{what must work under real lifecycle conditions, or none}} - -## Architectural Decisions - -### {{decisionTitle}} - -**Decision:** {{decisionStatement}} - -**Rationale:** {{rationale}} - -**Evidence:** {{evidence}} - -**Alternatives Considered:** -- {{alternative1}} — 
{{whyNotChosen1}} -- {{alternative2}} — {{whyNotChosen2}} - ---- - -> Add additional decisions as separate `### Decision Title` blocks following the same structure above. - -## Interface Contracts - -{{interfaceContracts}} - -> Document API boundaries, function signatures, data shapes, or protocol agreements that must be honored. Leave blank or remove if not applicable to this milestone. - -## Error Handling Strategy - -{{errorHandlingStrategy}} - -> Describe the approach for handling failures, edge cases, and error propagation. Include retry policies, fallback behaviors, and user-facing error messages where relevant. - -## Final Integrated Acceptance - -To call this milestone complete, we must prove: - -- {{one real end-to-end scenario}} -- {{one real end-to-end scenario}} -- {{what cannot be simulated if this milestone is to be considered truly done}} - -## Testing Requirements - -{{testingRequirements}} - -> Specify test types (unit, integration, e2e), coverage expectations, and any specific test scenarios that must pass. - -## Acceptance Criteria - -{{acceptanceCriteria}} - -> Per-slice acceptance criteria gathered during discussion. Each slice should have clear, testable criteria. - -## Risks and Unknowns - -- {{riskOrUnknown}} — {{whyItMatters}} - -## Existing Codebase / Prior Art - -- `{{fileOrModule}}` — {{howItRelates}} -- `{{fileOrModule}}` — {{howItRelates}} - -> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution. 
- -## Relevant Requirements - -- {{requirementId}} — {{howThisMilestoneAdvancesIt}} - -## Scope - -### In Scope - -- {{inScopeItem}} - -### Out of Scope / Non-Goals - -- {{outOfScopeItem}} - -## Technical Constraints - -- {{constraint}} - -## Integration Points - -- {{systemOrService}} — {{howThisMilestoneInteractsWithIt}} - -## Ecosystem Notes - -{{ecosystemNotes}} - -> Research findings, best practices, known issues, and relevant external documentation discovered during preparation. - -## Open Questions - -- {{question}} — {{currentThinking}} diff --git a/src/resources/extensions/gsd/templates/context.md b/src/resources/extensions/gsd/templates/context.md index 3e19bb788..0111e7c83 100644 --- a/src/resources/extensions/gsd/templates/context.md +++ b/src/resources/extensions/gsd/templates/context.md @@ -38,6 +38,28 @@ To call this milestone complete, we must prove: - {{one real end-to-end scenario}} - {{what cannot be simulated if this milestone is to be considered truly done}} +## Architectural Decisions + +### {{decisionTitle}} + +**Decision:** {{decisionStatement}} + +**Rationale:** {{rationale}} + +**Alternatives Considered:** +- {{alternative}} — {{whyNotChosen}} + +--- + +> Add additional decisions as separate `### Decision Title` blocks following the same structure above. +> See `.gsd/DECISIONS.md` for the full append-only register of all project decisions. + +## Error Handling Strategy + +{{errorHandlingStrategy}} + +> Describe the approach for handling failures, edge cases, and error propagation. Include retry policies, fallback behaviors, and user-facing error messages where relevant. + ## Risks and Unknowns - {{riskOrUnknown}} — {{whyItMatters}} @@ -47,8 +69,6 @@ To call this milestone complete, we must prove: - `{{fileOrModule}}` — {{howItRelates}} - `{{fileOrModule}}` — {{howItRelates}} -> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution. 
- ## Relevant Requirements - {{requirementId}} — {{howThisMilestoneAdvancesIt}} @@ -71,6 +91,18 @@ To call this milestone complete, we must prove: - {{systemOrService}} — {{howThisMilestoneInteractsWithIt}} +## Testing Requirements + +{{testingRequirements}} + +> Specify test types (unit, integration, e2e), coverage expectations, and specific test scenarios that must pass. + +## Acceptance Criteria + +{{acceptanceCriteria}} + +> Per-slice acceptance criteria gathered during discussion. Each slice should have clear, testable criteria. + ## Open Questions - {{question}} — {{currentThinking}} diff --git a/src/resources/extensions/gsd/tests/adversarial-review-fixes.test.ts b/src/resources/extensions/gsd/tests/adversarial-review-fixes.test.ts deleted file mode 100644 index 66c24a082..000000000 --- a/src/resources/extensions/gsd/tests/adversarial-review-fixes.test.ts +++ /dev/null @@ -1,223 +0,0 @@ -/** - * Tests for adversarial review fixes from PR #3602. - * - * These tests verify the fixes for: - * 1. Cross-session state leak in lastPreparationResult (HIGH) - * 2. Invalid regex anchor \z in prompt-validation.ts (HIGH) - * 3. 
Consecutive error counter in agent-loop.ts (MEDIUM) — UPSTREAM CODE, NOT MODIFIED - */ - -import { describe, test, beforeEach, afterEach } from "node:test"; -import assert from "node:assert/strict"; -import { mkdirSync, writeFileSync, rmSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; - -import { getLastPreparationResult, clearPreparationResult } from "../guided-flow.ts"; -import { validateEnhancedContext } from "../prompt-validation.ts"; - -// ─── Test Helpers ─────────────────────────────────────────────────────────────── - -function makeTempDir(prefix: string): string { - const dir = join( - tmpdir(), - `gsd-adversarial-test-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, - ); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function cleanup(dir: string): void { - try { - rmSync(dir, { recursive: true, force: true }); - } catch { - // best-effort - } -} - -// ─── Fix 1: Cross-session state leak in lastPreparationResult ──────────────────── - -describe("Fix #1 — Cross-session state leak (lastPreparationResult)", () => { - beforeEach(() => { - clearPreparationResult(); - }); - - afterEach(() => { - clearPreparationResult(); - }); - - test("clearPreparationResult sets lastPreparationResult to null", () => { - // First, verify the getter returns null after clear - clearPreparationResult(); - const result = getLastPreparationResult(); - assert.equal(result, null, "lastPreparationResult should be null after clear"); - }); - - test("getLastPreparationResult returns null initially", () => { - clearPreparationResult(); - const result = getLastPreparationResult(); - assert.equal(result, null, "should return null when no preparation has run"); - }); - - // Note: The actual test that prepareAndBuildDiscussPrompt clears the result - // on entry requires mocking ExtensionCommandContext which is complex. - // The fix is verified by code inspection and integration tests. - // The key behavior is: - // 1. 
lastPreparationResult = null at the start of prepareAndBuildDiscussPrompt - // 2. If preparation throws, lastPreparationResult stays null - // 3. If discuss_preparation is false, lastPreparationResult stays null -}); - -// ─── Fix 2: Invalid regex anchor \z in prompt-validation.ts ────────────────────── - -describe("Fix #2 — Invalid regex anchor (prompt-validation.ts)", () => { - test("validates content with Architectural Decisions at end of file", () => { - // This was the bug: \z is PCRE/Ruby, not JS. JS treated it as literal 'z'. - // The section extraction would fail when Architectural Decisions was the - // last section (no subsequent ## heading). - const contentWithDecisionsAtEnd = ` -# M001: Test Milestone - -## Why This Milestone - -This is why. - -## Acceptance Criteria - -- Criterion 1 - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript -**Rationale:** Type safety -`; - - const result = validateEnhancedContext(contentWithDecisionsAtEnd); - assert.equal(result.valid, true, "should validate content with decisions at end"); - assert.equal(result.missing.length, 0, "should have no missing sections"); - }); - - test("validates content with Architectural Decisions followed by another section", () => { - const contentWithDecisionsInMiddle = ` -# M001: Test Milestone - -## Why This Milestone - -This is why. - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentWithDecisionsInMiddle); - assert.equal(result.valid, true, "should validate content with decisions in middle"); - }); - - test("detects missing decision entry when section is empty", () => { - const contentEmptyDecisions = ` -# M001: Test Milestone - -## Why This Milestone - -This is why. 
- -## Architectural Decisions - -(No decisions yet) - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentEmptyDecisions); - assert.equal(result.valid, false, "should fail when decisions section has no entries"); - assert.ok( - result.missing.some((m) => m.includes("decision entry")), - "should report missing decision entry", - ); - }); - - test("accepts inline **Decision format", () => { - const contentInlineDecision = ` -## Why This Milestone - -Test - -## Architectural Decisions - -**Decision:** Use React - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentInlineDecision); - assert.equal(result.valid, true, "should accept **Decision format"); - }); - - test("accepts ### subsection format", () => { - const contentSubsectionDecision = ` -## Why This Milestone - -Test - -## Architectural Decisions - -### Database Choice - -We chose SQLite. - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentSubsectionDecision); - assert.equal(result.valid, true, "should accept ### subsection format"); - }); - - test("handles edge case: Architectural Decisions heading without space before content", () => { - const contentNoSpace = `## Why This Milestone -Test -## Architectural Decisions -### Decision 1 -Content here -## Acceptance Criteria -- Done`; - - const result = validateEnhancedContext(contentNoSpace); - assert.equal(result.valid, true, "should handle content without extra spacing"); - }); -}); - -// ─── Fix 3: Consecutive error counter (agent-loop.ts) ──────────────────────────── - -describe("Fix #3 — Consecutive error counter (UPSTREAM)", () => { - test("NOTE: agent-loop.ts is upstream code that was not modified", () => { - // This finding from the adversarial review relates to upstream behavior - // in packages/pi-agent-core/src/agent-loop.ts. 
- // - // The consecutiveAllToolErrorTurns counter logic was added in PR #3301 - // and refined in PR #3618 by upstream contributors. These PRs fix - // issues with: - // - Schema overload detection counting bash exit codes as failures - // - The counter not resetting properly on successful turns - // - // Since this is upstream code (part of pi-agent-core, not gsd extension), - // we do not modify it here. The fix should be coordinated with upstream. - // - // See: packages/pi-agent-core/src/agent-loop.ts lines 191, 298-325 - assert.ok(true, "Documented as upstream behavior — no changes made"); - }); -}); diff --git a/src/resources/extensions/gsd/tests/auto-start-worktree-db-path.test.ts b/src/resources/extensions/gsd/tests/auto-start-worktree-db-path.test.ts new file mode 100644 index 000000000..f32bf41fb --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-start-worktree-db-path.test.ts @@ -0,0 +1,28 @@ +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +import { createTestContext } from "./test-helpers.ts"; + +const { assertTrue, report } = createTestContext(); + +const srcPath = join(import.meta.dirname, "..", "auto-start.ts"); +const src = readFileSync(srcPath, "utf-8"); + +console.log("\n=== #3822: worktree bootstrap uses project DB path ==="); + +const dbLifecycleIdx = src.indexOf("// ── DB lifecycle ──"); +assertTrue(dbLifecycleIdx > 0, "auto-start.ts has a DB lifecycle section"); + +const dbLifecycleRegion = dbLifecycleIdx > 0 ? 
src.slice(dbLifecycleIdx, dbLifecycleIdx + 600) : ""; + +assertTrue( + dbLifecycleRegion.includes("const gsdDbPath = resolveProjectRootDbPath(s.basePath);"), + "DB lifecycle resolves the project-root DB path after worktree entry (#3822)", +); + +assertTrue( + !dbLifecycleRegion.includes('join(s.basePath, ".gsd", "gsd.db")'), + "DB lifecycle no longer derives gsd.db directly from the worktree path (#3822)", +); + +report(); diff --git a/src/resources/extensions/gsd/tests/bootstrap-derive-state-db-open.test.ts b/src/resources/extensions/gsd/tests/bootstrap-derive-state-db-open.test.ts new file mode 100644 index 000000000..5c2d18cfc --- /dev/null +++ b/src/resources/extensions/gsd/tests/bootstrap-derive-state-db-open.test.ts @@ -0,0 +1,39 @@ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const systemContextSrc = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "system-context.ts"), + "utf-8", +); +const registerHooksSrc = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"), + "utf-8", +); + +describe("bootstrap deriveState DB guards (#3844)", () => { + test("system-context opens DB before deriveState in resume flows", () => { + const helperIdx = systemContextSrc.indexOf("const ensureStateDbOpen = async () => {"); + const firstDeriveIdx = systemContextSrc.indexOf("const state = await deriveState(basePath);"); + assert.ok(helperIdx > -1, "system-context should define a DB-open helper for deriveState callers"); + assert.ok(firstDeriveIdx > -1, "system-context should still derive state for resume flows"); + assert.ok(helperIdx < firstDeriveIdx, "system-context should prepare DB opening before deriveState resume calls"); + assert.match( + systemContextSrc, + /await ensureStateDbOpen\(\);\s*\n\s*const state = await deriveState\(basePath\);/g, + "system-context resume flows should open DB before deriveState", + ); + 
}); + + test("register-hooks opens DB before deriveState in session_before_compact", () => { + const compactIdx = registerHooksSrc.indexOf('pi.on("session_before_compact"'); + assert.ok(compactIdx > -1, "register-hooks should define session_before_compact"); + const compactSection = registerHooksSrc.slice(compactIdx, compactIdx + 1600); + const ensureIdx = compactSection.indexOf("ensureDbOpen()"); + const deriveIdx = compactSection.indexOf("deriveState(basePath)"); + assert.ok(ensureIdx > -1, "session_before_compact should call ensureDbOpen()"); + assert.ok(deriveIdx > -1, "session_before_compact should derive state"); + assert.ok(ensureIdx < deriveIdx, "session_before_compact should open DB before deriveState"); + }); +}); diff --git a/src/resources/extensions/gsd/tests/capability-router.test.ts b/src/resources/extensions/gsd/tests/capability-router.test.ts index 751fc6e11..8e185b508 100644 --- a/src/resources/extensions/gsd/tests/capability-router.test.ts +++ b/src/resources/extensions/gsd/tests/capability-router.test.ts @@ -11,6 +11,7 @@ import { getEligibleModels, resolveModelForComplexity, MODEL_CAPABILITY_PROFILES, + MODEL_CAPABILITY_TIER, BASE_REQUIREMENTS, defaultRoutingConfig, } from "../model-router.js"; @@ -125,13 +126,9 @@ describe("computeTaskRequirements", () => { // ─── MODEL_CAPABILITY_PROFILES ─────────────────────────────────────────────── describe("MODEL_CAPABILITY_PROFILES", () => { - test("contains all 9 required models", () => { - const required = [ - "claude-opus-4-6", "claude-sonnet-4-6", "claude-haiku-4-5", - "gpt-4o", "gpt-4o-mini", "gemini-2.5-pro", "gemini-2.0-flash", - "deepseek-chat", "o3", - ]; - for (const model of required) { + test("contains profiles for all tier-mapped models", () => { + const tierModels = Object.keys(MODEL_CAPABILITY_TIER); + for (const model of tierModels) { assert.ok(MODEL_CAPABILITY_PROFILES[model], `Missing profile for ${model}`); } }); @@ -345,3 +342,30 @@ describe("RoutingDecision.selectionMethod", () => { 
assert.equal(result.selectionMethod, "tier-only"); }); }); + +// ─── ADR-004: Profile Completeness Lint ───────────────────────────────────── +// Every model in MODEL_CAPABILITY_TIER must have an entry in +// MODEL_CAPABILITY_PROFILES. This prevents profile staleness as new models +// are added to the tier map without corresponding capability data. + +describe("profile completeness (ADR-004 lint)", () => { + test("every model in MODEL_CAPABILITY_TIER has a MODEL_CAPABILITY_PROFILES entry", () => { + const tierModels = Object.keys(MODEL_CAPABILITY_TIER); + const missing = tierModels.filter(id => !MODEL_CAPABILITY_PROFILES[id]); + assert.equal( + missing.length, + 0, + `Models in MODEL_CAPABILITY_TIER but missing from MODEL_CAPABILITY_PROFILES:\n ${missing.join("\n ")}\n\nAdd capability profiles for these models in model-router.ts.`, + ); + }); + + test("MODEL_CAPABILITY_PROFILES does not contain models absent from MODEL_CAPABILITY_TIER", () => { + const profileModels = Object.keys(MODEL_CAPABILITY_PROFILES); + const orphaned = profileModels.filter(id => !MODEL_CAPABILITY_TIER[id]); + assert.equal( + orphaned.length, + 0, + `Models in MODEL_CAPABILITY_PROFILES but not in MODEL_CAPABILITY_TIER:\n ${orphaned.join("\n ")}\n\nEither add these to MODEL_CAPABILITY_TIER or remove stale profiles.`, + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/cmux.test.ts b/src/resources/extensions/gsd/tests/cmux.test.ts index 0e6dd8e77..305a3ef0d 100644 --- a/src/resources/extensions/gsd/tests/cmux.test.ts +++ b/src/resources/extensions/gsd/tests/cmux.test.ts @@ -1,7 +1,8 @@ -import test, { describe } from "node:test"; +import test, { describe, beforeEach, afterEach } from "node:test"; import assert from "node:assert/strict"; import * as fs from "node:fs"; import * as path from "node:path"; +import { tmpdir } from "node:os"; import { fileURLToPath } from "node:url"; import { buildCmuxProgress, @@ -12,6 +13,7 @@ import { resolveCmuxConfig, shouldPromptToEnableCmux, } from 
"../../cmux/index.ts"; +import { autoEnableCmuxPreferences } from "../commands-cmux.ts"; import type { GSDState } from "../types.ts"; test("detectCmuxEnvironment requires workspace, surface, and socket", () => { @@ -79,6 +81,70 @@ test("shouldPromptToEnableCmux only prompts once per session", () => { resetCmuxPromptState(); }); +describe("autoEnableCmuxPreferences", () => { + let tmp: string; + let originalCwd: string; + + beforeEach(() => { + originalCwd = process.cwd(); + tmp = fs.mkdtempSync(path.join(tmpdir(), "cmux-auto-test-")); + fs.mkdirSync(path.join(tmp, ".gsd"), { recursive: true }); + process.chdir(tmp); + }); + + afterEach(() => { + process.chdir(originalCwd); + fs.rmSync(tmp, { recursive: true, force: true }); + }); + + test("writes cmux.enabled true when preferences file exists with no cmux config", () => { + const prefsPath = path.join(tmp, ".gsd", "preferences.md"); + fs.writeFileSync(prefsPath, [ + "---", + "version: 1", + "---", + "", + "# GSD Skill Preferences", + ].join("\n")); + + const result = autoEnableCmuxPreferences(); + assert.equal(result, true); + + const content = fs.readFileSync(prefsPath, "utf-8"); + assert.ok(content.includes("enabled: true"), "should write enabled: true"); + assert.ok(content.includes("notifications: true"), "should default notifications on"); + assert.ok(content.includes("sidebar: true"), "should default sidebar on"); + assert.ok(content.includes("splits: false"), "should default splits off"); + }); + + test("returns false when preferences file does not exist", () => { + const result = autoEnableCmuxPreferences(); + assert.equal(result, false); + }); + + test("preserves existing cmux sub-preferences when auto-enabling", () => { + const prefsPath = path.join(tmp, ".gsd", "preferences.md"); + fs.writeFileSync(prefsPath, [ + "---", + "version: 1", + "cmux:", + " splits: true", + " browser: true", + "---", + "", + "# GSD Skill Preferences", + ].join("\n")); + + const result = autoEnableCmuxPreferences(); + 
assert.equal(result, true); + + const content = fs.readFileSync(prefsPath, "utf-8"); + assert.ok(content.includes("enabled: true"), "should set enabled: true"); + assert.ok(content.includes("splits: true"), "should preserve existing splits: true"); + assert.ok(content.includes("browser: true"), "should preserve existing browser: true"); + }); +}); + test("buildCmuxStatusLabel and progress prefer deepest active unit", () => { const state: GSDState = { activeMilestone: { id: "M001", title: "Milestone" }, diff --git a/src/resources/extensions/gsd/tests/codebase-generator.test.ts b/src/resources/extensions/gsd/tests/codebase-generator.test.ts index d8d3d74c8..923c19f1d 100644 --- a/src/resources/extensions/gsd/tests/codebase-generator.test.ts +++ b/src/resources/extensions/gsd/tests/codebase-generator.test.ts @@ -162,6 +162,34 @@ test("generateCodebaseMap: excludes .claude/ and other tool directories", () => } }); +test("generateCodebaseMap: excludes .agents/ and other tooling directories", () => { + const base = makeTmpRepo(); + try { + addFile(base, "src/main.ts"); + addFile(base, ".agents/skills/pdf/SKILL.md"); + addFile(base, ".agents/skills/find-skills/SKILL.md"); + addFile(base, ".bg-shell/session.json"); + addFile(base, ".idea/workspace.xml"); + addFile(base, ".cache/data.bin"); + addFile(base, "tmp/scratch.ts"); + addFile(base, "target/debug/build.rs"); + addFile(base, "venv/lib/site.py"); + + const result = generateCodebaseMap(base); + assert.ok(result.content.includes("`src/main.ts`"), "should include src/main.ts"); + assert.ok(!result.content.includes("SKILL.md"), "should exclude .agents/ files"); + assert.ok(!result.content.includes(".bg-shell"), "should exclude .bg-shell/ files"); + assert.ok(!result.content.includes(".idea"), "should exclude .idea/ files"); + assert.ok(!result.content.includes(".cache"), "should exclude .cache/ files"); + assert.ok(!result.content.includes("tmp/"), "should exclude tmp/ files"); + 
assert.ok(!result.content.includes("target"), "should exclude target/ files"); + assert.ok(!result.content.includes("venv"), "should exclude venv/ files"); + assert.equal(result.fileCount, 1); + } finally { + cleanup(base); + } +}); + test("generateCodebaseMap: excludes binary and lock files", () => { const base = makeTmpRepo(); try { diff --git a/src/resources/extensions/gsd/tests/complete-slice-prompt-task-summary-layout.test.ts b/src/resources/extensions/gsd/tests/complete-slice-prompt-task-summary-layout.test.ts new file mode 100644 index 000000000..c50389a1d --- /dev/null +++ b/src/resources/extensions/gsd/tests/complete-slice-prompt-task-summary-layout.test.ts @@ -0,0 +1,18 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const promptPath = join(process.cwd(), "src/resources/extensions/gsd/prompts/complete-slice.md"); +const prompt = readFileSync(promptPath, "utf-8"); + +test("complete-slice prompt explains the flat task summary layout", () => { + assert.match(prompt, /flat file layout/i); + assert.match(prompt, /T01-SUMMARY\.md/); + assert.match(prompt, /not inside per-task subdirectories like `tasks\/T01\/SUMMARY\.md`/i); +}); + +test("complete-slice prompt forbids the wrong task summary glob", () => { + assert.match(prompt, /find .*tasks -name "\*-SUMMARY\.md"/i); + assert.match(prompt, /Never use `tasks\/\*\/SUMMARY\.md`/); +}); diff --git a/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts b/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts new file mode 100644 index 000000000..035e5efb2 --- /dev/null +++ b/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts @@ -0,0 +1,436 @@ +// GSD Extension — Tests for extracted deriveStateFromDb helper functions +// Copyright (c) 2026 Jeremy McSpadden +// +// Tests the composable helpers extracted from deriveStateFromDb: +// reconcileDiskToDb, buildCompletenessSet, 
buildRegistryAndFindActive, +// handleNoActiveMilestone, resolveSliceDependencies, reconcileSliceTasks, +// detectBlockers, checkReplanTrigger, checkInterruptedWork +// +// Helpers are private — exercised through deriveStateFromDb integration. + +import { describe, test, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +import { invalidateStateCache, deriveStateFromDb } from '../state.ts'; +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + updateTaskStatus, +} from '../gsd-db.ts'; + +// ─── Fixture Helpers ─────────────────────────────────────────────────────── + +function createFixtureBase(): string { + const base = mkdtempSync(join(tmpdir(), 'gsd-helpers-')); + mkdirSync(join(base, '.gsd', 'milestones'), { recursive: true }); + return base; +} + +function writeFile(base: string, relativePath: string, content: string): void { + const full = join(base, '.gsd', relativePath); + mkdirSync(join(full, '..'), { recursive: true }); + writeFileSync(full, content); +} + +function cleanup(base: string): void { + rmSync(base, { recursive: true, force: true }); +} + +const ROADMAP_CONTENT = `# M001: Test Milestone + +**Vision:** Test helpers. + +## Slices + +- [ ] **S01: First Slice** \`risk:low\` \`depends:[]\` + > After this: Slice done. + +- [ ] **S02: Second Slice** \`risk:low\` \`depends:[S01]\` + > After this: All done. +`; + +const PLAN_CONTENT = `# S01: First Slice + +**Goal:** Test executing. +**Demo:** Tests pass. + +## Tasks + +- [ ] **T01: First Task** \`est:10m\` + First task description. + +- [x] **T02: Done Task** \`est:10m\` + Already done. 
+`; + +// ═══════════════════════════════════════════════════════════════════════════ +// Tests +// ═══════════════════════════════════════════════════════════════════════════ + +describe('derive-state-helpers', () => { + + // ─── handleNoActiveMilestone: all parked ───────────────────────────── + test('handleNoActiveMilestone: all milestones parked returns pre-planning with unpark hint', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-CONTEXT.md', '# M001\n\nContext.'); + writeFile(base, 'milestones/M001/M001-PARKED.md', 'Parked.'); + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nContext.'); + writeFile(base, 'milestones/M002/M002-PARKED.md', 'Also parked.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'parked' }); + insertMilestone({ id: 'M002', title: 'Second', status: 'parked' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'pre-planning', 'all-parked: phase is pre-planning'); + assert.equal(state.activeMilestone, null, 'all-parked: no active milestone'); + assert.ok(state.nextAction.includes('parked'), 'all-parked: nextAction mentions parked'); + assert.ok(state.nextAction.includes('unpark'), 'all-parked: nextAction hints unpark'); + assert.equal(state.registry.length, 2, 'all-parked: both in registry'); + assert.ok(state.registry.every(e => e.status === 'parked'), 'all-parked: all registry entries parked'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── handleNoActiveMilestone: all complete with active requirements ── + test('handleNoActiveMilestone: all complete with unmapped requirements', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Summary\n\nDone.'); + writeFile(base, 'REQUIREMENTS.md', `# Requirements\n\n## Active\n\n### R001 — Unmapped\n- Status: active\n- Description: Not mapped.\n`); + + 
openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'complete', 'complete-reqs: phase is complete'); + assert.ok(state.nextAction.includes('1 active requirement'), 'complete-reqs: nextAction notes unmapped reqs'); + assert.equal(state.requirements?.active, 1, 'complete-reqs: requirements.active = 1'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── resolveSliceDependencies: GSD_SLICE_LOCK with missing slice ──── + test('resolveSliceDependencies: GSD_SLICE_LOCK pointing to non-existent slice returns blocked', async () => { + const base = createFixtureBase(); + const origLock = process.env.GSD_SLICE_LOCK; + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + + process.env.GSD_SLICE_LOCK = 'S99'; + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'blocked', 'slice-lock-miss: phase is blocked'); + assert.ok(state.blockers.some(b => b.includes('GSD_SLICE_LOCK=S99')), 'slice-lock-miss: blocker mentions lock'); + } finally { + if (origLock !== undefined) process.env.GSD_SLICE_LOCK = origLock; + else delete process.env.GSD_SLICE_LOCK; + closeDatabase(); + cleanup(base); + } + }); + + // ─── resolveSliceDependencies: GSD_SLICE_LOCK with valid slice ────── + test('resolveSliceDependencies: 
GSD_SLICE_LOCK targeting valid slice bypasses deps', async () => { + const base = createFixtureBase(); + const origLock = process.env.GSD_SLICE_LOCK; + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + // S02 depends on S01 but we lock to S02 directly + writeFile(base, 'milestones/M001/slices/S02/S02-PLAN.md', `# S02\n\n**Goal:** Test.\n**Demo:** Pass.\n\n## Tasks\n\n- [ ] **T01: Task** \`est:5m\`\n Do thing.\n`); + writeFile(base, 'milestones/M001/slices/S02/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S02/tasks/T01-PLAN.md', '# T01 Plan'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'pending', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S02', milestoneId: 'M001', title: 'Task', status: 'pending' }); + + process.env.GSD_SLICE_LOCK = 'S02'; + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.activeSlice?.id, 'S02', 'slice-lock-valid: activeSlice is S02 (locked)'); + assert.equal(state.phase, 'executing', 'slice-lock-valid: phase is executing'); + } finally { + if (origLock !== undefined) process.env.GSD_SLICE_LOCK = origLock; + else delete process.env.GSD_SLICE_LOCK; + closeDatabase(); + cleanup(base); + } + }); + + // ─── reconcileSliceTasks: plan file imports tasks when DB empty ────── + test('reconcileSliceTasks: imports tasks from plan file when DB has zero tasks (#3600)', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# 
T01 Plan'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + // No tasks inserted — reconcileSliceTasks should import from plan file + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // Plan has T01 (pending) and T02 (done) — reconciliation imports both + assert.equal(state.phase, 'executing', 'task-reconcile: phase is executing (tasks imported)'); + assert.equal(state.activeTask?.id, 'T01', 'task-reconcile: activeTask is T01'); + assert.equal(state.progress?.tasks?.total, 2, 'task-reconcile: total tasks = 2'); + assert.equal(state.progress?.tasks?.done, 1, 'task-reconcile: done tasks = 1 (T02 was [x])'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── reconcileSliceTasks: stale task reconciled from disk summary ──── + test('reconcileSliceTasks: stale pending task reconciled to complete when disk SUMMARY exists (#2514)', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + // T01 has a summary on disk but DB still says pending + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-SUMMARY.md', '# T01 Summary\n\nDone on disk.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: 
['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + insertTask({ id: 'T02', sliceId: 'S01', milestoneId: 'M001', title: 'Done Task', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // T01 should have been reconciled to complete (SUMMARY exists on disk) + // Both tasks complete → phase should be summarizing + assert.equal(state.phase, 'summarizing', 'stale-task: phase is summarizing (T01 reconciled)'); + assert.equal(state.activeTask, null, 'stale-task: no active task (all done)'); + assert.equal(state.progress?.tasks?.done, 2, 'stale-task: tasks.done = 2'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── detectBlockers: blocker_discovered triggers replanning ────────── + test('detectBlockers: task with blocker_discovered triggers replanning-slice', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + // T02 completed with blocker discovered — written in summary frontmatter + writeFile(base, 'milestones/M001/slices/S01/tasks/T02-SUMMARY.md', + '---\nblocker_discovered: true\n---\n\n# T02 Summary\n\nFound a blocker.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + insertTask({ id: 'T02', sliceId: 'S01', milestoneId: 'M001', title: 'Done Task', 
status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'replanning-slice', 'blocker: phase is replanning-slice'); + assert.ok(state.blockers.some(b => b.includes('T02')), 'blocker: blockers mention T02'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── checkInterruptedWork: continue.md triggers resume hint ───────── + test('checkInterruptedWork: continue.md present triggers resume nextAction', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + writeFile(base, 'milestones/M001/slices/S01/S01-CONTINUE.md', 'Resume from here.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + insertTask({ id: 'T02', sliceId: 'S01', milestoneId: 'M001', title: 'Done Task', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'executing', 'continue: phase is still executing'); + assert.ok(state.nextAction.includes('Resume interrupted work'), 'continue: nextAction mentions resume'); + assert.ok(state.nextAction.includes('continue.md'), 'continue: nextAction mentions continue.md'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── buildCompletenessSet: SUMMARY-on-disk marks complete ─────────── + 
test('buildCompletenessSet: milestone with SUMMARY on disk treated as complete', async () => { + const base = createFixtureBase(); + try { + // M001 has summary on disk but DB status is still 'active' + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Summary\n\nDone.'); + // M002 is the real active milestone + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nActive.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + insertMilestone({ id: 'M002', title: 'Second', status: 'active' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // M001 should be complete (summary on disk), M002 should be active + const m1 = state.registry.find(e => e.id === 'M001'); + assert.equal(m1?.status, 'complete', 'summary-disk: M001 marked complete via disk SUMMARY'); + assert.equal(state.activeMilestone?.id, 'M002', 'summary-disk: M002 is active'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── reconcileDiskToDb: disk slices synced into DB (#2533) ────────── + test('reconcileDiskToDb: slices in ROADMAP.md but missing from DB are auto-inserted (#2533)', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + // No slices inserted — reconcileDiskToDb should insert from roadmap + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // Slices should have been reconciled from roadmap, S01 should be the active slice + assert.equal(state.activeMilestone?.id, 'M001', 'slice-reconcile: M001 is active'); + assert.equal(state.activeSlice?.id, 'S01', 'slice-reconcile: S01 reconciled and active'); + assert.ok((state.progress?.slices?.total ?? 
0) >= 2, 'slice-reconcile: at least 2 slices reconciled'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── Queue order: milestones sorted by custom queue order ─────────── + test('deriveStateFromDb respects custom queue order from QUEUE-ORDER.json', async () => { + const base = createFixtureBase(); + try { + // M003 should come first per queue order, M001 second + const queueOrder = JSON.stringify({ order: ['M003', 'M001', 'M002'], updatedAt: new Date().toISOString() }); + writeFileSync(join(base, '.gsd', 'QUEUE-ORDER.json'), queueOrder); + writeFile(base, 'milestones/M001/M001-CONTEXT.md', '# M001\n\nContext.'); + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nContext.'); + writeFile(base, 'milestones/M003/M003-CONTEXT.md', '# M003\n\nContext.'); + + openDatabase(':memory:'); + // Insert in natural order — queue ordering should override + insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + insertMilestone({ id: 'M002', title: 'Second', status: 'active' }); + insertMilestone({ id: 'M003', title: 'Third', status: 'active' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // M003 should be the active milestone (first in queue) + assert.equal(state.activeMilestone?.id, 'M003', 'queue-order: M003 is active (first in queue)'); + assert.equal(state.registry[0]?.id, 'M003', 'queue-order: registry[0] is M003'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── handleAllSlicesDone: needs-remediation re-triggers validation ── + test('handleAllSlicesDone: needs-remediation verdict triggers validating-milestone', async () => { + const base = createFixtureBase(); + try { + const doneRoadmap = `# M001: Remediation Test\n\n**Vision:** Test.\n\n## Slices\n\n- [x] **S01: Done** \`risk:low\` \`depends:[]\`\n > Done.\n`; + writeFile(base, 'milestones/M001/M001-ROADMAP.md', doneRoadmap); + writeFile(base, 'milestones/M001/M001-VALIDATION.md', + '---\nverdict: 
needs-remediation\nremediation_round: 1\n---\n\n# Validation\nNeeds remediation.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Remediation Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Done', status: 'complete', risk: 'low', depends: [] }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'validating-milestone', 'remediation: phase is validating-milestone'); + assert.equal(state.activeMilestone?.id, 'M001', 'remediation: activeMilestone is M001'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── Deferred queued shell: shell milestone deferred, real one promoted ── + test('buildRegistryAndFindActive: queued shell deferred, later real milestone becomes active (#3470)', async () => { + const base = createFixtureBase(); + try { + // M001: queued shell — no content, no slices + mkdirSync(join(base, '.gsd', 'milestones', 'M001'), { recursive: true }); + // M002: real milestone with context + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002: Real\n\nActive milestone.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Shell', status: 'queued' }); + insertMilestone({ id: 'M002', title: 'Real', status: 'active' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // M002 should be active (M001 queued shell deferred) + assert.equal(state.activeMilestone?.id, 'M002', 'deferred-shell: M002 is active (shell deferred)'); + } finally { + closeDatabase(); + cleanup(base); + } + }); +}); diff --git a/src/resources/extensions/gsd/tests/discord-invite-links.test.ts b/src/resources/extensions/gsd/tests/discord-invite-links.test.ts index 8b82d4749..dffe0af61 100644 --- a/src/resources/extensions/gsd/tests/discord-invite-links.test.ts +++ b/src/resources/extensions/gsd/tests/discord-invite-links.test.ts @@ -18,7 +18,7 @@ const VALID_INVITE = "https://discord.com/invite/nKXTsAcmbT"; /** 
Files that contain user-facing Discord invite links. */ const FILES_WITH_INVITE_LINKS: string[] = [ "README.md", - "docs/what-is-pi/15-pi-packages-the-ecosystem.md", + "docs/dev/what-is-pi/15-pi-packages-the-ecosystem.md", ]; describe("Discord invite links (#2699)", () => { diff --git a/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts b/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts index aa3f0d42f..80b01a20b 100644 --- a/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts +++ b/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts @@ -27,10 +27,19 @@ describe("discuss incremental persistence (#2152)", () => { assert.match(content, /Incremental persistence/, "should have incremental persistence section"); }); + test("new-project discuss prompt includes CONTEXT-DRAFT save instruction", () => { + const content = readFileSync(join(promptsDir, "discuss.md"), "utf-8"); + assert.match(content, /CONTEXT-DRAFT/, "should mention CONTEXT-DRAFT"); + assert.match(content, /Incremental persistence/, "should have incremental persistence section"); + assert.match(content, /gsd_summary_save/, "should use gsd_summary_save tool"); + }); + test("drafts are saved silently without user notification", () => { const milestone = readFileSync(join(promptsDir, "guided-discuss-milestone.md"), "utf-8"); const slice = readFileSync(join(promptsDir, "guided-discuss-slice.md"), "utf-8"); + const discuss = readFileSync(join(promptsDir, "discuss.md"), "utf-8"); assert.match(milestone, /Do NOT mention this save to the user/); assert.match(slice, /Do NOT mention this to the user/); + assert.match(discuss, /Do NOT mention this save to the user/); }); }); diff --git a/src/resources/extensions/gsd/tests/dispatch-guard.test.ts b/src/resources/extensions/gsd/tests/dispatch-guard.test.ts index 1989a0195..04c167b08 100644 --- a/src/resources/extensions/gsd/tests/dispatch-guard.test.ts +++ 
b/src/resources/extensions/gsd/tests/dispatch-guard.test.ts @@ -145,6 +145,33 @@ test("dispatch guard falls back to positional ordering when no dependencies decl ); }); +test("dispatch guard ignores positionally-earlier reverse dependents for zero-dependency slices (#3720)", (t) => { + const repo = setupRepo(); + t.after(() => teardownRepo(repo)); + + mkdirSync(join(repo, ".gsd", "milestones", "M015"), { recursive: true }); + + insertMilestone({ id: "M015", title: "Reverse dependency fallback" }); + insertSlice({ id: "S03", milestoneId: "M015", title: "Complete prerequisite", status: "complete", depends: [], sequence: 0 }); + insertSlice({ id: "S04", milestoneId: "M015", title: "Depends on S04A", status: "pending", depends: ["S03", "S04A"], sequence: 0 }); + insertSlice({ id: "S04A", milestoneId: "M015", title: "No explicit deps", status: "pending", depends: [], sequence: 0 }); + + writeFileSync(join(repo, ".gsd", "milestones", "M015", "M015-ROADMAP.md"), "# M015\n"); + + // S04A has no declared dependencies and should not be blocked by S04, because + // S04 itself depends on S04A. With sequence=0, DB ordering falls back to id. + assert.equal( + getPriorSliceCompletionBlocker(repo, "main", "execute-task", "M015/S04A/T02"), + null, + ); + + // The reverse direction is still blocked normally. 
+ assert.equal( + getPriorSliceCompletionBlocker(repo, "main", "execute-task", "M015/S04/T01"), + "Cannot dispatch execute-task M015/S04/T01: dependency slice M015/S04A is not complete.", + ); +}); + test("dispatch guard allows slice with all declared dependencies complete", (t) => { const repo = setupRepo(); t.after(() => teardownRepo(repo)); diff --git a/src/resources/extensions/gsd/tests/doctor-scope-db-unavailable.test.ts b/src/resources/extensions/gsd/tests/doctor-scope-db-unavailable.test.ts new file mode 100644 index 000000000..caeb403b5 --- /dev/null +++ b/src/resources/extensions/gsd/tests/doctor-scope-db-unavailable.test.ts @@ -0,0 +1,43 @@ +import { afterEach, test } from "node:test"; +import assert from "node:assert/strict"; +import { closeDatabase } from "../gsd-db.ts"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { filterDoctorIssues } from "../doctor-format.ts"; +import { checkEngineHealth } from "../doctor-engine-checks.ts"; + +afterEach(() => { + closeDatabase(); +}); + +test("filterDoctorIssues keeps project and environment issues in scoped reports", () => { + const issues = [ + { severity: "error", code: "env_dependencies", scope: "project", unitId: "environment", message: "node_modules missing", fixable: false }, + { severity: "warning", code: "db_unavailable", scope: "project", unitId: "project", message: "DB unavailable", fixable: false }, + { severity: "warning", code: "state_file_missing", scope: "slice", unitId: "M016/S01", message: "slice warning", fixable: false }, + ] as const; + + const filtered = filterDoctorIssues([...issues], { scope: "M016", includeWarnings: true }); + assert.deepEqual( + filtered.map((issue) => issue.unitId), + ["environment", "project", "M016/S01"], + ); +}); + +test("checkEngineHealth reports db_unavailable when gsd.db exists but the DB is closed", async (t) => { + const base = mkdtempSync(join(tmpdir(), 
"gsd-doctor-db-unavailable-")); + t.after(() => rmSync(base, { recursive: true, force: true })); + + const gsdDir = join(base, ".gsd"); + mkdirSync(gsdDir, { recursive: true }); + writeFileSync(join(gsdDir, "gsd.db"), ""); + + const issues: any[] = []; + await checkEngineHealth(base, issues, []); + + const dbIssue = issues.find((issue) => issue.code === "db_unavailable"); + assert.ok(dbIssue, "doctor should surface degraded DB mode when a DB file exists"); + assert.equal(dbIssue.unitId, "project"); + assert.equal(dbIssue.file, ".gsd/gsd.db"); +}); diff --git a/src/resources/extensions/gsd/tests/execute-task-prompt-existing-artifact-guard.test.ts b/src/resources/extensions/gsd/tests/execute-task-prompt-existing-artifact-guard.test.ts new file mode 100644 index 000000000..f3acf4871 --- /dev/null +++ b/src/resources/extensions/gsd/tests/execute-task-prompt-existing-artifact-guard.test.ts @@ -0,0 +1,33 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const promptsDir = join(__dirname, "..", "prompts"); + +test("execute-task prompt requires reading existing artifacts before write", () => { + const prompt = readFileSync(join(promptsDir, "execute-task.md"), "utf-8"); + + assert.match( + prompt, + /Before any `Write` that creates an artifact or output file, check whether that path already exists\./, + "execute-task prompt should require an existence check before creating artifacts", + ); + assert.match( + prompt, + /If it does, read it first and decide whether the work is already done, should be extended, or truly needs replacement\./, + "execute-task prompt should require reading existing artifacts before replacement", + ); +}); + +test("guided resume prompt checks for pre-existing artifacts", () => { + const prompt = readFileSync(join(promptsDir, 
"guided-resume-task.md"), "utf-8"); + + assert.match( + prompt, + /Before you create any expected artifact or output file, check whether it already exists and read it first/i, + "guided resume prompt should guard pre-existing artifacts", + ); +}); diff --git a/src/resources/extensions/gsd/tests/file-lock.test.ts b/src/resources/extensions/gsd/tests/file-lock.test.ts new file mode 100644 index 000000000..b45b091d8 --- /dev/null +++ b/src/resources/extensions/gsd/tests/file-lock.test.ts @@ -0,0 +1,103 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { createRequire } from "node:module"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { withFileLock, withFileLockSync } from "../file-lock.ts"; + +const require = createRequire(import.meta.url); + +function hasProperLockfile(): boolean { + try { + require("proper-lockfile"); + return true; + } catch { + return false; + } +} + +test("withFileLockSync: executes callback when file does not exist", () => { + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + try { + const missingPath = join(dir, "missing.txt"); + let called = 0; + const result = withFileLockSync(missingPath, () => { + called++; + return "ok"; + }); + + assert.equal(result, "ok"); + assert.equal(called, 1, "callback should execute exactly once"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("withFileLock: executes callback when file does not exist", async () => { + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + try { + const missingPath = join(dir, "missing.txt"); + let called = 0; + const result = await withFileLock(missingPath, async () => { + called++; + return "ok"; + }); + + assert.equal(result, "ok"); + assert.equal(called, 1, "callback should execute exactly once"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + 
+test("withFileLockSync: falls back to unlocked callback on ELOCKED", () => { + if (!hasProperLockfile() || process.platform === "win32") { + return; + } + + const lockfile = require("proper-lockfile"); + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + const filePath = join(dir, "locked.jsonl"); + writeFileSync(filePath, "{}\n", "utf-8"); + + const release = lockfile.lockSync(filePath, { retries: 0, stale: 10000 }); + try { + let called = 0; + const result = withFileLockSync(filePath, () => { + called++; + return "fallback-ok"; + }); + assert.equal(result, "fallback-ok"); + assert.equal(called, 1, "callback should run even when lock acquisition fails"); + } finally { + release(); + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("withFileLock: falls back to unlocked callback on ELOCKED", async () => { + if (!hasProperLockfile() || process.platform === "win32") { + return; + } + + const lockfile = require("proper-lockfile"); + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + const filePath = join(dir, "locked.jsonl"); + writeFileSync(filePath, "{}\n", "utf-8"); + + const release = await lockfile.lock(filePath, { retries: 0, stale: 10000 }); + try { + let called = 0; + const result = await withFileLock(filePath, async () => { + called++; + return "fallback-ok"; + }); + assert.equal(result, "fallback-ok"); + assert.equal(called, 1, "callback should run even when lock acquisition fails"); + } finally { + await release(); + rmSync(dir, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/gsd-no-project-error.test.ts b/src/resources/extensions/gsd/tests/gsd-no-project-error.test.ts new file mode 100644 index 000000000..1ceaa5f2d --- /dev/null +++ b/src/resources/extensions/gsd/tests/gsd-no-project-error.test.ts @@ -0,0 +1,73 @@ +/** + * GSDNoProjectError — tests for friendly home-directory error handling. 
+ * + * Verifies that GSDNoProjectError is thrown for blocked directories and + * that the dispatcher catches it with a user-friendly message. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const contextSrc = readFileSync(join(__dirname, "..", "commands", "context.ts"), "utf-8"); +const dispatcherSrc = readFileSync(join(__dirname, "..", "commands", "dispatcher.ts"), "utf-8"); + +// ─── GSDNoProjectError class ────────────────────────────────────────────── + +test("GSDNoProjectError class is exported from context.ts", () => { + assert.ok( + contextSrc.includes("export class GSDNoProjectError extends Error"), + "GSDNoProjectError should be an exported Error subclass", + ); +}); + +test("GSDNoProjectError sets name property", () => { + assert.ok( + contextSrc.includes('this.name = "GSDNoProjectError"'), + "GSDNoProjectError should set its name for instanceof checks", + ); +}); + +// ─── projectRoot blocked directory handling ─────────────────────────────── + +test("projectRoot uses validateDirectory and checks for blocked severity", () => { + assert.ok( + contextSrc.includes("validateDirectory(pathToCheck)"), + "projectRoot should call validateDirectory", + ); + assert.ok( + contextSrc.includes('result.severity === "blocked"'), + "projectRoot should check for blocked severity", + ); +}); + +test("projectRoot throws GSDNoProjectError on blocked directory", () => { + assert.ok( + contextSrc.includes("throw new GSDNoProjectError"), + "projectRoot should throw GSDNoProjectError when directory is blocked", + ); +}); + +// ─── Dispatcher catch ───────────────────────────────────────────────────── + +test("dispatcher catches GSDNoProjectError with user-friendly message", () => { + assert.ok( + dispatcherSrc.includes("err instanceof GSDNoProjectError"), 
+ "dispatcher should catch GSDNoProjectError specifically", + ); + assert.ok( + dispatcherSrc.includes("cd"), + "error message should suggest cd-ing into a project directory", + ); +}); + +test("dispatcher re-throws non-GSDNoProjectError exceptions", () => { + assert.ok( + dispatcherSrc.includes("throw err"), + "dispatcher should re-throw unexpected errors", + ); +}); diff --git a/src/resources/extensions/gsd/tests/integration-prepared-discussion.test.ts b/src/resources/extensions/gsd/tests/integration-prepared-discussion.test.ts deleted file mode 100644 index ea4aa4f50..000000000 --- a/src/resources/extensions/gsd/tests/integration-prepared-discussion.test.ts +++ /dev/null @@ -1,525 +0,0 @@ -/** - * Integration tests for the prepared discussion system. - * - * Exercises the full preparation pipeline against the real GSD-2 codebase: - * - runPreparation() produces valid briefs - * - TypeScript is detected as primary language - * - Module structure includes top-level directories - * - Completes within R112 timing requirement (<60s) - * - prepareAndBuildDiscussPrompt() uses discuss-prepared template when enabled - * - Fallback to standard prompt when preparation is disabled - */ - -import test from "node:test"; -import assert from "node:assert/strict"; -import { join } from "node:path"; -import { existsSync } from "node:fs"; -import { - runPreparation, - formatCodebaseBrief, - formatPriorContextBrief, - formatEcosystemBrief, - type PreparationUIContext, - type PreparationPreferences, - type PreparationResult, -} from "../preparation.ts"; -import { validateEnhancedContext } from "../prompt-validation.ts"; -import { getLastPreparationResult, clearPreparationResult } from "../guided-flow.ts"; - -// ─── Test Helpers ─────────────────────────────────────────────────────────────── - -/** - * Mock UI context that captures notifications for testing. - * Follows the pattern from preparation.test.ts. 
- */ -function createMockUI(): PreparationUIContext & { notifications: Array<{ message: string; type?: string }> } { - const notifications: Array<{ message: string; type?: string }> = []; - return { - notifications, - notify(message: string, type?: "info" | "warning" | "error" | "success") { - notifications.push({ message, type }); - }, - }; -} - -/** - * Get the GSD extension source directory for integration testing. - * This is the real codebase we'll analyze. - */ -function getGsdExtensionDir(): string { - // Navigate from tests/ up to gsd/ directory - return join(import.meta.dirname, ".."); -} - -/** - * Get the GSD-2 project root for full codebase analysis. - */ -function getProjectRoot(): string { - // Navigate from tests/ up to the project root - // tests/ -> gsd/ -> extensions/ -> resources/ -> src/ -> gsd-2/ - return join(import.meta.dirname, "..", "..", "..", "..", ".."); -} - -// ─── R111 Validation: runPreparation against real codebase ────────────────────── - -test("R111: runPreparation() produces valid codebase brief for GSD extension", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, // Skip web research to avoid API key requirement - discuss_depth: "standard", - }; - - const result = await runPreparation(dir, ui, prefs); - - // Verify preparation completed successfully - assert.equal(result.enabled, true, "preparation should be enabled"); - assert.ok(result.codebase, "should have codebase brief"); - assert.ok(result.codebaseBrief, "should have formatted codebase brief"); - - // Verify TypeScript is detected as primary language - assert.equal( - result.codebase.techStack.primaryLanguage, - "javascript/typescript", - "should detect TypeScript as primary language", - ); - - // Verify module structure includes top-level directories - const topLevelDirs = result.codebase.moduleStructure.topLevelDirs; - 
assert.ok(topLevelDirs.length > 0, "should detect top-level directories"); - - // Common directories in the GSD extension - const expectedDirs = ["tests", "prompts", "templates", "migrate"]; - const foundExpected = expectedDirs.filter(d => topLevelDirs.includes(d)); - assert.ok( - foundExpected.length >= 2, - `should detect common directories, found: ${topLevelDirs.join(", ")}`, - ); - - // Verify sampled files exist - assert.ok(result.codebase.sampledFiles.length > 0, "should sample source files"); -}); - -test("R111: runPreparation() produces valid prior context brief", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, ui, prefs); - - // Verify prior context brief structure - assert.ok(result.priorContext, "should have prior context"); - assert.ok(result.priorContextBrief, "should have formatted prior context brief"); - - // Prior context aggregates decisions, requirements, knowledge, summaries - assert.ok("decisions" in result.priorContext, "should have decisions"); - assert.ok("requirements" in result.priorContext, "should have requirements"); - assert.ok("knowledge" in result.priorContext, "should have knowledge"); - assert.ok("summaries" in result.priorContext, "should have summaries"); -}); - -test("R111: runPreparation() produces valid ecosystem brief (skipped without API key)", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, // Explicitly disable - }; - - const result = await runPreparation(dir, ui, prefs); - - // Verify ecosystem brief structure - assert.ok(result.ecosystem, "should have ecosystem brief"); - assert.ok(result.ecosystemBrief, "should have formatted ecosystem brief"); - assert.equal(result.ecosystem.available, false, "ecosystem 
should be unavailable when web research disabled"); - assert.ok(result.ecosystem.skippedReason, "should have skip reason"); -}); - -test("R112: runPreparation() completes within 60s requirement", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - discuss_depth: "standard", - }; - - const startTime = performance.now(); - const result = await runPreparation(dir, null, prefs); - const elapsed = performance.now() - startTime; - - // R112 requirement: preparation must complete within 60 seconds - assert.ok(result.durationMs < 60000, `should complete within 60s, took ${result.durationMs}ms`); - assert.ok(elapsed < 60000, `wall-clock time should be under 60s, was ${elapsed}ms`); - - // Should be much faster for a local directory analysis - assert.ok(result.durationMs < 10000, `should typically complete within 10s, took ${result.durationMs}ms`); -}); - -// ─── Codebase Pattern Detection ───────────────────────────────────────────────── - -test("runPreparation() detects code patterns from GSD extension", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - - // The GSD extension uses async/await extensively - assert.ok( - result.codebase.patterns.asyncStyle === "async/await" || result.codebase.patterns.asyncStyle === "mixed", - `should detect async/await or mixed, got ${result.codebase.patterns.asyncStyle}`, - ); - - // The GSD extension uses try/catch for error handling - assert.ok( - result.codebase.patterns.errorHandling === "try/catch" || result.codebase.patterns.errorHandling === "mixed", - `should detect try/catch or mixed, got ${result.codebase.patterns.errorHandling}`, - ); - - // TypeScript uses camelCase or mixed naming - assert.ok( - result.codebase.patterns.namingConvention === "camelCase" || 
result.codebase.patterns.namingConvention === "mixed", - `should detect camelCase or mixed, got ${result.codebase.patterns.namingConvention}`, - ); - - // Evidence should be populated - assert.ok(result.codebase.patterns.evidence.asyncStyle.length > 0, "should have async style evidence"); -}); - -test("runPreparation() samples TypeScript files from src/ or project root", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - - // Should sample TypeScript files - const tsFiles = result.codebase.sampledFiles.filter( - f => f.endsWith(".ts") || f.endsWith(".tsx"), - ); - assert.ok(tsFiles.length > 0, "should sample TypeScript files"); - - // Should exclude test files - const testFiles = result.codebase.sampledFiles.filter( - f => f.includes(".test.") || f.includes(".spec."), - ); - assert.equal(testFiles.length, 0, "should not sample test files"); -}); - -// ─── Brief Formatting ─────────────────────────────────────────────────────────── - -test("formatCodebaseBrief() produces LLM-readable markdown", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - const formatted = formatCodebaseBrief(result.codebase); - - // Should contain expected sections - assert.ok(formatted.includes("## Tech Stack"), "should have Tech Stack section"); - assert.ok(formatted.includes("## Module Structure"), "should have Module Structure section"); - assert.ok(formatted.includes("## Code Patterns"), "should have Code Patterns section"); - - // Should contain detected tech - assert.ok(formatted.includes("javascript/typescript"), "should include detected language"); - - // Should be within character limit - assert.ok(formatted.length <= 3000, `should cap at 3000 chars, 
got ${formatted.length}`); -}); - -test("formatPriorContextBrief() produces structured prior context output", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - const formatted = formatPriorContextBrief(result.priorContext); - - // Should contain expected sections - assert.ok(formatted.includes("## Prior Decisions"), "should have Prior Decisions section"); - assert.ok(formatted.includes("## Prior Requirements"), "should have Prior Requirements section"); - assert.ok(formatted.includes("## Prior Knowledge"), "should have Prior Knowledge section"); - assert.ok(formatted.includes("## Prior Milestone Summaries"), "should have Prior Milestone Summaries section"); - - // Should be within character limit - assert.ok(formatted.length <= 6000, `should cap at 6000 chars, got ${formatted.length}`); -}); - -test("formatEcosystemBrief() returns simplified message (research happens during discussion)", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - const formatted = formatEcosystemBrief(result.ecosystem); - - // Should contain section header - assert.ok(formatted.includes("## Ecosystem Research"), "should have Ecosystem Research section"); - - // Should indicate research happens during discussion - assert.ok(formatted.includes("during the discussion"), "should mention research happens during discussion"); - assert.ok(formatted.includes("web search tools"), "should mention web search tools"); - - // Should be within character limit - assert.ok(formatted.length <= 4000, `should cap at 4000 chars, got ${formatted.length}`); -}); - -// ─── Preparation Result Storage ───────────────────────────────────────────────── - 
-test("getLastPreparationResult() returns null initially", async (t) => { - // Clear any existing state - clearPreparationResult(); - - const result = getLastPreparationResult(); - assert.equal(result, null, "should return null when no preparation has run"); -}); - -test("clearPreparationResult() clears stored result", async (t) => { - // This test verifies the clear function works - // We can't easily test the set behavior without running the full guided-flow - clearPreparationResult(); - const result = getLastPreparationResult(); - assert.equal(result, null, "should be null after clear"); -}); - -// ─── TUI Progress Notifications ───────────────────────────────────────────────── - -test("runPreparation() emits TUI progress notifications", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - await runPreparation(dir, ui, prefs); - - // Should have notifications for each phase - assert.ok(ui.notifications.length > 0, "should have notifications"); - - // Verify codebase analysis notifications - assert.ok( - ui.notifications.some(n => n.message.includes("Analyzing codebase")), - "should show codebase analysis start", - ); - assert.ok( - ui.notifications.some(n => n.message.includes("✓ Analyzed codebase")), - "should show codebase analysis complete", - ); - - // Verify prior context notifications - assert.ok( - ui.notifications.some(n => n.message.includes("Reviewing prior context")), - "should show prior context start", - ); - assert.ok( - ui.notifications.some(n => n.message.includes("✓ Reviewed prior context")), - "should show prior context complete", - ); -}); - -test("runPreparation() works in silent mode (no UI)", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - // Pass null for UI - const result = await 
runPreparation(dir, null, prefs); - - // Should complete without error - assert.equal(result.enabled, true, "should work without UI"); - assert.ok(result.codebase, "should have codebase"); - assert.ok(result.priorContext, "should have priorContext"); - assert.ok(result.durationMs > 0, "should have duration"); -}); - -// ─── Preference-Controlled Behavior ───────────────────────────────────────────── - -test("runPreparation() returns early when discuss_preparation is false", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: false, - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, false, "should indicate preparation disabled"); - assert.equal(result.codebaseBrief, "", "should have empty codebase brief"); - assert.equal(result.priorContextBrief, "", "should have empty prior context brief"); - assert.equal(result.ecosystemBrief, "", "should have empty ecosystem brief"); - assert.equal(ui.notifications.length, 0, "should not show any notifications"); -}); - -test("runPreparation() ecosystem research always returns unavailable (happens during discussion)", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: true, // Even with this enabled, ecosystem research returns unavailable - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, true); - assert.equal(result.ecosystemResearchPerformed, false, "should not perform ecosystem research from preparation"); - assert.equal(result.ecosystem.available, false); - assert.ok( - result.ecosystem.skippedReason?.includes("during the discussion"), - "should indicate research happens during discussion", - ); - - // Should NOT have ecosystem research notifications (no longer part of preparation) - assert.ok( - !ui.notifications.some(n => 
n.message.includes("Researching ecosystem")), - "should not show ecosystem research notification", - ); -}); - -// ─── validateEnhancedContext Integration ──────────────────────────────────────── - -test("validateEnhancedContext() validates required sections", async (t) => { - // Test with valid enhanced context - const validContext = `# M001 — Test Milestone - -## Scope - -This milestone covers X, Y, Z. - -## Architectural Decisions - -### Decision 1: Use TypeScript - -We will use TypeScript for type safety. - -## Acceptance Criteria - -- [ ] Feature A works -- [ ] Feature B works -`; - - const validResult = validateEnhancedContext(validContext); - assert.equal(validResult.valid, true, "should validate complete context"); - assert.deepEqual(validResult.missing, [], "should have no missing sections"); - - // Test with missing sections - const invalidContext = `# M001 — Test Milestone - -## Scope - -This milestone covers X, Y, Z. -`; - - const invalidResult = validateEnhancedContext(invalidContext); - assert.equal(invalidResult.valid, false, "should reject incomplete context"); - assert.ok(invalidResult.missing.length > 0, "should list missing sections"); - assert.ok( - invalidResult.missing.some(m => m.includes("Architectural Decisions")), - "should report missing Architectural Decisions", - ); - assert.ok( - invalidResult.missing.some(m => m.includes("Acceptance Criteria")), - "should report missing Acceptance Criteria", - ); -}); - -test("validateEnhancedContext() requires decision entries in Architectural Decisions", async (t) => { - // Empty architectural decisions section - const emptyDecisions = `# M001 — Test Milestone - -## Scope - -This milestone covers X, Y, Z. 
- -## Architectural Decisions - -(No decisions yet) - -## Acceptance Criteria - -- [ ] Feature A works -`; - - const result = validateEnhancedContext(emptyDecisions); - assert.equal(result.valid, false, "should reject empty decisions section"); - assert.ok( - result.missing.some(m => m.includes("decision entry")), - "should report missing decision entry", - ); -}); - -// ─── Full Pipeline Integration ────────────────────────────────────────────────── - -test("Full pipeline: preparation produces consistent results across runs", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - // Run preparation twice - const result1 = await runPreparation(dir, null, prefs); - const result2 = await runPreparation(dir, null, prefs); - - // Results should be consistent (same codebase, same analysis) - assert.equal( - result1.codebase.techStack.primaryLanguage, - result2.codebase.techStack.primaryLanguage, - "primary language should be consistent", - ); - - assert.deepEqual( - result1.codebase.moduleStructure.topLevelDirs.sort(), - result2.codebase.moduleStructure.topLevelDirs.sort(), - "top-level directories should be consistent", - ); - - assert.equal( - result1.codebase.patterns.asyncStyle, - result2.codebase.patterns.asyncStyle, - "async style should be consistent", - ); -}); - -test("Full pipeline: preparation handles empty .gsd directory gracefully", async (t) => { - // The GSD extension directory may or may not have a .gsd subdirectory - // Either way, preparation should not crash - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - let result: PreparationResult | undefined; - let error: unknown; - - try { - result = await runPreparation(dir, null, prefs); - } catch (e) { - error = e; - } - - assert.equal(error, undefined, "should not throw"); - assert.ok(result, "should 
return result"); - assert.equal(result!.enabled, true, "should be enabled"); - - // Prior context should gracefully handle missing files - assert.ok(result!.priorContext, "should have prior context even if files missing"); -}); diff --git a/src/resources/extensions/gsd/tests/integration/test-isolation.ts b/src/resources/extensions/gsd/tests/integration/test-isolation.ts deleted file mode 100644 index bc8270222..000000000 --- a/src/resources/extensions/gsd/tests/integration/test-isolation.ts +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Test isolation utilities for integration tests. - * - * Integration tests often call `mergeMilestoneToMain` and other functions that - * load preferences. If the user's global ~/.gsd/preferences.md has - * `git.main_branch: master`, tests fail because test repos use `main`. - * - * These utilities isolate tests from the user's global environment. - */ - -import { mkdtempSync, rmSync, realpathSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; - -import { _resetServiceCache } from "../../worktree.ts"; -import { _clearGsdRootCache } from "../../paths.ts"; - -let originalHome: string | undefined; -let fakeHome: string | null = null; - -/** - * Isolate the test environment from user's global preferences. - * Creates a fake HOME directory so loadEffectiveGSDPreferences() returns - * empty global preferences instead of the user's ~/.gsd/preferences.md. - * - * Call this in a test.before() hook. - */ -export function isolateFromGlobalPreferences(): void { - originalHome = process.env.HOME; - fakeHome = realpathSync(mkdtempSync(join(tmpdir(), "gsd-test-home-"))); - process.env.HOME = fakeHome; - _clearGsdRootCache(); - _resetServiceCache(); -} - -/** - * Restore the original HOME and clean up the fake home directory. - * - * Call this in a test.after() hook. 
- */ -export function restoreGlobalPreferences(): void { - if (originalHome !== undefined) { - process.env.HOME = originalHome; - } else { - delete process.env.HOME; - } - _clearGsdRootCache(); - _resetServiceCache(); - if (fakeHome) { - rmSync(fakeHome, { recursive: true, force: true }); - fakeHome = null; - } -} diff --git a/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts new file mode 100644 index 000000000..f209ecc8d --- /dev/null +++ b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts @@ -0,0 +1,207 @@ +// GSD Extension — Interactive Routing Bypass Tests +// Verifies that dynamic routing is skipped for interactive (guided-flow) dispatches +// and that model downgrade notifications always fire (#3962). +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +// ─── Source-level structural tests ────────────────────────────────────────── + +const modelSelectionSrc = readFileSync( + join(__dirname, "..", "auto-model-selection.ts"), + "utf-8", +); + +const guidedFlowSrc = readFileSync( + join(__dirname, "..", "guided-flow.ts"), + "utf-8", +); + +const autoStartSrc = readFileSync( + join(__dirname, "..", "auto-start.ts"), + "utf-8", +); + +describe("interactive routing bypass (#3962)", () => { + test("selectAndApplyModel accepts isAutoMode parameter", () => { + // The function signature should include isAutoMode with a default of true + assert.ok( + modelSelectionSrc.includes("isAutoMode"), + "selectAndApplyModel should have isAutoMode parameter", + ); + assert.ok( + modelSelectionSrc.includes("isAutoMode = true"), + "isAutoMode should default to true (auto-mode behavior preserved)", + ); + 
}); + + test("routing is disabled when isAutoMode is false", () => { + // The code should disable routing when not in auto-mode + assert.ok( + modelSelectionSrc.includes("if (!isAutoMode)"), + "should check isAutoMode flag to disable routing", + ); + assert.ok( + modelSelectionSrc.includes("routingConfig.enabled = false"), + "should set routingConfig.enabled = false for interactive mode", + ); + }); + + test("resolvePreferredModelConfig skips routing synthesis when isAutoMode is false", () => { + // resolvePreferredModelConfig should accept isAutoMode and bail early + // before synthesizing a routing ceiling from tier_models (#3962 codex review) + assert.ok( + modelSelectionSrc.includes("function resolvePreferredModelConfig"), + "resolvePreferredModelConfig should exist", + ); + // The function should check isAutoMode before routing synthesis + const fnIdx = modelSelectionSrc.indexOf("function resolvePreferredModelConfig"); + const fnBody = modelSelectionSrc.slice(fnIdx, fnIdx + 600); + assert.ok( + fnBody.includes("isAutoMode"), + "resolvePreferredModelConfig should accept isAutoMode parameter", + ); + assert.ok( + fnBody.includes("if (!isAutoMode) return undefined"), + "should return undefined (skip routing synthesis) when not in auto-mode", + ); + }); + + test("selectAndApplyModel threads isAutoMode to resolvePreferredModelConfig", () => { + // The call to resolvePreferredModelConfig inside selectAndApplyModel + // should pass isAutoMode as the third argument + const callSite = "resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode)"; + assert.ok( + modelSelectionSrc.includes(callSite), + "selectAndApplyModel should pass isAutoMode to resolvePreferredModelConfig", + ); + }); + + test("guided-flow passes isAutoMode=false", () => { + // guided-flow.ts should explicitly pass isAutoMode as false + assert.ok( + guidedFlowSrc.includes("/* isAutoMode */ false"), + "guided-flow should pass isAutoMode=false to selectAndApplyModel", + ); + }); + + 
test("auto/phases.ts does NOT pass isAutoMode=false", () => { + // auto/phases.ts should use the default (true) — it's auto-mode + const phasesSrc = readFileSync( + join(__dirname, "..", "auto", "phases.ts"), + "utf-8", + ); + assert.ok( + !phasesSrc.includes("isAutoMode"), + "auto/phases.ts should use default isAutoMode=true (not pass it explicitly)", + ); + }); +}); + +describe("model downgrade notifications always visible (#3962)", () => { + test("downgrade notification is not gated by verbose flag", () => { + // The downgrade notification block should NOT be wrapped in `if (verbose)` + // Find the downgrade block and verify it's not behind a verbose check + const downgradeBlock = "if (routingResult.wasDowngraded)"; + const downgradeIdx = modelSelectionSrc.indexOf(downgradeBlock); + assert.ok(downgradeIdx > 0, "downgrade block should exist"); + + // Extract the code between wasDowngraded check and the next routing label assignment + const afterDowngrade = modelSelectionSrc.slice( + downgradeIdx, + modelSelectionSrc.indexOf("routingTierLabel =", downgradeIdx), + ); + + // The notification calls should NOT be wrapped in `if (verbose)` + assert.ok( + !afterDowngrade.includes("if (verbose)"), + "downgrade notifications should not be gated by verbose flag", + ); + + // But the notification calls should exist + assert.ok( + afterDowngrade.includes('ctx.ui.notify('), + "downgrade notifications should still fire", + ); + }); + + test("tier escalation notification is not gated by verbose flag", () => { + // Extract the escalation block: from "if (escalated)" to its closing + // and verify the notification is present but `if (verbose)` is not. 
+ const escalatedIdx = modelSelectionSrc.indexOf("if (escalated)"); + assert.ok(escalatedIdx > 0, "escalation block should exist"); + + // Get the block from "if (escalated)" to the next closing brace pattern + const block = modelSelectionSrc.slice(escalatedIdx, escalatedIdx + 400); + assert.ok( + block.includes("Tier escalation:"), + "escalation block should contain the notification", + ); + assert.ok( + !block.includes("if (verbose)"), + "escalation block should not gate notification behind verbose flag", + ); + }); +}); + +describe("auto-mode start routing banner (#3962)", () => { + test("auto-start shows dynamic routing status on startup", () => { + assert.ok( + autoStartSrc.includes("Dynamic routing:"), + "auto-start should display routing status banner", + ); + assert.ok( + autoStartSrc.includes("resolveDynamicRoutingConfig"), + "auto-start should import resolveDynamicRoutingConfig", + ); + }); + + test("banner shows different messages for enabled vs disabled routing", () => { + assert.ok( + autoStartSrc.includes("Dynamic routing: enabled"), + "should show message when routing is enabled", + ); + assert.ok( + autoStartSrc.includes("Dynamic routing: disabled"), + "should show message when routing is disabled", + ); + }); + + test("banner shows the ceiling model", () => { + assert.ok( + autoStartSrc.includes("startModelLabel"), + "banner should reference the start/ceiling model", + ); + }); + + test("banner accounts for flat-rate provider suppression", () => { + // The banner should check isFlatRateProvider to accurately reflect + // whether routing will actually be active at dispatch time (#3962 codex review) + assert.ok( + autoStartSrc.includes("isFlatRateProvider"), + "banner should check flat-rate provider status", + ); + assert.ok( + autoStartSrc.includes("effectivelyEnabled"), + "banner should compute effective routing state, not just raw config", + ); + }); + + test("banner uses effective ceiling from tier_models.heavy when configured", () => { + // The 
actual ceiling may come from tier_models.heavy, not the start model + assert.ok( + autoStartSrc.includes("tier_models?.heavy"), + "banner should check tier_models.heavy for the effective ceiling", + ); + assert.ok( + autoStartSrc.includes("effectiveCeiling"), + "banner should compute the effective ceiling model", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/mcp-project-config.test.ts b/src/resources/extensions/gsd/tests/mcp-project-config.test.ts index 0c2cdba5c..7638a7e74 100644 --- a/src/resources/extensions/gsd/tests/mcp-project-config.test.ts +++ b/src/resources/extensions/gsd/tests/mcp-project-config.test.ts @@ -26,8 +26,12 @@ test("ensureProjectWorkflowMcpConfig creates .mcp.json with the workflow server" assert.equal(typeof server?.command, "string"); assert.equal(Array.isArray(server?.args), true); assert.equal(server?.env?.GSD_WORKFLOW_PROJECT_ROOT, projectRoot); - assert.match(server?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.js$/); - assert.match(server?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.js$/); + assert.match(server?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.(js|ts)$/); + assert.match(server?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.(js|ts)$/); + if ((server?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "").endsWith(".ts")) { + assert.match(server?.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(server?.env?.NODE_OPTIONS ?? 
"", /resolve-ts\.mjs/); + } } finally { rmSync(projectRoot, { recursive: true, force: true }); } diff --git a/src/resources/extensions/gsd/tests/model-router.test.ts b/src/resources/extensions/gsd/tests/model-router.test.ts index c81242215..d12a71df9 100644 --- a/src/resources/extensions/gsd/tests/model-router.test.ts +++ b/src/resources/extensions/gsd/tests/model-router.test.ts @@ -287,9 +287,9 @@ test("resolveModelForComplexity falls back to tier-only when capability_routing assert.ok(!result.selectionMethod || result.selectionMethod === "tier-only"); }); -test("MODEL_CAPABILITY_PROFILES has entries for core models", () => { +test("MODEL_CAPABILITY_PROFILES has entries for all tier-mapped models", () => { const profiledModels = Object.keys(MODEL_CAPABILITY_PROFILES); - assert.ok(profiledModels.length >= 9, `Expected ≥9 profiles, got ${profiledModels.length}`); + assert.ok(profiledModels.length >= 30, `Expected ≥30 profiles, got ${profiledModels.length}`); assert.ok(MODEL_CAPABILITY_PROFILES["claude-opus-4-6"]); assert.ok(MODEL_CAPABILITY_PROFILES["claude-haiku-4-5"]); }); diff --git a/src/resources/extensions/gsd/tests/notification-store.test.ts b/src/resources/extensions/gsd/tests/notification-store.test.ts index 8f13fb873..f17f9dd0e 100644 --- a/src/resources/extensions/gsd/tests/notification-store.test.ts +++ b/src/resources/extensions/gsd/tests/notification-store.test.ts @@ -187,6 +187,23 @@ describe("notification-store", () => { assert.ok(!entries.some((e) => e.message === "suppressed")); }); + test("appendNotification suppresses identical messages within the dedup window", (t) => { + initNotificationStore(tmp); + let now = 1_000; + t.mock.method(Date, "now", () => now); + + appendNotification("same", "warning"); + now += 1_000; + appendNotification("same", "warning"); + now += 31_000; + appendNotification("same", "warning"); + + const entries = readNotifications(); + assert.equal(entries.length, 2); + assert.equal(entries[0].message, "same"); + 
assert.equal(entries[1].message, "same"); + }); + test("suppressPersistence is ref-counted", () => { initNotificationStore(tmp); suppressPersistence(); diff --git a/src/resources/extensions/gsd/tests/notification-widget.test.ts b/src/resources/extensions/gsd/tests/notification-widget.test.ts new file mode 100644 index 000000000..f6cd2eee7 --- /dev/null +++ b/src/resources/extensions/gsd/tests/notification-widget.test.ts @@ -0,0 +1,25 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { initNotificationStore, appendNotification, _resetNotificationStore } from "../notification-store.js"; +import { buildNotificationWidgetLines } from "../notification-widget.js"; + +test("buildNotificationWidgetLines includes slash-command fallback for unread notifications", () => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-notification-widget-")); + try { + mkdirSync(join(tmp, ".gsd"), { recursive: true }); + _resetNotificationStore(); + initNotificationStore(tmp); + appendNotification("Need attention", "warning"); + + const lines = buildNotificationWidgetLines(); + assert.equal(lines.length, 1); + assert.match(lines[0]!, /\/gsd notifications/); + } finally { + _resetNotificationStore(); + rmSync(tmp, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts b/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts index ffdeae7c8..1f1ac2d35 100644 --- a/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts +++ b/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts @@ -12,7 +12,7 @@ import { describe, it } from 'node:test' import assert from 'node:assert/strict' import { normalizeFilePath, checkFilePathConsistency } from '../pre-execution-checks.ts' -import { readFileSync } from 'node:fs' +import { mkdirSync, 
readFileSync, rmSync, writeFileSync } from 'node:fs' import { resolve } from 'node:path' const src = readFileSync( @@ -25,6 +25,11 @@ describe('normalizeFilePath backtick stripping (#3649)', () => { assert.equal(normalizeFilePath('`src/foo.ts`'), 'src/foo.ts') }) + it('strips doubled backticks and trailing notes from file paths', () => { + assert.equal(normalizeFilePath('``src/foo.ts`` - current state'), 'src/foo.ts') + assert.equal(normalizeFilePath('``src/foo.ts`` (current state)'), 'src/foo.ts') + }) + it('strips backticks even when mixed with other normalization', () => { assert.equal(normalizeFilePath('`./src//bar.ts`'), 'src/bar.ts') }) @@ -66,3 +71,45 @@ describe('checkFilePathConsistency checks task.inputs not task.files (#3626)', ( ) }) }) + +describe('checkFilePathConsistency handles doubled-backtick annotations (#3892)', () => { + it('accepts existing files when task.inputs include doubled-backtick notes', () => { + const task = { + milestone_id: 'M001', + slice_id: 'S01', + id: 'T01', + title: 'Test Task', + status: 'pending', + one_liner: '', + narrative: '', + verification_result: '', + duration: '', + completed_at: null, + blocker_discovered: false, + deviations: '', + known_issues: '', + key_files: [], + key_decisions: [], + full_summary_md: '', + description: '', + estimate: '', + files: [], + verify: '', + inputs: ['``src/foo.ts`` (current state)'], + expected_output: [], + observability_impact: '', + full_plan_md: '', + sequence: 0, + } + + const tmp = resolve(process.cwd(), '.tmp-pre-exec-3892') + try { + mkdirSync(resolve(tmp, 'src'), { recursive: true }) + writeFileSync(resolve(tmp, 'src', 'foo.ts'), '// ok') + const results = checkFilePathConsistency([task as any], tmp) + assert.deepEqual(results, []) + } finally { + rmSync(tmp, { recursive: true, force: true }) + } + }) +}) diff --git a/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts b/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts index 79ac6a692..091896fdb 
100644 --- a/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts +++ b/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts @@ -1107,6 +1107,38 @@ describe("checkTaskOrdering false positive regression (#3677)", () => { assert.equal(results[0].target, "`later.ts` — needed first"); assert.ok(results[0].message.includes("sequence violation")); }); + + test("existing on-disk files do not trigger ordering violations just because a later task modifies them", () => { + const tempDir = join(tmpdir(), `pre-exec-ordering-existing-file-${Date.now()}`); + const existingFile = "frontend/src/__tests__/ProcurementPage29.test.tsx"; + + mkdirSync(join(tempDir, "frontend", "src", "__tests__"), { recursive: true }); + writeFileSync(join(tempDir, existingFile), "// existing file"); + + try { + const tasks = [ + createTask({ + id: "T01", + sequence: 0, + files: [], + inputs: ["`frontend/src/__tests__/ProcurementPage29.test.tsx` — contains matchMedia stub to remove"], + expected_output: [], + }), + createTask({ + id: "T03", + sequence: 2, + files: [], + inputs: [], + expected_output: ["frontend/src/__tests__/ProcurementPage29.test.tsx"], + }), + ]; + + const results = checkTaskOrdering(tasks, tempDir); + assert.equal(results.length, 0, "Pre-existing files should not be treated as created by later tasks"); + } finally { + rmSync(tempDir, { recursive: true, force: true }); + } + }); }); // ─── checkFilePathConsistency additional edge cases ────────────────────────── @@ -1175,6 +1207,23 @@ describe("checkFilePathConsistency additional edge cases", () => { assert.equal(results![0].blocking, true); }); + test("multi-word prose inputs are ignored by path consistency checks", () => { + const tasks = [ + createTask({ + id: "T01", + files: [], + inputs: [ + "Current WIZARD_PRODUCTS enum", + "Existing test patterns in wizard.test.ts", + ], + expected_output: [], + }), + ]; + + const results = checkFilePathConsistency(tasks, "/tmp"); + assert.equal(results.length, 0, "Prose 
planning hints should not be treated as missing file paths"); + }); + test("empty inputs array produces no results", () => { // A task with no inputs and only files should produce zero results from // consistency check — files are not checked (#3626). diff --git a/src/resources/extensions/gsd/tests/preparation.test.ts b/src/resources/extensions/gsd/tests/preparation.test.ts deleted file mode 100644 index 569efed10..000000000 --- a/src/resources/extensions/gsd/tests/preparation.test.ts +++ /dev/null @@ -1,1211 +0,0 @@ -/** - * Unit tests for GSD Preparation — codebase analysis and brief generation. - * - * Exercises the pure preparation functions: - * - analyzeCodebase() with various project layouts - * - formatCodebaseBrief() output format and truncation - * - Pattern extraction from sampled files - */ - -import test from "node:test"; -import assert from "node:assert/strict"; -import { mkdirSync, writeFileSync, rmSync, existsSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; -import { - analyzeCodebase, - formatCodebaseBrief, - aggregatePriorContext, - formatPriorContextBrief, - researchEcosystem, - formatEcosystemBrief, - runPreparation, - type CodebaseBrief, - type PriorContextBrief, - type EcosystemBrief, - type EcosystemFinding, - type PreparationUIContext, - type PreparationPreferences, - type PreparationResult, -} from "../preparation.ts"; -import { PROJECT_FILES } from "../detection.ts"; - -// ─── Test Helpers ─────────────────────────────────────────────────────────────── - -function makeTempDir(prefix: string): string { - const dir = join( - tmpdir(), - `gsd-preparation-test-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, - ); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function cleanup(dir: string): void { - try { - rmSync(dir, { recursive: true, force: true }); - } catch { - // best-effort - } -} - -// ─── analyzeCodebase 
──────────────────────────────────────────────────────────── - -test("analyzeCodebase: empty directory returns valid brief structure", async (t) => { - const dir = makeTempDir("empty"); - t.after(() => cleanup(dir)); - - const brief = await analyzeCodebase(dir); - - assert.ok(brief, "should return a brief"); - assert.ok(brief.techStack, "should have techStack"); - assert.ok(brief.moduleStructure, "should have moduleStructure"); - assert.ok(brief.patterns, "should have patterns"); - assert.ok(Array.isArray(brief.sampledFiles), "should have sampledFiles array"); - assert.equal(brief.sampledFiles.length, 0, "empty dir should have no sampled files"); -}); - -test("analyzeCodebase: detects package.json in PROJECT_FILES", async (t) => { - const dir = makeTempDir("pkg-json"); - t.after(() => cleanup(dir)); - - writeFileSync( - join(dir, "package.json"), - JSON.stringify({ name: "test-project", scripts: { test: "jest" } }), - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok( - brief.techStack.detectedFiles.includes("package.json"), - "should detect package.json", - ); - assert.equal(brief.techStack.primaryLanguage, "javascript/typescript"); -}); - -test("analyzeCodebase: detects module structure from src/ directory", async (t) => { - const dir = makeTempDir("module-struct"); - t.after(() => cleanup(dir)); - - // Create src directory with subdirs - mkdirSync(join(dir, "src", "components"), { recursive: true }); - mkdirSync(join(dir, "src", "utils"), { recursive: true }); - mkdirSync(join(dir, "src", "hooks"), { recursive: true }); - mkdirSync(join(dir, "test"), { recursive: true }); - - const brief = await analyzeCodebase(dir); - - assert.ok( - brief.moduleStructure.topLevelDirs.includes("src"), - "should detect src as top-level dir", - ); - assert.ok( - brief.moduleStructure.topLevelDirs.includes("test"), - "should detect test as top-level dir", - ); - assert.ok( - brief.moduleStructure.srcSubdirs.includes("components"), - "should detect components 
subdir", - ); - assert.ok( - brief.moduleStructure.srcSubdirs.includes("utils"), - "should detect utils subdir", - ); - assert.ok( - brief.moduleStructure.srcSubdirs.includes("hooks"), - "should detect hooks subdir", - ); -}); - -test("analyzeCodebase: samples TypeScript files from src/", async (t) => { - const dir = makeTempDir("sample-ts"); - t.after(() => cleanup(dir)); - - // Create src directory with TypeScript files - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "index.ts"), - `export async function main() { await fetch('/api'); }`, - "utf-8", - ); - writeFileSync( - join(dir, "src", "utils.ts"), - `export function helper() { try { return 1; } catch (e) { throw e; } }`, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok(brief.sampledFiles.length > 0, "should sample at least one file"); - assert.ok( - brief.sampledFiles.some((f) => f.startsWith("src/")), - "should prefer src/ files", - ); -}); - -test("analyzeCodebase: excludes test files from sampling", async (t) => { - const dir = makeTempDir("exclude-tests"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync(join(dir, "src", "index.ts"), `export const x = 1;`, "utf-8"); - writeFileSync( - join(dir, "src", "index.test.ts"), - `import test from 'node:test'; test('x', () => {});`, - "utf-8", - ); - writeFileSync( - join(dir, "src", "utils.spec.ts"), - `describe('utils', () => { it('works', () => {}); });`, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - // Should only have index.ts, not test/spec files - for (const file of brief.sampledFiles) { - assert.ok(!file.endsWith(".test.ts"), `should not sample ${file}`); - assert.ok(!file.endsWith(".spec.ts"), `should not sample ${file}`); - } -}); - -test("analyzeCodebase: excludes node_modules from sampling", async (t) => { - const dir = makeTempDir("exclude-nm"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { 
recursive: true }); - mkdirSync(join(dir, "node_modules", "some-pkg"), { recursive: true }); - writeFileSync(join(dir, "src", "index.ts"), `export const x = 1;`, "utf-8"); - writeFileSync( - join(dir, "node_modules", "some-pkg", "index.js"), - `module.exports = {};`, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - for (const file of brief.sampledFiles) { - assert.ok(!file.includes("node_modules"), `should not sample ${file}`); - } -}); - -test("analyzeCodebase: extracts async/await pattern", async (t) => { - const dir = makeTempDir("async-await"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "api.ts"), - ` -export async function fetchData() { - const res = await fetch('/api'); - const data = await res.json(); - return data; -} - -export async function saveData(data: any) { - await fetch('/api', { method: 'POST', body: JSON.stringify(data) }); -} - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.equal( - brief.patterns.asyncStyle, - "async/await", - "should detect async/await as primary style", - ); -}); - -test("analyzeCodebase: extracts try/catch error handling", async (t) => { - const dir = makeTempDir("try-catch"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "handler.ts"), - ` -export function handleError() { - try { - doSomething(); - } catch (error) { - console.error(error); - } -} - -export function anotherHandler() { - try { - doOther(); - } catch (e) { - throw new Error('wrapped'); - } -} - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.equal( - brief.patterns.errorHandling, - "try/catch", - "should detect try/catch as primary error handling", - ); -}); - -test("analyzeCodebase: extracts camelCase naming convention", async (t) => { - const dir = makeTempDir("camel-case"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), 
{ recursive: true }); - writeFileSync( - join(dir, "src", "utils.ts"), - ` -export function getUserById(userId: string) { - return fetchUser(userId); -} - -export function calculateTotalPrice(itemPrices: number[]) { - return itemPrices.reduce((a, b) => a + b, 0); -} - -export function formatDisplayName(firstName: string, lastName: string) { - return \`\${firstName} \${lastName}\`; -} - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - // camelCase should be detected (getUserById, userId, fetchUser, etc.) - assert.ok( - brief.patterns.namingConvention === "camelCase" || brief.patterns.namingConvention === "mixed", - `should detect camelCase or mixed, got ${brief.patterns.namingConvention}`, - ); -}); - -test("analyzeCodebase: gracefully handles empty directories", async (t) => { - const dir = makeTempDir("empty-src"); - t.after(() => cleanup(dir)); - - // Create empty src directory - mkdirSync(join(dir, "src"), { recursive: true }); - - const brief = await analyzeCodebase(dir); - - // Should not throw, should return valid structure - assert.ok(brief.patterns, "should have patterns"); - assert.equal(brief.patterns.asyncStyle, "unknown", "should return unknown for empty"); - assert.equal(brief.patterns.errorHandling, "unknown", "should return unknown for empty"); - assert.equal(brief.patterns.namingConvention, "unknown", "should return unknown for empty"); -}); - -test("analyzeCodebase: returns unknown for unrecognized language patterns (Ruby)", async (t) => { - // Ruby is detected by LANGUAGE_MAP but not in LANGUAGE_PATTERNS registry - // This tests the graceful fallback behavior: naming convention still works, - // but language-specific patterns (async/error) should return "unknown" - const dir = makeTempDir("ruby-project"); - t.after(() => cleanup(dir)); - - // Create a Ruby project with Gemfile (detected as "ruby" in LANGUAGE_MAP) - writeFileSync(join(dir, "Gemfile"), `source "https://rubygems.org"\ngem "rails"`, "utf-8"); - - // Add a Ruby file 
with patterns that would match JS/TS regexes incorrectly - mkdirSync(join(dir, "lib"), { recursive: true }); - writeFileSync( - join(dir, "lib", "service.rb"), - ` -class UserService - def fetch_user(user_id) - user = User.find(user_id) - user - rescue ActiveRecord::RecordNotFound => e - Rails.logger.error("User not found: #{e.message}") - nil - end - - def async_task(&block) - # Ruby doesn't have async/await but has yield and blocks - Thread.new { yield } - end -end - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - // Language should be detected as Ruby - assert.equal(brief.techStack.primaryLanguage, "ruby", "should detect ruby from Gemfile"); - - // Language-specific patterns should return "unknown" (not JS/TS patterns) - assert.equal( - brief.patterns.asyncStyle, - "unknown", - "should return unknown for async style in unrecognized language", - ); - assert.equal( - brief.patterns.errorHandling, - "unknown", - "should return unknown for error handling in unrecognized language", - ); - - // But naming convention detection should still work (it's universal) - // The Ruby code uses snake_case (fetch_user, user_id) and camelCase (UserService) - assert.ok( - brief.patterns.namingConvention !== "unknown", - "naming convention should still be detected for unrecognized languages", - ); - - // Evidence should explain why patterns aren't available - assert.ok( - brief.patterns.evidence.asyncStyle.some((e) => e.includes("not in pattern registry")), - "evidence should explain async style is not available", - ); - assert.ok( - brief.patterns.evidence.errorHandling.some((e) => e.includes("not in pattern registry")), - "evidence should explain error handling is not available", - ); -}); - -// ─── formatCodebaseBrief ──────────────────────────────────────────────────────── - -test("formatCodebaseBrief: produces markdown output", async (t) => { - const brief: CodebaseBrief = { - techStack: { - primaryLanguage: "javascript/typescript", - detectedFiles: 
["package.json", "tsconfig.json"], - packageManager: "npm", - isMonorepo: false, - hasTests: true, - hasCI: true, - }, - moduleStructure: { - topLevelDirs: ["src", "test"], - srcSubdirs: ["components", "utils"], - totalFilesSampled: 5, - }, - patterns: { - asyncStyle: "async/await", - errorHandling: "try/catch", - namingConvention: "camelCase", - evidence: { - asyncStyle: ["src/api.ts: async/await (5 occurrences)"], - errorHandling: ["src/handler.ts: try/catch (3 occurrences)"], - namingConvention: ["camelCase: 50 occurrences"], - }, - fileCounts: { - asyncAwait: 3, - promises: 0, - callbacks: 0, - tryCatch: 2, - errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: ["src/index.ts", "src/utils.ts"], - }; - - const formatted = formatCodebaseBrief(brief); - - assert.ok(formatted.includes("## Tech Stack"), "should have Tech Stack section"); - assert.ok(formatted.includes("## Module Structure"), "should have Module Structure section"); - assert.ok(formatted.includes("## Code Patterns"), "should have Code Patterns section"); - assert.ok(formatted.includes("javascript/typescript"), "should include language"); - assert.ok(formatted.includes("npm"), "should include package manager"); - assert.ok(formatted.includes("async/await"), "should include async style"); - assert.ok(formatted.includes("try/catch"), "should include error handling"); - assert.ok(formatted.includes("camelCase"), "should include naming convention"); - assert.ok(formatted.includes("3 async/await files"), "should include file counts for async style"); - assert.ok(formatted.includes("2 try/catch files"), "should include file counts for error handling"); -}); - -test("formatCodebaseBrief: caps output at 3000 chars", async (t) => { - // Create a brief with many files to exceed the limit - const manyFiles = Array.from({ length: 100 }, (_, i) => `file-${i}.ts`); - - const brief: CodebaseBrief = { - techStack: { - primaryLanguage: "javascript/typescript", - detectedFiles: manyFiles, - packageManager: 
"npm", - isMonorepo: false, - hasTests: true, - hasCI: true, - }, - moduleStructure: { - topLevelDirs: Array.from({ length: 50 }, (_, i) => `dir-${i}`), - srcSubdirs: Array.from({ length: 50 }, (_, i) => `subdir-${i}`), - totalFilesSampled: 100, - }, - patterns: { - asyncStyle: "async/await", - errorHandling: "try/catch", - namingConvention: "camelCase", - evidence: { - asyncStyle: manyFiles.map((f) => `${f}: async/await (10 occurrences)`), - errorHandling: manyFiles.map((f) => `${f}: try/catch (5 occurrences)`), - namingConvention: ["camelCase: 500 occurrences"], - }, - fileCounts: { - asyncAwait: 50, - promises: 10, - callbacks: 5, - tryCatch: 30, - errorCallbacks: 5, - resultTypes: 0, - }, - }, - sampledFiles: manyFiles, - }; - - const formatted = formatCodebaseBrief(brief); - - assert.ok( - formatted.length <= 3000, - `should cap at 3000 chars, got ${formatted.length}`, - ); - if (formatted.length === 3000) { - assert.ok(formatted.endsWith("..."), "should end with ellipsis when truncated"); - } -}); - -test("formatCodebaseBrief: handles minimal brief", async (t) => { - const brief: CodebaseBrief = { - techStack: { - primaryLanguage: undefined, - detectedFiles: [], - packageManager: undefined, - isMonorepo: false, - hasTests: false, - hasCI: false, - }, - moduleStructure: { - topLevelDirs: [], - srcSubdirs: [], - totalFilesSampled: 0, - }, - patterns: { - asyncStyle: "unknown", - errorHandling: "unknown", - namingConvention: "unknown", - evidence: { - asyncStyle: [], - errorHandling: [], - namingConvention: [], - }, - fileCounts: { - asyncAwait: 0, - promises: 0, - callbacks: 0, - tryCatch: 0, - errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: [], - }; - - const formatted = formatCodebaseBrief(brief); - - assert.ok(formatted.includes("## Tech Stack"), "should still have sections"); - assert.ok(formatted.includes("**Monorepo:** No"), "should show monorepo status"); - assert.ok(formatted.includes("unknown"), "should show unknown patterns"); -}); - 
-// ─── Integration: Brief includes PROJECT_FILES markers ────────────────────────── - -test("analyzeCodebase: brief includes detected files from PROJECT_FILES", async (t) => { - const dir = makeTempDir("project-files"); - t.after(() => cleanup(dir)); - - // Create several PROJECT_FILES markers - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - writeFileSync(join(dir, "tsconfig.json"), '{}', "utf-8"); - mkdirSync(join(dir, ".github", "workflows"), { recursive: true }); - writeFileSync( - join(dir, ".github", "workflows", "ci.yml"), - "name: CI", - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok( - brief.techStack.detectedFiles.includes("package.json"), - "should detect package.json", - ); - assert.ok( - brief.techStack.hasCI, - "should detect CI from .github/workflows", - ); -}); - -test("analyzeCodebase: brief includes sampled file patterns", async (t) => { - const dir = makeTempDir("sampled-patterns"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - - // Write files with distinct patterns - writeFileSync( - join(dir, "src", "async-heavy.ts"), - ` -async function one() { await fetch('/a'); } -async function two() { await fetch('/b'); } -async function three() { await fetch('/c'); } - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok(brief.sampledFiles.length > 0, "should have sampled files"); - assert.ok( - brief.patterns.evidence.asyncStyle.length > 0, - "should have async style evidence", - ); -}); - -// ─── aggregatePriorContext ────────────────────────────────────────────────────── - -test("aggregatePriorContext: handles missing files gracefully", async (t) => { - const dir = makeTempDir("no-gsd"); - t.after(() => cleanup(dir)); - - // Create .gsd directory but no files - mkdirSync(join(dir, ".gsd"), { recursive: true }); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.decisions.totalCount, 0, "should have no 
decisions"); - assert.equal(brief.requirements.totalCount, 0, "should have no requirements"); - assert.equal(brief.knowledge, "No prior knowledge recorded.", "should indicate no knowledge"); - assert.equal(brief.summaries, "No prior milestone summaries.", "should indicate no summaries"); -}); - -test("aggregatePriorContext: handles completely empty directory", async (t) => { - const dir = makeTempDir("empty-project"); - t.after(() => cleanup(dir)); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.decisions.totalCount, 0); - assert.equal(brief.requirements.totalCount, 0); - assert.equal(brief.knowledge, "No prior knowledge recorded."); - assert.equal(brief.summaries, "No prior milestone summaries."); -}); - -test("aggregatePriorContext: parses DECISIONS.md and groups by scope", async (t) => { - const dir = makeTempDir("decisions"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync( - join(dir, ".gsd", "DECISIONS.md"), - `# Decisions Register - -| # | When | Scope | Decision | Choice | Rationale | Revisable? 
| Made By | -|---|------|-------|----------|--------|-----------|------------|---------| -| D001 | M001/S01 | pattern | Async style | async/await | Modern standard | Yes | agent | -| D002 | M001/S02 | architecture | Data layer | SQLite | Simple, embedded | No | human | -| D003 | M001/S03 | pattern | Error handling | try/catch | Consistency | Yes | agent | -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.decisions.totalCount, 3, "should parse all decisions"); - assert.equal(brief.decisions.byScope.get("pattern")?.length, 2, "should group pattern scope"); - assert.equal(brief.decisions.byScope.get("architecture")?.length, 1, "should group architecture scope"); - - const patternDecisions = brief.decisions.byScope.get("pattern")!; - assert.equal(patternDecisions[0].id, "D001"); - assert.equal(patternDecisions[0].decision, "Async style"); - assert.equal(patternDecisions[0].choice, "async/await"); -}); - -test("aggregatePriorContext: parses REQUIREMENTS.md and groups by status", async (t) => { - const dir = makeTempDir("requirements"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync( - join(dir, ".gsd", "REQUIREMENTS.md"), - `# Requirements - -## Active - -### R001 — First requirement -- Status: active -- Description: Something active - -### R002 — Second requirement -- Status: active -- Description: Also active - -## Validated - -### R003 — Validated requirement -- Status: validated -- Description: This was validated - -## Deferred - -### R004 — Deferred requirement -- Status: deferred -- Description: Postponed for later -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.requirements.totalCount, 4, "should parse all requirements"); - assert.equal(brief.requirements.active.length, 2, "should have 2 active"); - assert.equal(brief.requirements.validated.length, 1, "should have 1 validated"); - 
assert.equal(brief.requirements.deferred.length, 1, "should have 1 deferred"); - - assert.equal(brief.requirements.active[0].id, "R001"); - assert.equal(brief.requirements.active[0].description, "First requirement"); -}); - -test("aggregatePriorContext: loads KNOWLEDGE.md content", async (t) => { - const dir = makeTempDir("knowledge"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync( - join(dir, ".gsd", "KNOWLEDGE.md"), - `# Knowledge Base - -## Rules - -| # | Scope | Rule | Why | Added | -|---|-------|------|-----|-------| -| K001 | global | Always use TypeScript | Type safety | manual | - -## Patterns - -**Pattern X:** Do this for better Y. -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.ok(brief.knowledge.includes("Rules"), "should include knowledge content"); - assert.ok(brief.knowledge.includes("TypeScript"), "should include rule text"); -}); - -test("aggregatePriorContext: truncates oversized content without cutting mid-section", async (t) => { - const dir = makeTempDir("large-knowledge"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - - // Create large knowledge file - const largeContent = `# Knowledge Base - -## Section One - -${"Lorem ipsum dolor sit amet. ".repeat(100)} - -## Section Two - -${"More content here. ".repeat(100)} - -## Section Three - -${"Even more content. 
".repeat(100)} -`; - - writeFileSync(join(dir, ".gsd", "KNOWLEDGE.md"), largeContent, "utf-8"); - - const brief = await aggregatePriorContext(dir); - - assert.ok(brief.knowledge.length <= 2000, "should truncate to 2K chars"); - assert.ok(brief.knowledge.includes("[truncated]"), "should indicate truncation"); - // Should try to preserve section boundaries - assert.ok( - brief.knowledge.includes("## Section"), - "should keep section headings intact", - ); -}); - -test("aggregatePriorContext: loads milestone summaries", async (t) => { - const dir = makeTempDir("milestones"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd", "milestones", "M001"), { recursive: true }); - mkdirSync(join(dir, ".gsd", "milestones", "M002"), { recursive: true }); - - writeFileSync( - join(dir, ".gsd", "milestones", "M001", "MILESTONE-SUMMARY.md"), - `# M001 — First Milestone - -**Implemented core functionality and established patterns.** - -## What Happened -Did stuff. -`, - "utf-8", - ); - - writeFileSync( - join(dir, ".gsd", "milestones", "M002", "MILESTONE-SUMMARY.md"), - `# M002 — Second Milestone - -**Extended the system with new features.** - -## What Happened -Did more stuff. 
-`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.ok(brief.summaries.includes("M001"), "should include M001 summary"); - assert.ok(brief.summaries.includes("M002"), "should include M002 summary"); - assert.ok( - brief.summaries.includes("core functionality"), - "should extract one-liner from M001", - ); - assert.ok( - brief.summaries.includes("new features"), - "should extract one-liner from M002", - ); -}); - -// ─── formatPriorContextBrief ──────────────────────────────────────────────────── - -test("formatPriorContextBrief: produces markdown with all sections", async (t) => { - const brief: PriorContextBrief = { - decisions: { - byScope: new Map([ - [ - "pattern", - [ - { id: "D001", scope: "pattern", decision: "Async", choice: "await", rationale: "Modern" }, - ], - ], - [ - "architecture", - [ - { id: "D002", scope: "architecture", decision: "DB", choice: "SQLite", rationale: "Simple" }, - ], - ], - ]), - totalCount: 2, - }, - requirements: { - active: [{ id: "R001", description: "Core feature", status: "active" }], - validated: [], - deferred: [], - totalCount: 1, - }, - knowledge: "Some knowledge here.", - summaries: "### M001\nDid things.", - }; - - const formatted = formatPriorContextBrief(brief); - - assert.ok(formatted.includes("## Prior Decisions"), "should have decisions section"); - assert.ok(formatted.includes("## Prior Requirements"), "should have requirements section"); - assert.ok(formatted.includes("## Prior Knowledge"), "should have knowledge section"); - assert.ok(formatted.includes("## Prior Milestone Summaries"), "should have summaries section"); - assert.ok(formatted.includes("D001"), "should include decision ID"); - assert.ok(formatted.includes("R001"), "should include requirement ID"); - assert.ok(formatted.includes("pattern"), "should include scope heading"); -}); - -test("formatPriorContextBrief: handles empty brief", async (t) => { - const brief: PriorContextBrief = { - decisions: { - byScope: new Map(), 
- totalCount: 0, - }, - requirements: { - active: [], - validated: [], - deferred: [], - totalCount: 0, - }, - knowledge: "No prior knowledge recorded.", - summaries: "No prior milestone summaries.", - }; - - const formatted = formatPriorContextBrief(brief); - - assert.ok(formatted.includes("No prior decisions recorded"), "should indicate no decisions"); - assert.ok(formatted.includes("No prior requirements recorded"), "should indicate no requirements"); - assert.ok(formatted.includes("No prior knowledge recorded"), "should indicate no knowledge"); - assert.ok(formatted.includes("No prior milestone summaries"), "should indicate no summaries"); -}); - -test("formatPriorContextBrief: caps total output at 6K chars", async (t) => { - // Create a brief with lots of content - const manyDecisions: Array<{ - id: string; - scope: string; - decision: string; - choice: string; - rationale: string; - }> = []; - for (let i = 0; i < 100; i++) { - manyDecisions.push({ - id: `D${String(i).padStart(3, "0")}`, - scope: "pattern", - decision: `Decision number ${i} with some extra text for length`, - choice: `Choice ${i} with more text to make it longer`, - rationale: `Rationale ${i}`, - }); - } - - const manyRequirements: Array<{ - id: string; - description: string; - status: "active"; - }> = []; - for (let i = 0; i < 100; i++) { - manyRequirements.push({ - id: `R${String(i).padStart(3, "0")}`, - description: `Requirement ${i} with a long description that takes up space`, - status: "active", - }); - } - - const brief: PriorContextBrief = { - decisions: { - byScope: new Map([["pattern", manyDecisions]]), - totalCount: 100, - }, - requirements: { - active: manyRequirements, - validated: [], - deferred: [], - totalCount: 100, - }, - knowledge: "A ".repeat(1000), - summaries: "B ".repeat(1000), - }; - - const formatted = formatPriorContextBrief(brief); - - assert.ok(formatted.length <= 6000, `should cap at 6000 chars, got ${formatted.length}`); -}); - -// ─── researchEcosystem 
────────────────────────────────────── -// Note: Ecosystem research now always returns available: false from the preparation -// phase. Research happens during the discussion using web search tools. - -test("researchEcosystem: always returns available: false (research happens during discussion)", async (t) => { - const dir = makeTempDir("ecosystem-disabled"); - t.after(() => cleanup(dir)); - - const brief = await researchEcosystem(["Next.js", "TypeScript"], dir); - - assert.equal(brief.available, false, "should indicate research not available from preparation"); - assert.ok(brief.skippedReason, "should have skipped reason"); - assert.ok( - brief.skippedReason!.includes("during the discussion"), - "should explain research happens during discussion", - ); - assert.deepEqual(brief.queries, [], "should have empty queries"); - assert.deepEqual(brief.findings, [], "should have empty findings"); -}); - -test("researchEcosystem: returns consistent result regardless of tech stack", async (t) => { - const dir = makeTempDir("ecosystem-consistent"); - t.after(() => cleanup(dir)); - - // With tech stack - const briefWithTech = await researchEcosystem(["React", "Next.js"], dir); - // Without tech stack - const briefEmpty = await researchEcosystem([], dir); - - // Both should return the same unavailable result - assert.equal(briefWithTech.available, false); - assert.equal(briefEmpty.available, false); - assert.deepEqual(briefWithTech.queries, []); - assert.deepEqual(briefEmpty.queries, []); -}); - -// ─── formatEcosystemBrief ─────────────────────────────────────────────────────── -// Note: formatEcosystemBrief now returns a simple fixed message since ecosystem -// research always returns unavailable from the preparation phase. 
- -test("formatEcosystemBrief: returns simplified message for discussion-phase research", async (t) => { - const brief: EcosystemBrief = { - available: false, - queries: [], - findings: [], - skippedReason: "Ecosystem research is performed during the discussion using web search tools, not during preparation.", - }; - - const formatted = formatEcosystemBrief(brief); - - assert.ok(formatted.includes("## Ecosystem Research"), "should have section header"); - assert.ok(formatted.includes("during the discussion"), "should mention discussion phase"); - assert.ok(formatted.includes("web search tools"), "should mention web search tools"); -}); - -test("formatEcosystemBrief: returns consistent output regardless of brief content", async (t) => { - // Even if a brief has findings (which shouldn't happen from preparation), - // the function returns the simplified message - const briefWithFindings: EcosystemBrief = { - available: true, - queries: ["test query"], - findings: [{ query: "test", title: "Test", snippet: "test", url: "https://example.com" }], - provider: "tavily", - }; - - const briefEmpty: EcosystemBrief = { - available: false, - queries: [], - findings: [], - skippedReason: "Test reason", - }; - - const formatted1 = formatEcosystemBrief(briefWithFindings); - const formatted2 = formatEcosystemBrief(briefEmpty); - - // Both should return the same simplified message - assert.equal(formatted1, formatted2, "should return consistent output"); - assert.ok(formatted1.includes("## Ecosystem Research"), "should have section header"); -}); - - -// ─── runPreparation (Orchestrator) ────────────────────────────────────────────── - -/** - * Mock UI context that captures notifications for testing. 
- */ -function createMockUI(): PreparationUIContext & { notifications: Array<{ message: string; type?: string }> } { - const notifications: Array<{ message: string; type?: string }> = []; - return { - notifications, - notify(message: string, type?: "info" | "warning" | "error" | "success") { - notifications.push({ message, type }); - }, - }; -} - -test("runPreparation: returns complete result with all briefs populated", async (t) => { - const dir = makeTempDir("runprep-full"); - t.after(() => cleanup(dir)); - - // Set up a minimal project - mkdirSync(join(dir, "src"), { recursive: true }); - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test-project"}', "utf-8"); - writeFileSync(join(dir, "src", "index.ts"), 'export const x = 1;', "utf-8"); - - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, // Skip web research to avoid API key requirement - discuss_depth: "standard", - }; - - const result = await runPreparation(dir, ui, prefs); - - // Check result structure - assert.equal(result.enabled, true, "should be enabled"); - assert.ok(result.codebase, "should have codebase"); - assert.ok(result.priorContext, "should have priorContext"); - assert.ok(result.ecosystem, "should have ecosystem"); - assert.ok(typeof result.codebaseBrief === "string", "should have codebaseBrief"); - assert.ok(typeof result.priorContextBrief === "string", "should have priorContextBrief"); - assert.ok(typeof result.ecosystemBrief === "string", "should have ecosystemBrief"); - assert.ok(result.durationMs > 0, "should have positive duration"); - assert.equal(result.ecosystemResearchPerformed, false, "should not have performed ecosystem research"); - - // Check TUI progress notifications - assert.ok(ui.notifications.length > 0, "should have notifications"); - assert.ok( - ui.notifications.some((n) => n.message.includes("Analyzing codebase")), - "should show 
codebase analysis start", - ); - assert.ok( - ui.notifications.some((n) => n.message.includes("✓ Analyzed codebase")), - "should show codebase analysis complete", - ); - assert.ok( - ui.notifications.some((n) => n.message.includes("Reviewing prior context")), - "should show prior context start", - ); - assert.ok( - ui.notifications.some((n) => n.message.includes("✓ Reviewed prior context")), - "should show prior context complete", - ); -}); - -test("runPreparation: returns early when discuss_preparation is false", async (t) => { - const dir = makeTempDir("runprep-disabled"); - t.after(() => cleanup(dir)); - - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: false, - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, false, "should indicate preparation disabled"); - assert.equal(result.codebaseBrief, "", "should have empty codebase brief"); - assert.equal(result.priorContextBrief, "", "should have empty prior context brief"); - assert.equal(result.ecosystemBrief, "", "should have empty ecosystem brief"); - assert.equal(ui.notifications.length, 0, "should not show any notifications"); - assert.ok(result.durationMs >= 0, "should have non-negative duration"); -}); - -test("runPreparation: ecosystem research always returns unavailable (happens during discussion)", async (t) => { - const dir = makeTempDir("runprep-no-ecosystem"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: true, // Even with this enabled, ecosystem research returns unavailable - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, true); - assert.equal(result.ecosystemResearchPerformed, false, "should not perform ecosystem research from 
preparation"); - assert.equal(result.ecosystem.available, false); - assert.ok( - result.ecosystem.skippedReason?.includes("during the discussion"), - "should indicate research happens during discussion", - ); - - // Should NOT have ecosystem research notifications (no longer part of preparation) - assert.ok( - !ui.notifications.some((n) => n.message.includes("Researching ecosystem")), - "should not show ecosystem research notification", - ); -}); - -test("runPreparation: works without UI context (silent mode)", async (t) => { - const dir = makeTempDir("runprep-silent"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - // Pass null for UI to test silent mode - const result = await runPreparation(dir, null, prefs); - - assert.equal(result.enabled, true, "should work without UI"); - assert.ok(result.codebase, "should have codebase"); - assert.ok(result.priorContext, "should have priorContext"); - assert.ok(result.durationMs > 0, "should have duration"); -}); - -test("runPreparation: completes within 60s requirement (R112)", async (t) => { - const dir = makeTempDir("runprep-timing"); - t.after(() => cleanup(dir)); - - // Create a project with some content to analyze - mkdirSync(join(dir, "src"), { recursive: true }); - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - writeFileSync(join(dir, "tsconfig.json"), '{}', "utf-8"); - - for (let i = 0; i < 10; i++) { - writeFileSync( - join(dir, "src", `file${i}.ts`), - `export async function fn${i}() { await Promise.resolve(); }\n`.repeat(50), - "utf-8", - ); - } - - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - discuss_depth: "standard", - }; - - const startTime = 
performance.now(); - const result = await runPreparation(dir, null, prefs); - const elapsed = performance.now() - startTime; - - assert.ok(result.durationMs < 60000, `should complete within 60s, took ${result.durationMs}ms`); - assert.ok(elapsed < 60000, `elapsed time should be under 60s, was ${elapsed}ms`); -}); - -test("runPreparation: does not throw on any input", async (t) => { - const dir = makeTempDir("runprep-robust"); - t.after(() => cleanup(dir)); - - // Test with completely empty directory - const prefs: PreparationPreferences = {}; - - let result: PreparationResult | undefined; - let error: unknown; - - try { - result = await runPreparation(dir, null, prefs); - } catch (e) { - error = e; - } - - assert.equal(error, undefined, "should not throw"); - assert.ok(result, "should return result"); - assert.equal(result!.enabled, true, "should be enabled by default"); -}); - -test("runPreparation: detects framework from config files in codebase brief", async (t) => { - const dir = makeTempDir("runprep-framework"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - writeFileSync(join(dir, "next.config.mjs"), 'export default {};', "utf-8"); - - const prefs: PreparationPreferences = { - discuss_preparation: true, - }; - - const result = await runPreparation(dir, null, prefs); - - // Should detect Next.js config file in codebase analysis - assert.ok( - result.codebase.techStack.detectedFiles.includes("next.config.mjs"), - "should detect next.config.mjs in codebase brief", - ); - // Ecosystem queries are always empty from preparation (research happens during discussion) - assert.deepEqual(result.ecosystem.queries, [], "ecosystem queries should be empty from preparation"); -}); - -test("runPreparation: default preferences enable preparation and web research", async (t) => { - const dir = makeTempDir("runprep-defaults"); - t.after(() => cleanup(dir)); - - 
mkdirSync(join(dir, ".gsd"), { recursive: true }); - - const ui = createMockUI(); - const prefs: PreparationPreferences = {}; // All defaults - - const result = await runPreparation(dir, ui, prefs); - - // With defaults, preparation should be enabled - assert.equal(result.enabled, true, "should be enabled by default"); - // Notifications should be shown - assert.ok(ui.notifications.length > 0, "should show notifications"); -}); diff --git a/src/resources/extensions/gsd/tests/prompt-builder.test.ts b/src/resources/extensions/gsd/tests/prompt-builder.test.ts deleted file mode 100644 index 811357307..000000000 --- a/src/resources/extensions/gsd/tests/prompt-builder.test.ts +++ /dev/null @@ -1,669 +0,0 @@ -/** - * Prompt Builder Tests — Comprehensive tests for S02 components. - * - * Tests cover: - * 1. Template validation (context-enhanced.md, discuss-prepared.md) - * 2. Prompt loading and variable substitution - * 3. Enhanced context validation (R109) - * 4. Integration tests for format functions and prompt injection - */ - -import test, { describe } from "node:test"; -import assert from "node:assert/strict"; -import { readFileSync, existsSync } from "node:fs"; -import { join } from "node:path"; - -// ─── Template Paths ───────────────────────────────────────────────────────────── - -const templatesDir = join(process.cwd(), "src/resources/extensions/gsd/templates"); -const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts"); - -const contextEnhancedPath = join(templatesDir, "context-enhanced.md"); -const contextPath = join(templatesDir, "context.md"); -const discussPreparedPath = join(promptsDir, "discuss-prepared.md"); - -// ─── Template Tests ───────────────────────────────────────────────────────────── - -describe("Template: context-enhanced.md", () => { - test("file exists", () => { - assert.ok(existsSync(contextEnhancedPath), "context-enhanced.md should exist"); - }); - - test("contains all original context.md sections", () => { - const 
contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - const originalContext = readFileSync(contextPath, "utf-8"); - - // Extract section headers from original context.md - const originalSections = originalContext.match(/^## .+$/gm) ?? []; - - // Each original section should be present in context-enhanced.md - for (const section of originalSections) { - assert.ok( - contextEnhanced.includes(section), - `context-enhanced.md should contain original section: ${section}`, - ); - } - }); - - test("contains new structured sections for prepared discussions", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - - // New sections required by R108 - const newSections = [ - "## Codebase Brief", - "## Architectural Decisions", - "## Interface Contracts", - "## Error Handling Strategy", - "## Testing Requirements", - "## Acceptance Criteria", - "## Ecosystem Notes", - ]; - - for (const section of newSections) { - assert.ok( - contextEnhanced.includes(section), - `context-enhanced.md should contain new section: ${section}`, - ); - } - }); - - test("Codebase Brief has sub-sections", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - - assert.ok( - contextEnhanced.includes("### Technology Stack"), - "Codebase Brief should have Technology Stack sub-section", - ); - assert.ok( - contextEnhanced.includes("### Key Modules"), - "Codebase Brief should have Key Modules sub-section", - ); - assert.ok( - contextEnhanced.includes("### Patterns in Use"), - "Codebase Brief should have Patterns in Use sub-section", - ); - }); - - test("Architectural Decisions has structured format guidance", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - - // Check for decision structure markers - assert.ok( - contextEnhanced.includes("**Decision:**"), - "Architectural Decisions should have Decision marker", - ); - assert.ok( - contextEnhanced.includes("**Rationale:**"), - "Architectural Decisions should have 
Rationale marker", - ); - assert.ok( - contextEnhanced.includes("**Evidence:**"), - "Architectural Decisions should have Evidence marker", - ); - assert.ok( - contextEnhanced.includes("**Alternatives Considered:**"), - "Architectural Decisions should have Alternatives Considered marker", - ); - }); -}); - -describe("Template: discuss-prepared.md", () => { - test("file exists", () => { - assert.ok(existsSync(discussPreparedPath), "discuss-prepared.md should exist"); - }); - - test("contains all three brief placeholders", () => { - const discussPrepared = readFileSync(discussPreparedPath, "utf-8"); - - assert.ok( - discussPrepared.includes("{{codebaseBrief}}"), - "discuss-prepared.md should contain {{codebaseBrief}} placeholder", - ); - assert.ok( - discussPrepared.includes("{{priorContextBrief}}"), - "discuss-prepared.md should contain {{priorContextBrief}} placeholder", - ); - assert.ok( - discussPrepared.includes("{{ecosystemBrief}}"), - "discuss-prepared.md should contain {{ecosystemBrief}} placeholder", - ); - }); - - test("contains 4-layer protocol markers", () => { - const discussPrepared = readFileSync(discussPreparedPath, "utf-8"); - - // Check for all four layer headings - assert.ok( - discussPrepared.includes("## Layer 1 — Scope"), - "discuss-prepared.md should contain Layer 1 (Scope)", - ); - assert.ok( - discussPrepared.includes("## Layer 2 — Architecture"), - "discuss-prepared.md should contain Layer 2 (Architecture)", - ); - assert.ok( - discussPrepared.includes("## Layer 3 — Error States"), - "discuss-prepared.md should contain Layer 3 (Error States)", - ); - assert.ok( - discussPrepared.includes("## Layer 4 — Quality Bar"), - "discuss-prepared.md should contain Layer 4 (Quality Bar)", - ); - }); - - test("contains gate question IDs for all layers", () => { - const discussPrepared = readFileSync(discussPreparedPath, "utf-8"); - - assert.ok( - discussPrepared.includes("layer1_scope_gate"), - "discuss-prepared.md should contain layer1_scope_gate 
question ID", - ); - assert.ok( - discussPrepared.includes("layer2_architecture_gate"), - "discuss-prepared.md should contain layer2_architecture_gate question ID", - ); - assert.ok( - discussPrepared.includes("layer3_error_gate"), - "discuss-prepared.md should contain layer3_error_gate question ID", - ); - assert.ok( - discussPrepared.includes("layer4_quality_gate"), - "discuss-prepared.md should contain layer4_quality_gate question ID", - ); - }); - - test("contains context-enhanced template guidance", () => { - const discussPrepared = readFileSync(discussPreparedPath, "utf-8"); - - assert.ok( - discussPrepared.includes("context-enhanced"), - "discuss-prepared.md should reference context-enhanced template", - ); - }); -}); - -// ─── Prompt Loading Tests ─────────────────────────────────────────────────────── - -describe("Prompt Loading", () => { - // Dynamic import to work with the module's warm cache - test("loadPrompt substitutes all variables correctly", async () => { - const { loadPrompt } = await import("../prompt-loader.ts"); - - const result = loadPrompt("discuss-prepared", { - preamble: "Test preamble", - codebaseBrief: "Test codebase brief content", - priorContextBrief: "Test prior context brief content", - ecosystemBrief: "Test ecosystem brief content", - milestoneId: "M001", - contextPath: ".gsd/milestones/M001/M001-CONTEXT.md", - roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md", - inlinedTemplates: "Test templates", - commitInstruction: "Test commit instruction", - multiMilestoneCommitInstruction: "Test multi-milestone commit", - }); - - assert.ok(result.includes("Test codebase brief content"), "codebaseBrief should be substituted"); - assert.ok(result.includes("Test prior context brief content"), "priorContextBrief should be substituted"); - assert.ok(result.includes("Test ecosystem brief content"), "ecosystemBrief should be substituted"); - assert.ok(!result.includes("{{codebaseBrief}}"), "placeholder should not remain"); - }); - - 
test("loadPrompt throws GSDError for missing variables", async () => { - const { loadPrompt } = await import("../prompt-loader.ts"); - const { GSDError, GSD_PARSE_ERROR } = await import("../errors.ts"); - - assert.throws( - () => loadPrompt("discuss-prepared", {}), // Missing required variables - (err: unknown) => { - assert.ok(err instanceof GSDError, "should throw GSDError"); - assert.equal((err as InstanceType).code, GSD_PARSE_ERROR, "should have GSD_PARSE_ERROR code"); - return true; - }, - ); - }); - - test("brief content with {{...}} patterns does not cause false variable errors", async () => { - const { loadPrompt } = await import("../prompt-loader.ts"); - - // Content that contains template-like patterns but should not be treated as variables - const briefWithPatterns = ` -## Tech Stack -- Framework: Uses \`{{slot}}\` placeholder syntax in templates -- Pattern: The codebase has \`{{variableName}}\` markers -`; - - // This should NOT throw, because {{slot}} and {{variableName}} are inside - // the brief value, not undeclared placeholders in the template itself. 
- const result = loadPrompt("discuss-prepared", { - preamble: "Test", - codebaseBrief: briefWithPatterns, - priorContextBrief: "Test brief", - ecosystemBrief: "Test brief", - milestoneId: "M001", - contextPath: ".gsd/milestones/M001/M001-CONTEXT.md", - roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md", - inlinedTemplates: "Test templates", - commitInstruction: "Test commit instruction", - multiMilestoneCommitInstruction: "Test multi-milestone commit", - }); - - assert.ok(result.includes("{{slot}}"), "template-like patterns in content should be preserved"); - assert.ok(result.includes("{{variableName}}"), "template-like patterns in content should be preserved"); - }); -}); - -// ─── Validation Tests ─────────────────────────────────────────────────────────── - -describe("Enhanced Context Validation", () => { - test("valid enhanced context passes validation", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const validContent = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. 
- -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript -**Rationale:** Type safety - -## Acceptance Criteria - -- Criterion 1 -- Criterion 2 -`; - - const result = validateEnhancedContext(validContent); - assert.equal(result.valid, true, "valid content should pass validation"); - assert.equal(result.missing.length, 0, "no missing sections"); - }); - - test("missing scope section fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentMissingScope = ` -# M001: Test Milestone - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentMissingScope); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.some((m) => m.includes("Scope") || m.includes("Why This Milestone")), - "should report missing scope section", - ); - }); - - test("missing architectural decisions section fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentMissingDecisions = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentMissingDecisions); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.includes("Architectural Decisions"), - "should report missing architectural decisions section", - ); - }); - - test("missing acceptance criteria section fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentMissingCriteria = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. 
- -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript -`; - - const result = validateEnhancedContext(contentMissingCriteria); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.includes("Acceptance Criteria"), - "should report missing acceptance criteria section", - ); - }); - - test("empty architectural decisions section (no entries) fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentEmptyDecisions = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. - -## Architectural Decisions - -No decisions yet. - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentEmptyDecisions); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.some((m) => m.includes("decision entry")), - "should report missing decision entry", - ); - }); - - test("alternative scope headers are accepted", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - // Test with ## Scope - const withScope = ` -## Scope - -### In Scope -- Item 1 - -## Architectural Decisions - -### Decision 1 -**Decision:** Test - -## Acceptance Criteria - -- Criterion 1 -`; - assert.equal(validateEnhancedContext(withScope).valid, true, "## Scope should be accepted"); - - // Test with ## Milestone Scope - const withMilestoneScope = ` -## Milestone Scope - -This is the scope. 
- -## Architectural Decisions - -### Decision 1 -**Decision:** Test - -## Acceptance Criteria - -- Criterion 1 -`; - assert.equal( - validateEnhancedContext(withMilestoneScope).valid, - true, - "## Milestone Scope should be accepted", - ); - }); - - test("alternative acceptance criteria headers are accepted", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const withFinalIntegrated = ` -## Why This Milestone - -Test - -## Architectural Decisions - -### Decision 1 -**Decision:** Test - -## Final Integrated Acceptance - -- Criterion 1 -`; - assert.equal( - validateEnhancedContext(withFinalIntegrated).valid, - true, - "## Final Integrated Acceptance should be accepted", - ); - }); - - test("inline decision format is accepted", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const withInlineDecision = ` -## Why This Milestone - -Test - -## Architectural Decisions - -**Decision:** Use React for the frontend - -## Acceptance Criteria - -- Criterion 1 -`; - assert.equal( - validateEnhancedContext(withInlineDecision).valid, - true, - "**Decision marker format should be accepted", - ); - }); -}); - -// ─── Integration Tests ────────────────────────────────────────────────────────── - -describe("Integration: Format Functions", () => { - test("formatCodebaseBrief produces non-empty output", async () => { - const { formatCodebaseBrief } = await import("../preparation.ts"); - - const brief = { - techStack: { - primaryLanguage: "TypeScript", - detectedFiles: ["package.json", "tsconfig.json"], - packageManager: "npm", - isMonorepo: false, - hasTests: true, - hasCI: true, - }, - moduleStructure: { - topLevelDirs: ["src", "tests"], - srcSubdirs: ["components", "utils"], - totalFilesSampled: 5, - }, - patterns: { - asyncStyle: "async/await" as const, - errorHandling: "try/catch" as const, - namingConvention: "camelCase" as const, - evidence: { - asyncStyle: ["src/foo.ts: async/await (5 
occurrences)"], - errorHandling: ["src/bar.ts: try/catch (3 occurrences)"], - namingConvention: ["camelCase: 50 occurrences"], - }, - fileCounts: { - asyncAwait: 3, - promises: 0, - callbacks: 0, - tryCatch: 2, - errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: ["src/index.ts", "src/utils.ts"], - }; - - const formatted = formatCodebaseBrief(brief); - assert.ok(formatted.length > 0, "formatted brief should not be empty"); - assert.ok(formatted.includes("TypeScript"), "should include primary language"); - assert.ok(formatted.includes("async/await"), "should include async style"); - }); - - test("formatPriorContextBrief produces non-empty output", async () => { - const { formatPriorContextBrief } = await import("../preparation.ts"); - - const brief = { - decisions: { - byScope: new Map([ - ["architecture", [{ id: "D001", scope: "architecture", decision: "Use SQLite", choice: "SQLite", rationale: "Simplicity" }]], - ]), - totalCount: 1, - }, - requirements: { - active: [{ id: "R001", description: "Test requirement", status: "active" as const }], - validated: [], - deferred: [], - totalCount: 1, - }, - knowledge: "Some knowledge entry", - summaries: "M001 completed X and Y", - }; - - const formatted = formatPriorContextBrief(brief); - assert.ok(formatted.length > 0, "formatted brief should not be empty"); - assert.ok(formatted.includes("Prior Decisions"), "should include decisions section"); - assert.ok(formatted.includes("D001"), "should include decision ID"); - }); - - test("formatEcosystemBrief returns simplified message (research happens during discussion)", async () => { - const { formatEcosystemBrief } = await import("../preparation.ts"); - - // formatEcosystemBrief now returns a fixed message regardless of brief content - // because ecosystem research happens during the discussion, not preparation - const briefWithFindings = { - available: true, - queries: ["Next.js best practices 2024"], - findings: [ - { - query: "Next.js best practices 2024", - 
title: "Server Components Guide", - url: "https://example.com/guide", - snippet: "Use Server Components for data fetching", - }, - ], - provider: "tavily", - }; - - const formatted = formatEcosystemBrief(briefWithFindings); - assert.ok(formatted.length > 0, "formatted brief should not be empty"); - assert.ok(formatted.includes("Ecosystem Research"), "should include research heading"); - assert.ok(formatted.includes("during the discussion"), "should mention research happens during discussion"); - }); - - test("formatEcosystemBrief returns same output for any brief state", async () => { - const { formatEcosystemBrief } = await import("../preparation.ts"); - - const briefUnavailable = { - available: false, - queries: [], - findings: [], - skippedReason: "No API key configured", - }; - - const briefAvailable = { - available: true, - queries: ["test"], - findings: [], - provider: "tavily", - }; - - const formatted1 = formatEcosystemBrief(briefUnavailable); - const formatted2 = formatEcosystemBrief(briefAvailable); - - // Both should return the same simplified message - assert.equal(formatted1, formatted2, "should return consistent output regardless of brief state"); - assert.ok(formatted1.includes("web search tools"), "should mention web search tools"); - }); - - test("formatted briefs can be injected into prompt without errors", async () => { - const { loadPrompt } = await import("../prompt-loader.ts"); - const { formatCodebaseBrief, formatPriorContextBrief, formatEcosystemBrief } = await import("../preparation.ts"); - - // Create realistic briefs - const codebaseBrief = formatCodebaseBrief({ - techStack: { - primaryLanguage: "TypeScript", - detectedFiles: ["package.json"], - packageManager: "npm", - isMonorepo: false, - hasTests: true, - hasCI: false, - }, - moduleStructure: { - topLevelDirs: ["src"], - srcSubdirs: [], - totalFilesSampled: 1, - }, - patterns: { - asyncStyle: "async/await" as const, - errorHandling: "try/catch" as const, - namingConvention: "camelCase" 
as const, - evidence: { asyncStyle: [], errorHandling: [], namingConvention: [] }, - fileCounts: { - asyncAwait: 0, - promises: 0, - callbacks: 0, - tryCatch: 0, - errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: [], - }); - - const priorContextBrief = formatPriorContextBrief({ - decisions: { byScope: new Map(), totalCount: 0 }, - requirements: { active: [], validated: [], deferred: [], totalCount: 0 }, - knowledge: "No prior knowledge recorded.", - summaries: "No prior milestone summaries.", - }); - - const ecosystemBrief = formatEcosystemBrief({ - available: false, - queries: [], - findings: [], - skippedReason: "Preparation disabled", - }); - - // Should not throw when injecting formatted briefs - const result = loadPrompt("discuss-prepared", { - preamble: "Test preamble", - codebaseBrief, - priorContextBrief, - ecosystemBrief, - milestoneId: "M001", - contextPath: ".gsd/milestones/M001/M001-CONTEXT.md", - roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md", - inlinedTemplates: "Test templates", - commitInstruction: "Do not commit", - multiMilestoneCommitInstruction: "Do not commit", - }); - - assert.ok(result.includes("TypeScript"), "codebase brief should be present"); - assert.ok(result.includes("Prior Decisions"), "prior context brief should be present"); - // formatEcosystemBrief now returns a fixed message about research during discussion - assert.ok(result.includes("during the discussion"), "ecosystem brief should be present"); - }); -}); diff --git a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts index 5b64d4cd6..7124ec494 100644 --- a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts @@ -59,12 +59,6 @@ test("discuss prompt allows implementation questions when they materially matter assert.doesNotMatch(prompt, /Questions must be about the experience, not the implementation/i); }); 
-test("discuss-prepared prompt enforces round-by-round user turn taking", () => { - const prompt = readPrompt("discuss-prepared"); - assert.match(prompt, /Each round is multi-turn: run one round, then wait for the user's response before starting the next round\./i); - assert.match(prompt, /Never fabricate or simulate user input while moving through layers/i); -}); - test("guided discussion prompts avoid wrap-up prompts after every round", () => { const milestonePrompt = readPrompt("guided-discuss-milestone"); const slicePrompt = readPrompt("guided-discuss-slice"); diff --git a/src/resources/extensions/gsd/tests/provider-errors.test.ts b/src/resources/extensions/gsd/tests/provider-errors.test.ts index 34c4ed824..44670b678 100644 --- a/src/resources/extensions/gsd/tests/provider-errors.test.ts +++ b/src/resources/extensions/gsd/tests/provider-errors.test.ts @@ -101,6 +101,13 @@ test("classifyError detects quota exceeded as permanent", () => { assert.ok(!isTransient(result)); }); +test("classifyError treats plain 'Connection error.' as transient connection failure (#3594)", () => { + const result = classifyError("Connection error."); + assert.ok(isTransient(result)); + assert.equal(result.kind, "connection"); + assert.ok("retryAfterMs" in result && result.retryAfterMs === 15_000); +}); + test("classifyError treats unknown error as not transient", () => { const result = classifyError("something went wrong"); assert.ok(!isTransient(result)); diff --git a/src/resources/extensions/gsd/tests/resource-loader-import-path.test.ts b/src/resources/extensions/gsd/tests/resource-loader-import-path.test.ts new file mode 100644 index 000000000..0908d12d6 --- /dev/null +++ b/src/resources/extensions/gsd/tests/resource-loader-import-path.test.ts @@ -0,0 +1,38 @@ +// GSD2 — Regression test for broken resource-loader import path +// Ensures auto.ts imports resource-loader via package resolution, not a +// relative path that breaks when deployed to ~/.gsd/agent/extensions/gsd/. 
+ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const autoSrc = readFileSync(join(import.meta.dirname, "..", "auto.ts"), "utf-8"); + +describe("resource-loader import path", () => { + test("must not use relative import reaching above extensions/", () => { + // The old broken pattern: import("../../../" + "resource-loader.js") + // This resolves to ~/.gsd/resource-loader.js from deployed location, which + // doesn't exist. Regression introduced in #3899. + const brokenPattern = /import\(\s*["']\.\.\/\.\.\/\.\..*resource-loader/; + assert.ok( + !brokenPattern.test(autoSrc), + "auto.ts must not import resource-loader via relative path above extensions/ — " + + "breaks when deployed to ~/.gsd/agent/extensions/gsd/ (see #3899)", + ); + }); + + test("uses GSD_PKG_ROOT to resolve resource-loader from package root", () => { + // The fix uses GSD_PKG_ROOT (set by loader.ts) to construct an absolute + // file URL to dist/resource-loader.js — works in both source and deployed, + // and on Windows where raw paths fail with ERR_UNSUPPORTED_ESM_URL_SCHEME. 
+ assert.ok( + autoSrc.includes('process.env.GSD_PKG_ROOT'), + "auto.ts should use GSD_PKG_ROOT to resolve resource-loader", + ); + assert.ok( + autoSrc.includes('pathToFileURL'), + "auto.ts should convert path to file URL for cross-platform import()", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/secure-env-collect.test.ts b/src/resources/extensions/gsd/tests/secure-env-collect.test.ts index 18acf7dd4..3e1a5e109 100644 --- a/src/resources/extensions/gsd/tests/secure-env-collect.test.ts +++ b/src/resources/extensions/gsd/tests/secure-env-collect.test.ts @@ -317,3 +317,48 @@ test("secure_env_collect #2997: null from ctx.ui.custom() is still treated as sk "Key returning null must NOT be in applied list", ); }); + +test("secure_env_collect: falls back to secure input prompt when custom UI is unavailable", async (t) => { + const { collectSecretsFromManifest } = await loadOrchestrator(); + + const tmp = makeTempDir("sec-input-fallback-test"); + t.after(() => { + rmSync(tmp, { recursive: true, force: true }); + }); + + const manifest = makeManifest([ + { key: "SECRET_FROM_INPUT_FALLBACK", status: "pending", formatHint: "starts with sk-" }, + ]); + await writeManifestFile(tmp, manifest); + + let callIndex = 0; + const inputCalls: Array<{ title: string; placeholder?: string; opts?: { secure?: boolean } }> = []; + const mockCtx = { + cwd: tmp, + hasUI: true, + ui: { + custom: async (_factory: any) => { + callIndex++; + if (callIndex <= 1) return null; // summary screen dismiss + return undefined; // collect screen unavailable on this surface + }, + input: async (title: string, placeholder?: string, opts?: { secure?: boolean }) => { + inputCalls.push({ title, placeholder, opts }); + return " sk-test-fallback-value "; + }, + }, + }; + + const result = await collectSecretsFromManifest(tmp, "M001", mockCtx as any); + + assert.ok( + result.applied.includes("SECRET_FROM_INPUT_FALLBACK"), + "Fallback input should collect and apply the key", + ); + assert.ok( + 
!result.skipped.includes("SECRET_FROM_INPUT_FALLBACK"), + "Fallback input should not mark the key as skipped", + ); + assert.equal(inputCalls.length, 1, "Fallback input should be requested once"); + assert.equal(inputCalls[0]?.opts?.secure, true, "Fallback input should request secure entry when supported"); +}); diff --git a/src/resources/extensions/gsd/tests/tool-compatibility.test.ts b/src/resources/extensions/gsd/tests/tool-compatibility.test.ts new file mode 100644 index 000000000..6b533bf63 --- /dev/null +++ b/src/resources/extensions/gsd/tests/tool-compatibility.test.ts @@ -0,0 +1,199 @@ +// GSD-2 — Tool Compatibility + Model Router Tool Filtering Tests (ADR-005 Phases 2-3) +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; + +import { + registerToolCompatibility, + getToolCompatibility, + getAllToolCompatibility, + registerMcpToolCompatibility, + resetToolCompatibilityRegistry, +} from "@gsd/pi-coding-agent"; + +import { + isToolCompatibleWithProvider, + filterToolsForProvider, + adjustToolSet, +} from "../model-router.js"; + +import { + getProviderCapabilities, +} from "@gsd/pi-ai"; + +// ─── Tool Compatibility Registry ──────────────────────────────────────────── + +describe("tool compatibility registry", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("built-in tools are pre-registered", () => { + const builtins = ["bash", "read", "write", "edit", "grep", "find", "ls", "lsp"]; + for (const name of builtins) { + const compat = getToolCompatibility(name); + assert.ok(compat !== undefined, `${name} should be pre-registered`); + } + }); + + test("unknown tool returns undefined", () => { + assert.equal(getToolCompatibility("nonexistent_tool_xyz"), undefined); + }); + + test("registerToolCompatibility stores and retrieves metadata", () => { + registerToolCompatibility("screenshot_tool", { + producesImages: true, + minCapabilityTier: "standard", + }); + const compat = 
getToolCompatibility("screenshot_tool"); + assert.ok(compat); + assert.equal(compat.producesImages, true); + assert.equal(compat.minCapabilityTier, "standard"); + }); + + test("registerMcpToolCompatibility sets default schema features", () => { + registerMcpToolCompatibility("mcp__test__tool"); + const compat = getToolCompatibility("mcp__test__tool"); + assert.ok(compat); + assert.ok(compat.schemaFeatures?.includes("patternProperties")); + }); + + test("registerMcpToolCompatibility allows overrides", () => { + registerMcpToolCompatibility("mcp__test__override", { producesImages: true }); + const compat = getToolCompatibility("mcp__test__override"); + assert.ok(compat); + assert.equal(compat.producesImages, true); + assert.ok(compat.schemaFeatures?.includes("patternProperties")); + }); + + test("getAllToolCompatibility returns all entries", () => { + const all = getAllToolCompatibility(); + assert.ok(all.size >= 10); // at least built-in tools + assert.ok(all.has("bash")); + assert.ok(all.has("read")); + }); + + test("resetToolCompatibilityRegistry clears custom entries but keeps builtins", () => { + registerToolCompatibility("custom_tool", { producesImages: true }); + assert.ok(getToolCompatibility("custom_tool")); + resetToolCompatibilityRegistry(); + assert.equal(getToolCompatibility("custom_tool"), undefined); + assert.ok(getToolCompatibility("bash")); // built-in preserved + }); +}); + +// ─── isToolCompatibleWithProvider ─────────────────────────────────────────── + +describe("isToolCompatibleWithProvider", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("tool without compatibility metadata is always compatible", () => { + const caps = getProviderCapabilities("anthropic-messages"); + assert.equal(isToolCompatibleWithProvider("unknown_tool", caps), true); + }); + + test("built-in tools are compatible with all providers", () => { + const providers = ["anthropic-messages", "openai-responses", "google-generative-ai", 
"mistral-conversations"]; + const tools = ["bash", "read", "write", "edit"]; + for (const api of providers) { + const caps = getProviderCapabilities(api); + for (const tool of tools) { + assert.equal( + isToolCompatibleWithProvider(tool, caps), + true, + `${tool} should be compatible with ${api}`, + ); + } + } + }); + + test("image-producing tool filtered for providers without image support", () => { + registerToolCompatibility("screenshot", { producesImages: true }); + const openaiCaps = getProviderCapabilities("openai-responses"); + assert.equal(isToolCompatibleWithProvider("screenshot", openaiCaps), false); + + const anthropicCaps = getProviderCapabilities("anthropic-messages"); + assert.equal(isToolCompatibleWithProvider("screenshot", anthropicCaps), true); + }); + + test("tool with unsupported schema features filtered for Google", () => { + registerToolCompatibility("complex_schema_tool", { + schemaFeatures: ["patternProperties"], + }); + const googleCaps = getProviderCapabilities("google-generative-ai"); + assert.equal(isToolCompatibleWithProvider("complex_schema_tool", googleCaps), false); + + const anthropicCaps = getProviderCapabilities("anthropic-messages"); + assert.equal(isToolCompatibleWithProvider("complex_schema_tool", anthropicCaps), true); + }); +}); + +// ─── filterToolsForProvider ───────────────────────────────────────────────── + +describe("filterToolsForProvider", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("all built-in tools pass for any provider", () => { + const toolNames = ["bash", "read", "write", "edit", "grep", "find", "ls"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "mistral-conversations"); + assert.deepEqual(compatible, toolNames); + assert.deepEqual(filtered, []); + }); + + test("image tool filtered for OpenAI Responses", () => { + registerToolCompatibility("browser_screenshot", { producesImages: true }); + const toolNames = ["bash", "read", "browser_screenshot"]; + 
const { compatible, filtered } = filterToolsForProvider(toolNames, "openai-responses"); + assert.deepEqual(compatible, ["bash", "read"]); + assert.deepEqual(filtered, ["browser_screenshot"]); + }); + + test("MCP tool with patternProperties filtered for Google", () => { + registerMcpToolCompatibility("mcp__repowise__search"); + const toolNames = ["bash", "read", "mcp__repowise__search"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "google-generative-ai"); + assert.deepEqual(compatible, ["bash", "read"]); + assert.deepEqual(filtered, ["mcp__repowise__search"]); + }); + + test("unknown provider passes all tools (permissive default)", () => { + registerToolCompatibility("image_tool", { producesImages: true }); + registerMcpToolCompatibility("mcp_tool"); + const toolNames = ["bash", "image_tool", "mcp_tool"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "unknown-provider-xyz"); + assert.deepEqual(compatible, toolNames); + assert.deepEqual(filtered, []); + }); +}); + +// ─── adjustToolSet ────────────────────────────────────────────────────────── + +describe("adjustToolSet", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("returns all tools for Anthropic (most permissive)", () => { + registerToolCompatibility("screenshot", { producesImages: true }); + const toolNames = ["bash", "read", "screenshot"]; + const { toolNames: result, removedTools } = adjustToolSet(toolNames, "anthropic-messages"); + assert.deepEqual(result, toolNames); + assert.deepEqual(removedTools, []); + }); + + test("removes incompatible tools and reports them", () => { + registerToolCompatibility("screenshot", { producesImages: true }); + registerMcpToolCompatibility("mcp_complex"); + const toolNames = ["bash", "read", "screenshot", "mcp_complex"]; + const { toolNames: result, removedTools } = adjustToolSet(toolNames, "google-generative-ai"); + // Google supports images but not patternProperties + 
assert.ok(result.includes("bash")); + assert.ok(result.includes("read")); + assert.ok(result.includes("screenshot")); // Google supports images + assert.ok(!result.includes("mcp_complex")); // patternProperties not supported + assert.deepEqual(removedTools, ["mcp_complex"]); + }); +}); diff --git a/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts b/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts index 5a2cdfa58..c802e91a5 100644 --- a/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts +++ b/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts @@ -61,6 +61,13 @@ describe("#2883: isToolInvocationError classification", () => { ); }); + test("detects Node v18+ JSON parse variant with property-value text", () => { + assert.equal( + isToolInvocationError("Expected ',' or '}' after property value in JSON at position 4096"), + true, + ); + }); + test("detects Unexpected end of JSON input", () => { assert.equal( isToolInvocationError("Unexpected end of JSON input"), diff --git a/src/resources/extensions/gsd/tests/validate-directory.test.ts b/src/resources/extensions/gsd/tests/validate-directory.test.ts index 72c45be38..c86e08a80 100644 --- a/src/resources/extensions/gsd/tests/validate-directory.test.ts +++ b/src/resources/extensions/gsd/tests/validate-directory.test.ts @@ -74,6 +74,27 @@ test("validateDirectory: C:\\Windows is blocked", { skip: !isWindows ? "Windows- assert.equal(result.severity, "blocked"); }); +test("validateDirectory: D:\\Windows is blocked", { skip: !isWindows ? "Windows-only test" : undefined }, () => { + const result = validateDirectory("D:\\Windows"); + assert.equal(result.safe, false); + assert.equal(result.severity, "blocked"); + assert.ok(result.reason?.includes("system directory")); +}); + +test("validateDirectory: E:\\Program Files is blocked", { skip: !isWindows ? 
"Windows-only test" : undefined }, () => { + const result = validateDirectory("E:\\Program Files"); + assert.equal(result.safe, false); + assert.equal(result.severity, "blocked"); + assert.ok(result.reason?.includes("system directory")); +}); + +test("validateDirectory: any Windows drive root is blocked", { skip: !isWindows ? "Windows-only test" : undefined }, () => { + const result = validateDirectory("D:\\"); + assert.equal(result.safe, false); + assert.equal(result.severity, "blocked"); + assert.ok(result.reason?.includes("system directory")); +}); + // ─── Home directory (cross-platform) ───────────────────────────────────────────── test("validateDirectory: home directory itself is blocked", () => { @@ -104,7 +125,13 @@ test("validateDirectory: subdirectory of home is NOT blocked", () => { // Regression test for #1317: GSD worktree inside $HOME must not be blocked even // when the resolved project root equals $HOME (e.g. home dir is a git repo). test("validateDirectory: GSD worktree path nested under home is NOT blocked (#1317)", () => { + const originalHome = process.env.HOME; + const originalUserProfile = process.env.USERPROFILE; + const fakeHome = makeTempDir("fake-home"); + process.env.HOME = fakeHome; + process.env.USERPROFILE = fakeHome; const worktreePath = join(homedir(), ".gsd", "worktrees", "M001"); + const worktreeRoot = join(fakeHome, ".gsd", "worktrees", "M001"); mkdirSync(worktreePath, { recursive: true }); try { // The worktree CWD itself is a valid location — it must pass. 
@@ -112,7 +139,12 @@ test("validateDirectory: GSD worktree path nested under home is NOT blocked (#13 assert.equal(result.safe, true, "GSD worktree path should be safe to run in"); assert.equal(result.severity, "ok"); } finally { - rmSync(join(homedir(), ".gsd", "worktrees", "M001"), { recursive: true, force: true }); + if (originalHome === undefined) delete process.env.HOME; + else process.env.HOME = originalHome; + if (originalUserProfile === undefined) delete process.env.USERPROFILE; + else process.env.USERPROFILE = originalUserProfile; + rmSync(worktreeRoot, { recursive: true, force: true }); + rmSync(fakeHome, { recursive: true, force: true }); } }); diff --git a/src/resources/extensions/gsd/tests/validate-milestone-prompt-verification-classes.test.ts b/src/resources/extensions/gsd/tests/validate-milestone-prompt-verification-classes.test.ts new file mode 100644 index 000000000..df08568f3 --- /dev/null +++ b/src/resources/extensions/gsd/tests/validate-milestone-prompt-verification-classes.test.ts @@ -0,0 +1,18 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const promptPath = join(process.cwd(), "src/resources/extensions/gsd/prompts/validate-milestone.md"); +const prompt = readFileSync(promptPath, "utf-8"); + +test("validate-milestone reviewer C requires canonical verification class names", () => { + assert.match(prompt, /\*\*Reviewer C[\s\S]*Verification Classes/i); + assert.match(prompt, /exact class names [`']?Contract[`']?, [`']?Integration[`']?, [`']?Operational[`']?, and [`']?UAT[`']?/i); + assert.match(prompt, /If no verification classes were planned, say that explicitly/i); +}); + +test("validate-milestone prompt routes verification class analysis into verificationClasses", () => { + assert.match(prompt, /pass it in `verificationClasses`/i); + assert.match(prompt, /Extract the `Verification Classes` subsection from Reviewer C and pass it verbatim 
in `verificationClasses`/); +}); diff --git a/src/resources/extensions/gsd/tests/validate-milestone.test.ts b/src/resources/extensions/gsd/tests/validate-milestone.test.ts index 569abd796..7ba062229 100644 --- a/src/resources/extensions/gsd/tests/validate-milestone.test.ts +++ b/src/resources/extensions/gsd/tests/validate-milestone.test.ts @@ -9,10 +9,11 @@ import { deriveState, isValidationTerminal } from "../state.ts"; import { resolveExpectedArtifactPath, diagnoseExpectedArtifact } from "../auto-artifact-paths.ts"; import { verifyExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.ts"; import { resolveDispatch, type DispatchContext } from "../auto-dispatch.ts"; -import { buildValidateMilestonePrompt } from "../auto-prompts.ts"; +import { buildCompleteMilestonePrompt, buildValidateMilestonePrompt } from "../auto-prompts.ts"; import type { GSDState } from "../types.ts"; import { clearPathCache } from "../paths.ts"; import { clearParseCache } from "../files.ts"; +import { closeDatabase, insertMilestone, insertSlice, openDatabase } from "../gsd-db.ts"; // ─── Helpers ────────────────────────────────────────────────────────────── @@ -25,9 +26,15 @@ function makeTmpBase(): string { function cleanup(base: string): void { clearPathCache(); clearParseCache(); + closeDatabase(); try { rmSync(base, { recursive: true, force: true }); } catch { /* */ } } +function openTestDb(base: string): void { + const dbPath = join(base, ".gsd", "gsd.db"); + assert.equal(openDatabase(dbPath), true, "test DB should open"); +} + function writeRoadmap(base: string, mid: string, content: string): void { const dir = join(base, ".gsd", "milestones", mid); mkdirSync(dir, { recursive: true }); @@ -218,6 +225,85 @@ test("buildValidateMilestonePrompt inlines ASSESSMENT evidence instead of UAT sp } }); +test("buildCompleteMilestonePrompt skips skipped slices from DB-backed summary inlining", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", `# M001: 
Test Milestone + +## Vision +Test + +## Success Criteria +- It works + +## Slices + +- [x] **S01: First slice** \`risk:low\` \`depends:[]\` + > Done +- [ ] **S02: Skipped slice** \`risk:low\` \`depends:[]\` + > Intentionally skipped + +## Boundary Map + +| From | To | Produces | Consumes | +|------|-----|----------|----------| +| S01 | terminal | output | nothing | +`); + openTestDb(base); + insertMilestone({ id: "M001", title: "Test Milestone", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First slice", status: "complete", depends: [], sequence: 1 }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Skipped slice", status: "skipped", depends: [], sequence: 2 }); + writeSliceSummary(base, "M001", "S01", "# S01 Summary\nDelivered."); + + const prompt = await buildCompleteMilestonePrompt("M001", "Test Milestone", base); + assert.match(prompt, /S01 Summary/i, "prompt should inline non-skipped slice summaries"); + assert.doesNotMatch(prompt, /### S02 Summary/i, "prompt should not inline skipped slice summaries"); + assert.doesNotMatch(prompt, /not found — file does not exist yet/i, "prompt should not emit skipped-slice missing-file placeholders"); + } finally { + cleanup(base); + } +}); + +test("buildValidateMilestonePrompt skips skipped slices from DB-backed summary inlining", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", `# M001: Test Milestone + +## Vision +Test + +## Success Criteria +- It works + +## Slices + +- [x] **S01: First slice** \`risk:low\` \`depends:[]\` + > Done +- [ ] **S02: Skipped slice** \`risk:low\` \`depends:[]\` + > Intentionally skipped + +## Boundary Map + +| From | To | Produces | Consumes | +|------|-----|----------|----------| +| S01 | terminal | output | nothing | +`); + openTestDb(base); + insertMilestone({ id: "M001", title: "Test Milestone", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First slice", status: "complete", depends: [], 
sequence: 1 }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Skipped slice", status: "skipped", depends: [], sequence: 2 }); + writeSliceSummary(base, "M001", "S01", "# S01 Summary\nDelivered."); + writeSliceAssessment(base, "M001", "S01", "---\nverdict: PASS\n---\n# Assessment\nEvidence captured."); + + const prompt = await buildValidateMilestonePrompt("M001", "Test Milestone", base); + assert.match(prompt, /S01 Summary/i, "prompt should inline non-skipped slice summaries"); + assert.doesNotMatch(prompt, /### S02 Summary/i, "prompt should not inline skipped slice summaries"); + assert.doesNotMatch(prompt, /not found — file does not exist yet/i, "prompt should not emit skipped-slice missing-file placeholders"); + } finally { + cleanup(base); + } +}); + // ─── Dispatch rule ──────────────────────────────────────────────────────── test("dispatch rule matches validating-milestone phase", async () => { diff --git a/src/resources/extensions/gsd/tests/workflow-mcp-auto-prep.test.ts b/src/resources/extensions/gsd/tests/workflow-mcp-auto-prep.test.ts new file mode 100644 index 000000000..fabb15c3a --- /dev/null +++ b/src/resources/extensions/gsd/tests/workflow-mcp-auto-prep.test.ts @@ -0,0 +1,76 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { prepareWorkflowMcpForProject, shouldAutoPrepareWorkflowMcp } from "../workflow-mcp-auto-prep.ts"; + +test("shouldAutoPrepareWorkflowMcp enables prep for externalCli local transport", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "claude-code", baseUrl: "local://claude-code" }, + modelRegistry: { + getProviderAuthMode: () => "externalCli", + isProviderRequestReady: () => false, + }, + }); + + assert.equal(result, true); +}); + +test("shouldAutoPrepareWorkflowMcp enables prep when claude-code provider is ready", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "openai", baseUrl: "https://api.openai.com" }, + modelRegistry: { + 
getProviderAuthMode: () => "apiKey", + isProviderRequestReady: (provider: string) => provider === "claude-code", + }, + }); + + assert.equal(result, true); +}); + +test("shouldAutoPrepareWorkflowMcp enables prep when claude-code provider is registered", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "openai", baseUrl: "https://api.openai.com" }, + modelRegistry: { + getProviderAuthMode: (provider: string) => provider === "claude-code" ? "externalCli" : "apiKey", + isProviderRequestReady: () => false, + }, + }); + + assert.equal(result, true); +}); + +test("shouldAutoPrepareWorkflowMcp stays disabled when neither transport nor provider readiness match", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "openai", baseUrl: "https://api.openai.com" }, + modelRegistry: { + getProviderAuthMode: () => "apiKey", + isProviderRequestReady: () => false, + }, + }); + + assert.equal(result, false); +}); + +test("prepareWorkflowMcpForProject warns with /gsd mcp init guidance when prep fails", () => { + const notifications: Array<{ message: string; level: "info" | "warning" | "error" | "success" }> = []; + const result = prepareWorkflowMcpForProject( + { + model: { provider: "claude-code", baseUrl: "local://claude-code" }, + modelRegistry: { + getProviderAuthMode: () => "externalCli", + isProviderRequestReady: () => true, + }, + ui: { + notify: (message: string, level?: "info" | "warning" | "error" | "success") => { + notifications.push({ message, level: level ?? 
"info" }); + }, + }, + }, + "/", + ); + + assert.equal(result, null); + assert.equal(notifications.length, 1); + assert.equal(notifications[0].level, "warning"); + assert.match(notifications[0].message, /Please run \/gsd mcp init \./); +}); diff --git a/src/resources/extensions/gsd/tests/workflow-mcp.test.ts b/src/resources/extensions/gsd/tests/workflow-mcp.test.ts index fb91a1b94..2b9687623 100644 --- a/src/resources/extensions/gsd/tests/workflow-mcp.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-mcp.test.ts @@ -6,6 +6,7 @@ import { tmpdir } from "node:os"; import { fileURLToPath } from "node:url"; import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; +import { ElicitRequestSchema } from "@modelcontextprotocol/sdk/types.js"; import { buildWorkflowMcpServers, @@ -13,16 +14,27 @@ import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForAutoUnit, getRequiredWorkflowToolsForGuidedUnit, + supportsStructuredQuestions, usesWorkflowMcpTransport, } from "../workflow-mcp.ts"; const __dirname = dirname(fileURLToPath(import.meta.url)); const gsdDir = join(__dirname, ".."); +type ElicitPayload = { + message: string; + requestedSchema: { properties: Record; required?: string[] }; +}; + function readSrc(file: string): string { return readFileSync(join(gsdDir, file), "utf-8"); } +function extractElicitPayload(request: unknown): ElicitPayload { + const payload = (request as { params?: unknown }).params ?? request; + return payload as ElicitPayload; +} + test("guided execute-task requires canonical task completion tool", () => { assert.deepEqual(getRequiredWorkflowToolsForGuidedUnit("execute-task"), ["gsd_task_complete"]); }); @@ -141,7 +153,11 @@ test("detectWorkflowMcpLaunchConfig resolves the bundled server relative to the assert.match(launch?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? 
"", /workflow-tool-executors\.(js|ts)$/); assert.match(launch?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.(js|ts)$/); assert.equal(typeof launch?.args?.[0], "string"); - assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\]dist[\/\\]cli\.js$/); + assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\](dist[\/\\]cli\.js|src[\/\\]cli\.ts)$/); + if ((launch?.args?.[0] ?? "").endsWith(".ts")) { + assert.match(launch?.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(launch?.env?.NODE_OPTIONS ?? "", /resolve-ts\.mjs/); + } }); test("detectWorkflowMcpLaunchConfig resolves the bundled server relative to the package without env hints", () => { @@ -154,7 +170,11 @@ test("detectWorkflowMcpLaunchConfig resolves the bundled server relative to the assert.match(launch?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.(js|ts)$/); assert.match(launch?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.(js|ts)$/); assert.equal(typeof launch?.args?.[0], "string"); - assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\]dist[\/\\]cli\.js$/); + assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\](dist[\/\\]cli\.js|src[\/\\]cli\.ts)$/); + if ((launch?.args?.[0] ?? "").endsWith(".ts")) { + assert.match(launch?.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(launch?.env?.NODE_OPTIONS ?? "", /resolve-ts\.mjs/); + } }); test("workflow MCP launch config reaches mutation tools over stdio", async () => { @@ -165,14 +185,37 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () => assert.ok(launch, "expected a workflow MCP launch config"); assert.match( launch.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? 
"", - /dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]tools[\/\\]workflow-tool-executors\.js$/, + /(dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]tools[\/\\]workflow-tool-executors\.js|src[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]tools[\/\\]workflow-tool-executors\.(js|ts))$/, ); assert.match( launch.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", - /dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]bootstrap[\/\\]write-gate\.js$/, + /(dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]bootstrap[\/\\]write-gate\.js|src[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]bootstrap[\/\\]write-gate\.(js|ts))$/, ); + if ((launch.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "").endsWith(".ts")) { + assert.match(launch.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(launch.env?.NODE_OPTIONS ?? "", /resolve-ts\.mjs/); + } - const client = new Client({ name: "workflow-mcp-transport-test", version: "1.0.0" }); + const client = new Client( + { name: "workflow-mcp-transport-test", version: "1.0.0" }, + { capabilities: { elicitation: {} } }, + ); + client.setRequestHandler(ElicitRequestSchema, async (request) => { + const elicitation = extractElicitPayload(request as unknown); + + assert.match(elicitation.message, /Please answer the following question/); + assert.ok(elicitation.requestedSchema.properties.transport_mode); + assert.ok(elicitation.requestedSchema.properties["transport_mode__note"]); + assert.ok(elicitation.requestedSchema.required?.includes("transport_mode")); + + return { + action: "accept", + content: { + transport_mode: "None of the above", + transport_mode__note: "Need Windows-safe MCP elicitation.", + }, + }; + }); const transport = new StdioClientTransport({ command: launch.command, args: launch.args, @@ -189,6 +232,42 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () => (tools.tools ?? 
[]).some((tool) => tool.name === "gsd_plan_slice"), "expected workflow MCP surface to expose gsd_plan_slice", ); + assert.ok( + (tools.tools ?? []).some((tool) => tool.name === "ask_user_questions"), + "expected workflow MCP surface to expose ask_user_questions", + ); + + const askResult = await client.callTool( + { + name: "ask_user_questions", + arguments: { + questions: [ + { + id: "transport_mode", + header: "Transport", + question: "How should the workflow prompt be delivered?", + options: [ + { label: "Local UI", description: "Use the host tool UI." }, + { label: "Remote UI", description: "Use a remote response channel." }, + ], + }, + ], + }, + }, + undefined, + { timeout: 30_000 }, + ); + assert.equal(askResult.isError, undefined); + assert.equal( + ((askResult.content as Array<{ text?: string }>)?.[0])?.text ?? "", + JSON.stringify({ + answers: { + transport_mode: { + answers: ["None of the above", "user_note: Need Windows-safe MCP elicitation."], + }, + }, + }), + ); const milestoneResult = await client.callTool( { @@ -269,12 +348,123 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () => } }); +test("workflow MCP ask_user_questions uses stdio elicitation round-trip", async () => { + const projectRoot = mkdtempSync(join(tmpdir(), "gsd-workflow-elicit-")); + mkdirSync(join(projectRoot, ".gsd"), { recursive: true }); + + const launch = detectWorkflowMcpLaunchConfig(projectRoot, {}); + assert.ok(launch, "expected a workflow MCP launch config"); + + const client = new Client( + { name: "workflow-mcp-elicit-test", version: "1.0.0" }, + { capabilities: { elicitation: {} } }, + ); + let requestSeen: { + message: string; + requestedSchema: { properties: Record; required?: string[] }; + } | null = null; + + client.setRequestHandler(ElicitRequestSchema, async (request) => { + const params = extractElicitPayload(request as unknown); + + requestSeen = params; + + return { + action: "accept", + content: { + deployment: "None of the above", 
+ deployment__note: "Need hybrid deployment.", + }, + }; + }); + + const transport = new StdioClientTransport({ + command: launch.command, + args: launch.args, + env: { ...process.env, ...launch.env } as Record, + cwd: launch.cwd, + stderr: "pipe", + }); + + try { + await client.connect(transport, { timeout: 30_000 }); + + const result = await client.callTool( + { + name: "ask_user_questions", + arguments: { + questions: [ + { + id: "deployment", + header: "Deploy", + question: "Where will this run?", + options: [ + { label: "Cloud", description: "Managed hosting." }, + { label: "On-prem", description: "Runs in customer infrastructure." }, + ], + }, + ], + }, + }, + undefined, + { timeout: 30_000 }, + ); + + assert.ok(requestSeen, "expected stdio transport to forward an elicitation request"); + const seen = requestSeen as ElicitPayload; + assert.match(seen.message, /Please answer the following question/); + assert.ok(seen.requestedSchema.properties.deployment); + assert.ok(seen.requestedSchema.properties.deployment__note); + assert.ok(seen.requestedSchema.required?.includes("deployment")); + + const content = (result as { content: Array<{ type: string; text?: string }> }).content; + const text = content.find((item: { type: string; text?: string }) => item.type === "text"); + assert.ok(text && "text" in text); + assert.equal( + text.text, + JSON.stringify({ + answers: { + deployment: { + answers: ["None of the above", "user_note: Need hybrid deployment."], + }, + }, + }), + ); + } finally { + await client.close(); + } +}); + test("usesWorkflowMcpTransport matches local externalCli providers", () => { assert.equal(usesWorkflowMcpTransport("externalCli", "local://claude-code"), true); assert.equal(usesWorkflowMcpTransport("externalCli", "https://api.example.com"), false); assert.equal(usesWorkflowMcpTransport("oauth", "local://custom"), false); }); +test("supportsStructuredQuestions disables structured ask flow on workflow MCP transports", () => { + assert.equal( + 
supportsStructuredQuestions(["ask_user_questions"], { + authMode: "externalCli", + baseUrl: "local://claude-code", + }), + false, + ); + assert.equal( + supportsStructuredQuestions(["ask_user_questions"], { + authMode: "oauth", + baseUrl: "https://api.anthropic.com", + }), + true, + ); + assert.equal( + supportsStructuredQuestions([], { + authMode: "oauth", + baseUrl: "https://api.anthropic.com", + }), + false, + ); +}); + test("transport compatibility passes when required tools fit current MCP surface", () => { const error = getWorkflowTransportSupportError( "claude-code", @@ -465,18 +655,18 @@ test("transport compatibility now allows replan-slice over workflow MCP surface" test("transport compatibility still blocks units whose MCP tools are not exposed", () => { const error = getWorkflowTransportSupportError( "claude-code", - ["gsd_skip_slice"], + ["secure_env_collect"], { projectRoot: "/tmp/project", env: { GSD_WORKFLOW_MCP_COMMAND: "node" }, surface: "auto-mode", - unitType: "skip-slice", + unitType: "guided-discussion", authMode: "externalCli", baseUrl: "local://claude-code", }, ); - assert.match(error ?? "", /requires gsd_skip_slice/); + assert.match(error ?? "", /requires secure_env_collect/); assert.match(error ?? 
"", /currently exposes only/); }); @@ -498,3 +688,8 @@ test("auto phases source enforces workflow compatibility preflight", () => { assert.match(src, /getWorkflowTransportSupportError/); assert.match(src, /workflow-capability/); }); + +test("workflow transport error guidance includes /gsd mcp init hint", () => { + const src = readSrc("workflow-mcp.ts"); + assert.match(src, /Please run \/gsd mcp init \./); +}); diff --git a/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts b/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts index 06c01c419..327f51759 100644 --- a/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts @@ -256,6 +256,28 @@ test("executePlanSlice writes task planning state and rendered plan artifacts", } }); +test("executePlanSlice marks validation failures with isError", async () => { + const base = makeTmpBase(); + try { + openTestDb(base); + + const result = await inProjectDir(base, () => executePlanSlice({ + milestoneId: "M001", + sliceId: "S01", + goal: "Trigger validation failure for empty tasks.", + tasks: [], + }, base)); + + assert.equal(result.isError, true); + assert.equal(result.details.operation, "plan_slice"); + assert.match(String(result.details.error), /validation failed: tasks must be a non-empty array/); + assert.match(result.content[0].text, /Error planning slice:/); + } finally { + closeDatabase(); + cleanup(base); + } +}); + test("executeSliceComplete coerces string enrichment entries and writes summary/UAT artifacts", async () => { const base = makeTmpBase(); try { diff --git a/src/resources/extensions/gsd/tests/write-gate.test.ts b/src/resources/extensions/gsd/tests/write-gate.test.ts index 48c0c5524..04b2c4603 100644 --- a/src/resources/extensions/gsd/tests/write-gate.test.ts +++ b/src/resources/extensions/gsd/tests/write-gate.test.ts @@ -230,16 +230,13 @@ import { // ─── Scenario 19: isGateQuestionId recognizes 
all gate patterns ── test('write-gate: isGateQuestionId recognizes all gate patterns', () => { - assert.strictEqual(isGateQuestionId('layer1_scope_gate'), true); - assert.strictEqual(isGateQuestionId('layer2_architecture_gate'), true); - assert.strictEqual(isGateQuestionId('layer3_error_gate'), true); - assert.strictEqual(isGateQuestionId('layer4_quality_gate'), true); assert.strictEqual(isGateQuestionId('depth_verification'), true); assert.strictEqual(isGateQuestionId('depth_verification_M002'), true); - assert.strictEqual(isGateQuestionId('my_layer1_scope_gate_question'), true); + assert.strictEqual(isGateQuestionId('depth_verification_confirm'), true); // Non-gate question IDs assert.strictEqual(isGateQuestionId('project_intent'), false); assert.strictEqual(isGateQuestionId('feature_priority'), false); + assert.strictEqual(isGateQuestionId('layer1_scope_gate'), false); assert.strictEqual(isGateQuestionId(''), false); }); @@ -249,14 +246,14 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => { clearDiscussionFlowState(); assert.strictEqual(getPendingGate(), null, 'starts null'); - setPendingGate('layer1_scope_gate'); - assert.strictEqual(getPendingGate(), 'layer1_scope_gate', 'set correctly'); + setPendingGate('depth_verification'); + assert.strictEqual(getPendingGate(), 'depth_verification', 'set correctly'); clearPendingGate(); assert.strictEqual(getPendingGate(), null, 'cleared correctly'); // clearDiscussionFlowState also clears pending gate - setPendingGate('layer2_architecture_gate'); + setPendingGate('depth_verification_M002'); clearDiscussionFlowState(); assert.strictEqual(getPendingGate(), null, 'clearDiscussionFlowState clears pending gate'); }); @@ -265,12 +262,12 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => { test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate', () => { clearDiscussionFlowState(); - setPendingGate('layer1_scope_gate'); + setPendingGate('depth_verification'); // 
write should be blocked during discussion const writeResult = shouldBlockPendingGate('write', 'M001', false); assert.strictEqual(writeResult.block, true, 'write should be blocked'); - assert.ok(writeResult.reason!.includes('layer1_scope_gate'), 'reason mentions the gate'); + assert.ok(writeResult.reason!.includes('depth_verification'), 'reason mentions the gate'); // edit should be blocked const editResult = shouldBlockPendingGate('edit', 'M001', false); @@ -287,7 +284,7 @@ test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate', test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions during pending gate', () => { clearDiscussionFlowState(); - setPendingGate('layer1_scope_gate'); + setPendingGate('depth_verification'); // ask_user_questions is always safe (model needs to re-ask) assert.strictEqual(shouldBlockPendingGate('ask_user_questions', 'M001').block, false); @@ -304,7 +301,7 @@ test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions test('write-gate: shouldBlockPendingGate blocks outside discussion when a gate is pending', () => { clearDiscussionFlowState(); - setPendingGate('layer1_scope_gate'); + setPendingGate('depth_verification'); // No milestoneId and no queue phase — still block because the gate is pending const result = shouldBlockPendingGate('write', null, false); @@ -330,7 +327,7 @@ test('write-gate: shouldBlockPendingGate blocks in queue mode when gate is pendi test('write-gate: shouldBlockPendingGateBash allows read-only commands during pending gate', () => { clearDiscussionFlowState(); - setPendingGate('layer2_architecture_gate'); + setPendingGate('depth_verification'); assert.strictEqual(shouldBlockPendingGateBash('cat file.txt', 'M001').block, false); assert.strictEqual(shouldBlockPendingGateBash('git log --oneline', 'M001').block, false); @@ -344,11 +341,11 @@ test('write-gate: shouldBlockPendingGateBash allows read-only commands during pe test('write-gate: 
shouldBlockPendingGateBash blocks mutating commands during pending gate', () => { clearDiscussionFlowState(); - setPendingGate('layer2_architecture_gate'); + setPendingGate('depth_verification'); const result = shouldBlockPendingGateBash('npm run build', 'M001'); assert.strictEqual(result.block, true, 'mutating bash should be blocked'); - assert.ok(result.reason!.includes('layer2_architecture_gate')); + assert.ok(result.reason!.includes('depth_verification')); clearDiscussionFlowState(); }); @@ -365,7 +362,7 @@ test('write-gate: no pending gate means no blocking', () => { // ─── Scenario 28: resetWriteGateState clears pending gate ── test('write-gate: resetWriteGateState clears pending gate', () => { - setPendingGate('layer3_error_gate'); + setPendingGate('depth_verification'); resetWriteGateState(); assert.strictEqual(getPendingGate(), null); }); diff --git a/src/resources/extensions/gsd/tools/workflow-tool-executors.ts b/src/resources/extensions/gsd/tools/workflow-tool-executors.ts index edc1bfd31..14f179bff 100644 --- a/src/resources/extensions/gsd/tools/workflow-tool-executors.ts +++ b/src/resources/extensions/gsd/tools/workflow-tool-executors.ts @@ -38,6 +38,7 @@ export function isSupportedSummaryArtifactType( export interface ToolExecutionResult { content: Array<{ type: "text"; text: string }>; details: Record; + isError?: boolean; } export interface SummarySaveParams { @@ -57,13 +58,15 @@ export async function executeSummarySave( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot save artifact." }], details: { operation: "save_summary", error: "db_unavailable" }, - }; + isError: true, + }; } if (!isSupportedSummaryArtifactType(params.artifact_type)) { return { content: [{ type: "text", text: `Error: Invalid artifact_type "${params.artifact_type}". 
Must be one of: ${SUPPORTED_SUMMARY_ARTIFACT_TYPES.join(", ")}` }], details: { operation: "save_summary", error: "invalid_artifact_type" }, - }; + isError: true, + }; } const contextGuard = shouldBlockContextArtifactSaveInSnapshot( loadWriteGateSnapshot(basePath), @@ -75,7 +78,8 @@ export async function executeSummarySave( return { content: [{ type: "text", text: `Error saving artifact: ${contextGuard.reason ?? "context write blocked"}` }], details: { operation: "save_summary", error: "context_write_blocked" }, - }; + isError: true, + }; } try { let relativePath: string; @@ -108,7 +112,8 @@ export async function executeSummarySave( return { content: [{ type: "text", text: `Error saving artifact: ${msg}` }], details: { operation: "save_summary", error: msg }, - }; + isError: true, + }; } } @@ -163,7 +168,8 @@ export async function executeTaskComplete( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot complete task." }], details: { operation: "complete_task", error: "db_unavailable" }, - }; + isError: true, + }; } try { const coerced = { ...params }; @@ -176,6 +182,7 @@ export async function executeTaskComplete( return { content: [{ type: "text", text: `Error completing task: ${result.error}` }], details: { operation: "complete_task", error: result.error }, + isError: true, }; } return { @@ -194,7 +201,8 @@ export async function executeTaskComplete( return { content: [{ type: "text", text: `Error completing task: ${msg}` }], details: { operation: "complete_task", error: msg }, - }; + isError: true, + }; } } @@ -207,7 +215,8 @@ export async function executeSliceComplete( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot complete slice." 
}], details: { operation: "complete_slice", error: "db_unavailable" }, - }; + isError: true, + }; } try { const splitPair = (s: string): [string, string] => { @@ -257,6 +266,7 @@ export async function executeSliceComplete( return { content: [{ type: "text", text: `Error completing slice: ${result.error}` }], details: { operation: "complete_slice", error: result.error }, + isError: true, }; } return { @@ -275,7 +285,8 @@ export async function executeSliceComplete( return { content: [{ type: "text", text: `Error completing slice: ${msg}` }], details: { operation: "complete_slice", error: msg }, - }; + isError: true, + }; } } @@ -288,7 +299,8 @@ export async function executeCompleteMilestone( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot complete milestone." }], details: { operation: "complete_milestone", error: "db_unavailable" }, - }; + isError: true, + }; } try { const sanitized = sanitizeCompleteMilestoneParams(params); @@ -297,6 +309,7 @@ export async function executeCompleteMilestone( return { content: [{ type: "text", text: `Error completing milestone: ${result.error}` }], details: { operation: "complete_milestone", error: result.error }, + isError: true, }; } return { @@ -313,7 +326,8 @@ export async function executeCompleteMilestone( return { content: [{ type: "text", text: `Error completing milestone: ${msg}` }], details: { operation: "complete_milestone", error: msg }, - }; + isError: true, + }; } } @@ -326,7 +340,8 @@ export async function executeValidateMilestone( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot validate milestone." 
}], details: { operation: "validate_milestone", error: "db_unavailable" }, - }; + isError: true, + }; } try { const result = await handleValidateMilestone(params, basePath); @@ -334,6 +349,7 @@ export async function executeValidateMilestone( return { content: [{ type: "text", text: `Error validating milestone: ${result.error}` }], details: { operation: "validate_milestone", error: result.error }, + isError: true, }; } return { @@ -351,7 +367,8 @@ export async function executeValidateMilestone( return { content: [{ type: "text", text: `Error validating milestone: ${msg}` }], details: { operation: "validate_milestone", error: msg }, - }; + isError: true, + }; } } @@ -364,7 +381,8 @@ export async function executeReassessRoadmap( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot reassess roadmap." }], details: { operation: "reassess_roadmap", error: "db_unavailable" }, - }; + isError: true, + }; } try { const result = await handleReassessRoadmap(params, basePath); @@ -372,6 +390,7 @@ export async function executeReassessRoadmap( return { content: [{ type: "text", text: `Error reassessing roadmap: ${result.error}` }], details: { operation: "reassess_roadmap", error: result.error }, + isError: true, }; } return { @@ -390,7 +409,8 @@ export async function executeReassessRoadmap( return { content: [{ type: "text", text: `Error reassessing roadmap: ${msg}` }], details: { operation: "reassess_roadmap", error: msg }, - }; + isError: true, + }; } } @@ -403,7 +423,8 @@ export async function executeSaveGateResult( return { content: [{ type: "text", text: "Error: GSD database is not available." }], details: { operation: "save_gate_result", error: "db_unavailable" }, - }; + isError: true, + }; } const validGates = ["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"]; @@ -411,7 +432,8 @@ export async function executeSaveGateResult( return { content: [{ type: "text", text: `Error: Invalid gateId "${params.gateId}". 
Must be one of: ${validGates.join(", ")}` }], details: { operation: "save_gate_result", error: "invalid_gate_id" }, - }; + isError: true, + }; } const validVerdicts = ["pass", "flag", "omitted"]; @@ -419,7 +441,8 @@ export async function executeSaveGateResult( return { content: [{ type: "text", text: `Error: Invalid verdict "${params.verdict}". Must be one of: ${validVerdicts.join(", ")}` }], details: { operation: "save_gate_result", error: "invalid_verdict" }, - }; + isError: true, + }; } try { @@ -443,7 +466,8 @@ export async function executeSaveGateResult( return { content: [{ type: "text", text: `Error saving gate result: ${msg}` }], details: { operation: "save_gate_result", error: msg }, - }; + isError: true, + }; } } @@ -456,7 +480,8 @@ export async function executePlanMilestone( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot plan milestone." }], details: { operation: "plan_milestone", error: "db_unavailable" }, - }; + isError: true, + }; } try { const result = await handlePlanMilestone(params, basePath); @@ -464,6 +489,7 @@ export async function executePlanMilestone( return { content: [{ type: "text", text: `Error planning milestone: ${result.error}` }], details: { operation: "plan_milestone", error: result.error }, + isError: true, }; } return { @@ -480,7 +506,8 @@ export async function executePlanMilestone( return { content: [{ type: "text", text: `Error planning milestone: ${msg}` }], details: { operation: "plan_milestone", error: msg }, - }; + isError: true, + }; } } @@ -493,7 +520,8 @@ export async function executePlanSlice( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot plan slice." 
}], details: { operation: "plan_slice", error: "db_unavailable" }, - }; + isError: true, + }; } try { const result = await handlePlanSlice(params, basePath); @@ -501,6 +529,7 @@ export async function executePlanSlice( return { content: [{ type: "text", text: `Error planning slice: ${result.error}` }], details: { operation: "plan_slice", error: result.error }, + isError: true, }; } return { @@ -519,7 +548,8 @@ export async function executePlanSlice( return { content: [{ type: "text", text: `Error planning slice: ${msg}` }], details: { operation: "plan_slice", error: msg }, - }; + isError: true, + }; } } @@ -532,7 +562,8 @@ export async function executeReplanSlice( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot replan slice." }], details: { operation: "replan_slice", error: "db_unavailable" }, - }; + isError: true, + }; } try { const result = await handleReplanSlice(params, basePath); @@ -540,6 +571,7 @@ export async function executeReplanSlice( return { content: [{ type: "text", text: `Error replanning slice: ${result.error}` }], details: { operation: "replan_slice", error: result.error }, + isError: true, }; } return { @@ -558,7 +590,8 @@ export async function executeReplanSlice( return { content: [{ type: "text", text: `Error replanning slice: ${msg}` }], details: { operation: "replan_slice", error: msg }, - }; + isError: true, + }; } } @@ -576,6 +609,7 @@ export async function executeMilestoneStatus( return { content: [{ type: "text", text: "Error: GSD database is not available." 
}], details: { operation: "milestone_status", error: "db_unavailable" }, + isError: true, }; } @@ -624,6 +658,7 @@ export async function executeMilestoneStatus( return { content: [{ type: "text", text: `Error querying milestone status: ${msg}` }], details: { operation: "milestone_status", error: msg }, - }; + isError: true, + }; } } diff --git a/src/resources/extensions/gsd/validate-directory.ts b/src/resources/extensions/gsd/validate-directory.ts index 4341826c2..6923abd49 100644 --- a/src/resources/extensions/gsd/validate-directory.ts +++ b/src/resources/extensions/gsd/validate-directory.ts @@ -61,6 +61,33 @@ const WINDOWS_BLOCKED_PATHS = new Set([ "C:\\Program Files (x86)", ]); +const WINDOWS_BLOCKED_SUFFIXES = new Set([ + "\\", + "\\windows", + "\\windows\\system32", + "\\program files", + "\\program files (x86)", +]); + +function normalizePathForComparison(dirPath: string): string { + let normalized = dirPath.replace(/[/\\]+$/, ""); + if (normalized === "") { + normalized = "/"; + } else if (/^[A-Za-z]:$/.test(normalized)) { + normalized += "\\"; + } + return platform() === "win32" ? normalized.toLowerCase() : normalized; +} + +function isBlockedWindowsPath(normalized: string): boolean { + if (!/^[a-z]:\\/.test(normalized)) { + return false; + } + + const suffix = normalized.slice(2); + return WINDOWS_BLOCKED_SUFFIXES.has(suffix); +} + // ─── Core Validation ──────────────────────────────────────────────────────────── /** @@ -84,16 +111,11 @@ export function validateDirectory(dirPath: string): DirectoryValidationResult { // Normalize trailing slashes for consistent comparison. 
// Special cases: "/" → "/" (not ""), "C:\" → "C:\" (not "C:") - let normalized = resolved.replace(/[/\\]+$/, ""); - if (normalized === "") { - normalized = "/"; - } else if (/^[A-Za-z]:$/.test(normalized)) { - normalized = normalized + "\\"; - } + const normalized = normalizePathForComparison(resolved); // ── Check 1: Blocked system paths ────────────────────────────────────── const blockedPaths = platform() === "win32" ? WINDOWS_BLOCKED_PATHS : UNIX_BLOCKED_PATHS; - if (blockedPaths.has(normalized)) { + if (platform() === "win32" ? isBlockedWindowsPath(normalized) : blockedPaths.has(normalized)) { return { safe: false, severity: "blocked", @@ -104,9 +126,9 @@ export function validateDirectory(dirPath: string): DirectoryValidationResult { // ── Check 2: Home directory itself (not subdirs) ─────────────────────── let resolvedHome: string; try { - resolvedHome = realpathSync(resolve(homedir())).replace(/[/\\]+$/, ""); + resolvedHome = normalizePathForComparison(realpathSync(resolve(homedir()))); } catch { - resolvedHome = resolve(homedir()).replace(/[/\\]+$/, ""); + resolvedHome = normalizePathForComparison(resolve(homedir())); } if (normalized === resolvedHome) { @@ -120,9 +142,9 @@ export function validateDirectory(dirPath: string): DirectoryValidationResult { // ── Check 3: Temp directory root ─────────────────────────────────────── let resolvedTmp: string; try { - resolvedTmp = realpathSync(resolve(tmpdir())).replace(/[/\\]+$/, ""); + resolvedTmp = normalizePathForComparison(realpathSync(resolve(tmpdir()))); } catch { - resolvedTmp = resolve(tmpdir()).replace(/[/\\]+$/, ""); + resolvedTmp = normalizePathForComparison(resolve(tmpdir())); } if (normalized === resolvedTmp) { diff --git a/src/resources/extensions/gsd/workflow-events.ts b/src/resources/extensions/gsd/workflow-events.ts index efc239da5..40bdab31f 100644 --- a/src/resources/extensions/gsd/workflow-events.ts +++ b/src/resources/extensions/gsd/workflow-events.ts @@ -2,6 +2,7 @@ import { createHash, 
randomUUID } from "node:crypto"; import { appendFileSync, readFileSync, existsSync, mkdirSync } from "node:fs"; import { join } from "node:path"; import { atomicWriteSync } from "./atomic-write.js"; +import { withFileLockSync } from "./file-lock.js"; import { logWarning } from "./workflow-logger.js"; // ─── Session ID ─────────────────────────────────────────────────────────── @@ -127,31 +128,39 @@ export function compactMilestoneEvents( const logPath = join(basePath, ".gsd", "event-log.jsonl"); const archivePath = join(basePath, ".gsd", `event-log-${milestoneId}.jsonl.archived`); - const allEvents = readEvents(logPath); - const toArchive = allEvents.filter( - (e) => (e.params as { milestoneId?: string }).milestoneId === milestoneId, - ); - const remaining = allEvents.filter( - (e) => (e.params as { milestoneId?: string }).milestoneId !== milestoneId, - ); + return withFileLockSync(logPath, () => { + const allEvents = readEvents(logPath); + + // Single-pass partition to halve the work (per reviewer agent) + const toArchive: WorkflowEvent[] = []; + const remaining: WorkflowEvent[] = []; + + for (const e of allEvents) { + if ((e.params as { milestoneId?: string }).milestoneId === milestoneId) { + toArchive.push(e); + } else { + remaining.push(e); + } + } - if (toArchive.length === 0) { - return { archived: 0 }; - } + if (toArchive.length === 0) { + return { archived: 0 }; + } - // Write archived events to .jsonl.archived file (crash-safe) - atomicWriteSync( - archivePath, - toArchive.map((e) => JSON.stringify(e)).join("\n") + "\n", - ); + // Write archived events to .jsonl.archived file (crash-safe) + atomicWriteSync( + archivePath, + toArchive.map((e) => JSON.stringify(e)).join("\n") + "\n", + ); - // Truncate active log to remaining events only - atomicWriteSync( - logPath, - remaining.length > 0 - ? 
remaining.map((e) => JSON.stringify(e)).join("\n") + "\n" - : "", - ); + // Truncate active log to remaining events only + atomicWriteSync( + logPath, + remaining.length > 0 + ? remaining.map((e) => JSON.stringify(e)).join("\n") + "\n" + : "", + ); - return { archived: toArchive.length }; + return { archived: toArchive.length }; + }); } diff --git a/src/resources/extensions/gsd/workflow-mcp-auto-prep.ts b/src/resources/extensions/gsd/workflow-mcp-auto-prep.ts new file mode 100644 index 000000000..1d69ebc00 --- /dev/null +++ b/src/resources/extensions/gsd/workflow-mcp-auto-prep.ts @@ -0,0 +1,76 @@ +import type { ExtensionContext } from "@gsd/pi-coding-agent"; + +import { + type EnsureProjectWorkflowMcpConfigResult, + ensureProjectWorkflowMcpConfig, +} from "./mcp-project-config.js"; +import { usesWorkflowMcpTransport } from "./workflow-mcp.js"; + +interface WorkflowMcpAutoPrepContext { + model?: { provider?: string; baseUrl?: string }; + modelRegistry?: { + getProviderAuthMode?: (provider: string) => string; + isProviderRequestReady?: (provider: string) => boolean; + }; + ui?: Pick; +} + +function getAuthModeSafe( + ctx: WorkflowMcpAutoPrepContext, + provider: string | undefined, +): string | undefined { + if (!provider) return undefined; + const getAuthMode = ctx.modelRegistry?.getProviderAuthMode; + if (typeof getAuthMode !== "function") return undefined; + try { + return getAuthMode(provider); + } catch { + return undefined; + } +} + +function hasClaudeCodeProvider(ctx: WorkflowMcpAutoPrepContext): boolean { + return getAuthModeSafe(ctx, "claude-code") === "externalCli"; +} + +function isClaudeCodeProviderReady(ctx: WorkflowMcpAutoPrepContext): boolean { + const readyCheck = ctx.modelRegistry?.isProviderRequestReady; + if (typeof readyCheck !== "function") return false; + try { + return readyCheck("claude-code"); + } catch { + return false; + } +} + +export function shouldAutoPrepareWorkflowMcp(ctx: WorkflowMcpAutoPrepContext): boolean { + const provider = 
ctx.model?.provider; + const baseUrl = ctx.model?.baseUrl; + const authMode = getAuthModeSafe(ctx, provider); + + if (usesWorkflowMcpTransport(authMode as any, baseUrl)) return true; + if (provider === "claude-code") return true; + if (hasClaudeCodeProvider(ctx)) return true; + return isClaudeCodeProviderReady(ctx); +} + +export function prepareWorkflowMcpForProject( + ctx: WorkflowMcpAutoPrepContext, + projectRoot: string, +): EnsureProjectWorkflowMcpConfigResult | null { + if (!shouldAutoPrepareWorkflowMcp(ctx)) return null; + + try { + const result = ensureProjectWorkflowMcpConfig(projectRoot); + if (result.status !== "unchanged") { + ctx.ui?.notify?.(`Claude Code MCP prepared at ${result.configPath}`, "info"); + } + return result; + } catch (err) { + ctx.ui?.notify?.( + `Claude Code MCP prep failed: ${err instanceof Error ? err.message : String(err)}. Detected Claude Code model but no workflow MCP. Please run /gsd mcp init . from your project root.`, + "warning", + ); + return null; + } +} diff --git a/src/resources/extensions/gsd/workflow-mcp.ts b/src/resources/extensions/gsd/workflow-mcp.ts index 797f127f5..9e4bb90c7 100644 --- a/src/resources/extensions/gsd/workflow-mcp.ts +++ b/src/resources/extensions/gsd/workflow-mcp.ts @@ -1,7 +1,7 @@ import { execSync } from "node:child_process"; import { existsSync } from "node:fs"; import { dirname, resolve } from "node:path"; -import { fileURLToPath } from "node:url"; +import { fileURLToPath, pathToFileURL } from "node:url"; export interface WorkflowMcpLaunchConfig { name: string; @@ -21,22 +21,35 @@ export interface WorkflowCapabilityOptions { } const MCP_WORKFLOW_TOOL_SURFACE = new Set([ + "ask_user_questions", + "gsd_decision_save", "gsd_complete_milestone", "gsd_complete_task", "gsd_complete_slice", + "gsd_generate_milestone_id", + "gsd_journal_query", "gsd_milestone_complete", + "gsd_milestone_generate_id", "gsd_milestone_status", "gsd_milestone_validate", + "gsd_plan_task", "gsd_plan_milestone", 
"gsd_plan_slice", "gsd_replan_slice", "gsd_reassess_roadmap", + "gsd_requirement_save", + "gsd_requirement_update", "gsd_roadmap_reassess", + "gsd_save_decision", "gsd_save_gate_result", + "gsd_save_requirement", + "gsd_skip_slice", "gsd_slice_replan", "gsd_slice_complete", "gsd_summary_save", + "gsd_task_plan", "gsd_task_complete", + "gsd_update_requirement", "gsd_validate_milestone", ]); @@ -95,6 +108,8 @@ function getBundledWorkflowMcpCliPath(env: NodeJS.ProcessEnv): string | null { } const candidates = [ + resolve(fileURLToPath(new URL("../../../../packages/mcp-server/src/cli.ts", import.meta.url))), + resolve(fileURLToPath(new URL("../../../../../packages/mcp-server/src/cli.ts", import.meta.url))), resolve(fileURLToPath(new URL("../../../../packages/mcp-server/dist/cli.js", import.meta.url))), resolve(fileURLToPath(new URL("../../../../../packages/mcp-server/dist/cli.js", import.meta.url))), ]; @@ -108,9 +123,9 @@ function getBundledWorkflowMcpCliPath(env: NodeJS.ProcessEnv): string | null { function getBundledWorkflowExecutorModulePath(): string | null { const candidates = [ - resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url))), resolve(fileURLToPath(new URL("./tools/workflow-tool-executors.js", import.meta.url))), resolve(fileURLToPath(new URL("./tools/workflow-tool-executors.ts", import.meta.url))), + resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url))), ]; for (const candidate of candidates) { @@ -122,9 +137,9 @@ function getBundledWorkflowExecutorModulePath(): string | null { function getBundledWorkflowWriteGateModulePath(): string | null { const candidates = [ - resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url))), resolve(fileURLToPath(new URL("./bootstrap/write-gate.js", import.meta.url))), resolve(fileURLToPath(new 
URL("./bootstrap/write-gate.ts", import.meta.url))), + resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url))), ]; for (const candidate of candidates) { @@ -134,19 +149,58 @@ function getBundledWorkflowWriteGateModulePath(): string | null { return null; } +function getResolveTsHookPath(): string | null { + const candidates = [ + resolve(fileURLToPath(new URL("./tests/resolve-ts.mjs", import.meta.url))), + resolve(fileURLToPath(new URL("../../../../src/resources/extensions/gsd/tests/resolve-ts.mjs", import.meta.url))), + ]; + + for (const candidate of candidates) { + if (existsSync(candidate)) return candidate; + } + + return null; +} + +function mergeNodeOptions(existing: string | undefined, additions: string[]): string | undefined { + const tokens = (existing ?? "").split(/\s+/).map((value) => value.trim()).filter(Boolean); + for (const addition of additions) { + if (!tokens.includes(addition)) { + tokens.push(addition); + } + } + return tokens.length > 0 ? tokens.join(" ") : undefined; +} + function buildWorkflowLaunchEnv( projectRoot: string, gsdCliPath: string | undefined, explicitEnv?: Record, + workflowCliPath?: string, ): Record { const executorModulePath = getBundledWorkflowExecutorModulePath(); const writeGateModulePath = getBundledWorkflowWriteGateModulePath(); + const resolveTsHookPath = getResolveTsHookPath(); + const wantsSourceTs = + Boolean(resolveTsHookPath) && + ( + (workflowCliPath?.endsWith(".ts") ?? false) || + (executorModulePath?.endsWith(".ts") ?? false) || + (writeGateModulePath?.endsWith(".ts") ?? false) + ); + const nodeOptions = wantsSourceTs + ? mergeNodeOptions(explicitEnv?.NODE_OPTIONS, [ + "--experimental-strip-types", + `--import=${pathToFileURL(resolveTsHookPath!).href}`, + ]) + : explicitEnv?.NODE_OPTIONS; return { ...(explicitEnv ?? {}), ...(gsdCliPath ? { GSD_CLI_PATH: gsdCliPath } : {}), ...(executorModulePath ? 
{ GSD_WORKFLOW_EXECUTORS_MODULE: executorModulePath } : {}), ...(writeGateModulePath ? { GSD_WORKFLOW_WRITE_GATE_MODULE: writeGateModulePath } : {}), + ...(nodeOptions ? { NODE_OPTIONS: nodeOptions } : {}), GSD_PERSIST_WRITE_GATE_STATE: "1", GSD_WORKFLOW_PROJECT_ROOT: projectRoot, }; @@ -188,7 +242,7 @@ export function detectWorkflowMcpLaunchConfig( command: process.execPath, args: [distCli], cwd: resolvedWorkflowProjectRoot, - env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath), + env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath, undefined, distCli), }; } @@ -199,7 +253,7 @@ export function detectWorkflowMcpLaunchConfig( command: process.execPath, args: [bundledCli], cwd: resolvedWorkflowProjectRoot, - env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath), + env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath, undefined, bundledCli), }; } @@ -294,6 +348,21 @@ export function usesWorkflowMcpTransport( return authMode === "externalCli" && typeof baseUrl === "string" && baseUrl.startsWith("local://"); } +export function supportsStructuredQuestions( + activeTools: string[], + options: Pick = {}, +): boolean { + if (!activeTools.includes("ask_user_questions")) return false; + + // Workflow MCP currently exposes ask_user_questions via MCP form elicitation. + // Local external CLI transports such as Claude Code can invoke the tool, but + // do not reliably complete that elicitation round-trip yet, so guided discuss + // prompts must fall back to plain-text questioning. 
+ if (usesWorkflowMcpTransport(options.authMode, options.baseUrl)) return false; + + return true; +} + export function getWorkflowTransportSupportError( provider: string | undefined, requiredTools: string[], @@ -310,7 +379,7 @@ export function getWorkflowTransportSupportError( const providerLabel = `"${provider}"`; if (!launch) { - return `Provider ${providerLabel} cannot run ${surface}${unitLabel}: the GSD workflow MCP server is not configured or discoverable. Configure GSD_WORKFLOW_MCP_COMMAND, build packages/mcp-server/dist/cli.js, or install gsd-mcp-server on PATH.`; + return `Provider ${providerLabel} cannot run ${surface}${unitLabel}: the GSD workflow MCP server is not configured or discoverable. Detected Claude Code model but no workflow MCP. Please run /gsd mcp init . from your project root. You can also configure GSD_WORKFLOW_MCP_COMMAND, build packages/mcp-server/dist/cli.js, or install gsd-mcp-server on PATH.`; } const missing = [...new Set(requiredTools)].filter((tool) => !MCP_WORKFLOW_TOOL_SURFACE.has(tool)); diff --git a/src/resources/extensions/slash-commands/audit.ts b/src/resources/extensions/slash-commands/audit.ts index b5f3bf85c..fe7d3f046 100644 --- a/src/resources/extensions/slash-commands/audit.ts +++ b/src/resources/extensions/slash-commands/audit.ts @@ -1,4 +1,5 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { mkdirSync } from "node:fs"; export default function auditCommand(pi: ExtensionAPI) { pi.registerCommand("audit", { @@ -39,7 +40,7 @@ export default function auditCommand(pi: ExtensionAPI) { // ── Step 3: Ensure the output directory exists ─────────────────────── - await pi.exec("mkdir", ["-p", ".gsd/audits"]); + mkdirSync(".gsd/audits", { recursive: true }); // ── Step 4: Send the audit prompt to the agent ─────────────────────── diff --git a/src/resources/extensions/subagent/isolation.ts b/src/resources/extensions/subagent/isolation.ts index a326f55d3..e862e65ff 100644 --- 
a/src/resources/extensions/subagent/isolation.ts +++ b/src/resources/extensions/subagent/isolation.ts @@ -53,8 +53,10 @@ interface Baseline { // Directory helpers // ============================================================================ -function encodeCwd(cwd: string): string { - return cwd.replace(/\//g, "--"); +export function encodeCwd(cwd: string): string { + // Encode the entire cwd so Windows drive letters, separators, and UNC + // prefixes cannot leak into the isolation path. + return Buffer.from(cwd, "utf8").toString("base64url"); } const gsdHome = process.env.GSD_HOME || path.join(os.homedir(), ".gsd"); @@ -500,4 +502,3 @@ export function readIsolationMode(): IsolationMode { return "none"; } } - diff --git a/src/tests/auto-resume-resource-loader.test.ts b/src/tests/auto-resume-resource-loader.test.ts new file mode 100644 index 000000000..9926e87c2 --- /dev/null +++ b/src/tests/auto-resume-resource-loader.test.ts @@ -0,0 +1,56 @@ +// GSD2 — Regression test: auto-mode resume resolves resource-loader.js from deployed path (#3949) +// Copyright (c) 2026 Jeremy McSpadden +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, resolve, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const autoTsPath = join(__dirname, "..", "resources", "extensions", "gsd", "auto.ts"); +const loaderTsPath = join(__dirname, "..", "loader.ts"); + +test("loader.ts sets GSD_PKG_ROOT env var", () => { + const loaderSrc = readFileSync(loaderTsPath, "utf-8"); + assert.ok( + loaderSrc.includes("process.env.GSD_PKG_ROOT"), + "loader.ts must set GSD_PKG_ROOT so deployed extensions can locate package-root modules", + ); +}); + +test("auto.ts resume uses GSD_PKG_ROOT for resource-loader import, not bare relative path", () => { + const autoSrc = readFileSync(autoTsPath, "utf-8"); + + // Must reference GSD_PKG_ROOT to build an 
absolute path + assert.ok( + autoSrc.includes("process.env.GSD_PKG_ROOT"), + "auto.ts must use GSD_PKG_ROOT to resolve resource-loader.js from deployed extension path", + ); + + // The import must use the computed variable (resourceLoaderPath), not a hardcoded relative path. + assert.ok( + autoSrc.includes("await import(resourceLoaderPath)"), + "auto.ts resource-loader import must use the computed resourceLoaderPath variable, not a hardcoded relative path", + ); + + // The resourceLoaderPath must be constructed from GSD_PKG_ROOT via pathToFileURL + // (raw filesystem paths break on Windows with ERR_UNSUPPORTED_ESM_URL_SCHEME) + assert.ok( + autoSrc.includes("pathToFileURL(join(pkgRoot,"), + "auto.ts must convert the constructed path to a file URL for cross-platform import()", + ); +}); + +test("GSD_PKG_ROOT resolves resource-loader.js correctly from package root", () => { + // Simulate what auto.ts does: given GSD_PKG_ROOT, construct the path + const pkgRoot = resolve(__dirname, "..", ".."); + const resourceLoaderPath = join(pkgRoot, "dist", "resource-loader.js"); + + // After build, dist/resource-loader.js should exist + // (this test runs post-build in CI; in dev it validates the path construction) + const expectedDir = dirname(resourceLoaderPath); + assert.ok( + expectedDir.endsWith(join("dist")), + `resource-loader path should be under dist/, got: ${expectedDir}`, + ); +}); diff --git a/src/tests/cli-onboarding-custom-provider.test.ts b/src/tests/cli-onboarding-custom-provider.test.ts new file mode 100644 index 000000000..3f7644d53 --- /dev/null +++ b/src/tests/cli-onboarding-custom-provider.test.ts @@ -0,0 +1,37 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { SettingsManager } from "../../packages/pi-coding-agent/src/core/settings-manager.ts"; + 
+test("SettingsManager reads defaultProvider/defaultModel from the explicit agentDir used by CLI (#3860)", () => { + const root = mkdtempSync(join(tmpdir(), "gsd-cli-settings-")); + const cwd = join(root, "project"); + const agentDir = join(root, ".gsd", "agent"); + + try { + mkdirSync(cwd, { recursive: true }); + mkdirSync(agentDir, { recursive: true }); + writeFileSync( + join(agentDir, "settings.json"), + JSON.stringify({ + defaultProvider: "example-provider", + defaultModel: "gpt-5.4", + }), + "utf-8", + ); + + const settingsManager = SettingsManager.create(cwd, agentDir); + assert.equal(settingsManager.getDefaultProvider(), "example-provider"); + assert.equal(settingsManager.getDefaultModel(), "gpt-5.4"); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("cli.ts wires SettingsManager.create with both cwd and agentDir (#3860)", () => { + const cliSource = readFileSync(join(import.meta.dirname, "..", "cli.ts"), "utf-8"); + assert.match(cliSource, /SettingsManager\.create\(process\.cwd\(\),\s*agentDir\)/); +}); diff --git a/src/tests/integration/web-live-interaction-contract.test.ts b/src/tests/integration/web-live-interaction-contract.test.ts index 5e288b69f..ce473ff40 100644 --- a/src/tests/integration/web-live-interaction-contract.test.ts +++ b/src/tests/integration/web-live-interaction-contract.test.ts @@ -358,6 +358,7 @@ function routeEvent(state: MinimalLiveState, event: any): MinimalLiveState { } case "tool_execution_start": { s.activeToolExecution = { id: event.toolCallId, name: event.toolName }; + s.streamingAssistantText = ""; break; } case "tool_execution_end": { @@ -802,6 +803,7 @@ test("(g-2) tool_execution_start/end update activeToolExecution", async () => { assert.ok(state.activeToolExecution); assert.equal(state.activeToolExecution.id, "tc-1"); assert.equal(state.activeToolExecution.name, "bash"); + assert.equal(state.streamingAssistantText, ""); state = routeEvent(state, { type: "tool_execution_end", @@ -813,6 
+815,46 @@ test("(g-2) tool_execution_start/end update activeToolExecution", async () => { assert.equal(state.activeToolExecution, null); }); +test("(g-3) tool_execution_start clears provisional streaming text so only post-tool final text survives", async () => { + let state = createMinimalLiveState(); + + state = routeEvent(state, { + type: "message_update", + assistantMessageEvent: { + type: "text_delta", + delta: "It seems the questions were presented to the user. Let me wait for them to answer.", + }, + }); + assert.equal(state.streamingAssistantText, "It seems the questions were presented to the user. Let me wait for them to answer."); + + state = routeEvent(state, { + type: "tool_execution_start", + toolCallId: "tc-ask-1", + toolName: "ask_user_questions", + }); + assert.equal(state.streamingAssistantText, ""); + + state = routeEvent(state, { + type: "tool_execution_end", + toolCallId: "tc-ask-1", + toolName: "ask_user_questions", + result: {}, + isError: false, + }); + state = routeEvent(state, { + type: "message_update", + assistantMessageEvent: { + type: "text_delta", + delta: "What are you working on? Once you answer I'll tailor my approach accordingly.", + }, + }); + state = routeEvent(state, { type: "turn_end" }); + + assert.deepEqual(state.liveTranscript, [ + "What are you working on? 
Once you answer I'll tailor my approach accordingly.", + ]); +}); + test("(h) steer and abort commands post the correct RPC command type", async (t) => { const fixture = makeWorkspaceFixture(); const sessionPath = createSessionFile(fixture.projectCwd, fixture.sessionsDir, "sess-steer", "Steer Session"); diff --git a/src/tests/integration/web-mode-cli.test.ts b/src/tests/integration/web-mode-cli.test.ts index 68b6c9c1b..9eee1f803 100644 --- a/src/tests/integration/web-mode-cli.test.ts +++ b/src/tests/integration/web-mode-cli.test.ts @@ -165,6 +165,7 @@ test('launchWebMode prefers the packaged standalone host and opens the resolved detached: true, stdio: 'ignore', windowsHide: true, + shell: false, env: { TEST_ENV: '1', HOSTNAME: '127.0.0.1', diff --git a/src/tests/integration/web-mode-windows-hide.test.ts b/src/tests/integration/web-mode-windows-hide.test.ts index aeb6baeea..c1b2902f5 100644 --- a/src/tests/integration/web-mode-windows-hide.test.ts +++ b/src/tests/integration/web-mode-windows-hide.test.ts @@ -117,4 +117,9 @@ test("launchWebMode source-dev host also passes windowsHide: true", async (t) => true, "source-dev spawn must also include windowsHide: true (#2628)", ); + assert.equal( + capturedOptions!.shell, + true, + "source-dev spawn must include shell: true when launching npm.cmd on Windows", + ); }); diff --git a/src/tests/integration/web-onboarding-contract.test.ts b/src/tests/integration/web-onboarding-contract.test.ts index 3ed833368..016c7ae1e 100644 --- a/src/tests/integration/web-onboarding-contract.test.ts +++ b/src/tests/integration/web-onboarding-contract.test.ts @@ -348,7 +348,7 @@ test("boot and onboarding routes expose locked required state plus explicitly sk ]); const anthropicProvider = bootPayload.onboarding.required.providers.find((provider: any) => provider.id === "anthropic"); assert.equal(anthropicProvider.supports.apiKey, true); - assert.equal(anthropicProvider.supports.oauthAvailable, true); + 
assert.equal(anthropicProvider.supports.oauthAvailable, false); const onboardingResponse = await onboardingRoute.GET(projectRequest(fixture.projectCwd, "/api/onboarding")); assert.equal(onboardingResponse.status, 200); @@ -408,7 +408,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte getEnvApiKey: noEnvApiKey, validateApiKey: async () => ({ ok: false, - message: "OpenAI rejected sk-test-secret-123456 because Bearer sk-test-secret-123456 is invalid", + message: "OpenAI rejected the provided key because Bearer invalid-demo-key is invalid", }), }); @@ -425,7 +425,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte body: JSON.stringify({ action: "save_api_key", providerId: "openai", - apiKey: "sk-test-secret-123456", + apiKey: "invalid-demo-key", }), }), ); @@ -440,7 +440,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte assert.equal(validationPayload.onboarding.lockReason, "required_setup"); assert.equal(validationPayload.onboarding.bridgeAuthRefresh.phase, "idle"); assert.match(validationPayload.onboarding.lastValidation.message, /OpenAI rejected/i); - assert.doesNotMatch(validationPayload.onboarding.lastValidation.message, /sk-test-secret-123456/); + assert.doesNotMatch(validationPayload.onboarding.lastValidation.message, /invalid-demo-key/); assert.equal(authStorage.hasAuth("openai"), false); const bootResponse = await bootRoute.GET(projectRequest(fixture.projectCwd, "/api/boot")); @@ -448,7 +448,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte const bootPayload = (await bootResponse.json()) as any; assert.equal(bootPayload.onboarding.locked, true); assert.equal(bootPayload.onboarding.lastValidation.status, "failed"); - assert.doesNotMatch(bootPayload.onboarding.lastValidation.message, /sk-test-secret-123456/); + assert.doesNotMatch(bootPayload.onboarding.lastValidation.message, /invalid-demo-key/); }); test("direct prompt 
commands cannot bypass onboarding while required setup is still locked", async (t) => { diff --git a/src/tests/mcp-createRequire.test.ts b/src/tests/mcp-createRequire.test.ts index 5f1292866..d16ebacd6 100644 --- a/src/tests/mcp-createRequire.test.ts +++ b/src/tests/mcp-createRequire.test.ts @@ -1,12 +1,9 @@ /** - * Regression test for #3603 — MCP server subpath imports via createRequire + * Regression test for #3914 — MCP server uses explicit .js SDK subpaths. * - * The ESM wildcard export map in @modelcontextprotocol/sdk does not resolve - * subpath imports correctly. The fix uses createRequire from node:module to - * resolve wildcard subpaths via the CJS resolver which auto-appends .js. - * - * Structural verification test — reads source to confirm createRequire import - * and _require.resolve usage exist. + * Extensionless wildcard exports for `server/stdio` and `types` do not resolve + * reliably across current Node / SDK combinations. The runtime import strings + * must include `.js`. 
*/ import { describe, test } from 'node:test'; @@ -20,29 +17,19 @@ const __dirname = dirname(__filename); const source = readFileSync(join(__dirname, '..', 'mcp-server.ts'), 'utf-8'); -describe('MCP server createRequire subpath resolution (#3603)', () => { - test('createRequire is imported from node:module', () => { - assert.match(source, /import\s*\{\s*createRequire\s*\}\s*from\s*['"]node:module['"]/, - 'createRequire should be imported from node:module'); +describe('MCP server SDK subpath imports (#3914)', () => { + test('server/stdio import uses explicit .js subpath', () => { + assert.match(source, /await import\(`\$\{MCP_PKG\}\/server\/stdio\.js`\)/, + 'server/stdio import should include the .js suffix'); }); - test('_require is created from import.meta.url', () => { - assert.match(source, /createRequire\(import\.meta\.url\)/, - '_require should be created using createRequire(import.meta.url)'); + test('types import uses explicit .js subpath', () => { + assert.match(source, /await import\(`\$\{MCP_PKG\}\/types\.js`\)/, + 'types import should include the .js suffix'); }); - test('_require.resolve is used for subpath imports', () => { - assert.match(source, /_require\.resolve\(/, - '_require.resolve should be used for subpath resolution'); - }); - - test('server/stdio subpath uses _require.resolve', () => { - assert.match(source, /_require\.resolve\(`\$\{MCP_PKG\}\/server\/stdio`\)/, - 'server/stdio import should use _require.resolve'); - }); - - test('types subpath uses _require.resolve', () => { - assert.match(source, /_require\.resolve\(`\$\{MCP_PKG\}\/types`\)/, - 'types import should use _require.resolve'); + test('legacy createRequire-based resolution is gone', () => { + assert.doesNotMatch(source, /createRequire|_require\.resolve/, + 'legacy createRequire-based subpath resolution should not remain'); }); }); diff --git a/src/tests/mcp-server.test.ts b/src/tests/mcp-server.test.ts index 9581809dd..8a6a672f9 100644 --- a/src/tests/mcp-server.test.ts +++ 
b/src/tests/mcp-server.test.ts @@ -30,25 +30,11 @@ test('startMcpServer accepts the correct argument shape', async () => { assert.strictEqual(startMcpServer.length, 1, 'startMcpServer should accept one argument') }) -test('startMcpServer can be called with mock tools', async () => { - const { startMcpServer } = await import(distUrl('mcp-server.js')) +test('compiled MCP runtime dependencies resolve with explicit .js subpaths', async () => { + const stdioMod = await import('@modelcontextprotocol/sdk/server/stdio.js') + const typesMod = await import('@modelcontextprotocol/sdk/types.js') - // Create a mock tool matching the McpToolDef interface - const mockTool = { - name: 'test_tool', - description: 'A test tool', - parameters: { type: 'object', properties: {} }, - execute: async () => ({ - content: [{ type: 'text', text: 'hello' }], - }), - } - - // Verify the function can be called with the correct signature - // without throwing during argument validation. It will attempt to - // connect to stdin/stdout as an MCP transport, which won't work in - // a test environment, but the Server instance is created successfully. - assert.doesNotThrow(() => { - void startMcpServer({ tools: [mockTool], version: '0.0.0-test' }) - .catch(() => { /* expected: no MCP client on stdin */ }) - }) + assert.strictEqual(typeof stdioMod.StdioServerTransport, 'function') + assert.ok(typesMod.ListToolsRequestSchema, 'ListToolsRequestSchema should be exported') + assert.ok(typesMod.CallToolRequestSchema, 'CallToolRequestSchema should be exported') }) diff --git a/src/tests/node-modules-symlink.test.ts b/src/tests/node-modules-symlink.test.ts index ef0bdf724..a22f10910 100644 --- a/src/tests/node-modules-symlink.test.ts +++ b/src/tests/node-modules-symlink.test.ts @@ -1,9 +1,15 @@ -import test from "node:test"; +/** + * Tests for ensureNodeModulesSymlink — covers symlink reconciliation for + * source installs (#3529) and pnpm-style merged node_modules (#3564). 
+ */ +import { test } from "node:test"; import assert from "node:assert/strict"; -import { existsSync, lstatSync, mkdirSync, mkdtempSync, readlinkSync, rmSync, symlinkSync, unlinkSync } from "node:fs"; +import { existsSync, lstatSync, mkdirSync, mkdtempSync, readFileSync, readlinkSync, readdirSync, rmSync, symlinkSync, unlinkSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; +// --- Integration tests via initResources (source/monorepo path) --- + test("initResources creates node_modules symlink in agent dir", async (t) => { const { initResources } = await import("../resource-loader.ts"); const tmp = mkdtempSync(join(tmpdir(), "gsd-symlink-")); @@ -30,7 +36,6 @@ test("initResources replaces a real directory blocking node_modules with a symli const fakeAgentDir = join(tmp, "agent"); t.after(() => rmSync(tmp, { recursive: true, force: true })); - // First call to set up agent dir structure initResources(fakeAgentDir); const nodeModulesPath = join(fakeAgentDir, "node_modules"); @@ -56,7 +61,6 @@ test("initResources replaces a stale symlink with a correct one", async (t) => { const fakeAgentDir = join(tmp, "agent"); t.after(() => rmSync(tmp, { recursive: true, force: true })); - // First call to set up agent dir structure initResources(fakeAgentDir); const nodeModulesPath = join(fakeAgentDir, "node_modules"); @@ -88,7 +92,6 @@ test("initResources replaces symlink whose target was deleted", async (t) => { const correctTarget = readlinkSync(nodeModulesPath); // Create a symlink that points to a path that doesn't exist - // (simulates the case where npm upgrade moved the package location) unlinkSync(nodeModulesPath); const deadTarget = join(tmp, "old-install", "node_modules"); symlinkSync(deadTarget, nodeModulesPath); @@ -102,3 +105,135 @@ test("initResources replaces symlink whose target was deleted", async (t) => { const fixedTarget = readlinkSync(nodeModulesPath); assert.equal(fixedTarget, correctTarget, "broken 
symlink should be replaced with correct target"); }); + +// --- Unit tests for pnpm-style merged node_modules (#3564) --- +// These simulate the filesystem layout without going through initResources, +// since packageRoot is fixed at module load time. + +test("pnpm layout: merged node_modules contains entries from both hoisted and internal", (t) => { + // Simulate pnpm global layout: + // hoisted/node_modules/ + // yaml/ ← external dep + // @sinclair/ ← external scoped dep + // gsd-pi/ ← package root + // node_modules/ + // @gsd/ ← workspace scope (NOT hoisted) + // @gsd-build/ ← workspace scope (NOT hoisted) + const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-merge-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const hoisted = join(tmp, "node_modules"); + const pkgRoot = join(hoisted, "gsd-pi"); + const internal = join(pkgRoot, "node_modules"); + const agentNodeModules = join(tmp, "agent", "node_modules"); + + // Create hoisted entries (external deps) + mkdirSync(join(hoisted, "yaml"), { recursive: true }); + mkdirSync(join(hoisted, "@sinclair", "typebox"), { recursive: true }); + mkdirSync(join(hoisted, "@anthropic-ai", "sdk"), { recursive: true }); + mkdirSync(pkgRoot, { recursive: true }); + + // Create internal entries (workspace packages) + mkdirSync(join(internal, "@gsd", "pi-ai"), { recursive: true }); + mkdirSync(join(internal, "@gsd", "pi-coding-agent"), { recursive: true }); + mkdirSync(join(internal, "@gsd-build", "core"), { recursive: true }); + + // Create merged directory manually (simulating what reconcileMergedNodeModules does) + mkdirSync(agentNodeModules, { recursive: true }); + + // Link hoisted entries (skip gsd-pi itself and dotfiles) + for (const entry of readdirSync(hoisted, { withFileTypes: true })) { + if (entry.name === "gsd-pi" || entry.name.startsWith(".")) continue; + symlinkSync(join(hoisted, entry.name), join(agentNodeModules, entry.name)); + } + + // Overlay @gsd* workspace scopes from internal (these take 
precedence) + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (!entry.name.startsWith("@gsd")) continue; + const link = join(agentNodeModules, entry.name); + try { lstatSync(link); unlinkSync(link); } catch { /* didn't exist */ } + symlinkSync(join(internal, entry.name), link); + } + + // Verify: external deps resolve through hoisted symlinks + assert.ok(existsSync(join(agentNodeModules, "yaml")), "yaml should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@sinclair")), "@sinclair should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@anthropic-ai")), "@anthropic-ai should resolve"); + + // Verify: workspace packages resolve through internal symlinks + assert.ok(existsSync(join(agentNodeModules, "@gsd")), "@gsd should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@gsd", "pi-ai")), "@gsd/pi-ai should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@gsd-build")), "@gsd-build should resolve"); + + // Verify: gsd-pi itself is NOT symlinked (it's the package root, not a dep) + assert.ok(!existsSync(join(agentNodeModules, "gsd-pi")), "gsd-pi should not be in merged dir"); + + // Verify: @gsd points to internal, not hoisted (internal takes precedence) + const gsdTarget = readlinkSync(join(agentNodeModules, "@gsd")); + assert.equal(gsdTarget, join(internal, "@gsd"), "@gsd should point to internal node_modules"); +}); + +test("hasMissingWorkspaceScopes detects pnpm layout", (t) => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-detect-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const hoisted = join(tmp, "hoisted"); + const internal = join(tmp, "internal"); + + // npm-style: @gsd exists in both hoisted and internal + mkdirSync(join(hoisted, "@gsd"), { recursive: true }); + mkdirSync(join(internal, "@gsd"), { recursive: true }); + + // Inline the detection logic for testing + const hasMissing = (h: string, i: string): boolean => { + if (!existsSync(i)) return false; + for 
(const entry of readdirSync(i, { withFileTypes: true })) { + if (entry.isDirectory() && entry.name.startsWith("@gsd") && + !existsSync(join(h, entry.name))) { + return true; + } + } + return false; + }; + + assert.equal(hasMissing(hoisted, internal), false, "npm-style: no missing scopes"); + + // pnpm-style: @gsd-build only in internal + mkdirSync(join(internal, "@gsd-build"), { recursive: true }); + assert.equal(hasMissing(hoisted, internal), true, "pnpm-style: @gsd-build missing from hoisted"); +}); + +test("merged node_modules marker uses fingerprint including directory entries", (t) => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-marker-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + // Simulate two directories with known entries + const hoisted = join(tmp, "hoisted"); + const internal = join(tmp, "internal"); + mkdirSync(join(hoisted, "yaml"), { recursive: true }); + mkdirSync(join(hoisted, "@sinclair"), { recursive: true }); + mkdirSync(join(internal, "@gsd"), { recursive: true }); + + // Build fingerprint the same way the production code does + const h = readdirSync(hoisted).sort().join(","); + const i = readdirSync(internal).sort().join(","); + const fakePackageRoot = "/usr/lib/node_modules/gsd-pi"; + const fingerprint = `${fakePackageRoot}\n${h}\n${i}`; + + const agentNodeModules = join(tmp, "agent", "node_modules"); + mkdirSync(agentNodeModules, { recursive: true }); + const marker = join(agentNodeModules, ".gsd-merged"); + writeFileSync(marker, fingerprint); + + // Verify fingerprint contains all three components + const stored = readFileSync(marker, "utf-8").trim(); + assert.ok(stored.includes(fakePackageRoot), "fingerprint includes packageRoot"); + assert.ok(stored.includes("@sinclair"), "fingerprint includes hoisted entries"); + assert.ok(stored.includes("@gsd"), "fingerprint includes internal entries"); + + // Verify fingerprint changes when a new package is added + mkdirSync(join(hoisted, "new-package"), { recursive: 
true }); + const h2 = readdirSync(hoisted).sort().join(","); + const fingerprint2 = `${fakePackageRoot}\n${h2}\n${i}`; + assert.notEqual(fingerprint, fingerprint2, "fingerprint should change when deps change"); +}); diff --git a/src/tests/package-mcp-server-elicitation.test.ts b/src/tests/package-mcp-server-elicitation.test.ts new file mode 100644 index 000000000..a746d8094 --- /dev/null +++ b/src/tests/package-mcp-server-elicitation.test.ts @@ -0,0 +1,227 @@ +import test from 'node:test' +import assert from 'node:assert/strict' +import { Client } from '@modelcontextprotocol/sdk/client/index.js' +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js' +import { ElicitRequestSchema } from '@modelcontextprotocol/sdk/types.js' + +import { + buildAskUserQuestionsElicitRequest, + createMcpServer, + formatAskUserQuestionsElicitResult, +} from '../../packages/mcp-server/src/server.js' + +function createSessionManagerStub() { + return { + startSession: async () => { + throw new Error('not implemented in test') + }, + getSession: () => undefined, + getResult: () => undefined, + cancelSession: async () => {}, + resolveBlocker: async () => {}, + } +} + +async function createConnectedClient(options?: { + onElicit?: (params: unknown) => Promise, +}) { + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair() + + const { server } = await createMcpServer(createSessionManagerStub() as never) + const client = new Client({ + name: 'test-client', + version: '0.0.0', + }, { + capabilities: { + elicitation: {}, + }, + }) + + if (options?.onElicit) { + client.setRequestHandler(ElicitRequestSchema, options.onElicit) + } + + await Promise.all([ + server.connect(serverTransport), + client.connect(clientTransport), + ]) + + return { + client, + close: async () => { + await client.close() + await server.close() + }, + } +} + +test('package MCP server exposes ask_user_questions over listTools', async () => { + const { client, close } = await 
createConnectedClient() + + try { + const tools = await client.listTools() + assert.ok(tools.tools.some(tool => tool.name === 'ask_user_questions')) + } finally { + await close() + } +}) + +test('ask_user_questions returns the packaged answers JSON shape for form elicitation', async () => { + const { client, close } = await createConnectedClient({ + onElicit: async (request) => { + const elicitation = (request as { + params?: { + message: string, + requestedSchema: { properties: Record, required?: string[] }, + }, + }).params ?? request as { + message: string, + requestedSchema: { properties: Record, required?: string[] }, + } + assert.match(elicitation.message, /Please answer the following question/) + assert.ok(elicitation.requestedSchema.properties.deployment) + assert.ok(elicitation.requestedSchema.properties['deployment__note']) + assert.ok(elicitation.requestedSchema.required?.includes('deployment')) + + return { + action: 'accept', + content: { + deployment: 'None of the above', + deployment__note: 'Need hybrid deployment.', + }, + } + }, + }) + + try { + const result = await client.callTool({ + name: 'ask_user_questions', + arguments: { + questions: [ + { + id: 'deployment', + header: 'Deploy', + question: 'Where will this run?', + options: [ + { label: 'Cloud', description: 'Managed hosting.' }, + { label: 'On-prem', description: 'Runs in customer infrastructure.' 
}, + ], + }, + ], + }, + }) + + const text = result.content.find(item => item.type === 'text') + assert.ok(text && 'text' in text) + assert.equal( + text.text, + JSON.stringify({ + answers: { + deployment: { + answers: ['None of the above', 'user_note: Need hybrid deployment.'], + }, + }, + }), + ) + } finally { + await close() + } +}) + +test('ask_user_questions returns an error result for invalid question payloads', async () => { + const { client, close } = await createConnectedClient() + + try { + const result = await client.callTool({ + name: 'ask_user_questions', + arguments: { + questions: [ + { + id: 'broken', + header: 'Broken', + question: 'This payload is invalid', + options: [], + }, + ], + }, + }) + + const text = result.content.find(item => item.type === 'text') + assert.ok(text && 'text' in text) + assert.equal(result.isError, true) + assert.match(text.text, /requires non-empty options/i) + } finally { + await close() + } +}) + +test('ask_user_questions returns the cancellation message when elicitation is declined', async () => { + const { client, close } = await createConnectedClient({ + onElicit: async () => ({ + action: 'decline', + }), + }) + + try { + const result = await client.callTool({ + name: 'ask_user_questions', + arguments: { + questions: [ + { + id: 'continue', + header: 'Continue', + question: 'Continue?', + options: [ + { label: 'Yes', description: 'Proceed.' }, + { label: 'No', description: 'Stop here.' }, + ], + }, + ], + }, + }) + + const text = result.content.find(item => item.type === 'text') + assert.ok(text && 'text' in text) + assert.equal(text.text, 'ask_user_questions was cancelled before receiving a response') + } finally { + await close() + } +}) + +test('helper formatting stays aligned with the tool contract', () => { + const questions = [ + { + id: 'focus_areas', + header: 'Focus', + question: 'Which areas matter most?', + allowMultiple: true, + options: [ + { label: 'Frontend', description: 'Prioritize the UI.' 
}, + { label: 'Backend', description: 'Prioritize server logic.' }, + ], + }, + ] + + const request = buildAskUserQuestionsElicitRequest(questions) + assert.equal(request.mode, 'form') + assert.ok(request.requestedSchema.properties.focus_areas) + assert.ok(!request.requestedSchema.properties['focus_areas__note']) + + const formatted = formatAskUserQuestionsElicitResult(questions, { + action: 'accept', + content: { + focus_areas: ['Frontend', 'Backend'], + }, + }) + + assert.equal( + formatted, + JSON.stringify({ + answers: { + focus_areas: { + answers: ['Frontend', 'Backend'], + }, + }, + }), + ) +}) diff --git a/src/tests/provider-migrations.test.ts b/src/tests/provider-migrations.test.ts new file mode 100644 index 000000000..d23e22b99 --- /dev/null +++ b/src/tests/provider-migrations.test.ts @@ -0,0 +1,77 @@ +import test from "node:test" +import assert from "node:assert/strict" +import { hasDirectAnthropicApiKey, shouldMigrateAnthropicToClaudeCode } from "../provider-migrations.ts" + +function makeAuthStorage(credentials: unknown[]) { + return { + getCredentialsForProvider(provider: string) { + return provider === "anthropic" ? 
credentials : [] + }, + } +} + +test("hasDirectAnthropicApiKey detects non-empty auth storage keys", () => { + assert.equal( + hasDirectAnthropicApiKey( + makeAuthStorage([{ type: "api_key", key: "sk-ant-test" }]) as any, + {} as NodeJS.ProcessEnv, + ), + true, + ) +}) + +test("hasDirectAnthropicApiKey ignores empty placeholder keys", () => { + assert.equal( + hasDirectAnthropicApiKey( + makeAuthStorage([{ type: "api_key", key: "" }]) as any, + {} as NodeJS.ProcessEnv, + ), + false, + ) +}) + +test("hasDirectAnthropicApiKey detects ANTHROPIC_API_KEY env fallback", () => { + assert.equal( + hasDirectAnthropicApiKey( + makeAuthStorage([]) as any, + { ANTHROPIC_API_KEY: "sk-ant-env" } as NodeJS.ProcessEnv, + ), + true, + ) +}) + +test("shouldMigrateAnthropicToClaudeCode blocks migration for direct-key users", () => { + assert.equal( + shouldMigrateAnthropicToClaudeCode({ + authStorage: makeAuthStorage([{ type: "api_key", key: "sk-ant-test" }]) as any, + isClaudeCodeReady: true, + defaultProvider: "anthropic", + env: {} as NodeJS.ProcessEnv, + }), + false, + ) +}) + +test("shouldMigrateAnthropicToClaudeCode allows OAuth-only anthropic users", () => { + assert.equal( + shouldMigrateAnthropicToClaudeCode({ + authStorage: makeAuthStorage([{ type: "oauth" }]) as any, + isClaudeCodeReady: true, + defaultProvider: "anthropic", + env: {} as NodeJS.ProcessEnv, + }), + true, + ) +}) + +test("shouldMigrateAnthropicToClaudeCode stays off for other providers", () => { + assert.equal( + shouldMigrateAnthropicToClaudeCode({ + authStorage: makeAuthStorage([{ type: "oauth" }]) as any, + isClaudeCodeReady: true, + defaultProvider: "openai", + env: {} as NodeJS.ProcessEnv, + }), + false, + ) +}) diff --git a/src/tests/update-check.test.ts b/src/tests/update-check.test.ts index caa712533..40d2c5f28 100644 --- a/src/tests/update-check.test.ts +++ b/src/tests/update-check.test.ts @@ -5,7 +5,7 @@ import { join } from 'node:path' import { tmpdir } from 'node:os' import { createServer } from 
'node:http' -import { compareSemver, readUpdateCache, writeUpdateCache, checkForUpdates } from '../update-check.js' +import { compareSemver, readUpdateCache, writeUpdateCache, checkForUpdates, fetchLatestVersionFromRegistry } from '../update-check.js' // --------------------------------------------------------------------------- // compareSemver @@ -315,3 +315,23 @@ test('checkForUpdates handles missing version field in response', async (t) => { assert.ok(!called, 'onUpdate should not be called when response has no version') }) + +test('fetchLatestVersionFromRegistry returns the registry version string', async (t) => { + const registry = await startMockRegistry({ version: '2.67.0' }) + t.after(async () => { + await registry.close() + }) + + const latest = await fetchLatestVersionFromRegistry(registry.url, 5000) + assert.equal(latest, '2.67.0') +}) + +test('fetchLatestVersionFromRegistry returns null for blank version strings', async (t) => { + const registry = await startMockRegistry({ version: '' }) + t.after(async () => { + await registry.close() + }) + + const latest = await fetchLatestVersionFromRegistry(registry.url, 5000) + assert.equal(latest, null) +}) diff --git a/src/tests/update-cmd-diagnostics.test.ts b/src/tests/update-cmd-diagnostics.test.ts index 71fff7b36..8f3c5c088 100644 --- a/src/tests/update-cmd-diagnostics.test.ts +++ b/src/tests/update-cmd-diagnostics.test.ts @@ -18,10 +18,17 @@ test("update-cmd prints latest version before comparison (#3445)", () => { assert.ok(latestPrintIdx < comparisonIdx, "Must print latest BEFORE comparison"); }); -test("update-cmd bypasses npm cache (#3445)", () => { +test("update commands use the registry fetch helper instead of npm view (#3806)", () => { const src = readFileSync(join(__dirname, "..", "update-cmd.ts"), "utf-8"); + const handlerSrc = readFileSync(join(__dirname, "..", "resources", "extensions", "gsd", "commands-handlers.ts"), "utf-8"); assert.ok( - src.includes("npm_config_cache"), - "Must clear npm 
cache env to bypass stale registry data", + src.includes("fetchLatestVersionFromRegistry"), + "update-cmd should use the shared registry fetch helper", ); + assert.ok(!src.includes("npm view "), "update-cmd should no longer shell out to npm view"); + assert.ok( + handlerSrc.includes("fetchLatestVersionForCommand"), + "/gsd update should fetch the latest version through a registry helper too", + ); + assert.ok(!handlerSrc.includes("npm view "), "/gsd update should no longer shell out to npm view"); }); diff --git a/src/tests/windows-portability.test.ts b/src/tests/windows-portability.test.ts new file mode 100644 index 000000000..30dbde0e5 --- /dev/null +++ b/src/tests/windows-portability.test.ts @@ -0,0 +1,78 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { resolveLocalBinaryPath } from "../../packages/pi-coding-agent/src/core/lsp/config.ts"; +import { encodeCwd } from "../resources/extensions/subagent/isolation.ts"; + +function makeTempDir(prefix: string): string { + const dir = path.join( + os.tmpdir(), + `gsd-windows-portability-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, + ); + mkdirSync(dir, { recursive: true }); + return dir; +} + +test("resolveLocalBinaryPath finds Windows npm shims", () => { + const dir = makeTempDir("lsp-shim"); + try { + writeFileSync(path.join(dir, "package.json"), "{}"); + mkdirSync(path.join(dir, "node_modules", ".bin"), { recursive: true }); + const shimPath = path.join(dir, "node_modules", ".bin", "tsc.cmd"); + writeFileSync(shimPath, "@echo off\r\n"); + + const resolved = resolveLocalBinaryPath("tsc", dir, true); + assert.equal(resolved, shimPath); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("resolveLocalBinaryPath finds Windows venv Scripts executables", () => { + const dir = 
makeTempDir("lsp-scripts"); + try { + writeFileSync(path.join(dir, "pyproject.toml"), ""); + mkdirSync(path.join(dir, "venv", "Scripts"), { recursive: true }); + const exePath = path.join(dir, "venv", "Scripts", "python.exe"); + writeFileSync(exePath, ""); + + const resolved = resolveLocalBinaryPath("python", dir, true); + assert.equal(resolved, exePath); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("encodeCwd produces a filesystem-safe token for Windows paths", () => { + const encoded = encodeCwd("C:\\Users\\Alice\\repo"); + assert.match(encoded, /^[A-Za-z0-9_-]+$/); + assert.ok(!encoded.includes(":")); + assert.ok(!encoded.includes("\\")); + assert.ok(!encoded.includes("/")); +}); + +test("Windows launch points use shell-safe shims", () => { + const gsdClient = readFileSync( + path.join(process.cwd(), "vscode-extension", "src", "gsd-client.ts"), + "utf8", + ); + const updateService = readFileSync( + path.join(process.cwd(), "src", "web", "update-service.ts"), + "utf8", + ); + const preExecution = readFileSync( + path.join(process.cwd(), "src", "resources", "extensions", "gsd", "pre-execution-checks.ts"), + "utf8", + ); + const validatePack = readFileSync( + path.join(process.cwd(), "scripts", "validate-pack.js"), + "utf8", + ); + + assert.match(gsdClient, /shell:\s*process\.platform === "win32"/); + assert.match(updateService, /npm\.cmd/); + assert.match(preExecution, /npm\.cmd/); + assert.match(validatePack, /shell:\s*process\.platform === 'win32'/); +}); diff --git a/src/update-check.ts b/src/update-check.ts index 784eeb900..d560c318b 100644 --- a/src/update-check.ts +++ b/src/update-check.ts @@ -8,6 +8,7 @@ const CACHE_FILE = join(appRoot, '.update-check') const NPM_PACKAGE_NAME = 'gsd-pi' const CHECK_INTERVAL_MS = 24 * 60 * 60 * 1000 // 24 hours const FETCH_TIMEOUT_MS = 5000 +const DEFAULT_REGISTRY_URL = `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest` interface UpdateCheckCache { lastCheck: number @@ -47,6 +48,32 @@ 
export function writeUpdateCache(cache: UpdateCheckCache, cachePath: string = CA } } +function normalizeLatestVersion(version: unknown): string | null { + if (typeof version !== 'string') return null + const trimmed = version.trim().replace(/^v/, '') + return trimmed.length > 0 ? trimmed : null +} + +export async function fetchLatestVersionFromRegistry( + registryUrl: string = DEFAULT_REGISTRY_URL, + fetchTimeoutMs: number = FETCH_TIMEOUT_MS, +): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs) + + try { + const res = await fetch(registryUrl, { signal: controller.signal }) + if (!res.ok) return null + + const data = (await res.json()) as { version?: string } + return normalizeLatestVersion(data.version) + } catch { + return null + } finally { + clearTimeout(timeout) + } +} + function printUpdateBanner(current: string, latest: string): void { process.stderr.write( ` ${chalk.yellow('Update available:')} ${chalk.dim(`v${current}`)} → ${chalk.bold(`v${latest}`)}\n` + @@ -70,7 +97,7 @@ export interface UpdateCheckOptions { export async function checkForUpdates(options: UpdateCheckOptions = {}): Promise { const currentVersion = options.currentVersion || process.env.GSD_VERSION || '0.0.0' const cachePath = options.cachePath || CACHE_FILE - const registryUrl = options.registryUrl || `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest` + const registryUrl = options.registryUrl || DEFAULT_REGISTRY_URL const checkIntervalMs = options.checkIntervalMs ?? CHECK_INTERVAL_MS const fetchTimeoutMs = options.fetchTimeoutMs ?? 
FETCH_TIMEOUT_MS const onUpdate = options.onUpdate || printUpdateBanner @@ -84,18 +111,8 @@ export async function checkForUpdates(options: UpdateCheckOptions = {}): Promise return } - // Fetch latest version from npm registry - const controller = new AbortController() - const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs) - try { - const res = await fetch(registryUrl, { signal: controller.signal }) - clearTimeout(timeout) - - if (!res.ok) return - - const data = (await res.json()) as { version?: string } - const latestVersion = data.version + const latestVersion = await fetchLatestVersionFromRegistry(registryUrl, fetchTimeoutMs) if (!latestVersion) return writeUpdateCache({ lastCheck: Date.now(), latestVersion }, cachePath) @@ -105,8 +122,6 @@ export async function checkForUpdates(options: UpdateCheckOptions = {}): Promise } } catch { // Network error or timeout — silently ignore, don't block startup - } finally { - clearTimeout(timeout) } } @@ -123,7 +138,7 @@ const PROMPT_TIMEOUT_MS = 30_000 export async function checkAndPromptForUpdates(options: UpdateCheckOptions = {}): Promise { const currentVersion = options.currentVersion || process.env.GSD_VERSION || '0.0.0' const cachePath = options.cachePath || CACHE_FILE - const registryUrl = options.registryUrl || `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest` + const registryUrl = options.registryUrl || DEFAULT_REGISTRY_URL const checkIntervalMs = options.checkIntervalMs ?? CHECK_INTERVAL_MS const fetchTimeoutMs = options.fetchTimeoutMs ?? 
FETCH_TIMEOUT_MS @@ -134,22 +149,13 @@ export async function checkAndPromptForUpdates(options: UpdateCheckOptions = {}) if (cache && Date.now() - cache.lastCheck < checkIntervalMs) { latestVersion = cache.latestVersion } else { - const controller = new AbortController() - const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs) try { - const res = await fetch(registryUrl, { signal: controller.signal }) - clearTimeout(timeout) - if (res.ok) { - const data = (await res.json()) as { version?: string } - if (data.version) { - latestVersion = data.version - writeUpdateCache({ lastCheck: Date.now(), latestVersion }, cachePath) - } + latestVersion = await fetchLatestVersionFromRegistry(registryUrl, fetchTimeoutMs) + if (latestVersion) { + writeUpdateCache({ lastCheck: Date.now(), latestVersion }, cachePath) } } catch { // Network unavailable — silently skip - } finally { - clearTimeout(timeout) } } diff --git a/src/update-cmd.ts b/src/update-cmd.ts index 9534fd9f6..18dcd0c48 100644 --- a/src/update-cmd.ts +++ b/src/update-cmd.ts @@ -1,5 +1,5 @@ import { execSync } from 'node:child_process' -import { compareSemver } from './update-check.js' +import { compareSemver, fetchLatestVersionFromRegistry } from './update-check.js' const NPM_PACKAGE = 'gsd-pi' @@ -14,15 +14,8 @@ export async function runUpdate(): Promise { process.stdout.write(`${dim}Current version:${reset} v${current}\n`) process.stdout.write(`${dim}Checking npm registry...${reset}\n`) - // Fetch latest version — bypass npm client cache to avoid stale results (#3445) - let latest: string - try { - latest = execSync(`npm view ${NPM_PACKAGE} version --fetch-retry-mintimeout=3000`, { - encoding: 'utf-8', - stdio: ['ignore', 'pipe', 'ignore'], - env: { ...process.env, npm_config_cache: '' }, - }).trim() - } catch { + const latest = await fetchLatestVersionFromRegistry() + if (!latest) { process.stderr.write(`${yellow}Failed to reach npm registry.${reset}\n`) process.exit(1) } diff --git a/src/web-mode.ts 
b/src/web-mode.ts index 665e0f5a8..3d917431c 100644 --- a/src/web-mode.ts +++ b/src/web-mode.ts @@ -353,6 +353,10 @@ function getSpawnCommandForSourceHost(platform: NodeJS.Platform): string { return platform === 'win32' ? 'npm.cmd' : 'npm' } +function needsWindowsShell(command: string, platform: NodeJS.Platform): boolean { + return platform === 'win32' && /\.(cmd|bat)$/i.test(command) +} + function formatLaunchStatus(status: WebModeLaunchStatus): string { if (status.ok) { return `[gsd] Web mode startup: status=started cwd=${status.cwd} port=${status.port} host=${status.hostPath} kind=${status.hostKind} url=${status.url}\n` @@ -636,6 +640,7 @@ export async function launchWebMode( detached: true, stdio: 'ignore', windowsHide: true, + shell: needsWindowsShell(spawnSpec.command, deps.platform ?? process.platform), env, }, ) diff --git a/src/web/onboarding-service.ts b/src/web/onboarding-service.ts index 259865da5..764949c58 100644 --- a/src/web/onboarding-service.ts +++ b/src/web/onboarding-service.ts @@ -142,7 +142,7 @@ type ProviderFlowRuntime = { }; const REQUIRED_PROVIDER_CATALOG: RequiredProviderCatalogEntry[] = [ - { id: "anthropic", label: "Anthropic (Claude)", supportsApiKey: true, supportsOAuth: true, recommended: true }, + { id: "anthropic", label: "Anthropic (Claude)", supportsApiKey: true, supportsOAuth: false, recommended: true }, { id: "openai", label: "OpenAI", supportsApiKey: true, supportsOAuth: false }, { id: "github-copilot", label: "GitHub Copilot", supportsApiKey: false, supportsOAuth: true }, { id: "openai-codex", label: "ChatGPT Plus/Pro (Codex Subscription)", supportsApiKey: false, supportsOAuth: true }, diff --git a/src/web/update-service.ts b/src/web/update-service.ts index 62c728161..5b6ccfef8 100644 --- a/src/web/update-service.ts +++ b/src/web/update-service.ts @@ -4,6 +4,7 @@ import { compareSemver } from "../update-check.ts" const NPM_PACKAGE_NAME = "gsd-pi" const REGISTRY_URL = `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest` 
const FETCH_TIMEOUT_MS = 5000 +const NPM_COMMAND = process.platform === "win32" ? "npm.cmd" : "npm" // --- Version check --- @@ -69,11 +70,12 @@ export function triggerUpdate(targetVersion?: string): boolean { updateState = { status: "running", targetVersion } - const child = spawn("npm", ["install", "-g", "gsd-pi@latest"], { + const child = spawn(NPM_COMMAND, ["install", "-g", "gsd-pi@latest"], { stdio: ["ignore", "ignore", "pipe"], // Detach so the child process is not killed if the parent exits detached: false, windowsHide: true, + shell: process.platform === "win32", }) let stderr = "" diff --git a/vscode-extension/src/gsd-client.ts b/vscode-extension/src/gsd-client.ts index b2a872c5e..ef6d65978 100644 --- a/vscode-extension/src/gsd-client.ts +++ b/vscode-extension/src/gsd-client.ts @@ -127,6 +127,7 @@ export class GsdClient implements vscode.Disposable { cwd: this.cwd, stdio: ["pipe", "pipe", "pipe"], env: { ...process.env }, + shell: process.platform === "win32", }); this.process = proc; diff --git a/web/lib/gsd-workspace-store.tsx b/web/lib/gsd-workspace-store.tsx index de80f47bd..adee496d6 100644 --- a/web/lib/gsd-workspace-store.tsx +++ b/web/lib/gsd-workspace-store.tsx @@ -5134,25 +5134,18 @@ export class GSDWorkspaceStore { } private handleToolExecutionStart(event: ToolExecutionStartEvent): void { - // Finalize any in-flight streaming content into segments before the tool runs - const pendingSegments: TurnSegment[] = [] - if (this.state.streamingThinkingText.length > 0) { - pendingSegments.push({ kind: "thinking", content: this.state.streamingThinkingText }) - } - if (this.state.streamingAssistantText.length > 0) { - pendingSegments.push({ kind: "text", content: this.state.streamingAssistantText }) - } this.patchState({ activeToolExecution: { id: event.toolCallId, name: event.toolName, args: (event as Record).args as Record | undefined, }, - ...(pendingSegments.length > 0 ? 
{ - currentTurnSegments: [...this.state.currentTurnSegments, ...pendingSegments], - streamingAssistantText: "", - streamingThinkingText: "", - } : {}), + // Treat pre-tool streaming text as ephemeral. Claude Code can emit + // provisional assistant text before a tool call, then replace it with + // the real final text after the tool completes. If we finalize that + // interim text here, the chat timeline shows stale text above the tool. + streamingAssistantText: "", + streamingThinkingText: "", }) }